re PR target/84914 (PowerPC complex multiply/divide calls the wrong function when...
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2018 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

#define min(A,B) ((A) < (B) ? (A) : (B))
#define max(A,B) ((A) > (B) ? (A) : (B))
static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
  bool toc_is_wrapped_separately;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV     = 0x001,     /* Use divide estimate */
  RECIP_DF_DIV     = 0x002,
  RECIP_V4SF_DIV   = 0x004,
  RECIP_V2DF_DIV   = 0x008,

  RECIP_SF_RSQRT   = 0x010,     /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT   = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE       = 0,
  RECIP_ALL        = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                      | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                      | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
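/* A hedged note on how the table above is consumed (the mask arithmetic uses
   only values defined in this file; the option string is an example, not code
   from this function): -mrecip=divf,rsqrtd looks up "divf" and "rsqrtd" here
   and ORs their masks, giving
   (RECIP_SF_DIV | RECIP_V4SF_DIV) | (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT)
   = 0x005 | 0x0a0 = 0x0a5.  Per the GCC manual, a "!" prefix on a name
   clears those mask bits instead of setting them.  */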
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",      PPC_PLATFORM_POWER9 },
  { "power8",      PPC_PLATFORM_POWER8 },
  { "power7",      PPC_PLATFORM_POWER7 },
  { "power6x",     PPC_PLATFORM_POWER6X },
  { "power6",      PPC_PLATFORM_POWER6 },
  { "power5+",     PPC_PLATFORM_POWER5_PLUS },
  { "power5",      PPC_PLATFORM_POWER5 },
  { "ppc970",      PPC_PLATFORM_PPC970 },
  { "power4",      PPC_PLATFORM_POWER4 },
  { "ppca2",       PPC_PLATFORM_PPCA2 },
  { "ppc476",      PPC_PLATFORM_PPC476 },
  { "ppc464",      PPC_PLATFORM_PPC464 },
  { "ppc440",      PPC_PLATFORM_PPC440 },
  { "ppc405",      PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
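/* Illustrative use of the table above -- a sketch of typical user code, not
   code from this file:

     if (__builtin_cpu_is ("power9"))
       ... take a POWER9-tuned path ...

   The builtin expands to a comparison of the AT_PLATFORM value that a new
   enough glibc caches in the TCB against the PPC_PLATFORM_* constant looked
   up in cpu_is_info.  */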
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",         PPC_FEATURE_HAS_4xxMAC,      0 },
  { "altivec",        PPC_FEATURE_HAS_ALTIVEC,     0 },
  { "arch_2_05",      PPC_FEATURE_ARCH_2_05,       0 },
  { "arch_2_06",      PPC_FEATURE_ARCH_2_06,       0 },
  { "archpmu",        PPC_FEATURE_PERFMON_COMPAT,  0 },
  { "booke",          PPC_FEATURE_BOOKE,           0 },
  { "cellbe",         PPC_FEATURE_CELL_BE,         0 },
  { "dfp",            PPC_FEATURE_HAS_DFP,         0 },
  { "efpdouble",      PPC_FEATURE_HAS_EFP_DOUBLE,  0 },
  { "efpsingle",      PPC_FEATURE_HAS_EFP_SINGLE,  0 },
  { "fpu",            PPC_FEATURE_HAS_FPU,         0 },
  { "ic_snoop",       PPC_FEATURE_ICACHE_SNOOP,    0 },
  { "mmu",            PPC_FEATURE_HAS_MMU,         0 },
  { "notb",           PPC_FEATURE_NO_TB,           0 },
  { "pa6t",           PPC_FEATURE_PA6T,            0 },
  { "power4",         PPC_FEATURE_POWER4,          0 },
  { "power5",         PPC_FEATURE_POWER5,          0 },
  { "power5+",        PPC_FEATURE_POWER5_PLUS,     0 },
  { "power6x",        PPC_FEATURE_POWER6_EXT,      0 },
  { "ppc32",          PPC_FEATURE_32,              0 },
  { "ppc601",         PPC_FEATURE_601_INSTR,       0 },
  { "ppc64",          PPC_FEATURE_64,              0 },
  { "ppcle",          PPC_FEATURE_PPC_LE,          0 },
  { "smt",            PPC_FEATURE_SMT,             0 },
  { "spe",            PPC_FEATURE_HAS_SPE,         0 },
  { "true_le",        PPC_FEATURE_TRUE_LE,         0 },
  { "ucache",         PPC_FEATURE_UNIFIED_CACHE,   0 },
  { "vsx",            PPC_FEATURE_HAS_VSX,         0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",      PPC_FEATURE2_ARCH_2_07,      1 },
  { "dscr",           PPC_FEATURE2_HAS_DSCR,       1 },
  { "ebb",            PPC_FEATURE2_HAS_EBB,        1 },
  { "htm",            PPC_FEATURE2_HAS_HTM,        1 },
  { "htm-nosc",       PPC_FEATURE2_HTM_NOSC,       1 },
  { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 },
  { "isel",           PPC_FEATURE2_HAS_ISEL,       1 },
  { "tar",            PPC_FEATURE2_HAS_TAR,        1 },
  { "vcrypto",        PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
  { "arch_3_00",      PPC_FEATURE2_ARCH_3_00,      1 },
  { "ieee128",        PPC_FEATURE2_HAS_IEEE128,    1 },
  { "darn",           PPC_FEATURE2_DARN,           1 },
  { "scv",            PPC_FEATURE2_SCV,            1 }
};
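/* Illustrative use of the table above (again a sketch of user code, not code
   from this file):

     if (__builtin_cpu_supports ("vsx"))
       ... use VSX instructions ...

   Entries with id == 0 test bits of the AT_HWCAP word cached in the TCB;
   entries with id == 1 (e.g. "arch_3_00") test the AT_HWCAP2 word.  */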
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,            /* default clone.  */
  CLONE_ISA_2_05,               /* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,               /* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,               /* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,               /* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;       /* rs6000_isa mask */
  const char *name;             /* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,                     "" },           /* Default options.  */
  { OPTION_MASK_CMPB,      "arch_2_05" },  /* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,   "arch_2_06" },  /* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },  /* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },  /* ISA 3.00 (power9).  */
};
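/* The table above backs the target_clones function attribute.  A hedged
   usage sketch (user code, not part of this file):

     __attribute__ ((target_clones ("cpu=power9", "default")))
     double mult (double a, double b) { return a * b; }

   The generated ifunc resolver walks the clones from the highest ISA down to
   the default, using __builtin_cpu_supports on the rs6000_clone_map names
   (e.g. "arch_3_00") to pick a variant at load time.  */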
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when determining
   legitimate addresses.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS  RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },           /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },           /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },       /* RELOAD_REG_VMX.  */
  { "Any", -1 },                        /* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID       0x01  /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE    0x02  /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED     0x04  /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET      0x08  /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC  0x10  /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY  0x20  /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16     0x40  /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET 0x80  /* quad offset is limited.  */
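/* Worked example of composing the mask bits above, using only values defined
   in this file: an entry of

     RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
     | RELOAD_REG_PRE_INCDEC

   is 0x01 | 0x04 | 0x08 | 0x10 = 0x1d, i.e. the mode is valid in the
   register class with reg+reg, reg+offset, and PRE_INC/PRE_DEC addressing,
   but has no PRE_MODIFY, AND -16, or quad-offset support.  */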
/* Register type masks based on the type, of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;     /* INSN to reload for loading.  */
  enum insn_code reload_store;    /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;  /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;  /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;  /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;   /* INSN for fusing gpr ADDIS/loads.  */
                                  /* INSNs for fusing addi with loads
                                     or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
                                  /* INSNs for fusing addis with loads
                                     or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];  /* Valid address masks.  */
  bool scalar_in_vmx_p;           /* Scalar value can go in VMX.  */
  bool fused_toc;                 /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
        {
          out_set = single_set (out_insn);
          if (!out_set)
            {
              out_pat = PATTERN (out_insn);
              if (GET_CODE (out_pat) == PARALLEL)
                {
                  for (i = 0; i < XVECLEN (out_pat, 0); i++)
                    {
                      out_exp = XVECEXP (out_pat, 0, i);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
        return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
        {
          in_exp = XVECEXP (in_pat, 0, i);
          if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
            continue;
          else if (GET_CODE (in_exp) != SET)
            return false;

          if (MEM_P (SET_DEST (in_exp)))
            {
              out_set = single_set (out_insn);
              if (!out_set)
                {
                  out_pat = PATTERN (out_insn);
                  if (GET_CODE (out_pat) != PARALLEL)
                    return false;
                  for (j = 0; j < XVECLEN (out_pat, 0); j++)
                    {
                      out_exp = XVECEXP (out_pat, 0, j);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }

  return store_data_bypass_p (out_insn, in_insn);
}
/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}
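/* Background for the helper above (ISA detail, stated as an assumption
   rather than taken from this file): the ISA 3.0 lxv/stxv instructions use a
   DQ-form encoding whose displacement field is implicitly shifted left 4
   bits, so only offsets that are multiples of 16 are encodable; that is the
   limitation RELOAD_REG_QUAD_OFFSET tracks.  */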
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_X
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void htm_init_builtins (void);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
                                                  reg_class_t,
                                                  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
                                                machine_mode,
                                                reg_class_t);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];         /* return value + 3 arguments.  */
  unsigned char uns_p[4];       /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
  "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
  "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
  "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",    1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",   0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct",  0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,         0, 0, false, false, false, false, NULL, NULL }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
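/* Worked example of the macro above: ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO)
   is 0x80000000 >> 0 = 0x80000000 (the %v0 bit), while
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x80000000 >> 31 = 0x1
   (the %v31 bit), matching the VRSAVE layout described above.  */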
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1701 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1702 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1704 #undef TARGET_INIT_BUILTINS
1705 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1706 #undef TARGET_BUILTIN_DECL
1707 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1709 #undef TARGET_FOLD_BUILTIN
1710 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1711 #undef TARGET_GIMPLE_FOLD_BUILTIN
1712 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1714 #undef TARGET_EXPAND_BUILTIN
1715 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1717 #undef TARGET_MANGLE_TYPE
1718 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1720 #undef TARGET_INIT_LIBFUNCS
1721 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1723 #if TARGET_MACHO
1724 #undef TARGET_BINDS_LOCAL_P
1725 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1726 #endif
1728 #undef TARGET_MS_BITFIELD_LAYOUT_P
1729 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1731 #undef TARGET_ASM_OUTPUT_MI_THUNK
1732 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1734 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1735 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1737 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1738 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1740 #undef TARGET_REGISTER_MOVE_COST
1741 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1742 #undef TARGET_MEMORY_MOVE_COST
1743 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1744 #undef TARGET_CANNOT_COPY_INSN_P
1745 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1746 #undef TARGET_RTX_COSTS
1747 #define TARGET_RTX_COSTS rs6000_rtx_costs
1748 #undef TARGET_ADDRESS_COST
1749 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1750 #undef TARGET_INSN_COST
1751 #define TARGET_INSN_COST rs6000_insn_cost
1753 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1754 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1756 #undef TARGET_PROMOTE_FUNCTION_MODE
1757 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1759 #undef TARGET_RETURN_IN_MEMORY
1760 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1762 #undef TARGET_RETURN_IN_MSB
1763 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1765 #undef TARGET_SETUP_INCOMING_VARARGS
1766 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1768 /* Always strict argument naming on rs6000. */
1769 #undef TARGET_STRICT_ARGUMENT_NAMING
1770 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1771 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1772 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1773 #undef TARGET_SPLIT_COMPLEX_ARG
1774 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1775 #undef TARGET_MUST_PASS_IN_STACK
1776 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1777 #undef TARGET_PASS_BY_REFERENCE
1778 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1779 #undef TARGET_ARG_PARTIAL_BYTES
1780 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1781 #undef TARGET_FUNCTION_ARG_ADVANCE
1782 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1783 #undef TARGET_FUNCTION_ARG
1784 #define TARGET_FUNCTION_ARG rs6000_function_arg
1785 #undef TARGET_FUNCTION_ARG_PADDING
1786 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1787 #undef TARGET_FUNCTION_ARG_BOUNDARY
1788 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1790 #undef TARGET_BUILD_BUILTIN_VA_LIST
1791 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1793 #undef TARGET_EXPAND_BUILTIN_VA_START
1794 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1796 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1797 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1799 #undef TARGET_EH_RETURN_FILTER_MODE
1800 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1802 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1803 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1805 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1806 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1808 #undef TARGET_FLOATN_MODE
1809 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1811 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1812 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1814 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1815 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1817 #undef TARGET_MD_ASM_ADJUST
1818 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1820 #undef TARGET_OPTION_OVERRIDE
1821 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1823 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1824 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1825 rs6000_builtin_vectorized_function
1827 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1828 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1829 rs6000_builtin_md_vectorized_function
1831 #undef TARGET_STACK_PROTECT_GUARD
1832 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1834 #if !TARGET_MACHO
1835 #undef TARGET_STACK_PROTECT_FAIL
1836 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1837 #endif
1839 #ifdef HAVE_AS_TLS
1840 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1841 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1842 #endif
1844 /* Use a 32-bit anchor range. This leads to sequences like:
1846 addis tmp,anchor,high
1847 add dest,tmp,low
1849 where tmp itself acts as an anchor, and can be shared between
1850 accesses to the same 64k page. */
1851 #undef TARGET_MIN_ANCHOR_OFFSET
1852 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1853 #undef TARGET_MAX_ANCHOR_OFFSET
1854 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
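/* TARGET_MIN_ANCHOR_OFFSET is spelled -0x7fffffff - 1 rather than
   -0x80000000 because the literal 0x80000000 does not fit in a signed 32-bit
   int, so negating it directly would happen in an unsigned type.  Together
   the two defines cover the whole signed 32-bit range
   [-2147483648, 2147483647].  */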
1855 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1856 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1857 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1858 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1860 #undef TARGET_BUILTIN_RECIPROCAL
1861 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1863 #undef TARGET_SECONDARY_RELOAD
1864 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1865 #undef TARGET_SECONDARY_MEMORY_NEEDED
1866 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1867 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1868 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1870 #undef TARGET_LEGITIMATE_ADDRESS_P
1871 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1873 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1874 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1876 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1877 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1879 #undef TARGET_CAN_ELIMINATE
1880 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1882 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1883 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1885 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1886 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1888 #undef TARGET_TRAMPOLINE_INIT
1889 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1891 #undef TARGET_FUNCTION_VALUE
1892 #define TARGET_FUNCTION_VALUE rs6000_function_value
1894 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1895 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1897 #undef TARGET_OPTION_SAVE
1898 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1900 #undef TARGET_OPTION_RESTORE
1901 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1903 #undef TARGET_OPTION_PRINT
1904 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1906 #undef TARGET_CAN_INLINE_P
1907 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1909 #undef TARGET_SET_CURRENT_FUNCTION
1910 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1912 #undef TARGET_LEGITIMATE_CONSTANT_P
1913 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1915 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1916 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1918 #undef TARGET_CAN_USE_DOLOOP_P
1919 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1921 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1922 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1924 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1925 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1926 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1927 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1928 #undef TARGET_UNWIND_WORD_MODE
1929 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1931 #undef TARGET_OFFLOAD_OPTIONS
1932 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1934 #undef TARGET_C_MODE_FOR_SUFFIX
1935 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1937 #undef TARGET_INVALID_BINARY_OP
1938 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1940 #undef TARGET_OPTAB_SUPPORTED_P
1941 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1943 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1944 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1946 #undef TARGET_COMPARE_VERSION_PRIORITY
1947 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1949 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1950 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1951 rs6000_generate_version_dispatcher_body
1953 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1954 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1955 rs6000_get_function_versions_dispatcher
1957 #undef TARGET_OPTION_FUNCTION_VERSIONS
1958 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1960 #undef TARGET_HARD_REGNO_NREGS
1961 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1962 #undef TARGET_HARD_REGNO_MODE_OK
1963 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1965 #undef TARGET_MODES_TIEABLE_P
1966 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1968 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1969 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1970 rs6000_hard_regno_call_part_clobbered
1972 #undef TARGET_SLOW_UNALIGNED_ACCESS
1973 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1975 #undef TARGET_CAN_CHANGE_MODE_CLASS
1976 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1978 #undef TARGET_CONSTANT_ALIGNMENT
1979 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1981 #undef TARGET_STARTING_FRAME_OFFSET
1982 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1985 /* Processor table. */
1986 struct rs6000_ptt
1988 const char *const name; /* Canonical processor name. */
1989 const enum processor_type processor; /* Processor type enum value. */
1990 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1993 static struct rs6000_ptt const processor_target_table[] =
1995 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1996 #include "rs6000-cpus.def"
1997 #undef RS6000_CPU
2000 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
2001 name is invalid. */
2003 static int
2004 rs6000_cpu_name_lookup (const char *name)
2006 size_t i;
2008 if (name != NULL)
2010 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2011 if (! strcmp (name, processor_target_table[i].name))
2012 return (int)i;
2015 return -1;
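/* A minimal usage sketch, assuming rs6000-cpus.def provides an entry named
   "power9":
     rs6000_cpu_name_lookup ("power9")  => index of that table entry
     rs6000_cpu_name_lookup ("bogus")   => -1
     rs6000_cpu_name_lookup (NULL)      => -1  */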
2019 /* Return number of consecutive hard regs needed starting at reg REGNO
2020 to hold something of mode MODE.
2021 This is ordinarily the length in words of a value of mode MODE
2022 but can be less for certain modes in special long registers.
2024 POWER and PowerPC GPRs hold 32 bits worth;
2025 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2027 static int
2028 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2030 unsigned HOST_WIDE_INT reg_size;
2032 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2033 128-bit floating point that can go in vector registers (which use VSX
2034 memory addressing). */
2035 if (FP_REGNO_P (regno))
2036 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2037 ? UNITS_PER_VSX_WORD
2038 : UNITS_PER_FP_WORD);
2040 else if (ALTIVEC_REGNO_P (regno))
2041 reg_size = UNITS_PER_ALTIVEC_WORD;
2043 else
2044 reg_size = UNITS_PER_WORD;
2046 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
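/* The return expression is a ceiling division.  For example, DImode
   (8 bytes) in 32-bit GPRs (reg_size == 4) needs (8 + 4 - 1) / 4 == 2
   registers, while V4SImode (16 bytes) in an Altivec register
   (reg_size == 16) needs (16 + 16 - 1) / 16 == 1.  */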
2049 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2050 MODE. */
2051 static int
2052 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2054 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2056 if (COMPLEX_MODE_P (mode))
2057 mode = GET_MODE_INNER (mode);
2059 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2060 register pairs, and we use PTImode where we need to deal with quad
2061 word memory operations. Don't allow quad words in the argument or frame
2062 pointer registers, just registers 0..31. */
2063 if (mode == PTImode)
2064 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2065 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2066 && ((regno & 1) == 0));
2068 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2069 implementations. Don't allow an item to be split between a FP register
2070 and an Altivec register. Allow TImode in all VSX registers if the user
2071 asked for it. */
2072 if (TARGET_VSX && VSX_REGNO_P (regno)
2073 && (VECTOR_MEM_VSX_P (mode)
2074 || FLOAT128_VECTOR_P (mode)
2075 || reg_addr[mode].scalar_in_vmx_p
2076 || mode == TImode
2077 || (TARGET_VADDUQM && mode == V1TImode)))
2079 if (FP_REGNO_P (regno))
2080 return FP_REGNO_P (last_regno);
2082 if (ALTIVEC_REGNO_P (regno))
2084 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2085 return 0;
2087 return ALTIVEC_REGNO_P (last_regno);
2091 /* The GPRs can hold any mode, but values bigger than one register
2092 cannot go past R31. */
2093 if (INT_REGNO_P (regno))
2094 return INT_REGNO_P (last_regno);
2096 /* The float registers (except for VSX vector modes) can only hold floating
2097 modes and DImode. */
2098 if (FP_REGNO_P (regno))
2100 if (FLOAT128_VECTOR_P (mode))
2101 return false;
2103 if (SCALAR_FLOAT_MODE_P (mode)
2104 && (mode != TDmode || (regno % 2) == 0)
2105 && FP_REGNO_P (last_regno))
2106 return 1;
2108 if (GET_MODE_CLASS (mode) == MODE_INT)
2110 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2111 return 1;
2113 if (TARGET_P8_VECTOR && (mode == SImode))
2114 return 1;
2116 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2117 return 1;
2120 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2121 && PAIRED_VECTOR_MODE (mode))
2122 return 1;
2124 return 0;
2127 /* The CR register can only hold CC modes. */
2128 if (CR_REGNO_P (regno))
2129 return GET_MODE_CLASS (mode) == MODE_CC;
2131 if (CA_REGNO_P (regno))
2132 return mode == Pmode || mode == SImode;
2134 /* AltiVec modes can only go in AltiVec registers. */
2135 if (ALTIVEC_REGNO_P (regno))
2136 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2137 || mode == V1TImode);
2139 /* We cannot put non-VSX TImode or PTImode anywhere except in a general
2140 register, and the value must fit within the register set. */
2142 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
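/* Illustrating the PTImode rule above on a 64-bit target: PTImode starting
   in GPR 4 is allowed (GPRs 4 and 5 are both in range and 4 is even), while
   PTImode starting in GPR 5 is rejected because (regno & 1) != 0.  */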
2145 /* Implement TARGET_HARD_REGNO_NREGS. */
2147 static unsigned int
2148 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2150 return rs6000_hard_regno_nregs[mode][regno];
2153 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2155 static bool
2156 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2158 return rs6000_hard_regno_mode_ok_p[mode][regno];
2161 /* Implement TARGET_MODES_TIEABLE_P.
2163 PTImode cannot tie with other modes because PTImode is restricted to even
2164 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2165 57744).
2167 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2168 128-bit floating point on VSX systems ties with other vectors. */
2170 static bool
2171 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2173 if (mode1 == PTImode)
2174 return mode2 == PTImode;
2175 if (mode2 == PTImode)
2176 return false;
2178 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2179 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2180 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2181 return false;
2183 if (SCALAR_FLOAT_MODE_P (mode1))
2184 return SCALAR_FLOAT_MODE_P (mode2);
2185 if (SCALAR_FLOAT_MODE_P (mode2))
2186 return false;
2188 if (GET_MODE_CLASS (mode1) == MODE_CC)
2189 return GET_MODE_CLASS (mode2) == MODE_CC;
2190 if (GET_MODE_CLASS (mode2) == MODE_CC)
2191 return false;
2193 if (PAIRED_VECTOR_MODE (mode1))
2194 return PAIRED_VECTOR_MODE (mode2);
2195 if (PAIRED_VECTOR_MODE (mode2))
2196 return false;
2198 return true;
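/* For example, SFmode and DFmode tie (both are scalar float modes), PTImode
   ties only with itself, and DImode does not tie with CCmode since exactly
   one of the two is a MODE_CC mode.  */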
2201 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2203 static bool
2204 rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
2206 if (TARGET_32BIT
2207 && TARGET_POWERPC64
2208 && GET_MODE_SIZE (mode) > 4
2209 && INT_REGNO_P (regno))
2210 return true;
2212 if (TARGET_VSX
2213 && FP_REGNO_P (regno)
2214 && GET_MODE_SIZE (mode) > 8
2215 && !FLOAT128_2REG_P (mode))
2216 return true;
2218 return false;
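/* For example, with -m32 -mpowerpc64 a DImode value occupies a full 64-bit
   GPR, but the 32-bit calling conventions only preserve the low 32 bits
   across calls, so the register is partially clobbered.  Likewise, under
   VSX a value wider than 64 bits held in a traditional FPR uses the full
   VSX register, of which only the FPR (low 64-bit) half is preserved.  */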
2221 /* Print interesting facts about registers. */
2222 static void
2223 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2225 int r, m;
2227 for (r = first_regno; r <= last_regno; ++r)
2229 const char *comma = "";
2230 int len;
2232 if (first_regno == last_regno)
2233 fprintf (stderr, "%s:\t", reg_name);
2234 else
2235 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2237 len = 8;
2238 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2239 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2241 if (len > 70)
2243 fprintf (stderr, ",\n\t");
2244 len = 8;
2245 comma = "";
2248 if (rs6000_hard_regno_nregs[m][r] > 1)
2249 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2250 rs6000_hard_regno_nregs[m][r]);
2251 else
2252 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2254 comma = ", ";
2257 if (call_used_regs[r])
2259 if (len > 70)
2261 fprintf (stderr, ",\n\t");
2262 len = 8;
2263 comma = "";
2266 len += fprintf (stderr, "%s%s", comma, "call-used");
2267 comma = ", ";
2270 if (fixed_regs[r])
2272 if (len > 70)
2274 fprintf (stderr, ",\n\t");
2275 len = 8;
2276 comma = "";
2279 len += fprintf (stderr, "%s%s", comma, "fixed");
2280 comma = ", ";
2283 if (len > 70)
2285 fprintf (stderr, ",\n\t");
2286 comma = "";
2289 len += fprintf (stderr, "%sreg-class = %s", comma,
2290 reg_class_names[(int)rs6000_regno_regclass[r]]);
2291 comma = ", ";
2293 if (len > 70)
2295 fprintf (stderr, ",\n\t");
2296 comma = "";
2299 fprintf (stderr, "%sregno = %d\n", comma, r);
2303 static const char *
2304 rs6000_debug_vector_unit (enum rs6000_vector v)
2306 const char *ret;
2308 switch (v)
2310 case VECTOR_NONE: ret = "none"; break;
2311 case VECTOR_ALTIVEC: ret = "altivec"; break;
2312 case VECTOR_VSX: ret = "vsx"; break;
2313 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2314 case VECTOR_PAIRED: ret = "paired"; break;
2315 case VECTOR_OTHER: ret = "other"; break;
2316 default: ret = "unknown"; break;
2319 return ret;
2322 /* Inner function printing just the address mask for a particular reload
2323 register class. */
2324 DEBUG_FUNCTION char *
2325 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2327 static char ret[8];
2328 char *p = ret;
2330 if ((mask & RELOAD_REG_VALID) != 0)
2331 *p++ = 'v';
2332 else if (keep_spaces)
2333 *p++ = ' ';
2335 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2336 *p++ = 'm';
2337 else if (keep_spaces)
2338 *p++ = ' ';
2340 if ((mask & RELOAD_REG_INDEXED) != 0)
2341 *p++ = 'i';
2342 else if (keep_spaces)
2343 *p++ = ' ';
2345 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2346 *p++ = 'O';
2347 else if ((mask & RELOAD_REG_OFFSET) != 0)
2348 *p++ = 'o';
2349 else if (keep_spaces)
2350 *p++ = ' ';
2352 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2353 *p++ = '+';
2354 else if (keep_spaces)
2355 *p++ = ' ';
2357 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2358 *p++ = '+';
2359 else if (keep_spaces)
2360 *p++ = ' ';
2362 if ((mask & RELOAD_REG_AND_M16) != 0)
2363 *p++ = '&';
2364 else if (keep_spaces)
2365 *p++ = ' ';
2367 *p = '\0';
2369 return ret;
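/* Sample outputs: a mask of RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET prints as "vio" when keep_spaces is false, and as
   "v io   " (blanks standing in for the unset flags) when keep_spaces is
   true.  */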
2372 /* Print the address masks in a human readable fashion. */
2373 DEBUG_FUNCTION void
2374 rs6000_debug_print_mode (ssize_t m)
2376 ssize_t rc;
2377 int spaces = 0;
2378 bool fuse_extra_p;
2380 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2381 for (rc = 0; rc < N_RELOAD_REG; rc++)
2382 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2383 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2385 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2386 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2387 fprintf (stderr, " Reload=%c%c",
2388 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2389 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2390 else
2391 spaces += sizeof (" Reload=sl") - 1;
2393 if (reg_addr[m].scalar_in_vmx_p)
2395 fprintf (stderr, "%*s Upper=y", spaces, "");
2396 spaces = 0;
2398 else
2399 spaces += sizeof (" Upper=y") - 1;
2401 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2402 || reg_addr[m].fused_toc);
2403 if (!fuse_extra_p)
2405 for (rc = 0; rc < N_RELOAD_REG; rc++)
2407 if (rc != RELOAD_REG_ANY)
2409 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2411 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2412 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2413 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2415 fuse_extra_p = true;
2416 break;
2422 if (fuse_extra_p)
2424 fprintf (stderr, "%*s Fuse:", spaces, "");
2425 spaces = 0;
2427 for (rc = 0; rc < N_RELOAD_REG; rc++)
2429 if (rc != RELOAD_REG_ANY)
2431 char load, store;
2433 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2434 load = 'l';
2435 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2436 load = 'L';
2437 else
2438 load = '-';
2440 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2441 store = 's';
2442 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2443 store = 'S';
2444 else
2445 store = '-';
2447 if (load == '-' && store == '-')
2448 spaces += 5;
2449 else
2451 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2452 reload_reg_map[rc].name[0], load, store);
2453 spaces = 0;
2458 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2460 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2461 spaces = 0;
2463 else
2464 spaces += sizeof (" P8gpr") - 1;
2466 if (reg_addr[m].fused_toc)
2468 fprintf (stderr, "%*sToc", (spaces + 1), "");
2469 spaces = 0;
2471 else
2472 spaces += sizeof (" Toc") - 1;
2474 else
2475 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2477 if (rs6000_vector_unit[m] != VECTOR_NONE
2478 || rs6000_vector_mem[m] != VECTOR_NONE)
2480 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2481 spaces, "",
2482 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2483 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2486 fputs ("\n", stderr);
2489 #define DEBUG_FMT_ID "%-32s= "
2490 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2491 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2492 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
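/* For instance, fprintf (stderr, DEBUG_FMT_D, "tls_size", 13) uses the
   format "%-32s= %d\n", printing the name left-justified in a 32-column
   field:
     tls_size                        = 13  */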
2494 /* Print various interesting information with -mdebug=reg. */
2495 static void
2496 rs6000_debug_reg_global (void)
2498 static const char *const tf[2] = { "false", "true" };
2499 const char *nl = (const char *)0;
2500 int m;
2501 size_t m1, m2, v;
2502 char costly_num[20];
2503 char nop_num[20];
2504 char flags_buffer[40];
2505 const char *costly_str;
2506 const char *nop_str;
2507 const char *trace_str;
2508 const char *abi_str;
2509 const char *cmodel_str;
2510 struct cl_target_option cl_opts;
2512 /* Modes we want tieable information on. */
2513 static const machine_mode print_tieable_modes[] = {
2514 QImode,
2515 HImode,
2516 SImode,
2517 DImode,
2518 TImode,
2519 PTImode,
2520 SFmode,
2521 DFmode,
2522 TFmode,
2523 IFmode,
2524 KFmode,
2525 SDmode,
2526 DDmode,
2527 TDmode,
2528 V2SImode,
2529 V16QImode,
2530 V8HImode,
2531 V4SImode,
2532 V2DImode,
2533 V1TImode,
2534 V32QImode,
2535 V16HImode,
2536 V8SImode,
2537 V4DImode,
2538 V2TImode,
2539 V2SFmode,
2540 V4SFmode,
2541 V2DFmode,
2542 V8SFmode,
2543 V4DFmode,
2544 CCmode,
2545 CCUNSmode,
2546 CCEQmode,
2549 /* Virtual regs we are interested in. */
2550 static const struct {
2551 int regno; /* register number. */
2552 const char *name; /* register name. */
2553 } virtual_regs[] = {
2554 { STACK_POINTER_REGNUM, "stack pointer:" },
2555 { TOC_REGNUM, "toc: " },
2556 { STATIC_CHAIN_REGNUM, "static chain: " },
2557 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2558 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2559 { ARG_POINTER_REGNUM, "arg pointer: " },
2560 { FRAME_POINTER_REGNUM, "frame pointer:" },
2561 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2562 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2563 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2564 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2565 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2566 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2567 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2568 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2569 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2572 fputs ("\nHard register information:\n", stderr);
2573 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2574 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2575 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2576 LAST_ALTIVEC_REGNO,
2577 "vs");
2578 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2579 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2580 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2581 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2582 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2583 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2585 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2586 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2587 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2589 fprintf (stderr,
2590 "\n"
2591 "d reg_class = %s\n"
2592 "f reg_class = %s\n"
2593 "v reg_class = %s\n"
2594 "wa reg_class = %s\n"
2595 "wb reg_class = %s\n"
2596 "wd reg_class = %s\n"
2597 "we reg_class = %s\n"
2598 "wf reg_class = %s\n"
2599 "wg reg_class = %s\n"
2600 "wh reg_class = %s\n"
2601 "wi reg_class = %s\n"
2602 "wj reg_class = %s\n"
2603 "wk reg_class = %s\n"
2604 "wl reg_class = %s\n"
2605 "wm reg_class = %s\n"
2606 "wo reg_class = %s\n"
2607 "wp reg_class = %s\n"
2608 "wq reg_class = %s\n"
2609 "wr reg_class = %s\n"
2610 "ws reg_class = %s\n"
2611 "wt reg_class = %s\n"
2612 "wu reg_class = %s\n"
2613 "wv reg_class = %s\n"
2614 "ww reg_class = %s\n"
2615 "wx reg_class = %s\n"
2616 "wy reg_class = %s\n"
2617 "wz reg_class = %s\n"
2618 "wA reg_class = %s\n"
2619 "wH reg_class = %s\n"
2620 "wI reg_class = %s\n"
2621 "wJ reg_class = %s\n"
2622 "wK reg_class = %s\n"
2623 "\n",
2624 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2625 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2626 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2627 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2628 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2629 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2630 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2631 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2632 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2633 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2634 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2635 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2636 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2637 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2638 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2639 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2640 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2641 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2642 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2643 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2644 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2645 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2646 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2647 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2648 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2649 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2650 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2651 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2652 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2653 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2654 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2655 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2657 nl = "\n";
2658 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2659 rs6000_debug_print_mode (m);
2661 fputs ("\n", stderr);
2663 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2665 machine_mode mode1 = print_tieable_modes[m1];
2666 bool first_time = true;
2668 nl = (const char *)0;
2669 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2671 machine_mode mode2 = print_tieable_modes[m2];
2672 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2674 if (first_time)
2676 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2677 nl = "\n";
2678 first_time = false;
2681 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2685 if (!first_time)
2686 fputs ("\n", stderr);
2689 if (nl)
2690 fputs (nl, stderr);
2692 if (rs6000_recip_control)
2694 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2696 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2697 if (rs6000_recip_bits[m])
2699 fprintf (stderr,
2700 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2701 GET_MODE_NAME (m),
2702 (RS6000_RECIP_AUTO_RE_P (m)
2703 ? "auto"
2704 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2705 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2706 ? "auto"
2707 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2710 fputs ("\n", stderr);
2713 if (rs6000_cpu_index >= 0)
2715 const char *name = processor_target_table[rs6000_cpu_index].name;
2716 HOST_WIDE_INT flags
2717 = processor_target_table[rs6000_cpu_index].target_enable;
2719 sprintf (flags_buffer, "-mcpu=%s flags", name);
2720 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2722 else
2723 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2725 if (rs6000_tune_index >= 0)
2727 const char *name = processor_target_table[rs6000_tune_index].name;
2728 HOST_WIDE_INT flags
2729 = processor_target_table[rs6000_tune_index].target_enable;
2731 sprintf (flags_buffer, "-mtune=%s flags", name);
2732 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2734 else
2735 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2737 cl_target_option_save (&cl_opts, &global_options);
2738 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2739 rs6000_isa_flags);
2741 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2742 rs6000_isa_flags_explicit);
2744 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2745 rs6000_builtin_mask);
2747 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2749 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2750 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2752 switch (rs6000_sched_costly_dep)
2754 case max_dep_latency:
2755 costly_str = "max_dep_latency";
2756 break;
2758 case no_dep_costly:
2759 costly_str = "no_dep_costly";
2760 break;
2762 case all_deps_costly:
2763 costly_str = "all_deps_costly";
2764 break;
2766 case true_store_to_load_dep_costly:
2767 costly_str = "true_store_to_load_dep_costly";
2768 break;
2770 case store_to_load_dep_costly:
2771 costly_str = "store_to_load_dep_costly";
2772 break;
2774 default:
2775 costly_str = costly_num;
2776 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2777 break;
2780 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2782 switch (rs6000_sched_insert_nops)
2784 case sched_finish_regroup_exact:
2785 nop_str = "sched_finish_regroup_exact";
2786 break;
2788 case sched_finish_pad_groups:
2789 nop_str = "sched_finish_pad_groups";
2790 break;
2792 case sched_finish_none:
2793 nop_str = "sched_finish_none";
2794 break;
2796 default:
2797 nop_str = nop_num;
2798 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2799 break;
2802 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2804 switch (rs6000_sdata)
2806 default:
2807 case SDATA_NONE:
2808 break;
2810 case SDATA_DATA:
2811 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2812 break;
2814 case SDATA_SYSV:
2815 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2816 break;
2818 case SDATA_EABI:
2819 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2820 break;
2824 switch (rs6000_traceback)
2826 case traceback_default: trace_str = "default"; break;
2827 case traceback_none: trace_str = "none"; break;
2828 case traceback_part: trace_str = "part"; break;
2829 case traceback_full: trace_str = "full"; break;
2830 default: trace_str = "unknown"; break;
2833 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2835 switch (rs6000_current_cmodel)
2837 case CMODEL_SMALL: cmodel_str = "small"; break;
2838 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2839 case CMODEL_LARGE: cmodel_str = "large"; break;
2840 default: cmodel_str = "unknown"; break;
2843 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2845 switch (rs6000_current_abi)
2847 case ABI_NONE: abi_str = "none"; break;
2848 case ABI_AIX: abi_str = "aix"; break;
2849 case ABI_ELFv2: abi_str = "ELFv2"; break;
2850 case ABI_V4: abi_str = "V4"; break;
2851 case ABI_DARWIN: abi_str = "darwin"; break;
2852 default: abi_str = "unknown"; break;
2855 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2857 if (rs6000_altivec_abi)
2858 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2860 if (rs6000_darwin64_abi)
2861 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2863 fprintf (stderr, DEBUG_FMT_S, "single_float",
2864 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2866 fprintf (stderr, DEBUG_FMT_S, "double_float",
2867 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2869 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2870 (TARGET_SOFT_FLOAT ? "true" : "false"));
2872 if (TARGET_LINK_STACK)
2873 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2875 if (TARGET_P8_FUSION)
2877 char options[80];
2879 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2880 if (TARGET_TOC_FUSION)
2881 strcat (options, ", toc");
2883 if (TARGET_P8_FUSION_SIGN)
2884 strcat (options, ", sign");
2886 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2889 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2890 TARGET_SECURE_PLT ? "secure" : "bss");
2891 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2892 aix_struct_return ? "aix" : "sysv");
2893 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2894 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2895 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2896 tf[!!rs6000_align_branch_targets]);
2897 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2898 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2899 rs6000_long_double_type_size);
2900 if (rs6000_long_double_type_size == 128)
2902 fprintf (stderr, DEBUG_FMT_S, "long double type",
2903 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2904 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2905 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2907 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2908 (int)rs6000_sched_restricted_insns_priority);
2909 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2910 (int)END_BUILTINS);
2911 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2912 (int)RS6000_BUILTIN_COUNT);
2914 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2915 (int)TARGET_FLOAT128_ENABLE_TYPE);
2917 if (TARGET_VSX)
2918 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2919 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2921 if (TARGET_DIRECT_MOVE_128)
2922 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2923 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2927 /* Update the addr mask bits in reg_addr to help secondary reload and the
2928 legitimate address checks figure out the appropriate addressing to
2929 use. */
2931 static void
2932 rs6000_setup_reg_addr_masks (void)
2934 ssize_t rc, reg, m, nregs;
2935 addr_mask_type any_addr_mask, addr_mask;
2937 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2939 machine_mode m2 = (machine_mode) m;
2940 bool complex_p = false;
2941 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2942 size_t msize;
2944 if (COMPLEX_MODE_P (m2))
2946 complex_p = true;
2947 m2 = GET_MODE_INNER (m2);
2950 msize = GET_MODE_SIZE (m2);
2952 /* SDmode is special in that we want to access it only via REG+REG
2953 addressing on power7 and above, since we want to use the LFIWZX and
2954 STFIWZX instructions to load it. */
2955 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2957 any_addr_mask = 0;
2958 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2960 addr_mask = 0;
2961 reg = reload_reg_map[rc].reg;
2963 /* Can mode values go in the GPR/FPR/Altivec registers? */
2964 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2966 bool small_int_vsx_p = (small_int_p
2967 && (rc == RELOAD_REG_FPR
2968 || rc == RELOAD_REG_VMX));
2970 nregs = rs6000_hard_regno_nregs[m][reg];
2971 addr_mask |= RELOAD_REG_VALID;
2973 /* Indicate if the mode takes more than 1 physical register. If
2974 it takes a single register, indicate it can do REG+REG
2975 addressing. Small integers in VSX registers can only do
2976 REG+REG addressing. */
2977 if (small_int_vsx_p)
2978 addr_mask |= RELOAD_REG_INDEXED;
2979 else if (nregs > 1 || m == BLKmode || complex_p)
2980 addr_mask |= RELOAD_REG_MULTIPLE;
2981 else
2982 addr_mask |= RELOAD_REG_INDEXED;
2984 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2985 addressing. If we allow scalars into Altivec registers,
2986 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2988 For VSX systems, we don't allow update addressing for
2989 DFmode/SFmode if those registers can go in both the
2990 traditional floating point registers and Altivec registers.
2991 The load/store instructions for the Altivec registers do not
2992 have update forms. If we allowed update addressing, it seems
2993 to break IV-OPT code using floating point if the index type is
2994 int instead of long (PR target/81550 and target/84042). */
2996 if (TARGET_UPDATE
2997 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2998 && msize <= 8
2999 && !VECTOR_MODE_P (m2)
3000 && !FLOAT128_VECTOR_P (m2)
3001 && !complex_p
3002 && (m != E_DFmode || !TARGET_VSX)
3003 && (m != E_SFmode || !TARGET_P8_VECTOR)
3004 && !small_int_vsx_p)
3006 addr_mask |= RELOAD_REG_PRE_INCDEC;
3008 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
3009 we don't allow PRE_MODIFY for some multi-register
3010 operations. */
3011 switch (m)
3013 default:
3014 addr_mask |= RELOAD_REG_PRE_MODIFY;
3015 break;
3017 case E_DImode:
3018 if (TARGET_POWERPC64)
3019 addr_mask |= RELOAD_REG_PRE_MODIFY;
3020 break;
3022 case E_DFmode:
3023 case E_DDmode:
3024 if (TARGET_DF_INSN)
3025 addr_mask |= RELOAD_REG_PRE_MODIFY;
3026 break;
3031 /* GPR and FPR registers can do REG+OFFSET addressing, except
3032 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
3033 for 64-bit scalars and 32-bit SFmode to altivec registers. */
3034 if ((addr_mask != 0) && !indexed_only_p
3035 && msize <= 8
3036 && (rc == RELOAD_REG_GPR
3037 || ((msize == 8 || m2 == SFmode)
3038 && (rc == RELOAD_REG_FPR
3039 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
3040 addr_mask |= RELOAD_REG_OFFSET;
3042 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
3043 instructions are enabled. The offset for 128-bit VSX registers is
3044 only 12 bits. While GPRs can handle the full offset range, VSX
3045 registers can only handle the restricted range. */
3046 else if ((addr_mask != 0) && !indexed_only_p
3047 && msize == 16 && TARGET_P9_VECTOR
3048 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
3049 || (m2 == TImode && TARGET_VSX)))
3051 addr_mask |= RELOAD_REG_OFFSET;
3052 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
3053 addr_mask |= RELOAD_REG_QUAD_OFFSET;
3056 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3057 addressing on 128-bit types. */
3058 if (rc == RELOAD_REG_VMX && msize == 16
3059 && (addr_mask & RELOAD_REG_VALID) != 0)
3060 addr_mask |= RELOAD_REG_AND_M16;
3062 reg_addr[m].addr_mask[rc] = addr_mask;
3063 any_addr_mask |= addr_mask;
3066 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
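/* After this routine runs, reg_addr[m].addr_mask[rc] answers which
   addressing forms are valid for mode M in reload register class RC, and
   the RELOAD_REG_ANY entry is the union over all classes; these are the
   masks dumped by rs6000_debug_print_mode above.  */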
3071 /* Initialize the various global tables that are based on register size. */
3072 static void
3073 rs6000_init_hard_regno_mode_ok (bool global_init_p)
3075 ssize_t r, m, c;
3076 int align64;
3077 int align32;
3079 /* Precalculate REGNO_REG_CLASS. */
3080 rs6000_regno_regclass[0] = GENERAL_REGS;
3081 for (r = 1; r < 32; ++r)
3082 rs6000_regno_regclass[r] = BASE_REGS;
3084 for (r = 32; r < 64; ++r)
3085 rs6000_regno_regclass[r] = FLOAT_REGS;
3087 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
3088 rs6000_regno_regclass[r] = NO_REGS;
3090 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3091 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3093 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3094 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3095 rs6000_regno_regclass[r] = CR_REGS;
3097 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3098 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3099 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3100 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3101 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3102 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3103 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3104 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3105 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3106 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3108 /* Precalculate register class to simpler reload register class. We don't
3109 need all of the register classes that are combinations of different
3110 classes, just the simple ones that have constraint letters. */
3111 for (c = 0; c < N_REG_CLASSES; c++)
3112 reg_class_to_reg_type[c] = NO_REG_TYPE;
3114 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3115 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3116 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3117 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3118 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3119 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3120 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3121 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3122 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3123 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3125 if (TARGET_VSX)
3127 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3128 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3130 else
3132 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3133 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3136 /* Precalculate the valid memory formats as well as the vector information;
3137 this must be set up before the rs6000_hard_regno_nregs_internal calls
3138 below. */
3139 gcc_assert ((int)VECTOR_NONE == 0);
3140 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3141 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
3143 gcc_assert ((int)CODE_FOR_nothing == 0);
3144 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3146 gcc_assert ((int)NO_REGS == 0);
3147 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3149 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
3150 controls whether the compiler uses native alignment or 128-bit alignment. */
3151 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3153 align64 = 64;
3154 align32 = 32;
3156 else
3158 align64 = 128;
3159 align32 = 128;
3162 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3163 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3164 if (TARGET_FLOAT128_TYPE)
3166 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3167 rs6000_vector_align[KFmode] = 128;
3169 if (FLOAT128_IEEE_P (TFmode))
3171 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3172 rs6000_vector_align[TFmode] = 128;
3176 /* V2DF mode, VSX only. */
3177 if (TARGET_VSX)
3179 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3180 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3181 rs6000_vector_align[V2DFmode] = align64;
3184 /* V4SF mode, either VSX or Altivec. */
3185 if (TARGET_VSX)
3187 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3188 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3189 rs6000_vector_align[V4SFmode] = align32;
3191 else if (TARGET_ALTIVEC)
3193 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3194 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3195 rs6000_vector_align[V4SFmode] = align32;
3198 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3199 and stores. */
3200 if (TARGET_ALTIVEC)
3202 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3203 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3204 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3205 rs6000_vector_align[V4SImode] = align32;
3206 rs6000_vector_align[V8HImode] = align32;
3207 rs6000_vector_align[V16QImode] = align32;
3209 if (TARGET_VSX)
3211 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3212 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3213 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3215 else
3217 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3218 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3219 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3223 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3224 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3225 if (TARGET_VSX)
3227 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3228 rs6000_vector_unit[V2DImode]
3229 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3230 rs6000_vector_align[V2DImode] = align64;
3232 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3233 rs6000_vector_unit[V1TImode]
3234 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3235 rs6000_vector_align[V1TImode] = 128;
3238 /* DFmode, see if we want to use the VSX unit. Memory is handled
3239 differently, so don't set rs6000_vector_mem. */
3240 if (TARGET_VSX)
3242 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3243 rs6000_vector_align[DFmode] = 64;
3246 /* SFmode, see if we want to use the VSX unit. */
3247 if (TARGET_P8_VECTOR)
3249 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3250 rs6000_vector_align[SFmode] = 32;
3253 /* Allow TImode in VSX register and set the VSX memory macros. */
3254 if (TARGET_VSX)
3256 rs6000_vector_mem[TImode] = VECTOR_VSX;
3257 rs6000_vector_align[TImode] = align64;
3260 /* TODO add paired floating point vector support. */
3262 /* Register class constraints for the constraints that depend on compile
3263 switches. When the VSX code was added, different constraints were added
3264 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3265 of the VSX registers are used. The register classes for scalar floating
3266 point types are set based on whether we allow that type into the upper
3267 (Altivec) registers. GCC has register classes to target the Altivec
3268 registers for load/store operations, to select using a VSX memory
3269 operation instead of the traditional floating point operation. The
3270 constraints are:
3272 d - Register class to use with traditional DFmode instructions.
3273 f - Register class to use with traditional SFmode instructions.
3274 v - Altivec register.
3275 wa - Any VSX register.
3276 wc - Reserved to represent individual CR bits (used in LLVM).
3277 wd - Preferred register class for V2DFmode.
3278 wf - Preferred register class for V4SFmode.
3279 wg - Float register for power6x move insns.
3280 wh - FP register for direct move instructions.
3281 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3282 wj - FP or VSX register to hold 64-bit integers for direct moves.
3283 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3284 wl - Float register if we can do 32-bit signed int loads.
3285 wm - VSX register for ISA 2.07 direct move operations.
3286 wn - always NO_REGS.
3287 wr - GPR if 64-bit mode is permitted.
3288 ws - Register class to do ISA 2.06 DF operations.
3289 wt - VSX register for TImode in VSX registers.
3290 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3291 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3292 ww - Register class to do SF conversions in with VSX operations.
3293 wx - Float register if we can do 32-bit int stores.
3294 wy - Register class to do ISA 2.07 SF operations.
3295 wz - Float register if we can do 32-bit unsigned int loads.
3296 wH - Altivec register if SImode is allowed in VSX registers.
3297 wI - VSX register if SImode is allowed in VSX registers.
3298 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3299 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
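/* Since rs6000_constraints[] was cleared above (the memset relies on the
   gcc_assert that NO_REGS == 0), any constraint letter not explicitly
   assigned below ("wn", for instance) remains NO_REGS.  */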
3301 if (TARGET_HARD_FLOAT)
3302 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3304 if (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
3305 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3307 if (TARGET_VSX)
3309 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3310 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3311 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3312 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode */
3313 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode */
3314 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode */
3315 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3318 /* Add conditional constraints based on various options, to allow us to
3319 collapse multiple insn patterns. */
3320 if (TARGET_ALTIVEC)
3321 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3323 if (TARGET_MFPGPR) /* DFmode */
3324 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3326 if (TARGET_LFIWAX)
3327 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3329 if (TARGET_DIRECT_MOVE)
3331 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3332 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3333 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3334 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3335 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3336 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3339 if (TARGET_POWERPC64)
3341 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3342 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3345 if (TARGET_P8_VECTOR) /* SFmode */
3347 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3348 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3349 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3351 else if (TARGET_VSX)
3352 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3354 if (TARGET_STFIWX)
3355 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3357 if (TARGET_LFIWZX)
3358 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3360 if (TARGET_FLOAT128_TYPE)
3362 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3363 if (FLOAT128_IEEE_P (TFmode))
3364 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3367 if (TARGET_P9_VECTOR)
3369 /* Support for new D-form instructions. */
3370 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3372 /* Support for ISA 3.0 (power9) vectors. */
3373 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3376 /* Support for new direct moves (ISA 3.0 + 64bit). */
3377 if (TARGET_DIRECT_MOVE_128)
3378 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3380 /* Support small integers in VSX registers. */
3381 if (TARGET_P8_VECTOR)
3383 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3384 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3385 if (TARGET_P9_VECTOR)
3387 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3388 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3392 /* Set up the reload helper and direct move functions. */
3393 if (TARGET_VSX || TARGET_ALTIVEC)
3395 if (TARGET_64BIT)
3397 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3398 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3399 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3400 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3401 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3402 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3403 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3404 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3405 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3406 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3407 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3408 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3409 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3410 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3411 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3412 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3413 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3414 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3415 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3416 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3418 if (FLOAT128_VECTOR_P (KFmode))
3420 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3421 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3424 if (FLOAT128_VECTOR_P (TFmode))
3426 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3427 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3430 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3431 available. */
3432 if (TARGET_NO_SDMODE_STACK)
3434 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3435 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3438 if (TARGET_VSX)
3440 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3441 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3444 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3446 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3447 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3448 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3449 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3450 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3451 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3452 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3453 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3454 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3456 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3457 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3458 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3459 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3460 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3461 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3462 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3463 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3464 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3466 if (FLOAT128_VECTOR_P (KFmode))
3468 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3469 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3472 if (FLOAT128_VECTOR_P (TFmode))
3474 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3475 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3479 else
3481 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3482 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3483 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3484 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3485 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3486 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3487 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3488 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3489 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3490 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3491 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3492 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3493 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3494 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3495 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3496 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3497 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3498 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3499 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3500 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3502 if (FLOAT128_VECTOR_P (KFmode))
3504 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3505 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3508 if (FLOAT128_IEEE_P (TFmode))
3510 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3511 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3514 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3515 available. */
3516 if (TARGET_NO_SDMODE_STACK)
3518 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3519 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3522 if (TARGET_VSX)
3524 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3525 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3528 if (TARGET_DIRECT_MOVE)
3530 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3531 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3532 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3536 reg_addr[DFmode].scalar_in_vmx_p = true;
3537 reg_addr[DImode].scalar_in_vmx_p = true;
3539 if (TARGET_P8_VECTOR)
3541 reg_addr[SFmode].scalar_in_vmx_p = true;
3542 reg_addr[SImode].scalar_in_vmx_p = true;
3544 if (TARGET_P9_VECTOR)
3546 reg_addr[HImode].scalar_in_vmx_p = true;
3547 reg_addr[QImode].scalar_in_vmx_p = true;
3552 /* Set up the fusion operations. */
3553 if (TARGET_P8_FUSION)
3555 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3556 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3557 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3558 if (TARGET_64BIT)
3559 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3562 if (TARGET_P9_FUSION)
3564 struct fuse_insns {
3565 enum machine_mode mode; /* mode of the fused type. */
3566 enum machine_mode pmode; /* pointer mode. */
3567 enum rs6000_reload_reg_type rtype; /* register type. */
3568 enum insn_code load; /* load insn. */
3569 enum insn_code store; /* store insn. */
3572 static const struct fuse_insns addis_insns[] = {
3573 { E_SFmode, E_DImode, RELOAD_REG_FPR,
3574 CODE_FOR_fusion_vsx_di_sf_load,
3575 CODE_FOR_fusion_vsx_di_sf_store },
3577 { E_SFmode, E_SImode, RELOAD_REG_FPR,
3578 CODE_FOR_fusion_vsx_si_sf_load,
3579 CODE_FOR_fusion_vsx_si_sf_store },
3581 { E_DFmode, E_DImode, RELOAD_REG_FPR,
3582 CODE_FOR_fusion_vsx_di_df_load,
3583 CODE_FOR_fusion_vsx_di_df_store },
3585 { E_DFmode, E_SImode, RELOAD_REG_FPR,
3586 CODE_FOR_fusion_vsx_si_df_load,
3587 CODE_FOR_fusion_vsx_si_df_store },
3589 { E_DImode, E_DImode, RELOAD_REG_FPR,
3590 CODE_FOR_fusion_vsx_di_di_load,
3591 CODE_FOR_fusion_vsx_di_di_store },
3593 { E_DImode, E_SImode, RELOAD_REG_FPR,
3594 CODE_FOR_fusion_vsx_si_di_load,
3595 CODE_FOR_fusion_vsx_si_di_store },
3597 { E_QImode, E_DImode, RELOAD_REG_GPR,
3598 CODE_FOR_fusion_gpr_di_qi_load,
3599 CODE_FOR_fusion_gpr_di_qi_store },
3601 { E_QImode, E_SImode, RELOAD_REG_GPR,
3602 CODE_FOR_fusion_gpr_si_qi_load,
3603 CODE_FOR_fusion_gpr_si_qi_store },
3605 { E_HImode, E_DImode, RELOAD_REG_GPR,
3606 CODE_FOR_fusion_gpr_di_hi_load,
3607 CODE_FOR_fusion_gpr_di_hi_store },
3609 { E_HImode, E_SImode, RELOAD_REG_GPR,
3610 CODE_FOR_fusion_gpr_si_hi_load,
3611 CODE_FOR_fusion_gpr_si_hi_store },
3613 { E_SImode, E_DImode, RELOAD_REG_GPR,
3614 CODE_FOR_fusion_gpr_di_si_load,
3615 CODE_FOR_fusion_gpr_di_si_store },
3617 { E_SImode, E_SImode, RELOAD_REG_GPR,
3618 CODE_FOR_fusion_gpr_si_si_load,
3619 CODE_FOR_fusion_gpr_si_si_store },
3621 { E_SFmode, E_DImode, RELOAD_REG_GPR,
3622 CODE_FOR_fusion_gpr_di_sf_load,
3623 CODE_FOR_fusion_gpr_di_sf_store },
3625 { E_SFmode, E_SImode, RELOAD_REG_GPR,
3626 CODE_FOR_fusion_gpr_si_sf_load,
3627 CODE_FOR_fusion_gpr_si_sf_store },
3629 { E_DImode, E_DImode, RELOAD_REG_GPR,
3630 CODE_FOR_fusion_gpr_di_di_load,
3631 CODE_FOR_fusion_gpr_di_di_store },
3633 { E_DFmode, E_DImode, RELOAD_REG_GPR,
3634 CODE_FOR_fusion_gpr_di_df_load,
3635 CODE_FOR_fusion_gpr_di_df_store },
3638 machine_mode cur_pmode = Pmode;
3639 size_t i;
3641 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3643 machine_mode xmode = addis_insns[i].mode;
3644 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3646 if (addis_insns[i].pmode != cur_pmode)
3647 continue;
3649 if (rtype == RELOAD_REG_FPR && !TARGET_HARD_FLOAT)
3650 continue;
3652 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3653 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3655 if (rtype == RELOAD_REG_FPR && TARGET_P9_VECTOR)
3657 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3658 = addis_insns[i].load;
3659 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3660 = addis_insns[i].store;
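/* Illustrative sketch (not from the original source) of the addis fusion
   the table above enables; exact operands depend on context:

       addis 9,2,.LC0@toc@ha       # compute the high-adjusted address
       lfd   0,.LC0@toc@l(9)       # memory access fused with the addis

   Each addis_insns entry simply records, per mode / pointer mode /
   register type, which insn patterns emit such fused pairs.  */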
3665 /* Note which types support fusing a TOC setup with a memory insn. We only
3666 do fused TOCs for medium/large code models. */
3667 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3668 && (TARGET_CMODEL != CMODEL_SMALL))
3670 reg_addr[QImode].fused_toc = true;
3671 reg_addr[HImode].fused_toc = true;
3672 reg_addr[SImode].fused_toc = true;
3673 reg_addr[DImode].fused_toc = true;
3674 if (TARGET_HARD_FLOAT)
3676 if (TARGET_SINGLE_FLOAT)
3677 reg_addr[SFmode].fused_toc = true;
3678 if (TARGET_DOUBLE_FLOAT)
3679 reg_addr[DFmode].fused_toc = true;
3683 /* Precalculate HARD_REGNO_NREGS. */
3684 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3685 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3686 rs6000_hard_regno_nregs[m][r]
3687 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3689 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3690 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3691 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3692 if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3693 rs6000_hard_regno_mode_ok_p[m][r] = true;
3695 /* Precalculate CLASS_MAX_NREGS sizes. */
3696 for (c = 0; c < LIM_REG_CLASSES; ++c)
3698 int reg_size;
3700 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3701 reg_size = UNITS_PER_VSX_WORD;
3703 else if (c == ALTIVEC_REGS)
3704 reg_size = UNITS_PER_ALTIVEC_WORD;
3706 else if (c == FLOAT_REGS)
3707 reg_size = UNITS_PER_FP_WORD;
3709 else
3710 reg_size = UNITS_PER_WORD;
3712 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3714 machine_mode m2 = (machine_mode)m;
3715 int reg_size2 = reg_size;
3717 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3718 in VSX. */
3719 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3720 reg_size2 = UNITS_PER_FP_WORD;
3722 rs6000_class_max_nregs[m][c]
3723 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
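/* Worked example (illustrative only): the expression above is a ceiling
   division, so V2DFmode (16 bytes) in FLOAT_REGS with UNITS_PER_FP_WORD
   of 8 yields (16 + 8 - 1) / 8 == 2 registers, while SFmode (4 bytes)
   still rounds up to 1 register.  */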
3727 /* Calculate which modes to automatically generate code to use the
3728 reciprocal divide and square root instructions. In the future, possibly
3729 automatically generate the instructions even if the user did not specify
3730 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3731 not accurate enough. */
3732 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3733 if (TARGET_FRES)
3734 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3735 if (TARGET_FRE)
3736 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3737 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3738 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3739 if (VECTOR_UNIT_VSX_P (V2DFmode))
3740 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3742 if (TARGET_FRSQRTES)
3743 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3744 if (TARGET_FRSQRTE)
3745 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3746 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3747 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3748 if (VECTOR_UNIT_VSX_P (V2DFmode))
3749 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3751 if (rs6000_recip_control)
3753 if (!flag_finite_math_only)
3754 warning (0, "%qs requires %qs or %qs", "-mrecip",
3755 "-ffinite-math-only", "-ffast-math");
3756 if (flag_trapping_math)
3757 warning (0, "%qs requires %qs or %qs", "-mrecip",
3758 "-fno-trapping-math", "-ffast-math");
3759 if (!flag_reciprocal_math)
3760 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3761 "-ffast-math");
3762 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3764 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3765 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3766 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3768 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3769 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3770 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3772 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3773 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3774 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3776 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3777 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3778 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3780 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3781 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3782 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3784 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3785 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3786 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3788 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3789 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3790 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3792 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3793 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3794 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
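/* Illustrative usage (an assumed invocation, not from this file):

       gcc -O3 -mcpu=power8 -mrecip=div,rsqrt -ffast-math foo.c

   -ffast-math implies -ffinite-math-only, -fno-trapping-math, and
   -freciprocal-math, so the checks below pass and the AUTO_RE /
   AUTO_RSQRTE bits are set, letting divisions and square roots be
   expanded with the estimate instructions plus refinement steps.  */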
3798 /* Update the addr mask bits in reg_addr to help the secondary reload and
3799 legitimate address support figure out the appropriate addressing to
3800 use. */
3801 rs6000_setup_reg_addr_masks ();
3803 if (global_init_p || TARGET_DEBUG_TARGET)
3805 if (TARGET_DEBUG_REG)
3806 rs6000_debug_reg_global ();
3808 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3809 fprintf (stderr,
3810 "SImode variable mult cost = %d\n"
3811 "SImode constant mult cost = %d\n"
3812 "SImode short constant mult cost = %d\n"
3813 "DImode multipliciation cost = %d\n"
3814 "SImode division cost = %d\n"
3815 "DImode division cost = %d\n"
3816 "Simple fp operation cost = %d\n"
3817 "DFmode multiplication cost = %d\n"
3818 "SFmode division cost = %d\n"
3819 "DFmode division cost = %d\n"
3820 "cache line size = %d\n"
3821 "l1 cache size = %d\n"
3822 "l2 cache size = %d\n"
3823 "simultaneous prefetches = %d\n"
3824 "\n",
3825 rs6000_cost->mulsi,
3826 rs6000_cost->mulsi_const,
3827 rs6000_cost->mulsi_const9,
3828 rs6000_cost->muldi,
3829 rs6000_cost->divsi,
3830 rs6000_cost->divdi,
3831 rs6000_cost->fp,
3832 rs6000_cost->dmul,
3833 rs6000_cost->sdiv,
3834 rs6000_cost->ddiv,
3835 rs6000_cost->cache_line_size,
3836 rs6000_cost->l1_cache_size,
3837 rs6000_cost->l2_cache_size,
3838 rs6000_cost->simultaneous_prefetches);
3842 #if TARGET_MACHO
3843 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3845 static void
3846 darwin_rs6000_override_options (void)
3848 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3849 off. */
3850 rs6000_altivec_abi = 1;
3851 TARGET_ALTIVEC_VRSAVE = 1;
3852 rs6000_current_abi = ABI_DARWIN;
3854 if (DEFAULT_ABI == ABI_DARWIN
3855 && TARGET_64BIT)
3856 darwin_one_byte_bool = 1;
3858 if (TARGET_64BIT && ! TARGET_POWERPC64)
3860 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3861 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3863 if (flag_mkernel)
3865 rs6000_default_long_calls = 1;
3866 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3869 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3870 Altivec. */
3871 if (!flag_mkernel && !flag_apple_kext
3872 && TARGET_64BIT
3873 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3874 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3876 /* Unless the user (not the configurer) has explicitly overridden
3877 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to the
3878 G4 unless targeting the kernel. */
3879 if (!flag_mkernel
3880 && !flag_apple_kext
3881 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3882 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3883 && ! global_options_set.x_rs6000_cpu_index)
3885 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3888 #endif
3890 /* If not otherwise specified by a target, make 'long double' equivalent to
3891 'double'. */
3893 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3894 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3895 #endif
3897 /* Return the builtin mask for the various options that could affect which
3898 builtins are available. In the past we used target_flags, but we've run
3899 out of bits, and some options like PAIRED are no longer in target_flags. */
3901 HOST_WIDE_INT
3902 rs6000_builtin_mask_calculate (void)
3904 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3905 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3906 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3907 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3908 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3909 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3910 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3911 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3912 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3913 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3914 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3915 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3916 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3917 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3918 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3919 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3920 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3921 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3922 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3923 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3924 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3925 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
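/* Illustrative example (assumed, not from this file): a plain
   -mcpu=power8 -m64 compile would return a mask containing at least
   RS6000_BTM_ALTIVEC | RS6000_BTM_VSX | RS6000_BTM_P8_VECTOR
   | RS6000_BTM_64BIT, which is then compared against each built-in
   function's required mask before the built-in is made available.  */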
3928 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3929 to clobber the XER[CA] bit because clobbering that bit without telling
3930 the compiler worked just fine with versions of GCC before GCC 5, and
3931 breaking a lot of older code in ways that are hard to track down is
3932 not such a great idea. */
3934 static rtx_insn *
3935 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3936 vec<const char *> &/*constraints*/,
3937 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3939 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3940 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3941 return NULL;
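/* Illustrative example (hypothetical user code, not from this file) of
   the pre-GCC 5 pattern this keeps working; a carry-based asm idiom
   such as

       asm ("addic %0,%1,-1\n\tsubfe %0,%0,%1" : "=&r" (r) : "r" (x));

   sets and consumes XER[CA] without declaring a clobber, so the
   implicit clobber added above preserves its behavior.  */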
3944 /* Override command line options.
3946 Combine build-specific configuration information with options
3947 specified on the command line to set various state variables which
3948 influence code generation, optimization, and expansion of built-in
3949 functions. Assure that command-line configuration preferences are
3950 compatible with each other and with the build configuration; issue
3951 warnings while adjusting configuration or error messages while
3952 rejecting configuration.
3954 Upon entry to this function:
3956 This function is called once at the beginning of
3957 compilation, and then again at the start and end of compiling
3958 each section of code that has a different configuration, as
3959 indicated, for example, by adding the
3961 __attribute__((__target__("cpu=power9")))
3963 qualifier to a function definition or, for example, by bracketing
3964 code between
3966 #pragma GCC target("altivec")
3970 #pragma GCC reset_options
3972 directives. Parameter global_init_p is true for the initial
3973 invocation, which initializes global variables, and false for all
3974 subsequent invocations.
3977 Various global state information is assumed to be valid. This
3978 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3979 default CPU specified at build configure time, TARGET_DEFAULT,
3980 representing the default set of option flags for the default
3981 target, and global_options_set.x_rs6000_isa_flags, representing
3982 which options were requested on the command line.
3984 Upon return from this function:
3986 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3987 was set by name on the command line. Additionally, if certain
3988 attributes are automatically enabled or disabled by this function
3989 in order to assure compatibility between options and
3990 configuration, the flags associated with those attributes are
3991 also set. By setting these "explicit bits", we avoid the risk
3992 that other code might accidentally overwrite these particular
3993 attributes with "default values".
3995 The various bits of rs6000_isa_flags are set to indicate the
3996 target options that have been selected for the most current
3997 compilation efforts. This has the effect of also turning on the
3998 associated TARGET_XXX values since these are macros which are
3999 generally defined to test the corresponding bit of the
4000 rs6000_isa_flags variable.
4002 The variable rs6000_builtin_mask is set to represent the target
4003 options for the most current compilation efforts, consistent with
4004 the current contents of rs6000_isa_flags. This variable controls
4005 expansion of built-in functions.
4007 Various other global variables and fields of global structures
4008 (over 50 in all) are initialized to reflect the desired options
4009 for the most current compilation efforts. */
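/* Illustrative user code (hypothetical, not part of GCC) for the
   per-function reconfiguration described above:

       __attribute__((__target__("cpu=power9")))
       long mod (long a, long b) { return a % b; }

   Compiling this function re-enters rs6000_option_override_internal
   with global_init_p == false, and the power9 flags then allow e.g.
   the ISA 3.0 modsd instruction for the '%' operation.  */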
4011 static bool
4012 rs6000_option_override_internal (bool global_init_p)
4014 bool ret = true;
4016 HOST_WIDE_INT set_masks;
4017 HOST_WIDE_INT ignore_masks;
4018 int cpu_index = -1;
4019 int tune_index;
4020 struct cl_target_option *main_target_opt
4021 = ((global_init_p || target_option_default_node == NULL)
4022 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
4024 /* Print defaults. */
4025 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
4026 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
4028 /* Remember the explicit arguments. */
4029 if (global_init_p)
4030 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
4032 /* We plan to deprecate the -maltivec=be option. For now, just
4033 issue a warning message. */
4034 if (global_init_p
4035 && rs6000_altivec_element_order == 2)
4036 warning (0, "%qs command-line option is deprecated",
4037 "-maltivec=be");
4039 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4040 library functions, so warn about it. The flag may be useful for
4041 performance studies from time to time though, so don't disable it
4042 entirely. */
4043 if (global_options_set.x_rs6000_alignment_flags
4044 && rs6000_alignment_flags == MASK_ALIGN_POWER
4045 && DEFAULT_ABI == ABI_DARWIN
4046 && TARGET_64BIT)
4047 warning (0, "%qs is not supported for 64-bit Darwin;"
4048 " it is incompatible with the installed C and C++ libraries",
4049 "-malign-power");
4051 /* Numerous experiments show that IRA-based loop pressure
4052 calculation works better for RTL loop invariant motion on targets
4053 with enough (>= 32) registers. It is an expensive optimization,
4054 so it is enabled only when optimizing for peak performance. */
4055 if (optimize >= 3 && global_init_p
4056 && !global_options_set.x_flag_ira_loop_pressure)
4057 flag_ira_loop_pressure = 1;
4059 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4060 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
4061 options were already specified. */
4062 if (flag_sanitize & SANITIZE_USER_ADDRESS
4063 && !global_options_set.x_flag_asynchronous_unwind_tables)
4064 flag_asynchronous_unwind_tables = 1;
4066 /* Set the pointer size. */
4067 if (TARGET_64BIT)
4069 rs6000_pmode = DImode;
4070 rs6000_pointer_size = 64;
4072 else
4074 rs6000_pmode = SImode;
4075 rs6000_pointer_size = 32;
4078 /* Some OSs don't support saving the high part of 64-bit registers on context
4079 switch. Other OSs don't support saving Altivec registers. On those OSs,
4080 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4081 if the user wants either, the user must explicitly specify them and we
4082 won't interfere with the user's specification. */
4084 set_masks = POWERPC_MASKS;
4085 #ifdef OS_MISSING_POWERPC64
4086 if (OS_MISSING_POWERPC64)
4087 set_masks &= ~OPTION_MASK_POWERPC64;
4088 #endif
4089 #ifdef OS_MISSING_ALTIVEC
4090 if (OS_MISSING_ALTIVEC)
4091 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4092 | OTHER_VSX_VECTOR_MASKS);
4093 #endif
4095 /* Don't let the processor default override options given explicitly. */
4096 set_masks &= ~rs6000_isa_flags_explicit;
4098 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
4099 the cpu in a target attribute or pragma, but did not specify a tuning
4100 option, use the cpu for the tuning option rather than the option specified
4101 with -mtune on the command line. Process a '--with-cpu' configuration
4102 request as an implicit -mcpu. */
4103 if (rs6000_cpu_index >= 0)
4104 cpu_index = rs6000_cpu_index;
4105 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4106 cpu_index = main_target_opt->x_rs6000_cpu_index;
4107 else if (OPTION_TARGET_CPU_DEFAULT)
4108 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
4110 if (cpu_index >= 0)
4112 const char *unavailable_cpu = NULL;
4113 switch (processor_target_table[cpu_index].processor)
4115 #ifndef HAVE_AS_POWER9
4116 case PROCESSOR_POWER9:
4117 unavailable_cpu = "power9";
4118 break;
4119 #endif
4120 #ifndef HAVE_AS_POWER8
4121 case PROCESSOR_POWER8:
4122 unavailable_cpu = "power8";
4123 break;
4124 #endif
4125 #ifndef HAVE_AS_POPCNTD
4126 case PROCESSOR_POWER7:
4127 unavailable_cpu = "power7";
4128 break;
4129 #endif
4130 #ifndef HAVE_AS_DFP
4131 case PROCESSOR_POWER6:
4132 unavailable_cpu = "power6";
4133 break;
4134 #endif
4135 #ifndef HAVE_AS_POPCNTB
4136 case PROCESSOR_POWER5:
4137 unavailable_cpu = "power5";
4138 break;
4139 #endif
4140 default:
4141 break;
4143 if (unavailable_cpu)
4145 cpu_index = -1;
4146 warning (0, "will not generate %qs instructions because "
4147 "assembler lacks %qs support", unavailable_cpu,
4148 unavailable_cpu);
4152 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4153 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4154 with those from the cpu, except for options that were explicitly set. If
4155 we don't have a cpu, do not override the target bits set in
4156 TARGET_DEFAULT. */
4157 if (cpu_index >= 0)
4159 rs6000_cpu_index = cpu_index;
4160 rs6000_isa_flags &= ~set_masks;
4161 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4162 & set_masks);
4164 else
4166 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4167 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4168 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4169 to using rs6000_isa_flags, we need to do the initialization here.
4171 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4172 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4173 HOST_WIDE_INT flags;
4174 if (TARGET_DEFAULT)
4175 flags = TARGET_DEFAULT;
4176 else
4178 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4179 const char *default_cpu = (!TARGET_POWERPC64
4180 ? "powerpc"
4181 : (BYTES_BIG_ENDIAN
4182 ? "powerpc64"
4183 : "powerpc64le"));
4184 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
4185 flags = processor_target_table[default_cpu_index].target_enable;
4187 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4190 if (rs6000_tune_index >= 0)
4191 tune_index = rs6000_tune_index;
4192 else if (cpu_index >= 0)
4193 rs6000_tune_index = tune_index = cpu_index;
4194 else
4196 size_t i;
4197 enum processor_type tune_proc
4198 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4200 tune_index = -1;
4201 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4202 if (processor_target_table[i].processor == tune_proc)
4204 tune_index = i;
4205 break;
4209 if (cpu_index >= 0)
4210 rs6000_cpu = processor_target_table[cpu_index].processor;
4211 else
4212 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
4214 gcc_assert (tune_index >= 0);
4215 rs6000_tune = processor_target_table[tune_index].processor;
4217 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4218 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4219 || rs6000_cpu == PROCESSOR_PPCE5500)
4221 if (TARGET_ALTIVEC)
4222 error ("AltiVec not supported in this target");
4225 /* If we are optimizing big endian systems for space, use the load/store
4226 multiple instructions. */
4227 if (BYTES_BIG_ENDIAN && optimize_size)
4228 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
4230 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
4231 because the hardware doesn't support the instructions used in little
4232 endian mode, and they cause an alignment trap. The 750 does not cause an
4233 alignment trap (except when the target is unaligned). */
4235 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
4237 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4238 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4239 warning (0, "%qs is not supported on little endian systems",
4240 "-mmultiple");
4243 /* If little-endian, default to -mstrict-align on older processors.
4244 Testing for htm matches power8 and later. */
4245 if (!BYTES_BIG_ENDIAN
4246 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4247 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4249 /* -maltivec={le,be} implies -maltivec. */
4250 if (rs6000_altivec_element_order != 0)
4251 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4253 /* Disallow -maltivec=le in big endian mode for now. This is not
4254 known to be useful for anyone. */
4255 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4257 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4258 rs6000_altivec_element_order = 0;
4261 if (!rs6000_fold_gimple)
4262 fprintf (stderr,
4263 "gimple folding of rs6000 builtins has been disabled.\n");
4265 /* Add some warnings for VSX. */
4266 if (TARGET_VSX)
4268 const char *msg = NULL;
4269 if (!TARGET_HARD_FLOAT || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4271 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4272 msg = N_("-mvsx requires hardware floating point");
4273 else
4275 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4276 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4279 else if (TARGET_PAIRED_FLOAT)
4280 msg = N_("-mvsx and -mpaired are incompatible");
4281 else if (TARGET_AVOID_XFORM > 0)
4282 msg = N_("-mvsx needs indexed addressing");
4283 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4284 & OPTION_MASK_ALTIVEC))
4286 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4287 msg = N_("-mvsx and -mno-altivec are incompatible");
4288 else
4289 msg = N_("-mno-altivec disables vsx");
4292 if (msg)
4294 warning (0, msg);
4295 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4296 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4300 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4301 the -mcpu setting to enable options that conflict. */
4302 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4303 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4304 | OPTION_MASK_ALTIVEC
4305 | OPTION_MASK_VSX)) != 0)
4306 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4307 | OPTION_MASK_DIRECT_MOVE)
4308 & ~rs6000_isa_flags_explicit);
4310 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4311 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4313 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4314 off all of the options that depend on those flags. */
4315 ignore_masks = rs6000_disable_incompatible_switches ();
4317 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4318 unless the user explicitly used the -mno-<option> to disable the code. */
4319 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
4320 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4321 else if (TARGET_P9_MINMAX)
4323 if (cpu_index >= 0)
4325 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
4327 /* Legacy behavior: allow -mcpu=power9 with certain
4328 capabilities explicitly disabled. */
4329 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4331 else
4332 error ("power9 target option is incompatible with %<%s=<xxx>%> "
4333 "for <xxx> less than power9", "-mcpu");
4335 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4336 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4337 & rs6000_isa_flags_explicit))
4338 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4339 were explicitly cleared. */
4340 error ("%qs incompatible with explicitly disabled options",
4341 "-mpower9-minmax");
4342 else
4343 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4345 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4346 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4347 else if (TARGET_VSX)
4348 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4349 else if (TARGET_POPCNTD)
4350 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4351 else if (TARGET_DFP)
4352 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4353 else if (TARGET_CMPB)
4354 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4355 else if (TARGET_FPRND)
4356 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4357 else if (TARGET_POPCNTB)
4358 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4359 else if (TARGET_ALTIVEC)
4360 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4362 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4364 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4365 error ("%qs requires %qs", "-mcrypto", "-maltivec");
4366 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4369 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4371 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4372 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
4373 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4376 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4378 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4379 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
4380 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4383 if (TARGET_P8_VECTOR && !TARGET_VSX)
4385 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4386 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4387 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
4388 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4390 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4391 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4392 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4394 else
4396 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4397 not explicit. */
4398 rs6000_isa_flags |= OPTION_MASK_VSX;
4399 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4403 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4405 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4406 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4407 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4410 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4411 silently turn off quad memory mode. */
4412 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4414 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4415 warning (0, N_("-mquad-memory requires 64-bit mode"));
4417 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4418 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4420 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4421 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4424 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4425 the words are reversed, but atomic operations can still be done by
4426 swapping the words. */
4427 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4429 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4430 warning (0, N_("-mquad-memory is not available in little endian "
4431 "mode"));
4433 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4436 /* Assume if the user asked for normal quad memory instructions, they want
4437 the atomic versions as well, unless they explicitly told us not to use
4438 quad word atomic instructions. */
4439 if (TARGET_QUAD_MEMORY
4440 && !TARGET_QUAD_MEMORY_ATOMIC
4441 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4442 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4444 /* If we can shrink-wrap the TOC register save separately, then use
4445 -msave-toc-indirect unless explicitly disabled. */
4446 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4447 && flag_shrink_wrap_separate
4448 && optimize_function_for_speed_p (cfun))
4449 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4451 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4452 generating power8 instructions. */
4453 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4454 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4455 & OPTION_MASK_P8_FUSION);
4457 /* Setting additional fusion flags turns on base fusion. */
4458 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4460 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4462 if (TARGET_P8_FUSION_SIGN)
4463 error ("%qs requires %qs", "-mpower8-fusion-sign",
4464 "-mpower8-fusion");
4466 if (TARGET_TOC_FUSION)
4467 error ("%qs requires %qs", "-mtoc-fusion", "-mpower8-fusion");
4469 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4471 else
4472 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4475 /* Power9 fusion is a superset of power8 fusion. */
4476 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4478 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4480 /* We prefer to not mention undocumented options in
4481 error messages. However, if users have managed to select
4482 power9-fusion without selecting power8-fusion, they
4483 already know about undocumented flags. */
4484 error ("%qs requires %qs", "-mpower9-fusion", "-mpower8-fusion");
4485 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4487 else
4488 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4491 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4492 generating power9 instructions. */
4493 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4494 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4495 & OPTION_MASK_P9_FUSION);
4497 /* Power8 does not fuse sign-extended loads with the addis. If we are
4498 optimizing at high levels for speed, convert a sign-extended load into a
4499 zero-extending load and an explicit sign extension. */
4500 if (TARGET_P8_FUSION
4501 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4502 && optimize_function_for_speed_p (cfun)
4503 && optimize >= 3)
4504 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4506 /* TOC fusion requires 64-bit and medium/large code model. */
4507 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4509 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4510 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4511 warning (0, N_("-mtoc-fusion requires 64-bit"));
4514 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4516 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4517 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4518 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4521 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4522 model. */
4523 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4524 && (TARGET_CMODEL != CMODEL_SMALL)
4525 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4526 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4528 /* ISA 3.0 vector instructions include ISA 2.07. */
4529 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4531 /* We prefer to not mention undocumented options in
4532 error messages. However, if users have managed to select
4533 power9-vector without selecting power8-vector, they
4534 already know about undocumented flags. */
4535 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4536 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4537 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4538 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4540 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4541 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4542 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4544 else
4546 /* OPTION_MASK_P9_VECTOR is explicit and
4547 OPTION_MASK_P8_VECTOR is not explicit. */
4548 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4549 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4553 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4554 support. If we only have ISA 2.06 support, and the user did not specify
4555 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4556 but we don't enable the full vectorization support. */
4557 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4558 TARGET_ALLOW_MOVMISALIGN = 1;
4560 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4562 if (TARGET_ALLOW_MOVMISALIGN > 0
4563 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4564 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4566 TARGET_ALLOW_MOVMISALIGN = 0;
4569 /* Determine when unaligned vector accesses are permitted, and when
4570 they are preferred over masked Altivec loads. Note that if
4571 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4572 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4573 not true. */
4574 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4576 if (!TARGET_VSX)
4578 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4579 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4581 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4584 else if (!TARGET_ALLOW_MOVMISALIGN)
4586 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4587 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4588 "-mallow-movmisalign");
4590 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4594 /* Set long double size before the IEEE 128-bit tests. */
4595 if (!global_options_set.x_rs6000_long_double_type_size)
4597 if (main_target_opt != NULL
4598 && (main_target_opt->x_rs6000_long_double_type_size
4599 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4600 error ("target attribute or pragma changes long double size");
4601 else
4602 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4605 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4606 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4607 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4608 those systems will not pick up this default. Warn if the user changes the
4609 default unless either the user used the -Wno-psabi option, or the compiler
4610 was built to enable multilibs to switch between the two long double
4611 types. */
4612 if (!global_options_set.x_rs6000_ieeequad)
4613 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4615 else if (!TARGET_IEEEQUAD_MULTILIB
4616 && rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT
4617 && TARGET_LONG_DOUBLE_128)
4619 static bool warned_change_long_double;
4620 if (!warned_change_long_double)
4622 warned_change_long_double = true;
4623 if (TARGET_IEEEQUAD)
4624 warning (OPT_Wpsabi, "Using IEEE extended precision long double");
4625 else
4626 warning (OPT_Wpsabi, "Using IBM extended precision long double");
4630 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4631 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4632 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4633 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4634 the keyword and the type. */
4635 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4637 /* IEEE 128-bit floating point requires VSX support. */
4638 if (TARGET_FLOAT128_KEYWORD)
4640 if (!TARGET_VSX)
4642 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4643 error ("%qs requires VSX support", "-mfloat128");
4645 TARGET_FLOAT128_TYPE = 0;
4646 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4647 | OPTION_MASK_FLOAT128_HW);
4649 else if (!TARGET_FLOAT128_TYPE)
4651 TARGET_FLOAT128_TYPE = 1;
4652 warning (0, "The -mfloat128 option may not be fully supported");
4656 /* Enable the __float128 keyword under Linux by default. */
4657 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4658 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4659 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4661 /* If we are supporting the float128 type and have full ISA 3.0 support,
4662 enable -mfloat128-hardware by default. However, don't enable the
4663 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4664 because sometimes the compiler wants to put things in an integer
4665 container, and if we don't have __int128 support, it is impossible. */
4666 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4667 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4668 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4669 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4671 if (TARGET_FLOAT128_HW
4672 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4674 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4675 error ("%qs requires full ISA 3.0 support", "-mfloat128-hardware");
4677 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4680 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4682 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4683 error ("%qs requires %qs", "-mfloat128-hardware", "-m64");
4685 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
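/* Illustrative user-level effect (hypothetical example, not from this
   file): with -mcpu=power9 -m64 on a VSX Linux target, the checks above
   leave -mfloat128 and -mfloat128-hardware enabled, so a declaration
   such as

       __float128 q = x;

   is implemented with ISA 3.0 quad-precision instructions rather than
   software emulation libcalls.  */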
4688 /* Print the options after updating the defaults. */
4689 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4690 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4692 /* E500mc does "better" if we inline more aggressively. Respect the
4693 user's opinion, though. */
4694 if (rs6000_block_move_inline_limit == 0
4695 && (rs6000_tune == PROCESSOR_PPCE500MC
4696 || rs6000_tune == PROCESSOR_PPCE500MC64
4697 || rs6000_tune == PROCESSOR_PPCE5500
4698 || rs6000_tune == PROCESSOR_PPCE6500))
4699 rs6000_block_move_inline_limit = 128;
4701 /* store_one_arg depends on expand_block_move to handle at least the
4702 size of reg_parm_stack_space. */
4703 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4704 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
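/* Worked example (illustrative only): with the 64-bit floor of 64
   bytes, a copy such as memcpy (dst, src, 64) can be expanded inline by
   expand_block_move rather than via a library call, which is what
   store_one_arg relies on for argument blocks up to
   reg_parm_stack_space bytes.  */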
4706 if (global_init_p)
4708 /* If the appropriate debug option is enabled, replace the target hooks
4709 with debug versions that call the real version and then prints
4710 debugging information. */
4711 if (TARGET_DEBUG_COST)
4713 targetm.rtx_costs = rs6000_debug_rtx_costs;
4714 targetm.address_cost = rs6000_debug_address_cost;
4715 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4718 if (TARGET_DEBUG_ADDR)
4720 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4721 targetm.legitimize_address = rs6000_debug_legitimize_address;
4722 rs6000_secondary_reload_class_ptr
4723 = rs6000_debug_secondary_reload_class;
4724 targetm.secondary_memory_needed
4725 = rs6000_debug_secondary_memory_needed;
4726 targetm.can_change_mode_class
4727 = rs6000_debug_can_change_mode_class;
4728 rs6000_preferred_reload_class_ptr
4729 = rs6000_debug_preferred_reload_class;
4730 rs6000_legitimize_reload_address_ptr
4731 = rs6000_debug_legitimize_reload_address;
4732 rs6000_mode_dependent_address_ptr
4733 = rs6000_debug_mode_dependent_address;
4736 if (rs6000_veclibabi_name)
4738 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4739 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4740 else
4742 error ("unknown vectorization library ABI type (%qs) for "
4743 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4744 ret = false;
4749 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4750 target attribute or pragma which automatically enables both options,
4751 unless the altivec ABI was set. This is set by default for 64-bit, but
4752 not for 32-bit. */
4753 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4755 TARGET_FLOAT128_TYPE = 0;
4756 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4757 | OPTION_MASK_FLOAT128_KEYWORD)
4758 & ~rs6000_isa_flags_explicit);
4761 /* Enable Altivec ABI for AIX -maltivec. */
4762 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4764 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4765 error ("target attribute or pragma changes AltiVec ABI");
4766 else
4767 rs6000_altivec_abi = 1;
4770 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4771 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4772 be explicitly overridden in either case. */
4773 if (TARGET_ELF)
4775 if (!global_options_set.x_rs6000_altivec_abi
4776 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4778 if (main_target_opt != NULL
4779 && !main_target_opt->x_rs6000_altivec_abi)
4780 error ("target attribute or pragma changes AltiVec ABI");
4781 else
4782 rs6000_altivec_abi = 1;
4786 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4787 So far, the only darwin64 targets are also MACH-O. */
4788 if (TARGET_MACHO
4789 && DEFAULT_ABI == ABI_DARWIN
4790 && TARGET_64BIT)
4792 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4793 error ("target attribute or pragma changes darwin64 ABI");
4794 else
4796 rs6000_darwin64_abi = 1;
4797 /* Default to natural alignment, for better performance. */
4798 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4802 /* Place FP constants in the constant pool instead of the TOC
4803 if section anchors are enabled. */
4804 if (flag_section_anchors
4805 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4806 TARGET_NO_FP_IN_TOC = 1;
4808 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4809 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4811 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4812 SUBTARGET_OVERRIDE_OPTIONS;
4813 #endif
4814 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4815 SUBSUBTARGET_OVERRIDE_OPTIONS;
4816 #endif
4817 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4818 SUB3TARGET_OVERRIDE_OPTIONS;
4819 #endif
4821 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4822 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4824 if (main_target_opt)
4826 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4827 error ("target attribute or pragma changes single precision floating "
4828 "point");
4829 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4830 error ("target attribute or pragma changes double precision floating "
4831 "point");
4834 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4835 && rs6000_tune != PROCESSOR_POWER5
4836 && rs6000_tune != PROCESSOR_POWER6
4837 && rs6000_tune != PROCESSOR_POWER7
4838 && rs6000_tune != PROCESSOR_POWER8
4839 && rs6000_tune != PROCESSOR_POWER9
4840 && rs6000_tune != PROCESSOR_PPCA2
4841 && rs6000_tune != PROCESSOR_CELL
4842 && rs6000_tune != PROCESSOR_PPC476);
4843 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4844 || rs6000_tune == PROCESSOR_POWER5
4845 || rs6000_tune == PROCESSOR_POWER7
4846 || rs6000_tune == PROCESSOR_POWER8);
4847 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4848 || rs6000_tune == PROCESSOR_POWER5
4849 || rs6000_tune == PROCESSOR_POWER6
4850 || rs6000_tune == PROCESSOR_POWER7
4851 || rs6000_tune == PROCESSOR_POWER8
4852 || rs6000_tune == PROCESSOR_POWER9
4853 || rs6000_tune == PROCESSOR_PPCE500MC
4854 || rs6000_tune == PROCESSOR_PPCE500MC64
4855 || rs6000_tune == PROCESSOR_PPCE5500
4856 || rs6000_tune == PROCESSOR_PPCE6500);
4858 /* Allow debug switches to override the above settings. These are set to -1
4859 in rs6000.opt to indicate the user hasn't directly set the switch. */
4860 if (TARGET_ALWAYS_HINT >= 0)
4861 rs6000_always_hint = TARGET_ALWAYS_HINT;
4863 if (TARGET_SCHED_GROUPS >= 0)
4864 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4866 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4867 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4869 rs6000_sched_restricted_insns_priority
4870 = (rs6000_sched_groups ? 1 : 0);
4872 /* Handle -msched-costly-dep option. */
4873 rs6000_sched_costly_dep
4874 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4876 if (rs6000_sched_costly_dep_str)
4878 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4879 rs6000_sched_costly_dep = no_dep_costly;
4880 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4881 rs6000_sched_costly_dep = all_deps_costly;
4882 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4883 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4884 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4885 rs6000_sched_costly_dep = store_to_load_dep_costly;
4886 else
4887 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4888 atoi (rs6000_sched_costly_dep_str));
4891 /* Handle -minsert-sched-nops option. */
4892 rs6000_sched_insert_nops
4893 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4895 if (rs6000_sched_insert_nops_str)
4897 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4898 rs6000_sched_insert_nops = sched_finish_none;
4899 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4900 rs6000_sched_insert_nops = sched_finish_pad_groups;
4901 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4902 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4903 else
4904 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4905 atoi (rs6000_sched_insert_nops_str));
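/* Illustrative command line (assumed usage, not from this file):

       gcc -O2 -mtune=power5 -msched-costly-dep=true_store_to_load \
           -minsert-sched-nops=regroup_exact foo.c

   exercises both string parsers above; a bare number such as
   -msched-costly-dep=4 is also accepted via the atoi fallback and
   treats any dependence with latency >= 4 as costly.  */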
4908 /* Handle the stack protector options. */
4909 if (!global_options_set.x_rs6000_stack_protector_guard)
4910 #ifdef TARGET_THREAD_SSP_OFFSET
4911 rs6000_stack_protector_guard = SSP_TLS;
4912 #else
4913 rs6000_stack_protector_guard = SSP_GLOBAL;
4914 #endif
4916 #ifdef TARGET_THREAD_SSP_OFFSET
4917 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4918 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4919 #endif
4921 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4923 char *endp;
4924 const char *str = rs6000_stack_protector_guard_offset_str;
4926 errno = 0;
4927 long offset = strtol (str, &endp, 0);
4928 if (!*str || *endp || errno)
4929 error ("%qs is not a valid number in %qs", str,
4930 "-mstack-protector-guard-offset=");
4932 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4933 || (TARGET_64BIT && (offset & 3)))
4934 error ("%qs is not a valid offset in %qs", str,
4935 "-mstack-protector-guard-offset=");
4937 rs6000_stack_protector_guard_offset = offset;
4940 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4942 const char *str = rs6000_stack_protector_guard_reg_str;
4943 int reg = decode_reg_name (str);
4945 if (!IN_RANGE (reg, 1, 31))
4946 error ("%qs is not a valid base register in %qs", str,
4947 "-mstack-protector-guard-reg=");
4949 rs6000_stack_protector_guard_reg = reg;
4952 if (rs6000_stack_protector_guard == SSP_TLS
4953 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4954 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4956 if (global_init_p)
4958 #ifdef TARGET_REGNAMES
4959 /* If the user desires alternate register names, copy in the
4960 alternate names now. */
4961 if (TARGET_REGNAMES)
4962 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4963 #endif
4965 /* Set aix_struct_return last, after the ABI is determined.
4966 If -maix-struct-return or -msvr4-struct-return was explicitly
4967 used, don't override with the ABI default. */
4968 if (!global_options_set.x_aix_struct_return)
4969 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4971 #if 0
4972 /* IBM XL compiler defaults to unsigned bitfields. */
4973 if (TARGET_XL_COMPAT)
4974 flag_signed_bitfields = 0;
4975 #endif
4977 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4978 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4980 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4982 /* We can only guarantee the availability of DI pseudo-ops when
4983 assembling for 64-bit targets. */
4984 if (!TARGET_64BIT)
4986 targetm.asm_out.aligned_op.di = NULL;
4987 targetm.asm_out.unaligned_op.di = NULL;
4991 /* Set branch target alignment, if not optimizing for size. */
4992 if (!optimize_size)
4994 /* Cell wants to be 8-byte aligned for dual issue.  Titan wants to be
4995 8-byte aligned to avoid misprediction by the branch predictor.  */
4996 if (rs6000_tune == PROCESSOR_TITAN
4997 || rs6000_tune == PROCESSOR_CELL)
4999 if (align_functions <= 0)
5000 align_functions = 8;
5001 if (align_jumps <= 0)
5002 align_jumps = 8;
5003 if (align_loops <= 0)
5004 align_loops = 8;
5006 if (rs6000_align_branch_targets)
5008 if (align_functions <= 0)
5009 align_functions = 16;
5010 if (align_jumps <= 0)
5011 align_jumps = 16;
5012 if (align_loops <= 0)
5014 can_override_loop_align = 1;
5015 align_loops = 16;
5018 if (align_jumps_max_skip <= 0)
5019 align_jumps_max_skip = 15;
5020 if (align_loops_max_skip <= 0)
5021 align_loops_max_skip = 15;
5024 /* Arrange to save and restore machine status around nested functions. */
5025 init_machine_status = rs6000_init_machine_status;
5027 /* We should always be splitting complex arguments, but we can't break
5028 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5029 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5030 targetm.calls.split_complex_arg = NULL;
5032 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5033 if (DEFAULT_ABI == ABI_AIX)
5034 targetm.calls.custom_function_descriptors = 0;
5037 /* Initialize rs6000_cost with the appropriate target costs. */
5038 if (optimize_size)
5039 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5040 else
5041 switch (rs6000_tune)
5043 case PROCESSOR_RS64A:
5044 rs6000_cost = &rs64a_cost;
5045 break;
5047 case PROCESSOR_MPCCORE:
5048 rs6000_cost = &mpccore_cost;
5049 break;
5051 case PROCESSOR_PPC403:
5052 rs6000_cost = &ppc403_cost;
5053 break;
5055 case PROCESSOR_PPC405:
5056 rs6000_cost = &ppc405_cost;
5057 break;
5059 case PROCESSOR_PPC440:
5060 rs6000_cost = &ppc440_cost;
5061 break;
5063 case PROCESSOR_PPC476:
5064 rs6000_cost = &ppc476_cost;
5065 break;
5067 case PROCESSOR_PPC601:
5068 rs6000_cost = &ppc601_cost;
5069 break;
5071 case PROCESSOR_PPC603:
5072 rs6000_cost = &ppc603_cost;
5073 break;
5075 case PROCESSOR_PPC604:
5076 rs6000_cost = &ppc604_cost;
5077 break;
5079 case PROCESSOR_PPC604e:
5080 rs6000_cost = &ppc604e_cost;
5081 break;
5083 case PROCESSOR_PPC620:
5084 rs6000_cost = &ppc620_cost;
5085 break;
5087 case PROCESSOR_PPC630:
5088 rs6000_cost = &ppc630_cost;
5089 break;
5091 case PROCESSOR_CELL:
5092 rs6000_cost = &ppccell_cost;
5093 break;
5095 case PROCESSOR_PPC750:
5096 case PROCESSOR_PPC7400:
5097 rs6000_cost = &ppc750_cost;
5098 break;
5100 case PROCESSOR_PPC7450:
5101 rs6000_cost = &ppc7450_cost;
5102 break;
5104 case PROCESSOR_PPC8540:
5105 case PROCESSOR_PPC8548:
5106 rs6000_cost = &ppc8540_cost;
5107 break;
5109 case PROCESSOR_PPCE300C2:
5110 case PROCESSOR_PPCE300C3:
5111 rs6000_cost = &ppce300c2c3_cost;
5112 break;
5114 case PROCESSOR_PPCE500MC:
5115 rs6000_cost = &ppce500mc_cost;
5116 break;
5118 case PROCESSOR_PPCE500MC64:
5119 rs6000_cost = &ppce500mc64_cost;
5120 break;
5122 case PROCESSOR_PPCE5500:
5123 rs6000_cost = &ppce5500_cost;
5124 break;
5126 case PROCESSOR_PPCE6500:
5127 rs6000_cost = &ppce6500_cost;
5128 break;
5130 case PROCESSOR_TITAN:
5131 rs6000_cost = &titan_cost;
5132 break;
5134 case PROCESSOR_POWER4:
5135 case PROCESSOR_POWER5:
5136 rs6000_cost = &power4_cost;
5137 break;
5139 case PROCESSOR_POWER6:
5140 rs6000_cost = &power6_cost;
5141 break;
5143 case PROCESSOR_POWER7:
5144 rs6000_cost = &power7_cost;
5145 break;
5147 case PROCESSOR_POWER8:
5148 rs6000_cost = &power8_cost;
5149 break;
5151 case PROCESSOR_POWER9:
5152 rs6000_cost = &power9_cost;
5153 break;
5155 case PROCESSOR_PPCA2:
5156 rs6000_cost = &ppca2_cost;
5157 break;
5159 default:
5160 gcc_unreachable ();
5163 if (global_init_p)
5165 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5166 rs6000_cost->simultaneous_prefetches,
5167 global_options.x_param_values,
5168 global_options_set.x_param_values);
5169 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5170 global_options.x_param_values,
5171 global_options_set.x_param_values);
5172 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5173 rs6000_cost->cache_line_size,
5174 global_options.x_param_values,
5175 global_options_set.x_param_values);
5176 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5177 global_options.x_param_values,
5178 global_options_set.x_param_values);
5180 /* Increase loop peeling limits based on performance analysis. */
5181 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5182 global_options.x_param_values,
5183 global_options_set.x_param_values);
5184 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5185 global_options.x_param_values,
5186 global_options_set.x_param_values);
5188 /* Use the 'model' -fsched-pressure algorithm by default. */
5189 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5190 SCHED_PRESSURE_MODEL,
5191 global_options.x_param_values,
5192 global_options_set.x_param_values);
5194 /* If using typedef char *va_list, signal that
5195 __builtin_va_start (&ap, 0) can be optimized to
5196 ap = __builtin_next_arg (0). */
5197 if (DEFAULT_ABI != ABI_V4)
5198 targetm.expand_builtin_va_start = NULL;
5201 /* Set up single/double float flags.
5202 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5203 then set both flags. */
5204 if (TARGET_HARD_FLOAT && rs6000_single_float == 0 && rs6000_double_float == 0)
5205 rs6000_single_float = rs6000_double_float = 1;
5207 /* If not explicitly specified via option, decide whether to generate indexed
5208 load/store instructions. A value of -1 indicates that the
5209 initial value of this variable has not been overwritten. During
5210 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5211 if (TARGET_AVOID_XFORM == -1)
5212 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5213 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5214 need indexed accesses and the type used is the scalar type of the element
5215 being loaded or stored. */
5216 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
5217 && !TARGET_ALTIVEC);
5219 /* Set the -mrecip options. */
5220 if (rs6000_recip_name)
5222 char *p = ASTRDUP (rs6000_recip_name);
5223 char *q;
5224 unsigned int mask, i;
5225 bool invert;
5227 while ((q = strtok (p, ",")) != NULL)
5229 p = NULL;
5230 if (*q == '!')
5232 invert = true;
5233 q++;
5235 else
5236 invert = false;
5238 if (!strcmp (q, "default"))
5239 mask = ((TARGET_RECIP_PRECISION)
5240 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5241 else
5243 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5244 if (!strcmp (q, recip_options[i].string))
5246 mask = recip_options[i].mask;
5247 break;
5250 if (i == ARRAY_SIZE (recip_options))
5252 error ("unknown option for %<%s=%s%>", "-mrecip", q);
5253 invert = false;
5254 mask = 0;
5255 ret = false;
5259 if (invert)
5260 rs6000_recip_control &= ~mask;
5261 else
5262 rs6000_recip_control |= mask;
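/* A minimal stand-alone sketch (not part of GCC) of the -mrecip parsing
   above: the argument is a comma-separated list, and a leading '!' inverts
   an entry.  The option names used here are illustrative; the real set
   comes from the recip_options table.  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char buf[] = "all,!rsqrtd";	/* strtok needs a writable buffer.  */
  char *p = buf, *q;

  while ((q = strtok (p, ",")) != NULL)
    {
      int invert = (*q == '!');
      p = NULL;			/* Later calls continue the same scan.  */
      if (invert)
	q++;
      printf ("token \"%s\"%s\n", q, invert ? " (inverted)" : "");
    }
  /* Prints: token "all", then token "rsqrtd" (inverted).  */
  return 0;
}
#endif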
5266 /* Set the builtin mask of the various options used that could affect which
5267 builtins were used. In the past we used target_flags, but we've run out
5268 of bits, and some options like PAIRED are no longer in target_flags. */
5269 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5270 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5271 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5272 rs6000_builtin_mask);
5274 /* Initialize all of the registers. */
5275 rs6000_init_hard_regno_mode_ok (global_init_p);
5277 /* Save the initial options in case the user uses function-specific options.  */
5278 if (global_init_p)
5279 target_option_default_node = target_option_current_node
5280 = build_target_option_node (&global_options);
5282 /* If not explicitly specified via option, decide whether to generate the
5283 extra blr's required to preserve the link stack on some cpus (e.g., 476).  */
5284 if (TARGET_LINK_STACK == -1)
5285 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
5287 /* Deprecate use of -mno-speculate-indirect-jumps. */
5288 if (!rs6000_speculate_indirect_jumps)
5289 warning (0, "%qs is deprecated and not recommended in any circumstances",
5290 "-mno-speculate-indirect-jumps");
5292 return ret;
5295 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5296 define the target cpu type. */
5298 static void
5299 rs6000_option_override (void)
5301 (void) rs6000_option_override_internal (true);
5305 /* Implement targetm.vectorize.builtin_mask_for_load. */
5306 static tree
5307 rs6000_builtin_mask_for_load (void)
5309 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5310 if ((TARGET_ALTIVEC && !TARGET_VSX)
5311 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5312 return altivec_builtin_mask_for_load;
5313 else
5314 return 0;
5317 /* Implement LOOP_ALIGN. */
5318 int
5319 rs6000_loop_align (rtx label)
5321 basic_block bb;
5322 int ninsns;
5324 /* Don't override loop alignment if -falign-loops was specified. */
5325 if (!can_override_loop_align)
5326 return align_loops_log;
5328 bb = BLOCK_FOR_INSN (label);
5329 ninsns = num_loop_insns (bb->loop_father);
5331 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default.  */
5332 if (ninsns > 4 && ninsns <= 8
5333 && (rs6000_tune == PROCESSOR_POWER4
5334 || rs6000_tune == PROCESSOR_POWER5
5335 || rs6000_tune == PROCESSOR_POWER6
5336 || rs6000_tune == PROCESSOR_POWER7
5337 || rs6000_tune == PROCESSOR_POWER8))
5338 return 5;
5339 else
5340 return align_loops_log;
5343 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5344 static int
5345 rs6000_loop_align_max_skip (rtx_insn *label)
5347 return (1 << rs6000_loop_align (label)) - 1;
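/* Worked example (illustrative): when rs6000_loop_align returns 5, the
   loop starts on a 1 << 5 = 32-byte boundary, and up to
   (1 << 5) - 1 = 31 bytes of padding may be emitted to get there.  */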
5350 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5351 after applying N iterations.  This routine does not determine how many
5352 iterations are required to reach the desired alignment.  */
5354 static bool
5355 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5357 if (is_packed)
5358 return false;
5360 if (TARGET_32BIT)
5362 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5363 return true;
5365 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5366 return true;
5368 return false;
5370 else
5372 if (TARGET_MACHO)
5373 return false;
5375 /* Assume that all other types are naturally aligned.  CHECKME!  */
5376 return true;
5380 /* Return true if the vector misalignment factor is supported by the
5381 target. */
5382 static bool
5383 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5384 const_tree type,
5385 int misalignment,
5386 bool is_packed)
5388 if (TARGET_VSX)
5390 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5391 return true;
5393 /* Return false if the movmisalign pattern is not supported for this mode.  */
5394 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5395 return false;
5397 if (misalignment == -1)
5399 /* Misalignment factor is unknown at compile time but we know
5400 it's word aligned. */
5401 if (rs6000_vector_alignment_reachable (type, is_packed))
5403 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5405 if (element_size == 64 || element_size == 32)
5406 return true;
5409 return false;
5412 /* VSX supports word-aligned vectors.  */
5413 if (misalignment % 4 == 0)
5414 return true;
5416 return false;
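/* Illustrative examples of the checks above: on a VSX target without
   efficient unaligned accesses, a known misalignment of 4 or 8 bytes is
   supported (a multiple of 4), while a misalignment of 2 is not; an
   unknown misalignment (-1) is supported only if vector alignment is
   reachable and the element size is 32 or 64 bits.  */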
5419 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5420 static int
5421 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5422 tree vectype, int misalign)
5424 unsigned elements;
5425 tree elem_type;
5427 switch (type_of_cost)
5429 case scalar_stmt:
5430 case scalar_load:
5431 case scalar_store:
5432 case vector_stmt:
5433 case vector_load:
5434 case vector_store:
5435 case vec_to_scalar:
5436 case scalar_to_vec:
5437 case cond_branch_not_taken:
5438 return 1;
5440 case vec_perm:
5441 if (TARGET_VSX)
5442 return 3;
5443 else
5444 return 1;
5446 case vec_promote_demote:
5447 if (TARGET_VSX)
5448 return 4;
5449 else
5450 return 1;
5452 case cond_branch_taken:
5453 return 3;
5455 case unaligned_load:
5456 case vector_gather_load:
5457 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5458 return 1;
5460 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5462 elements = TYPE_VECTOR_SUBPARTS (vectype);
5463 if (elements == 2)
5464 /* Double word aligned. */
5465 return 2;
5467 if (elements == 4)
5469 switch (misalign)
5471 case 8:
5472 /* Double word aligned. */
5473 return 2;
5475 case -1:
5476 /* Unknown misalignment. */
5477 case 4:
5478 case 12:
5479 /* Word aligned. */
5480 return 22;
5482 default:
5483 gcc_unreachable ();
5488 if (TARGET_ALTIVEC)
5489 /* Misaligned loads are not supported. */
5490 gcc_unreachable ();
5492 return 2;
5494 case unaligned_store:
5495 case vector_scatter_store:
5496 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5497 return 1;
5499 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5501 elements = TYPE_VECTOR_SUBPARTS (vectype);
5502 if (elements == 2)
5503 /* Double word aligned. */
5504 return 2;
5506 if (elements == 4)
5508 switch (misalign)
5510 case 8:
5511 /* Double word aligned. */
5512 return 2;
5514 case -1:
5515 /* Unknown misalignment. */
5516 case 4:
5517 case 12:
5518 /* Word aligned. */
5519 return 23;
5521 default:
5522 gcc_unreachable ();
5527 if (TARGET_ALTIVEC)
5528 /* Misaligned stores are not supported. */
5529 gcc_unreachable ();
5531 return 2;
5533 case vec_construct:
5534 /* This is a rough approximation assuming non-constant elements
5535 constructed into a vector via element insertion. FIXME:
5536 vec_construct is not granular enough for uniformly good
5537 decisions. If the initialization is a splat, this is
5538 cheaper than we estimate. Improve this someday. */
5539 elem_type = TREE_TYPE (vectype);
5540 /* 32-bit floats loaded into registers are stored as double
5541 precision, so we need 2 permutes, 2 converts, and 1 merge
5542 to construct a vector of short floats from them.  */
5543 if (SCALAR_FLOAT_TYPE_P (elem_type)
5544 && TYPE_PRECISION (elem_type) == 32)
5545 return 5;
5546 /* On POWER9, integer vector types are built up in GPRs and then
5547 use a direct move (2 cycles). For POWER8 this is even worse,
5548 as we need two direct moves and a merge, and the direct moves
5549 are five cycles. */
5550 else if (INTEGRAL_TYPE_P (elem_type))
5552 if (TARGET_P9_VECTOR)
5553 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5554 else
5555 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5557 else
5558 /* V2DFmode doesn't need a direct move. */
5559 return 2;
5561 default:
5562 gcc_unreachable ();
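/* Worked example (illustrative): on a pre-POWER9 VSX target that allows
   movmisalign, an unaligned V4SF load known to be doubleword aligned
   (misalign 8) costs 2, while one that is only word aligned (misalign 4
   or 12) or of unknown alignment (-1) costs 22; with efficient unaligned
   VSX (POWER9), every such load costs 1.  */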
5566 /* Implement targetm.vectorize.preferred_simd_mode. */
5568 static machine_mode
5569 rs6000_preferred_simd_mode (scalar_mode mode)
5571 if (TARGET_VSX)
5572 switch (mode)
5574 case E_DFmode:
5575 return V2DFmode;
5576 default:;
5578 if (TARGET_ALTIVEC || TARGET_VSX)
5579 switch (mode)
5581 case E_SFmode:
5582 return V4SFmode;
5583 case E_TImode:
5584 return V1TImode;
5585 case E_DImode:
5586 return V2DImode;
5587 case E_SImode:
5588 return V4SImode;
5589 case E_HImode:
5590 return V8HImode;
5591 case E_QImode:
5592 return V16QImode;
5593 default:;
5595 if (TARGET_PAIRED_FLOAT
5596 && mode == SFmode)
5597 return V2SFmode;
5598 return word_mode;
5601 typedef struct _rs6000_cost_data
5603 struct loop *loop_info;
5604 unsigned cost[3];
5605 } rs6000_cost_data;
5607 /* Test for likely overcommitment of vector hardware resources. If a
5608 loop iteration is relatively large, and too large a percentage of
5609 instructions in the loop are vectorized, the cost model may not
5610 adequately reflect delays from unavailable vector resources.
5611 Penalize the loop body cost for this case. */
5613 static void
5614 rs6000_density_test (rs6000_cost_data *data)
5616 const int DENSITY_PCT_THRESHOLD = 85;
5617 const int DENSITY_SIZE_THRESHOLD = 70;
5618 const int DENSITY_PENALTY = 10;
5619 struct loop *loop = data->loop_info;
5620 basic_block *bbs = get_loop_body (loop);
5621 int nbbs = loop->num_nodes;
5622 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5623 int i, density_pct;
5625 for (i = 0; i < nbbs; i++)
5627 basic_block bb = bbs[i];
5628 gimple_stmt_iterator gsi;
5630 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5632 gimple *stmt = gsi_stmt (gsi);
5633 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5635 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5636 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5637 not_vec_cost++;
5641 free (bbs);
5642 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5644 if (density_pct > DENSITY_PCT_THRESHOLD
5645 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5647 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5648 if (dump_enabled_p ())
5649 dump_printf_loc (MSG_NOTE, vect_location,
5650 "density %d%%, cost %d exceeds threshold, penalizing "
5651 "loop body cost by %d%%", density_pct,
5652 vec_cost + not_vec_cost, DENSITY_PENALTY);
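/* Worked example (illustrative): with vec_cost = 90 and not_vec_cost = 10,
   density_pct = 90 * 100 / 100 = 90 > 85, and the loop size 100 > 70, so
   the body cost is raised to 90 * (100 + 10) / 100 = 99.  */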
5656 /* Implement targetm.vectorize.init_cost. */
5658 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5659 instruction is needed by the vectorization. */
5660 static bool rs6000_vect_nonmem;
5662 static void *
5663 rs6000_init_cost (struct loop *loop_info)
5665 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5666 data->loop_info = loop_info;
5667 data->cost[vect_prologue] = 0;
5668 data->cost[vect_body] = 0;
5669 data->cost[vect_epilogue] = 0;
5670 rs6000_vect_nonmem = false;
5671 return data;
5674 /* Implement targetm.vectorize.add_stmt_cost. */
5676 static unsigned
5677 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5678 struct _stmt_vec_info *stmt_info, int misalign,
5679 enum vect_cost_model_location where)
5681 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5682 unsigned retval = 0;
5684 if (flag_vect_cost_model)
5686 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5687 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5688 misalign);
5689 /* Statements in an inner loop relative to the loop being
5690 vectorized are weighted more heavily. The value here is
5691 arbitrary and could potentially be improved with analysis. */
5692 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5693 count *= 50; /* FIXME. */
5695 retval = (unsigned) (count * stmt_cost);
5696 cost_data->cost[where] += retval;
5698 /* Check whether we're doing something other than just a copy loop.
5699 Not all such loops may be profitably vectorized; see
5700 rs6000_finish_cost. */
5701 if ((kind == vec_to_scalar || kind == vec_perm
5702 || kind == vec_promote_demote || kind == vec_construct
5703 || kind == scalar_to_vec)
5704 || (where == vect_body && kind == vector_stmt))
5705 rs6000_vect_nonmem = true;
5708 return retval;
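/* Worked example (illustrative): a vector_stmt of base cost 1 occurring in
   a loop nested inside the loop being vectorized is charged 1 * 50 = 50 by
   the weighting above.  */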
5711 /* Implement targetm.vectorize.finish_cost. */
5713 static void
5714 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5715 unsigned *body_cost, unsigned *epilogue_cost)
5717 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5719 if (cost_data->loop_info)
5720 rs6000_density_test (cost_data);
5722 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5723 that require versioning for any reason. The vectorization is at
5724 best a wash inside the loop, and the versioning checks make
5725 profitability highly unlikely and potentially quite harmful. */
5726 if (cost_data->loop_info)
5728 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5729 if (!rs6000_vect_nonmem
5730 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5731 && LOOP_REQUIRES_VERSIONING (vec_info))
5732 cost_data->cost[vect_body] += 10000;
5735 *prologue_cost = cost_data->cost[vect_prologue];
5736 *body_cost = cost_data->cost[vect_body];
5737 *epilogue_cost = cost_data->cost[vect_epilogue];
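/* Illustrative consequence of the check above: a pure copy loop
   (rs6000_vect_nonmem still false) with vectorization factor 2 that needs
   versioning gets 10000 added to its body cost, effectively vetoing
   vectorization.  */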
5740 /* Implement targetm.vectorize.destroy_cost_data. */
5742 static void
5743 rs6000_destroy_cost_data (void *data)
5745 free (data);
5748 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5749 library with vectorized intrinsics. */
5751 static tree
5752 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5753 tree type_in)
5755 char name[32];
5756 const char *suffix = NULL;
5757 tree fntype, new_fndecl, bdecl = NULL_TREE;
5758 int n_args = 1;
5759 const char *bname;
5760 machine_mode el_mode, in_mode;
5761 int n, in_n;
5763 /* Libmass is suitable only for unsafe math, as it does not correctly
5764 support parts of IEEE (such as denormals) with the required precision.
5765 Only support it if we have VSX, to use the SIMD d2 or f4 functions.
5766 XXX: Add variable length support. */
5767 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5768 return NULL_TREE;
5770 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5771 n = TYPE_VECTOR_SUBPARTS (type_out);
5772 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5773 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5774 if (el_mode != in_mode
5775 || n != in_n)
5776 return NULL_TREE;
5778 switch (fn)
5780 CASE_CFN_ATAN2:
5781 CASE_CFN_HYPOT:
5782 CASE_CFN_POW:
5783 n_args = 2;
5784 gcc_fallthrough ();
5786 CASE_CFN_ACOS:
5787 CASE_CFN_ACOSH:
5788 CASE_CFN_ASIN:
5789 CASE_CFN_ASINH:
5790 CASE_CFN_ATAN:
5791 CASE_CFN_ATANH:
5792 CASE_CFN_CBRT:
5793 CASE_CFN_COS:
5794 CASE_CFN_COSH:
5795 CASE_CFN_ERF:
5796 CASE_CFN_ERFC:
5797 CASE_CFN_EXP2:
5798 CASE_CFN_EXP:
5799 CASE_CFN_EXPM1:
5800 CASE_CFN_LGAMMA:
5801 CASE_CFN_LOG10:
5802 CASE_CFN_LOG1P:
5803 CASE_CFN_LOG2:
5804 CASE_CFN_LOG:
5805 CASE_CFN_SIN:
5806 CASE_CFN_SINH:
5807 CASE_CFN_SQRT:
5808 CASE_CFN_TAN:
5809 CASE_CFN_TANH:
5810 if (el_mode == DFmode && n == 2)
5812 bdecl = mathfn_built_in (double_type_node, fn);
5813 suffix = "d2"; /* pow -> powd2 */
5815 else if (el_mode == SFmode && n == 4)
5817 bdecl = mathfn_built_in (float_type_node, fn);
5818 suffix = "4"; /* powf -> powf4 */
5820 else
5821 return NULL_TREE;
5822 if (!bdecl)
5823 return NULL_TREE;
5824 break;
5826 default:
5827 return NULL_TREE;
5830 gcc_assert (suffix != NULL);
5831 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5832 if (!bname)
5833 return NULL_TREE;
5835 strcpy (name, bname + sizeof ("__builtin_") - 1);
5836 strcat (name, suffix);
5838 if (n_args == 1)
5839 fntype = build_function_type_list (type_out, type_in, NULL);
5840 else if (n_args == 2)
5841 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5842 else
5843 gcc_unreachable ();
5845 /* Build a function declaration for the vectorized function. */
5846 new_fndecl = build_decl (BUILTINS_LOCATION,
5847 FUNCTION_DECL, get_identifier (name), fntype);
5848 TREE_PUBLIC (new_fndecl) = 1;
5849 DECL_EXTERNAL (new_fndecl) = 1;
5850 DECL_IS_NOVOPS (new_fndecl) = 1;
5851 TREE_READONLY (new_fndecl) = 1;
5853 return new_fndecl;
5856 /* Returns a function decl for a vectorized version of the builtin function
5857 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5858 if it is not available. */
5860 static tree
5861 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5862 tree type_in)
5864 machine_mode in_mode, out_mode;
5865 int in_n, out_n;
5867 if (TARGET_DEBUG_BUILTIN)
5868 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5869 combined_fn_name (combined_fn (fn)),
5870 GET_MODE_NAME (TYPE_MODE (type_out)),
5871 GET_MODE_NAME (TYPE_MODE (type_in)));
5873 if (TREE_CODE (type_out) != VECTOR_TYPE
5874 || TREE_CODE (type_in) != VECTOR_TYPE)
5875 return NULL_TREE;
5877 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5878 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5879 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5880 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5882 switch (fn)
5884 CASE_CFN_COPYSIGN:
5885 if (VECTOR_UNIT_VSX_P (V2DFmode)
5886 && out_mode == DFmode && out_n == 2
5887 && in_mode == DFmode && in_n == 2)
5888 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5889 if (VECTOR_UNIT_VSX_P (V4SFmode)
5890 && out_mode == SFmode && out_n == 4
5891 && in_mode == SFmode && in_n == 4)
5892 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5893 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5894 && out_mode == SFmode && out_n == 4
5895 && in_mode == SFmode && in_n == 4)
5896 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5897 break;
5898 CASE_CFN_CEIL:
5899 if (VECTOR_UNIT_VSX_P (V2DFmode)
5900 && out_mode == DFmode && out_n == 2
5901 && in_mode == DFmode && in_n == 2)
5902 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5903 if (VECTOR_UNIT_VSX_P (V4SFmode)
5904 && out_mode == SFmode && out_n == 4
5905 && in_mode == SFmode && in_n == 4)
5906 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5907 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5908 && out_mode == SFmode && out_n == 4
5909 && in_mode == SFmode && in_n == 4)
5910 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5911 break;
5912 CASE_CFN_FLOOR:
5913 if (VECTOR_UNIT_VSX_P (V2DFmode)
5914 && out_mode == DFmode && out_n == 2
5915 && in_mode == DFmode && in_n == 2)
5916 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5917 if (VECTOR_UNIT_VSX_P (V4SFmode)
5918 && out_mode == SFmode && out_n == 4
5919 && in_mode == SFmode && in_n == 4)
5920 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5921 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5922 && out_mode == SFmode && out_n == 4
5923 && in_mode == SFmode && in_n == 4)
5924 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5925 break;
5926 CASE_CFN_FMA:
5927 if (VECTOR_UNIT_VSX_P (V2DFmode)
5928 && out_mode == DFmode && out_n == 2
5929 && in_mode == DFmode && in_n == 2)
5930 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5931 if (VECTOR_UNIT_VSX_P (V4SFmode)
5932 && out_mode == SFmode && out_n == 4
5933 && in_mode == SFmode && in_n == 4)
5934 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5935 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5936 && out_mode == SFmode && out_n == 4
5937 && in_mode == SFmode && in_n == 4)
5938 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5939 break;
5940 CASE_CFN_TRUNC:
5941 if (VECTOR_UNIT_VSX_P (V2DFmode)
5942 && out_mode == DFmode && out_n == 2
5943 && in_mode == DFmode && in_n == 2)
5944 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5945 if (VECTOR_UNIT_VSX_P (V4SFmode)
5946 && out_mode == SFmode && out_n == 4
5947 && in_mode == SFmode && in_n == 4)
5948 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5949 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5950 && out_mode == SFmode && out_n == 4
5951 && in_mode == SFmode && in_n == 4)
5952 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5953 break;
5954 CASE_CFN_NEARBYINT:
5955 if (VECTOR_UNIT_VSX_P (V2DFmode)
5956 && flag_unsafe_math_optimizations
5957 && out_mode == DFmode && out_n == 2
5958 && in_mode == DFmode && in_n == 2)
5959 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5960 if (VECTOR_UNIT_VSX_P (V4SFmode)
5961 && flag_unsafe_math_optimizations
5962 && out_mode == SFmode && out_n == 4
5963 && in_mode == SFmode && in_n == 4)
5964 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5965 break;
5966 CASE_CFN_RINT:
5967 if (VECTOR_UNIT_VSX_P (V2DFmode)
5968 && !flag_trapping_math
5969 && out_mode == DFmode && out_n == 2
5970 && in_mode == DFmode && in_n == 2)
5971 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5972 if (VECTOR_UNIT_VSX_P (V4SFmode)
5973 && !flag_trapping_math
5974 && out_mode == SFmode && out_n == 4
5975 && in_mode == SFmode && in_n == 4)
5976 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5977 break;
5978 default:
5979 break;
5982 /* Generate calls to libmass if appropriate. */
5983 if (rs6000_veclib_handler)
5984 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5986 return NULL_TREE;
5989 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5991 static tree
5992 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5993 tree type_in)
5995 machine_mode in_mode, out_mode;
5996 int in_n, out_n;
5998 if (TARGET_DEBUG_BUILTIN)
5999 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6000 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6001 GET_MODE_NAME (TYPE_MODE (type_out)),
6002 GET_MODE_NAME (TYPE_MODE (type_in)));
6004 if (TREE_CODE (type_out) != VECTOR_TYPE
6005 || TREE_CODE (type_in) != VECTOR_TYPE)
6006 return NULL_TREE;
6008 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6009 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6010 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6011 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6013 enum rs6000_builtins fn
6014 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6015 switch (fn)
6017 case RS6000_BUILTIN_RSQRTF:
6018 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6019 && out_mode == SFmode && out_n == 4
6020 && in_mode == SFmode && in_n == 4)
6021 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6022 break;
6023 case RS6000_BUILTIN_RSQRT:
6024 if (VECTOR_UNIT_VSX_P (V2DFmode)
6025 && out_mode == DFmode && out_n == 2
6026 && in_mode == DFmode && in_n == 2)
6027 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6028 break;
6029 case RS6000_BUILTIN_RECIPF:
6030 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6031 && out_mode == SFmode && out_n == 4
6032 && in_mode == SFmode && in_n == 4)
6033 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6034 break;
6035 case RS6000_BUILTIN_RECIP:
6036 if (VECTOR_UNIT_VSX_P (V2DFmode)
6037 && out_mode == DFmode && out_n == 2
6038 && in_mode == DFmode && in_n == 2)
6039 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6040 break;
6041 default:
6042 break;
6044 return NULL_TREE;
6047 /* Default CPU string for rs6000*_file_start functions. */
6048 static const char *rs6000_default_cpu;
6050 /* Do anything needed at the start of the asm file. */
6052 static void
6053 rs6000_file_start (void)
6055 char buffer[80];
6056 const char *start = buffer;
6057 FILE *file = asm_out_file;
6059 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6061 default_file_start ();
6063 if (flag_verbose_asm)
6065 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6067 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6069 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6070 start = "";
6073 if (global_options_set.x_rs6000_cpu_index)
6075 fprintf (file, "%s -mcpu=%s", start,
6076 processor_target_table[rs6000_cpu_index].name);
6077 start = "";
6080 if (global_options_set.x_rs6000_tune_index)
6082 fprintf (file, "%s -mtune=%s", start,
6083 processor_target_table[rs6000_tune_index].name);
6084 start = "";
6087 if (PPC405_ERRATUM77)
6089 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6090 start = "";
6093 #ifdef USING_ELFOS_H
6094 switch (rs6000_sdata)
6096 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6097 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6098 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6099 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6102 if (rs6000_sdata && g_switch_value)
6104 fprintf (file, "%s -G %d", start,
6105 g_switch_value);
6106 start = "";
6108 #endif
6110 if (*start == '\0')
6111 putc ('\n', file);
6114 #ifdef USING_ELFOS_H
6115 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6116 && !global_options_set.x_rs6000_cpu_index)
6118 fputs ("\t.machine ", asm_out_file);
6119 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6120 fputs ("power9\n", asm_out_file);
6121 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6122 fputs ("power8\n", asm_out_file);
6123 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6124 fputs ("power7\n", asm_out_file);
6125 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6126 fputs ("power6\n", asm_out_file);
6127 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6128 fputs ("power5\n", asm_out_file);
6129 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6130 fputs ("power4\n", asm_out_file);
6131 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6132 fputs ("ppc64\n", asm_out_file);
6133 else
6134 fputs ("ppc\n", asm_out_file);
6136 #endif
6138 if (DEFAULT_ABI == ABI_ELFv2)
6139 fprintf (file, "\t.abiversion 2\n");
6143 /* Return nonzero if this function is known to have a null epilogue. */
6145 int
6146 direct_return (void)
6148 if (reload_completed)
6150 rs6000_stack_t *info = rs6000_stack_info ();
6152 if (info->first_gp_reg_save == 32
6153 && info->first_fp_reg_save == 64
6154 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6155 && ! info->lr_save_p
6156 && ! info->cr_save_p
6157 && info->vrsave_size == 0
6158 && ! info->push_p)
6159 return 1;
6162 return 0;
6165 /* Return the number of instructions it takes to form a constant in an
6166 integer register. */
6168 int
6169 num_insns_constant_wide (HOST_WIDE_INT value)
6171 /* Signed 16-bit constant loadable with addi.  */
6172 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6173 return 1;
6175 /* Constant loadable with addis.  */
6176 else if ((value & 0xffff) == 0
6177 && (value >> 31 == -1 || value >> 31 == 0))
6178 return 1;
6180 else if (TARGET_POWERPC64)
6182 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6183 HOST_WIDE_INT high = value >> 31;
6185 if (high == 0 || high == -1)
6186 return 2;
6188 high >>= 1;
6190 if (low == 0)
6191 return num_insns_constant_wide (high) + 1;
6192 else if (high == 0)
6193 return num_insns_constant_wide (low) + 1;
6194 else
6195 return (num_insns_constant_wide (high)
6196 + num_insns_constant_wide (low) + 1);
6199 else
6200 return 2;
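/* Worked examples for the function above (illustrative, assuming a
   64-bit target):
     0x7fff             -> 1 insn  (addi: fits a signed 16-bit immediate)
     0x12340000         -> 1 insn  (addis: low 16 bits clear, fits 32 bits)
     0x12345678         -> 2 insns (addis + ori)
     0x1234567890abcdef -> 5 insns (2 per 32-bit half, plus 1 to combine).  */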
6203 int
6204 num_insns_constant (rtx op, machine_mode mode)
6206 HOST_WIDE_INT low, high;
6208 switch (GET_CODE (op))
6210 case CONST_INT:
6211 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6212 && rs6000_is_valid_and_mask (op, mode))
6213 return 2;
6214 else
6215 return num_insns_constant_wide (INTVAL (op));
6217 case CONST_WIDE_INT:
6219 int i;
6220 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6221 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6222 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6223 return ins;
6226 case CONST_DOUBLE:
6227 if (mode == SFmode || mode == SDmode)
6229 long l;
6231 if (DECIMAL_FLOAT_MODE_P (mode))
6232 REAL_VALUE_TO_TARGET_DECIMAL32
6233 (*CONST_DOUBLE_REAL_VALUE (op), l);
6234 else
6235 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6236 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6239 long l[2];
6240 if (DECIMAL_FLOAT_MODE_P (mode))
6241 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6242 else
6243 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6244 high = l[WORDS_BIG_ENDIAN == 0];
6245 low = l[WORDS_BIG_ENDIAN != 0];
6247 if (TARGET_32BIT)
6248 return (num_insns_constant_wide (low)
6249 + num_insns_constant_wide (high));
6250 else
6252 if ((high == 0 && low >= 0)
6253 || (high == -1 && low < 0))
6254 return num_insns_constant_wide (low);
6256 else if (rs6000_is_valid_and_mask (op, mode))
6257 return 2;
6259 else if (low == 0)
6260 return num_insns_constant_wide (high) + 1;
6262 else
6263 return (num_insns_constant_wide (high)
6264 + num_insns_constant_wide (low) + 1);
6267 default:
6268 gcc_unreachable ();
6272 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6273 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6274 corresponding element of the vector, but for V4SFmode and V2SFmode,
6275 the corresponding "float" is interpreted as an SImode integer. */
6277 HOST_WIDE_INT
6278 const_vector_elt_as_int (rtx op, unsigned int elt)
6280 rtx tmp;
6282 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6283 gcc_assert (GET_MODE (op) != V2DImode
6284 && GET_MODE (op) != V2DFmode);
6286 tmp = CONST_VECTOR_ELT (op, elt);
6287 if (GET_MODE (op) == V4SFmode
6288 || GET_MODE (op) == V2SFmode)
6289 tmp = gen_lowpart (SImode, tmp);
6290 return INTVAL (tmp);
6293 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6294 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6295 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6296 all items are set to the same value and contain COPIES replicas of the
6297 vsplt's operand; if STEP > 1, one in every STEP elements is set to the
6298 vsplt's operand and the others are set to the value of the operand's msb.  */
6300 static bool
6301 vspltis_constant (rtx op, unsigned step, unsigned copies)
6303 machine_mode mode = GET_MODE (op);
6304 machine_mode inner = GET_MODE_INNER (mode);
6306 unsigned i;
6307 unsigned nunits;
6308 unsigned bitsize;
6309 unsigned mask;
6311 HOST_WIDE_INT val;
6312 HOST_WIDE_INT splat_val;
6313 HOST_WIDE_INT msb_val;
6315 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6316 return false;
6318 nunits = GET_MODE_NUNITS (mode);
6319 bitsize = GET_MODE_BITSIZE (inner);
6320 mask = GET_MODE_MASK (inner);
6322 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6323 splat_val = val;
6324 msb_val = val >= 0 ? 0 : -1;
6326 /* Construct the value to be splatted, if possible. If not, return 0. */
6327 for (i = 2; i <= copies; i *= 2)
6329 HOST_WIDE_INT small_val;
6330 bitsize /= 2;
6331 small_val = splat_val >> bitsize;
6332 mask >>= bitsize;
6333 if (splat_val != ((HOST_WIDE_INT)
6334 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6335 | (small_val & mask)))
6336 return false;
6337 splat_val = small_val;
6340 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6341 if (EASY_VECTOR_15 (splat_val))
6344 /* Also check if we can splat, and then add the result to itself. Do so if
6345 the value is positive, or if the splat instruction is using OP's mode;
6346 for splat_val < 0, the splat and the add should use the same mode. */
6347 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6348 && (splat_val >= 0 || (step == 1 && copies == 1)))
6351 /* Also check if we are loading up the most significant bit, which can be done by
6352 loading up -1 and shifting the value left by -1. */
6353 else if (EASY_VECTOR_MSB (splat_val, inner))
6356 else
6357 return false;
6359 /* Check if VAL is present in every STEP-th element, and the
6360 other elements are filled with its most significant bit. */
6361 for (i = 1; i < nunits; ++i)
6363 HOST_WIDE_INT desired_val;
6364 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6365 if ((i & (step - 1)) == 0)
6366 desired_val = val;
6367 else
6368 desired_val = msb_val;
6370 if (desired_val != const_vector_elt_as_int (op, elt))
6371 return false;
6374 return true;
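/* Illustrative examples (element lists in big-endian order): the V4SImode
   constant {5,5,5,5} matches with STEP = 1, COPIES = 1 (a plain
   vspltisw 5); a V4SImode constant with every word 0x03030303 matches with
   STEP = 1, COPIES = 4, i.e. vspltisb 3 viewed as words; and the V8HImode
   constant {0,5,0,5,0,5,0,5} matches with STEP = 2, COPIES = 1, i.e.
   vspltisw 5 viewed as halfwords, the in-between elements holding the
   msb fill (0).  */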
6377 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6378 instruction, filling in the bottom elements with 0 or -1.
6380 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6381 for the number of zeroes to shift in, or negative for the number of 0xff
6382 bytes to shift in.
6384 OP is a CONST_VECTOR. */
6386 int
6387 vspltis_shifted (rtx op)
6389 machine_mode mode = GET_MODE (op);
6390 machine_mode inner = GET_MODE_INNER (mode);
6392 unsigned i, j;
6393 unsigned nunits;
6394 unsigned mask;
6396 HOST_WIDE_INT val;
6398 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6399 return false;
6401 /* We need to create pseudo registers to do the shift, so don't recognize
6402 shift vector constants after reload. */
6403 if (!can_create_pseudo_p ())
6404 return false;
6406 nunits = GET_MODE_NUNITS (mode);
6407 mask = GET_MODE_MASK (inner);
6409 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6411 /* Check if the value can really be the operand of a vspltis[bhw]. */
6412 if (EASY_VECTOR_15 (val))
6415 /* Also check if we are loading up the most significant bit, which can be done
6416 by loading up -1 and shifting the value left by -1. */
6417 else if (EASY_VECTOR_MSB (val, inner))
6420 else
6421 return 0;
6423 /* Check if VAL is present in every STEP-th element until we find elements
6424 that are 0 or all 1 bits. */
6425 for (i = 1; i < nunits; ++i)
6427 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6428 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6430 /* If the value isn't the splat value, check for the remaining elements
6431 being 0/-1. */
6432 if (val != elt_val)
6434 if (elt_val == 0)
6436 for (j = i+1; j < nunits; ++j)
6438 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6439 if (const_vector_elt_as_int (op, elt2) != 0)
6440 return 0;
6443 return (nunits - i) * GET_MODE_SIZE (inner);
6446 else if ((elt_val & mask) == mask)
6448 for (j = i+1; j < nunits; ++j)
6450 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6451 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6452 return 0;
6455 return -((nunits - i) * GET_MODE_SIZE (inner));
6458 else
6459 return 0;
6463 /* If all elements are equal, we don't need to do VSLDOI.  */
6464 return 0;
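/* Illustrative example: in V16QImode, the constant {5,0,0,...,0}
   (big-endian element order) is vspltisb 5 followed by a VSLDOI, so the
   routine returns (16 - 1) * 1 = 15, the number of zero bytes to shift
   in.  */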
6468 /* Return true if OP is of the given MODE and can be synthesized
6469 with a vspltisb, vspltish or vspltisw. */
6471 bool
6472 easy_altivec_constant (rtx op, machine_mode mode)
6474 unsigned step, copies;
6476 if (mode == VOIDmode)
6477 mode = GET_MODE (op);
6478 else if (mode != GET_MODE (op))
6479 return false;
6481 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6482 constants. */
6483 if (mode == V2DFmode)
6484 return zero_constant (op, mode);
6486 else if (mode == V2DImode)
6488 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6489 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6490 return false;
6492 if (zero_constant (op, mode))
6493 return true;
6495 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6496 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6497 return true;
6499 return false;
6502 /* V1TImode is a special container for TImode. Ignore for now. */
6503 else if (mode == V1TImode)
6504 return false;
6506 /* Start with a vspltisw. */
6507 step = GET_MODE_NUNITS (mode) / 4;
6508 copies = 1;
6510 if (vspltis_constant (op, step, copies))
6511 return true;
6513 /* Then try with a vspltish. */
6514 if (step == 1)
6515 copies <<= 1;
6516 else
6517 step >>= 1;
6519 if (vspltis_constant (op, step, copies))
6520 return true;
6522 /* And finally a vspltisb. */
6523 if (step == 1)
6524 copies <<= 1;
6525 else
6526 step >>= 1;
6528 if (vspltis_constant (op, step, copies))
6529 return true;
6531 if (vspltis_shifted (op) != 0)
6532 return true;
6534 return false;
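/* The cascade above walks the three splat instructions: for V16QImode it
   tries STEP = 4 (vspltisw), then STEP = 2 (vspltish), then STEP = 1
   (vspltisb); for V4SImode it tries STEP = 1, COPIES = 1 (vspltisw), then
   COPIES = 2 (vspltish), then COPIES = 4 (vspltisb).  */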
6537 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6538 result is OP. Abort if it is not possible. */
6540 rtx
6541 gen_easy_altivec_constant (rtx op)
6543 machine_mode mode = GET_MODE (op);
6544 int nunits = GET_MODE_NUNITS (mode);
6545 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6546 unsigned step = nunits / 4;
6547 unsigned copies = 1;
6549 /* Start with a vspltisw. */
6550 if (vspltis_constant (op, step, copies))
6551 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6553 /* Then try with a vspltish. */
6554 if (step == 1)
6555 copies <<= 1;
6556 else
6557 step >>= 1;
6559 if (vspltis_constant (op, step, copies))
6560 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6562 /* And finally a vspltisb. */
6563 if (step == 1)
6564 copies <<= 1;
6565 else
6566 step >>= 1;
6568 if (vspltis_constant (op, step, copies))
6569 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6571 gcc_unreachable ();
6574 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6575 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6577 Return the number of instructions needed (1 or 2) via the address pointed
6578 to by NUM_INSNS_PTR.
6580 Return the constant that is being split via CONSTANT_PTR. */
6582 bool
6583 xxspltib_constant_p (rtx op,
6584 machine_mode mode,
6585 int *num_insns_ptr,
6586 int *constant_ptr)
6588 size_t nunits = GET_MODE_NUNITS (mode);
6589 size_t i;
6590 HOST_WIDE_INT value;
6591 rtx element;
6593 /* Set the returned values to out-of-bounds values.  */
6594 *num_insns_ptr = -1;
6595 *constant_ptr = 256;
6597 if (!TARGET_P9_VECTOR)
6598 return false;
6600 if (mode == VOIDmode)
6601 mode = GET_MODE (op);
6603 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6604 return false;
6606 /* Handle (vec_duplicate <constant>). */
6607 if (GET_CODE (op) == VEC_DUPLICATE)
6609 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6610 && mode != V2DImode)
6611 return false;
6613 element = XEXP (op, 0);
6614 if (!CONST_INT_P (element))
6615 return false;
6617 value = INTVAL (element);
6618 if (!IN_RANGE (value, -128, 127))
6619 return false;
6622 /* Handle (const_vector [...]). */
6623 else if (GET_CODE (op) == CONST_VECTOR)
6625 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6626 && mode != V2DImode)
6627 return false;
6629 element = CONST_VECTOR_ELT (op, 0);
6630 if (!CONST_INT_P (element))
6631 return false;
6633 value = INTVAL (element);
6634 if (!IN_RANGE (value, -128, 127))
6635 return false;
6637 for (i = 1; i < nunits; i++)
6639 element = CONST_VECTOR_ELT (op, i);
6640 if (!CONST_INT_P (element))
6641 return false;
6643 if (value != INTVAL (element))
6644 return false;
6648 /* Handle integer constants being loaded into the upper part of the VSX
6649 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6650 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6651 else if (CONST_INT_P (op))
6653 if (!SCALAR_INT_MODE_P (mode))
6654 return false;
6656 value = INTVAL (op);
6657 if (!IN_RANGE (value, -128, 127))
6658 return false;
6660 if (!IN_RANGE (value, -1, 0))
6662 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6663 return false;
6665 if (EASY_VECTOR_15 (value))
6666 return false;
6670 else
6671 return false;
6673 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6674 sign extend. Special case 0/-1 to allow getting any VSX register instead
6675 of an Altivec register. */
6676 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6677 && EASY_VECTOR_15 (value))
6678 return false;
6680 /* Return # of instructions and the constant byte for XXSPLTIB. */
6681 if (mode == V16QImode)
6682 *num_insns_ptr = 1;
6684 else if (IN_RANGE (value, -1, 0))
6685 *num_insns_ptr = 1;
6687 else
6688 *num_insns_ptr = 2;
6690 *constant_ptr = (int) value;
6691 return true;
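/* Illustrative examples of the above: a V16QImode splat of 33 needs 1 insn
   (xxspltib); a V8HImode splat of 33 needs 2 insns (xxspltib plus a
   vupkhsb-style sign extension); a V8HImode splat of 5 is rejected here
   because a single vspltish already handles it.  */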
6694 const char *
6695 output_vec_const_move (rtx *operands)
6697 int shift;
6698 machine_mode mode;
6699 rtx dest, vec;
6701 dest = operands[0];
6702 vec = operands[1];
6703 mode = GET_MODE (dest);
6705 if (TARGET_VSX)
6707 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6708 int xxspltib_value = 256;
6709 int num_insns = -1;
6711 if (zero_constant (vec, mode))
6713 if (TARGET_P9_VECTOR)
6714 return "xxspltib %x0,0";
6716 else if (dest_vmx_p)
6717 return "vspltisw %0,0";
6719 else
6720 return "xxlxor %x0,%x0,%x0";
6723 if (all_ones_constant (vec, mode))
6725 if (TARGET_P9_VECTOR)
6726 return "xxspltib %x0,255";
6728 else if (dest_vmx_p)
6729 return "vspltisw %0,-1";
6731 else if (TARGET_P8_VECTOR)
6732 return "xxlorc %x0,%x0,%x0";
6734 else
6735 gcc_unreachable ();
6738 if (TARGET_P9_VECTOR
6739 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6741 if (num_insns == 1)
6743 operands[2] = GEN_INT (xxspltib_value & 0xff);
6744 return "xxspltib %x0,%2";
6747 return "#";
6751 if (TARGET_ALTIVEC)
6753 rtx splat_vec;
6755 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6756 if (zero_constant (vec, mode))
6757 return "vspltisw %0,0";
6759 if (all_ones_constant (vec, mode))
6760 return "vspltisw %0,-1";
6762 /* Do we need to construct a value using VSLDOI? */
6763 shift = vspltis_shifted (vec);
6764 if (shift != 0)
6765 return "#";
6767 splat_vec = gen_easy_altivec_constant (vec);
6768 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6769 operands[1] = XEXP (splat_vec, 0);
6770 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6771 return "#";
6773 switch (GET_MODE (splat_vec))
6775 case E_V4SImode:
6776 return "vspltisw %0,%1";
6778 case E_V8HImode:
6779 return "vspltish %0,%1";
6781 case E_V16QImode:
6782 return "vspltisb %0,%1";
6784 default:
6785 gcc_unreachable ();
6789 gcc_unreachable ();
6792 /* Initialize the PAIRED vector TARGET to VALS.  */
6794 void
6795 paired_expand_vector_init (rtx target, rtx vals)
6797 machine_mode mode = GET_MODE (target);
6798 int n_elts = GET_MODE_NUNITS (mode);
6799 int n_var = 0;
6800 rtx x, new_rtx, tmp, constant_op, op1, op2;
6801 int i;
6803 for (i = 0; i < n_elts; ++i)
6805 x = XVECEXP (vals, 0, i);
6806 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6807 ++n_var;
6809 if (n_var == 0)
6811 /* Load from constant pool. */
6812 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6813 return;
6816 if (n_var == 2)
6818 /* The vector is initialized only with non-constants. */
6819 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6820 XVECEXP (vals, 0, 1));
6822 emit_move_insn (target, new_rtx);
6823 return;
6826 /* One field is non-constant and the other one is a constant.  Load the
6827 constant from the constant pool and use the ps_merge instruction to
6828 construct the whole vector.  */
6829 op1 = XVECEXP (vals, 0, 0);
6830 op2 = XVECEXP (vals, 0, 1);
6832 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6834 tmp = gen_reg_rtx (GET_MODE (constant_op));
6835 emit_move_insn (tmp, constant_op);
6837 if (CONSTANT_P (op1))
6838 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6839 else
6840 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6842 emit_move_insn (target, new_rtx);
6845 void
6846 paired_expand_vector_move (rtx operands[])
6848 rtx op0 = operands[0], op1 = operands[1];
6850 emit_move_insn (op0, op1);
6853 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6854 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6855 operands for the relation operation COND. This is a recursive
6856 function. */
6858 static void
6859 paired_emit_vector_compare (enum rtx_code rcode,
6860 rtx dest, rtx op0, rtx op1,
6861 rtx cc_op0, rtx cc_op1)
6863 rtx tmp = gen_reg_rtx (V2SFmode);
6864 rtx tmp1, max, min;
6866 gcc_assert (TARGET_PAIRED_FLOAT);
6867 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6869 switch (rcode)
6871 case LT:
6872 case LTU:
6873 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6874 return;
6875 case GE:
6876 case GEU:
6877 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6878 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6879 return;
6880 case LE:
6881 case LEU:
6882 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6883 return;
6884 case GT:
6885 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6886 return;
6887 case EQ:
6888 tmp1 = gen_reg_rtx (V2SFmode);
6889 max = gen_reg_rtx (V2SFmode);
6890 min = gen_reg_rtx (V2SFmode);
6891 gen_reg_rtx (V2SFmode);
6893 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6894 emit_insn (gen_selv2sf4
6895 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6896 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6897 emit_insn (gen_selv2sf4
6898 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6899 emit_insn (gen_subv2sf3 (tmp1, min, max));
6900 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6901 return;
6902 case NE:
6903 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6904 return;
6905 case UNLE:
6906 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6907 return;
6908 case UNLT:
6909 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6910 return;
6911 case UNGE:
6912 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6913 return;
6914 case UNGT:
6915 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6916 return;
6917 default:
6918 gcc_unreachable ();
6921 return;
6924 /* Emit vector conditional expression.
6925 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6926 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6928 int
6929 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6930 rtx cond, rtx cc_op0, rtx cc_op1)
6932 enum rtx_code rcode = GET_CODE (cond);
6934 if (!TARGET_PAIRED_FLOAT)
6935 return 0;
6937 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6939 return 1;
6942 /* Initialize vector TARGET to VALS. */
6944 void
6945 rs6000_expand_vector_init (rtx target, rtx vals)
6947 machine_mode mode = GET_MODE (target);
6948 machine_mode inner_mode = GET_MODE_INNER (mode);
6949 int n_elts = GET_MODE_NUNITS (mode);
6950 int n_var = 0, one_var = -1;
6951 bool all_same = true, all_const_zero = true;
6952 rtx x, mem;
6953 int i;
6955 for (i = 0; i < n_elts; ++i)
6957 x = XVECEXP (vals, 0, i);
6958 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6959 ++n_var, one_var = i;
6960 else if (x != CONST0_RTX (inner_mode))
6961 all_const_zero = false;
6963 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6964 all_same = false;
6967 if (n_var == 0)
6969 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6970 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6971 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6973 /* Zero register. */
6974 emit_move_insn (target, CONST0_RTX (mode));
6975 return;
6977 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6979 /* Splat immediate. */
6980 emit_insn (gen_rtx_SET (target, const_vec));
6981 return;
6983 else
6985 /* Load from constant pool. */
6986 emit_move_insn (target, const_vec);
6987 return;
6991 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6992 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6994 rtx op[2];
6995 size_t i;
6996 size_t num_elements = all_same ? 1 : 2;
6997 for (i = 0; i < num_elements; i++)
6999 op[i] = XVECEXP (vals, 0, i);
7000 /* Just in case there is a SUBREG with a smaller mode, do a
7001 conversion. */
7002 if (GET_MODE (op[i]) != inner_mode)
7004 rtx tmp = gen_reg_rtx (inner_mode);
7005 convert_move (tmp, op[i], 0);
7006 op[i] = tmp;
7008 /* Allow load with splat double word. */
7009 else if (MEM_P (op[i]))
7011 if (!all_same)
7012 op[i] = force_reg (inner_mode, op[i]);
7014 else if (!REG_P (op[i]))
7015 op[i] = force_reg (inner_mode, op[i]);
7018 if (all_same)
7020 if (mode == V2DFmode)
7021 emit_insn (gen_vsx_splat_v2df (target, op[0]));
7022 else
7023 emit_insn (gen_vsx_splat_v2di (target, op[0]));
7025 else
7027 if (mode == V2DFmode)
7028 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7029 else
7030 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7032 return;
7035 /* Special case initializing vector int if we are on 64-bit systems with
7036 direct move or we have the ISA 3.0 instructions. */
7037 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7038 && TARGET_DIRECT_MOVE_64BIT)
7040 if (all_same)
7042 rtx element0 = XVECEXP (vals, 0, 0);
7043 if (MEM_P (element0))
7044 element0 = rs6000_address_for_fpconvert (element0);
7045 else
7046 element0 = force_reg (SImode, element0);
7048 if (TARGET_P9_VECTOR)
7049 emit_insn (gen_vsx_splat_v4si (target, element0));
7050 else
7052 rtx tmp = gen_reg_rtx (DImode);
7053 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7054 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7056 return;
7058 else
7060 rtx elements[4];
7061 size_t i;
7063 for (i = 0; i < 4; i++)
7065 elements[i] = XVECEXP (vals, 0, i);
7066 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7067 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7070 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7071 elements[2], elements[3]));
7072 return;
7076 /* With single-precision floating point on VSX, single precision is
7077 internally represented as a double, so either make 2 V2DF vectors and
7078 convert these vectors to single precision, or do one conversion and
7079 splat the result to the other elements.  */
7080 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7082 if (all_same)
7084 rtx element0 = XVECEXP (vals, 0, 0);
7086 if (TARGET_P9_VECTOR)
7088 if (MEM_P (element0))
7089 element0 = rs6000_address_for_fpconvert (element0);
7091 emit_insn (gen_vsx_splat_v4sf (target, element0));
7094 else
7096 rtx freg = gen_reg_rtx (V4SFmode);
7097 rtx sreg = force_reg (SFmode, element0);
7098 rtx cvt = (TARGET_XSCVDPSPN
7099 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7100 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7102 emit_insn (cvt);
7103 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7104 const0_rtx));
7107 else
7109 rtx dbl_even = gen_reg_rtx (V2DFmode);
7110 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7111 rtx flt_even = gen_reg_rtx (V4SFmode);
7112 rtx flt_odd = gen_reg_rtx (V4SFmode);
7113 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7114 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7115 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7116 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7118 /* Use VMRGEW if we can instead of doing a permute. */
7119 if (TARGET_P8_VECTOR)
7121 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7122 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7123 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7124 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7125 if (BYTES_BIG_ENDIAN)
7126 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7127 else
7128 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7130 else
7132 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7133 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7134 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7135 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7136 rs6000_expand_extract_even (target, flt_even, flt_odd);
7139 return;
7142 /* Special case initializing vector short/char that are splats if we are on
7143 64-bit systems with direct move. */
7144 if (all_same && TARGET_DIRECT_MOVE_64BIT
7145 && (mode == V16QImode || mode == V8HImode))
7147 rtx op0 = XVECEXP (vals, 0, 0);
7148 rtx di_tmp = gen_reg_rtx (DImode);
7150 if (!REG_P (op0))
7151 op0 = force_reg (GET_MODE_INNER (mode), op0);
7153 if (mode == V16QImode)
7155 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7156 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7157 return;
7160 if (mode == V8HImode)
7162 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7163 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7164 return;
7168 /* Store value to stack temp. Load vector element. Splat. However, splat
7169 of 64-bit items is not supported on Altivec. */
7170 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7172 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7173 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7174 XVECEXP (vals, 0, 0));
7175 x = gen_rtx_UNSPEC (VOIDmode,
7176 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7177 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7178 gen_rtvec (2,
7179 gen_rtx_SET (target, mem),
7180 x)));
7181 x = gen_rtx_VEC_SELECT (inner_mode, target,
7182 gen_rtx_PARALLEL (VOIDmode,
7183 gen_rtvec (1, const0_rtx)));
7184 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7185 return;
7188 /* One field is non-constant. Load constant then overwrite
7189 varying field. */
7190 if (n_var == 1)
7192 rtx copy = copy_rtx (vals);
7194 /* Load constant part of vector, substitute neighboring value for
7195 varying element. */
7196 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7197 rs6000_expand_vector_init (target, copy);
7199 /* Insert variable. */
7200 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7201 return;
7204 /* Construct the vector in memory one field at a time
7205 and load the whole vector. */
7206 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7207 for (i = 0; i < n_elts; i++)
7208 emit_move_insn (adjust_address_nv (mem, inner_mode,
7209 i * GET_MODE_SIZE (inner_mode)),
7210 XVECEXP (vals, 0, i));
7211 emit_move_insn (target, mem);
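/* Illustrative sketch (annotation, not part of the original source): a caller
   builds a PARALLEL of element rtxes and hands it to rs6000_expand_vector_init.
   The mixed constant/variable case below exercises the n_var == 1 path;
   some_si_reg is a hypothetical SImode pseudo.

     rtx target = gen_reg_rtx (V4SImode);
     rtvec v = rtvec_alloc (4);
     RTVEC_ELT (v, 0) = GEN_INT (1);
     RTVEC_ELT (v, 1) = GEN_INT (2);
     RTVEC_ELT (v, 2) = some_si_reg;
     RTVEC_ELT (v, 3) = GEN_INT (4);
     rs6000_expand_vector_init (target, gen_rtx_PARALLEL (V4SImode, v));

   The constant lanes are loaded first (lane 2 temporarily holds a copy of
   lane 3), then rs6000_expand_vector_set inserts the variable value.  */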
7214 /* Set field ELT of TARGET to VAL. */
7216 void
7217 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7219 machine_mode mode = GET_MODE (target);
7220 machine_mode inner_mode = GET_MODE_INNER (mode);
7221 rtx reg = gen_reg_rtx (mode);
7222 rtx mask, mem, x;
7223 int width = GET_MODE_SIZE (inner_mode);
7224 int i;
7226 val = force_reg (GET_MODE (val), val);
7228 if (VECTOR_MEM_VSX_P (mode))
7230 rtx insn = NULL_RTX;
7231 rtx elt_rtx = GEN_INT (elt);
7233 if (mode == V2DFmode)
7234 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7236 else if (mode == V2DImode)
7237 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7239 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7241 if (mode == V4SImode)
7242 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7243 else if (mode == V8HImode)
7244 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7245 else if (mode == V16QImode)
7246 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7247 else if (mode == V4SFmode)
7248 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7251 if (insn)
7253 emit_insn (insn);
7254 return;
7258 /* Simplify setting single element vectors like V1TImode. */
7259 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7261 emit_move_insn (target, gen_lowpart (mode, val));
7262 return;
7265 /* Load single variable value. */
7266 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7267 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7268 x = gen_rtx_UNSPEC (VOIDmode,
7269 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7270 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7271 gen_rtvec (2,
7272 gen_rtx_SET (reg, mem),
7273 x)));
7275 /* Linear sequence. */
7276 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7277 for (i = 0; i < 16; ++i)
7278 XVECEXP (mask, 0, i) = GEN_INT (i);
7280 /* Set permute mask to insert element into target. */
7281 for (i = 0; i < width; ++i)
7282 XVECEXP (mask, 0, elt*width + i)
7283 = GEN_INT (i + 0x10);
7284 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7286 if (BYTES_BIG_ENDIAN)
7287 x = gen_rtx_UNSPEC (mode,
7288 gen_rtvec (3, target, reg,
7289 force_reg (V16QImode, x)),
7290 UNSPEC_VPERM);
7291 else
7293 if (TARGET_P9_VECTOR)
7294 x = gen_rtx_UNSPEC (mode,
7295 gen_rtvec (3, reg, target,
7296 force_reg (V16QImode, x)),
7297 UNSPEC_VPERMR);
7298 else
7300 /* Invert selector. We prefer to generate VNAND on P8 so
7301 that future fusion opportunities can kick in, but must
7302 generate VNOR elsewhere. */
7303 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7304 rtx iorx = (TARGET_P8_VECTOR
7305 ? gen_rtx_IOR (V16QImode, notx, notx)
7306 : gen_rtx_AND (V16QImode, notx, notx));
7307 rtx tmp = gen_reg_rtx (V16QImode);
7308 emit_insn (gen_rtx_SET (tmp, iorx));
7310 /* Permute with operands reversed and adjusted selector. */
7311 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7312 UNSPEC_VPERM);
7316 emit_insn (gen_rtx_SET (target, x));
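/* Worked example (annotation, not part of the original source): for V4SImode
   with elt == 2 and width == 4, the selector built above is

     { 0, 1, 2, 3, 4, 5, 6, 7, 0x10, 0x11, 0x12, 0x13, 12, 13, 14, 15 }

   so bytes 8..11 of the result are taken from the vector holding the new
   value (selector entries 0x10..0x1f index the second vperm input) while
   every other byte passes through from TARGET unchanged.  */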
7319 /* Extract field ELT from VEC into TARGET. */
7321 void
7322 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7324 machine_mode mode = GET_MODE (vec);
7325 machine_mode inner_mode = GET_MODE_INNER (mode);
7326 rtx mem;
7328 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7330 switch (mode)
7332 default:
7333 break;
7334 case E_V1TImode:
7335 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7336 emit_move_insn (target, gen_lowpart (TImode, vec));
7337 break;
7338 case E_V2DFmode:
7339 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7340 return;
7341 case E_V2DImode:
7342 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7343 return;
7344 case E_V4SFmode:
7345 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7346 return;
7347 case E_V16QImode:
7348 if (TARGET_DIRECT_MOVE_64BIT)
7350 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7351 return;
7353 else
7354 break;
7355 case E_V8HImode:
7356 if (TARGET_DIRECT_MOVE_64BIT)
7358 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7359 return;
7361 else
7362 break;
7363 case E_V4SImode:
7364 if (TARGET_DIRECT_MOVE_64BIT)
7366 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7367 return;
7369 break;
7372 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7373 && TARGET_DIRECT_MOVE_64BIT)
7375 if (GET_MODE (elt) != DImode)
7377 rtx tmp = gen_reg_rtx (DImode);
7378 convert_move (tmp, elt, 0);
7379 elt = tmp;
7381 else if (!REG_P (elt))
7382 elt = force_reg (DImode, elt);
7384 switch (mode)
7386 case E_V2DFmode:
7387 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7388 return;
7390 case E_V2DImode:
7391 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7392 return;
7394 case E_V4SFmode:
7395 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7396 return;
7398 case E_V4SImode:
7399 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7400 return;
7402 case E_V8HImode:
7403 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7404 return;
7406 case E_V16QImode:
7407 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7408 return;
7410 default:
7411 gcc_unreachable ();
7415 gcc_assert (CONST_INT_P (elt));
7417 /* Allocate mode-sized buffer. */
7418 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7420 emit_move_insn (mem, vec);
7422 /* Add offset to field within buffer matching vector element. */
7423 mem = adjust_address_nv (mem, inner_mode,
7424 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7426 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7429 /* Helper function to return the register number of an RTX. */
7430 static inline int
7431 regno_or_subregno (rtx op)
7433 if (REG_P (op))
7434 return REGNO (op);
7435 else if (SUBREG_P (op))
7436 return subreg_regno (op);
7437 else
7438 gcc_unreachable ();
7441 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7442 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7443 temporary (BASE_TMP) to fix up the address. Return the new memory address
7444 that is valid for reads or writes to a given register (SCALAR_REG). */
7447 rs6000_adjust_vec_address (rtx scalar_reg,
7448 rtx mem,
7449 rtx element,
7450 rtx base_tmp,
7451 machine_mode scalar_mode)
7453 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7454 rtx addr = XEXP (mem, 0);
7455 rtx element_offset;
7456 rtx new_addr;
7457 bool valid_addr_p;
7459 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7460 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7462 /* Calculate what we need to add to the address to get the element
7463 address. */
7464 if (CONST_INT_P (element))
7465 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7466 else
7468 int byte_shift = exact_log2 (scalar_size);
7469 gcc_assert (byte_shift >= 0);
7471 if (byte_shift == 0)
7472 element_offset = element;
7474 else
7476 if (TARGET_POWERPC64)
7477 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7478 else
7479 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7481 element_offset = base_tmp;
7485 /* Create the new address pointing to the element within the vector. If we
7486 are adding 0, we don't have to change the address. */
7487 if (element_offset == const0_rtx)
7488 new_addr = addr;
7490 /* A simple indirect address can be converted into a reg + offset
7491 address. */
7492 else if (REG_P (addr) || SUBREG_P (addr))
7493 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7495 /* Optimize D-FORM addresses with a constant offset and a constant element
7496 number by folding the element offset into the address directly. */
7497 else if (GET_CODE (addr) == PLUS)
7499 rtx op0 = XEXP (addr, 0);
7500 rtx op1 = XEXP (addr, 1);
7501 rtx insn;
7503 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7504 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7506 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7507 rtx offset_rtx = GEN_INT (offset);
7509 if (IN_RANGE (offset, -32768, 32767)
7510 && (scalar_size < 8 || (offset & 0x3) == 0))
7511 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7512 else
7514 emit_move_insn (base_tmp, offset_rtx);
7515 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7518 else
7520 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7521 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7523 /* Note, ADDI requires the register being added to be a base
7524 register. If the register was R0, load it up into the temporary
7525 and do the add. */
7526 if (op1_reg_p
7527 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7529 insn = gen_add3_insn (base_tmp, op1, element_offset);
7530 gcc_assert (insn != NULL_RTX);
7531 emit_insn (insn);
7534 else if (ele_reg_p
7535 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7537 insn = gen_add3_insn (base_tmp, element_offset, op1);
7538 gcc_assert (insn != NULL_RTX);
7539 emit_insn (insn);
7542 else
7544 emit_move_insn (base_tmp, op1);
7545 emit_insn (gen_add2_insn (base_tmp, element_offset));
7548 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7552 else
7554 emit_move_insn (base_tmp, addr);
7555 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7558 /* If we have a PLUS, we need to see whether the particular register class
7559 allows for D-FORM or X-FORM addressing. */
7560 if (GET_CODE (new_addr) == PLUS)
7562 rtx op1 = XEXP (new_addr, 1);
7563 addr_mask_type addr_mask;
7564 int scalar_regno = regno_or_subregno (scalar_reg);
7566 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7567 if (INT_REGNO_P (scalar_regno))
7568 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7570 else if (FP_REGNO_P (scalar_regno))
7571 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7573 else if (ALTIVEC_REGNO_P (scalar_regno))
7574 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7576 else
7577 gcc_unreachable ();
7579 if (REG_P (op1) || SUBREG_P (op1))
7580 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7581 else
7582 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7585 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7586 valid_addr_p = true;
7588 else
7589 valid_addr_p = false;
7591 if (!valid_addr_p)
7593 emit_move_insn (base_tmp, new_addr);
7594 new_addr = base_tmp;
7597 return change_address (mem, scalar_mode, new_addr);
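/* Worked example (annotation, not part of the original source): loading
   element 2 of a V4SImode vector stored at (plus r3 (const_int 16)) gives
   ELEMENT_OFFSET == 8, which folds into the D-form address
   (plus r3 (const_int 24)) with no extra instructions.  For a variable
   element in r10, r10 is shifted left by 2 into BASE_TMP, the constant 16
   is added into BASE_TMP, and the result is the X-form address
   (plus r3 base_tmp).  */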
7600 /* Split a variable vec_extract operation into the component instructions. */
7602 void
7603 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7604 rtx tmp_altivec)
7606 machine_mode mode = GET_MODE (src);
7607 machine_mode scalar_mode = GET_MODE (dest);
7608 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7609 int byte_shift = exact_log2 (scalar_size);
7611 gcc_assert (byte_shift >= 0);
7613 /* If we are given a memory address, optimize to load just the element. We
7614 don't have to adjust the vector element number on little endian
7615 systems. */
7616 if (MEM_P (src))
7618 gcc_assert (REG_P (tmp_gpr));
7619 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7620 tmp_gpr, scalar_mode));
7621 return;
7624 else if (REG_P (src) || SUBREG_P (src))
7626 int bit_shift = byte_shift + 3;
7627 rtx element2;
7628 int dest_regno = regno_or_subregno (dest);
7629 int src_regno = regno_or_subregno (src);
7630 int element_regno = regno_or_subregno (element);
7632 gcc_assert (REG_P (tmp_gpr));
7634 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7635 a general purpose register. */
7636 if (TARGET_P9_VECTOR
7637 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7638 && INT_REGNO_P (dest_regno)
7639 && ALTIVEC_REGNO_P (src_regno)
7640 && INT_REGNO_P (element_regno))
7642 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7643 rtx element_si = gen_rtx_REG (SImode, element_regno);
7645 if (mode == V16QImode)
7646 emit_insn (VECTOR_ELT_ORDER_BIG
7647 ? gen_vextublx (dest_si, element_si, src)
7648 : gen_vextubrx (dest_si, element_si, src));
7650 else if (mode == V8HImode)
7652 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7653 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7654 emit_insn (VECTOR_ELT_ORDER_BIG
7655 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7656 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7660 else
7662 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7663 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7664 emit_insn (VECTOR_ELT_ORDER_BIG
7665 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7666 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7669 return;
7673 gcc_assert (REG_P (tmp_altivec));
7675 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7676 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7677 will shift the element into the upper position (adding 3 to convert a
7678 byte shift into a bit shift). */
7679 if (scalar_size == 8)
7681 if (!VECTOR_ELT_ORDER_BIG)
7683 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7684 element2 = tmp_gpr;
7686 else
7687 element2 = element;
7689 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7690 bit. */
7691 emit_insn (gen_rtx_SET (tmp_gpr,
7692 gen_rtx_AND (DImode,
7693 gen_rtx_ASHIFT (DImode,
7694 element2,
7695 GEN_INT (6)),
7696 GEN_INT (64))));
7698 else
7700 if (!VECTOR_ELT_ORDER_BIG)
7702 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7704 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7705 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7706 element2 = tmp_gpr;
7708 else
7709 element2 = element;
7711 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7714 /* Get the value into the lower byte of the Altivec register where VSLO
7715 expects it. */
7716 if (TARGET_P9_VECTOR)
7717 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7718 else if (can_create_pseudo_p ())
7719 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7720 else
7722 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7723 emit_move_insn (tmp_di, tmp_gpr);
7724 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7727 /* Do the VSLO to get the value into the final location. */
7728 switch (mode)
7730 case E_V2DFmode:
7731 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7732 return;
7734 case E_V2DImode:
7735 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7736 return;
7738 case E_V4SFmode:
7740 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7741 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7742 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7743 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7744 tmp_altivec));
7746 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7747 return;
7750 case E_V4SImode:
7751 case E_V8HImode:
7752 case E_V16QImode:
7754 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7755 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7756 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7757 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7758 tmp_altivec));
7759 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7760 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7761 GEN_INT (64 - (8 * scalar_size))));
7762 return;
7765 default:
7766 gcc_unreachable ();
7769 return;
7771 else
7772 gcc_unreachable ();
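/* Worked example (annotation, not part of the original source): a variable
   extract of V4SImode element E from a register on a big-endian system
   computes the VSLO shift count as E << 5 (byte offset E * 4 converted to
   bits), splats it into TMP_ALTIVEC, shifts the selected word to the top
   of the vector with VSLO, and finishes with an arithmetic right shift of
   64 - 32 == 32 bits in the GPR.  On little-endian systems the element
   number is first reversed to (nunits - 1) - E by the AND/SUB pair.  */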
7775 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7776 two SImode values. */
7778 static void
7779 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7781 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7783 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7785 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7786 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7788 emit_move_insn (dest, GEN_INT (const1 | const2));
7789 return;
7792 /* Put si1 into the upper 32 bits of dest. */
7793 if (CONST_INT_P (si1))
7794 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7795 else
7797 /* Generate RLDIC. */
7798 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7799 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7800 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7801 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7802 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7803 emit_insn (gen_rtx_SET (dest, and_rtx));
7806 /* Put si2 into the temporary. */
7807 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7808 if (CONST_INT_P (si2))
7809 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7810 else
7811 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7813 /* Combine the two parts. */
7814 emit_insn (gen_iordi3 (dest, dest, tmp));
7815 return;
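/* Worked example (annotation, not part of the original source): with
   SI1 == 0x11111111 constant and SI2 in a register, DEST is first set to
   0x1111111100000000, TMP receives the zero-extended SI2, and the final
   IOR assembles the doubleword.  If both inputs are constants, say
   SI1 == 1 and SI2 == 2, a single move of 0x0000000100000002 is emitted
   instead.  */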
7818 /* Split a V4SI initialization. */
7820 void
7821 rs6000_split_v4si_init (rtx operands[])
7823 rtx dest = operands[0];
7825 /* Destination is a GPR, build up the two DImode parts in place. */
7826 if (REG_P (dest) || SUBREG_P (dest))
7828 int d_regno = regno_or_subregno (dest);
7829 rtx scalar1 = operands[1];
7830 rtx scalar2 = operands[2];
7831 rtx scalar3 = operands[3];
7832 rtx scalar4 = operands[4];
7833 rtx tmp1 = operands[5];
7834 rtx tmp2 = operands[6];
7836 /* Even though we only need one temporary (plus the destination, which
7837 has an early clobber constraint), try to use two temporaries, one for
7838 each double word created. That way the 2nd insn scheduling pass can
7839 rearrange things so the two parts are done in parallel. */
7840 if (BYTES_BIG_ENDIAN)
7842 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7843 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7844 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7845 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7847 else
7849 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7850 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7851 gcc_assert (!VECTOR_ELT_ORDER_BIG);
7852 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7853 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7855 return;
7858 else
7859 gcc_unreachable ();
7862 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7863 selects whether the alignment is ABI-mandated, optional, or
7864 both ABI-mandated and optional alignment. */
7866 unsigned int
7867 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7869 if (how != align_opt)
7871 if (TREE_CODE (type) == VECTOR_TYPE)
7873 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))
7875 if (align < 64)
7876 align = 64;
7878 else if (align < 128)
7879 align = 128;
7883 if (how != align_abi)
7885 if (TREE_CODE (type) == ARRAY_TYPE
7886 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7888 if (align < BITS_PER_WORD)
7889 align = BITS_PER_WORD;
7893 return align;
7896 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7897 instructions simply ignore the low bits; VSX memory instructions
7898 are aligned to 4 or 8 bytes. */
7900 static bool
7901 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7903 return (STRICT_ALIGNMENT
7904 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7905 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7906 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7907 && (int) align < VECTOR_ALIGN (mode)))));
7910 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7912 bool
7913 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7915 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7917 if (computed != 128)
7919 static bool warned;
7920 if (!warned && warn_psabi)
7922 warned = true;
7923 inform (input_location,
7924 "the layout of aggregates containing vectors with"
7925 " %d-byte alignment has changed in GCC 5",
7926 computed / BITS_PER_UNIT);
7929 /* In current GCC there is no special case. */
7930 return false;
7933 return false;
7936 /* AIX increases natural record alignment to doubleword if the first
7937 field is an FP double while the FP fields remain word aligned. */
7939 unsigned int
7940 rs6000_special_round_type_align (tree type, unsigned int computed,
7941 unsigned int specified)
7943 unsigned int align = MAX (computed, specified);
7944 tree field = TYPE_FIELDS (type);
7946 /* Skip all non-field decls. */
7947 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7948 field = DECL_CHAIN (field);
7950 if (field != NULL && field != type)
7952 type = TREE_TYPE (field);
7953 while (TREE_CODE (type) == ARRAY_TYPE)
7954 type = TREE_TYPE (type);
7956 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7957 align = MAX (align, 64);
7960 return align;
7963 /* Darwin increases record alignment to the natural alignment of
7964 the first field. */
7966 unsigned int
7967 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7968 unsigned int specified)
7970 unsigned int align = MAX (computed, specified);
7972 if (TYPE_PACKED (type))
7973 return align;
7975 /* Find the first field, looking down into aggregates. */
7976 do {
7977 tree field = TYPE_FIELDS (type);
7978 /* Skip all non-field decls. */
7979 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7980 field = DECL_CHAIN (field);
7981 if (! field)
7982 break;
7983 /* A packed field does not contribute any extra alignment. */
7984 if (DECL_PACKED (field))
7985 return align;
7986 type = TREE_TYPE (field);
7987 while (TREE_CODE (type) == ARRAY_TYPE)
7988 type = TREE_TYPE (type);
7989 } while (AGGREGATE_TYPE_P (type));
7991 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7992 align = MAX (align, TYPE_ALIGN (type));
7994 return align;
7997 /* Return 1 for an operand in small memory on V.4/eabi. */
8000 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8001 machine_mode mode ATTRIBUTE_UNUSED)
8003 #if TARGET_ELF
8004 rtx sym_ref;
8006 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8007 return 0;
8009 if (DEFAULT_ABI != ABI_V4)
8010 return 0;
8012 if (GET_CODE (op) == SYMBOL_REF)
8013 sym_ref = op;
8015 else if (GET_CODE (op) != CONST
8016 || GET_CODE (XEXP (op, 0)) != PLUS
8017 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8018 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8019 return 0;
8021 else
8023 rtx sum = XEXP (op, 0);
8024 HOST_WIDE_INT summand;
8026 /* We have to be careful here, because it is the referenced address
8027 that must be 32k from _SDA_BASE_, not just the symbol. */
8028 summand = INTVAL (XEXP (sum, 1));
8029 if (summand < 0 || summand > g_switch_value)
8030 return 0;
8032 sym_ref = XEXP (sum, 0);
8035 return SYMBOL_REF_SMALL_P (sym_ref);
8036 #else
8037 return 0;
8038 #endif
8041 /* Return true if either operand is a general purpose register. */
8043 bool
8044 gpr_or_gpr_p (rtx op0, rtx op1)
8046 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8047 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8050 /* Return true if this is a move direct operation between GPR registers and
8051 floating point/VSX registers. */
8053 bool
8054 direct_move_p (rtx op0, rtx op1)
8056 int regno0, regno1;
8058 if (!REG_P (op0) || !REG_P (op1))
8059 return false;
8061 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8062 return false;
8064 regno0 = REGNO (op0);
8065 regno1 = REGNO (op1);
8066 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8067 return false;
8069 if (INT_REGNO_P (regno0))
8070 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8072 else if (INT_REGNO_P (regno1))
8074 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8075 return true;
8077 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8078 return true;
8081 return false;
8084 /* Return true if the OFFSET is valid for the quad address instructions that
8085 use d-form (register + offset) addressing. */
8087 static inline bool
8088 quad_address_offset_p (HOST_WIDE_INT offset)
8090 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
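/* Examples (annotation, not part of the original source): offsets 0, 16
   and -32768 are acceptable quad offsets; 8 fails the low-nibble test and
   32768 is outside the signed 16-bit range.  */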
8093 /* Return true if the ADDR is an acceptable address for a quad memory
8094 operation of mode MODE (either LQ/STQ for general purpose registers, or
8095 LXV/STXV for vector registers under ISA 3.0). If STRICT, apply strict
8096 register checking to the base register, i.e. do not accept unallocated
8097 pseudo registers as a base. */
8099 bool
8100 quad_address_p (rtx addr, machine_mode mode, bool strict)
8102 rtx op0, op1;
8104 if (GET_MODE_SIZE (mode) != 16)
8105 return false;
8107 if (legitimate_indirect_address_p (addr, strict))
8108 return true;
8110 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8111 return false;
8113 if (GET_CODE (addr) != PLUS)
8114 return false;
8116 op0 = XEXP (addr, 0);
8117 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8118 return false;
8120 op1 = XEXP (addr, 1);
8121 if (!CONST_INT_P (op1))
8122 return false;
8124 return quad_address_offset_p (INTVAL (op1));
8127 /* Return true if this is a load or store quad operation. This function does
8128 not handle the atomic quad memory instructions. */
8130 bool
8131 quad_load_store_p (rtx op0, rtx op1)
8133 bool ret;
8135 if (!TARGET_QUAD_MEMORY)
8136 ret = false;
8138 else if (REG_P (op0) && MEM_P (op1))
8139 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8140 && quad_memory_operand (op1, GET_MODE (op1))
8141 && !reg_overlap_mentioned_p (op0, op1));
8143 else if (MEM_P (op0) && REG_P (op1))
8144 ret = (quad_memory_operand (op0, GET_MODE (op0))
8145 && quad_int_reg_operand (op1, GET_MODE (op1)));
8147 else
8148 ret = false;
8150 if (TARGET_DEBUG_ADDR)
8152 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8153 ret ? "true" : "false");
8154 debug_rtx (gen_rtx_SET (op0, op1));
8157 return ret;
8160 /* Given an address, return a constant offset term if one exists. */
8162 static rtx
8163 address_offset (rtx op)
8165 if (GET_CODE (op) == PRE_INC
8166 || GET_CODE (op) == PRE_DEC)
8167 op = XEXP (op, 0);
8168 else if (GET_CODE (op) == PRE_MODIFY
8169 || GET_CODE (op) == LO_SUM)
8170 op = XEXP (op, 1);
8172 if (GET_CODE (op) == CONST)
8173 op = XEXP (op, 0);
8175 if (GET_CODE (op) == PLUS)
8176 op = XEXP (op, 1);
8178 if (CONST_INT_P (op))
8179 return op;
8181 return NULL_RTX;
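/* Examples (annotation, not part of the original source):
     (plus (reg r3) (const_int 16))                  --> (const_int 16)
     (lo_sum (reg r9) (const (plus (symbol_ref s)
                                   (const_int 8))))  --> (const_int 8)
     (reg r3)                                        --> NULL_RTX  */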
8184 /* Return true if the MEM operand is a memory operand suitable for use
8185 with a (full width, possibly multiple) gpr load/store. On
8186 powerpc64 this means the offset must be divisible by 4.
8187 Implements 'Y' constraint.
8189 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8190 a constraint function we know the operand has satisfied a suitable
8191 memory predicate. Also accept some odd rtl generated by reload
8192 (see rs6000_legitimize_reload_address for various forms). It is
8193 important that reload rtl be accepted by appropriate constraints
8194 but not by the operand predicate.
8196 Offsetting a lo_sum should not be allowed, except where we know by
8197 alignment that a 32k boundary is not crossed, but see the ???
8198 comment in rs6000_legitimize_reload_address. Note that by
8199 "offsetting" here we mean a further offset to access parts of the
8200 MEM. It's fine to have a lo_sum where the inner address is offset
8201 from a sym, since the same sym+offset will appear in the high part
8202 of the address calculation. */
8204 bool
8205 mem_operand_gpr (rtx op, machine_mode mode)
8207 unsigned HOST_WIDE_INT offset;
8208 int extra;
8209 rtx addr = XEXP (op, 0);
8211 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
8212 if (!rs6000_offsettable_memref_p (op, mode, false))
8213 return false;
8215 op = address_offset (addr);
8216 if (op == NULL_RTX)
8217 return true;
8219 offset = INTVAL (op);
8220 if (TARGET_POWERPC64 && (offset & 3) != 0)
8221 return false;
8223 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8224 if (extra < 0)
8225 extra = 0;
8227 if (GET_CODE (addr) == LO_SUM)
8228 /* For lo_sum addresses, we must allow any offset except one that
8229 causes a wrap, so test only the low 16 bits. */
8230 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8232 return offset + 0x8000 < 0x10000u - extra;
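/* Worked example (annotation, not part of the original source): for TImode
   on powerpc64, EXTRA == 8 and the unsigned comparison above accepts
   offsets in [-32768, 32760) that are divisible by 4.  Offset 32756
   passes (32756 + 0x8000 == 65524 < 65528) but 32760 does not, since the
   second doubleword at offset 32768 would overflow the 16-bit
   displacement.  */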
8235 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8236 enforce an offset divisible by 4 even for 32-bit. */
8238 bool
8239 mem_operand_ds_form (rtx op, machine_mode mode)
8241 unsigned HOST_WIDE_INT offset;
8242 int extra;
8243 rtx addr = XEXP (op, 0);
8245 if (!offsettable_address_p (false, mode, addr))
8246 return false;
8248 op = address_offset (addr);
8249 if (op == NULL_RTX)
8250 return true;
8252 offset = INTVAL (op);
8253 if ((offset & 3) != 0)
8254 return false;
8256 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8257 if (extra < 0)
8258 extra = 0;
8260 if (GET_CODE (addr) == LO_SUM)
8261 /* For lo_sum addresses, we must allow any offset except one that
8262 causes a wrap, so test only the low 16 bits. */
8263 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8265 return offset + 0x8000 < 0x10000u - extra;
8268 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8270 static bool
8271 reg_offset_addressing_ok_p (machine_mode mode)
8273 switch (mode)
8275 case E_V16QImode:
8276 case E_V8HImode:
8277 case E_V4SFmode:
8278 case E_V4SImode:
8279 case E_V2DFmode:
8280 case E_V2DImode:
8281 case E_V1TImode:
8282 case E_TImode:
8283 case E_TFmode:
8284 case E_KFmode:
8285 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8286 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8287 a vector mode, if we want to use the VSX registers to move it around,
8288 we need to restrict ourselves to reg+reg addressing. Similarly for
8289 IEEE 128-bit floating point that is passed in a single vector
8290 register. */
8291 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8292 return mode_supports_vsx_dform_quad (mode);
8293 break;
8295 case E_V2SImode:
8296 case E_V2SFmode:
8297 /* Paired vector modes. Only reg+reg addressing is valid. */
8298 if (TARGET_PAIRED_FLOAT)
8299 return false;
8300 break;
8302 case E_SDmode:
8303 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8304 addressing for the LFIWZX and STFIWX instructions. */
8305 if (TARGET_NO_SDMODE_STACK)
8306 return false;
8307 break;
8309 default:
8310 break;
8313 return true;
8316 static bool
8317 virtual_stack_registers_memory_p (rtx op)
8319 int regnum;
8321 if (GET_CODE (op) == REG)
8322 regnum = REGNO (op);
8324 else if (GET_CODE (op) == PLUS
8325 && GET_CODE (XEXP (op, 0)) == REG
8326 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8327 regnum = REGNO (XEXP (op, 0));
8329 else
8330 return false;
8332 return (regnum >= FIRST_VIRTUAL_REGISTER
8333 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8336 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8337 is known to not straddle a 32k boundary. This function is used
8338 to determine whether -mcmodel=medium code can use TOC pointer
8339 relative addressing for OP. This means the alignment of the TOC
8340 pointer must also be taken into account, and unfortunately that is
8341 only 8 bytes. */
8343 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8344 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8345 #endif
8347 static bool
8348 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8349 machine_mode mode)
8351 tree decl;
8352 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8354 if (GET_CODE (op) != SYMBOL_REF)
8355 return false;
8357 /* ISA 3.0 vector d-form addressing is restricted; don't allow
8358 SYMBOL_REF. */
8359 if (mode_supports_vsx_dform_quad (mode))
8360 return false;
8362 dsize = GET_MODE_SIZE (mode);
8363 decl = SYMBOL_REF_DECL (op);
8364 if (!decl)
8366 if (dsize == 0)
8367 return false;
8369 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8370 replacing memory addresses with an anchor plus offset. We
8371 could find the decl by rummaging around in the block->objects
8372 VEC for the given offset but that seems like too much work. */
8373 dalign = BITS_PER_UNIT;
8374 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8375 && SYMBOL_REF_ANCHOR_P (op)
8376 && SYMBOL_REF_BLOCK (op) != NULL)
8378 struct object_block *block = SYMBOL_REF_BLOCK (op);
8380 dalign = block->alignment;
8381 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8383 else if (CONSTANT_POOL_ADDRESS_P (op))
8385 /* It would be nice to have get_pool_align().. */
8386 machine_mode cmode = get_pool_mode (op);
8388 dalign = GET_MODE_ALIGNMENT (cmode);
8391 else if (DECL_P (decl))
8393 dalign = DECL_ALIGN (decl);
8395 if (dsize == 0)
8397 /* Allow BLKmode when the entire object is known to not
8398 cross a 32k boundary. */
8399 if (!DECL_SIZE_UNIT (decl))
8400 return false;
8402 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8403 return false;
8405 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8406 if (dsize > 32768)
8407 return false;
8409 dalign /= BITS_PER_UNIT;
8410 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8411 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8412 return dalign >= dsize;
8415 else
8416 gcc_unreachable ();
8418 /* Find how many bits of the alignment we know for this access. */
8419 dalign /= BITS_PER_UNIT;
8420 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8421 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8422 mask = dalign - 1;
8423 lsb = offset & -offset;
8424 mask &= lsb - 1;
8425 dalign = mask + 1;
8427 return dalign >= dsize;
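/* Worked example (annotation, not part of the original source): with the
   known alignment capped at 8 bytes (the TOC pointer guarantee) and
   OFFSET == 40, lsb == 40 & -40 == 8, so the derived alignment stays 8
   and any access of at most 8 bytes cannot straddle a 32k boundary.
   With OFFSET == 4 the lsb computation drops the derived alignment to 4,
   so only accesses of 4 bytes or fewer are accepted.  */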
8430 static bool
8431 constant_pool_expr_p (rtx op)
8433 rtx base, offset;
8435 split_const (op, &base, &offset);
8436 return (GET_CODE (base) == SYMBOL_REF
8437 && CONSTANT_POOL_ADDRESS_P (base)
8438 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8441 /* These are only used to pass through from print_operand/print_operand_address
8442 to rs6000_output_addr_const_extra over the intervening function
8443 output_addr_const which is not target code. */
8444 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8446 /* Return true if OP is a toc pointer relative address (the output
8447 of create_TOC_reference). If STRICT, do not match non-split
8448 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8449 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8450 TOCREL_OFFSET_RET respectively. */
8452 bool
8453 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8454 const_rtx *tocrel_offset_ret)
8456 if (!TARGET_TOC)
8457 return false;
8459 if (TARGET_CMODEL != CMODEL_SMALL)
8461 /* When strict, ensure we have everything tidy. */
8462 if (strict
8463 && !(GET_CODE (op) == LO_SUM
8464 && REG_P (XEXP (op, 0))
8465 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8466 return false;
8468 /* When not strict, allow non-split TOC addresses and also allow
8469 (lo_sum (high ..)) TOC addresses created during reload. */
8470 if (GET_CODE (op) == LO_SUM)
8471 op = XEXP (op, 1);
8474 const_rtx tocrel_base = op;
8475 const_rtx tocrel_offset = const0_rtx;
8477 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8479 tocrel_base = XEXP (op, 0);
8480 tocrel_offset = XEXP (op, 1);
8483 if (tocrel_base_ret)
8484 *tocrel_base_ret = tocrel_base;
8485 if (tocrel_offset_ret)
8486 *tocrel_offset_ret = tocrel_offset;
8488 return (GET_CODE (tocrel_base) == UNSPEC
8489 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
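/* Example shapes matched (annotation, not part of the original source):
     (unspec [(symbol_ref ...) (reg 2)] UNSPEC_TOCREL)
     (plus (unspec [...] UNSPEC_TOCREL) (const_int 8))
     (lo_sum (reg ...) (unspec [...] UNSPEC_TOCREL))   ;; -mcmodel=medium/large
   The ellipses stand for the usual symbol and TOC register operands.  */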
8492 /* Return true if X is a constant pool address, and also for cmodel=medium
8493 if X is a toc-relative address known to be offsettable within MODE. */
8495 bool
8496 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8497 bool strict)
8499 const_rtx tocrel_base, tocrel_offset;
8500 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8501 && (TARGET_CMODEL != CMODEL_MEDIUM
8502 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8503 || mode == QImode
8504 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8505 INTVAL (tocrel_offset), mode)));
8508 static bool
8509 legitimate_small_data_p (machine_mode mode, rtx x)
8511 return (DEFAULT_ABI == ABI_V4
8512 && !flag_pic && !TARGET_TOC
8513 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8514 && small_data_operand (x, mode));
8517 bool
8518 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8519 bool strict, bool worst_case)
8521 unsigned HOST_WIDE_INT offset;
8522 unsigned int extra;
8524 if (GET_CODE (x) != PLUS)
8525 return false;
8526 if (!REG_P (XEXP (x, 0)))
8527 return false;
8528 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8529 return false;
8530 if (mode_supports_vsx_dform_quad (mode))
8531 return quad_address_p (x, mode, strict);
8532 if (!reg_offset_addressing_ok_p (mode))
8533 return virtual_stack_registers_memory_p (x);
8534 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8535 return true;
8536 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8537 return false;
8539 offset = INTVAL (XEXP (x, 1));
8540 extra = 0;
8541 switch (mode)
8543 case E_V2SImode:
8544 case E_V2SFmode:
8545 /* Paired single modes: offset addressing isn't valid. */
8546 return false;
8548 case E_DFmode:
8549 case E_DDmode:
8550 case E_DImode:
8551 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8552 addressing. */
8553 if (VECTOR_MEM_VSX_P (mode))
8554 return false;
8556 if (!worst_case)
8557 break;
8558 if (!TARGET_POWERPC64)
8559 extra = 4;
8560 else if (offset & 3)
8561 return false;
8562 break;
8564 case E_TFmode:
8565 case E_IFmode:
8566 case E_KFmode:
8567 case E_TDmode:
8568 case E_TImode:
8569 case E_PTImode:
8570 extra = 8;
8571 if (!worst_case)
8572 break;
8573 if (!TARGET_POWERPC64)
8574 extra = 12;
8575 else if (offset & 3)
8576 return false;
8577 break;
8579 default:
8580 break;
8583 offset += 0x8000;
8584 return offset < 0x10000 - extra;
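/* Worked example (annotation, not part of the original source): in the
   worst case a 32-bit TImode access is split into four word loads at
   reg+offset .. reg+offset+12, so EXTRA == 12 and the biased comparison
   accepts offsets in [-32768, 32756), ensuring even the last word still
   fits in a signed 16-bit displacement.  */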
8587 bool
8588 legitimate_indexed_address_p (rtx x, int strict)
8590 rtx op0, op1;
8592 if (GET_CODE (x) != PLUS)
8593 return false;
8595 op0 = XEXP (x, 0);
8596 op1 = XEXP (x, 1);
8598 return (REG_P (op0) && REG_P (op1)
8599 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8600 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8601 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8602 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8605 bool
8606 avoiding_indexed_address_p (machine_mode mode)
8608 /* Avoid indexed addressing for modes that have non-indexed
8609 load/store instruction forms. */
8610 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8613 bool
8614 legitimate_indirect_address_p (rtx x, int strict)
8616 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8619 bool
8620 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8622 if (!TARGET_MACHO || !flag_pic
8623 || mode != SImode || GET_CODE (x) != MEM)
8624 return false;
8625 x = XEXP (x, 0);
8627 if (GET_CODE (x) != LO_SUM)
8628 return false;
8629 if (GET_CODE (XEXP (x, 0)) != REG)
8630 return false;
8631 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8632 return false;
8633 x = XEXP (x, 1);
8635 return CONSTANT_P (x);
8638 static bool
8639 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8641 if (GET_CODE (x) != LO_SUM)
8642 return false;
8643 if (GET_CODE (XEXP (x, 0)) != REG)
8644 return false;
8645 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8646 return false;
8647 /* Quad word addresses are restricted, and we can't use LO_SUM. */
8648 if (mode_supports_vsx_dform_quad (mode))
8649 return false;
8650 x = XEXP (x, 1);
8652 if (TARGET_ELF || TARGET_MACHO)
8654 bool large_toc_ok;
8656 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8657 return false;
8658 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8659 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8660 recognizes some LO_SUM addresses as valid although this
8661 function says the opposite. In most cases LRA can generate
8662 correct code for address reloads through its own transformations;
8663 only some LO_SUM cases are beyond it. So we need to add code
8664 analogous to that in rs6000_legitimize_reload_address for
8665 LO_SUM here, saying that some addresses are still valid. */
8666 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8667 && small_toc_ref (x, VOIDmode));
8668 if (TARGET_TOC && ! large_toc_ok)
8669 return false;
8670 if (GET_MODE_NUNITS (mode) != 1)
8671 return false;
8672 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8673 && !(/* ??? Assume floating point reg based on mode? */
8674 TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
8675 && (mode == DFmode || mode == DDmode)))
8676 return false;
8678 return CONSTANT_P (x) || large_toc_ok;
8681 return false;
8685 /* Try machine-dependent ways of modifying an illegitimate address
8686 to be legitimate. If we find one, return the new, valid address.
8687 This is used from only one place: `memory_address' in explow.c.
8689 OLDX is the address as it was before break_out_memory_refs was
8690 called. In some cases it is useful to look at this to decide what
8691 needs to be done.
8693 It is always safe for this function to do nothing. It exists to
8694 recognize opportunities to optimize the output.
8696 On RS/6000, first check for the sum of a register with a constant
8697 integer that is out of range. If so, generate code to add the
8698 constant with the low-order 16 bits masked to the register and force
8699 this result into another register (this can be done with `cau').
8700 Then generate an address of REG+(CONST&0xffff), allowing for the
8701 possibility of bit 16 being a one.
8703 Then check for the sum of a register and something not constant, try to
8704 load the other things into a register and return the sum. */
8706 static rtx
8707 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8708 machine_mode mode)
8710 unsigned int extra;
8712 if (!reg_offset_addressing_ok_p (mode)
8713 || mode_supports_vsx_dform_quad (mode))
8715 if (virtual_stack_registers_memory_p (x))
8716 return x;
8718 /* In theory we should not be seeing addresses of the form reg+0,
8719 but just in case it is generated, optimize it away. */
8720 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8721 return force_reg (Pmode, XEXP (x, 0));
8723 /* For TImode with load/store quad, restrict addresses to just a single
8724 pointer, so it works with both GPRs and VSX registers. */
8725 /* Make sure both operands are registers. */
8726 else if (GET_CODE (x) == PLUS
8727 && (mode != TImode || !TARGET_VSX))
8728 return gen_rtx_PLUS (Pmode,
8729 force_reg (Pmode, XEXP (x, 0)),
8730 force_reg (Pmode, XEXP (x, 1)));
8731 else
8732 return force_reg (Pmode, x);
8734 if (GET_CODE (x) == SYMBOL_REF)
8736 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8737 if (model != 0)
8738 return rs6000_legitimize_tls_address (x, model);
8741 extra = 0;
8742 switch (mode)
8744 case E_TFmode:
8745 case E_TDmode:
8746 case E_TImode:
8747 case E_PTImode:
8748 case E_IFmode:
8749 case E_KFmode:
8750 /* As in legitimate_offset_address_p we do not assume
8751 worst-case. The mode here is just a hint as to the registers
8752 used. A TImode is usually in gprs, but may actually be in
8753 fprs. Leave worst-case scenario for reload to handle via
8754 insn constraints. PTImode is only GPRs. */
8755 extra = 8;
8756 break;
8757 default:
8758 break;
8761 if (GET_CODE (x) == PLUS
8762 && GET_CODE (XEXP (x, 0)) == REG
8763 && GET_CODE (XEXP (x, 1)) == CONST_INT
8764 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8765 >= 0x10000 - extra)
8766 && !PAIRED_VECTOR_MODE (mode))
8768 HOST_WIDE_INT high_int, low_int;
8769 rtx sum;
8770 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8771 if (low_int >= 0x8000 - extra)
8772 low_int = 0;
8773 high_int = INTVAL (XEXP (x, 1)) - low_int;
8774 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8775 GEN_INT (high_int)), 0);
8776 return plus_constant (Pmode, sum, low_int);
8778 else if (GET_CODE (x) == PLUS
8779 && GET_CODE (XEXP (x, 0)) == REG
8780 && GET_CODE (XEXP (x, 1)) != CONST_INT
8781 && GET_MODE_NUNITS (mode) == 1
8782 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8783 || (/* ??? Assume floating point reg based on mode? */
8784 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
8785 && (mode == DFmode || mode == DDmode)))
8786 && !avoiding_indexed_address_p (mode))
8788 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8789 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8791 else if (PAIRED_VECTOR_MODE (mode))
8793 if (mode == DImode)
8794 return x;
8795 /* We accept [reg + reg]. */
8797 if (GET_CODE (x) == PLUS)
8799 rtx op1 = XEXP (x, 0);
8800 rtx op2 = XEXP (x, 1);
8801 rtx y;
8803 op1 = force_reg (Pmode, op1);
8804 op2 = force_reg (Pmode, op2);
8806 /* We can't always do [reg + reg] for these, because [reg +
8807 reg + offset] is not a legitimate addressing mode. */
8808 y = gen_rtx_PLUS (Pmode, op1, op2);
8810 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8811 return force_reg (Pmode, y);
8812 else
8813 return y;
8816 return force_reg (Pmode, x);
8818 else if ((TARGET_ELF
8819 #if TARGET_MACHO
8820 || !MACHO_DYNAMIC_NO_PIC_P
8821 #endif
8823 && TARGET_32BIT
8824 && TARGET_NO_TOC
8825 && ! flag_pic
8826 && GET_CODE (x) != CONST_INT
8827 && GET_CODE (x) != CONST_WIDE_INT
8828 && GET_CODE (x) != CONST_DOUBLE
8829 && CONSTANT_P (x)
8830 && GET_MODE_NUNITS (mode) == 1
8831 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8832 || (/* ??? Assume floating point reg based on mode? */
8833 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
8834 && (mode == DFmode || mode == DDmode))))
8836 rtx reg = gen_reg_rtx (Pmode);
8837 if (TARGET_ELF)
8838 emit_insn (gen_elf_high (reg, x));
8839 else
8840 emit_insn (gen_macho_high (reg, x));
8841 return gen_rtx_LO_SUM (Pmode, reg, x);
8843 else if (TARGET_TOC
8844 && GET_CODE (x) == SYMBOL_REF
8845 && constant_pool_expr_p (x)
8846 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8847 return create_TOC_reference (x, NULL_RTX);
8848 else
8849 return x;
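/* Worked example (annotation, not part of the original source):
   legitimizing (plus r3 (const_int 0x12345)) splits the constant into
   high_int == 0x10000 and low_int == 0x2345, forces r3 + 0x10000 into a
   new register (one ADDIS), and returns (plus reg (const_int 0x2345)).
   For 0x18000 the sign-extending split gives low_int == -0x8000 and
   high_int == 0x20000.  */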
8852 /* Debug version of rs6000_legitimize_address. */
8853 static rtx
8854 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8856 rtx ret;
8857 rtx_insn *insns;
8859 start_sequence ();
8860 ret = rs6000_legitimize_address (x, oldx, mode);
8861 insns = get_insns ();
8862 end_sequence ();
8864 if (ret != x)
8866 fprintf (stderr,
8867 "\nrs6000_legitimize_address: mode %s, old code %s, "
8868 "new code %s, modified\n",
8869 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8870 GET_RTX_NAME (GET_CODE (ret)));
8872 fprintf (stderr, "Original address:\n");
8873 debug_rtx (x);
8875 fprintf (stderr, "oldx:\n");
8876 debug_rtx (oldx);
8878 fprintf (stderr, "New address:\n");
8879 debug_rtx (ret);
8881 if (insns)
8883 fprintf (stderr, "Insns added:\n");
8884 debug_rtx_list (insns, 20);
8887 else
8889 fprintf (stderr,
8890 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8891 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8893 debug_rtx (x);
8896 if (insns)
8897 emit_insn (insns);
8899 return ret;
8902 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8903 We need to emit DTP-relative relocations. */
8905 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8906 static void
8907 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8909 switch (size)
8911 case 4:
8912 fputs ("\t.long\t", file);
8913 break;
8914 case 8:
8915 fputs (DOUBLE_INT_ASM_OP, file);
8916 break;
8917 default:
8918 gcc_unreachable ();
8920 output_addr_const (file, x);
8921 if (TARGET_ELF)
8922 fputs ("@dtprel+0x8000", file);
8923 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8925 switch (SYMBOL_REF_TLS_MODEL (x))
8927 case 0:
8928 break;
8929 case TLS_MODEL_LOCAL_EXEC:
8930 fputs ("@le", file);
8931 break;
8932 case TLS_MODEL_INITIAL_EXEC:
8933 fputs ("@ie", file);
8934 break;
8935 case TLS_MODEL_GLOBAL_DYNAMIC:
8936 case TLS_MODEL_LOCAL_DYNAMIC:
8937 fputs ("@m", file);
8938 break;
8939 default:
8940 gcc_unreachable ();
8945 /* Return true if X is a symbol that refers to real (rather than emulated)
8946 TLS. */
8948 static bool
8949 rs6000_real_tls_symbol_ref_p (rtx x)
8951 return (GET_CODE (x) == SYMBOL_REF
8952 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8955 /* In the name of slightly smaller debug output, and to cater to
8956 general assembler lossage, recognize various UNSPEC sequences
8957 and turn them back into a direct symbol reference. */
8959 static rtx
8960 rs6000_delegitimize_address (rtx orig_x)
8962 rtx x, y, offset;
8964 orig_x = delegitimize_mem_from_attrs (orig_x);
8965 x = orig_x;
8966 if (MEM_P (x))
8967 x = XEXP (x, 0);
8969 y = x;
8970 if (TARGET_CMODEL != CMODEL_SMALL
8971 && GET_CODE (y) == LO_SUM)
8972 y = XEXP (y, 1);
8974 offset = NULL_RTX;
8975 if (GET_CODE (y) == PLUS
8976 && GET_MODE (y) == Pmode
8977 && CONST_INT_P (XEXP (y, 1)))
8979 offset = XEXP (y, 1);
8980 y = XEXP (y, 0);
8983 if (GET_CODE (y) == UNSPEC
8984 && XINT (y, 1) == UNSPEC_TOCREL)
8986 y = XVECEXP (y, 0, 0);
8988 #ifdef HAVE_AS_TLS
8989 /* Do not associate thread-local symbols with the original
8990 constant pool symbol. */
8991 if (TARGET_XCOFF
8992 && GET_CODE (y) == SYMBOL_REF
8993 && CONSTANT_POOL_ADDRESS_P (y)
8994 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8995 return orig_x;
8996 #endif
8998 if (offset != NULL_RTX)
8999 y = gen_rtx_PLUS (Pmode, y, offset);
9000 if (!MEM_P (orig_x))
9001 return y;
9002 else
9003 return replace_equiv_address_nv (orig_x, y);
9006 if (TARGET_MACHO
9007 && GET_CODE (orig_x) == LO_SUM
9008 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9010 y = XEXP (XEXP (orig_x, 1), 0);
9011 if (GET_CODE (y) == UNSPEC
9012 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9013 return XVECEXP (y, 0, 0);
9016 return orig_x;
9019 /* Return true if X shouldn't be emitted into the debug info.
9020 The linker doesn't like .toc section references from
9021 .debug_* sections, so reject .toc section symbols. */
9023 static bool
9024 rs6000_const_not_ok_for_debug_p (rtx x)
9026 if (GET_CODE (x) == UNSPEC)
9027 return true;
9028 if (GET_CODE (x) == SYMBOL_REF
9029 && CONSTANT_POOL_ADDRESS_P (x))
9031 rtx c = get_pool_constant (x);
9032 machine_mode cmode = get_pool_mode (x);
9033 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9034 return true;
9037 return false;
9041 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9043 static bool
9044 rs6000_legitimate_combined_insn (rtx_insn *insn)
9046 int icode = INSN_CODE (insn);
9048 /* Reject creating doloop insns. Combine should not be allowed
9049 to create these for a number of reasons:
9050 1) In a nested loop, if combine creates one of these in an
9051 outer loop and the register allocator happens to allocate ctr
9052 to the outer loop insn, then the inner loop can't use ctr.
9053 Inner loops ought to be more highly optimized.
9054 2) Combine often wants to create one of these from what was
9055 originally a three insn sequence, first combining the three
9056 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9057 allocated ctr, the splitter takes us back to the three insn
9058 sequence. It's better to stop combine at the two insn
9059 sequence.
9060 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9061 insns, the register allocator sometimes uses floating point
9062 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9063 jump insn and output reloads are not implemented for jumps,
9064 the ctrsi/ctrdi splitters need to handle all possible cases.
9065 That's a pain, and it gets to be seriously difficult when a
9066 splitter that runs after reload needs memory to transfer from
9067 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9068 for the difficult case. It's better to not create problems
9069 in the first place. */
9070 if (icode != CODE_FOR_nothing
9071 && (icode == CODE_FOR_bdz_si
9072 || icode == CODE_FOR_bdz_di
9073 || icode == CODE_FOR_bdnz_si
9074 || icode == CODE_FOR_bdnz_di
9075 || icode == CODE_FOR_bdztf_si
9076 || icode == CODE_FOR_bdztf_di
9077 || icode == CODE_FOR_bdnztf_si
9078 || icode == CODE_FOR_bdnztf_di))
9079 return false;
9081 return true;
9084 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9086 static GTY(()) rtx rs6000_tls_symbol;
9087 static rtx
9088 rs6000_tls_get_addr (void)
9090 if (!rs6000_tls_symbol)
9091 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9093 return rs6000_tls_symbol;
9096 /* Construct the SYMBOL_REF for TLS GOT references. */
9098 static GTY(()) rtx rs6000_got_symbol;
9099 static rtx
9100 rs6000_got_sym (void)
9102 if (!rs6000_got_symbol)
9104 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9105 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9106 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9109 return rs6000_got_symbol;
9112 /* AIX Thread-Local Address support. */
9114 static rtx
9115 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9117 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9118 const char *name;
9119 char *tlsname;
9121 name = XSTR (addr, 0);
9122 /* Append TLS CSECT qualifier, unless the symbol already is qualified
9123 or the symbol will be in TLS private data section. */
9124 if (name[strlen (name) - 1] != ']'
9125 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9126 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9128 tlsname = XALLOCAVEC (char, strlen (name) + 4);
9129 strcpy (tlsname, name);
9130 strcat (tlsname,
9131 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9132 tlsaddr = copy_rtx (addr);
9133 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9135 else
9136 tlsaddr = addr;
9138 /* Place addr into TOC constant pool. */
9139 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9141 /* Output the TOC entry and create the MEM referencing the value. */
9142 if (constant_pool_expr_p (XEXP (sym, 0))
9143 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9145 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9146 mem = gen_const_mem (Pmode, tocref);
9147 set_mem_alias_set (mem, get_TOC_alias_set ());
9149 else
9150 return sym;
9152 /* Use global-dynamic for local-dynamic. */
9153 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9154 || model == TLS_MODEL_LOCAL_DYNAMIC)
9156 /* Create new TOC reference for @m symbol. */
9157 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9158 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9159 strcpy (tlsname, "*LCM");
9160 strcat (tlsname, name + 3);
9161 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9162 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9163 tocref = create_TOC_reference (modaddr, NULL_RTX);
9164 rtx modmem = gen_const_mem (Pmode, tocref);
9165 set_mem_alias_set (modmem, get_TOC_alias_set ());
9167 rtx modreg = gen_reg_rtx (Pmode);
9168 emit_insn (gen_rtx_SET (modreg, modmem));
9170 tmpreg = gen_reg_rtx (Pmode);
9171 emit_insn (gen_rtx_SET (tmpreg, mem));
9173 dest = gen_reg_rtx (Pmode);
9174 if (TARGET_32BIT)
9175 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9176 else
9177 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9178 return dest;
9180 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9181 else if (TARGET_32BIT)
9183 tlsreg = gen_reg_rtx (SImode);
9184 emit_insn (gen_tls_get_tpointer (tlsreg));
9186 else
9187 tlsreg = gen_rtx_REG (DImode, 13);
9189 /* Load the TOC value into temporary register. */
9190 tmpreg = gen_reg_rtx (Pmode);
9191 emit_insn (gen_rtx_SET (tmpreg, mem));
9192 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9193 gen_rtx_MINUS (Pmode, addr, tlsreg));
9195 /* Add TOC symbol value to TLS pointer. */
9196 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9198 return dest;
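/* Editor's sketch (not part of the original source): the CSECT rewrite
   above is plain string surgery -- "foo" becomes "foo[TL]" for
   initialized TLS data or "foo[UL]" for a bss-style TLS symbol, while a
   name that already ends in ']' is left alone.  A stand-alone model
   (strcpy/strcat are available via system.h; the helper name is
   hypothetical):  */

static void
sketch_append_tls_csect (const char *name, int is_bss, char *buf)
{
  /* BUF must provide strlen (name) + 5 bytes.  */
  strcpy (buf, name);
  if (name[strlen (name) - 1] != ']')		/* not yet qualified */
    strcat (buf, is_bss ? "[UL]" : "[TL]");	/* "foo" -> "foo[TL]" */
}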
9201 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9202 this (thread-local) address. */
9204 static rtx
9205 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9207 rtx dest, insn;
9209 if (TARGET_XCOFF)
9210 return rs6000_legitimize_tls_address_aix (addr, model);
9212 dest = gen_reg_rtx (Pmode);
9213 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9215 rtx tlsreg;
9217 if (TARGET_64BIT)
9219 tlsreg = gen_rtx_REG (Pmode, 13);
9220 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9222 else
9224 tlsreg = gen_rtx_REG (Pmode, 2);
9225 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9227 emit_insn (insn);
9229 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9231 rtx tlsreg, tmp;
9233 tmp = gen_reg_rtx (Pmode);
9234 if (TARGET_64BIT)
9236 tlsreg = gen_rtx_REG (Pmode, 13);
9237 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9239 else
9241 tlsreg = gen_rtx_REG (Pmode, 2);
9242 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9244 emit_insn (insn);
9245 if (TARGET_64BIT)
9246 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9247 else
9248 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9249 emit_insn (insn);
9251 else
9253 rtx r3, got, tga, tmp1, tmp2, call_insn;
9255 /* We currently use relocations like @got@tlsgd for tls, which
9256 means the linker will handle allocation of tls entries, placing
9257 them in the .got section. So use a pointer to the .got section,
9258 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9259 or to secondary GOT sections used by 32-bit -fPIC. */
9260 if (TARGET_64BIT)
9261 got = gen_rtx_REG (Pmode, 2);
9262 else
9264 if (flag_pic == 1)
9265 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9266 else
9268 rtx gsym = rs6000_got_sym ();
9269 got = gen_reg_rtx (Pmode);
9270 if (flag_pic == 0)
9271 rs6000_emit_move (got, gsym, Pmode);
9272 else
9274 rtx mem, lab;
9276 tmp1 = gen_reg_rtx (Pmode);
9277 tmp2 = gen_reg_rtx (Pmode);
9278 mem = gen_const_mem (Pmode, tmp1);
9279 lab = gen_label_rtx ();
9280 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9281 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9282 if (TARGET_LINK_STACK)
9283 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9284 emit_move_insn (tmp2, mem);
9285 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9286 set_unique_reg_note (last, REG_EQUAL, gsym);
9291 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9293 tga = rs6000_tls_get_addr ();
9294 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9295 const0_rtx, Pmode);
9297 r3 = gen_rtx_REG (Pmode, 3);
9298 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9300 if (TARGET_64BIT)
9301 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9302 else
9303 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9305 else if (DEFAULT_ABI == ABI_V4)
9306 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9307 else
9308 gcc_unreachable ();
9309 call_insn = last_call_insn ();
9310 PATTERN (call_insn) = insn;
9311 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9312 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9313 pic_offset_table_rtx);
9315 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9317 tga = rs6000_tls_get_addr ();
9318 tmp1 = gen_reg_rtx (Pmode);
9319 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9320 const0_rtx, Pmode);
9322 r3 = gen_rtx_REG (Pmode, 3);
9323 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9325 if (TARGET_64BIT)
9326 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9327 else
9328 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9330 else if (DEFAULT_ABI == ABI_V4)
9331 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9332 else
9333 gcc_unreachable ();
9334 call_insn = last_call_insn ();
9335 PATTERN (call_insn) = insn;
9336 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9337 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9338 pic_offset_table_rtx);
9340 if (rs6000_tls_size == 16)
9342 if (TARGET_64BIT)
9343 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9344 else
9345 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9347 else if (rs6000_tls_size == 32)
9349 tmp2 = gen_reg_rtx (Pmode);
9350 if (TARGET_64BIT)
9351 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9352 else
9353 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9354 emit_insn (insn);
9355 if (TARGET_64BIT)
9356 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9357 else
9358 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9360 else
9362 tmp2 = gen_reg_rtx (Pmode);
9363 if (TARGET_64BIT)
9364 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9365 else
9366 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9367 emit_insn (insn);
9368 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9370 emit_insn (insn);
9372 else
9374 /* IE, or 64-bit offset LE. */
9375 tmp2 = gen_reg_rtx (Pmode);
9376 if (TARGET_64BIT)
9377 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9378 else
9379 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9380 emit_insn (insn);
9381 if (TARGET_64BIT)
9382 insn = gen_tls_tls_64 (dest, tmp2, addr);
9383 else
9384 insn = gen_tls_tls_32 (dest, tmp2, addr);
9385 emit_insn (insn);
9389 return dest;
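/* Editor's sketch (not part of the original source): the TLS models
   dispatched above correspond to ordinary __thread declarations; which
   model a given access gets depends on -fpic/-fPIC, -ftls-model and
   whether the symbol binds locally, so the comments below show typical
   outcomes, not guarantees.  */

static __thread int sketch_tls_local;	/* in an executable: usually
					   local-exec, tprel offsets from
					   r13 (64-bit) or r2 (32-bit) */
extern __thread int sketch_tls_import;	/* defined in a shared library:
					   initial-exec, or global-dynamic
					   through __tls_get_addr in PIC
					   code */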
9392 /* Only create the global variable for the stack protect guard if we are using
9393 the global flavor of that guard. */
9394 static tree
9395 rs6000_init_stack_protect_guard (void)
9397 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9398 return default_stack_protect_guard ();
9400 return NULL_TREE;
9403 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9405 static bool
9406 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9408 if (GET_CODE (x) == HIGH
9409 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9410 return true;
9412 /* A TLS symbol in the TOC cannot contain a sum. */
9413 if (GET_CODE (x) == CONST
9414 && GET_CODE (XEXP (x, 0)) == PLUS
9415 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9416 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9417 return true;
9419 /* Do not place an ELF TLS symbol in the constant pool. */
9420 return TARGET_ELF && tls_referenced_p (x);
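/* Editor's note (not part of the original source): the sum rejected
   above is what a C-level "&tls_var + 4" folds to, schematically

     (const (plus (symbol_ref ("tls_var")) (const_int 4)))

   Roughly speaking, the TLS relocations used for TOC/GOT entries apply
   to the bare symbol and cannot carry this addend, so forcing such a
   constant into the pool is refused.  */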
9423 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9424 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9425 can be addressed relative to the toc pointer. */
9427 static bool
9428 use_toc_relative_ref (rtx sym, machine_mode mode)
9430 return ((constant_pool_expr_p (sym)
9431 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9432 get_pool_mode (sym)))
9433 || (TARGET_CMODEL == CMODEL_MEDIUM
9434 && SYMBOL_REF_LOCAL_P (sym)
9435 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9438 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9439 replace the input X, or the original X if no replacement is called for.
9440 The output parameter *WIN is 1 if the calling macro should goto WIN,
9441 0 if it should not.
9443 For RS/6000, we wish to handle large displacements off a base
9444    register by splitting the addend across an addis and the mem insn.
9445    This cuts the number of extra insns needed from 3 to 1.
9447 On Darwin, we use this to generate code for floating point constants.
9448 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9449 The Darwin code is inside #if TARGET_MACHO because only then are the
9450 machopic_* functions defined. */
9451 static rtx
9452 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9453 int opnum, int type,
9454 int ind_levels ATTRIBUTE_UNUSED, int *win)
9456 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9457 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9459 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9460 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9461 if (reg_offset_p
9462 && opnum == 1
9463 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9464 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9465 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9466 && TARGET_P9_VECTOR)
9467 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9468 && TARGET_P9_VECTOR)))
9469 reg_offset_p = false;
9471 /* We must recognize output that we have already generated ourselves. */
9472 if (GET_CODE (x) == PLUS
9473 && GET_CODE (XEXP (x, 0)) == PLUS
9474 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9475 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9476 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9478 if (TARGET_DEBUG_ADDR)
9480 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9481 debug_rtx (x);
9483 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9484 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9485 opnum, (enum reload_type) type);
9486 *win = 1;
9487 return x;
9490 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9491 if (GET_CODE (x) == LO_SUM
9492 && GET_CODE (XEXP (x, 0)) == HIGH)
9494 if (TARGET_DEBUG_ADDR)
9496 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9497 debug_rtx (x);
9499 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9500 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9501 opnum, (enum reload_type) type);
9502 *win = 1;
9503 return x;
9506 #if TARGET_MACHO
9507 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9508 && GET_CODE (x) == LO_SUM
9509 && GET_CODE (XEXP (x, 0)) == PLUS
9510 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9511 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9512 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9513 && machopic_operand_p (XEXP (x, 1)))
9515 /* Result of previous invocation of this function on Darwin
9516 floating point constant. */
9517 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9518 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9519 opnum, (enum reload_type) type);
9520 *win = 1;
9521 return x;
9523 #endif
9525 if (TARGET_CMODEL != CMODEL_SMALL
9526 && reg_offset_p
9527 && !quad_offset_p
9528 && small_toc_ref (x, VOIDmode))
9530 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9531 x = gen_rtx_LO_SUM (Pmode, hi, x);
9532 if (TARGET_DEBUG_ADDR)
9534 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9535 debug_rtx (x);
9537 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9538 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9539 opnum, (enum reload_type) type);
9540 *win = 1;
9541 return x;
9544 if (GET_CODE (x) == PLUS
9545 && REG_P (XEXP (x, 0))
9546 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9547 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9548 && CONST_INT_P (XEXP (x, 1))
9549 && reg_offset_p
9550 && !PAIRED_VECTOR_MODE (mode)
9551 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9553 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9554 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9555 HOST_WIDE_INT high
9556 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
9558 /* Check for 32-bit overflow or quad addresses with one of the
9559 four least significant bits set. */
9560 if (high + low != val
9561 || (quad_offset_p && (low & 0xf)))
9563 *win = 0;
9564 return x;
9567 /* Reload the high part into a base reg; leave the low part
9568 in the mem directly. */
9570 x = gen_rtx_PLUS (GET_MODE (x),
9571 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9572 GEN_INT (high)),
9573 GEN_INT (low));
9575 if (TARGET_DEBUG_ADDR)
9577 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9578 debug_rtx (x);
9580 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9581 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9582 opnum, (enum reload_type) type);
9583 *win = 1;
9584 return x;
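/* Editor's note (not part of the original source): worked values for
   the high/low split above.

     val = 0x12345678: low = ((0x5678 ^ 0x8000) - 0x8000) = 0x5678,
                       high = 0x12340000, and high + low == val;
     val = 0x1234fff0: low = -0x10, high = 0x12350000.

   HIGH is added into the base register (an addis after reload), while
   LOW fits the signed 16-bit displacement field of the mem insn.  */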
9587 if (GET_CODE (x) == SYMBOL_REF
9588 && reg_offset_p
9589 && !quad_offset_p
9590 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9591 && !PAIRED_VECTOR_MODE (mode)
9592 #if TARGET_MACHO
9593 && DEFAULT_ABI == ABI_DARWIN
9594 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9595 && machopic_symbol_defined_p (x)
9596 #else
9597 && DEFAULT_ABI == ABI_V4
9598 && !flag_pic
9599 #endif
9600 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9601 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9602 without fprs.
9603 ??? Assume floating point reg based on mode? This assumption is
9604 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9605 where reload ends up doing a DFmode load of a constant from
9606 mem using two gprs. Unfortunately, at this point reload
9607 hasn't yet selected regs so poking around in reload data
9608 won't help and even if we could figure out the regs reliably,
9609 we'd still want to allow this transformation when the mem is
9610 naturally aligned. Since we say the address is good here, we
9611 can't disable offsets from LO_SUMs in mem_operand_gpr.
9612 FIXME: Allow offset from lo_sum for other modes too, when
9613 mem is sufficiently aligned.
9615 Also disallow this if the type can go in VMX/Altivec registers, since
9616 those registers do not have d-form (reg+offset) address modes. */
9617 && !reg_addr[mode].scalar_in_vmx_p
9618 && mode != TFmode
9619 && mode != TDmode
9620 && mode != IFmode
9621 && mode != KFmode
9622 && (mode != TImode || !TARGET_VSX)
9623 && mode != PTImode
9624 && (mode != DImode || TARGET_POWERPC64)
9625 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9626 || (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)))
9628 #if TARGET_MACHO
9629 if (flag_pic)
9631 rtx offset = machopic_gen_offset (x);
9632 x = gen_rtx_LO_SUM (GET_MODE (x),
9633 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9634 gen_rtx_HIGH (Pmode, offset)), offset);
9636 else
9637 #endif
9638 x = gen_rtx_LO_SUM (GET_MODE (x),
9639 gen_rtx_HIGH (Pmode, x), x);
9641 if (TARGET_DEBUG_ADDR)
9643 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9644 debug_rtx (x);
9646 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9647 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9648 opnum, (enum reload_type) type);
9649 *win = 1;
9650 return x;
9653 /* Reload an offset address wrapped by an AND that represents the
9654 masking of the lower bits. Strip the outer AND and let reload
9655 convert the offset address into an indirect address. For VSX,
9656 force reload to create the address with an AND in a separate
9657 register, because we can't guarantee an altivec register will
9658 be used. */
9659 if (VECTOR_MEM_ALTIVEC_P (mode)
9660 && GET_CODE (x) == AND
9661 && GET_CODE (XEXP (x, 0)) == PLUS
9662 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9663 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9664 && GET_CODE (XEXP (x, 1)) == CONST_INT
9665 && INTVAL (XEXP (x, 1)) == -16)
9667 x = XEXP (x, 0);
9668 *win = 1;
9669 return x;
9672 if (TARGET_TOC
9673 && reg_offset_p
9674 && !quad_offset_p
9675 && GET_CODE (x) == SYMBOL_REF
9676 && use_toc_relative_ref (x, mode))
9678 x = create_TOC_reference (x, NULL_RTX);
9679 if (TARGET_CMODEL != CMODEL_SMALL)
9681 if (TARGET_DEBUG_ADDR)
9683 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9684 debug_rtx (x);
9686 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9687 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9688 opnum, (enum reload_type) type);
9690 *win = 1;
9691 return x;
9693 *win = 0;
9694 return x;
9697 /* Debug version of rs6000_legitimize_reload_address. */
9698 static rtx
9699 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9700 int opnum, int type,
9701 int ind_levels, int *win)
9703 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9704 ind_levels, win);
9705 fprintf (stderr,
9706 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9707 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9708 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9709 debug_rtx (x);
9711 if (x == ret)
9712 fprintf (stderr, "Same address returned\n");
9713 else if (!ret)
9714 fprintf (stderr, "NULL returned\n");
9715 else
9717 fprintf (stderr, "New address:\n");
9718 debug_rtx (ret);
9721 return ret;
9724 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9725 that is a valid memory address for an instruction.
9726 The MODE argument is the machine mode for the MEM expression
9727 that wants to use this address.
9729    On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9730 refers to a constant pool entry of an address (or the sum of it
9731 plus a constant), a short (16-bit signed) constant plus a register,
9732 the sum of two registers, or a register indirect, possibly with an
9733 auto-increment. For DFmode, DDmode and DImode with a constant plus
9734 register, we must ensure that both words are addressable or PowerPC64
9735 with offset word aligned.
9737 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9738 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9739 because adjacent memory cells are accessed by adding word-sized offsets
9740 during assembly output. */
9741 static bool
9742 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9744 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9745 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9747 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9748 if (VECTOR_MEM_ALTIVEC_P (mode)
9749 && GET_CODE (x) == AND
9750 && GET_CODE (XEXP (x, 1)) == CONST_INT
9751 && INTVAL (XEXP (x, 1)) == -16)
9752 x = XEXP (x, 0);
9754 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9755 return 0;
9756 if (legitimate_indirect_address_p (x, reg_ok_strict))
9757 return 1;
9758 if (TARGET_UPDATE
9759 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9760 && mode_supports_pre_incdec_p (mode)
9761 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9762 return 1;
9763 /* Handle restricted vector d-form offsets in ISA 3.0. */
9764 if (quad_offset_p)
9766 if (quad_address_p (x, mode, reg_ok_strict))
9767 return 1;
9769 else if (virtual_stack_registers_memory_p (x))
9770 return 1;
9772 else if (reg_offset_p)
9774 if (legitimate_small_data_p (mode, x))
9775 return 1;
9776 if (legitimate_constant_pool_address_p (x, mode,
9777 reg_ok_strict || lra_in_progress))
9778 return 1;
9779 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9780 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9781 return 1;
9784 /* For TImode, if we have TImode in VSX registers, only allow register
9785 indirect addresses. This will allow the values to go in either GPRs
9786 or VSX registers without reloading. The vector types would tend to
9787 go into VSX registers, so we allow REG+REG, while TImode seems
9788 somewhat split, in that some uses are GPR based, and some VSX based. */
9789 /* FIXME: We could loosen this by changing the following to
9790 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9791 but currently we cannot allow REG+REG addressing for TImode. See
9792 PR72827 for complete details on how this ends up hoodwinking DSE. */
9793 if (mode == TImode && TARGET_VSX)
9794 return 0;
9795 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9796 if (! reg_ok_strict
9797 && reg_offset_p
9798 && GET_CODE (x) == PLUS
9799 && GET_CODE (XEXP (x, 0)) == REG
9800 && (XEXP (x, 0) == virtual_stack_vars_rtx
9801 || XEXP (x, 0) == arg_pointer_rtx)
9802 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9803 return 1;
9804 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9805 return 1;
9806 if (!FLOAT128_2REG_P (mode)
9807 && ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
9808 || TARGET_POWERPC64
9809 || (mode != DFmode && mode != DDmode))
9810 && (TARGET_POWERPC64 || mode != DImode)
9811 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9812 && mode != PTImode
9813 && !avoiding_indexed_address_p (mode)
9814 && legitimate_indexed_address_p (x, reg_ok_strict))
9815 return 1;
9816 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9817 && mode_supports_pre_modify_p (mode)
9818 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9819 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9820 reg_ok_strict, false)
9821 || (!avoiding_indexed_address_p (mode)
9822 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9823 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9824 return 1;
9825 if (reg_offset_p && !quad_offset_p
9826 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9827 return 1;
9828 return 0;
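/* Editor's note (not part of the original source): schematic RTL for
   the accepted address shapes -- register indirect, reg + 16-bit
   offset, reg + reg, and a lo_sum used for TOC/constant-pool
   references:

     (reg:DI 9)
     (plus:DI (reg:DI 9) (const_int -32768))
     (plus:DI (reg:DI 9) (reg:DI 10))
     (lo_sum:DI (reg:DI 2) (symbol_ref:DI ("*.LC0")))

   plus the PRE_INC/PRE_DEC/PRE_MODIFY auto-increment forms checked
   against TARGET_UPDATE above.  */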
9831 /* Debug version of rs6000_legitimate_address_p. */
9832 static bool
9833 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9834 bool reg_ok_strict)
9836 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9837 fprintf (stderr,
9838 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9839 "strict = %d, reload = %s, code = %s\n",
9840 ret ? "true" : "false",
9841 GET_MODE_NAME (mode),
9842 reg_ok_strict,
9843 (reload_completed ? "after" : "before"),
9844 GET_RTX_NAME (GET_CODE (x)));
9845 debug_rtx (x);
9847 return ret;
9850 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9852 static bool
9853 rs6000_mode_dependent_address_p (const_rtx addr,
9854 addr_space_t as ATTRIBUTE_UNUSED)
9856 return rs6000_mode_dependent_address_ptr (addr);
9859 /* Go to LABEL if ADDR (a legitimate address expression)
9860 has an effect that depends on the machine mode it is used for.
9862 On the RS/6000 this is true of all integral offsets (since AltiVec
9863 and VSX modes don't allow them) or is a pre-increment or decrement.
9865 ??? Except that due to conceptual problems in offsettable_address_p
9866 we can't really report the problems of integral offsets. So leave
9867 this assuming that the adjustable offset must be valid for the
9868 sub-words of a TFmode operand, which is what we had before. */
9870 static bool
9871 rs6000_mode_dependent_address (const_rtx addr)
9873 switch (GET_CODE (addr))
9875 case PLUS:
9876 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9877 is considered a legitimate address before reload, so there
9878 are no offset restrictions in that case. Note that this
9879 condition is safe in strict mode because any address involving
9880 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9881 been rejected as illegitimate. */
9882 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9883 && XEXP (addr, 0) != arg_pointer_rtx
9884 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9886 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9887 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9889 break;
9891 case LO_SUM:
9892 /* Anything in the constant pool is sufficiently aligned that
9893 all bytes have the same high part address. */
9894 return !legitimate_constant_pool_address_p (addr, QImode, false);
9896 /* Auto-increment cases are now treated generically in recog.c. */
9897 case PRE_MODIFY:
9898 return TARGET_UPDATE;
9900 /* AND is only allowed in Altivec loads. */
9901 case AND:
9902 return true;
9904 default:
9905 break;
9908 return false;
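/* Editor's sketch (not part of the original source): the PLUS case in
   plain C.  A multi-word access may add 8 (64-bit: the second
   doubleword of a TFmode value) or up to 12 (32-bit: its fourth word)
   to the base offset, so an address is mode-dependent when the last
   word would no longer fit the signed 16-bit displacement field:  */

static int
sketch_offset_mode_dependent (long long val, int powerpc64)
{
  /* Mirrors `val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12)';
     e.g. val = 0x7ff4 is mode-dependent in 32-bit mode because
     0x7ff4 + 12 = 0x8000 no longer fits the field.  */
  return (unsigned long long) (val + 0x8000)
	 >= (unsigned long long) (0x10000 - (powerpc64 ? 8 : 12));
}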
9911 /* Debug version of rs6000_mode_dependent_address. */
9912 static bool
9913 rs6000_debug_mode_dependent_address (const_rtx addr)
9915 bool ret = rs6000_mode_dependent_address (addr);
9917 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9918 ret ? "true" : "false");
9919 debug_rtx (addr);
9921 return ret;
9924 /* Implement FIND_BASE_TERM. */
9926 rtx
9927 rs6000_find_base_term (rtx op)
9929 rtx base;
9931 base = op;
9932 if (GET_CODE (base) == CONST)
9933 base = XEXP (base, 0);
9934 if (GET_CODE (base) == PLUS)
9935 base = XEXP (base, 0);
9936 if (GET_CODE (base) == UNSPEC)
9937 switch (XINT (base, 1))
9939 case UNSPEC_TOCREL:
9940 case UNSPEC_MACHOPIC_OFFSET:
9941 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9942 for aliasing purposes. */
9943 return XVECEXP (base, 0, 0);
9946 return op;
9949 /* More elaborate version of recog's offsettable_memref_p predicate
9950 that works around the ??? note of rs6000_mode_dependent_address.
9951 In particular it accepts
9953 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9955    in 32-bit mode, which the recog predicate rejects.  */
9957 static bool
9958 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9960 bool worst_case;
9962 if (!MEM_P (op))
9963 return false;
9965 /* First mimic offsettable_memref_p. */
9966 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9967 return true;
9969 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9970 the latter predicate knows nothing about the mode of the memory
9971 reference and, therefore, assumes that it is the largest supported
9972 mode (TFmode). As a consequence, legitimate offsettable memory
9973 references are rejected. rs6000_legitimate_offset_address_p contains
9974 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9975 at least with a little bit of help here given that we know the
9976 actual registers used. */
9977 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9978 || GET_MODE_SIZE (reg_mode) == 4);
9979 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9980 strict, worst_case);
9983 /* Determine the reassociation width to be used in reassociate_bb.
9984 This takes into account how many parallel operations we
9985 can actually do of a given type, and also the latency.
9987 int add/sub 6/cycle
9988 mul 2/cycle
9989 vect add/sub/mul 2/cycle
9990 fp add/sub/mul 2/cycle
9991       dfp              1/cycle
9992 */
9994 static int
9995 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9996 machine_mode mode)
9998 switch (rs6000_tune)
10000 case PROCESSOR_POWER8:
10001 case PROCESSOR_POWER9:
10002 if (DECIMAL_FLOAT_MODE_P (mode))
10003 return 1;
10004 if (VECTOR_MODE_P (mode))
10005 return 4;
10006 if (INTEGRAL_MODE_P (mode))
10007 return 1;
10008 if (FLOAT_MODE_P (mode))
10009 return 4;
10010 break;
10011 default:
10012 break;
10014 return 1;
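/* Editor's sketch (not part of the original source): with a width of 4
   the reassociation pass may split a serial FP reduction into shorter
   independent chains (FP reassociation itself requires
   -fassociative-math / -ffast-math):  */

static double
sketch_reassoc_sum (const double *a)
{
  /* The serial form a[0]+a[1]+...+a[7] is 7 dependent adds; the shape
     below exposes 4-way parallelism and shortens the critical path.  */
  return ((a[0] + a[1]) + (a[2] + a[3]))
	 + ((a[4] + a[5]) + (a[6] + a[7]));
}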
10017 /* Change register usage conditional on target flags. */
10018 static void
10019 rs6000_conditional_register_usage (void)
10021 int i;
10023 if (TARGET_DEBUG_TARGET)
10024 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10026 /* Set MQ register fixed (already call_used) so that it will not be
10027 allocated. */
10028 fixed_regs[64] = 1;
10030 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10031 if (TARGET_64BIT)
10032 fixed_regs[13] = call_used_regs[13]
10033 = call_really_used_regs[13] = 1;
10035 /* Conditionally disable FPRs. */
10036 if (TARGET_SOFT_FLOAT)
10037 for (i = 32; i < 64; i++)
10038 fixed_regs[i] = call_used_regs[i]
10039 = call_really_used_regs[i] = 1;
10041 /* The TOC register is not killed across calls in a way that is
10042 visible to the compiler. */
10043 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10044 call_really_used_regs[2] = 0;
10046 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10047 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10049 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10050 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10051 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10052 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10054 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10055 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10056 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10057 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10059 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10060 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10061 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10063 if (!TARGET_ALTIVEC && !TARGET_VSX)
10065 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10066 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10067 call_really_used_regs[VRSAVE_REGNO] = 1;
10070 if (TARGET_ALTIVEC || TARGET_VSX)
10071 global_regs[VSCR_REGNO] = 1;
10073 if (TARGET_ALTIVEC_ABI)
10075 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10076 call_used_regs[i] = call_really_used_regs[i] = 1;
10078 /* AIX reserves VR20:31 in non-extended ABI mode. */
10079 if (TARGET_XCOFF)
10080 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10081 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10086 /* Output insns to set DEST equal to the constant SOURCE as a series of
10087 lis, ori and shl instructions and return TRUE. */
10089 bool
10090 rs6000_emit_set_const (rtx dest, rtx source)
10092 machine_mode mode = GET_MODE (dest);
10093 rtx temp, set;
10094 rtx_insn *insn;
10095 HOST_WIDE_INT c;
10097 gcc_checking_assert (CONST_INT_P (source));
10098 c = INTVAL (source);
10099 switch (mode)
10101 case E_QImode:
10102 case E_HImode:
10103 emit_insn (gen_rtx_SET (dest, source));
10104 return true;
10106 case E_SImode:
10107 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10109 emit_insn (gen_rtx_SET (copy_rtx (temp),
10110 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10111 emit_insn (gen_rtx_SET (dest,
10112 gen_rtx_IOR (SImode, copy_rtx (temp),
10113 GEN_INT (c & 0xffff))));
10114 break;
10116 case E_DImode:
10117 if (!TARGET_POWERPC64)
10119 rtx hi, lo;
10121 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10122 DImode);
10123 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10124 DImode);
10125 emit_move_insn (hi, GEN_INT (c >> 32));
10126 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10127 emit_move_insn (lo, GEN_INT (c));
10129 else
10130 rs6000_emit_set_long_const (dest, c);
10131 break;
10133 default:
10134 gcc_unreachable ();
10137 insn = get_last_insn ();
10138 set = single_set (insn);
10139 if (! CONSTANT_P (SET_SRC (set)))
10140 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10142 return true;
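/* Editor's note (not part of the original source): for the SImode arm
   above with c = 0x12345678, `c & ~0xffff' feeds the lis and
   `c & 0xffff' the ori:

     lis 9,0x1234      # r9 = 0x12340000
     ori 9,9,0x5678    # r9 = 0x12345678  */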
10145 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10146 Output insns to set DEST equal to the constant C as a series of
10147 lis, ori and shl instructions. */
10149 static void
10150 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10152 rtx temp;
10153 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10155 ud1 = c & 0xffff;
10156 c = c >> 16;
10157 ud2 = c & 0xffff;
10158 c = c >> 16;
10159 ud3 = c & 0xffff;
10160 c = c >> 16;
10161 ud4 = c & 0xffff;
10163 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10164 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10165 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10167 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10168 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10170 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10172 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10173 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10174 if (ud1 != 0)
10175 emit_move_insn (dest,
10176 gen_rtx_IOR (DImode, copy_rtx (temp),
10177 GEN_INT (ud1)));
10179 else if (ud3 == 0 && ud4 == 0)
10181 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10183 gcc_assert (ud2 & 0x8000);
10184 emit_move_insn (copy_rtx (temp),
10185 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10186 if (ud1 != 0)
10187 emit_move_insn (copy_rtx (temp),
10188 gen_rtx_IOR (DImode, copy_rtx (temp),
10189 GEN_INT (ud1)));
10190 emit_move_insn (dest,
10191 gen_rtx_ZERO_EXTEND (DImode,
10192 gen_lowpart (SImode,
10193 copy_rtx (temp))));
10195 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10196 || (ud4 == 0 && ! (ud3 & 0x8000)))
10198 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10200 emit_move_insn (copy_rtx (temp),
10201 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10202 if (ud2 != 0)
10203 emit_move_insn (copy_rtx (temp),
10204 gen_rtx_IOR (DImode, copy_rtx (temp),
10205 GEN_INT (ud2)));
10206 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10207 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10208 GEN_INT (16)));
10209 if (ud1 != 0)
10210 emit_move_insn (dest,
10211 gen_rtx_IOR (DImode, copy_rtx (temp),
10212 GEN_INT (ud1)));
10214 else
10216 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10218 emit_move_insn (copy_rtx (temp),
10219 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10220 if (ud3 != 0)
10221 emit_move_insn (copy_rtx (temp),
10222 gen_rtx_IOR (DImode, copy_rtx (temp),
10223 GEN_INT (ud3)));
10225 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10226 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10227 GEN_INT (32)));
10228 if (ud2 != 0)
10229 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10230 gen_rtx_IOR (DImode, copy_rtx (temp),
10231 GEN_INT (ud2 << 16)));
10232 if (ud1 != 0)
10233 emit_move_insn (dest,
10234 gen_rtx_IOR (DImode, copy_rtx (temp),
10235 GEN_INT (ud1)));
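/* Editor's note (not part of the original source): worked example for
   the general (final) arm above with c = 0x123456789abcdef0, i.e.
   ud4..ud1 = 0x1234, 0x5678, 0x9abc, 0xdef0:

     lis  9,0x1234        # r9 = 0x12340000 (sign-extended ud4 << 16)
     ori  9,9,0x5678      # r9 = 0x12345678
     sldi 9,9,32          # r9 = 0x1234567800000000
     oris 9,9,0x9abc      # r9 = 0x123456789abc0000
     ori  9,9,0xdef0      # r9 = 0x123456789abcdef0

   Five insns in the worst case; the earlier arms peel off constants
   that fit 16 bits, 32 bits, or a zero-extended 32-bit pattern more
   cheaply.  */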
10239 /* Helper for the following. Get rid of [r+r] memory refs
10240 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10242 static void
10243 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10245 if (GET_CODE (operands[0]) == MEM
10246 && GET_CODE (XEXP (operands[0], 0)) != REG
10247 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10248 GET_MODE (operands[0]), false))
10249 operands[0]
10250 = replace_equiv_address (operands[0],
10251 copy_addr_to_reg (XEXP (operands[0], 0)));
10253 if (GET_CODE (operands[1]) == MEM
10254 && GET_CODE (XEXP (operands[1], 0)) != REG
10255 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10256 GET_MODE (operands[1]), false))
10257 operands[1]
10258 = replace_equiv_address (operands[1],
10259 copy_addr_to_reg (XEXP (operands[1], 0)));
10262 /* Generate a vector of constants to permute MODE for a little-endian
10263 storage operation by swapping the two halves of a vector. */
10264 static rtvec
10265 rs6000_const_vec (machine_mode mode)
10267 int i, subparts;
10268 rtvec v;
10270 switch (mode)
10272 case E_V1TImode:
10273 subparts = 1;
10274 break;
10275 case E_V2DFmode:
10276 case E_V2DImode:
10277 subparts = 2;
10278 break;
10279 case E_V4SFmode:
10280 case E_V4SImode:
10281 subparts = 4;
10282 break;
10283 case E_V8HImode:
10284 subparts = 8;
10285 break;
10286 case E_V16QImode:
10287 subparts = 16;
10288 break;
10289 default:
10290 gcc_unreachable();
10293 v = rtvec_alloc (subparts);
10295 for (i = 0; i < subparts / 2; ++i)
10296 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10297 for (i = subparts / 2; i < subparts; ++i)
10298 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10300 return v;
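/* Editor's note (not part of the original source): for V4SImode the
   selector built above is {2, 3, 0, 1}; for V2DFmode, {1, 0}; for
   V16QImode, {8 .. 15, 0 .. 7}.  Each element picks the lane from the
   other half of the vector, so applying the permutation twice is the
   identity -- which is what lets the paired permutes below cancel.  */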
10303 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10304 store operation. */
10305 void
10306 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10308   /* Scalar permutations are easier to express in integer modes than in
10309      floating-point modes, so cast them here.  We use V1TImode instead
10310 of TImode to ensure that the values don't go through GPRs. */
10311 if (FLOAT128_VECTOR_P (mode))
10313 dest = gen_lowpart (V1TImode, dest);
10314 source = gen_lowpart (V1TImode, source);
10315 mode = V1TImode;
10318 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10319 scalar. */
10320 if (mode == TImode || mode == V1TImode)
10321 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10322 GEN_INT (64))));
10323 else
10325 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10326 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10330 /* Emit a little-endian load from vector memory location SOURCE to VSX
10331 register DEST in mode MODE. The load is done with two permuting
10332    insns that represent an lxvd2x and xxpermdi.  */
10333 void
10334 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10336   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10337 V1TImode). */
10338 if (mode == TImode || mode == V1TImode)
10340 mode = V2DImode;
10341 dest = gen_lowpart (V2DImode, dest);
10342 source = adjust_address (source, V2DImode, 0);
10345 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10346 rs6000_emit_le_vsx_permute (tmp, source, mode);
10347 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10350 /* Emit a little-endian store to vector memory location DEST from VSX
10351 register SOURCE in mode MODE. The store is done with two permuting
10352    insns that represent an xxpermdi and an stxvd2x.  */
10353 void
10354 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10356 /* This should never be called during or after LRA, because it does
10357 not re-permute the source register. It is intended only for use
10358 during expand. */
10359 gcc_assert (!lra_in_progress && !reload_completed);
10361 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10362 V1TImode). */
10363 if (mode == TImode || mode == V1TImode)
10365 mode = V2DImode;
10366 dest = adjust_address (dest, V2DImode, 0);
10367 source = gen_lowpart (V2DImode, source);
10370 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10371 rs6000_emit_le_vsx_permute (tmp, source, mode);
10372 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10375 /* Emit a sequence representing a little-endian VSX load or store,
10376 moving data from SOURCE to DEST in mode MODE. This is done
10377 separately from rs6000_emit_move to ensure it is called only
10378 during expand. LE VSX loads and stores introduced later are
10379 handled with a split. The expand-time RTL generation allows
10380 us to optimize away redundant pairs of register-permutes. */
10381 void
10382 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10384 gcc_assert (!BYTES_BIG_ENDIAN
10385 && VECTOR_MEM_VSX_P (mode)
10386 && !TARGET_P9_VECTOR
10387 && !gpr_or_gpr_p (dest, source)
10388 && (MEM_P (source) ^ MEM_P (dest)));
10390 if (MEM_P (source))
10392 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10393 rs6000_emit_le_vsx_load (dest, source, mode);
10395 else
10397 if (!REG_P (source))
10398 source = force_reg (mode, source);
10399 rs6000_emit_le_vsx_store (dest, source, mode);
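/* Editor's note (not part of the original source): on little-endian
   VSX, lxvd2x/stxvd2x transfer the two doublewords swapped, so a load
   is modelled as two permutes -- one for the swap the hardware does,
   one for the xxpermdi that undoes it:

     lxvd2x   0,0,9         # vs0 = [mem], doublewords swapped
     xxpermdi 0,0,0,2       # swap them back

   Emitting both permutes explicitly at expand time lets later passes
   cancel adjacent swap pairs, e.g. in a plain load-then-store.  */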
10403 /* Return whether a SFmode or SImode move can be done without converting one
10404    mode to another.  This arises when we have:
10406 (SUBREG:SF (REG:SI ...))
10407 (SUBREG:SI (REG:SF ...))
10409 and one of the values is in a floating point/vector register, where SFmode
10410 scalars are stored in DFmode format. */
10412 bool
10413 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10415 if (TARGET_ALLOW_SF_SUBREG)
10416 return true;
10418 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10419 return true;
10421 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10422 return true;
10424   /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
10425 if (SUBREG_P (dest))
10427 rtx dest_subreg = SUBREG_REG (dest);
10428 rtx src_subreg = SUBREG_REG (src);
10429 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10432 return false;
10436 /* Helper function to change moves with:
10438 (SUBREG:SF (REG:SI)) and
10439 (SUBREG:SI (REG:SF))
10441 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10442 values are stored as DFmode values in the VSX registers. We need to convert
10443 the bits before we can use a direct move or operate on the bits in the
10444 vector register as an integer type.
10446    Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))).  */
10448 static bool
10449 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10451 if (TARGET_DIRECT_MOVE_64BIT && !lra_in_progress && !reload_completed
10452 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10453 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10455 rtx inner_source = SUBREG_REG (source);
10456 machine_mode inner_mode = GET_MODE (inner_source);
10458 if (mode == SImode && inner_mode == SFmode)
10460 emit_insn (gen_movsi_from_sf (dest, inner_source));
10461 return true;
10464 if (mode == SFmode && inner_mode == SImode)
10466 emit_insn (gen_movsf_from_si (dest, inner_source));
10467 return true;
10471 return false;
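/* Editor's sketch (not part of the original source): source-level code
   that gives rise to the (SUBREG:SI (REG:SF)) handled above:  */

static unsigned int
sketch_float_bits (float f)
{
  /* The union read becomes a bitcast of an SFmode reg to SImode.
     Because VSX keeps SFmode scalars in DFmode format, the bits must
     first be converted (the movsi_from_sf path above) before a direct
     move into a GPR.  */
  union { float f; unsigned int u; } v;
  v.f = f;
  return v.u;
}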
10474 /* Emit a move from SOURCE to DEST in mode MODE. */
10475 void
10476 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10478 rtx operands[2];
10479 operands[0] = dest;
10480 operands[1] = source;
10482 if (TARGET_DEBUG_ADDR)
10484 fprintf (stderr,
10485 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10486 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10487 GET_MODE_NAME (mode),
10488 lra_in_progress,
10489 reload_completed,
10490 can_create_pseudo_p ());
10491 debug_rtx (dest);
10492 fprintf (stderr, "source:\n");
10493 debug_rtx (source);
10496 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
10497 if (CONST_WIDE_INT_P (operands[1])
10498 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10500 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10501 gcc_unreachable ();
10504 #ifdef HAVE_AS_GNU_ATTRIBUTE
10505 /* If we use a long double type, set the flags in .gnu_attribute that say
10506 what the long double type is. This is to allow the linker's warning
10507 message for the wrong long double to be useful, even if the function does
10508      not do a call (for example, doing a 128-bit add on power9 if the long
10509      double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128 are
10510      used and they aren't the default long double type.  */
10511 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10513 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10514 rs6000_passes_float = rs6000_passes_long_double = true;
10516 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10517 rs6000_passes_float = rs6000_passes_long_double = true;
10519 #endif
10521 /* See if we need to special case SImode/SFmode SUBREG moves. */
10522 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10523 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10524 return;
10526 /* Check if GCC is setting up a block move that will end up using FP
10527 registers as temporaries. We must make sure this is acceptable. */
10528 if (GET_CODE (operands[0]) == MEM
10529 && GET_CODE (operands[1]) == MEM
10530 && mode == DImode
10531 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10532 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10533 && ! (rs6000_slow_unaligned_access (SImode,
10534 (MEM_ALIGN (operands[0]) > 32
10535 ? 32 : MEM_ALIGN (operands[0])))
10536 || rs6000_slow_unaligned_access (SImode,
10537 (MEM_ALIGN (operands[1]) > 32
10538 ? 32 : MEM_ALIGN (operands[1]))))
10539 && ! MEM_VOLATILE_P (operands [0])
10540 && ! MEM_VOLATILE_P (operands [1]))
10542 emit_move_insn (adjust_address (operands[0], SImode, 0),
10543 adjust_address (operands[1], SImode, 0));
10544 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10545 adjust_address (copy_rtx (operands[1]), SImode, 4));
10546 return;
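/* Editor's note (not part of the original source): a copy such as

     struct s { int a; long long x; } __attribute__ ((packed));
     dst->x = src->x;

   leaves x only 4-byte aligned, so when unaligned DImode accesses are
   slow the block above emits two aligned SImode load/store pairs
   instead of one DImode pair.  */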
10549 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10550 && !gpc_reg_operand (operands[1], mode))
10551 operands[1] = force_reg (mode, operands[1]);
10553 /* Recognize the case where operand[1] is a reference to thread-local
10554 data and load its address to a register. */
10555 if (tls_referenced_p (operands[1]))
10557 enum tls_model model;
10558 rtx tmp = operands[1];
10559 rtx addend = NULL;
10561 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10563 addend = XEXP (XEXP (tmp, 0), 1);
10564 tmp = XEXP (XEXP (tmp, 0), 0);
10567 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10568 model = SYMBOL_REF_TLS_MODEL (tmp);
10569 gcc_assert (model != 0);
10571 tmp = rs6000_legitimize_tls_address (tmp, model);
10572 if (addend)
10574 tmp = gen_rtx_PLUS (mode, tmp, addend);
10575 tmp = force_operand (tmp, operands[0]);
10577 operands[1] = tmp;
10580 /* 128-bit constant floating-point values on Darwin should really be loaded
10581 as two parts. However, this premature splitting is a problem when DFmode
10582 values can go into Altivec registers. */
10583 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10584 && GET_CODE (operands[1]) == CONST_DOUBLE)
10586 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10587 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10588 DFmode);
10589 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10590 GET_MODE_SIZE (DFmode)),
10591 simplify_gen_subreg (DFmode, operands[1], mode,
10592 GET_MODE_SIZE (DFmode)),
10593 DFmode);
10594 return;
10597 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10598      p1:SD) if p1 is not of floating point class and p0 is spilled, as
10599      we have no analogous movsd_store for this.  */
10600 if (lra_in_progress && mode == DDmode
10601 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10602 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10603 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10604 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10606 enum reg_class cl;
10607 int regno = REGNO (SUBREG_REG (operands[1]));
10609 if (regno >= FIRST_PSEUDO_REGISTER)
10611 cl = reg_preferred_class (regno);
10612 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10614 if (regno >= 0 && ! FP_REGNO_P (regno))
10616 mode = SDmode;
10617 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10618 operands[1] = SUBREG_REG (operands[1]);
10621 if (lra_in_progress
10622 && mode == SDmode
10623 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10624 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10625 && (REG_P (operands[1])
10626 || (GET_CODE (operands[1]) == SUBREG
10627 && REG_P (SUBREG_REG (operands[1])))))
10629 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10630 ? SUBREG_REG (operands[1]) : operands[1]);
10631 enum reg_class cl;
10633 if (regno >= FIRST_PSEUDO_REGISTER)
10635 cl = reg_preferred_class (regno);
10636 gcc_assert (cl != NO_REGS);
10637 regno = ira_class_hard_regs[cl][0];
10639 if (FP_REGNO_P (regno))
10641 if (GET_MODE (operands[0]) != DDmode)
10642 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10643 emit_insn (gen_movsd_store (operands[0], operands[1]));
10645 else if (INT_REGNO_P (regno))
10646 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10647 else
10648 gcc_unreachable();
10649 return;
10651 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10652      p1:DD)) if p0 is not of floating point class and p1 is spilled, as
10653      we have no analogous movsd_load for this.  */
10654 if (lra_in_progress && mode == DDmode
10655 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10656 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10657 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10658 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10660 enum reg_class cl;
10661 int regno = REGNO (SUBREG_REG (operands[0]));
10663 if (regno >= FIRST_PSEUDO_REGISTER)
10665 cl = reg_preferred_class (regno);
10666 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10668 if (regno >= 0 && ! FP_REGNO_P (regno))
10670 mode = SDmode;
10671 operands[0] = SUBREG_REG (operands[0]);
10672 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10675 if (lra_in_progress
10676 && mode == SDmode
10677 && (REG_P (operands[0])
10678 || (GET_CODE (operands[0]) == SUBREG
10679 && REG_P (SUBREG_REG (operands[0]))))
10680 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10681 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10683 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10684 ? SUBREG_REG (operands[0]) : operands[0]);
10685 enum reg_class cl;
10687 if (regno >= FIRST_PSEUDO_REGISTER)
10689 cl = reg_preferred_class (regno);
10690 gcc_assert (cl != NO_REGS);
10691 regno = ira_class_hard_regs[cl][0];
10693 if (FP_REGNO_P (regno))
10695 if (GET_MODE (operands[1]) != DDmode)
10696 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10697 emit_insn (gen_movsd_load (operands[0], operands[1]));
10699 else if (INT_REGNO_P (regno))
10700 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10701 else
10702 gcc_unreachable();
10703 return;
10706 /* FIXME: In the long term, this switch statement should go away
10707 and be replaced by a sequence of tests based on things like
10708 mode == Pmode. */
10709 switch (mode)
10711 case E_HImode:
10712 case E_QImode:
10713 if (CONSTANT_P (operands[1])
10714 && GET_CODE (operands[1]) != CONST_INT)
10715 operands[1] = force_const_mem (mode, operands[1]);
10716 break;
10718 case E_TFmode:
10719 case E_TDmode:
10720 case E_IFmode:
10721 case E_KFmode:
10722 if (FLOAT128_2REG_P (mode))
10723 rs6000_eliminate_indexed_memrefs (operands);
10724 /* fall through */
10726 case E_DFmode:
10727 case E_DDmode:
10728 case E_SFmode:
10729 case E_SDmode:
10730 if (CONSTANT_P (operands[1])
10731 && ! easy_fp_constant (operands[1], mode))
10732 operands[1] = force_const_mem (mode, operands[1]);
10733 break;
10735 case E_V16QImode:
10736 case E_V8HImode:
10737 case E_V4SFmode:
10738 case E_V4SImode:
10739 case E_V2SFmode:
10740 case E_V2SImode:
10741 case E_V2DFmode:
10742 case E_V2DImode:
10743 case E_V1TImode:
10744 if (CONSTANT_P (operands[1])
10745 && !easy_vector_constant (operands[1], mode))
10746 operands[1] = force_const_mem (mode, operands[1]);
10747 break;
10749 case E_SImode:
10750 case E_DImode:
10751 /* Use default pattern for address of ELF small data */
10752 if (TARGET_ELF
10753 && mode == Pmode
10754 && DEFAULT_ABI == ABI_V4
10755 && (GET_CODE (operands[1]) == SYMBOL_REF
10756 || GET_CODE (operands[1]) == CONST)
10757 && small_data_operand (operands[1], mode))
10759 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10760 return;
10763 if (DEFAULT_ABI == ABI_V4
10764 && mode == Pmode && mode == SImode
10765 && flag_pic == 1 && got_operand (operands[1], mode))
10767 emit_insn (gen_movsi_got (operands[0], operands[1]));
10768 return;
10771 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10772 && TARGET_NO_TOC
10773 && ! flag_pic
10774 && mode == Pmode
10775 && CONSTANT_P (operands[1])
10776 && GET_CODE (operands[1]) != HIGH
10777 && GET_CODE (operands[1]) != CONST_INT)
10779 rtx target = (!can_create_pseudo_p ()
10780 ? operands[0]
10781 : gen_reg_rtx (mode));
10783 /* If this is a function address on -mcall-aixdesc,
10784 convert it to the address of the descriptor. */
10785 if (DEFAULT_ABI == ABI_AIX
10786 && GET_CODE (operands[1]) == SYMBOL_REF
10787 && XSTR (operands[1], 0)[0] == '.')
10789 const char *name = XSTR (operands[1], 0);
10790 rtx new_ref;
10791 while (*name == '.')
10792 name++;
10793 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10794 CONSTANT_POOL_ADDRESS_P (new_ref)
10795 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10796 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10797 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10798 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10799 operands[1] = new_ref;
10802 if (DEFAULT_ABI == ABI_DARWIN)
10804 #if TARGET_MACHO
10805 if (MACHO_DYNAMIC_NO_PIC_P)
10807 /* Take care of any required data indirection. */
10808 operands[1] = rs6000_machopic_legitimize_pic_address (
10809 operands[1], mode, operands[0]);
10810 if (operands[0] != operands[1])
10811 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10812 return;
10814 #endif
10815 emit_insn (gen_macho_high (target, operands[1]));
10816 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10817 return;
10820 emit_insn (gen_elf_high (target, operands[1]));
10821 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10822 return;
10825 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10826 and we have put it in the TOC, we just need to make a TOC-relative
10827 reference to it. */
10828 if (TARGET_TOC
10829 && GET_CODE (operands[1]) == SYMBOL_REF
10830 && use_toc_relative_ref (operands[1], mode))
10831 operands[1] = create_TOC_reference (operands[1], operands[0]);
10832 else if (mode == Pmode
10833 && CONSTANT_P (operands[1])
10834 && GET_CODE (operands[1]) != HIGH
10835 && ((GET_CODE (operands[1]) != CONST_INT
10836 && ! easy_fp_constant (operands[1], mode))
10837 || (GET_CODE (operands[1]) == CONST_INT
10838 && (num_insns_constant (operands[1], mode)
10839 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10840 || (GET_CODE (operands[0]) == REG
10841 && FP_REGNO_P (REGNO (operands[0]))))
10842 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10843 && (TARGET_CMODEL == CMODEL_SMALL
10844 || can_create_pseudo_p ()
10845 || (REG_P (operands[0])
10846 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10849 #if TARGET_MACHO
10850 /* Darwin uses a special PIC legitimizer. */
10851 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10853 operands[1] =
10854 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10855 operands[0]);
10856 if (operands[0] != operands[1])
10857 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10858 return;
10860 #endif
10862 /* If we are to limit the number of things we put in the TOC and
10863 this is a symbol plus a constant we can add in one insn,
10864 just put the symbol in the TOC and add the constant. */
10865 if (GET_CODE (operands[1]) == CONST
10866 && TARGET_NO_SUM_IN_TOC
10867 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10868 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10869 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10870 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10871 && ! side_effects_p (operands[0]))
10873 rtx sym =
10874 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10875 rtx other = XEXP (XEXP (operands[1], 0), 1);
10877 sym = force_reg (mode, sym);
10878 emit_insn (gen_add3_insn (operands[0], sym, other));
10879 return;
10882 operands[1] = force_const_mem (mode, operands[1]);
10884 if (TARGET_TOC
10885 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10886 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10888 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10889 operands[0]);
10890 operands[1] = gen_const_mem (mode, tocref);
10891 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10894 break;
10896 case E_TImode:
10897 if (!VECTOR_MEM_VSX_P (TImode))
10898 rs6000_eliminate_indexed_memrefs (operands);
10899 break;
10901 case E_PTImode:
10902 rs6000_eliminate_indexed_memrefs (operands);
10903 break;
10905 default:
10906 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10909 /* Above, we may have called force_const_mem which may have returned
10910 an invalid address. If we can, fix this up; otherwise, reload will
10911 have to deal with it. */
10912 if (GET_CODE (operands[1]) == MEM)
10913 operands[1] = validize_mem (operands[1]);
10915 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10918 /* Nonzero if we can use a floating-point register to pass this arg. */
10919 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10920 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10921 && (CUM)->fregno <= FP_ARG_MAX_REG \
10922 && TARGET_HARD_FLOAT)
10924 /* Nonzero if we can use an AltiVec register to pass this arg. */
10925 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10926 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10927 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10928 && TARGET_ALTIVEC_ABI \
10929 && (NAMED))
10931 /* Walk down the type tree of TYPE counting consecutive base elements.
10932 If *MODEP is VOIDmode, then set it to the first valid floating point
10933 or vector type. If a non-floating point or vector type is found, or
10934 if a floating point or vector type that doesn't match a non-VOIDmode
10935 *MODEP is found, then return -1, otherwise return the count in the
10936 sub-tree. */
10938 static int
10939 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10941 machine_mode mode;
10942 HOST_WIDE_INT size;
10944 switch (TREE_CODE (type))
10946 case REAL_TYPE:
10947 mode = TYPE_MODE (type);
10948 if (!SCALAR_FLOAT_MODE_P (mode))
10949 return -1;
10951 if (*modep == VOIDmode)
10952 *modep = mode;
10954 if (*modep == mode)
10955 return 1;
10957 break;
10959 case COMPLEX_TYPE:
10960 mode = TYPE_MODE (TREE_TYPE (type));
10961 if (!SCALAR_FLOAT_MODE_P (mode))
10962 return -1;
10964 if (*modep == VOIDmode)
10965 *modep = mode;
10967 if (*modep == mode)
10968 return 2;
10970 break;
10972 case VECTOR_TYPE:
10973 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10974 return -1;
10976 /* Use V4SImode as representative of all 128-bit vector types. */
10977 size = int_size_in_bytes (type);
10978 switch (size)
10980 case 16:
10981 mode = V4SImode;
10982 break;
10983 default:
10984 return -1;
10987 if (*modep == VOIDmode)
10988 *modep = mode;
10990 /* Vector modes are considered to be opaque: two vectors are
10991 equivalent for the purposes of being homogeneous aggregates
10992 if they are the same size. */
10993 if (*modep == mode)
10994 return 1;
10996 break;
10998 case ARRAY_TYPE:
11000 int count;
11001 tree index = TYPE_DOMAIN (type);
11003 /* Can't handle incomplete types or sizes that are not
11004 fixed. */
11005 if (!COMPLETE_TYPE_P (type)
11006 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11007 return -1;
11009 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11010 if (count == -1
11011 || !index
11012 || !TYPE_MAX_VALUE (index)
11013 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11014 || !TYPE_MIN_VALUE (index)
11015 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11016 || count < 0)
11017 return -1;
11019 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11020 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11022 /* There must be no padding. */
11023 if (wi::to_wide (TYPE_SIZE (type))
11024 != count * GET_MODE_BITSIZE (*modep))
11025 return -1;
11027 return count;
11030 case RECORD_TYPE:
11032 int count = 0;
11033 int sub_count;
11034 tree field;
11036 /* Can't handle incomplete types or sizes that are not
11037 fixed. */
11038 if (!COMPLETE_TYPE_P (type)
11039 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11040 return -1;
11042 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11044 if (TREE_CODE (field) != FIELD_DECL)
11045 continue;
11047 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11048 if (sub_count < 0)
11049 return -1;
11050 count += sub_count;
11053 /* There must be no padding. */
11054 if (wi::to_wide (TYPE_SIZE (type))
11055 != count * GET_MODE_BITSIZE (*modep))
11056 return -1;
11058 return count;
11061 case UNION_TYPE:
11062 case QUAL_UNION_TYPE:
11064 /* These aren't very interesting except in a degenerate case. */
11065 int count = 0;
11066 int sub_count;
11067 tree field;
11069 /* Can't handle incomplete types or sizes that are not
11070 fixed. */
11071 if (!COMPLETE_TYPE_P (type)
11072 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11073 return -1;
11075 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11077 if (TREE_CODE (field) != FIELD_DECL)
11078 continue;
11080 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11081 if (sub_count < 0)
11082 return -1;
11083 count = count > sub_count ? count : sub_count;
11086 /* There must be no padding. */
11087 if (wi::to_wide (TYPE_SIZE (type))
11088 != count * GET_MODE_BITSIZE (*modep))
11089 return -1;
11091 return count;
11094 default:
11095 break;
11098 return -1;
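/* A sketch of how the walk above classifies a type (hypothetical
   declarations, assuming doubles are DFmode):

     struct hfa { double d[2]; _Complex double c; };

   The ARRAY_TYPE case yields 2 DFmode elements, the COMPLEX_TYPE case
   yields 2 more in the same mode, and the RECORD_TYPE case sums them;
   the struct is 32 bytes == 4 * 64 bits with no padding, so the result
   is 4 with *MODEP == DFmode.  Something like { double; int; } fails:
   the int field falls into the default case and the walk returns -1.  */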
11101 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11102 float or vector aggregate that shall be passed in FP/vector registers
11103 according to the ELFv2 ABI, return the homogeneous element mode in
11104 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11106 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11108 static bool
11109 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11110 machine_mode *elt_mode,
11111 int *n_elts)
11113 /* Note that we do not accept complex types at the top level as
11114 homogeneous aggregates; these types are handled via the
11115 targetm.calls.split_complex_arg mechanism. Complex types
11116 can be elements of homogeneous aggregates, however. */
11117 if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
11118 && AGGREGATE_TYPE_P (type))
11120 machine_mode field_mode = VOIDmode;
11121 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11123 if (field_count > 0)
11125 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11126 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11128 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11129 up to AGGR_ARG_NUM_REG registers. */
11130 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11132 if (elt_mode)
11133 *elt_mode = field_mode;
11134 if (n_elts)
11135 *n_elts = field_count;
11136 return true;
11141 if (elt_mode)
11142 *elt_mode = mode;
11143 if (n_elts)
11144 *n_elts = 1;
11145 return false;
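/* Worked instance of the test above (a sketch): under ELFv2 with hard
   float, struct { double d[3]; } comes back from
   rs6000_aggregate_candidate with field_mode == DFmode and
   field_count == 3; each DFmode element needs (8 + 7) >> 3 == 1
   register, and 3 <= AGGR_ARG_NUM_REG, so *ELT_MODE = DFmode and
   *N_ELTS = 3.  A struct of twelve doubles exceeds AGGR_ARG_NUM_REG
   and falls back to *ELT_MODE = MODE, *N_ELTS = 1.  */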
11148 /* Return a nonzero value to say to return the function value in
11149 memory, just as large structures are always returned. TYPE will be
11150 the data type of the value, and FNTYPE will be the type of the
11151 function doing the returning, or @code{NULL} for libcalls.
11153 The AIX ABI for the RS/6000 specifies that all structures are
11154 returned in memory. The Darwin ABI does the same.
11156 For the Darwin 64 Bit ABI, a function result can be returned in
11157 registers or in memory, depending on the size of the return data
11158 type. If it is returned in registers, the value occupies the same
11159 registers as it would if it were the first and only function
11160 argument. Otherwise, the function places its result in memory at
11161 the location pointed to by GPR3.
11163 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11164 but a draft put them in memory, and GCC used to implement the draft
11165 instead of the final standard. Therefore, aix_struct_return
11166 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11167 compatibility can change DRAFT_V4_STRUCT_RET to override the
11168 default, and -m switches get the final word. See
11169 rs6000_option_override_internal for more details.
11171 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11172 long double support is enabled. These values are returned in memory.
11174 int_size_in_bytes returns -1 for variable size objects, which always
11175 go in memory. The cast to unsigned makes -1 > 8. */
11177 static bool
11178 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11180 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11181 if (TARGET_MACHO
11182 && rs6000_darwin64_abi
11183 && TREE_CODE (type) == RECORD_TYPE
11184 && int_size_in_bytes (type) > 0)
11186 CUMULATIVE_ARGS valcum;
11187 rtx valret;
11189 valcum.words = 0;
11190 valcum.fregno = FP_ARG_MIN_REG;
11191 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11192 /* Do a trial code generation as if this were going to be passed
11193 as an argument; if any part goes in memory, we return NULL. */
11194 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11195 if (valret)
11196 return false;
11197 /* Otherwise fall through to more conventional ABI rules. */
11200 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11201 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11202 NULL, NULL))
11203 return false;
11205 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11206 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11207 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11208 return false;
11210 if (AGGREGATE_TYPE_P (type)
11211 && (aix_struct_return
11212 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11213 return true;
11215 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11216 modes only exist for GCC vector types if -maltivec. */
11217 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11218 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11219 return false;
11221 /* Return synthetic vectors in memory. */
11222 if (TREE_CODE (type) == VECTOR_TYPE
11223 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11225 static bool warned_for_return_big_vectors = false;
11226 if (!warned_for_return_big_vectors)
11228 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11229 "non-standard ABI extension with no compatibility "
11230 "guarantee");
11231 warned_for_return_big_vectors = true;
11233 return true;
11236 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11237 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11238 return true;
11240 return false;
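/* How the tests above combine, illustratively (64-bit, default
   flags): struct s { long a, b; } is 16 bytes, so under ELFv2 the
   16-byte aggregate test returns false and the value comes back in
   GPRs, while under the AIX ABI aix_struct_return is set and the same
   struct is returned in memory through a hidden pointer.  */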
11243 /* Specify whether values returned in registers should be at the most
11244 significant end of a register. We want aggregates returned by
11245 value to match the way aggregates are passed to functions. */
11247 static bool
11248 rs6000_return_in_msb (const_tree valtype)
11250 return (DEFAULT_ABI == ABI_ELFv2
11251 && BYTES_BIG_ENDIAN
11252 && AGGREGATE_TYPE_P (valtype)
11253 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
11254 == PAD_UPWARD));
11257 #ifdef HAVE_AS_GNU_ATTRIBUTE
11258 /* Return TRUE if a call to function FNDECL may be one that
11259 potentially affects the function calling ABI of the object file. */
11261 static bool
11262 call_ABI_of_interest (tree fndecl)
11264 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11266 struct cgraph_node *c_node;
11268 /* Libcalls are always interesting. */
11269 if (fndecl == NULL_TREE)
11270 return true;
11272 /* Any call to an external function is interesting. */
11273 if (DECL_EXTERNAL (fndecl))
11274 return true;
11276 /* Interesting functions that we are emitting in this object file. */
11277 c_node = cgraph_node::get (fndecl);
11278 c_node = c_node->ultimate_alias_target ();
11279 return !c_node->only_called_directly_p ();
11281 return false;
11283 #endif
11285 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11286 for a call to a function whose data type is FNTYPE.
11287 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11289 For incoming args we set the number of arguments in the prototype large
11290 so we never return a PARALLEL. */
11292 void
11293 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11294 rtx libname ATTRIBUTE_UNUSED, int incoming,
11295 int libcall, int n_named_args,
11296 tree fndecl ATTRIBUTE_UNUSED,
11297 machine_mode return_mode ATTRIBUTE_UNUSED)
11299 static CUMULATIVE_ARGS zero_cumulative;
11301 *cum = zero_cumulative;
11302 cum->words = 0;
11303 cum->fregno = FP_ARG_MIN_REG;
11304 cum->vregno = ALTIVEC_ARG_MIN_REG;
11305 cum->prototype = (fntype && prototype_p (fntype));
11306 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11307 ? CALL_LIBCALL : CALL_NORMAL);
11308 cum->sysv_gregno = GP_ARG_MIN_REG;
11309 cum->stdarg = stdarg_p (fntype);
11310 cum->libcall = libcall;
11312 cum->nargs_prototype = 0;
11313 if (incoming || cum->prototype)
11314 cum->nargs_prototype = n_named_args;
11316 /* Check for a longcall attribute. */
11317 if ((!fntype && rs6000_default_long_calls)
11318 || (fntype
11319 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11320 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11321 cum->call_cookie |= CALL_LONG;
11323 if (TARGET_DEBUG_ARG)
11325 fprintf (stderr, "\ninit_cumulative_args:");
11326 if (fntype)
11328 tree ret_type = TREE_TYPE (fntype);
11329 fprintf (stderr, " ret code = %s,",
11330 get_tree_code_name (TREE_CODE (ret_type)));
11333 if (cum->call_cookie & CALL_LONG)
11334 fprintf (stderr, " longcall,");
11336 fprintf (stderr, " proto = %d, nargs = %d\n",
11337 cum->prototype, cum->nargs_prototype);
11340 #ifdef HAVE_AS_GNU_ATTRIBUTE
11341 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11343 cum->escapes = call_ABI_of_interest (fndecl);
11344 if (cum->escapes)
11346 tree return_type;
11348 if (fntype)
11350 return_type = TREE_TYPE (fntype);
11351 return_mode = TYPE_MODE (return_type);
11353 else
11354 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11356 if (return_type != NULL)
11358 if (TREE_CODE (return_type) == RECORD_TYPE
11359 && TYPE_TRANSPARENT_AGGR (return_type))
11361 return_type = TREE_TYPE (first_field (return_type));
11362 return_mode = TYPE_MODE (return_type);
11364 if (AGGREGATE_TYPE_P (return_type)
11365 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11366 <= 8))
11367 rs6000_returns_struct = true;
11369 if (SCALAR_FLOAT_MODE_P (return_mode))
11371 rs6000_passes_float = true;
11372 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11373 && (FLOAT128_IBM_P (return_mode)
11374 || FLOAT128_IEEE_P (return_mode)
11375 || (return_type != NULL
11376 && (TYPE_MAIN_VARIANT (return_type)
11377 == long_double_type_node))))
11378 rs6000_passes_long_double = true;
11380 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11381 || PAIRED_VECTOR_MODE (return_mode))
11382 rs6000_passes_vector = true;
11385 #endif
11387 if (fntype
11388 && !TARGET_ALTIVEC
11389 && TARGET_ALTIVEC_ABI
11390 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11392 error ("cannot return value in vector register because"
11393 " altivec instructions are disabled, use %qs"
11394 " to enable them", "-maltivec");
11398 /* The mode the ABI uses for a word. This is not the same as word_mode
11399 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11401 static scalar_int_mode
11402 rs6000_abi_word_mode (void)
11404 return TARGET_32BIT ? SImode : DImode;
11407 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11408 static char *
11409 rs6000_offload_options (void)
11411 if (TARGET_64BIT)
11412 return xstrdup ("-foffload-abi=lp64");
11413 else
11414 return xstrdup ("-foffload-abi=ilp32");
11417 /* On rs6000, function arguments are promoted, as are function return
11418 values. */
11420 static machine_mode
11421 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11422 machine_mode mode,
11423 int *punsignedp ATTRIBUTE_UNUSED,
11424 const_tree, int)
11426 PROMOTE_MODE (mode, *punsignedp, type);
11428 return mode;
11431 /* Return true if TYPE must be passed on the stack and not in registers. */
11433 static bool
11434 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11436 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11437 return must_pass_in_stack_var_size (mode, type);
11438 else
11439 return must_pass_in_stack_var_size_or_pad (mode, type);
11442 static inline bool
11443 is_complex_IBM_long_double (machine_mode mode)
11445 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
11448 /* Whether ABI_V4 passes MODE args to a function in floating point
11449 registers. */
11451 static bool
11452 abi_v4_pass_in_fpr (machine_mode mode, bool named)
11454 if (!TARGET_HARD_FLOAT)
11455 return false;
11456 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11457 return true;
11458 if (TARGET_SINGLE_FLOAT && mode == SFmode && named)
11459 return true;
11460 /* ABI_V4 passes complex IBM long double in 8 gprs.
11461 Stupid, but we can't change the ABI now. */
11462 if (is_complex_IBM_long_double (mode))
11463 return false;
11464 if (FLOAT128_2REG_P (mode))
11465 return true;
11466 if (DECIMAL_FLOAT_MODE_P (mode))
11467 return true;
11468 return false;
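/* Concrete instances of the rules above (a sketch, assuming hard
   float): a DFmode double is passed in an FPR; an SFmode float only
   when it is a named argument; complex IBM long double (ICmode) never
   is, landing in 8 GPRs as noted above; and decimal float modes
   qualify via the DECIMAL_FLOAT_MODE_P test.  */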
11471 /* Implement TARGET_FUNCTION_ARG_PADDING.
11473 For the AIX ABI structs are always stored left shifted in their
11474 argument slot. */
11476 static pad_direction
11477 rs6000_function_arg_padding (machine_mode mode, const_tree type)
11479 #ifndef AGGREGATE_PADDING_FIXED
11480 #define AGGREGATE_PADDING_FIXED 0
11481 #endif
11482 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11483 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11484 #endif
11486 if (!AGGREGATE_PADDING_FIXED)
11488 /* GCC used to pass structures of the same size as integer types as
11489 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING;
11490 i.e., structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11491 passed padded downward, except that -mstrict-align further
11492 muddied the water in that multi-component structures of 2 and 4
11493 bytes in size were passed padded upward.
11495 The following arranges for best compatibility with previous
11496 versions of gcc, but removes the -mstrict-align dependency. */
11497 if (BYTES_BIG_ENDIAN)
11499 HOST_WIDE_INT size = 0;
11501 if (mode == BLKmode)
11503 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11504 size = int_size_in_bytes (type);
11506 else
11507 size = GET_MODE_SIZE (mode);
11509 if (size == 1 || size == 2 || size == 4)
11510 return PAD_DOWNWARD;
11512 return PAD_UPWARD;
11515 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11517 if (type != 0 && AGGREGATE_TYPE_P (type))
11518 return PAD_UPWARD;
11521 /* Fall back to the default. */
11522 return default_function_arg_padding (mode, type);
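/* Concrete cases for the big-endian logic above: a 2-byte struct is
   padded downward, sitting in the least significant end of its slot
   as if it were a short, while a 3-byte or a 16-byte struct is padded
   upward.  These merely restate the size checks above.  */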
11525 /* If defined, a C expression that gives the alignment boundary, in bits,
11526 of an argument with the specified mode and type. If it is not defined,
11527 PARM_BOUNDARY is used for all arguments.
11529 V.4 wants long longs and doubles to be double word aligned. Just
11530 testing the mode size is a boneheaded way to do this as it means
11531 that other types such as complex int are also double word aligned.
11532 However, we're stuck with this because changing the ABI might break
11533 existing library interfaces.
11535 Quadword align Altivec/VSX vectors.
11536 Quadword align large synthetic vector types. */
11538 static unsigned int
11539 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11541 machine_mode elt_mode;
11542 int n_elts;
11544 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11546 if (DEFAULT_ABI == ABI_V4
11547 && (GET_MODE_SIZE (mode) == 8
11548 || (TARGET_HARD_FLOAT
11549 && !is_complex_IBM_long_double (mode)
11550 && FLOAT128_2REG_P (mode))))
11551 return 64;
11552 else if (FLOAT128_VECTOR_P (mode))
11553 return 128;
11554 else if (PAIRED_VECTOR_MODE (mode)
11555 || (type && TREE_CODE (type) == VECTOR_TYPE
11556 && int_size_in_bytes (type) >= 8
11557 && int_size_in_bytes (type) < 16))
11558 return 64;
11559 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11560 || (type && TREE_CODE (type) == VECTOR_TYPE
11561 && int_size_in_bytes (type) >= 16))
11562 return 128;
11564 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11565 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11566 -mcompat-align-parm is used. */
11567 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11568 || DEFAULT_ABI == ABI_ELFv2)
11569 && type && TYPE_ALIGN (type) > 64)
11571 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11572 or homogeneous float/vector aggregates here. We already handled
11573 vector aggregates above, but still need to check for float here. */
11574 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11575 && !SCALAR_FLOAT_MODE_P (elt_mode));
11577 /* We used to check for BLKmode instead of the above aggregate type
11578 check. Warn when this results in any difference to the ABI. */
11579 if (aggregate_p != (mode == BLKmode))
11581 static bool warned;
11582 if (!warned && warn_psabi)
11584 warned = true;
11585 inform (input_location,
11586 "the ABI of passing aggregates with %d-byte alignment"
11587 " has changed in GCC 5",
11588 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11592 if (aggregate_p)
11593 return 128;
11596 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11597 implement the "aggregate type" check as a BLKmode check here; this
11598 means certain aggregate types are in fact not aligned. */
11599 if (TARGET_MACHO && rs6000_darwin64_abi
11600 && mode == BLKmode
11601 && type && TYPE_ALIGN (type) > 64)
11602 return 128;
11604 return PARM_BOUNDARY;
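/* Sample boundaries produced above (a sketch, default flags): a
   DImode long long under ABI_V4 -> 64; a 16-byte AltiVec vector ->
   128; an aggregate declared __attribute__((aligned (16))) under
   ELFv2 -> 128, with the GCC 5 -Wpsabi note if the old BLKmode
   heuristic would have decided differently; anything else falls
   through to PARM_BOUNDARY.  */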
11607 /* The offset in words to the start of the parameter save area. */
11609 static unsigned int
11610 rs6000_parm_offset (void)
11612 return (DEFAULT_ABI == ABI_V4 ? 2
11613 : DEFAULT_ABI == ABI_ELFv2 ? 4
11614 : 6);
11617 /* For a function parm of MODE and TYPE, return the starting word in
11618 the parameter area. NWORDS of the parameter area are already used. */
11620 static unsigned int
11621 rs6000_parm_start (machine_mode mode, const_tree type,
11622 unsigned int nwords)
11624 unsigned int align;
11626 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11627 return nwords + (-(rs6000_parm_offset () + nwords) & align);
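/* Worked example of the arithmetic above: under ELFv2 in 64-bit mode,
   rs6000_parm_offset () is 4 and PARM_BOUNDARY is 64, so a
   16-byte-aligned arg has align == 128 / 64 - 1 == 1.  With NWORDS == 1
   already used, the start is 1 + (-(4 + 1) & 1) == 2; word 4 + 2 == 6
   of the frame is 48 bytes in, which is indeed 16-byte aligned.  */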
11630 /* Compute the size (in words) of a function argument. */
11632 static unsigned long
11633 rs6000_arg_size (machine_mode mode, const_tree type)
11635 unsigned long size;
11637 if (mode != BLKmode)
11638 size = GET_MODE_SIZE (mode);
11639 else
11640 size = int_size_in_bytes (type);
11642 if (TARGET_32BIT)
11643 return (size + 3) >> 2;
11644 else
11645 return (size + 7) >> 3;
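/* E.g. a BLKmode struct of 9 bytes occupies (9 + 3) >> 2 == 3 words
   in 32-bit mode but (9 + 7) >> 3 == 2 words in 64-bit mode; scalar
   args use GET_MODE_SIZE the same way.  */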
11648 /* Use this to flush pending int fields. */
11650 static void
11651 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11652 HOST_WIDE_INT bitpos, int final)
11654 unsigned int startbit, endbit;
11655 int intregs, intoffset;
11657 /* Handle the situations where a float is taking up the first half
11658 of the GPR, and the other half is empty (typically due to
11659 alignment restrictions). We can detect this by an 8-byte-aligned
11660 int field, or by seeing that this is the final flush for this
11661 argument. Count the word and continue on. */
11662 if (cum->floats_in_gpr == 1
11663 && (cum->intoffset % 64 == 0
11664 || (cum->intoffset == -1 && final)))
11666 cum->words++;
11667 cum->floats_in_gpr = 0;
11670 if (cum->intoffset == -1)
11671 return;
11673 intoffset = cum->intoffset;
11674 cum->intoffset = -1;
11675 cum->floats_in_gpr = 0;
11677 if (intoffset % BITS_PER_WORD != 0)
11679 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11680 if (!int_mode_for_size (bits, 0).exists ())
11682 /* We couldn't find an appropriate mode, which happens,
11683 e.g., in packed structs when there are 3 bytes to load.
11684 Move intoffset back to the beginning of the word in this
11685 case. */
11686 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11690 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11691 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11692 intregs = (endbit - startbit) / BITS_PER_WORD;
11693 cum->words += intregs;
11694 /* words should be unsigned. */
11695 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11697 int pad = (endbit/BITS_PER_WORD) - cum->words;
11698 cum->words += pad;
11702 /* The darwin64 ABI calls for us to recurse down through structs,
11703 looking for elements passed in registers. Unfortunately, we have
11704 to track int register count here also because of misalignments
11705 in powerpc alignment mode. */
11707 static void
11708 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11709 const_tree type,
11710 HOST_WIDE_INT startbitpos)
11712 tree f;
11714 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11715 if (TREE_CODE (f) == FIELD_DECL)
11717 HOST_WIDE_INT bitpos = startbitpos;
11718 tree ftype = TREE_TYPE (f);
11719 machine_mode mode;
11720 if (ftype == error_mark_node)
11721 continue;
11722 mode = TYPE_MODE (ftype);
11724 if (DECL_SIZE (f) != 0
11725 && tree_fits_uhwi_p (bit_position (f)))
11726 bitpos += int_bit_position (f);
11728 /* ??? FIXME: else assume zero offset. */
11730 if (TREE_CODE (ftype) == RECORD_TYPE)
11731 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11732 else if (USE_FP_FOR_ARG_P (cum, mode))
11734 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11735 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11736 cum->fregno += n_fpregs;
11737 /* Single-precision floats present a special problem for
11738 us, because they are smaller than an 8-byte GPR, and so
11739 the structure-packing rules combined with the standard
11740 varargs behavior mean that we want to pack float/float
11741 and float/int combinations into a single register's
11742 space. This is complicated by the arg advance flushing,
11743 which works on arbitrarily large groups of int-type
11744 fields. */
11745 if (mode == SFmode)
11747 if (cum->floats_in_gpr == 1)
11749 /* Two floats in a word; count the word and reset
11750 the float count. */
11751 cum->words++;
11752 cum->floats_in_gpr = 0;
11754 else if (bitpos % 64 == 0)
11756 /* A float at the beginning of an 8-byte word;
11757 count it and put off adjusting cum->words until
11758 we see if an arg advance flush is going to do it
11759 for us. */
11760 cum->floats_in_gpr++;
11762 else
11764 /* The float is at the end of a word, preceded
11765 by integer fields, so the arg advance flush
11766 just above has already set cum->words and
11767 everything is taken care of. */
11770 else
11771 cum->words += n_fpregs;
11773 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11775 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11776 cum->vregno++;
11777 cum->words += 2;
11779 else if (cum->intoffset == -1)
11780 cum->intoffset = bitpos;
11784 /* Check for an item that needs to be considered specially under the Darwin
11785 64-bit ABI. These are record types where the mode is BLKmode or the
11786 structure is 8 bytes in size. */
11787 static int
11788 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11790 return rs6000_darwin64_abi
11791 && ((mode == BLKmode
11792 && TREE_CODE (type) == RECORD_TYPE
11793 && int_size_in_bytes (type) > 0)
11794 || (type && TREE_CODE (type) == RECORD_TYPE
11795 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11798 /* Update the data in CUM to advance over an argument
11799 of mode MODE and data type TYPE.
11800 (TYPE is null for libcalls where that information may not be available.)
11802 Note that for args passed by reference, function_arg will be called
11803 with MODE and TYPE set to that of the pointer to the arg, not the arg
11804 itself. */
11806 static void
11807 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11808 const_tree type, bool named, int depth)
11810 machine_mode elt_mode;
11811 int n_elts;
11813 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11815 /* Only tick off an argument if we're not recursing. */
11816 if (depth == 0)
11817 cum->nargs_prototype--;
11819 #ifdef HAVE_AS_GNU_ATTRIBUTE
11820 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11821 && cum->escapes)
11823 if (SCALAR_FLOAT_MODE_P (mode))
11825 rs6000_passes_float = true;
11826 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11827 && (FLOAT128_IBM_P (mode)
11828 || FLOAT128_IEEE_P (mode)
11829 || (type != NULL
11830 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11831 rs6000_passes_long_double = true;
11833 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11834 || (PAIRED_VECTOR_MODE (mode)
11835 && !cum->stdarg
11836 && cum->sysv_gregno <= GP_ARG_MAX_REG))
11837 rs6000_passes_vector = true;
11839 #endif
11841 if (TARGET_ALTIVEC_ABI
11842 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11843 || (type && TREE_CODE (type) == VECTOR_TYPE
11844 && int_size_in_bytes (type) == 16)))
11846 bool stack = false;
11848 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11850 cum->vregno += n_elts;
11852 if (!TARGET_ALTIVEC)
11853 error ("cannot pass argument in vector register because"
11854 " altivec instructions are disabled, use %qs"
11855 " to enable them", "-maltivec");
11857 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11858 even if it is going to be passed in a vector register.
11859 Darwin does the same for variable-argument functions. */
11860 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11861 && TARGET_64BIT)
11862 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11863 stack = true;
11865 else
11866 stack = true;
11868 if (stack)
11870 int align;
11872 /* Vector parameters must be 16-byte aligned. In 32-bit
11873 mode this means we need to take into account the offset
11874 to the parameter save area. In 64-bit mode, they just
11875 have to start on an even word, since the parameter save
11876 area is 16-byte aligned. */
11877 if (TARGET_32BIT)
11878 align = -(rs6000_parm_offset () + cum->words) & 3;
11879 else
11880 align = cum->words & 1;
11881 cum->words += align + rs6000_arg_size (mode, type);
11883 if (TARGET_DEBUG_ARG)
11885 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11886 cum->words, align);
11887 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11888 cum->nargs_prototype, cum->prototype,
11889 GET_MODE_NAME (mode));
11893 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11895 int size = int_size_in_bytes (type);
11896 /* Variable sized types have size == -1 and are
11897 treated as if consisting entirely of ints.
11898 Pad to 16 byte boundary if needed. */
11899 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11900 && (cum->words % 2) != 0)
11901 cum->words++;
11902 /* For varargs, we can just go up by the size of the struct. */
11903 if (!named)
11904 cum->words += (size + 7) / 8;
11905 else
11907 /* It is tempting to say int register count just goes up by
11908 sizeof(type)/8, but this is wrong in a case such as
11909 { int; double; int; } [powerpc alignment]. We have to
11910 grovel through the fields for these too. */
11911 cum->intoffset = 0;
11912 cum->floats_in_gpr = 0;
11913 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11914 rs6000_darwin64_record_arg_advance_flush (cum,
11915 size * BITS_PER_UNIT, 1);
11917 if (TARGET_DEBUG_ARG)
11919 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11920 cum->words, TYPE_ALIGN (type), size);
11921 fprintf (stderr,
11922 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11923 cum->nargs_prototype, cum->prototype,
11924 GET_MODE_NAME (mode));
11927 else if (DEFAULT_ABI == ABI_V4)
11929 if (abi_v4_pass_in_fpr (mode, named))
11931 /* _Decimal128 must use an even/odd register pair. This assumes
11932 that the register number is odd when fregno is odd. */
11933 if (mode == TDmode && (cum->fregno % 2) == 1)
11934 cum->fregno++;
11936 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11937 <= FP_ARG_V4_MAX_REG)
11938 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11939 else
11941 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11942 if (mode == DFmode || FLOAT128_IBM_P (mode)
11943 || mode == DDmode || mode == TDmode)
11944 cum->words += cum->words & 1;
11945 cum->words += rs6000_arg_size (mode, type);
11948 else
11950 int n_words = rs6000_arg_size (mode, type);
11951 int gregno = cum->sysv_gregno;
11953 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11954 As does any other 2 word item such as complex int due to a
11955 historical mistake. */
11956 if (n_words == 2)
11957 gregno += (1 - gregno) & 1;
11959 /* Multi-reg args are not split between registers and stack. */
11960 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11962 /* Long long is aligned on the stack. So are other 2 word
11963 items such as complex int due to a historical mistake. */
11964 if (n_words == 2)
11965 cum->words += cum->words & 1;
11966 cum->words += n_words;
11969 /* Note: we continue to accumulate gregno even after we've started
11970 spilling to the stack; the overshoot is how expand_builtin_saveregs
11971 learns that we have started spilling. */
11972 cum->sysv_gregno = gregno + n_words;
11975 if (TARGET_DEBUG_ARG)
11977 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11978 cum->words, cum->fregno);
11979 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11980 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11981 fprintf (stderr, "mode = %4s, named = %d\n",
11982 GET_MODE_NAME (mode), named);
11985 else
11987 int n_words = rs6000_arg_size (mode, type);
11988 int start_words = cum->words;
11989 int align_words = rs6000_parm_start (mode, type, start_words);
11991 cum->words = align_words + n_words;
11993 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11995 /* _Decimal128 must be passed in an even/odd float register pair.
11996 This assumes that the register number is odd when fregno is
11997 odd. */
11998 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11999 cum->fregno++;
12000 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
12003 if (TARGET_DEBUG_ARG)
12005 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12006 cum->words, cum->fregno);
12007 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
12008 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
12009 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
12010 named, align_words - start_words, depth);
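/* A worked instance of the ABI_V4 register pairing above:
   GP_ARG_MIN_REG is r3, so a long long arriving with cum->sysv_gregno
   == 4 (r4) gets (1 - 4) & 1 == 1 and is bumped to the (r5,r6) pair,
   while gregno == 3 gets (1 - 3) & 1 == 0 and stays at (r3,r4).
   Hence 2-word items only ever start in r3, r5, r7 or r9.  */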
12015 static void
12016 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
12017 const_tree type, bool named)
12019 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12023 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12024 structure between cum->intoffset and bitpos to integer registers. */
12026 static void
12027 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12028 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12030 machine_mode mode;
12031 unsigned int regno;
12032 unsigned int startbit, endbit;
12033 int this_regno, intregs, intoffset;
12034 rtx reg;
12036 if (cum->intoffset == -1)
12037 return;
12039 intoffset = cum->intoffset;
12040 cum->intoffset = -1;
12042 /* If this is the trailing part of a word, try to only load that
12043 much into the register. Otherwise load the whole register. Note
12044 that in the latter case we may pick up unwanted bits. It's not a
12045 problem at the moment, but we may wish to revisit this. */
12047 if (intoffset % BITS_PER_WORD != 0)
12049 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
12050 if (!int_mode_for_size (bits, 0).exists (&mode))
12052 /* We couldn't find an appropriate mode, which happens,
12053 e.g., in packed structs when there are 3 bytes to load.
12054 Move intoffset back to the beginning of the word in this
12055 case. */
12056 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12057 mode = word_mode;
12060 else
12061 mode = word_mode;
12063 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12064 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12065 intregs = (endbit - startbit) / BITS_PER_WORD;
12066 this_regno = cum->words + intoffset / BITS_PER_WORD;
12068 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12069 cum->use_stack = 1;
12071 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12072 if (intregs <= 0)
12073 return;
12075 intoffset /= BITS_PER_UNIT;
12076 do
12077 {
12078 regno = GP_ARG_MIN_REG + this_regno;
12079 reg = gen_rtx_REG (mode, regno);
12080 rvec[(*k)++] =
12081 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12083 this_regno += 1;
12084 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12085 mode = word_mode;
12086 intregs -= 1;
12087 }
12088 while (intregs > 0);
12091 /* Recursive workhorse for the following. */
12093 static void
12094 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12095 HOST_WIDE_INT startbitpos, rtx rvec[],
12096 int *k)
12098 tree f;
12100 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12101 if (TREE_CODE (f) == FIELD_DECL)
12103 HOST_WIDE_INT bitpos = startbitpos;
12104 tree ftype = TREE_TYPE (f);
12105 machine_mode mode;
12106 if (ftype == error_mark_node)
12107 continue;
12108 mode = TYPE_MODE (ftype);
12110 if (DECL_SIZE (f) != 0
12111 && tree_fits_uhwi_p (bit_position (f)))
12112 bitpos += int_bit_position (f);
12114 /* ??? FIXME: else assume zero offset. */
12116 if (TREE_CODE (ftype) == RECORD_TYPE)
12117 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12118 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12120 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12121 #if 0
12122 switch (mode)
12124 case E_SCmode: mode = SFmode; break;
12125 case E_DCmode: mode = DFmode; break;
12126 case E_TCmode: mode = TFmode; break;
12127 default: break;
12129 #endif
12130 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12131 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12133 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12134 && (mode == TFmode || mode == TDmode));
12135 /* Long double or _Decimal128 split over regs and memory. */
12136 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12137 cum->use_stack = 1;
12139 rvec[(*k)++]
12140 = gen_rtx_EXPR_LIST (VOIDmode,
12141 gen_rtx_REG (mode, cum->fregno++),
12142 GEN_INT (bitpos / BITS_PER_UNIT));
12143 if (FLOAT128_2REG_P (mode))
12144 cum->fregno++;
12146 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12148 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12149 rvec[(*k)++]
12150 = gen_rtx_EXPR_LIST (VOIDmode,
12151 gen_rtx_REG (mode, cum->vregno++),
12152 GEN_INT (bitpos / BITS_PER_UNIT));
12154 else if (cum->intoffset == -1)
12155 cum->intoffset = bitpos;
12159 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12160 the register(s) to be used for each field and subfield of a struct
12161 being passed by value, along with the offset of where the
12162 register's value may be found in the block. FP fields go in FP
12163 registers, vector fields go in vector registers, and everything
12164 else goes in int registers, packed as in memory.
12166 This code is also used for function return values. RETVAL indicates
12167 whether this is the case.
12169 Much of this is taken from the SPARC V9 port, which has a similar
12170 calling convention. */
12172 static rtx
12173 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12174 bool named, bool retval)
12176 rtx rvec[FIRST_PSEUDO_REGISTER];
12177 int k = 1, kbase = 1;
12178 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12179 /* This is a copy; modifications are not visible to our caller. */
12180 CUMULATIVE_ARGS copy_cum = *orig_cum;
12181 CUMULATIVE_ARGS *cum = &copy_cum;
12183 /* Pad to 16 byte boundary if needed. */
12184 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12185 && (cum->words % 2) != 0)
12186 cum->words++;
12188 cum->intoffset = 0;
12189 cum->use_stack = 0;
12190 cum->named = named;
12192 /* Put entries into rvec[] for individual FP and vector fields, and
12193 for the chunks of memory that go in int regs. Note we start at
12194 element 1; 0 is reserved for an indication of using memory, and
12195 may or may not be filled in below. */
12196 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12197 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12199 /* If any part of the struct went on the stack put all of it there.
12200 This hack is because the generic code for
12201 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12202 parts of the struct are not at the beginning. */
12203 if (cum->use_stack)
12205 if (retval)
12206 return NULL_RTX; /* doesn't go in registers at all */
12207 kbase = 0;
12208 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12210 if (k > 1 || cum->use_stack)
12211 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12212 else
12213 return NULL_RTX;
12216 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12218 static rtx
12219 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12220 int align_words)
12222 int n_units;
12223 int i, k;
12224 rtx rvec[GP_ARG_NUM_REG + 1];
12226 if (align_words >= GP_ARG_NUM_REG)
12227 return NULL_RTX;
12229 n_units = rs6000_arg_size (mode, type);
12231 /* Optimize the simple case where the arg fits in one gpr, except in
12232 the case of BLKmode due to assign_parms assuming that registers are
12233 BITS_PER_WORD wide. */
12234 if (n_units == 0
12235 || (n_units == 1 && mode != BLKmode))
12236 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12238 k = 0;
12239 if (align_words + n_units > GP_ARG_NUM_REG)
12240 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12241 using a magic NULL_RTX component.
12242 This is not strictly correct. Only some of the arg belongs in
12243 memory, not all of it. However, the normal scheme using
12244 function_arg_partial_nregs can result in unusual subregs, eg.
12245 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12246 store the whole arg to memory is often more efficient than code
12247 to store pieces, and we know that space is available in the right
12248 place for the whole arg. */
12249 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12251 i = 0;
12252 do
12253 {
12254 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12255 rtx off = GEN_INT (i++ * 4);
12256 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12257 }
12258 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12260 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
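/* For example (hypothetical values, 32-bit with -mpowerpc64): a
   16-byte BLKmode struct at align_words == 6 has n_units == 4, but
   only r9 and r10 remain, so the PARALLEL opens with the magic
   NULL_RTX element and then lists (reg:SI 9) at offset 0 and
   (reg:SI 10) at offset 4; the remainder of the struct stays in
   memory.  */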
12263 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12264 but must also be copied into the parameter save area starting at
12265 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12266 to the GPRs and/or memory. Return the number of elements used. */
12268 static int
12269 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12270 int align_words, rtx *rvec)
12272 int k = 0;
12274 if (align_words < GP_ARG_NUM_REG)
12276 int n_words = rs6000_arg_size (mode, type);
12278 if (align_words + n_words > GP_ARG_NUM_REG
12279 || mode == BLKmode
12280 || (TARGET_32BIT && TARGET_POWERPC64))
12282 /* If this is partially on the stack, then we only
12283 include the portion actually in registers here. */
12284 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12285 int i = 0;
12287 if (align_words + n_words > GP_ARG_NUM_REG)
12289 /* Not all of the arg fits in gprs. Say that it goes in memory
12290 too, using a magic NULL_RTX component. Also see comment in
12291 rs6000_mixed_function_arg for why the normal
12292 function_arg_partial_nregs scheme doesn't work in this case. */
12293 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12295 do
12296 {
12298 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12299 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12300 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12301 }
12302 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12304 else
12306 /* The whole arg fits in gprs. */
12307 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12308 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12311 else
12313 /* It's entirely in memory. */
12314 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12317 return k;
12320 /* RVEC is a vector of K components of an argument of mode MODE.
12321 Construct the final function_arg return value from it. */
12323 static rtx
12324 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12326 gcc_assert (k >= 1);
12328 /* Avoid returning a PARALLEL in the trivial cases. */
12329 if (k == 1)
12331 if (XEXP (rvec[0], 0) == NULL_RTX)
12332 return NULL_RTX;
12334 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12335 return XEXP (rvec[0], 0);
12338 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12341 /* Determine where to put an argument to a function.
12342 Value is zero to push the argument on the stack,
12343 or a hard register in which to store the argument.
12345 MODE is the argument's machine mode.
12346 TYPE is the data type of the argument (as a tree).
12347 This is null for libcalls where that information may
12348 not be available.
12349 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12350 the preceding args and about the function being called. It is
12351 not modified in this routine.
12352 NAMED is nonzero if this argument is a named parameter
12353 (otherwise it is an extra parameter matching an ellipsis).
12355 On RS/6000 the first eight words of non-FP are normally in registers
12356 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12357 Under V.4, the first 8 FP args are in registers.
12359 If this is floating-point and no prototype is specified, we use
12360 both an FP and integer register (or possibly FP reg and stack). Library
12361 functions (when CALL_LIBCALL is set) always have the proper types for args,
12362 so we can pass the FP value just in one register. emit_library_call
12363 doesn't support PARALLEL anyway.
12365 Note that for args passed by reference, function_arg will be called
12366 with MODE and TYPE set to that of the pointer to the arg, not the arg
12367 itself. */
12369 static rtx
12370 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12371 const_tree type, bool named)
12373 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12374 enum rs6000_abi abi = DEFAULT_ABI;
12375 machine_mode elt_mode;
12376 int n_elts;
12378 /* Return a marker to indicate whether the bit that V.4 uses to say
12379 fp args were passed in registers needs to be set or cleared in CR1.
12380 Assume that we don't need the marker for software floating point,
12381 or compiler generated library calls. */
12382 if (mode == VOIDmode)
12384 if (abi == ABI_V4
12385 && (cum->call_cookie & CALL_LIBCALL) == 0
12386 && (cum->stdarg
12387 || (cum->nargs_prototype < 0
12388 && (cum->prototype || TARGET_NO_PROTOTYPE)))
12389 && TARGET_HARD_FLOAT)
12390 return GEN_INT (cum->call_cookie
12391 | ((cum->fregno == FP_ARG_MIN_REG)
12392 ? CALL_V4_SET_FP_ARGS
12393 : CALL_V4_CLEAR_FP_ARGS));
12395 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12398 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12400 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12402 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12403 if (rslt != NULL_RTX)
12404 return rslt;
12405 /* Else fall through to usual handling. */
12408 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12410 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12411 rtx r, off;
12412 int i, k = 0;
12414 /* Do we also need to pass this argument in the parameter save area?
12415 Library support functions for IEEE 128-bit are assumed to not need the
12416 value passed both in GPRs and in vector registers. */
12417 if (TARGET_64BIT && !cum->prototype
12418 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12420 int align_words = ROUND_UP (cum->words, 2);
12421 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12424 /* Describe where this argument goes in the vector registers. */
12425 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12427 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12428 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12429 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12432 return rs6000_finish_function_arg (mode, rvec, k);
12434 else if (TARGET_ALTIVEC_ABI
12435 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12436 || (type && TREE_CODE (type) == VECTOR_TYPE
12437 && int_size_in_bytes (type) == 16)))
12439 if (named || abi == ABI_V4)
12440 return NULL_RTX;
12441 else
12443 /* Vector parameters to varargs functions under AIX or Darwin
12444 get passed in memory and possibly also in GPRs. */
12445 int align, align_words, n_words;
12446 machine_mode part_mode;
12448 /* Vector parameters must be 16-byte aligned. In 32-bit
12449 mode this means we need to take into account the offset
12450 to the parameter save area. In 64-bit mode, they just
12451 have to start on an even word, since the parameter save
12452 area is 16-byte aligned. */
12453 if (TARGET_32BIT)
12454 align = -(rs6000_parm_offset () + cum->words) & 3;
12455 else
12456 align = cum->words & 1;
12457 align_words = cum->words + align;
12459 /* Out of registers? Memory, then. */
12460 if (align_words >= GP_ARG_NUM_REG)
12461 return NULL_RTX;
12463 if (TARGET_32BIT && TARGET_POWERPC64)
12464 return rs6000_mixed_function_arg (mode, type, align_words);
12466 /* The vector value goes in GPRs. Only the part of the
12467 value in GPRs is reported here. */
12468 part_mode = mode;
12469 n_words = rs6000_arg_size (mode, type);
12470 if (align_words + n_words > GP_ARG_NUM_REG)
12471 /* Fortunately, there are only two possibilities: the value
12472 is either wholly in GPRs or half in GPRs and half not. */
12473 part_mode = DImode;
12475 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12479 else if (abi == ABI_V4)
12481 if (abi_v4_pass_in_fpr (mode, named))
12483 /* _Decimal128 must use an even/odd register pair. This assumes
12484 that the register number is odd when fregno is odd. */
12485 if (mode == TDmode && (cum->fregno % 2) == 1)
12486 cum->fregno++;
12488 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12489 <= FP_ARG_V4_MAX_REG)
12490 return gen_rtx_REG (mode, cum->fregno);
12491 else
12492 return NULL_RTX;
12494 else
12496 int n_words = rs6000_arg_size (mode, type);
12497 int gregno = cum->sysv_gregno;
12499 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12500 As does any other 2 word item such as complex int due to a
12501 historical mistake. */
12502 if (n_words == 2)
12503 gregno += (1 - gregno) & 1;
12505 /* Multi-reg args are not split between registers and stack. */
12506 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12507 return NULL_RTX;
12509 if (TARGET_32BIT && TARGET_POWERPC64)
12510 return rs6000_mixed_function_arg (mode, type,
12511 gregno - GP_ARG_MIN_REG);
12512 return gen_rtx_REG (mode, gregno);
12515 else
12517 int align_words = rs6000_parm_start (mode, type, cum->words);
12519 /* _Decimal128 must be passed in an even/odd float register pair.
12520 This assumes that the register number is odd when fregno is odd. */
12521 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12522 cum->fregno++;
12524 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12526 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12527 rtx r, off;
12528 int i, k = 0;
12529 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12530 int fpr_words;
12532 /* Do we also need to pass this argument in the parameter
12533 save area? */
12534 if (type && (cum->nargs_prototype <= 0
12535 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12536 && TARGET_XL_COMPAT
12537 && align_words >= GP_ARG_NUM_REG)))
12538 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12540 /* Describe where this argument goes in the fprs. */
12541 for (i = 0; i < n_elts
12542 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12544 /* Check if the argument is split over registers and memory.
12545 This can only ever happen for long double or _Decimal128;
12546 complex types are handled via split_complex_arg. */
12547 machine_mode fmode = elt_mode;
12548 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12550 gcc_assert (FLOAT128_2REG_P (fmode));
12551 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12554 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12555 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12556 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12559 /* If there were not enough FPRs to hold the argument, the rest
12560 usually goes into memory. However, if the current position
12561 is still within the register parameter area, a portion may
12562 actually have to go into GPRs.
12564 Note that it may happen that the portion of the argument
12565 passed in the first "half" of the first GPR was already
12566 passed in the last FPR as well.
12568 For unnamed arguments, we already set up GPRs to cover the
12569 whole argument in rs6000_psave_function_arg, so there is
12570 nothing further to do at this point. */
12571 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12572 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12573 && cum->nargs_prototype > 0)
12575 static bool warned;
12577 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12578 int n_words = rs6000_arg_size (mode, type);
12580 align_words += fpr_words;
12581 n_words -= fpr_words;
12583 do
12584 {
12585 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12586 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12587 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12588 }
12589 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12591 if (!warned && warn_psabi)
12593 warned = true;
12594 inform (input_location,
12595 "the ABI of passing homogeneous float aggregates"
12596 " has changed in GCC 5");
12600 return rs6000_finish_function_arg (mode, rvec, k);
12602 else if (align_words < GP_ARG_NUM_REG)
12604 if (TARGET_32BIT && TARGET_POWERPC64)
12605 return rs6000_mixed_function_arg (mode, type, align_words);
12607 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12609 else
12610 return NULL_RTX;
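/* Putting the cases above together (a sketch, 64-bit ELFv2): for a
   call through an unprototyped declaration passing a double,
   USE_FP_FOR_ARG_P holds and cum->nargs_prototype <= 0, so
   rs6000_psave_function_arg contributes the GPR/memory slot and the
   loop adds (reg:DF 33), i.e. f1; the resulting PARALLEL makes the
   caller store the value both in f1 and in the parameter save area.  */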
12614 /* For an arg passed partly in registers and partly in memory, this is
12615 the number of bytes passed in registers. For args passed entirely in
12616 registers or entirely in memory, zero. When an arg is described by a
12617 PARALLEL, perhaps using more than one register type, this function
12618 returns the number of bytes used by the first element of the PARALLEL. */
12620 static int
12621 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12622 tree type, bool named)
12624 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12625 bool passed_in_gprs = true;
12626 int ret = 0;
12627 int align_words;
12628 machine_mode elt_mode;
12629 int n_elts;
12631 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12633 if (DEFAULT_ABI == ABI_V4)
12634 return 0;
12636 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12638 /* If we are passing this arg in the fixed parameter save area (gprs or
12639 memory) as well as VRs, we do not use the partial bytes mechanism;
12640 instead, rs6000_function_arg will return a PARALLEL including a memory
12641 element as necessary. Library support functions for IEEE 128-bit are
12642 assumed to not need the value passed both in GPRs and in vector
12643 registers. */
12644 if (TARGET_64BIT && !cum->prototype
12645 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12646 return 0;
12648 /* Otherwise, we pass in VRs only. Check for partial copies. */
12649 passed_in_gprs = false;
12650 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12651 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12654 /* In this complicated case we just disable the partial_nregs code. */
12655 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12656 return 0;
12658 align_words = rs6000_parm_start (mode, type, cum->words);
12660 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12662 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12664 /* If we are passing this arg in the fixed parameter save area
12665 (gprs or memory) as well as FPRs, we do not use the partial
12666 bytes mechanism; instead, rs6000_function_arg will return a
12667 PARALLEL including a memory element as necessary. */
12668 if (type
12669 && (cum->nargs_prototype <= 0
12670 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12671 && TARGET_XL_COMPAT
12672 && align_words >= GP_ARG_NUM_REG)))
12673 return 0;
12675 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12676 passed_in_gprs = false;
12677 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12679 /* Compute number of bytes / words passed in FPRs. If there
12680 is still space available in the register parameter area
12681 *after* that amount, a part of the argument will be passed
12682 in GPRs. In that case, the total amount passed in any
12683 registers is equal to the amount that would have been passed
12684 in GPRs if everything were passed there, so we fall back to
12685 the GPR code below to compute the appropriate value. */
12686 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12687 * MIN (8, GET_MODE_SIZE (elt_mode)));
12688 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12690 if (align_words + fpr_words < GP_ARG_NUM_REG)
12691 passed_in_gprs = true;
12692 else
12693 ret = fpr;
12697 if (passed_in_gprs
12698 && align_words < GP_ARG_NUM_REG
12699 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12700 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12702 if (ret != 0 && TARGET_DEBUG_ARG)
12703 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12705 return ret;
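/* Illustrative sketch (not part of this file): the GPR split computed
   above, reduced to plain C for a hypothetical 64-bit ABI with eight
   parameter GPRs (GP_ARG_NUM_REG assumed to be 8).  A 24-byte aggregate
   starting at word 6 puts 16 bytes in r9/r10 and 8 bytes in memory, so
   the partial-bytes value is 16.  */
static int
example_gpr_partial_bytes (int align_words, int arg_size_words)
{
  const int n_parm_gprs = 8;        /* assumed GP_ARG_NUM_REG */
  if (align_words < n_parm_gprs
      && n_parm_gprs < align_words + arg_size_words)
    return (n_parm_gprs - align_words) * 8;  /* bytes that land in GPRs */
  return 0;   /* entirely in registers, or entirely in memory */
}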
12708 /* A C expression that indicates when an argument must be passed by
12709 reference. If nonzero for an argument, a copy of that argument is
12710 made in memory and a pointer to the argument is passed instead of
12711 the argument itself. The pointer is passed in whatever way is
12712 appropriate for passing a pointer to that type.
12714 Under V.4, aggregates and long double are passed by reference.
12716 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12717 reference unless the AltiVec vector extension ABI is in force.
12719 As an extension to all ABIs, variable sized types are passed by
12720 reference. */
12722 static bool
12723 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12724 machine_mode mode, const_tree type,
12725 bool named ATTRIBUTE_UNUSED)
12727 if (!type)
12728 return 0;
12730 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12731 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12733 if (TARGET_DEBUG_ARG)
12734 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12735 return 1;
12738 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12740 if (TARGET_DEBUG_ARG)
12741 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12742 return 1;
12745 if (int_size_in_bytes (type) < 0)
12747 if (TARGET_DEBUG_ARG)
12748 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12749 return 1;
12752 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12753 modes only exist for GCC vector types if -maltivec. */
12754 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12756 if (TARGET_DEBUG_ARG)
12757 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12758 return 1;
12761 /* Pass synthetic vectors in memory. */
12762 if (TREE_CODE (type) == VECTOR_TYPE
12763 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12765 static bool warned_for_pass_big_vectors = false;
12766 if (TARGET_DEBUG_ARG)
12767 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12768 if (!warned_for_pass_big_vectors)
12770 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12771 "non-standard ABI extension with no compatibility "
12772 "guarantee");
12773 warned_for_pass_big_vectors = true;
12775 return 1;
12778 return 0;
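/* Illustrative usage (not part of this file): a GCC "synthetic" vector
   wider than the 16-byte AltiVec limit takes the pass-by-reference path
   above and triggers the one-time -Wpsabi warning (assuming the AltiVec
   ABI is in effect).  */
typedef int example_v8si __attribute__ ((vector_size (32)));
extern void example_callee (example_v8si);  /* 32 bytes > 16: by reference */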
12781 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12782 already processed. Return true if the parameter must be passed
12783 (fully or partially) on the stack. */
12785 static bool
12786 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12788 machine_mode mode;
12789 int unsignedp;
12790 rtx entry_parm;
12792 /* Catch errors. */
12793 if (type == NULL || type == error_mark_node)
12794 return true;
12796 /* Handle types with no storage requirement. */
12797 if (TYPE_MODE (type) == VOIDmode)
12798 return false;
12800 /* Handle complex types. */
12801 if (TREE_CODE (type) == COMPLEX_TYPE)
12802 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12803 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12805 /* Handle transparent aggregates. */
12806 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12807 && TYPE_TRANSPARENT_AGGR (type))
12808 type = TREE_TYPE (first_field (type));
12810 /* See if this arg was passed by invisible reference. */
12811 if (pass_by_reference (get_cumulative_args (args_so_far),
12812 TYPE_MODE (type), type, true))
12813 type = build_pointer_type (type);
12815 /* Find mode as it is passed by the ABI. */
12816 unsignedp = TYPE_UNSIGNED (type);
12817 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12819 /* If we must pass in stack, we need a stack. */
12820 if (rs6000_must_pass_in_stack (mode, type))
12821 return true;
12823 /* If there is no incoming register, we need a stack. */
12824 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12825 if (entry_parm == NULL)
12826 return true;
12828 /* Likewise if we need to pass both in registers and on the stack. */
12829 if (GET_CODE (entry_parm) == PARALLEL
12830 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12831 return true;
12833 /* Also true if we're partially in registers and partially not. */
12834 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12835 return true;
12837 /* Update info on where next arg arrives in registers. */
12838 rs6000_function_arg_advance (args_so_far, mode, type, true);
12839 return false;
12842 /* Return true if FUN has no prototype, has a variable argument
12843 list, or passes any parameter in memory. */
12845 static bool
12846 rs6000_function_parms_need_stack (tree fun, bool incoming)
12848 tree fntype, result;
12849 CUMULATIVE_ARGS args_so_far_v;
12850 cumulative_args_t args_so_far;
12852 if (!fun)
12853 /* Must be a libcall; libcalls only use reg parms. */
12854 return false;
12856 fntype = fun;
12857 if (!TYPE_P (fun))
12858 fntype = TREE_TYPE (fun);
12860 /* Varargs functions need the parameter save area. */
12861 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12862 return true;
12864 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12865 args_so_far = pack_cumulative_args (&args_so_far_v);
12867 /* When incoming, we will have been passed the function decl.
12868 It is necessary to use the decl to handle K&R style functions,
12869 where TYPE_ARG_TYPES may not be available. */
12870 if (incoming)
12872 gcc_assert (DECL_P (fun));
12873 result = DECL_RESULT (fun);
12875 else
12876 result = TREE_TYPE (fntype);
12878 if (result && aggregate_value_p (result, fntype))
12880 if (!TYPE_P (result))
12881 result = TREE_TYPE (result);
12882 result = build_pointer_type (result);
12883 rs6000_parm_needs_stack (args_so_far, result);
12886 if (incoming)
12888 tree parm;
12890 for (parm = DECL_ARGUMENTS (fun);
12891 parm && parm != void_list_node;
12892 parm = TREE_CHAIN (parm))
12893 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12894 return true;
12896 else
12898 function_args_iterator args_iter;
12899 tree arg_type;
12901 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12902 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12903 return true;
12906 return false;
12909 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12910 usually a constant depending on the ABI. However, in the ELFv2 ABI
12911 the register parameter area is optional when calling a function that
12912 has a prototype in scope, has no variable argument list, and passes
12913 all parameters in registers. */
12915 int
12916 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12918 int reg_parm_stack_space;
12920 switch (DEFAULT_ABI)
12922 default:
12923 reg_parm_stack_space = 0;
12924 break;
12926 case ABI_AIX:
12927 case ABI_DARWIN:
12928 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12929 break;
12931 case ABI_ELFv2:
12932 /* ??? Recomputing this every time is a bit expensive. Is there
12933 a place to cache this information? */
12934 if (rs6000_function_parms_need_stack (fun, incoming))
12935 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12936 else
12937 reg_parm_stack_space = 0;
12938 break;
12941 return reg_parm_stack_space;
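/* Illustrative sketch (not part of this file): under the ELFv2 64-bit
   ABI, the 64-byte register parameter save area is only required when
   some parameter fails to fit entirely in registers, or the callee is
   varargs or unprototyped, per rs6000_function_parms_need_stack above
   (eight parameter GPRs, r3..r10, assumed).  */
extern void example_all_in_regs (long, long, long, long,
                                 long, long, long, long);   /* no save area */
extern void example_needs_stack (long, long, long, long, long,
                                 long, long, long, long);   /* 9th arg: 64 bytes */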
12944 static void
12945 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12947 int i;
12948 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12950 if (nregs == 0)
12951 return;
12953 for (i = 0; i < nregs; i++)
12955 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12956 if (reload_completed)
12958 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12959 tem = NULL_RTX;
12960 else
12961 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12962 i * GET_MODE_SIZE (reg_mode));
12964 else
12965 tem = replace_equiv_address (tem, XEXP (tem, 0));
12967 gcc_assert (tem);
12969 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12973 /* Perform any actions needed for a function that is receiving a
12974 variable number of arguments.
12976 CUM is as above.
12978 MODE and TYPE are the mode and type of the current parameter.
12980 PRETEND_SIZE is a variable that should be set to the amount of stack
12981 that must be pushed by the prolog to pretend that our caller pushed
12982 it.
12984 Normally, this macro will push all remaining incoming registers on the
12985 stack and set PRETEND_SIZE to the length of the registers pushed. */
12987 static void
12988 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12989 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12990 int no_rtl)
12992 CUMULATIVE_ARGS next_cum;
12993 int reg_size = TARGET_32BIT ? 4 : 8;
12994 rtx save_area = NULL_RTX, mem;
12995 int first_reg_offset;
12996 alias_set_type set;
12998 /* Skip the last named argument. */
12999 next_cum = *get_cumulative_args (cum);
13000 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
13002 if (DEFAULT_ABI == ABI_V4)
13004 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
13006 if (! no_rtl)
13008 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
13009 HOST_WIDE_INT offset = 0;
13011 /* Try to optimize the size of the varargs save area.
13012 The ABI requires that ap.reg_save_area is doubleword
13013 aligned, but we don't need to allocate space for all
13014 the bytes, only for those bytes to which we will actually
13015 save anything. */
13016 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
13017 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
13018 if (TARGET_HARD_FLOAT
13019 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13020 && cfun->va_list_fpr_size)
13022 if (gpr_reg_num)
13023 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13024 * UNITS_PER_FP_WORD;
13025 if (cfun->va_list_fpr_size
13026 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13027 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13028 else
13029 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13030 * UNITS_PER_FP_WORD;
13032 if (gpr_reg_num)
13034 offset = -((first_reg_offset * reg_size) & ~7);
13035 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13037 gpr_reg_num = cfun->va_list_gpr_size;
13038 if (reg_size == 4 && (first_reg_offset & 1))
13039 gpr_reg_num++;
13041 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13043 else if (fpr_size)
13044 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13045 * UNITS_PER_FP_WORD
13046 - (int) (GP_ARG_NUM_REG * reg_size);
13048 if (gpr_size + fpr_size)
13050 rtx reg_save_area
13051 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13052 gcc_assert (GET_CODE (reg_save_area) == MEM);
13053 reg_save_area = XEXP (reg_save_area, 0);
13054 if (GET_CODE (reg_save_area) == PLUS)
13056 gcc_assert (XEXP (reg_save_area, 0)
13057 == virtual_stack_vars_rtx);
13058 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13059 offset += INTVAL (XEXP (reg_save_area, 1));
13061 else
13062 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13065 cfun->machine->varargs_save_offset = offset;
13066 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13069 else
13071 first_reg_offset = next_cum.words;
13072 save_area = crtl->args.internal_arg_pointer;
13074 if (targetm.calls.must_pass_in_stack (mode, type))
13075 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13078 set = get_varargs_alias_set ();
13079 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13080 && cfun->va_list_gpr_size)
13082 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13084 if (va_list_gpr_counter_field)
13085 /* V4 va_list_gpr_size counts number of registers needed. */
13086 n_gpr = cfun->va_list_gpr_size;
13087 else
13088 /* char * va_list instead counts number of bytes needed. */
13089 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13091 if (nregs > n_gpr)
13092 nregs = n_gpr;
13094 mem = gen_rtx_MEM (BLKmode,
13095 plus_constant (Pmode, save_area,
13096 first_reg_offset * reg_size));
13097 MEM_NOTRAP_P (mem) = 1;
13098 set_mem_alias_set (mem, set);
13099 set_mem_align (mem, BITS_PER_WORD);
13101 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13102 nregs);
13105 /* Save FP registers if needed. */
13106 if (DEFAULT_ABI == ABI_V4
13107 && TARGET_HARD_FLOAT
13108 && ! no_rtl
13109 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13110 && cfun->va_list_fpr_size)
13112 int fregno = next_cum.fregno, nregs;
13113 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13114 rtx lab = gen_label_rtx ();
13115 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13116 * UNITS_PER_FP_WORD);
13118 emit_jump_insn
13119 (gen_rtx_SET (pc_rtx,
13120 gen_rtx_IF_THEN_ELSE (VOIDmode,
13121 gen_rtx_NE (VOIDmode, cr1,
13122 const0_rtx),
13123 gen_rtx_LABEL_REF (VOIDmode, lab),
13124 pc_rtx)));
13126 for (nregs = 0;
13127 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13128 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13130 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13131 ? DFmode : SFmode,
13132 plus_constant (Pmode, save_area, off));
13133 MEM_NOTRAP_P (mem) = 1;
13134 set_mem_alias_set (mem, set);
13135 set_mem_align (mem, GET_MODE_ALIGNMENT (
13136 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13137 ? DFmode : SFmode));
13138 emit_move_insn (mem, gen_rtx_REG (
13139 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13140 ? DFmode : SFmode, fregno));
13143 emit_label (lab);
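/* Worked example (not part of this file) of the save-area sizing above,
   assuming 32-bit V.4 (reg_size == 4), GP_ARG_NUM_REG == 8, and two
   named GPR args already consumed, so first_reg_offset == 2.  */
#include <stdio.h>
int main (void)
{
  int reg_size = 4, first_reg_offset = 2;
  int gpr_reg_num = 8 - first_reg_offset;               /* 6 regs to dump */
  int offset = -((first_reg_offset * reg_size) & ~7);   /* -8: dword aligned */
  int gpr_size = (gpr_reg_num * reg_size + 7) & ~7;     /* 24, rounded up */
  printf ("offset=%d gpr_size=%d\n", offset, gpr_size);
  return 0;
}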
13147 /* Create the va_list data type. */
13149 static tree
13150 rs6000_build_builtin_va_list (void)
13152 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13154 /* For AIX, prefer 'char *' because that's what the system
13155 header files like. */
13156 if (DEFAULT_ABI != ABI_V4)
13157 return build_pointer_type (char_type_node);
13159 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13160 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13161 get_identifier ("__va_list_tag"), record);
13163 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13164 unsigned_char_type_node);
13165 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13166 unsigned_char_type_node);
13167 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13168 every user file. */
13169 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13170 get_identifier ("reserved"), short_unsigned_type_node);
13171 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13172 get_identifier ("overflow_arg_area"),
13173 ptr_type_node);
13174 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13175 get_identifier ("reg_save_area"),
13176 ptr_type_node);
13178 va_list_gpr_counter_field = f_gpr;
13179 va_list_fpr_counter_field = f_fpr;
13181 DECL_FIELD_CONTEXT (f_gpr) = record;
13182 DECL_FIELD_CONTEXT (f_fpr) = record;
13183 DECL_FIELD_CONTEXT (f_res) = record;
13184 DECL_FIELD_CONTEXT (f_ovf) = record;
13185 DECL_FIELD_CONTEXT (f_sav) = record;
13187 TYPE_STUB_DECL (record) = type_decl;
13188 TYPE_NAME (record) = type_decl;
13189 TYPE_FIELDS (record) = f_gpr;
13190 DECL_CHAIN (f_gpr) = f_fpr;
13191 DECL_CHAIN (f_fpr) = f_res;
13192 DECL_CHAIN (f_res) = f_ovf;
13193 DECL_CHAIN (f_ovf) = f_sav;
13195 layout_type (record);
13197 /* The correct type is an array type of one element. */
13198 return build_array_type (record, build_index_type (size_zero_node));
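/* Equivalent C-level view (illustrative only) of the V.4 va_list record
   built above; as the comment says, the real type is an array of one
   such struct.  */
typedef struct example_va_list_tag
{
  unsigned char gpr;        /* next GPR to fetch (counts r3..r10) */
  unsigned char fpr;        /* next FPR to fetch (counts f1..f8) */
  unsigned short reserved;  /* named padding, keeps -Wpadded quiet */
  void *overflow_arg_area;  /* args that spilled to the stack */
  void *reg_save_area;      /* block where the prologue dumped arg regs */
} example_va_list_tag[1];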
13201 /* Implement va_start. */
13203 static void
13204 rs6000_va_start (tree valist, rtx nextarg)
13206 HOST_WIDE_INT words, n_gpr, n_fpr;
13207 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13208 tree gpr, fpr, ovf, sav, t;
13210 /* Only SVR4 needs something special. */
13211 if (DEFAULT_ABI != ABI_V4)
13213 std_expand_builtin_va_start (valist, nextarg);
13214 return;
13217 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13218 f_fpr = DECL_CHAIN (f_gpr);
13219 f_res = DECL_CHAIN (f_fpr);
13220 f_ovf = DECL_CHAIN (f_res);
13221 f_sav = DECL_CHAIN (f_ovf);
13223 valist = build_simple_mem_ref (valist);
13224 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13225 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13226 f_fpr, NULL_TREE);
13227 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13228 f_ovf, NULL_TREE);
13229 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13230 f_sav, NULL_TREE);
13232 /* Count number of gp and fp argument registers used. */
13233 words = crtl->args.info.words;
13234 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13235 GP_ARG_NUM_REG);
13236 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13237 FP_ARG_NUM_REG);
13239 if (TARGET_DEBUG_ARG)
13240 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13241 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13242 words, n_gpr, n_fpr);
13244 if (cfun->va_list_gpr_size)
13246 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13247 build_int_cst (NULL_TREE, n_gpr));
13248 TREE_SIDE_EFFECTS (t) = 1;
13249 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13252 if (cfun->va_list_fpr_size)
13254 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13255 build_int_cst (NULL_TREE, n_fpr));
13256 TREE_SIDE_EFFECTS (t) = 1;
13257 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13259 #ifdef HAVE_AS_GNU_ATTRIBUTE
13260 if (call_ABI_of_interest (cfun->decl))
13261 rs6000_passes_float = true;
13262 #endif
13265 /* Find the overflow area. */
13266 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13267 if (words != 0)
13268 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13269 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13270 TREE_SIDE_EFFECTS (t) = 1;
13271 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13273 /* If there were no va_arg invocations, don't set up the register
13274 save area. */
13275 if (!cfun->va_list_gpr_size
13276 && !cfun->va_list_fpr_size
13277 && n_gpr < GP_ARG_NUM_REG
13278 && n_fpr < FP_ARG_V4_MAX_REG)
13279 return;
13281 /* Find the register save area. */
13282 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13283 if (cfun->machine->varargs_save_offset)
13284 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13285 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13286 TREE_SIDE_EFFECTS (t) = 1;
13287 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13290 /* Implement va_arg. */
13292 static tree
13293 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13294 gimple_seq *post_p)
13296 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13297 tree gpr, fpr, ovf, sav, reg, t, u;
13298 int size, rsize, n_reg, sav_ofs, sav_scale;
13299 tree lab_false, lab_over, addr;
13300 int align;
13301 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13302 int regalign = 0;
13303 gimple *stmt;
13305 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13307 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13308 return build_va_arg_indirect_ref (t);
13311 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13312 earlier version of gcc, with the property that it always applied alignment
13313 adjustments to the va-args (even for zero-sized types). The cheapest way
13314 to deal with this is to replicate the effect of the part of
13315 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13316 of relevance.
13317 We don't need to check for pass-by-reference because of the test above.
13318 We can return a simplified answer, since we know there's no offset to add. */
13320 if (((TARGET_MACHO
13321 && rs6000_darwin64_abi)
13322 || DEFAULT_ABI == ABI_ELFv2
13323 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13324 && integer_zerop (TYPE_SIZE (type)))
13326 unsigned HOST_WIDE_INT align, boundary;
13327 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13328 align = PARM_BOUNDARY / BITS_PER_UNIT;
13329 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13330 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13331 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13332 boundary /= BITS_PER_UNIT;
13333 if (boundary > align)
13335 tree t;
13336 /* This updates arg ptr by the amount that would be necessary
13337 to align the zero-sized (but not zero-alignment) item. */
13338 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13339 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13340 gimplify_and_add (t, pre_p);
13342 t = fold_convert (sizetype, valist_tmp);
13343 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13344 fold_convert (TREE_TYPE (valist),
13345 fold_build2 (BIT_AND_EXPR, sizetype, t,
13346 size_int (-boundary))));
13347 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13348 gimplify_and_add (t, pre_p);
13350 /* Since it is zero-sized there's no increment for the item itself. */
13351 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13352 return build_va_arg_indirect_ref (valist_tmp);
13355 if (DEFAULT_ABI != ABI_V4)
13357 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13359 tree elem_type = TREE_TYPE (type);
13360 machine_mode elem_mode = TYPE_MODE (elem_type);
13361 int elem_size = GET_MODE_SIZE (elem_mode);
13363 if (elem_size < UNITS_PER_WORD)
13365 tree real_part, imag_part;
13366 gimple_seq post = NULL;
13368 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13369 &post);
13370 /* Copy the value into a temporary, lest the formal temporary
13371 be reused out from under us. */
13372 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13373 gimple_seq_add_seq (pre_p, post);
13375 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13376 post_p);
13378 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13382 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13385 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13386 f_fpr = DECL_CHAIN (f_gpr);
13387 f_res = DECL_CHAIN (f_fpr);
13388 f_ovf = DECL_CHAIN (f_res);
13389 f_sav = DECL_CHAIN (f_ovf);
13391 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13392 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13393 f_fpr, NULL_TREE);
13394 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13395 f_ovf, NULL_TREE);
13396 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13397 f_sav, NULL_TREE);
13399 size = int_size_in_bytes (type);
13400 rsize = (size + 3) / 4;
13401 int pad = 4 * rsize - size;
13402 align = 1;
13404 machine_mode mode = TYPE_MODE (type);
13405 if (abi_v4_pass_in_fpr (mode, false))
13407 /* FP args go in FP registers, if present. */
13408 reg = fpr;
13409 n_reg = (size + 7) / 8;
13410 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13411 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13412 if (mode != SFmode && mode != SDmode)
13413 align = 8;
13415 else
13417 /* Otherwise into GP registers. */
13418 reg = gpr;
13419 n_reg = rsize;
13420 sav_ofs = 0;
13421 sav_scale = 4;
13422 if (n_reg == 2)
13423 align = 8;
13426 /* Pull the value out of the saved registers.... */
13428 lab_over = NULL;
13429 addr = create_tmp_var (ptr_type_node, "addr");
13431 /* AltiVec vectors never go in registers when -mabi=altivec. */
13432 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13433 align = 16;
13434 else
13436 lab_false = create_artificial_label (input_location);
13437 lab_over = create_artificial_label (input_location);
13439 /* Long long is aligned in the registers, as is any other 2-GPR
13440 item such as complex int, due to a historical mistake. */
13441 u = reg;
13442 if (n_reg == 2 && reg == gpr)
13444 regalign = 1;
13445 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13446 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13447 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13448 unshare_expr (reg), u);
13450 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13451 reg number is 0 for f1, so we want to make it odd. */
13452 else if (reg == fpr && mode == TDmode)
13454 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13455 build_int_cst (TREE_TYPE (reg), 1));
13456 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13459 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13460 t = build2 (GE_EXPR, boolean_type_node, u, t);
13461 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13462 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13463 gimplify_and_add (t, pre_p);
13465 t = sav;
13466 if (sav_ofs)
13467 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13469 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13470 build_int_cst (TREE_TYPE (reg), n_reg));
13471 u = fold_convert (sizetype, u);
13472 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13473 t = fold_build_pointer_plus (t, u);
13475 /* _Decimal32 varargs are located in the second word of the 64-bit
13476 FP register for 32-bit binaries. */
13477 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
13478 t = fold_build_pointer_plus_hwi (t, size);
13480 /* Args are passed right-aligned. */
13481 if (BYTES_BIG_ENDIAN)
13482 t = fold_build_pointer_plus_hwi (t, pad);
13484 gimplify_assign (addr, t, pre_p);
13486 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13488 stmt = gimple_build_label (lab_false);
13489 gimple_seq_add_stmt (pre_p, stmt);
13491 if ((n_reg == 2 && !regalign) || n_reg > 2)
13493 /* Ensure that we don't find any more args in regs.
13494 Alignment has already taken care of the special cases. */
13495 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13499 /* ... otherwise out of the overflow area. */
13501 /* Care for on-stack alignment if needed. */
13502 t = ovf;
13503 if (align != 1)
13505 t = fold_build_pointer_plus_hwi (t, align - 1);
13506 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13507 build_int_cst (TREE_TYPE (t), -align));
13510 /* Args are passed right-aligned. */
13511 if (BYTES_BIG_ENDIAN)
13512 t = fold_build_pointer_plus_hwi (t, pad);
13514 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13516 gimplify_assign (unshare_expr (addr), t, pre_p);
13518 t = fold_build_pointer_plus_hwi (t, size);
13519 gimplify_assign (unshare_expr (ovf), t, pre_p);
13521 if (lab_over)
13523 stmt = gimple_build_label (lab_over);
13524 gimple_seq_add_stmt (pre_p, stmt);
13527 if (STRICT_ALIGNMENT
13528 && (TYPE_ALIGN (type)
13529 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13531 /* The value (of type complex double, for example) may not be
13532 aligned in memory in the saved registers, so copy via a
13533 temporary. (This is the same code as used for SPARC.) */
13534 tree tmp = create_tmp_var (type, "va_arg_tmp");
13535 tree dest_addr = build_fold_addr_expr (tmp);
13537 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13538 3, dest_addr, addr, size_int (rsize * 4));
13539 TREE_ADDRESSABLE (tmp) = 1;
13541 gimplify_and_add (copy, pre_p);
13542 addr = dest_addr;
13545 addr = fold_convert (ptrtype, addr);
13546 return build_va_arg_indirect_ref (addr);
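/* Illustrative sketch (not part of this file) of the two address
   adjustments used above for the overflow area: round the pointer up to
   the argument's alignment, then, on a big-endian target, step past the
   padding so a small arg is read right-aligned in its slot (pad ==
   4 * rsize - size, as computed above).  */
#include <stdint.h>
static char *
example_overflow_addr (char *ovf, int align, int pad, int big_endian)
{
  if (align != 1)
    ovf = (char *) (((uintptr_t) ovf + align - 1) & -(uintptr_t) align);
  if (big_endian)
    ovf += pad;
  return ovf;
}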
13549 /* Builtins. */
13551 static void
13552 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13554 tree t;
13555 unsigned classify = rs6000_builtin_info[(int)code].attr;
13556 const char *attr_string = "";
13558 gcc_assert (name != NULL);
13559 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13561 if (rs6000_builtin_decls[(int)code])
13562 fatal_error (input_location,
13563 "internal error: builtin function %qs already processed",
13564 name);
13566 rs6000_builtin_decls[(int)code] = t =
13567 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13569 /* Set any special attributes. */
13570 if ((classify & RS6000_BTC_CONST) != 0)
13572 /* const function, function only depends on the inputs. */
13573 TREE_READONLY (t) = 1;
13574 TREE_NOTHROW (t) = 1;
13575 attr_string = ", const";
13577 else if ((classify & RS6000_BTC_PURE) != 0)
13579 /* pure function, function can read global memory, but does not set any
13580 external state. */
13581 DECL_PURE_P (t) = 1;
13582 TREE_NOTHROW (t) = 1;
13583 attr_string = ", pure";
13585 else if ((classify & RS6000_BTC_FP) != 0)
13587 /* Function is a math function. If rounding mode is on, then treat the
13588 function as not reading global memory, but it can have arbitrary side
13589 effects. If it is off, then assume the function is a const function.
13590 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13591 builtin-attribute.def that is used for the math functions. */
13592 TREE_NOTHROW (t) = 1;
13593 if (flag_rounding_math)
13595 DECL_PURE_P (t) = 1;
13596 DECL_IS_NOVOPS (t) = 1;
13597 attr_string = ", fp, pure";
13599 else
13601 TREE_READONLY (t) = 1;
13602 attr_string = ", fp, const";
13605 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13606 gcc_unreachable ();
13608 if (TARGET_DEBUG_BUILTIN)
13609 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13610 (int)code, name, attr_string);
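/* Why FP builtins are only "const" without -frounding-math: an
   illustrative user-level program (not part of this file).  rint honors
   the dynamic rounding mode, so under -frounding-math the two calls
   below may yield different values and must not be folded into one;
   with default options the compiler may assume round-to-nearest and
   treat the function as const.  Compile with -frounding-math -lm.  */
#include <fenv.h>
#include <stdio.h>
int main (void)
{
  volatile double x = 1.5;          /* volatile blocks constant folding */
  fesetround (FE_DOWNWARD);
  double a = __builtin_rint (x);    /* 1.0 when rounding downward */
  fesetround (FE_TONEAREST);
  double b = __builtin_rint (x);    /* 2.0 under round-to-nearest-even */
  printf ("%g %g\n", a, b);
  return 0;
}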
13613 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13615 #undef RS6000_BUILTIN_0
13616 #undef RS6000_BUILTIN_1
13617 #undef RS6000_BUILTIN_2
13618 #undef RS6000_BUILTIN_3
13619 #undef RS6000_BUILTIN_A
13620 #undef RS6000_BUILTIN_D
13621 #undef RS6000_BUILTIN_H
13622 #undef RS6000_BUILTIN_P
13623 #undef RS6000_BUILTIN_Q
13624 #undef RS6000_BUILTIN_X
13626 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13627 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13628 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13629 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13630 { MASK, ICODE, NAME, ENUM },
13632 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13633 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13634 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13635 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13636 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13637 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13639 static const struct builtin_description bdesc_3arg[] =
13641 #include "rs6000-builtin.def"
13644 /* DST operations: void foo (void *, const int, const char). */
13646 #undef RS6000_BUILTIN_0
13647 #undef RS6000_BUILTIN_1
13648 #undef RS6000_BUILTIN_2
13649 #undef RS6000_BUILTIN_3
13650 #undef RS6000_BUILTIN_A
13651 #undef RS6000_BUILTIN_D
13652 #undef RS6000_BUILTIN_H
13653 #undef RS6000_BUILTIN_P
13654 #undef RS6000_BUILTIN_Q
13655 #undef RS6000_BUILTIN_X
13657 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13658 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13659 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13660 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13661 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13662 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13663 { MASK, ICODE, NAME, ENUM },
13665 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13666 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13667 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13668 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13670 static const struct builtin_description bdesc_dst[] =
13672 #include "rs6000-builtin.def"
13675 /* Simple binary operations: VECc = foo (VECa, VECb). */
13677 #undef RS6000_BUILTIN_0
13678 #undef RS6000_BUILTIN_1
13679 #undef RS6000_BUILTIN_2
13680 #undef RS6000_BUILTIN_3
13681 #undef RS6000_BUILTIN_A
13682 #undef RS6000_BUILTIN_D
13683 #undef RS6000_BUILTIN_H
13684 #undef RS6000_BUILTIN_P
13685 #undef RS6000_BUILTIN_Q
13686 #undef RS6000_BUILTIN_X
13688 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13689 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13690 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13691 { MASK, ICODE, NAME, ENUM },
13693 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13694 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13695 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13696 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13697 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13698 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13699 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13701 static const struct builtin_description bdesc_2arg[] =
13703 #include "rs6000-builtin.def"
13706 #undef RS6000_BUILTIN_0
13707 #undef RS6000_BUILTIN_1
13708 #undef RS6000_BUILTIN_2
13709 #undef RS6000_BUILTIN_3
13710 #undef RS6000_BUILTIN_A
13711 #undef RS6000_BUILTIN_D
13712 #undef RS6000_BUILTIN_H
13713 #undef RS6000_BUILTIN_P
13714 #undef RS6000_BUILTIN_Q
13715 #undef RS6000_BUILTIN_X
13717 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13718 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13719 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13720 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13721 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13722 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13723 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13724 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13725 { MASK, ICODE, NAME, ENUM },
13727 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13728 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13730 /* AltiVec predicates. */
13732 static const struct builtin_description bdesc_altivec_preds[] =
13734 #include "rs6000-builtin.def"
13737 /* PAIRED predicates. */
13738 #undef RS6000_BUILTIN_0
13739 #undef RS6000_BUILTIN_1
13740 #undef RS6000_BUILTIN_2
13741 #undef RS6000_BUILTIN_3
13742 #undef RS6000_BUILTIN_A
13743 #undef RS6000_BUILTIN_D
13744 #undef RS6000_BUILTIN_H
13745 #undef RS6000_BUILTIN_P
13746 #undef RS6000_BUILTIN_Q
13747 #undef RS6000_BUILTIN_X
13749 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13750 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13751 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13752 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13753 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13754 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13755 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13756 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13757 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13758 { MASK, ICODE, NAME, ENUM },
13760 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13762 static const struct builtin_description bdesc_paired_preds[] =
13764 #include "rs6000-builtin.def"
13767 /* ABS* operations. */
13769 #undef RS6000_BUILTIN_0
13770 #undef RS6000_BUILTIN_1
13771 #undef RS6000_BUILTIN_2
13772 #undef RS6000_BUILTIN_3
13773 #undef RS6000_BUILTIN_A
13774 #undef RS6000_BUILTIN_D
13775 #undef RS6000_BUILTIN_H
13776 #undef RS6000_BUILTIN_P
13777 #undef RS6000_BUILTIN_Q
13778 #undef RS6000_BUILTIN_X
13780 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13781 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13782 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13783 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13784 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13785 { MASK, ICODE, NAME, ENUM },
13787 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13788 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13789 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13790 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13791 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13793 static const struct builtin_description bdesc_abs[] =
13795 #include "rs6000-builtin.def"
13798 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13799 foo (VECa). */
13801 #undef RS6000_BUILTIN_0
13802 #undef RS6000_BUILTIN_1
13803 #undef RS6000_BUILTIN_2
13804 #undef RS6000_BUILTIN_3
13805 #undef RS6000_BUILTIN_A
13806 #undef RS6000_BUILTIN_D
13807 #undef RS6000_BUILTIN_H
13808 #undef RS6000_BUILTIN_P
13809 #undef RS6000_BUILTIN_Q
13810 #undef RS6000_BUILTIN_X
13812 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13813 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13814 { MASK, ICODE, NAME, ENUM },
13816 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13817 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13818 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13819 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13820 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13821 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13822 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13823 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13825 static const struct builtin_description bdesc_1arg[] =
13827 #include "rs6000-builtin.def"
13830 /* Simple no-argument operations: result = __builtin_darn_32 () */
13832 #undef RS6000_BUILTIN_0
13833 #undef RS6000_BUILTIN_1
13834 #undef RS6000_BUILTIN_2
13835 #undef RS6000_BUILTIN_3
13836 #undef RS6000_BUILTIN_A
13837 #undef RS6000_BUILTIN_D
13838 #undef RS6000_BUILTIN_H
13839 #undef RS6000_BUILTIN_P
13840 #undef RS6000_BUILTIN_Q
13841 #undef RS6000_BUILTIN_X
13843 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13844 { MASK, ICODE, NAME, ENUM },
13846 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13847 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13848 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13849 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13850 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13851 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13852 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13853 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13854 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13856 static const struct builtin_description bdesc_0arg[] =
13858 #include "rs6000-builtin.def"
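/* Self-contained sketch (not part of this file) of the table-building
   idiom used throughout this section: one .def-style list is expanded
   several times, each pass defining only the row macro it wants and
   leaving the others empty.  */
#define EXAMPLE_BUILTINS \
  EX_UNARY  (EX_NEG, "neg") \
  EX_BINARY (EX_ADD, "add") \
  EX_BINARY (EX_MUL, "mul")

struct example_desc { const char *name; };

#define EX_UNARY(ENUM, NAME)              /* skip unary rows */
#define EX_BINARY(ENUM, NAME) { NAME },
static const struct example_desc example_bdesc_2arg[] =
{
  EXAMPLE_BUILTINS                        /* expands to { "add" }, { "mul" }, */
};
#undef EX_UNARY
#undef EX_BINARY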
13861 /* HTM builtins. */
13862 #undef RS6000_BUILTIN_0
13863 #undef RS6000_BUILTIN_1
13864 #undef RS6000_BUILTIN_2
13865 #undef RS6000_BUILTIN_3
13866 #undef RS6000_BUILTIN_A
13867 #undef RS6000_BUILTIN_D
13868 #undef RS6000_BUILTIN_H
13869 #undef RS6000_BUILTIN_P
13870 #undef RS6000_BUILTIN_Q
13871 #undef RS6000_BUILTIN_X
13873 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13874 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13875 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13876 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13877 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13878 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13879 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13880 { MASK, ICODE, NAME, ENUM },
13882 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13883 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13884 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13886 static const struct builtin_description bdesc_htm[] =
13888 #include "rs6000-builtin.def"
13891 #undef RS6000_BUILTIN_0
13892 #undef RS6000_BUILTIN_1
13893 #undef RS6000_BUILTIN_2
13894 #undef RS6000_BUILTIN_3
13895 #undef RS6000_BUILTIN_A
13896 #undef RS6000_BUILTIN_D
13897 #undef RS6000_BUILTIN_H
13898 #undef RS6000_BUILTIN_P
13899 #undef RS6000_BUILTIN_Q
13900 #undef RS6000_BUILTIN_X
13901 /* Return true if a builtin function is overloaded. */
13902 bool
13903 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13905 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13908 const char *
13909 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13911 return rs6000_builtin_info[(int)fncode].name;
13914 /* Expand an expression EXP that calls a builtin without arguments. */
13915 static rtx
13916 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13918 rtx pat;
13919 machine_mode tmode = insn_data[icode].operand[0].mode;
13921 if (icode == CODE_FOR_nothing)
13922 /* Builtin not supported on this processor. */
13923 return 0;
13925 if (target == 0
13926 || GET_MODE (target) != tmode
13927 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13928 target = gen_reg_rtx (tmode);
13930 pat = GEN_FCN (icode) (target);
13931 if (! pat)
13932 return 0;
13933 emit_insn (pat);
13935 return target;
13939 static rtx
13940 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13942 rtx pat;
13943 tree arg0 = CALL_EXPR_ARG (exp, 0);
13944 tree arg1 = CALL_EXPR_ARG (exp, 1);
13945 rtx op0 = expand_normal (arg0);
13946 rtx op1 = expand_normal (arg1);
13947 machine_mode mode0 = insn_data[icode].operand[0].mode;
13948 machine_mode mode1 = insn_data[icode].operand[1].mode;
13950 if (icode == CODE_FOR_nothing)
13951 /* Builtin not supported on this processor. */
13952 return 0;
13954 /* If we got invalid arguments bail out before generating bad rtl. */
13955 if (arg0 == error_mark_node || arg1 == error_mark_node)
13956 return const0_rtx;
13958 if (GET_CODE (op0) != CONST_INT
13959 || INTVAL (op0) > 255
13960 || INTVAL (op0) < 0)
13962 error ("argument 1 must be an 8-bit field value");
13963 return const0_rtx;
13966 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13967 op0 = copy_to_mode_reg (mode0, op0);
13969 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13970 op1 = copy_to_mode_reg (mode1, op1);
13972 pat = GEN_FCN (icode) (op0, op1);
13973 if (! pat)
13974 return const0_rtx;
13975 emit_insn (pat);
13977 return NULL_RTX;
13980 static rtx
13981 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13983 rtx pat;
13984 tree arg0 = CALL_EXPR_ARG (exp, 0);
13985 rtx op0 = expand_normal (arg0);
13986 machine_mode tmode = insn_data[icode].operand[0].mode;
13987 machine_mode mode0 = insn_data[icode].operand[1].mode;
13989 if (icode == CODE_FOR_nothing)
13990 /* Builtin not supported on this processor. */
13991 return 0;
13993 /* If we got invalid arguments bail out before generating bad rtl. */
13994 if (arg0 == error_mark_node)
13995 return const0_rtx;
13997 if (icode == CODE_FOR_altivec_vspltisb
13998 || icode == CODE_FOR_altivec_vspltish
13999 || icode == CODE_FOR_altivec_vspltisw)
14001 /* Only allow 5-bit *signed* literals. */
14002 if (GET_CODE (op0) != CONST_INT
14003 || INTVAL (op0) > 15
14004 || INTVAL (op0) < -16)
14006 error ("argument 1 must be a 5-bit signed literal");
14007 return CONST0_RTX (tmode);
14011 if (target == 0
14012 || GET_MODE (target) != tmode
14013 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14014 target = gen_reg_rtx (tmode);
14016 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14017 op0 = copy_to_mode_reg (mode0, op0);
14019 pat = GEN_FCN (icode) (target, op0);
14020 if (! pat)
14021 return 0;
14022 emit_insn (pat);
14024 return target;
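/* Illustrative usage (not part of this file) of the 5-bit-literal check
   above: vec_splat_s32 expands through altivec_vspltisw, so its argument
   must be a compile-time constant in [-16, 15].  Requires -maltivec.  */
#include <altivec.h>
vector signed int
example_splat (void)
{
  return vec_splat_s32 (5);   /* OK: fits in 5 signed bits */
  /* vec_splat_s32 (99) would be rejected with
     "argument 1 must be a 5-bit signed literal".  */
}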
14027 static rtx
14028 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14030 rtx pat, scratch1, scratch2;
14031 tree arg0 = CALL_EXPR_ARG (exp, 0);
14032 rtx op0 = expand_normal (arg0);
14033 machine_mode tmode = insn_data[icode].operand[0].mode;
14034 machine_mode mode0 = insn_data[icode].operand[1].mode;
14036 /* If we have invalid arguments, bail out before generating bad rtl. */
14037 if (arg0 == error_mark_node)
14038 return const0_rtx;
14040 if (target == 0
14041 || GET_MODE (target) != tmode
14042 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14043 target = gen_reg_rtx (tmode);
14045 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14046 op0 = copy_to_mode_reg (mode0, op0);
14048 scratch1 = gen_reg_rtx (mode0);
14049 scratch2 = gen_reg_rtx (mode0);
14051 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14052 if (! pat)
14053 return 0;
14054 emit_insn (pat);
14056 return target;
14059 static rtx
14060 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14062 rtx pat;
14063 tree arg0 = CALL_EXPR_ARG (exp, 0);
14064 tree arg1 = CALL_EXPR_ARG (exp, 1);
14065 rtx op0 = expand_normal (arg0);
14066 rtx op1 = expand_normal (arg1);
14067 machine_mode tmode = insn_data[icode].operand[0].mode;
14068 machine_mode mode0 = insn_data[icode].operand[1].mode;
14069 machine_mode mode1 = insn_data[icode].operand[2].mode;
14071 if (icode == CODE_FOR_nothing)
14072 /* Builtin not supported on this processor. */
14073 return 0;
14075 /* If we got invalid arguments bail out before generating bad rtl. */
14076 if (arg0 == error_mark_node || arg1 == error_mark_node)
14077 return const0_rtx;
14079 if (icode == CODE_FOR_altivec_vcfux
14080 || icode == CODE_FOR_altivec_vcfsx
14081 || icode == CODE_FOR_altivec_vctsxs
14082 || icode == CODE_FOR_altivec_vctuxs
14083 || icode == CODE_FOR_altivec_vspltb
14084 || icode == CODE_FOR_altivec_vsplth
14085 || icode == CODE_FOR_altivec_vspltw)
14087 /* Only allow 5-bit unsigned literals. */
14088 STRIP_NOPS (arg1);
14089 if (TREE_CODE (arg1) != INTEGER_CST
14090 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14092 error ("argument 2 must be a 5-bit unsigned literal");
14093 return CONST0_RTX (tmode);
14096 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14097 || icode == CODE_FOR_dfptstsfi_lt_dd
14098 || icode == CODE_FOR_dfptstsfi_gt_dd
14099 || icode == CODE_FOR_dfptstsfi_unordered_dd
14100 || icode == CODE_FOR_dfptstsfi_eq_td
14101 || icode == CODE_FOR_dfptstsfi_lt_td
14102 || icode == CODE_FOR_dfptstsfi_gt_td
14103 || icode == CODE_FOR_dfptstsfi_unordered_td)
14105 /* Only allow 6-bit unsigned literals. */
14106 STRIP_NOPS (arg0);
14107 if (TREE_CODE (arg0) != INTEGER_CST
14108 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14110 error ("argument 1 must be a 6-bit unsigned literal");
14111 return CONST0_RTX (tmode);
14114 else if (icode == CODE_FOR_xststdcqp_kf
14115 || icode == CODE_FOR_xststdcqp_tf
14116 || icode == CODE_FOR_xststdcdp
14117 || icode == CODE_FOR_xststdcsp
14118 || icode == CODE_FOR_xvtstdcdp
14119 || icode == CODE_FOR_xvtstdcsp)
14121 /* Only allow 7-bit unsigned literals. */
14122 STRIP_NOPS (arg1);
14123 if (TREE_CODE (arg1) != INTEGER_CST
14124 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14126 error ("argument 2 must be a 7-bit unsigned literal");
14127 return CONST0_RTX (tmode);
14130 else if (icode == CODE_FOR_unpackv1ti
14131 || icode == CODE_FOR_unpackkf
14132 || icode == CODE_FOR_unpacktf
14133 || icode == CODE_FOR_unpackif
14134 || icode == CODE_FOR_unpacktd)
14136 /* Only allow 1-bit unsigned literals. */
14137 STRIP_NOPS (arg1);
14138 if (TREE_CODE (arg1) != INTEGER_CST
14139 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
14141 error ("argument 2 must be a 1-bit unsigned literal");
14142 return CONST0_RTX (tmode);
14146 if (target == 0
14147 || GET_MODE (target) != tmode
14148 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14149 target = gen_reg_rtx (tmode);
14151 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14152 op0 = copy_to_mode_reg (mode0, op0);
14153 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14154 op1 = copy_to_mode_reg (mode1, op1);
14156 pat = GEN_FCN (icode) (target, op0, op1);
14157 if (! pat)
14158 return 0;
14159 emit_insn (pat);
14161 return target;
14164 static rtx
14165 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14167 rtx pat, scratch;
14168 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14169 tree arg0 = CALL_EXPR_ARG (exp, 1);
14170 tree arg1 = CALL_EXPR_ARG (exp, 2);
14171 rtx op0 = expand_normal (arg0);
14172 rtx op1 = expand_normal (arg1);
14173 machine_mode tmode = SImode;
14174 machine_mode mode0 = insn_data[icode].operand[1].mode;
14175 machine_mode mode1 = insn_data[icode].operand[2].mode;
14176 int cr6_form_int;
14178 if (TREE_CODE (cr6_form) != INTEGER_CST)
14180 error ("argument 1 of %qs must be a constant",
14181 "__builtin_altivec_predicate");
14182 return const0_rtx;
14184 else
14185 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14187 gcc_assert (mode0 == mode1);
14189 /* If we have invalid arguments, bail out before generating bad rtl. */
14190 if (arg0 == error_mark_node || arg1 == error_mark_node)
14191 return const0_rtx;
14193 if (target == 0
14194 || GET_MODE (target) != tmode
14195 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14196 target = gen_reg_rtx (tmode);
14198 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14199 op0 = copy_to_mode_reg (mode0, op0);
14200 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14201 op1 = copy_to_mode_reg (mode1, op1);
14203 /* Note that for many of the relevant operations (e.g. cmpne or
14204 cmpeq) with float or double operands, it makes more sense for the
14205 mode of the allocated scratch register to select a vector of
14206 integers. But the choice to copy the mode of operand 0 was made
14207 long ago and there are no plans to change it. */
14208 scratch = gen_reg_rtx (mode0);
14210 pat = GEN_FCN (icode) (scratch, op0, op1);
14211 if (! pat)
14212 return 0;
14213 emit_insn (pat);
14215 /* The vec_any* and vec_all* predicates use the same opcodes for two
14216 different operations, but the bits in CR6 will be different
14217 depending on what information we want. So we have to play tricks
14218 with CR6 to get the right bits out.
14220 If you think this is disgusting, look at the specs for the
14221 AltiVec predicates. */
14223 switch (cr6_form_int)
14225 case 0:
14226 emit_insn (gen_cr6_test_for_zero (target));
14227 break;
14228 case 1:
14229 emit_insn (gen_cr6_test_for_zero_reverse (target));
14230 break;
14231 case 2:
14232 emit_insn (gen_cr6_test_for_lt (target));
14233 break;
14234 case 3:
14235 emit_insn (gen_cr6_test_for_lt_reverse (target));
14236 break;
14237 default:
14238 error ("argument 1 of %qs is out of range",
14239 "__builtin_altivec_predicate");
14240 break;
14243 return target;
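/* Illustrative usage (not part of this file): the vec_all_* and
   vec_any_* intrinsics funnel into the expander above, differing only in
   which CR6 test (cr6_form 0..3) follows the compare.  Requires
   -maltivec.  */
#include <altivec.h>
int
example_all_eq (vector signed int a, vector signed int b)
{
  return vec_all_eq (a, b);   /* record-form compare, then a CR6 bit test */
}
int
example_any_ne (vector signed int a, vector signed int b)
{
  return vec_any_ne (a, b);   /* same compare, opposite CR6 sense */
}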
14246 static rtx
14247 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14249 rtx pat, addr;
14250 tree arg0 = CALL_EXPR_ARG (exp, 0);
14251 tree arg1 = CALL_EXPR_ARG (exp, 1);
14252 machine_mode tmode = insn_data[icode].operand[0].mode;
14253 machine_mode mode0 = Pmode;
14254 machine_mode mode1 = Pmode;
14255 rtx op0 = expand_normal (arg0);
14256 rtx op1 = expand_normal (arg1);
14258 if (icode == CODE_FOR_nothing)
14259 /* Builtin not supported on this processor. */
14260 return 0;
14262 /* If we got invalid arguments bail out before generating bad rtl. */
14263 if (arg0 == error_mark_node || arg1 == error_mark_node)
14264 return const0_rtx;
14266 if (target == 0
14267 || GET_MODE (target) != tmode
14268 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14269 target = gen_reg_rtx (tmode);
14271 op1 = copy_to_mode_reg (mode1, op1);
14273 if (op0 == const0_rtx)
14275 addr = gen_rtx_MEM (tmode, op1);
14277 else
14279 op0 = copy_to_mode_reg (mode0, op0);
14280 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14283 pat = GEN_FCN (icode) (target, addr);
14285 if (! pat)
14286 return 0;
14287 emit_insn (pat);
14289 return target;
14292 /* Return a constant vector for use as a little-endian permute control vector
14293 to reverse the order of elements of the given vector mode. */
14294 static rtx
14295 swap_selector_for_mode (machine_mode mode)
14297 /* These are little endian vectors, so their elements are reversed
14298 from what you would normally expect for a permute control vector. */
14299 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14300 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14301 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14302 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14303 unsigned int *swaparray, i;
14304 rtx perm[16];
14306 switch (mode)
14308 case E_V2DFmode:
14309 case E_V2DImode:
14310 swaparray = swap2;
14311 break;
14312 case E_V4SFmode:
14313 case E_V4SImode:
14314 swaparray = swap4;
14315 break;
14316 case E_V8HImode:
14317 swaparray = swap8;
14318 break;
14319 case E_V16QImode:
14320 swaparray = swap16;
14321 break;
14322 default:
14323 gcc_unreachable ();
14326 for (i = 0; i < 16; ++i)
14327 perm[i] = GEN_INT (swaparray[i]);
14329 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
14332 rtx
14333 swap_endian_selector_for_mode (machine_mode mode)
14335 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
14336 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14337 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14338 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14340 unsigned int *swaparray, i;
14341 rtx perm[16];
14343 switch (mode)
14345 case E_V1TImode:
14346 swaparray = swap1;
14347 break;
14348 case E_V2DFmode:
14349 case E_V2DImode:
14350 swaparray = swap2;
14351 break;
14352 case E_V4SFmode:
14353 case E_V4SImode:
14354 swaparray = swap4;
14355 break;
14356 case E_V8HImode:
14357 swaparray = swap8;
14358 break;
14359 default:
14360 gcc_unreachable ();
14363 for (i = 0; i < 16; ++i)
14364 perm[i] = GEN_INT (swaparray[i]);
14366 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
14367 gen_rtvec_v (16, perm)));
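/* Worked example (not part of this file): on a little-endian target
   vperm numbers bytes big-endian, so with both inputs equal the net
   effect of the selectors above is dst[i] = src[15 - sel[i]].  For the
   V4SI selector {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12} that yields
   bytes {12..15, 8..11, 4..7, 0..3}: the four 32-bit elements reversed,
   with the bytes inside each element untouched.  */
#include <stdio.h>
int main (void)
{
  unsigned char sel[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
  unsigned char src[16], dst[16];
  for (int i = 0; i < 16; i++)
    src[i] = i;
  for (int i = 0; i < 16; i++)
    dst[i] = src[15 - sel[i]];  /* little-endian view of vperm(src,src,sel) */
  for (int i = 0; i < 16; i++)
    printf ("%d%c", dst[i], i == 15 ? '\n' : ' ');
  return 0;
}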
14370 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
14371 with -maltivec=be specified. Issue the load followed by an element-
14372 reversing permute. */
14373 void
14374 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14376 rtx tmp = gen_reg_rtx (mode);
14377 rtx load = gen_rtx_SET (tmp, op1);
14378 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14379 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14380 rtx sel = swap_selector_for_mode (mode);
14381 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14383 gcc_assert (REG_P (op0));
14384 emit_insn (par);
14385 emit_insn (gen_rtx_SET (op0, vperm));
14388 /* Generate code for a "stvxl" built-in for a little endian target with
14389 -maltivec=be specified. Issue the store preceded by an element-reversing
14390 permute. */
14391 void
14392 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14394 rtx tmp = gen_reg_rtx (mode);
14395 rtx store = gen_rtx_SET (op0, tmp);
14396 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14397 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14398 rtx sel = swap_selector_for_mode (mode);
14399 rtx vperm;
14401 gcc_assert (REG_P (op1));
14402 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14403 emit_insn (gen_rtx_SET (tmp, vperm));
14404 emit_insn (par);
14407 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14408 specified. Issue the store preceded by an element-reversing permute. */
14409 void
14410 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14412 machine_mode inner_mode = GET_MODE_INNER (mode);
14413 rtx tmp = gen_reg_rtx (mode);
14414 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14415 rtx sel = swap_selector_for_mode (mode);
14416 rtx vperm;
14418 gcc_assert (REG_P (op1));
14419 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14420 emit_insn (gen_rtx_SET (tmp, vperm));
14421 emit_insn (gen_rtx_SET (op0, stvx));
14424 static rtx
14425 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14427 rtx pat, addr;
14428 tree arg0 = CALL_EXPR_ARG (exp, 0);
14429 tree arg1 = CALL_EXPR_ARG (exp, 1);
14430 machine_mode tmode = insn_data[icode].operand[0].mode;
14431 machine_mode mode0 = Pmode;
14432 machine_mode mode1 = Pmode;
14433 rtx op0 = expand_normal (arg0);
14434 rtx op1 = expand_normal (arg1);
14436 if (icode == CODE_FOR_nothing)
14437 /* Builtin not supported on this processor. */
14438 return 0;
14440 /* If we got invalid arguments bail out before generating bad rtl. */
14441 if (arg0 == error_mark_node || arg1 == error_mark_node)
14442 return const0_rtx;
14444 if (target == 0
14445 || GET_MODE (target) != tmode
14446 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14447 target = gen_reg_rtx (tmode);
14449 op1 = copy_to_mode_reg (mode1, op1);
14451 /* For LVX, express the RTL accurately by ANDing the address with -16.
14452 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14453 so the raw address is fine. */
14454 if (icode == CODE_FOR_altivec_lvx_v1ti
14455 || icode == CODE_FOR_altivec_lvx_v2df
14456 || icode == CODE_FOR_altivec_lvx_v2di
14457 || icode == CODE_FOR_altivec_lvx_v4sf
14458 || icode == CODE_FOR_altivec_lvx_v4si
14459 || icode == CODE_FOR_altivec_lvx_v8hi
14460 || icode == CODE_FOR_altivec_lvx_v16qi)
14462 rtx rawaddr;
14463 if (op0 == const0_rtx)
14464 rawaddr = op1;
14465 else
14467 op0 = copy_to_mode_reg (mode0, op0);
14468 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14470 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14471 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14473 /* For -maltivec=be, emit the load and follow it up with a
14474 permute to swap the elements. */
14475 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14477 rtx temp = gen_reg_rtx (tmode);
14478 emit_insn (gen_rtx_SET (temp, addr));
14480 rtx sel = swap_selector_for_mode (tmode);
14481 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14482 UNSPEC_VPERM);
14483 emit_insn (gen_rtx_SET (target, vperm));
14485 else
14486 emit_insn (gen_rtx_SET (target, addr));
14488 else
14490 if (op0 == const0_rtx)
14491 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14492 else
14494 op0 = copy_to_mode_reg (mode0, op0);
14495 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14496 gen_rtx_PLUS (Pmode, op1, op0));
14499 pat = GEN_FCN (icode) (target, addr);
14500 if (! pat)
14501 return 0;
14502 emit_insn (pat);
14505 return target;
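/* A minimal source-level sketch of what this expander services
   (assumption: the standard <altivec.h> interface, not shown in this
   file):

     #include <altivec.h>
     vector signed int
     load_v4si (const int *p)
     {
       return vec_ld (0, p);   // expands through altivec_expand_lv_builtin
     }
*/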
14508 static rtx
14509 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14511 tree arg0 = CALL_EXPR_ARG (exp, 0);
14512 tree arg1 = CALL_EXPR_ARG (exp, 1);
14513 tree arg2 = CALL_EXPR_ARG (exp, 2);
14514 rtx op0 = expand_normal (arg0);
14515 rtx op1 = expand_normal (arg1);
14516 rtx op2 = expand_normal (arg2);
14517 rtx pat, addr;
14518 machine_mode tmode = insn_data[icode].operand[0].mode;
14519 machine_mode mode1 = Pmode;
14520 machine_mode mode2 = Pmode;
14522 /* Invalid arguments; bail out before generating bad rtl. */
14523 if (arg0 == error_mark_node
14524 || arg1 == error_mark_node
14525 || arg2 == error_mark_node)
14526 return const0_rtx;
14528 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14529 op0 = copy_to_mode_reg (tmode, op0);
14531 op2 = copy_to_mode_reg (mode2, op2);
14533 if (op1 == const0_rtx)
14535 addr = gen_rtx_MEM (tmode, op2);
14537 else
14539 op1 = copy_to_mode_reg (mode1, op1);
14540 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14543 pat = GEN_FCN (icode) (addr, op0);
14544 if (pat)
14545 emit_insn (pat);
14546 return NULL_RTX;
14549 static rtx
14550 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14552 rtx pat;
14553 tree arg0 = CALL_EXPR_ARG (exp, 0);
14554 tree arg1 = CALL_EXPR_ARG (exp, 1);
14555 tree arg2 = CALL_EXPR_ARG (exp, 2);
14556 rtx op0 = expand_normal (arg0);
14557 rtx op1 = expand_normal (arg1);
14558 rtx op2 = expand_normal (arg2);
14559 machine_mode mode0 = insn_data[icode].operand[0].mode;
14560 machine_mode mode1 = insn_data[icode].operand[1].mode;
14561 machine_mode mode2 = insn_data[icode].operand[2].mode;
14563 if (icode == CODE_FOR_nothing)
14564 /* Builtin not supported on this processor. */
14565 return NULL_RTX;
14567 /* If we got invalid arguments bail out before generating bad rtl. */
14568 if (arg0 == error_mark_node
14569 || arg1 == error_mark_node
14570 || arg2 == error_mark_node)
14571 return NULL_RTX;
14573 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14574 op0 = copy_to_mode_reg (mode0, op0);
14575 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14576 op1 = copy_to_mode_reg (mode1, op1);
14577 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14578 op2 = copy_to_mode_reg (mode2, op2);
14580 pat = GEN_FCN (icode) (op0, op1, op2);
14581 if (pat)
14582 emit_insn (pat);
14584 return NULL_RTX;
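/* Source-level sketch (assumption: the ISA 3.0 vec_xst_len interface
   documented for these Power9 built-ins; illustrative only):

     #include <altivec.h>
     void
     store_prefix (vector unsigned char v, unsigned char *p, size_t n)
     {
       vec_xst_len (v, p, n);   // stores only the first n bytes of v
     }
*/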
14587 static rtx
14588 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14590 tree arg0 = CALL_EXPR_ARG (exp, 0);
14591 tree arg1 = CALL_EXPR_ARG (exp, 1);
14592 tree arg2 = CALL_EXPR_ARG (exp, 2);
14593 rtx op0 = expand_normal (arg0);
14594 rtx op1 = expand_normal (arg1);
14595 rtx op2 = expand_normal (arg2);
14596 rtx pat, addr, rawaddr;
14597 machine_mode tmode = insn_data[icode].operand[0].mode;
14598 machine_mode smode = insn_data[icode].operand[1].mode;
14599 machine_mode mode1 = Pmode;
14600 machine_mode mode2 = Pmode;
14602 /* Invalid arguments; bail out before generating bad rtl. */
14603 if (arg0 == error_mark_node
14604 || arg1 == error_mark_node
14605 || arg2 == error_mark_node)
14606 return const0_rtx;
14608 op2 = copy_to_mode_reg (mode2, op2);
14610 /* For STVX, express the RTL accurately by ANDing the address with -16.
14611 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14612 so the raw address is fine. */
14613 if (icode == CODE_FOR_altivec_stvx_v2df
14614 || icode == CODE_FOR_altivec_stvx_v2di
14615 || icode == CODE_FOR_altivec_stvx_v4sf
14616 || icode == CODE_FOR_altivec_stvx_v4si
14617 || icode == CODE_FOR_altivec_stvx_v8hi
14618 || icode == CODE_FOR_altivec_stvx_v16qi)
14620 if (op1 == const0_rtx)
14621 rawaddr = op2;
14622 else
14624 op1 = copy_to_mode_reg (mode1, op1);
14625 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14628 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14629 addr = gen_rtx_MEM (tmode, addr);
14631 op0 = copy_to_mode_reg (tmode, op0);
14633 /* For -maltivec=be, emit a permute to swap the elements, followed
14634 by the store. */
14635 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14637 rtx temp = gen_reg_rtx (tmode);
14638 rtx sel = swap_selector_for_mode (tmode);
14639 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14640 UNSPEC_VPERM);
14641 emit_insn (gen_rtx_SET (temp, vperm));
14642 emit_insn (gen_rtx_SET (addr, temp));
14644 else
14645 emit_insn (gen_rtx_SET (addr, op0));
14647 else
14649 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14650 op0 = copy_to_mode_reg (smode, op0);
14652 if (op1 == const0_rtx)
14653 addr = gen_rtx_MEM (tmode, op2);
14654 else
14656 op1 = copy_to_mode_reg (mode1, op1);
14657 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14660 pat = GEN_FCN (icode) (addr, op0);
14661 if (pat)
14662 emit_insn (pat);
14665 return NULL_RTX;
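/* Source-level sketch for the stvx path above (assumption: the
   standard <altivec.h> interface):

     #include <altivec.h>
     void
     store_v4si (vector signed int v, int *p)
     {
       vec_st (v, 0, p);   // 16-byte-aligned store; the EA is ANDed with -16
     }
*/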
14668 /* Return the appropriate SPR number associated with the given builtin. */
14669 static inline HOST_WIDE_INT
14670 htm_spr_num (enum rs6000_builtins code)
14672 if (code == HTM_BUILTIN_GET_TFHAR
14673 || code == HTM_BUILTIN_SET_TFHAR)
14674 return TFHAR_SPR;
14675 else if (code == HTM_BUILTIN_GET_TFIAR
14676 || code == HTM_BUILTIN_SET_TFIAR)
14677 return TFIAR_SPR;
14678 else if (code == HTM_BUILTIN_GET_TEXASR
14679 || code == HTM_BUILTIN_SET_TEXASR)
14680 return TEXASR_SPR;
14681 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14682 || code == HTM_BUILTIN_SET_TEXASRU);
14683 return TEXASRU_SPR;
14686 /* Return the appropriate SPR regno associated with the given builtin. */
14687 static inline HOST_WIDE_INT
14688 htm_spr_regno (enum rs6000_builtins code)
14690 if (code == HTM_BUILTIN_GET_TFHAR
14691 || code == HTM_BUILTIN_SET_TFHAR)
14692 return TFHAR_REGNO;
14693 else if (code == HTM_BUILTIN_GET_TFIAR
14694 || code == HTM_BUILTIN_SET_TFIAR)
14695 return TFIAR_REGNO;
14696 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14697 || code == HTM_BUILTIN_SET_TEXASR
14698 || code == HTM_BUILTIN_GET_TEXASRU
14699 || code == HTM_BUILTIN_SET_TEXASRU);
14700 return TEXASR_REGNO;
14703 /* Return the correct ICODE value depending on whether we are
14704 setting or reading the HTM SPRs. */
14705 static inline enum insn_code
14706 rs6000_htm_spr_icode (bool nonvoid)
14708 if (nonvoid)
14709 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14710 else
14711 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14714 /* Expand the HTM builtin in EXP and store the result in TARGET.
14715 Store true in *EXPANDEDP if we found a builtin to expand. */
14716 static rtx
14717 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14719 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14720 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14721 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14722 const struct builtin_description *d;
14723 size_t i;
14725 *expandedp = true;
14727 if (!TARGET_POWERPC64
14728 && (fcode == HTM_BUILTIN_TABORTDC
14729 || fcode == HTM_BUILTIN_TABORTDCI))
14731 size_t uns_fcode = (size_t)fcode;
14732 const char *name = rs6000_builtin_info[uns_fcode].name;
14733 error ("builtin %qs is only valid in 64-bit mode", name);
14734 return const0_rtx;
14737 /* Expand the HTM builtins. */
14738 d = bdesc_htm;
14739 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14740 if (d->code == fcode)
14742 rtx op[MAX_HTM_OPERANDS], pat;
14743 int nopnds = 0;
14744 tree arg;
14745 call_expr_arg_iterator iter;
14746 unsigned attr = rs6000_builtin_info[fcode].attr;
14747 enum insn_code icode = d->icode;
14748 const struct insn_operand_data *insn_op;
14749 bool uses_spr = (attr & RS6000_BTC_SPR);
14750 rtx cr = NULL_RTX;
14752 if (uses_spr)
14753 icode = rs6000_htm_spr_icode (nonvoid);
14754 insn_op = &insn_data[icode].operand[0];
14756 if (nonvoid)
14758 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
14759 if (!target
14760 || GET_MODE (target) != tmode
14761 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14762 target = gen_reg_rtx (tmode);
14763 if (uses_spr)
14764 op[nopnds++] = target;
14767 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14769 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14770 return const0_rtx;
14772 insn_op = &insn_data[icode].operand[nopnds];
14774 op[nopnds] = expand_normal (arg);
14776 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14778 if (!strcmp (insn_op->constraint, "n"))
14780 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14781 if (!CONST_INT_P (op[nopnds]))
14782 error ("argument %d must be an unsigned literal", arg_num);
14783 else
14784 error ("argument %d is an unsigned literal that is "
14785 "out of range", arg_num);
14786 return const0_rtx;
14788 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14791 nopnds++;
14794 /* Handle the builtins for extended mnemonics. These accept
14795 no arguments, but map to builtins that take arguments. */
14796 switch (fcode)
14798 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14799 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14800 op[nopnds++] = GEN_INT (1);
14801 if (flag_checking)
14802 attr |= RS6000_BTC_UNARY;
14803 break;
14804 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14805 op[nopnds++] = GEN_INT (0);
14806 if (flag_checking)
14807 attr |= RS6000_BTC_UNARY;
14808 break;
14809 default:
14810 break;
14813 /* If this builtin accesses SPRs, then pass in the appropriate
14814 SPR number and SPR regno as the last two operands. */
14815 if (uses_spr)
14817 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14818 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14819 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14821 /* If this builtin accesses a CR, then pass in a scratch
14822 CR as the last operand. */
14823 else if (attr & RS6000_BTC_CR)
14824 { cr = gen_reg_rtx (CCmode);
14825 op[nopnds++] = cr;
14828 if (flag_checking)
14830 int expected_nopnds = 0;
14831 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14832 expected_nopnds = 1;
14833 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14834 expected_nopnds = 2;
14835 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14836 expected_nopnds = 3;
14837 if (!(attr & RS6000_BTC_VOID))
14838 expected_nopnds += 1;
14839 if (uses_spr)
14840 expected_nopnds += 2;
14842 gcc_assert (nopnds == expected_nopnds
14843 && nopnds <= MAX_HTM_OPERANDS);
14846 switch (nopnds)
14848 case 1:
14849 pat = GEN_FCN (icode) (op[0]);
14850 break;
14851 case 2:
14852 pat = GEN_FCN (icode) (op[0], op[1]);
14853 break;
14854 case 3:
14855 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14856 break;
14857 case 4:
14858 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14859 break;
14860 default:
14861 gcc_unreachable ();
14863 if (!pat)
14864 return NULL_RTX;
14865 emit_insn (pat);
14867 if (attr & RS6000_BTC_CR)
14869 if (fcode == HTM_BUILTIN_TBEGIN)
14871 /* Emit code to set TARGET to true or false depending on
14872 whether the tbegin. instruction succeeded or failed
14873 to start a transaction. We do this by placing the 1's
14874 complement of CR's EQ bit into TARGET. */
14875 rtx scratch = gen_reg_rtx (SImode);
14876 emit_insn (gen_rtx_SET (scratch,
14877 gen_rtx_EQ (SImode, cr,
14878 const0_rtx)));
14879 emit_insn (gen_rtx_SET (target,
14880 gen_rtx_XOR (SImode, scratch,
14881 GEN_INT (1))));
14883 else
14885 /* Emit code to copy the 4-bit condition register field
14886 CR into the least significant end of register TARGET. */
14887 rtx scratch1 = gen_reg_rtx (SImode);
14888 rtx scratch2 = gen_reg_rtx (SImode);
14889 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14890 emit_insn (gen_movcc (subreg, cr));
14891 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14892 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14896 if (nonvoid)
14897 return target;
14898 return const0_rtx;
14901 *expandedp = false;
14902 return NULL_RTX;
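/* A minimal usage sketch for the HTM built-ins expanded above
   (assumption: the documented __builtin_tbegin/__builtin_tend
   interface; handle_failure is hypothetical):

     if (__builtin_tbegin (0))      // nonzero if the transaction started
       {
         ...                        // transactional region
         __builtin_tend (0);
       }
     else
       handle_failure (__builtin_get_texasru ());
*/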
14905 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14907 static rtx
14908 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14909 rtx target)
14911 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14912 if (fcode == RS6000_BUILTIN_CPU_INIT)
14913 return const0_rtx;
14915 if (target == 0 || GET_MODE (target) != SImode)
14916 target = gen_reg_rtx (SImode);
14918 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14919 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14920 /* Target clones creates an ARRAY_REF instead of a STRING_CST; convert it
14921 back to a STRING_CST. */
14922 if (TREE_CODE (arg) == ARRAY_REF
14923 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
14924 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
14925 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
14926 arg = TREE_OPERAND (arg, 0);
14928 if (TREE_CODE (arg) != STRING_CST)
14930 error ("builtin %qs only accepts a string argument",
14931 rs6000_builtin_info[(size_t) fcode].name);
14932 return const0_rtx;
14935 if (fcode == RS6000_BUILTIN_CPU_IS)
14937 const char *cpu = TREE_STRING_POINTER (arg);
14938 rtx cpuid = NULL_RTX;
14939 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14940 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14942 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14943 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14944 break;
14946 if (cpuid == NULL_RTX)
14948 /* Invalid CPU argument. */
14949 error ("cpu %qs is an invalid argument to builtin %qs",
14950 cpu, rs6000_builtin_info[(size_t) fcode].name);
14951 return const0_rtx;
14954 rtx platform = gen_reg_rtx (SImode);
14955 rtx tcbmem = gen_const_mem (SImode,
14956 gen_rtx_PLUS (Pmode,
14957 gen_rtx_REG (Pmode, TLS_REGNUM),
14958 GEN_INT (TCB_PLATFORM_OFFSET)));
14959 emit_move_insn (platform, tcbmem);
14960 emit_insn (gen_eqsi3 (target, platform, cpuid));
14962 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14964 const char *hwcap = TREE_STRING_POINTER (arg);
14965 rtx mask = NULL_RTX;
14966 int hwcap_offset;
14967 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14968 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14970 mask = GEN_INT (cpu_supports_info[i].mask);
14971 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14972 break;
14974 if (mask == NULL_RTX)
14976 /* Invalid HWCAP argument. */
14977 error ("%s %qs is an invalid argument to builtin %qs",
14978 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
14979 return const0_rtx;
14982 rtx tcb_hwcap = gen_reg_rtx (SImode);
14983 rtx tcbmem = gen_const_mem (SImode,
14984 gen_rtx_PLUS (Pmode,
14985 gen_rtx_REG (Pmode, TLS_REGNUM),
14986 GEN_INT (hwcap_offset)));
14987 emit_move_insn (tcb_hwcap, tcbmem);
14988 rtx scratch1 = gen_reg_rtx (SImode);
14989 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14990 rtx scratch2 = gen_reg_rtx (SImode);
14991 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14992 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14994 else
14995 gcc_unreachable ();
14997 /* Record that we have expanded a CPU builtin, so that we can later
14998 emit a reference to the special symbol exported by LIBC to ensure we
14999 do not link against an old LIBC that doesn't support this feature. */
15000 cpu_builtin_p = true;
15002 #else
15003 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
15004 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
15006 /* For old LIBCs, always return FALSE. */
15007 emit_move_insn (target, GEN_INT (0));
15008 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15010 return target;
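/* Source-level sketch of the two built-ins handled above (assumption:
   the documented __builtin_cpu_is/__builtin_cpu_supports interface;
   use_power9_path and use_vsx_path are hypothetical):

     __builtin_cpu_init ();                // a nop here, expanded to nothing
     if (__builtin_cpu_is ("power9"))      // compares the TCB platform word
       use_power9_path ();
     if (__builtin_cpu_supports ("vsx"))   // tests a TCB hwcap bit
       use_vsx_path ();
*/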
15013 static rtx
15014 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15016 rtx pat;
15017 tree arg0 = CALL_EXPR_ARG (exp, 0);
15018 tree arg1 = CALL_EXPR_ARG (exp, 1);
15019 tree arg2 = CALL_EXPR_ARG (exp, 2);
15020 rtx op0 = expand_normal (arg0);
15021 rtx op1 = expand_normal (arg1);
15022 rtx op2 = expand_normal (arg2);
15023 machine_mode tmode = insn_data[icode].operand[0].mode;
15024 machine_mode mode0 = insn_data[icode].operand[1].mode;
15025 machine_mode mode1 = insn_data[icode].operand[2].mode;
15026 machine_mode mode2 = insn_data[icode].operand[3].mode;
15028 if (icode == CODE_FOR_nothing)
15029 /* Builtin not supported on this processor. */
15030 return 0;
15032 /* If we got invalid arguments bail out before generating bad rtl. */
15033 if (arg0 == error_mark_node
15034 || arg1 == error_mark_node
15035 || arg2 == error_mark_node)
15036 return const0_rtx;
15038 /* Check and prepare the arguments depending on the instruction code.
15040 Note that a switch statement instead of this sequence of tests
15041 would be incorrect, because many of the CODE_FOR values could be
15042 CODE_FOR_nothing, and that would yield multiple case labels with
15043 identical values. We would never reach here at runtime in that
15044 case anyway. */
15045 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15046 || icode == CODE_FOR_altivec_vsldoi_v2df
15047 || icode == CODE_FOR_altivec_vsldoi_v4si
15048 || icode == CODE_FOR_altivec_vsldoi_v8hi
15049 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15051 /* Only allow 4-bit unsigned literals. */
15052 STRIP_NOPS (arg2);
15053 if (TREE_CODE (arg2) != INTEGER_CST
15054 || TREE_INT_CST_LOW (arg2) & ~0xf)
15056 error ("argument 3 must be a 4-bit unsigned literal");
15057 return CONST0_RTX (tmode);
15060 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15061 || icode == CODE_FOR_vsx_xxpermdi_v2di
15062 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15063 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15064 || icode == CODE_FOR_vsx_xxpermdi_v1ti
15065 || icode == CODE_FOR_vsx_xxpermdi_v4sf
15066 || icode == CODE_FOR_vsx_xxpermdi_v4si
15067 || icode == CODE_FOR_vsx_xxpermdi_v8hi
15068 || icode == CODE_FOR_vsx_xxpermdi_v16qi
15069 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15070 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15071 || icode == CODE_FOR_vsx_xxsldwi_v4si
15072 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15073 || icode == CODE_FOR_vsx_xxsldwi_v2di
15074 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15076 /* Only allow 2-bit unsigned literals. */
15077 STRIP_NOPS (arg2);
15078 if (TREE_CODE (arg2) != INTEGER_CST
15079 || TREE_INT_CST_LOW (arg2) & ~0x3)
15081 error ("argument 3 must be a 2-bit unsigned literal");
15082 return CONST0_RTX (tmode);
15085 else if (icode == CODE_FOR_vsx_set_v2df
15086 || icode == CODE_FOR_vsx_set_v2di
15087 || icode == CODE_FOR_bcdadd
15088 || icode == CODE_FOR_bcdadd_lt
15089 || icode == CODE_FOR_bcdadd_eq
15090 || icode == CODE_FOR_bcdadd_gt
15091 || icode == CODE_FOR_bcdsub
15092 || icode == CODE_FOR_bcdsub_lt
15093 || icode == CODE_FOR_bcdsub_eq
15094 || icode == CODE_FOR_bcdsub_gt)
15096 /* Only allow 1-bit unsigned literals. */
15097 STRIP_NOPS (arg2);
15098 if (TREE_CODE (arg2) != INTEGER_CST
15099 || TREE_INT_CST_LOW (arg2) & ~0x1)
15101 error ("argument 3 must be a 1-bit unsigned literal");
15102 return CONST0_RTX (tmode);
15105 else if (icode == CODE_FOR_dfp_ddedpd_dd
15106 || icode == CODE_FOR_dfp_ddedpd_td)
15108 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15109 STRIP_NOPS (arg0);
15110 if (TREE_CODE (arg0) != INTEGER_CST
15111 || TREE_INT_CST_LOW (arg0) & ~0x3)
15113 error ("argument 1 must be 0 or 2");
15114 return CONST0_RTX (tmode);
15117 else if (icode == CODE_FOR_dfp_denbcd_dd
15118 || icode == CODE_FOR_dfp_denbcd_td)
15120 /* Only allow 1-bit unsigned literals. */
15121 STRIP_NOPS (arg0);
15122 if (TREE_CODE (arg0) != INTEGER_CST
15123 || TREE_INT_CST_LOW (arg0) & ~0x1)
15125 error ("argument 1 must be a 1-bit unsigned literal");
15126 return CONST0_RTX (tmode);
15129 else if (icode == CODE_FOR_dfp_dscli_dd
15130 || icode == CODE_FOR_dfp_dscli_td
15131 || icode == CODE_FOR_dfp_dscri_dd
15132 || icode == CODE_FOR_dfp_dscri_td)
15134 /* Only allow 6-bit unsigned literals. */
15135 STRIP_NOPS (arg1);
15136 if (TREE_CODE (arg1) != INTEGER_CST
15137 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15139 error ("argument 2 must be a 6-bit unsigned literal");
15140 return CONST0_RTX (tmode);
15143 else if (icode == CODE_FOR_crypto_vshasigmaw
15144 || icode == CODE_FOR_crypto_vshasigmad)
15146 /* Check whether the 2nd and 3rd arguments are integer constants and in
15147 range and prepare arguments. */
15148 STRIP_NOPS (arg1);
15149 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
15151 error ("argument 2 must be 0 or 1");
15152 return CONST0_RTX (tmode);
15155 STRIP_NOPS (arg2);
15156 if (TREE_CODE (arg2) != INTEGER_CST
15157 || wi::geu_p (wi::to_wide (arg2), 16))
15159 error ("argument 3 must be in the range 0..15");
15160 return CONST0_RTX (tmode);
15164 if (target == 0
15165 || GET_MODE (target) != tmode
15166 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15167 target = gen_reg_rtx (tmode);
15169 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15170 op0 = copy_to_mode_reg (mode0, op0);
15171 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15172 op1 = copy_to_mode_reg (mode1, op1);
15173 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15174 op2 = copy_to_mode_reg (mode2, op2);
15176 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15177 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15178 else
15179 pat = GEN_FCN (icode) (target, op0, op1, op2);
15180 if (! pat)
15181 return 0;
15182 emit_insn (pat);
15184 return target;
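/* For instance (illustrative), the vsldoi path above requires a 4-bit
   literal third argument at the source level:

     vec_sld (a, b, 3);    // OK: 3 fits in 4 bits
     vec_sld (a, b, n);    // rejected: "argument 3 must be a 4-bit
                           // unsigned literal" when n is not a constant
*/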
15188 /* Expand the dst builtins. */
15189 static rtx
15190 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15191 bool *expandedp)
15193 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15194 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15195 tree arg0, arg1, arg2;
15196 machine_mode mode0, mode1;
15197 rtx pat, op0, op1, op2;
15198 const struct builtin_description *d;
15199 size_t i;
15201 *expandedp = false;
15203 /* Handle DST variants. */
15204 d = bdesc_dst;
15205 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15206 if (d->code == fcode)
15208 arg0 = CALL_EXPR_ARG (exp, 0);
15209 arg1 = CALL_EXPR_ARG (exp, 1);
15210 arg2 = CALL_EXPR_ARG (exp, 2);
15211 op0 = expand_normal (arg0);
15212 op1 = expand_normal (arg1);
15213 op2 = expand_normal (arg2);
15214 mode0 = insn_data[d->icode].operand[0].mode;
15215 mode1 = insn_data[d->icode].operand[1].mode;
15217 /* Invalid arguments, bail out before generating bad rtl. */
15218 if (arg0 == error_mark_node
15219 || arg1 == error_mark_node
15220 || arg2 == error_mark_node)
15221 return const0_rtx;
15223 *expandedp = true;
15224 STRIP_NOPS (arg2);
15225 if (TREE_CODE (arg2) != INTEGER_CST
15226 || TREE_INT_CST_LOW (arg2) & ~0x3)
15228 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15229 return const0_rtx;
15232 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15233 op0 = copy_to_mode_reg (Pmode, op0);
15234 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15235 op1 = copy_to_mode_reg (mode1, op1);
15237 pat = GEN_FCN (d->icode) (op0, op1, op2);
15238 if (pat != 0)
15239 emit_insn (pat);
15241 return NULL_RTX;
15244 return NULL_RTX;
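/* Source-level sketch for the dst expanders above (assumption: the
   standard <altivec.h> data-stream-touch interface):

     vec_dst (p, ctl, 0);   // start data stream 0; the last argument is
                            // the 2-bit literal checked above
*/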
15247 /* Expand vec_init builtin. */
15248 static rtx
15249 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15251 machine_mode tmode = TYPE_MODE (type);
15252 machine_mode inner_mode = GET_MODE_INNER (tmode);
15253 int i, n_elt = GET_MODE_NUNITS (tmode);
15255 gcc_assert (VECTOR_MODE_P (tmode));
15256 gcc_assert (n_elt == call_expr_nargs (exp));
15258 if (!target || !register_operand (target, tmode))
15259 target = gen_reg_rtx (tmode);
15261 /* If we have a vector comprised of a single element, such as V1TImode, do
15262 the initialization directly. */
15263 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15265 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15266 emit_move_insn (target, gen_lowpart (tmode, x));
15268 else
15270 rtvec v = rtvec_alloc (n_elt);
15272 for (i = 0; i < n_elt; ++i)
15274 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15275 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15278 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15281 return target;
15284 /* Return the integer constant in ARG. Constrain it to be in the range
15285 of the subparts of VEC_TYPE; issue an error if not. */
15287 static int
15288 get_element_number (tree vec_type, tree arg)
15290 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15292 if (!tree_fits_uhwi_p (arg)
15293 || (elt = tree_to_uhwi (arg), elt > max))
15295 error ("selector must be an integer constant in the range 0..%wi", max);
15296 return 0;
15299 return elt;
15302 /* Expand vec_set builtin. */
15303 static rtx
15304 altivec_expand_vec_set_builtin (tree exp)
15306 machine_mode tmode, mode1;
15307 tree arg0, arg1, arg2;
15308 int elt;
15309 rtx op0, op1;
15311 arg0 = CALL_EXPR_ARG (exp, 0);
15312 arg1 = CALL_EXPR_ARG (exp, 1);
15313 arg2 = CALL_EXPR_ARG (exp, 2);
15315 tmode = TYPE_MODE (TREE_TYPE (arg0));
15316 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15317 gcc_assert (VECTOR_MODE_P (tmode));
15319 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15320 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15321 elt = get_element_number (TREE_TYPE (arg0), arg2);
15323 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15324 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15326 op0 = force_reg (tmode, op0);
15327 op1 = force_reg (mode1, op1);
15329 rs6000_expand_vector_set (op0, op1, elt);
15331 return op0;
15334 /* Expand vec_ext builtin. */
15335 static rtx
15336 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15338 machine_mode tmode, mode0;
15339 tree arg0, arg1;
15340 rtx op0;
15341 rtx op1;
15343 arg0 = CALL_EXPR_ARG (exp, 0);
15344 arg1 = CALL_EXPR_ARG (exp, 1);
15346 op0 = expand_normal (arg0);
15347 op1 = expand_normal (arg1);
15349 /* Call get_element_number to validate arg1 if it is a constant. */
15350 if (TREE_CODE (arg1) == INTEGER_CST)
15351 (void) get_element_number (TREE_TYPE (arg0), arg1);
15353 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15354 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15355 gcc_assert (VECTOR_MODE_P (mode0));
15357 op0 = force_reg (mode0, op0);
15359 if (optimize || !target || !register_operand (target, tmode))
15360 target = gen_reg_rtx (tmode);
15362 rs6000_expand_vector_extract (target, op0, op1);
15364 return target;
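/* Source-level sketch (assumption: the standard vec_extract/vec_insert
   interface serviced by the two expanders above):

     int x = vec_extract (v, 2);   // read element 2 of a vector int v
     v = vec_insert (42, v, 1);    // replace element 1 with 42
*/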
15367 /* Expand the builtin in EXP and store the result in TARGET. Store
15368 true in *EXPANDEDP if we found a builtin to expand. */
15369 static rtx
15370 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15372 const struct builtin_description *d;
15373 size_t i;
15374 enum insn_code icode;
15375 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15376 tree arg0, arg1, arg2;
15377 rtx op0, pat;
15378 machine_mode tmode, mode0;
15379 enum rs6000_builtins fcode
15380 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15382 if (rs6000_overloaded_builtin_p (fcode))
15384 *expandedp = true;
15385 error ("unresolved overload for Altivec builtin %qF", fndecl);
15387 /* Given it is invalid, just generate a normal call. */
15388 return expand_call (exp, target, false);
15391 target = altivec_expand_dst_builtin (exp, target, expandedp);
15392 if (*expandedp)
15393 return target;
15395 *expandedp = true;
15397 switch (fcode)
15399 case ALTIVEC_BUILTIN_STVX_V2DF:
15400 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
15401 case ALTIVEC_BUILTIN_STVX_V2DI:
15402 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
15403 case ALTIVEC_BUILTIN_STVX_V4SF:
15404 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
15405 case ALTIVEC_BUILTIN_STVX:
15406 case ALTIVEC_BUILTIN_STVX_V4SI:
15407 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
15408 case ALTIVEC_BUILTIN_STVX_V8HI:
15409 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
15410 case ALTIVEC_BUILTIN_STVX_V16QI:
15411 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
15412 case ALTIVEC_BUILTIN_STVEBX:
15413 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15414 case ALTIVEC_BUILTIN_STVEHX:
15415 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15416 case ALTIVEC_BUILTIN_STVEWX:
15417 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15418 case ALTIVEC_BUILTIN_STVXL_V2DF:
15419 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15420 case ALTIVEC_BUILTIN_STVXL_V2DI:
15421 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15422 case ALTIVEC_BUILTIN_STVXL_V4SF:
15423 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15424 case ALTIVEC_BUILTIN_STVXL:
15425 case ALTIVEC_BUILTIN_STVXL_V4SI:
15426 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15427 case ALTIVEC_BUILTIN_STVXL_V8HI:
15428 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15429 case ALTIVEC_BUILTIN_STVXL_V16QI:
15430 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15432 case ALTIVEC_BUILTIN_STVLX:
15433 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15434 case ALTIVEC_BUILTIN_STVLXL:
15435 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15436 case ALTIVEC_BUILTIN_STVRX:
15437 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15438 case ALTIVEC_BUILTIN_STVRXL:
15439 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15441 case P9V_BUILTIN_STXVL:
15442 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
15444 case P9V_BUILTIN_XST_LEN_R:
15445 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
15447 case VSX_BUILTIN_STXVD2X_V1TI:
15448 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15449 case VSX_BUILTIN_STXVD2X_V2DF:
15450 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15451 case VSX_BUILTIN_STXVD2X_V2DI:
15452 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15453 case VSX_BUILTIN_STXVW4X_V4SF:
15454 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15455 case VSX_BUILTIN_STXVW4X_V4SI:
15456 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15457 case VSX_BUILTIN_STXVW4X_V8HI:
15458 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15459 case VSX_BUILTIN_STXVW4X_V16QI:
15460 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15462 /* For the following on big endian, it's ok to use any appropriate
15463 unaligned-supporting store, so use a generic expander. For
15464 little-endian, the exact element-reversing instruction must
15465 be used. */
15466 case VSX_BUILTIN_ST_ELEMREV_V1TI:
15468 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
15469 : CODE_FOR_vsx_st_elemrev_v1ti);
15470 return altivec_expand_stv_builtin (code, exp);
15472 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15474 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15475 : CODE_FOR_vsx_st_elemrev_v2df);
15476 return altivec_expand_stv_builtin (code, exp);
15478 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15480 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15481 : CODE_FOR_vsx_st_elemrev_v2di);
15482 return altivec_expand_stv_builtin (code, exp);
15484 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15486 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15487 : CODE_FOR_vsx_st_elemrev_v4sf);
15488 return altivec_expand_stv_builtin (code, exp);
15490 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15492 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15493 : CODE_FOR_vsx_st_elemrev_v4si);
15494 return altivec_expand_stv_builtin (code, exp);
15496 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15498 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15499 : CODE_FOR_vsx_st_elemrev_v8hi);
15500 return altivec_expand_stv_builtin (code, exp);
15502 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15504 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15505 : CODE_FOR_vsx_st_elemrev_v16qi);
15506 return altivec_expand_stv_builtin (code, exp);
15509 case ALTIVEC_BUILTIN_MFVSCR:
15510 icode = CODE_FOR_altivec_mfvscr;
15511 tmode = insn_data[icode].operand[0].mode;
15513 if (target == 0
15514 || GET_MODE (target) != tmode
15515 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15516 target = gen_reg_rtx (tmode);
15518 pat = GEN_FCN (icode) (target);
15519 if (! pat)
15520 return 0;
15521 emit_insn (pat);
15522 return target;
15524 case ALTIVEC_BUILTIN_MTVSCR:
15525 icode = CODE_FOR_altivec_mtvscr;
15526 arg0 = CALL_EXPR_ARG (exp, 0);
15527 op0 = expand_normal (arg0);
15528 mode0 = insn_data[icode].operand[0].mode;
15530 /* If we got invalid arguments bail out before generating bad rtl. */
15531 if (arg0 == error_mark_node)
15532 return const0_rtx;
15534 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15535 op0 = copy_to_mode_reg (mode0, op0);
15537 pat = GEN_FCN (icode) (op0);
15538 if (pat)
15539 emit_insn (pat);
15540 return NULL_RTX;
15542 case ALTIVEC_BUILTIN_DSSALL:
15543 emit_insn (gen_altivec_dssall ());
15544 return NULL_RTX;
15546 case ALTIVEC_BUILTIN_DSS:
15547 icode = CODE_FOR_altivec_dss;
15548 arg0 = CALL_EXPR_ARG (exp, 0);
15549 STRIP_NOPS (arg0);
15550 op0 = expand_normal (arg0);
15551 mode0 = insn_data[icode].operand[0].mode;
15553 /* If we got invalid arguments bail out before generating bad rtl. */
15554 if (arg0 == error_mark_node)
15555 return const0_rtx;
15557 if (TREE_CODE (arg0) != INTEGER_CST
15558 || TREE_INT_CST_LOW (arg0) & ~0x3)
15560 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
15561 return const0_rtx;
15564 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15565 op0 = copy_to_mode_reg (mode0, op0);
15567 emit_insn (gen_altivec_dss (op0));
15568 return NULL_RTX;
15570 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15571 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15572 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15573 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15574 case VSX_BUILTIN_VEC_INIT_V2DF:
15575 case VSX_BUILTIN_VEC_INIT_V2DI:
15576 case VSX_BUILTIN_VEC_INIT_V1TI:
15577 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15579 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15580 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15581 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15582 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15583 case VSX_BUILTIN_VEC_SET_V2DF:
15584 case VSX_BUILTIN_VEC_SET_V2DI:
15585 case VSX_BUILTIN_VEC_SET_V1TI:
15586 return altivec_expand_vec_set_builtin (exp);
15588 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15589 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15590 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15591 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15592 case VSX_BUILTIN_VEC_EXT_V2DF:
15593 case VSX_BUILTIN_VEC_EXT_V2DI:
15594 case VSX_BUILTIN_VEC_EXT_V1TI:
15595 return altivec_expand_vec_ext_builtin (exp, target);
15597 case P9V_BUILTIN_VEC_EXTRACT4B:
15598 arg1 = CALL_EXPR_ARG (exp, 1);
15599 STRIP_NOPS (arg1);
15601 /* Generate a normal call if it is invalid. */
15602 if (arg1 == error_mark_node)
15603 return expand_call (exp, target, false);
15605 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
15607 error ("second argument to %qs must be 0..12", "vec_vextract4b");
15608 return expand_call (exp, target, false);
15610 break;
15612 case P9V_BUILTIN_VEC_INSERT4B:
15613 arg2 = CALL_EXPR_ARG (exp, 2);
15614 STRIP_NOPS (arg2);
15616 /* Generate a normal call if it is invalid. */
15617 if (arg2 == error_mark_node)
15618 return expand_call (exp, target, false);
15620 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
15622 error ("third argument to %qs must be 0..12", "vec_vinsert4b");
15623 return expand_call (exp, target, false);
15625 break;
15627 default:
15628 break;
15629 /* Fall through. */
15632 /* Expand abs* operations. */
15633 d = bdesc_abs;
15634 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15635 if (d->code == fcode)
15636 return altivec_expand_abs_builtin (d->icode, exp, target);
15638 /* Expand the AltiVec predicates. */
15639 d = bdesc_altivec_preds;
15640 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15641 if (d->code == fcode)
15642 return altivec_expand_predicate_builtin (d->icode, exp, target);
15644 /* LV* are funky. We initialized them differently. */
15645 switch (fcode)
15647 case ALTIVEC_BUILTIN_LVSL:
15648 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15649 exp, target, false);
15650 case ALTIVEC_BUILTIN_LVSR:
15651 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15652 exp, target, false);
15653 case ALTIVEC_BUILTIN_LVEBX:
15654 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15655 exp, target, false);
15656 case ALTIVEC_BUILTIN_LVEHX:
15657 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15658 exp, target, false);
15659 case ALTIVEC_BUILTIN_LVEWX:
15660 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15661 exp, target, false);
15662 case ALTIVEC_BUILTIN_LVXL_V2DF:
15663 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15664 exp, target, false);
15665 case ALTIVEC_BUILTIN_LVXL_V2DI:
15666 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15667 exp, target, false);
15668 case ALTIVEC_BUILTIN_LVXL_V4SF:
15669 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15670 exp, target, false);
15671 case ALTIVEC_BUILTIN_LVXL:
15672 case ALTIVEC_BUILTIN_LVXL_V4SI:
15673 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15674 exp, target, false);
15675 case ALTIVEC_BUILTIN_LVXL_V8HI:
15676 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15677 exp, target, false);
15678 case ALTIVEC_BUILTIN_LVXL_V16QI:
15679 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15680 exp, target, false);
15681 case ALTIVEC_BUILTIN_LVX_V1TI:
15682 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
15683 exp, target, false);
15684 case ALTIVEC_BUILTIN_LVX_V2DF:
15685 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
15686 exp, target, false);
15687 case ALTIVEC_BUILTIN_LVX_V2DI:
15688 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
15689 exp, target, false);
15690 case ALTIVEC_BUILTIN_LVX_V4SF:
15691 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
15692 exp, target, false);
15693 case ALTIVEC_BUILTIN_LVX:
15694 case ALTIVEC_BUILTIN_LVX_V4SI:
15695 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
15696 exp, target, false);
15697 case ALTIVEC_BUILTIN_LVX_V8HI:
15698 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
15699 exp, target, false);
15700 case ALTIVEC_BUILTIN_LVX_V16QI:
15701 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
15702 exp, target, false);
15703 case ALTIVEC_BUILTIN_LVLX:
15704 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15705 exp, target, true);
15706 case ALTIVEC_BUILTIN_LVLXL:
15707 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15708 exp, target, true);
15709 case ALTIVEC_BUILTIN_LVRX:
15710 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15711 exp, target, true);
15712 case ALTIVEC_BUILTIN_LVRXL:
15713 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15714 exp, target, true);
15715 case VSX_BUILTIN_LXVD2X_V1TI:
15716 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15717 exp, target, false);
15718 case VSX_BUILTIN_LXVD2X_V2DF:
15719 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15720 exp, target, false);
15721 case VSX_BUILTIN_LXVD2X_V2DI:
15722 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15723 exp, target, false);
15724 case VSX_BUILTIN_LXVW4X_V4SF:
15725 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15726 exp, target, false);
15727 case VSX_BUILTIN_LXVW4X_V4SI:
15728 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15729 exp, target, false);
15730 case VSX_BUILTIN_LXVW4X_V8HI:
15731 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15732 exp, target, false);
15733 case VSX_BUILTIN_LXVW4X_V16QI:
15734 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15735 exp, target, false);
15736 /* For the following on big endian, it's ok to use any appropriate
15737 unaligned-supporting load, so use a generic expander. For
15738 little-endian, the exact element-reversing instruction must
15739 be used. */
15740 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15742 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15743 : CODE_FOR_vsx_ld_elemrev_v2df);
15744 return altivec_expand_lv_builtin (code, exp, target, false);
15746 case VSX_BUILTIN_LD_ELEMREV_V1TI:
15748 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
15749 : CODE_FOR_vsx_ld_elemrev_v1ti);
15750 return altivec_expand_lv_builtin (code, exp, target, false);
15752 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15754 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15755 : CODE_FOR_vsx_ld_elemrev_v2di);
15756 return altivec_expand_lv_builtin (code, exp, target, false);
15758 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15760 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15761 : CODE_FOR_vsx_ld_elemrev_v4sf);
15762 return altivec_expand_lv_builtin (code, exp, target, false);
15764 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15766 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15767 : CODE_FOR_vsx_ld_elemrev_v4si);
15768 return altivec_expand_lv_builtin (code, exp, target, false);
15770 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15772 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15773 : CODE_FOR_vsx_ld_elemrev_v8hi);
15774 return altivec_expand_lv_builtin (code, exp, target, false);
15776 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15778 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15779 : CODE_FOR_vsx_ld_elemrev_v16qi);
15780 return altivec_expand_lv_builtin (code, exp, target, false);
15782 break;
15783 default:
15784 break;
15785 /* Fall through. */
15788 *expandedp = false;
15789 return NULL_RTX;
15792 /* Expand the builtin in EXP and store the result in TARGET. Store
15793 true in *EXPANDEDP if we found a builtin to expand. */
15794 static rtx
15795 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15797 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15798 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15799 const struct builtin_description *d;
15800 size_t i;
15802 *expandedp = true;
15804 switch (fcode)
15806 case PAIRED_BUILTIN_STX:
15807 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15808 case PAIRED_BUILTIN_LX:
15809 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15810 default:
15811 break;
15812 /* Fall through. */
15815 /* Expand the paired predicates. */
15816 d = bdesc_paired_preds;
15817 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15818 if (d->code == fcode)
15819 return paired_expand_predicate_builtin (d->icode, exp, target);
15821 *expandedp = false;
15822 return NULL_RTX;
15825 static rtx
15826 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15828 rtx pat, scratch, tmp;
15829 tree form = CALL_EXPR_ARG (exp, 0);
15830 tree arg0 = CALL_EXPR_ARG (exp, 1);
15831 tree arg1 = CALL_EXPR_ARG (exp, 2);
15832 rtx op0 = expand_normal (arg0);
15833 rtx op1 = expand_normal (arg1);
15834 machine_mode mode0 = insn_data[icode].operand[1].mode;
15835 machine_mode mode1 = insn_data[icode].operand[2].mode;
15836 int form_int;
15837 enum rtx_code code;
15839 if (TREE_CODE (form) != INTEGER_CST)
15841 error ("argument 1 of %qs must be a constant",
15842 "__builtin_paired_predicate");
15843 return const0_rtx;
15845 else
15846 form_int = TREE_INT_CST_LOW (form);
15848 gcc_assert (mode0 == mode1);
15850 if (arg0 == error_mark_node || arg1 == error_mark_node)
15851 return const0_rtx;
15853 if (target == 0
15854 || GET_MODE (target) != SImode
15855 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
15856 target = gen_reg_rtx (SImode);
15857 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15858 op0 = copy_to_mode_reg (mode0, op0);
15859 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15860 op1 = copy_to_mode_reg (mode1, op1);
15862 scratch = gen_reg_rtx (CCFPmode);
15864 pat = GEN_FCN (icode) (scratch, op0, op1);
15865 if (!pat)
15866 return const0_rtx;
15868 emit_insn (pat);
15870 switch (form_int)
15872 /* LT bit. */
15873 case 0:
15874 code = LT;
15875 break;
15876 /* GT bit. */
15877 case 1:
15878 code = GT;
15879 break;
15880 /* EQ bit. */
15881 case 2:
15882 code = EQ;
15883 break;
15884 /* UN bit. */
15885 case 3:
15886 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
15887 return target;
15888 default:
15889 error ("argument 1 of %qs is out of range",
15890 "__builtin_paired_predicate");
15891 return const0_rtx;
15894 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
15895 emit_move_insn (target, tmp);
15896 return target;
15899 /* Raise an error message for a builtin function that is called without the
15900 appropriate target options being set. */
15902 static void
15903 rs6000_invalid_builtin (enum rs6000_builtins fncode)
15905 size_t uns_fncode = (size_t) fncode;
15906 const char *name = rs6000_builtin_info[uns_fncode].name;
15907 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
15909 gcc_assert (name != NULL);
15910 if ((fnmask & RS6000_BTM_CELL) != 0)
15911 error ("builtin function %qs is only valid for the cell processor", name);
15912 else if ((fnmask & RS6000_BTM_VSX) != 0)
15913 error ("builtin function %qs requires the %qs option", name, "-mvsx");
15914 else if ((fnmask & RS6000_BTM_HTM) != 0)
15915 error ("builtin function %qs requires the %qs option", name, "-mhtm");
15916 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
15917 error ("builtin function %qs requires the %qs option", name, "-maltivec");
15918 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
15919 error ("builtin function %qs requires the %qs option", name, "-mpaired");
15920 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15921 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15922 error ("builtin function %qs requires the %qs and %qs options",
15923 name, "-mhard-dfp", "-mpower8-vector");
15924 else if ((fnmask & RS6000_BTM_DFP) != 0)
15925 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
15926 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
15927 error ("builtin function %qs requires the %qs option", name,
15928 "-mpower8-vector");
15929 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15930 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15931 error ("builtin function %qs requires the %qs and %qs options",
15932 name, "-mcpu=power9", "-m64");
15933 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
15934 error ("builtin function %qs requires the %qs option", name,
15935 "-mcpu=power9");
15936 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15937 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15938 error ("builtin function %qs requires the %qs and %qs options",
15939 name, "-mcpu=power9", "-m64");
15940 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
15941 error ("builtin function %qs requires the %qs option", name,
15942 "-mcpu=power9");
15943 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
15944 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
15945 error ("builtin function %qs requires the %qs and %qs options",
15946 name, "-mhard-float", "-mlong-double-128");
15947 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
15948 error ("builtin function %qs requires the %qs option", name,
15949 "-mhard-float");
15950 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
15951 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
15952 name);
15953 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
15954 error ("builtin function %qs requires the %qs option", name, "-mfloat128");
15955 else
15956 error ("builtin function %qs is not supported with the current options",
15957 name);
15960 /* Target hook for early folding of built-ins, shamelessly stolen
15961 from ia64.c. */
15963 static tree
15964 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
15965 int n_args ATTRIBUTE_UNUSED,
15966 tree *args ATTRIBUTE_UNUSED,
15967 bool ignore ATTRIBUTE_UNUSED)
15969 #ifdef SUBTARGET_FOLD_BUILTIN
15970 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
15971 #else
15972 return NULL_TREE;
15973 #endif
15976 /* Helper function to sort out which built-ins may be valid without having
15977 a LHS. */
15978 static bool
15979 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
15981 switch (fn_code)
15983 case ALTIVEC_BUILTIN_STVX_V16QI:
15984 case ALTIVEC_BUILTIN_STVX_V8HI:
15985 case ALTIVEC_BUILTIN_STVX_V4SI:
15986 case ALTIVEC_BUILTIN_STVX_V4SF:
15987 case ALTIVEC_BUILTIN_STVX_V2DI:
15988 case ALTIVEC_BUILTIN_STVX_V2DF:
15989 return true;
15990 default:
15991 return false;
15995 /* Helper function to handle the gimple folding of a vector compare
15996 operation. This sets up true/false vectors, and uses the
15997 VEC_COND_EXPR operation.
15998 CODE indicates which comparison is to be made. (EQ, GT, ...).
15999 TYPE indicates the type of the result. */
16000 static tree
16001 fold_build_vec_cmp (tree_code code, tree type,
16002 tree arg0, tree arg1)
16004 tree cmp_type = build_same_sized_truth_vector_type (type);
16005 tree zero_vec = build_zero_cst (type);
16006 tree minus_one_vec = build_minus_one_cst (type);
16007 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
16008 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
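/* Illustrative gimple shape of the folded compare (derived from the
   code above) for a V4SI greater-than:

     _1 = a_2 > b_3;                                  // boolean vector
     lhs_4 = VEC_COND_EXPR <_1, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>;
*/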
16011 /* Helper function to handle the in-between steps for the
16012 vector compare built-ins. */
16013 static void
16014 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
16016 tree arg0 = gimple_call_arg (stmt, 0);
16017 tree arg1 = gimple_call_arg (stmt, 1);
16018 tree lhs = gimple_call_lhs (stmt);
16019 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
16020 gimple *g = gimple_build_assign (lhs, cmp);
16021 gimple_set_location (g, gimple_location (stmt));
16022 gsi_replace (gsi, g, true);
16025 /* Helper function to handle the vector merge[hl] built-ins. The
16026 implementation difference between the h and l versions is in the values
16027 used when building the permute vector for a high-word versus a low-word
16028 merge. The variance is keyed off the use_high parameter. */
16029 static void
16030 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
16032 tree arg0 = gimple_call_arg (stmt, 0);
16033 tree arg1 = gimple_call_arg (stmt, 1);
16034 tree lhs = gimple_call_lhs (stmt);
16035 tree lhs_type = TREE_TYPE (lhs);
16036 tree lhs_type_type = TREE_TYPE (lhs_type);
16037 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
16038 int midpoint = n_elts / 2;
16039 int offset = 0;
16041 if (use_high == 1)
16042 offset = midpoint;
16044 tree_vector_builder elts (lhs_type, VECTOR_CST_NELTS (arg0), 1);
16046 for (int i = 0; i < midpoint; i++)
16048 elts.safe_push (build_int_cst (lhs_type_type, offset + i));
16049 elts.safe_push (build_int_cst (lhs_type_type, offset + n_elts + i));
16052 tree permute = elts.build ();
16054 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
16055 gimple_set_location (g, gimple_location (stmt));
16056 gsi_replace (gsi, g, true);
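/* Worked example (derived from the loop above) for a V4SI merge:
   n_elts = 4 and midpoint = 2, so the permute selector is
     use_high == 0:  { 0, 4, 1, 5 }
     use_high == 1:  { 2, 6, 3, 7 }
   i.e. elements of arg0 interleaved with the corresponding elements of
   arg1, taken from the low or the high half.  */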
16059 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
16060 a constant, use rs6000_fold_builtin.) */
16062 bool
16063 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
16065 gimple *stmt = gsi_stmt (*gsi);
16066 tree fndecl = gimple_call_fndecl (stmt);
16067 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
16068 enum rs6000_builtins fn_code
16069 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16070 tree arg0, arg1, lhs, temp;
16071 gimple *g;
16073 size_t uns_fncode = (size_t) fn_code;
16074 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
16075 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
16076 const char *fn_name2 = (icode != CODE_FOR_nothing)
16077 ? get_insn_name ((int) icode)
16078 : "nothing";
16080 if (TARGET_DEBUG_BUILTIN)
16081 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
16082 fn_code, fn_name1, fn_name2);
16084 if (!rs6000_fold_gimple)
16085 return false;
16087 /* Prevent gimple folding for code that does not have a LHS, unless it is
16088 allowed per the rs6000_builtin_valid_without_lhs helper function. */
16089 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
16090 return false;
16092 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
16093 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
16094 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
16095 if (!func_valid_p)
16096 return false;
16098 switch (fn_code)
16100 /* Flavors of vec_add. We deliberately don't expand
16101 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
16102 TImode, resulting in much poorer code generation. */
16103 case ALTIVEC_BUILTIN_VADDUBM:
16104 case ALTIVEC_BUILTIN_VADDUHM:
16105 case ALTIVEC_BUILTIN_VADDUWM:
16106 case P8V_BUILTIN_VADDUDM:
16107 case ALTIVEC_BUILTIN_VADDFP:
16108 case VSX_BUILTIN_XVADDDP:
16109 arg0 = gimple_call_arg (stmt, 0);
16110 arg1 = gimple_call_arg (stmt, 1);
16111 lhs = gimple_call_lhs (stmt);
16112 g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
16113 gimple_set_location (g, gimple_location (stmt));
16114 gsi_replace (gsi, g, true);
16115 return true;
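/* E.g. (illustrative): a call "lhs = __builtin_altivec_vadduwm (a, b)"
   is replaced by the plain gimple assignment "lhs = a + b;", exposing
   the addition to the generic vector optimizers.  */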
16116 /* Flavors of vec_sub. We deliberately don't expand
16117 P8V_BUILTIN_VSUBUQM. */
16118 case ALTIVEC_BUILTIN_VSUBUBM:
16119 case ALTIVEC_BUILTIN_VSUBUHM:
16120 case ALTIVEC_BUILTIN_VSUBUWM:
16121 case P8V_BUILTIN_VSUBUDM:
16122 case ALTIVEC_BUILTIN_VSUBFP:
16123 case VSX_BUILTIN_XVSUBDP:
16124 arg0 = gimple_call_arg (stmt, 0);
16125 arg1 = gimple_call_arg (stmt, 1);
16126 lhs = gimple_call_lhs (stmt);
16127 g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
16128 gimple_set_location (g, gimple_location (stmt));
16129 gsi_replace (gsi, g, true);
16130 return true;
16131 case VSX_BUILTIN_XVMULSP:
16132 case VSX_BUILTIN_XVMULDP:
16133 arg0 = gimple_call_arg (stmt, 0);
16134 arg1 = gimple_call_arg (stmt, 1);
16135 lhs = gimple_call_lhs (stmt);
16136 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
16137 gimple_set_location (g, gimple_location (stmt));
16138 gsi_replace (gsi, g, true);
16139 return true;
16140 /* Even element flavors of vec_mul (signed). */
16141 case ALTIVEC_BUILTIN_VMULESB:
16142 case ALTIVEC_BUILTIN_VMULESH:
16143 case P8V_BUILTIN_VMULESW:
16144 /* Even element flavors of vec_mul (unsigned). */
16145 case ALTIVEC_BUILTIN_VMULEUB:
16146 case ALTIVEC_BUILTIN_VMULEUH:
16147 case P8V_BUILTIN_VMULEUW:
16148 arg0 = gimple_call_arg (stmt, 0);
16149 arg1 = gimple_call_arg (stmt, 1);
16150 lhs = gimple_call_lhs (stmt);
16151 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
16152 gimple_set_location (g, gimple_location (stmt));
16153 gsi_replace (gsi, g, true);
16154 return true;
16155 /* Odd element flavors of vec_mul (signed). */
16156 case ALTIVEC_BUILTIN_VMULOSB:
16157 case ALTIVEC_BUILTIN_VMULOSH:
16158 case P8V_BUILTIN_VMULOSW:
16159 /* Odd element flavors of vec_mul (unsigned). */
16160 case ALTIVEC_BUILTIN_VMULOUB:
16161 case ALTIVEC_BUILTIN_VMULOUH:
16162 case P8V_BUILTIN_VMULOUW:
16163 arg0 = gimple_call_arg (stmt, 0);
16164 arg1 = gimple_call_arg (stmt, 1);
16165 lhs = gimple_call_lhs (stmt);
16166 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
16167 gimple_set_location (g, gimple_location (stmt));
16168 gsi_replace (gsi, g, true);
16169 return true;
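/* E.g. (illustrative): vec_mule/vec_mulo on two V8HI operands become
   VEC_WIDEN_MULT_EVEN_EXPR/VEC_WIDEN_MULT_ODD_EXPR producing a V4SI
   result, multiplying the even- or odd-numbered element pairs.  */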
16170 /* Flavors of vec_div (Integer). */
16171 case VSX_BUILTIN_DIV_V2DI:
16172 case VSX_BUILTIN_UDIV_V2DI:
16173 arg0 = gimple_call_arg (stmt, 0);
16174 arg1 = gimple_call_arg (stmt, 1);
16175 lhs = gimple_call_lhs (stmt);
16176 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
16177 gimple_set_location (g, gimple_location (stmt));
16178 gsi_replace (gsi, g, true);
16179 return true;
16180 /* Flavors of vec_div (float). */
16181 case VSX_BUILTIN_XVDIVSP:
16182 case VSX_BUILTIN_XVDIVDP:
16183 arg0 = gimple_call_arg (stmt, 0);
16184 arg1 = gimple_call_arg (stmt, 1);
16185 lhs = gimple_call_lhs (stmt);
16186 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
16187 gimple_set_location (g, gimple_location (stmt));
16188 gsi_replace (gsi, g, true);
16189 return true;
16190 /* Flavors of vec_and. */
16191 case ALTIVEC_BUILTIN_VAND:
16192 arg0 = gimple_call_arg (stmt, 0);
16193 arg1 = gimple_call_arg (stmt, 1);
16194 lhs = gimple_call_lhs (stmt);
16195 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
16196 gimple_set_location (g, gimple_location (stmt));
16197 gsi_replace (gsi, g, true);
16198 return true;
16199 /* Flavors of vec_andc. */
16200 case ALTIVEC_BUILTIN_VANDC:
16201 arg0 = gimple_call_arg (stmt, 0);
16202 arg1 = gimple_call_arg (stmt, 1);
16203 lhs = gimple_call_lhs (stmt);
16204 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16205 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
16206 gimple_set_location (g, gimple_location (stmt));
16207 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16208 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
16209 gimple_set_location (g, gimple_location (stmt));
16210 gsi_replace (gsi, g, true);
16211 return true;
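/* A sketch of the replacement GIMPLE for a hypothetical vec_andc (a, b):
   the single call becomes two statements,

     temp = ~b;
     lhs = a & temp;

   with the BIT_NOT_EXPR statement inserted just before the statement
   that replaces the call itself. */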
16212 /* Flavors of vec_nand. */
16213 case P8V_BUILTIN_VEC_NAND:
16214 case P8V_BUILTIN_NAND_V16QI:
16215 case P8V_BUILTIN_NAND_V8HI:
16216 case P8V_BUILTIN_NAND_V4SI:
16217 case P8V_BUILTIN_NAND_V4SF:
16218 case P8V_BUILTIN_NAND_V2DF:
16219 case P8V_BUILTIN_NAND_V2DI:
16220 arg0 = gimple_call_arg (stmt, 0);
16221 arg1 = gimple_call_arg (stmt, 1);
16222 lhs = gimple_call_lhs (stmt);
16223 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16224 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
16225 gimple_set_location (g, gimple_location (stmt));
16226 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16227 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16228 gimple_set_location (g, gimple_location (stmt));
16229 gsi_replace (gsi, g, true);
16230 return true;
16231 /* Flavors of vec_or. */
16232 case ALTIVEC_BUILTIN_VOR:
16233 arg0 = gimple_call_arg (stmt, 0);
16234 arg1 = gimple_call_arg (stmt, 1);
16235 lhs = gimple_call_lhs (stmt);
16236 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
16237 gimple_set_location (g, gimple_location (stmt));
16238 gsi_replace (gsi, g, true);
16239 return true;
16240 /* Flavors of vec_orc. */
16241 case P8V_BUILTIN_ORC_V16QI:
16242 case P8V_BUILTIN_ORC_V8HI:
16243 case P8V_BUILTIN_ORC_V4SI:
16244 case P8V_BUILTIN_ORC_V4SF:
16245 case P8V_BUILTIN_ORC_V2DF:
16246 case P8V_BUILTIN_ORC_V2DI:
16247 arg0 = gimple_call_arg (stmt, 0);
16248 arg1 = gimple_call_arg (stmt, 1);
16249 lhs = gimple_call_lhs (stmt);
16250 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16251 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
16252 gimple_set_location (g, gimple_location (stmt));
16253 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16254 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
16255 gimple_set_location (g, gimple_location (stmt));
16256 gsi_replace (gsi, g, true);
16257 return true;
16258 /* Flavors of vec_xor. */
16259 case ALTIVEC_BUILTIN_VXOR:
16260 arg0 = gimple_call_arg (stmt, 0);
16261 arg1 = gimple_call_arg (stmt, 1);
16262 lhs = gimple_call_lhs (stmt);
16263 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
16264 gimple_set_location (g, gimple_location (stmt));
16265 gsi_replace (gsi, g, true);
16266 return true;
16267 /* Flavors of vec_nor. */
16268 case ALTIVEC_BUILTIN_VNOR:
16269 arg0 = gimple_call_arg (stmt, 0);
16270 arg1 = gimple_call_arg (stmt, 1);
16271 lhs = gimple_call_lhs (stmt);
16272 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16273 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
16274 gimple_set_location (g, gimple_location (stmt));
16275 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16276 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16277 gimple_set_location (g, gimple_location (stmt));
16278 gsi_replace (gsi, g, true);
16279 return true;
16280 /* Flavors of vec_abs. */
16281 case ALTIVEC_BUILTIN_ABS_V16QI:
16282 case ALTIVEC_BUILTIN_ABS_V8HI:
16283 case ALTIVEC_BUILTIN_ABS_V4SI:
16284 case ALTIVEC_BUILTIN_ABS_V4SF:
16285 case P8V_BUILTIN_ABS_V2DI:
16286 case VSX_BUILTIN_XVABSDP:
16287 arg0 = gimple_call_arg (stmt, 0);
16288 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
16289 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
16290 return false;
16291 lhs = gimple_call_lhs (stmt);
16292 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
16293 gimple_set_location (g, gimple_location (stmt));
16294 gsi_replace (gsi, g, true);
16295 return true;
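/* The early return above declines to fold signed integer vec_abs when
   signed overflow does not wrap: ABS_EXPR of the most negative lane
   value (e.g. INT_MIN in a V4SI element) would be undefined behavior in
   GIMPLE, while the hardware sequence has defined wrapping semantics.
   Under -fwrapv a hypothetical

     vector int a = vec_abs (v);

   does fold, into a plain a = ABS_EXPR <v> statement. */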
16296 /* Flavors of vec_min. */
16297 case VSX_BUILTIN_XVMINDP:
16298 case P8V_BUILTIN_VMINSD:
16299 case P8V_BUILTIN_VMINUD:
16300 case ALTIVEC_BUILTIN_VMINSB:
16301 case ALTIVEC_BUILTIN_VMINSH:
16302 case ALTIVEC_BUILTIN_VMINSW:
16303 case ALTIVEC_BUILTIN_VMINUB:
16304 case ALTIVEC_BUILTIN_VMINUH:
16305 case ALTIVEC_BUILTIN_VMINUW:
16306 case ALTIVEC_BUILTIN_VMINFP:
16307 arg0 = gimple_call_arg (stmt, 0);
16308 arg1 = gimple_call_arg (stmt, 1);
16309 lhs = gimple_call_lhs (stmt);
16310 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
16311 gimple_set_location (g, gimple_location (stmt));
16312 gsi_replace (gsi, g, true);
16313 return true;
16314 /* Flavors of vec_max. */
16315 case VSX_BUILTIN_XVMAXDP:
16316 case P8V_BUILTIN_VMAXSD:
16317 case P8V_BUILTIN_VMAXUD:
16318 case ALTIVEC_BUILTIN_VMAXSB:
16319 case ALTIVEC_BUILTIN_VMAXSH:
16320 case ALTIVEC_BUILTIN_VMAXSW:
16321 case ALTIVEC_BUILTIN_VMAXUB:
16322 case ALTIVEC_BUILTIN_VMAXUH:
16323 case ALTIVEC_BUILTIN_VMAXUW:
16324 case ALTIVEC_BUILTIN_VMAXFP:
16325 arg0 = gimple_call_arg (stmt, 0);
16326 arg1 = gimple_call_arg (stmt, 1);
16327 lhs = gimple_call_lhs (stmt);
16328 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
16329 gimple_set_location (g, gimple_location (stmt));
16330 gsi_replace (gsi, g, true);
16331 return true;
16332 /* Flavors of vec_eqv. */
16333 case P8V_BUILTIN_EQV_V16QI:
16334 case P8V_BUILTIN_EQV_V8HI:
16335 case P8V_BUILTIN_EQV_V4SI:
16336 case P8V_BUILTIN_EQV_V4SF:
16337 case P8V_BUILTIN_EQV_V2DF:
16338 case P8V_BUILTIN_EQV_V2DI:
16339 arg0 = gimple_call_arg (stmt, 0);
16340 arg1 = gimple_call_arg (stmt, 1);
16341 lhs = gimple_call_lhs (stmt);
16342 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16343 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
16344 gimple_set_location (g, gimple_location (stmt));
16345 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16346 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16347 gimple_set_location (g, gimple_location (stmt));
16348 gsi_replace (gsi, g, true);
16349 return true;
16350 /* Flavors of vec_rotate_left. */
16351 case ALTIVEC_BUILTIN_VRLB:
16352 case ALTIVEC_BUILTIN_VRLH:
16353 case ALTIVEC_BUILTIN_VRLW:
16354 case P8V_BUILTIN_VRLD:
16355 arg0 = gimple_call_arg (stmt, 0);
16356 arg1 = gimple_call_arg (stmt, 1);
16357 lhs = gimple_call_lhs (stmt);
16358 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
16359 gimple_set_location (g, gimple_location (stmt));
16360 gsi_replace (gsi, g, true);
16361 return true;
16362 /* Flavors of vector shift right algebraic.
16363 vec_sra{b,h,w} -> vsra{b,h,w}. */
16364 case ALTIVEC_BUILTIN_VSRAB:
16365 case ALTIVEC_BUILTIN_VSRAH:
16366 case ALTIVEC_BUILTIN_VSRAW:
16367 case P8V_BUILTIN_VSRAD:
16368 arg0 = gimple_call_arg (stmt, 0);
16369 arg1 = gimple_call_arg (stmt, 1);
16370 lhs = gimple_call_lhs (stmt);
16371 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
16372 gimple_set_location (g, gimple_location (stmt));
16373 gsi_replace (gsi, g, true);
16374 return true;
16375 /* Flavors of vector shift left.
16376 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
16377 case ALTIVEC_BUILTIN_VSLB:
16378 case ALTIVEC_BUILTIN_VSLH:
16379 case ALTIVEC_BUILTIN_VSLW:
16380 case P8V_BUILTIN_VSLD:
16381 arg0 = gimple_call_arg (stmt, 0);
16382 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
16383 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
16384 return false;
16385 arg1 = gimple_call_arg (stmt, 1);
16386 lhs = gimple_call_lhs (stmt);
16387 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1);
16388 gimple_set_location (g, gimple_location (stmt));
16389 gsi_replace (gsi, g, true);
16390 return true;
16391 /* Flavors of vector shift right. */
16392 case ALTIVEC_BUILTIN_VSRB:
16393 case ALTIVEC_BUILTIN_VSRH:
16394 case ALTIVEC_BUILTIN_VSRW:
16395 case P8V_BUILTIN_VSRD:
16396 {
16397 arg0 = gimple_call_arg (stmt, 0);
16398 arg1 = gimple_call_arg (stmt, 1);
16399 lhs = gimple_call_lhs (stmt);
16400 gimple_seq stmts = NULL;
16401 /* Convert arg0 to unsigned. */
16402 tree arg0_unsigned
16403 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
16404 unsigned_type_for (TREE_TYPE (arg0)), arg0);
16405 tree res
16406 = gimple_build (&stmts, RSHIFT_EXPR,
16407 TREE_TYPE (arg0_unsigned), arg0_unsigned, arg1);
16408 /* Convert result back to the lhs type. */
16409 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
16410 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16411 update_call_from_tree (gsi, res);
16412 return true;
16413 }
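/* Unlike the vec_sra cases above, vsr{b,h,w,d} are logical shifts, and
   RSHIFT_EXPR on a signed vector type would be an arithmetic shift.
   Hence the round trip through the unsigned type; schematically:

     arg0_unsigned = VIEW_CONVERT_EXPR <unsigned vector> (arg0);
     res = arg0_unsigned >> arg1;
     lhs = VIEW_CONVERT_EXPR <type of lhs> (res);  */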
16414 /* Vector loads. */
16415 case ALTIVEC_BUILTIN_LVX_V16QI:
16416 case ALTIVEC_BUILTIN_LVX_V8HI:
16417 case ALTIVEC_BUILTIN_LVX_V4SI:
16418 case ALTIVEC_BUILTIN_LVX_V4SF:
16419 case ALTIVEC_BUILTIN_LVX_V2DI:
16420 case ALTIVEC_BUILTIN_LVX_V2DF:
16421 case ALTIVEC_BUILTIN_LVX_V1TI:
16422 {
16423 arg0 = gimple_call_arg (stmt, 0); /* Offset. */
16424 arg1 = gimple_call_arg (stmt, 1); /* Address. */
16425 /* Do not fold for -maltivec=be on LE targets. */
16426 if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
16427 return false;
16428 lhs = gimple_call_lhs (stmt);
16429 location_t loc = gimple_location (stmt);
16430 /* Since arg1 may be cast to a different type, just use ptr_type_node
16431 here instead of trying to enforce TBAA on pointer types. */
16432 tree arg1_type = ptr_type_node;
16433 tree lhs_type = TREE_TYPE (lhs);
16434 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
16435 the tree using the value from arg0. The resulting type will match
16436 the type of arg1. */
16437 gimple_seq stmts = NULL;
16438 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
16439 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
16440 arg1_type, arg1, temp_offset);
16441 /* Mask off any lower bits from the address. */
16442 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
16443 arg1_type, temp_addr,
16444 build_int_cst (arg1_type, -16));
16445 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16446 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
16447 take an offset, but since we've already incorporated the offset
16448 above, here we just pass in a zero. */
16449 gimple *g
16450 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
16451 build_int_cst (arg1_type, 0)));
16452 gimple_set_location (g, loc);
16453 gsi_replace (gsi, g, true);
16454 return true;
16455 }
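/* The address arithmetic built above mirrors what the lvx instruction
   does in hardware: add the offset, then force 16-byte alignment by
   clearing the four low address bits.  In illustrative C, with base and
   offset standing for arg1 and arg0:

     addr = (char *) base + (size_t) offset;
     addr = (char *) ((uintptr_t) addr & ~(uintptr_t) 15);
     lhs = *(vector_type *) addr;  */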
16456 /* Vector stores. */
16457 case ALTIVEC_BUILTIN_STVX_V16QI:
16458 case ALTIVEC_BUILTIN_STVX_V8HI:
16459 case ALTIVEC_BUILTIN_STVX_V4SI:
16460 case ALTIVEC_BUILTIN_STVX_V4SF:
16461 case ALTIVEC_BUILTIN_STVX_V2DI:
16462 case ALTIVEC_BUILTIN_STVX_V2DF:
16463 {
16464 /* Do not fold for -maltivec=be on LE targets. */
16465 if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
16466 return false;
16467 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
16468 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
16469 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
16470 location_t loc = gimple_location (stmt);
16471 tree arg0_type = TREE_TYPE (arg0);
16472 /* Use ptr_type_node (no TBAA) for the arg2_type.
16473 FIXME: (Richard) "A proper fix would be to transition this type as
16474 seen from the frontend to GIMPLE, for example in a similar way we
16475 do for MEM_REFs by piggy-backing that on an extra argument, a
16476 constant zero pointer of the alias pointer type to use (which would
16477 also serve as a type indicator of the store itself). I'd use a
16478 target specific internal function for this (not sure if we can have
16479 those target specific, but I guess if it's folded away then that's
16480 fine) and get away with the overload set." */
16481 tree arg2_type = ptr_type_node;
16482 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
16483 the tree using the value from arg1. The resulting type will match
16484 the type of arg2. */
16485 gimple_seq stmts = NULL;
16486 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
16487 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
16488 arg2_type, arg2, temp_offset);
16489 /* Mask off any lower bits from the address. */
16490 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
16491 arg2_type, temp_addr,
16492 build_int_cst (arg2_type, -16));
16493 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16494 /* The desired gimple result should be similar to:
16495 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
16496 gimple *g
16497 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
16498 build_int_cst (arg2_type, 0)), arg0);
16499 gimple_set_location (g, loc);
16500 gsi_replace (gsi, g, true);
16501 return true;
16502 }
16504 /* Vector fused multiply-add (fma). */
16505 case ALTIVEC_BUILTIN_VMADDFP:
16506 case VSX_BUILTIN_XVMADDDP:
16507 case ALTIVEC_BUILTIN_VMLADDUHM:
16508 {
16509 arg0 = gimple_call_arg (stmt, 0);
16510 arg1 = gimple_call_arg (stmt, 1);
16511 tree arg2 = gimple_call_arg (stmt, 2);
16512 lhs = gimple_call_lhs (stmt);
16513 gimple *g = gimple_build_assign (lhs, FMA_EXPR, arg0, arg1, arg2);
16514 gimple_set_location (g, gimple_location (stmt));
16515 gsi_replace (gsi, g, true);
16516 return true;
16517 }
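/* E.g. a hypothetical vec_madd (a, b, c) on V4SF collapses into the
   single statement

     lhs = FMA_EXPR <a, b, c>;

   i.e. a * b + c with a single rounding, which later maps onto one
   fused multiply-add instruction. */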
16519 /* Vector compares; EQ, NE, GE, GT, LE. */
16520 case ALTIVEC_BUILTIN_VCMPEQUB:
16521 case ALTIVEC_BUILTIN_VCMPEQUH:
16522 case ALTIVEC_BUILTIN_VCMPEQUW:
16523 case P8V_BUILTIN_VCMPEQUD:
16524 fold_compare_helper (gsi, EQ_EXPR, stmt);
16525 return true;
16527 case P9V_BUILTIN_CMPNEB:
16528 case P9V_BUILTIN_CMPNEH:
16529 case P9V_BUILTIN_CMPNEW:
16530 fold_compare_helper (gsi, NE_EXPR, stmt);
16531 return true;
16533 case VSX_BUILTIN_CMPGE_16QI:
16534 case VSX_BUILTIN_CMPGE_U16QI:
16535 case VSX_BUILTIN_CMPGE_8HI:
16536 case VSX_BUILTIN_CMPGE_U8HI:
16537 case VSX_BUILTIN_CMPGE_4SI:
16538 case VSX_BUILTIN_CMPGE_U4SI:
16539 case VSX_BUILTIN_CMPGE_2DI:
16540 case VSX_BUILTIN_CMPGE_U2DI:
16541 fold_compare_helper (gsi, GE_EXPR, stmt);
16542 return true;
16544 case ALTIVEC_BUILTIN_VCMPGTSB:
16545 case ALTIVEC_BUILTIN_VCMPGTUB:
16546 case ALTIVEC_BUILTIN_VCMPGTSH:
16547 case ALTIVEC_BUILTIN_VCMPGTUH:
16548 case ALTIVEC_BUILTIN_VCMPGTSW:
16549 case ALTIVEC_BUILTIN_VCMPGTUW:
16550 case P8V_BUILTIN_VCMPGTUD:
16551 case P8V_BUILTIN_VCMPGTSD:
16552 fold_compare_helper (gsi, GT_EXPR, stmt);
16553 return true;
16555 case VSX_BUILTIN_CMPLE_16QI:
16556 case VSX_BUILTIN_CMPLE_U16QI:
16557 case VSX_BUILTIN_CMPLE_8HI:
16558 case VSX_BUILTIN_CMPLE_U8HI:
16559 case VSX_BUILTIN_CMPLE_4SI:
16560 case VSX_BUILTIN_CMPLE_U4SI:
16561 case VSX_BUILTIN_CMPLE_2DI:
16562 case VSX_BUILTIN_CMPLE_U2DI:
16563 fold_compare_helper (gsi, LE_EXPR, stmt);
16564 return true;
16566 /* Flavors of vec_splat_[us]{8,16,32}. */
16567 case ALTIVEC_BUILTIN_VSPLTISB:
16568 case ALTIVEC_BUILTIN_VSPLTISH:
16569 case ALTIVEC_BUILTIN_VSPLTISW:
16570 {
16571 arg0 = gimple_call_arg (stmt, 0);
16572 lhs = gimple_call_lhs (stmt);
16573 /* Only fold vec_splat_*() if arg0 is constant. */
16574 if (TREE_CODE (arg0) != INTEGER_CST)
16575 return false;
16576 gimple_seq stmts = NULL;
16577 location_t loc = gimple_location (stmt);
16578 tree splat_value = gimple_convert (&stmts, loc,
16579 TREE_TYPE (TREE_TYPE (lhs)), arg0);
16580 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16581 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
16582 g = gimple_build_assign (lhs, splat_tree);
16583 gimple_set_location (g, gimple_location (stmt));
16584 gsi_replace (gsi, g, true);
16585 return true;
16586 }
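/* For a constant argument, a hypothetical

     vector int v = vec_splat_s32 (5);

   becomes an assignment from the VECTOR_CST { 5, 5, 5, 5 } created by
   build_vector_from_val; non-constant arguments take the early
   "return false" above and are left to rs6000_expand_builtin. */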
16588 /* vec_mergel (integrals). */
16589 case ALTIVEC_BUILTIN_VMRGLH:
16590 case ALTIVEC_BUILTIN_VMRGLW:
16591 case VSX_BUILTIN_XXMRGLW_4SI:
16592 case ALTIVEC_BUILTIN_VMRGLB:
16593 case VSX_BUILTIN_VEC_MERGEL_V2DI:
16594 /* Do not fold for -maltivec=be on LE targets. */
16595 if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
16596 return false;
16597 fold_mergehl_helper (gsi, stmt, 1);
16598 return true;
16599 /* vec_mergeh (integrals). */
16600 case ALTIVEC_BUILTIN_VMRGHH:
16601 case ALTIVEC_BUILTIN_VMRGHW:
16602 case VSX_BUILTIN_XXMRGHW_4SI:
16603 case ALTIVEC_BUILTIN_VMRGHB:
16604 case VSX_BUILTIN_VEC_MERGEH_V2DI:
16605 /* Do not fold for -maltivec=be on LE targets. */
16606 if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
16607 return false;
16608 fold_mergehl_helper (gsi, stmt, 0);
16609 return true;
16610 default:
16611 if (TARGET_DEBUG_BUILTIN)
16612 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
16613 fn_code, fn_name1, fn_name2);
16614 break;
16615 }
16617 return false;
16618 }
16620 /* Expand an expression EXP that calls a built-in function,
16621 with result going to TARGET if that's convenient
16622 (and in mode MODE if that's convenient).
16623 SUBTARGET may be used as the target for computing one of EXP's operands.
16624 IGNORE is nonzero if the value is to be ignored. */
16626 static rtx
16627 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16628 machine_mode mode ATTRIBUTE_UNUSED,
16629 int ignore ATTRIBUTE_UNUSED)
16630 {
16631 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16632 enum rs6000_builtins fcode
16633 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16634 size_t uns_fcode = (size_t)fcode;
16635 const struct builtin_description *d;
16636 size_t i;
16637 rtx ret;
16638 bool success;
16639 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16640 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16641 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16643 /* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
16644 floating point type, depending on whether long double is the IBM extended
16645 double (KFmode) or long double is IEEE 128-bit (TFmode). It is simpler if
16646 we only define one variant of the built-in function, and switch the code
16647 when defining it, rather than defining two built-ins and using the
16648 overload table in rs6000-c.c to switch between the two. If we don't have
16649 the proper assembler, don't do this switch because CODE_FOR_*kf* and
16650 CODE_FOR_*tf* will be CODE_FOR_nothing. */
16651 #ifdef HAVE_AS_POWER9
16652 if (FLOAT128_IEEE_P (TFmode))
16653 switch (icode)
16654 {
16655 default:
16656 break;
16658 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
16659 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
16660 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
16661 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
16662 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
16663 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
16664 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
16665 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
16666 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
16667 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
16668 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
16669 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
16670 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
16671 }
16672 #endif
16674 if (TARGET_DEBUG_BUILTIN)
16675 {
16676 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16677 const char *name2 = (icode != CODE_FOR_nothing)
16678 ? get_insn_name ((int) icode)
16679 : "nothing";
16680 const char *name3;
16682 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16683 {
16684 default: name3 = "unknown"; break;
16685 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16686 case RS6000_BTC_UNARY: name3 = "unary"; break;
16687 case RS6000_BTC_BINARY: name3 = "binary"; break;
16688 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16689 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16690 case RS6000_BTC_ABS: name3 = "abs"; break;
16691 case RS6000_BTC_DST: name3 = "dst"; break;
16692 }
16695 fprintf (stderr,
16696 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16697 (name1) ? name1 : "---", fcode,
16698 (name2) ? name2 : "---", (int) icode,
16699 name3,
16700 func_valid_p ? "" : ", not valid");
16701 }
16703 if (!func_valid_p)
16704 {
16705 rs6000_invalid_builtin (fcode);
16707 /* Given it is invalid, just generate a normal call. */
16708 return expand_call (exp, target, ignore);
16709 }
16711 switch (fcode)
16712 {
16713 case RS6000_BUILTIN_RECIP:
16714 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16716 case RS6000_BUILTIN_RECIPF:
16717 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16719 case RS6000_BUILTIN_RSQRTF:
16720 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16722 case RS6000_BUILTIN_RSQRT:
16723 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16725 case POWER7_BUILTIN_BPERMD:
16726 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16727 ? CODE_FOR_bpermd_di
16728 : CODE_FOR_bpermd_si), exp, target);
16730 case RS6000_BUILTIN_GET_TB:
16731 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16732 target);
16734 case RS6000_BUILTIN_MFTB:
16735 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16736 ? CODE_FOR_rs6000_mftb_di
16737 : CODE_FOR_rs6000_mftb_si),
16738 target);
16740 case RS6000_BUILTIN_MFFS:
16741 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16743 case RS6000_BUILTIN_MTFSF:
16744 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16746 case RS6000_BUILTIN_CPU_INIT:
16747 case RS6000_BUILTIN_CPU_IS:
16748 case RS6000_BUILTIN_CPU_SUPPORTS:
16749 return cpu_expand_builtin (fcode, exp, target);
16751 case MISC_BUILTIN_SPEC_BARRIER:
16752 {
16753 emit_insn (gen_rs6000_speculation_barrier ());
16754 return NULL_RTX;
16755 }
16757 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16758 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16759 {
16760 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16761 : (int) CODE_FOR_altivec_lvsl_direct);
16762 machine_mode tmode = insn_data[icode2].operand[0].mode;
16763 machine_mode mode = insn_data[icode2].operand[1].mode;
16764 tree arg;
16765 rtx op, addr, pat;
16767 gcc_assert (TARGET_ALTIVEC);
16769 arg = CALL_EXPR_ARG (exp, 0);
16770 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16771 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16772 addr = memory_address (mode, op);
16773 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16774 op = addr;
16775 else
16776 {
16777 /* For the load case we need to negate the address. */
16778 op = gen_reg_rtx (GET_MODE (addr));
16779 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16780 }
16781 op = gen_rtx_MEM (mode, op);
16783 if (target == 0
16784 || GET_MODE (target) != tmode
16785 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16786 target = gen_reg_rtx (tmode);
16788 pat = GEN_FCN (icode2) (target, op);
16789 if (!pat)
16790 return 0;
16791 emit_insn (pat);
16793 return target;
16794 }
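/* A sketch of the intent: the store mask uses the address directly,
   while the load mask is computed on the negated address (op = -addr
   above).  In effect lvsr (-addr) yields the same permute control
   vector as lvsl (addr) and vice versa, so a single direct lvsl/lvsr
   pattern per endianness can serve both the load and store cases. */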
16796 case ALTIVEC_BUILTIN_VCFUX:
16797 case ALTIVEC_BUILTIN_VCFSX:
16798 case ALTIVEC_BUILTIN_VCTUXS:
16799 case ALTIVEC_BUILTIN_VCTSXS:
16800 /* FIXME: There's got to be a nicer way to handle this case than
16801 constructing a new CALL_EXPR. */
16802 if (call_expr_nargs (exp) == 1)
16803 {
16804 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16805 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16806 }
16807 break;
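/* In effect a call that arrives with one argument gains an explicit
   zero scale factor; e.g. (hypothetically) __builtin_altivec_vcfsx (v)
   is rebuilt as __builtin_altivec_vcfsx (v, 0), so the expanders below
   only ever see the two-operand form. */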
16809 default:
16810 break;
16811 }
16813 if (TARGET_ALTIVEC)
16814 {
16815 ret = altivec_expand_builtin (exp, target, &success);
16817 if (success)
16818 return ret;
16819 }
16820 if (TARGET_PAIRED_FLOAT)
16821 {
16822 ret = paired_expand_builtin (exp, target, &success);
16824 if (success)
16825 return ret;
16826 }
16827 if (TARGET_HTM)
16828 {
16829 ret = htm_expand_builtin (exp, target, &success);
16831 if (success)
16832 return ret;
16833 }
16835 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16836 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16837 gcc_assert (attr == RS6000_BTC_UNARY
16838 || attr == RS6000_BTC_BINARY
16839 || attr == RS6000_BTC_TERNARY
16840 || attr == RS6000_BTC_SPECIAL);
16842 /* Handle simple unary operations. */
16843 d = bdesc_1arg;
16844 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16845 if (d->code == fcode)
16846 return rs6000_expand_unop_builtin (icode, exp, target);
16848 /* Handle simple binary operations. */
16849 d = bdesc_2arg;
16850 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16851 if (d->code == fcode)
16852 return rs6000_expand_binop_builtin (icode, exp, target);
16854 /* Handle simple ternary operations. */
16855 d = bdesc_3arg;
16856 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16857 if (d->code == fcode)
16858 return rs6000_expand_ternop_builtin (icode, exp, target);
16860 /* Handle simple no-argument operations. */
16861 d = bdesc_0arg;
16862 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16863 if (d->code == fcode)
16864 return rs6000_expand_zeroop_builtin (icode, target);
16866 gcc_unreachable ();
16867 }
16869 /* Create a builtin vector type with a name, taking care not to give
16870 the canonical type a name. */
16872 static tree
16873 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16874 {
16875 tree result = build_vector_type (elt_type, num_elts);
16877 /* Copy so we don't give the canonical type a name. */
16878 result = build_variant_type_copy (result);
16880 add_builtin_type (name, result);
16882 return result;
16883 }
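/* Illustrative use, as in rs6000_init_builtins below:

     V4SI_type_node = rs6000_vector_type ("__vector signed int",
                                          intSI_type_node, 4);

   Because of the variant copy, the name attaches to the variant only;
   the canonical vector type stays anonymous and remains shareable with
   ordinary GNU vector types of the same shape. */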
16885 static void
16886 rs6000_init_builtins (void)
16887 {
16888 tree tdecl;
16889 tree ftype;
16890 machine_mode mode;
16892 if (TARGET_DEBUG_BUILTIN)
16893 fprintf (stderr, "rs6000_init_builtins%s%s%s\n",
16894 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16895 (TARGET_ALTIVEC) ? ", altivec" : "",
16896 (TARGET_VSX) ? ", vsx" : "");
16898 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16899 V2SF_type_node = build_vector_type (float_type_node, 2);
16900 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16901 : "__vector long long",
16902 intDI_type_node, 2);
16903 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16904 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16905 intSI_type_node, 4);
16906 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16907 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16908 intHI_type_node, 8);
16909 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16910 intQI_type_node, 16);
16912 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16913 unsigned_intQI_type_node, 16);
16914 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16915 unsigned_intHI_type_node, 8);
16916 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16917 unsigned_intSI_type_node, 4);
16918 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16919 ? "__vector unsigned long"
16920 : "__vector unsigned long long",
16921 unsigned_intDI_type_node, 2);
16923 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16924 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16925 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16926 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16928 const_str_type_node
16929 = build_pointer_type (build_qualified_type (char_type_node,
16930 TYPE_QUAL_CONST));
16932 /* We use V1TI mode as a special container to hold __int128_t items that
16933 must live in VSX registers. */
16934 if (intTI_type_node)
16935 {
16936 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16937 intTI_type_node, 1);
16938 unsigned_V1TI_type_node
16939 = rs6000_vector_type ("__vector unsigned __int128",
16940 unsigned_intTI_type_node, 1);
16941 }
16943 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16944 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16945 'vector unsigned short'. */
16947 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16948 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16949 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16950 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16951 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
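/* Keeping these distinct matters chiefly for C++ overload resolution;
   a hypothetical user-level sketch:

     void f (__vector __bool int);
     void f (__vector unsigned int);

   must declare two different overloads.  If bool_int_type_node merely
   aliased the unsigned type, the two declarations would collide. */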
16953 long_integer_type_internal_node = long_integer_type_node;
16954 long_unsigned_type_internal_node = long_unsigned_type_node;
16955 long_long_integer_type_internal_node = long_long_integer_type_node;
16956 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16957 intQI_type_internal_node = intQI_type_node;
16958 uintQI_type_internal_node = unsigned_intQI_type_node;
16959 intHI_type_internal_node = intHI_type_node;
16960 uintHI_type_internal_node = unsigned_intHI_type_node;
16961 intSI_type_internal_node = intSI_type_node;
16962 uintSI_type_internal_node = unsigned_intSI_type_node;
16963 intDI_type_internal_node = intDI_type_node;
16964 uintDI_type_internal_node = unsigned_intDI_type_node;
16965 intTI_type_internal_node = intTI_type_node;
16966 uintTI_type_internal_node = unsigned_intTI_type_node;
16967 float_type_internal_node = float_type_node;
16968 double_type_internal_node = double_type_node;
16969 long_double_type_internal_node = long_double_type_node;
16970 dfloat64_type_internal_node = dfloat64_type_node;
16971 dfloat128_type_internal_node = dfloat128_type_node;
16972 void_type_internal_node = void_type_node;
16974 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16975 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16976 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16977 format that uses a pair of doubles, depending on the switches and
16978 defaults.
16980 If we don't have support for either 128-bit IBM double double or IEEE
16981 128-bit floating point, we need to make sure the type is non-zero or
16982 else the self-test fails during bootstrap.
16984 We don't register a built-in type for __ibm128 if the type is the same as
16985 long double. Instead rs6000_cpu_cpp_builtins adds a #define mapping
16986 __ibm128 to long double.
16988 For IEEE 128-bit floating point, always create the type __ieee128. If the
16989 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16990 __ieee128. */
16991 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
16992 {
16993 ibm128_float_type_node = make_node (REAL_TYPE);
16994 TYPE_PRECISION (ibm128_float_type_node) = 128;
16995 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16996 layout_type (ibm128_float_type_node);
16998 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16999 "__ibm128");
17001 else
17002 ibm128_float_type_node = long_double_type_node;
17004 if (TARGET_FLOAT128_TYPE)
17005 {
17006 ieee128_float_type_node = float128_type_node;
17007 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17008 "__ieee128");
17011 else
17012 ieee128_float_type_node = long_double_type_node;
17014 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17015 tree type node. */
17016 builtin_mode_to_type[QImode][0] = integer_type_node;
17017 builtin_mode_to_type[HImode][0] = integer_type_node;
17018 builtin_mode_to_type[SImode][0] = intSI_type_node;
17019 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
17020 builtin_mode_to_type[DImode][0] = intDI_type_node;
17021 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
17022 builtin_mode_to_type[TImode][0] = intTI_type_node;
17023 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
17024 builtin_mode_to_type[SFmode][0] = float_type_node;
17025 builtin_mode_to_type[DFmode][0] = double_type_node;
17026 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
17027 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
17028 builtin_mode_to_type[TFmode][0] = long_double_type_node;
17029 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
17030 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
17031 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
17032 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
17033 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
17034 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
17035 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
17036 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
17037 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
17038 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
17039 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
17040 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
17041 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
17042 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
17043 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
17044 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
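/* builtin_function_type uses this table to turn a (mode, unsignedness)
   pair into a tree type; e.g., illustratively:

     builtin_mode_to_type[V4SImode][0]   __vector signed int
     builtin_mode_to_type[V4SImode][1]   __vector unsigned int

   Index 1 is populated only for modes that have an unsigned variant. */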
17046 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
17047 TYPE_NAME (bool_char_type_node) = tdecl;
17049 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
17050 TYPE_NAME (bool_short_type_node) = tdecl;
17052 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
17053 TYPE_NAME (bool_int_type_node) = tdecl;
17055 tdecl = add_builtin_type ("__pixel", pixel_type_node);
17056 TYPE_NAME (pixel_type_node) = tdecl;
17058 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
17059 bool_char_type_node, 16);
17060 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
17061 bool_short_type_node, 8);
17062 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
17063 bool_int_type_node, 4);
17064 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17065 ? "__vector __bool long"
17066 : "__vector __bool long long",
17067 bool_long_type_node, 2);
17068 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
17069 pixel_type_node, 8);
17071 /* Paired builtins are only available if the compiler was built with the
17072 appropriate options, so create them only when that option is in effect.
17073 Create the Altivec and VSX builtins on machines with at least the
17074 general purpose extensions (970 and newer) to allow the use of
17075 the target attribute. */
17076 if (TARGET_PAIRED_FLOAT)
17077 paired_init_builtins ();
17078 if (TARGET_EXTRA_BUILTINS)
17079 altivec_init_builtins ();
17080 if (TARGET_HTM)
17081 htm_init_builtins ();
17083 if (TARGET_EXTRA_BUILTINS || TARGET_PAIRED_FLOAT)
17084 rs6000_common_init_builtins ();
17086 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
17087 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
17088 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
17090 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
17091 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
17092 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
17094 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
17095 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
17096 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
17098 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
17099 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
17100 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
17102 mode = (TARGET_64BIT) ? DImode : SImode;
17103 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
17104 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
17105 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
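/* Illustrative user-level calls for the built-ins defined just above
   (hypothetical code):

     double q = __builtin_recipdiv (a, b);  division via reciprocal
                                            estimate plus refinement
     double r = __builtin_rsqrt (x);        reciprocal square root

   Each explicitly requests the estimate-based sequence instead of a
   full-precision divide or sqrt. */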
17107 ftype = build_function_type_list (unsigned_intDI_type_node,
17108 NULL_TREE);
17109 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
17111 if (TARGET_64BIT)
17112 ftype = build_function_type_list (unsigned_intDI_type_node,
17113 NULL_TREE);
17114 else
17115 ftype = build_function_type_list (unsigned_intSI_type_node,
17116 NULL_TREE);
17117 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
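/* E.g. (hypothetical):

     unsigned long long tb = __builtin_ppc_get_timebase ();

   always yields the full 64-bit time base, while __builtin_ppc_mftb
   returns one register-width worth, hence the two alternative function
   types built above. */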
17119 ftype = build_function_type_list (double_type_node, NULL_TREE);
17120 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17122 ftype = build_function_type_list (void_type_node,
17123 intSI_type_node, double_type_node,
17124 NULL_TREE);
17125 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17127 ftype = build_function_type_list (void_type_node, NULL_TREE);
17128 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17129 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
17130 MISC_BUILTIN_SPEC_BARRIER);
17132 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17133 NULL_TREE);
17134 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17135 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
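/* These are the user-visible CPU feature tests documented in the GCC
   manual; hypothetical usage:

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("power9") || __builtin_cpu_supports ("vsx"))
       use_fast_path ();

   On glibc targets they test values the C library derives from
   AT_PLATFORM and the hwcap aux vector entries. */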
17137 /* AIX libm provides clog as __clog. */
17138 if (TARGET_XCOFF
17139 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17140 set_user_assembler_name (tdecl, "__clog");
17142 #ifdef SUBTARGET_INIT_BUILTINS
17143 SUBTARGET_INIT_BUILTINS;
17144 #endif
17145 }
17147 /* Returns the rs6000 builtin decl for CODE. */
17149 static tree
17150 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17151 {
17152 HOST_WIDE_INT fnmask;
17154 if (code >= RS6000_BUILTIN_COUNT)
17155 return error_mark_node;
17157 fnmask = rs6000_builtin_info[code].mask;
17158 if ((fnmask & rs6000_builtin_mask) != fnmask)
17159 {
17160 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17161 return error_mark_node;
17162 }
17164 return rs6000_builtin_decls[code];
17165 }
17167 static void
17168 paired_init_builtins (void)
17169 {
17170 const struct builtin_description *d;
17171 size_t i;
17172 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17174 tree int_ftype_int_v2sf_v2sf
17175 = build_function_type_list (integer_type_node,
17176 integer_type_node,
17177 V2SF_type_node,
17178 V2SF_type_node,
17179 NULL_TREE);
17180 tree pcfloat_type_node =
17181 build_pointer_type (build_qualified_type
17182 (float_type_node, TYPE_QUAL_CONST));
17184 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17185 long_integer_type_node,
17186 pcfloat_type_node,
17187 NULL_TREE);
17188 tree void_ftype_v2sf_long_pcfloat =
17189 build_function_type_list (void_type_node,
17190 V2SF_type_node,
17191 long_integer_type_node,
17192 pcfloat_type_node,
17193 NULL_TREE);
17196 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17197 PAIRED_BUILTIN_LX);
17200 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17201 PAIRED_BUILTIN_STX);
17203 /* Predicates. */
17204 d = bdesc_paired_preds;
17205 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17206 {
17207 tree type;
17208 HOST_WIDE_INT mask = d->mask;
17210 if ((mask & builtin_mask) != mask)
17211 {
17212 if (TARGET_DEBUG_BUILTIN)
17213 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
17214 d->name);
17215 continue;
17216 }
17218 /* Cannot define builtin if the instruction is disabled. */
17219 gcc_assert (d->icode != CODE_FOR_nothing);
17221 if (TARGET_DEBUG_BUILTIN)
17222 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17223 (int)i, get_insn_name (d->icode), (int)d->icode,
17224 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17226 switch (insn_data[d->icode].operand[1].mode)
17227 {
17228 case E_V2SFmode:
17229 type = int_ftype_int_v2sf_v2sf;
17230 break;
17231 default:
17232 gcc_unreachable ();
17233 }
17235 def_builtin (d->name, type, d->code);
17236 }
17237 }
17239 static void
17240 altivec_init_builtins (void)
17241 {
17242 const struct builtin_description *d;
17243 size_t i;
17244 tree ftype;
17245 tree decl;
17246 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17248 tree pvoid_type_node = build_pointer_type (void_type_node);
17250 tree pcvoid_type_node
17251 = build_pointer_type (build_qualified_type (void_type_node,
17252 TYPE_QUAL_CONST));
17254 tree int_ftype_opaque
17255 = build_function_type_list (integer_type_node,
17256 opaque_V4SI_type_node, NULL_TREE);
17257 tree opaque_ftype_opaque
17258 = build_function_type_list (integer_type_node, NULL_TREE);
17259 tree opaque_ftype_opaque_int
17260 = build_function_type_list (opaque_V4SI_type_node,
17261 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17262 tree opaque_ftype_opaque_opaque_int
17263 = build_function_type_list (opaque_V4SI_type_node,
17264 opaque_V4SI_type_node, opaque_V4SI_type_node,
17265 integer_type_node, NULL_TREE);
17266 tree opaque_ftype_opaque_opaque_opaque
17267 = build_function_type_list (opaque_V4SI_type_node,
17268 opaque_V4SI_type_node, opaque_V4SI_type_node,
17269 opaque_V4SI_type_node, NULL_TREE);
17270 tree opaque_ftype_opaque_opaque
17271 = build_function_type_list (opaque_V4SI_type_node,
17272 opaque_V4SI_type_node, opaque_V4SI_type_node,
17273 NULL_TREE);
17274 tree int_ftype_int_opaque_opaque
17275 = build_function_type_list (integer_type_node,
17276 integer_type_node, opaque_V4SI_type_node,
17277 opaque_V4SI_type_node, NULL_TREE);
17278 tree int_ftype_int_v4si_v4si
17279 = build_function_type_list (integer_type_node,
17280 integer_type_node, V4SI_type_node,
17281 V4SI_type_node, NULL_TREE);
17282 tree int_ftype_int_v2di_v2di
17283 = build_function_type_list (integer_type_node,
17284 integer_type_node, V2DI_type_node,
17285 V2DI_type_node, NULL_TREE);
17286 tree void_ftype_v4si
17287 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17288 tree v8hi_ftype_void
17289 = build_function_type_list (V8HI_type_node, NULL_TREE);
17290 tree void_ftype_void
17291 = build_function_type_list (void_type_node, NULL_TREE);
17292 tree void_ftype_int
17293 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17295 tree opaque_ftype_long_pcvoid
17296 = build_function_type_list (opaque_V4SI_type_node,
17297 long_integer_type_node, pcvoid_type_node,
17298 NULL_TREE);
17299 tree v16qi_ftype_long_pcvoid
17300 = build_function_type_list (V16QI_type_node,
17301 long_integer_type_node, pcvoid_type_node,
17302 NULL_TREE);
17303 tree v8hi_ftype_long_pcvoid
17304 = build_function_type_list (V8HI_type_node,
17305 long_integer_type_node, pcvoid_type_node,
17306 NULL_TREE);
17307 tree v4si_ftype_long_pcvoid
17308 = build_function_type_list (V4SI_type_node,
17309 long_integer_type_node, pcvoid_type_node,
17310 NULL_TREE);
17311 tree v4sf_ftype_long_pcvoid
17312 = build_function_type_list (V4SF_type_node,
17313 long_integer_type_node, pcvoid_type_node,
17314 NULL_TREE);
17315 tree v2df_ftype_long_pcvoid
17316 = build_function_type_list (V2DF_type_node,
17317 long_integer_type_node, pcvoid_type_node,
17318 NULL_TREE);
17319 tree v2di_ftype_long_pcvoid
17320 = build_function_type_list (V2DI_type_node,
17321 long_integer_type_node, pcvoid_type_node,
17322 NULL_TREE);
17323 tree v1ti_ftype_long_pcvoid
17324 = build_function_type_list (V1TI_type_node,
17325 long_integer_type_node, pcvoid_type_node,
17326 NULL_TREE);
17328 tree void_ftype_opaque_long_pvoid
17329 = build_function_type_list (void_type_node,
17330 opaque_V4SI_type_node, long_integer_type_node,
17331 pvoid_type_node, NULL_TREE);
17332 tree void_ftype_v4si_long_pvoid
17333 = build_function_type_list (void_type_node,
17334 V4SI_type_node, long_integer_type_node,
17335 pvoid_type_node, NULL_TREE);
17336 tree void_ftype_v16qi_long_pvoid
17337 = build_function_type_list (void_type_node,
17338 V16QI_type_node, long_integer_type_node,
17339 pvoid_type_node, NULL_TREE);
17341 tree void_ftype_v16qi_pvoid_long
17342 = build_function_type_list (void_type_node,
17343 V16QI_type_node, pvoid_type_node,
17344 long_integer_type_node, NULL_TREE);
17346 tree void_ftype_v8hi_long_pvoid
17347 = build_function_type_list (void_type_node,
17348 V8HI_type_node, long_integer_type_node,
17349 pvoid_type_node, NULL_TREE);
17350 tree void_ftype_v4sf_long_pvoid
17351 = build_function_type_list (void_type_node,
17352 V4SF_type_node, long_integer_type_node,
17353 pvoid_type_node, NULL_TREE);
17354 tree void_ftype_v2df_long_pvoid
17355 = build_function_type_list (void_type_node,
17356 V2DF_type_node, long_integer_type_node,
17357 pvoid_type_node, NULL_TREE);
17358 tree void_ftype_v1ti_long_pvoid
17359 = build_function_type_list (void_type_node,
17360 V1TI_type_node, long_integer_type_node,
17361 pvoid_type_node, NULL_TREE);
17362 tree void_ftype_v2di_long_pvoid
17363 = build_function_type_list (void_type_node,
17364 V2DI_type_node, long_integer_type_node,
17365 pvoid_type_node, NULL_TREE);
17366 tree int_ftype_int_v8hi_v8hi
17367 = build_function_type_list (integer_type_node,
17368 integer_type_node, V8HI_type_node,
17369 V8HI_type_node, NULL_TREE);
17370 tree int_ftype_int_v16qi_v16qi
17371 = build_function_type_list (integer_type_node,
17372 integer_type_node, V16QI_type_node,
17373 V16QI_type_node, NULL_TREE);
17374 tree int_ftype_int_v4sf_v4sf
17375 = build_function_type_list (integer_type_node,
17376 integer_type_node, V4SF_type_node,
17377 V4SF_type_node, NULL_TREE);
17378 tree int_ftype_int_v2df_v2df
17379 = build_function_type_list (integer_type_node,
17380 integer_type_node, V2DF_type_node,
17381 V2DF_type_node, NULL_TREE);
17382 tree v2di_ftype_v2di
17383 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17384 tree v4si_ftype_v4si
17385 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17386 tree v8hi_ftype_v8hi
17387 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17388 tree v16qi_ftype_v16qi
17389 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17390 tree v4sf_ftype_v4sf
17391 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17392 tree v2df_ftype_v2df
17393 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17394 tree void_ftype_pcvoid_int_int
17395 = build_function_type_list (void_type_node,
17396 pcvoid_type_node, integer_type_node,
17397 integer_type_node, NULL_TREE);
17399 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17400 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17401 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17402 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17403 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17404 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17405 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17406 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17407 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17408 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17409 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17410 ALTIVEC_BUILTIN_LVXL_V2DF);
17411 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17412 ALTIVEC_BUILTIN_LVXL_V2DI);
17413 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17414 ALTIVEC_BUILTIN_LVXL_V4SF);
17415 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17416 ALTIVEC_BUILTIN_LVXL_V4SI);
17417 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17418 ALTIVEC_BUILTIN_LVXL_V8HI);
17419 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17420 ALTIVEC_BUILTIN_LVXL_V16QI);
17421 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17422 def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
17423 ALTIVEC_BUILTIN_LVX_V1TI);
17424 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17425 ALTIVEC_BUILTIN_LVX_V2DF);
17426 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17427 ALTIVEC_BUILTIN_LVX_V2DI);
17428 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17429 ALTIVEC_BUILTIN_LVX_V4SF);
17430 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17431 ALTIVEC_BUILTIN_LVX_V4SI);
17432 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17433 ALTIVEC_BUILTIN_LVX_V8HI);
17434 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17435 ALTIVEC_BUILTIN_LVX_V16QI);
17436 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17437 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17438 ALTIVEC_BUILTIN_STVX_V2DF);
17439 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17440 ALTIVEC_BUILTIN_STVX_V2DI);
17441 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17442 ALTIVEC_BUILTIN_STVX_V4SF);
17443 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17444 ALTIVEC_BUILTIN_STVX_V4SI);
17445 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17446 ALTIVEC_BUILTIN_STVX_V8HI);
17447 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17448 ALTIVEC_BUILTIN_STVX_V16QI);
17449 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17450 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17451 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17452 ALTIVEC_BUILTIN_STVXL_V2DF);
17453 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17454 ALTIVEC_BUILTIN_STVXL_V2DI);
17455 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17456 ALTIVEC_BUILTIN_STVXL_V4SF);
17457 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17458 ALTIVEC_BUILTIN_STVXL_V4SI);
17459 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17460 ALTIVEC_BUILTIN_STVXL_V8HI);
17461 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17462 ALTIVEC_BUILTIN_STVXL_V16QI);
17463 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17464 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17465 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17466 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17467 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17468 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17469 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17470 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17471 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17472 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17473 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17474 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17475 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17476 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17477 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17478 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17480 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17481 VSX_BUILTIN_LXVD2X_V2DF);
17482 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17483 VSX_BUILTIN_LXVD2X_V2DI);
17484 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17485 VSX_BUILTIN_LXVW4X_V4SF);
17486 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17487 VSX_BUILTIN_LXVW4X_V4SI);
17488 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17489 VSX_BUILTIN_LXVW4X_V8HI);
17490 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17491 VSX_BUILTIN_LXVW4X_V16QI);
17492 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17493 VSX_BUILTIN_STXVD2X_V2DF);
17494 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17495 VSX_BUILTIN_STXVD2X_V2DI);
17496 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17497 VSX_BUILTIN_STXVW4X_V4SF);
17498 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17499 VSX_BUILTIN_STXVW4X_V4SI);
17500 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17501 VSX_BUILTIN_STXVW4X_V8HI);
17502 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17503 VSX_BUILTIN_STXVW4X_V16QI);
17505 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17506 VSX_BUILTIN_LD_ELEMREV_V2DF);
17507 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17508 VSX_BUILTIN_LD_ELEMREV_V2DI);
17509 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17510 VSX_BUILTIN_LD_ELEMREV_V4SF);
17511 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17512 VSX_BUILTIN_LD_ELEMREV_V4SI);
17513 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17514 VSX_BUILTIN_LD_ELEMREV_V8HI);
17515 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17516 VSX_BUILTIN_LD_ELEMREV_V16QI);
17517 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17518 VSX_BUILTIN_ST_ELEMREV_V2DF);
17519 def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
17520 VSX_BUILTIN_ST_ELEMREV_V1TI);
17521 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17522 VSX_BUILTIN_ST_ELEMREV_V2DI);
17523 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17524 VSX_BUILTIN_ST_ELEMREV_V4SF);
17525 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17526 VSX_BUILTIN_ST_ELEMREV_V4SI);
17527 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
17528 VSX_BUILTIN_ST_ELEMREV_V8HI);
17529 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
17530 VSX_BUILTIN_ST_ELEMREV_V16QI);
17532 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17533 VSX_BUILTIN_VEC_LD);
17534 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17535 VSX_BUILTIN_VEC_ST);
17536 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17537 VSX_BUILTIN_VEC_XL);
17538 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
17539 VSX_BUILTIN_VEC_XL_BE);
17540 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17541 VSX_BUILTIN_VEC_XST);
17542 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
17543 VSX_BUILTIN_VEC_XST_BE);
17545 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17546 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17547 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17549 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17550 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17551 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17552 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17553 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17554 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17555 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17556 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17557 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17558 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17559 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17560 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17562 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17563 ALTIVEC_BUILTIN_VEC_ADDE);
17564 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17565 ALTIVEC_BUILTIN_VEC_ADDEC);
17566 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17567 ALTIVEC_BUILTIN_VEC_CMPNE);
17568 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17569 ALTIVEC_BUILTIN_VEC_MUL);
17570 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
17571 ALTIVEC_BUILTIN_VEC_SUBE);
17572 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
17573 ALTIVEC_BUILTIN_VEC_SUBEC);
17575 /* Cell builtins. */
17576 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17577 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17578 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17579 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17581 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17582 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17583 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17584 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17586 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17587 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17588 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17589 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17591 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17592 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17593 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17594 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17596 if (TARGET_P9_VECTOR)
17598 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
17599 P9V_BUILTIN_STXVL);
17600 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
17601 P9V_BUILTIN_XST_LEN_R);
17604 /* Add the DST variants. */
17605 d = bdesc_dst;
17606 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17608 HOST_WIDE_INT mask = d->mask;
17610 /* It is expected that these dst built-in functions may have
17611 d->icode equal to CODE_FOR_nothing. */
17612 if ((mask & builtin_mask) != mask)
17614 if (TARGET_DEBUG_BUILTIN)
17615 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
17616 d->name);
17617 continue;
17619 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
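/* Illustrative sketch, not part of the original source: the dst builtins
   registered by the loop above are the AltiVec data-stream-touch prefetch
   hints, all sharing the (const void *, int, literal tag) signature given
   to def_builtin:

     void prefetch_stream (const void *p, int ctl)
     {
       // ctl packs block size/count/stride as specified by the AltiVec
       // PIM; the final operand is the literal stream tag (0..3).
       __builtin_altivec_dst (p, ctl, 0);
     }
*/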
17622 /* Initialize the predicates. */
17623 d = bdesc_altivec_preds;
17624 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17626 machine_mode mode1;
17627 tree type;
17628 HOST_WIDE_INT mask = d->mask;
17630 if ((mask & builtin_mask) != mask)
17632 if (TARGET_DEBUG_BUILTIN)
17633 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
17634 d->name);
17635 continue;
17638 if (rs6000_overloaded_builtin_p (d->code))
17639 mode1 = VOIDmode;
17640 else
17642 /* Cannot define builtin if the instruction is disabled. */
17643 gcc_assert (d->icode != CODE_FOR_nothing);
17644 mode1 = insn_data[d->icode].operand[1].mode;
17647 switch (mode1)
17649 case E_VOIDmode:
17650 type = int_ftype_int_opaque_opaque;
17651 break;
17652 case E_V2DImode:
17653 type = int_ftype_int_v2di_v2di;
17654 break;
17655 case E_V4SImode:
17656 type = int_ftype_int_v4si_v4si;
17657 break;
17658 case E_V8HImode:
17659 type = int_ftype_int_v8hi_v8hi;
17660 break;
17661 case E_V16QImode:
17662 type = int_ftype_int_v16qi_v16qi;
17663 break;
17664 case E_V4SFmode:
17665 type = int_ftype_int_v4sf_v4sf;
17666 break;
17667 case E_V2DFmode:
17668 type = int_ftype_int_v2df_v2df;
17669 break;
17670 default:
17671 gcc_unreachable ();
17674 def_builtin (d->name, type, d->code);
17677 /* Initialize the abs* operators. */
17678 d = bdesc_abs;
17679 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17681 machine_mode mode0;
17682 tree type;
17683 HOST_WIDE_INT mask = d->mask;
17685 if ((mask & builtin_mask) != mask)
17687 if (TARGET_DEBUG_BUILTIN)
17688 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17689 d->name);
17690 continue;
17693 /* Cannot define builtin if the instruction is disabled. */
17694 gcc_assert (d->icode != CODE_FOR_nothing);
17695 mode0 = insn_data[d->icode].operand[0].mode;
17697 switch (mode0)
17699 case E_V2DImode:
17700 type = v2di_ftype_v2di;
17701 break;
17702 case E_V4SImode:
17703 type = v4si_ftype_v4si;
17704 break;
17705 case E_V8HImode:
17706 type = v8hi_ftype_v8hi;
17707 break;
17708 case E_V16QImode:
17709 type = v16qi_ftype_v16qi;
17710 break;
17711 case E_V4SFmode:
17712 type = v4sf_ftype_v4sf;
17713 break;
17714 case E_V2DFmode:
17715 type = v2df_ftype_v2df;
17716 break;
17717 default:
17718 gcc_unreachable ();
17721 def_builtin (d->name, type, d->code);
17724 /* Initialize target builtin that implements
17725 targetm.vectorize.builtin_mask_for_load. */
17727 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17728 v16qi_ftype_long_pcvoid,
17729 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17730 BUILT_IN_MD, NULL, NULL_TREE);
17731 TREE_READONLY (decl) = 1;
17732 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17733 altivec_builtin_mask_for_load = decl;
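/* Illustrative note (an assumption about the surrounding machinery): the
   vectorizer fetches this decl through
   targetm.vectorize.builtin_mask_for_load and calls it on a misaligned
   address to obtain a permute control vector (lvsl/lvsr family) for its
   realignment scheme; TREE_READONLY lets identical mask loads be CSEd.  */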
17735 /* Access to the vec_init patterns. */
17736 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17737 integer_type_node, integer_type_node,
17738 integer_type_node, NULL_TREE);
17739 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17741 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17742 short_integer_type_node,
17743 short_integer_type_node,
17744 short_integer_type_node,
17745 short_integer_type_node,
17746 short_integer_type_node,
17747 short_integer_type_node,
17748 short_integer_type_node, NULL_TREE);
17749 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17751 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17752 char_type_node, char_type_node,
17753 char_type_node, char_type_node,
17754 char_type_node, char_type_node,
17755 char_type_node, char_type_node,
17756 char_type_node, char_type_node,
17757 char_type_node, char_type_node,
17758 char_type_node, char_type_node,
17759 char_type_node, NULL_TREE);
17760 def_builtin ("__builtin_vec_init_v16qi", ftype,
17761 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17763 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17764 float_type_node, float_type_node,
17765 float_type_node, NULL_TREE);
17766 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17768 /* VSX builtins. */
17769 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17770 double_type_node, NULL_TREE);
17771 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17773 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17774 intDI_type_node, NULL_TREE);
17775 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
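/* Illustrative sketch, not part of the original source: these builtins
   expose the vec_init expanders directly; the effect matches a GNU C
   vector constructor.  Assuming VSX is enabled:

     vector int make4 (int a, int b, int c, int d)
     {
       return __builtin_vec_init_v4si (a, b, c, d);
       // same result as: return (vector int) { a, b, c, d };
     }

     vector double make2 (double x, double y)
     {
       return __builtin_vec_init_v2df (x, y);
     }
*/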
17777 /* Access to the vec_set patterns. */
17778 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17779 intSI_type_node,
17780 integer_type_node, NULL_TREE);
17781 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17783 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17784 intHI_type_node,
17785 integer_type_node, NULL_TREE);
17786 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17788 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17789 intQI_type_node,
17790 integer_type_node, NULL_TREE);
17791 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17793 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17794 float_type_node,
17795 integer_type_node, NULL_TREE);
17796 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17798 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17799 double_type_node,
17800 integer_type_node, NULL_TREE);
17801 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17803 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17804 intDI_type_node,
17805 integer_type_node, NULL_TREE);
17806 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17808 /* Access to the vec_extract patterns. */
17809 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17810 integer_type_node, NULL_TREE);
17811 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17813 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17814 integer_type_node, NULL_TREE);
17815 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17817 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17818 integer_type_node, NULL_TREE);
17819 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17821 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17822 integer_type_node, NULL_TREE);
17823 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17825 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17826 integer_type_node, NULL_TREE);
17827 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17829 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17830 integer_type_node, NULL_TREE);
17831 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
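/* Illustrative sketch, not part of the original source: the vec_set and
   vec_ext builtins provide element access without a union or a trip
   through memory:

     int get2 (vector int v)
     {
       return __builtin_vec_ext_v4si (v, 2);      // extract element 2
     }

     vector int put2 (vector int v, int x)
     {
       return __builtin_vec_set_v4si (v, x, 2);   // replace element 2
     }
*/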
17834 if (V1TI_type_node)
17836 tree v1ti_ftype_long_pcvoid
17837 = build_function_type_list (V1TI_type_node,
17838 long_integer_type_node, pcvoid_type_node,
17839 NULL_TREE);
17840 tree void_ftype_v1ti_long_pvoid
17841 = build_function_type_list (void_type_node,
17842 V1TI_type_node, long_integer_type_node,
17843 pvoid_type_node, NULL_TREE);
17844 def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
17845 VSX_BUILTIN_LD_ELEMREV_V1TI);
17846 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17847 VSX_BUILTIN_LXVD2X_V1TI);
17848 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17849 VSX_BUILTIN_STXVD2X_V1TI);
17850 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17851 NULL_TREE, NULL_TREE);
17852 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17853 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17854 intTI_type_node,
17855 integer_type_node, NULL_TREE);
17856 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17857 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17858 integer_type_node, NULL_TREE);
17859 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17864 static void
17865 htm_init_builtins (void)
17867 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17868 const struct builtin_description *d;
17869 size_t i;
17871 d = bdesc_htm;
17872 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17874 tree op[MAX_HTM_OPERANDS], type;
17875 HOST_WIDE_INT mask = d->mask;
17876 unsigned attr = rs6000_builtin_info[d->code].attr;
17877 bool void_func = (attr & RS6000_BTC_VOID);
17878 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17879 int nopnds = 0;
17880 tree gpr_type_node;
17881 tree rettype;
17882 tree argtype;
17884 /* It is expected that these htm built-in functions may have
17885 d->icode equal to CODE_FOR_nothing. */
17887 if (TARGET_32BIT && TARGET_POWERPC64)
17888 gpr_type_node = long_long_unsigned_type_node;
17889 else
17890 gpr_type_node = long_unsigned_type_node;
17892 if (attr & RS6000_BTC_SPR)
17894 rettype = gpr_type_node;
17895 argtype = gpr_type_node;
17897 else if (d->code == HTM_BUILTIN_TABORTDC
17898 || d->code == HTM_BUILTIN_TABORTDCI)
17900 rettype = unsigned_type_node;
17901 argtype = gpr_type_node;
17903 else
17905 rettype = unsigned_type_node;
17906 argtype = unsigned_type_node;
17909 if ((mask & builtin_mask) != mask)
17911 if (TARGET_DEBUG_BUILTIN)
17912 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
17913 continue;
17916 if (d->name == 0)
17918 if (TARGET_DEBUG_BUILTIN)
17919 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17920 (long unsigned) i);
17921 continue;
17924 op[nopnds++] = (void_func) ? void_type_node : rettype;
17926 if (attr_args == RS6000_BTC_UNARY)
17927 op[nopnds++] = argtype;
17928 else if (attr_args == RS6000_BTC_BINARY)
17930 op[nopnds++] = argtype;
17931 op[nopnds++] = argtype;
17933 else if (attr_args == RS6000_BTC_TERNARY)
17935 op[nopnds++] = argtype;
17936 op[nopnds++] = argtype;
17937 op[nopnds++] = argtype;
17940 switch (nopnds)
17942 case 1:
17943 type = build_function_type_list (op[0], NULL_TREE);
17944 break;
17945 case 2:
17946 type = build_function_type_list (op[0], op[1], NULL_TREE);
17947 break;
17948 case 3:
17949 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17950 break;
17951 case 4:
17952 type = build_function_type_list (op[0], op[1], op[2], op[3],
17953 NULL_TREE);
17954 break;
17955 default:
17956 gcc_unreachable ();
17959 def_builtin (d->name, type, d->code);
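/* Illustrative sketch, not part of the original source: the types built
   above back the documented HTM builtins; typical use with -mhtm:

     int update (int *p)
     {
       if (__builtin_tbegin (0))      // returns 1 if the transaction began
         {
           *p += 1;                   // transactional body
           __builtin_tend (0);        // commit
           return 1;
         }
       return 0;                      // aborted or failed to start
     }
*/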
17963 /* Hash function for builtin functions with up to 3 arguments and a return
17964 type. */
17965 hashval_t
17966 builtin_hasher::hash (builtin_hash_struct *bh)
17968 unsigned ret = 0;
17969 int i;
17971 for (i = 0; i < 4; i++)
17973 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17974 ret = (ret * 2) + bh->uns_p[i];
17977 return ret;
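/* Worked view of the hash above (an editorial illustration): the four
   (mode, uns_p) pairs are folded alternately in base MAX_MACHINE_MODE and
   base 2, i.e.

     ret = 0;
     for (i = 0; i < 4; i++)
       ret = (ret * MAX_MACHINE_MODE + bh->mode[i]) * 2 + bh->uns_p[i];

   so, modulo overflow of hashval_t (which merely costs collisions, not
   correctness), two signatures hash alike only if all four modes and all
   four signedness flags agree, matching builtin_hasher::equal below.  */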
17980 /* Compare builtin hash entries H1 and H2 for equivalence. */
17981 bool
17982 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17984 return ((p1->mode[0] == p2->mode[0])
17985 && (p1->mode[1] == p2->mode[1])
17986 && (p1->mode[2] == p2->mode[2])
17987 && (p1->mode[3] == p2->mode[3])
17988 && (p1->uns_p[0] == p2->uns_p[0])
17989 && (p1->uns_p[1] == p2->uns_p[1])
17990 && (p1->uns_p[2] == p2->uns_p[2])
17991 && (p1->uns_p[3] == p2->uns_p[3]));
17994 /* Map types for builtin functions with an explicit return type and up to 3
17995 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
17996 of the unused arguments.  */
17997 static tree
17998 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17999 machine_mode mode_arg1, machine_mode mode_arg2,
18000 enum rs6000_builtins builtin, const char *name)
18002 struct builtin_hash_struct h;
18003 struct builtin_hash_struct *h2;
18004 int num_args = 3;
18005 int i;
18006 tree ret_type = NULL_TREE;
18007 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
18009 /* Create builtin_hash_table. */
18010 if (builtin_hash_table == NULL)
18011 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
18013 h.type = NULL_TREE;
18014 h.mode[0] = mode_ret;
18015 h.mode[1] = mode_arg0;
18016 h.mode[2] = mode_arg1;
18017 h.mode[3] = mode_arg2;
18018 h.uns_p[0] = 0;
18019 h.uns_p[1] = 0;
18020 h.uns_p[2] = 0;
18021 h.uns_p[3] = 0;
18023 /* If the builtin produces unsigned results or takes unsigned arguments,
18024 and it is returned as a decl for the vectorizer (such as widening
18025 multiplies or permute), make sure the arguments and return value
18026 are typed correctly.  */
18027 switch (builtin)
18029 /* unsigned 1 argument functions. */
18030 case CRYPTO_BUILTIN_VSBOX:
18031 case P8V_BUILTIN_VGBBD:
18032 case MISC_BUILTIN_CDTBCD:
18033 case MISC_BUILTIN_CBCDTD:
18034 h.uns_p[0] = 1;
18035 h.uns_p[1] = 1;
18036 break;
18038 /* unsigned 2 argument functions. */
18039 case ALTIVEC_BUILTIN_VMULEUB:
18040 case ALTIVEC_BUILTIN_VMULEUH:
18041 case P8V_BUILTIN_VMULEUW:
18042 case ALTIVEC_BUILTIN_VMULOUB:
18043 case ALTIVEC_BUILTIN_VMULOUH:
18044 case P8V_BUILTIN_VMULOUW:
18045 case CRYPTO_BUILTIN_VCIPHER:
18046 case CRYPTO_BUILTIN_VCIPHERLAST:
18047 case CRYPTO_BUILTIN_VNCIPHER:
18048 case CRYPTO_BUILTIN_VNCIPHERLAST:
18049 case CRYPTO_BUILTIN_VPMSUMB:
18050 case CRYPTO_BUILTIN_VPMSUMH:
18051 case CRYPTO_BUILTIN_VPMSUMW:
18052 case CRYPTO_BUILTIN_VPMSUMD:
18053 case CRYPTO_BUILTIN_VPMSUM:
18054 case MISC_BUILTIN_ADDG6S:
18055 case MISC_BUILTIN_DIVWEU:
18056 case MISC_BUILTIN_DIVWEUO:
18057 case MISC_BUILTIN_DIVDEU:
18058 case MISC_BUILTIN_DIVDEUO:
18059 case VSX_BUILTIN_UDIV_V2DI:
18060 case ALTIVEC_BUILTIN_VMAXUB:
18061 case ALTIVEC_BUILTIN_VMINUB:
18062 case ALTIVEC_BUILTIN_VMAXUH:
18063 case ALTIVEC_BUILTIN_VMINUH:
18064 case ALTIVEC_BUILTIN_VMAXUW:
18065 case ALTIVEC_BUILTIN_VMINUW:
18066 case P8V_BUILTIN_VMAXUD:
18067 case P8V_BUILTIN_VMINUD:
18068 h.uns_p[0] = 1;
18069 h.uns_p[1] = 1;
18070 h.uns_p[2] = 1;
18071 break;
18073 /* unsigned 3 argument functions. */
18074 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
18075 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
18076 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
18077 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
18078 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
18079 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
18080 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
18081 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
18082 case VSX_BUILTIN_VPERM_16QI_UNS:
18083 case VSX_BUILTIN_VPERM_8HI_UNS:
18084 case VSX_BUILTIN_VPERM_4SI_UNS:
18085 case VSX_BUILTIN_VPERM_2DI_UNS:
18086 case VSX_BUILTIN_XXSEL_16QI_UNS:
18087 case VSX_BUILTIN_XXSEL_8HI_UNS:
18088 case VSX_BUILTIN_XXSEL_4SI_UNS:
18089 case VSX_BUILTIN_XXSEL_2DI_UNS:
18090 case CRYPTO_BUILTIN_VPERMXOR:
18091 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
18092 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
18093 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
18094 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
18095 case CRYPTO_BUILTIN_VSHASIGMAW:
18096 case CRYPTO_BUILTIN_VSHASIGMAD:
18097 case CRYPTO_BUILTIN_VSHASIGMA:
18098 h.uns_p[0] = 1;
18099 h.uns_p[1] = 1;
18100 h.uns_p[2] = 1;
18101 h.uns_p[3] = 1;
18102 break;
18104 /* signed permute functions with unsigned char mask. */
18105 case ALTIVEC_BUILTIN_VPERM_16QI:
18106 case ALTIVEC_BUILTIN_VPERM_8HI:
18107 case ALTIVEC_BUILTIN_VPERM_4SI:
18108 case ALTIVEC_BUILTIN_VPERM_4SF:
18109 case ALTIVEC_BUILTIN_VPERM_2DI:
18110 case ALTIVEC_BUILTIN_VPERM_2DF:
18111 case VSX_BUILTIN_VPERM_16QI:
18112 case VSX_BUILTIN_VPERM_8HI:
18113 case VSX_BUILTIN_VPERM_4SI:
18114 case VSX_BUILTIN_VPERM_4SF:
18115 case VSX_BUILTIN_VPERM_2DI:
18116 case VSX_BUILTIN_VPERM_2DF:
18117 h.uns_p[3] = 1;
18118 break;
18120 /* unsigned args, signed return. */
18121 case VSX_BUILTIN_XVCVUXDSP:
18122 case VSX_BUILTIN_XVCVUXDDP_UNS:
18123 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
18124 h.uns_p[1] = 1;
18125 break;
18127 /* signed args, unsigned return. */
18128 case VSX_BUILTIN_XVCVDPUXDS_UNS:
18129 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
18130 case MISC_BUILTIN_UNPACK_TD:
18131 case MISC_BUILTIN_UNPACK_V1TI:
18132 h.uns_p[0] = 1;
18133 break;
18135 /* unsigned arguments, bool return (compares). */
18136 case ALTIVEC_BUILTIN_VCMPEQUB:
18137 case ALTIVEC_BUILTIN_VCMPEQUH:
18138 case ALTIVEC_BUILTIN_VCMPEQUW:
18139 case P8V_BUILTIN_VCMPEQUD:
18140 case VSX_BUILTIN_CMPGE_U16QI:
18141 case VSX_BUILTIN_CMPGE_U8HI:
18142 case VSX_BUILTIN_CMPGE_U4SI:
18143 case VSX_BUILTIN_CMPGE_U2DI:
18144 case ALTIVEC_BUILTIN_VCMPGTUB:
18145 case ALTIVEC_BUILTIN_VCMPGTUH:
18146 case ALTIVEC_BUILTIN_VCMPGTUW:
18147 case P8V_BUILTIN_VCMPGTUD:
18148 h.uns_p[1] = 1;
18149 h.uns_p[2] = 1;
18150 break;
18152 /* unsigned arguments for 128-bit pack instructions. */
18153 case MISC_BUILTIN_PACK_TD:
18154 case MISC_BUILTIN_PACK_V1TI:
18155 h.uns_p[1] = 1;
18156 h.uns_p[2] = 1;
18157 break;
18159 /* unsigned second arguments (vector shift right). */
18160 case ALTIVEC_BUILTIN_VSRB:
18161 case ALTIVEC_BUILTIN_VSRH:
18162 case ALTIVEC_BUILTIN_VSRW:
18163 case P8V_BUILTIN_VSRD:
18164 h.uns_p[2] = 1;
18165 break;
18167 default:
18168 break;
18171 /* Figure out how many args are present. */
18172 while (num_args > 0 && h.mode[num_args] == VOIDmode)
18173 num_args--;
18175 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
18176 if (!ret_type && h.uns_p[0])
18177 ret_type = builtin_mode_to_type[h.mode[0]][0];
18179 if (!ret_type)
18180 fatal_error (input_location,
18181 "internal error: builtin function %qs had an unexpected "
18182 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
18184 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
18185 arg_type[i] = NULL_TREE;
18187 for (i = 0; i < num_args; i++)
18189 int m = (int) h.mode[i+1];
18190 int uns_p = h.uns_p[i+1];
18192 arg_type[i] = builtin_mode_to_type[m][uns_p];
18193 if (!arg_type[i] && uns_p)
18194 arg_type[i] = builtin_mode_to_type[m][0];
18196 if (!arg_type[i])
18197 fatal_error (input_location,
18198 "internal error: builtin function %qs, argument %d "
18199 "had unexpected argument type %qs", name, i,
18200 GET_MODE_NAME (m));
18203 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
18204 if (*found == NULL)
18206 h2 = ggc_alloc<builtin_hash_struct> ();
18207 *h2 = h;
18208 *found = h2;
18210 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
18211 arg_type[2], NULL_TREE);
18214 return (*found)->type;
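/* Illustrative note: because the table is keyed only on the four modes
   plus signedness, builtins with the same shape share one type node; for
   example VMULEUB and VMULOUB are both "unsigned V8HI (unsigned V16QI,
   unsigned V16QI)", so build_function_type_list runs once and the second
   lookup returns the cached tree.  */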
18217 static void
18218 rs6000_common_init_builtins (void)
18220 const struct builtin_description *d;
18221 size_t i;
18223 tree opaque_ftype_opaque = NULL_TREE;
18224 tree opaque_ftype_opaque_opaque = NULL_TREE;
18225 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
18226 tree v2si_ftype = NULL_TREE;
18227 tree v2si_ftype_qi = NULL_TREE;
18228 tree v2si_ftype_v2si_qi = NULL_TREE;
18229 tree v2si_ftype_int_qi = NULL_TREE;
18230 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18232 if (!TARGET_PAIRED_FLOAT)
18234 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18235 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18238 /* Paired builtins are only available if you build a compiler with the
18239 appropriate options, so only create them under that compiler option.
18240 Create Altivec and VSX builtins on machines with at least the general
18241 purpose extensions (970 and newer) to allow the use of the target
18242 attribute.  */
18244 if (TARGET_EXTRA_BUILTINS)
18245 builtin_mask |= RS6000_BTM_COMMON;
18247 /* Add the ternary operators. */
18248 d = bdesc_3arg;
18249 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18251 tree type;
18252 HOST_WIDE_INT mask = d->mask;
18254 if ((mask & builtin_mask) != mask)
18256 if (TARGET_DEBUG_BUILTIN)
18257 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18258 continue;
18261 if (rs6000_overloaded_builtin_p (d->code))
18263 if (! (type = opaque_ftype_opaque_opaque_opaque))
18264 type = opaque_ftype_opaque_opaque_opaque
18265 = build_function_type_list (opaque_V4SI_type_node,
18266 opaque_V4SI_type_node,
18267 opaque_V4SI_type_node,
18268 opaque_V4SI_type_node,
18269 NULL_TREE);
18271 else
18273 enum insn_code icode = d->icode;
18274 if (d->name == 0)
18276 if (TARGET_DEBUG_BUILTIN)
18277 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
18278 (long unsigned)i);
18280 continue;
18283 if (icode == CODE_FOR_nothing)
18285 if (TARGET_DEBUG_BUILTIN)
18286 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18287 d->name);
18289 continue;
18292 type = builtin_function_type (insn_data[icode].operand[0].mode,
18293 insn_data[icode].operand[1].mode,
18294 insn_data[icode].operand[2].mode,
18295 insn_data[icode].operand[3].mode,
18296 d->code, d->name);
18299 def_builtin (d->name, type, d->code);
18302 /* Add the binary operators. */
18303 d = bdesc_2arg;
18304 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18306 machine_mode mode0, mode1, mode2;
18307 tree type;
18308 HOST_WIDE_INT mask = d->mask;
18310 if ((mask & builtin_mask) != mask)
18312 if (TARGET_DEBUG_BUILTIN)
18313 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18314 continue;
18317 if (rs6000_overloaded_builtin_p (d->code))
18319 if (! (type = opaque_ftype_opaque_opaque))
18320 type = opaque_ftype_opaque_opaque
18321 = build_function_type_list (opaque_V4SI_type_node,
18322 opaque_V4SI_type_node,
18323 opaque_V4SI_type_node,
18324 NULL_TREE);
18326 else
18328 enum insn_code icode = d->icode;
18329 if (d->name == 0)
18331 if (TARGET_DEBUG_BUILTIN)
18332 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
18333 (long unsigned)i);
18335 continue;
18338 if (icode == CODE_FOR_nothing)
18340 if (TARGET_DEBUG_BUILTIN)
18341 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18342 d->name);
18344 continue;
18347 mode0 = insn_data[icode].operand[0].mode;
18348 mode1 = insn_data[icode].operand[1].mode;
18349 mode2 = insn_data[icode].operand[2].mode;
18351 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18353 if (! (type = v2si_ftype_v2si_qi))
18354 type = v2si_ftype_v2si_qi
18355 = build_function_type_list (opaque_V2SI_type_node,
18356 opaque_V2SI_type_node,
18357 char_type_node,
18358 NULL_TREE);
18361 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18362 && mode2 == QImode)
18364 if (! (type = v2si_ftype_int_qi))
18365 type = v2si_ftype_int_qi
18366 = build_function_type_list (opaque_V2SI_type_node,
18367 integer_type_node,
18368 char_type_node,
18369 NULL_TREE);
18372 else
18373 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18374 d->code, d->name);
18377 def_builtin (d->name, type, d->code);
18380 /* Add the simple unary operators. */
18381 d = bdesc_1arg;
18382 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18384 machine_mode mode0, mode1;
18385 tree type;
18386 HOST_WIDE_INT mask = d->mask;
18388 if ((mask & builtin_mask) != mask)
18390 if (TARGET_DEBUG_BUILTIN)
18391 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18392 continue;
18395 if (rs6000_overloaded_builtin_p (d->code))
18397 if (! (type = opaque_ftype_opaque))
18398 type = opaque_ftype_opaque
18399 = build_function_type_list (opaque_V4SI_type_node,
18400 opaque_V4SI_type_node,
18401 NULL_TREE);
18403 else
18405 enum insn_code icode = d->icode;
18406 if (d->name == 0)
18408 if (TARGET_DEBUG_BUILTIN)
18409 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
18410 (long unsigned)i);
18412 continue;
18415 if (icode == CODE_FOR_nothing)
18417 if (TARGET_DEBUG_BUILTIN)
18418 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18419 d->name);
18421 continue;
18424 mode0 = insn_data[icode].operand[0].mode;
18425 mode1 = insn_data[icode].operand[1].mode;
18427 if (mode0 == V2SImode && mode1 == QImode)
18429 if (! (type = v2si_ftype_qi))
18430 type = v2si_ftype_qi
18431 = build_function_type_list (opaque_V2SI_type_node,
18432 char_type_node,
18433 NULL_TREE);
18436 else
18437 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18438 d->code, d->name);
18441 def_builtin (d->name, type, d->code);
18444 /* Add the simple no-argument operators. */
18445 d = bdesc_0arg;
18446 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18448 machine_mode mode0;
18449 tree type;
18450 HOST_WIDE_INT mask = d->mask;
18452 if ((mask & builtin_mask) != mask)
18454 if (TARGET_DEBUG_BUILTIN)
18455 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18456 continue;
18458 if (rs6000_overloaded_builtin_p (d->code))
18460 if (!opaque_ftype_opaque)
18461 opaque_ftype_opaque
18462 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18463 type = opaque_ftype_opaque;
18465 else
18467 enum insn_code icode = d->icode;
18468 if (d->name == 0)
18470 if (TARGET_DEBUG_BUILTIN)
18471 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18472 (long unsigned) i);
18473 continue;
18475 if (icode == CODE_FOR_nothing)
18477 if (TARGET_DEBUG_BUILTIN)
18478 fprintf (stderr,
18479 "rs6000_builtin, skip no-argument %s (no code)\n",
18480 d->name);
18481 continue;
18483 mode0 = insn_data[icode].operand[0].mode;
18484 if (mode0 == V2SImode)
18486 /* Code for paired single.  */
18487 if (! (type = v2si_ftype))
18489 v2si_ftype
18490 = build_function_type_list (opaque_V2SI_type_node,
18491 NULL_TREE);
18492 type = v2si_ftype;
18495 else
18496 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18497 d->code, d->name);
18499 def_builtin (d->name, type, d->code);
18503 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18504 static void
18505 init_float128_ibm (machine_mode mode)
18507 if (!TARGET_XL_COMPAT)
18509 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18510 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18511 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18512 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18514 if (!TARGET_HARD_FLOAT)
18516 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18517 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18518 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18519 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18520 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18521 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18522 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18523 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18525 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18526 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18527 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18528 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18529 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18530 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18531 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18532 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18535 else
18537 set_optab_libfunc (add_optab, mode, "_xlqadd");
18538 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18539 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18540 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18543 /* Add various conversions for IFmode to use the traditional TFmode
18544 names. */
18545 if (mode == IFmode)
18547 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18548 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18549 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18550 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18551 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18552 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18554 if (TARGET_POWERPC64)
18556 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18557 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18558 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18559 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
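/* Illustrative sketch, not part of the original source: with IBM extended
   double and the default !TARGET_XL_COMPAT, a plain long double operation
   becomes a call to a libgcc routine registered above:

     long double qadd (long double a, long double b)
     {
       return a + b;        // emitted as a call to __gcc_qadd
     }

   Under -mxl-compat the same addition calls _xlqadd instead.  */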
18564 /* Create a decl for either complex long double multiply or complex long double
18565 divide when long double is IEEE 128-bit floating point. We can't use
18566 __multc3 and __divtc3 because the original long double, using IBM extended
18567 double, already used those names. The complex multiply/divide functions are encoded
18568 as builtin functions with a complex result and 4 scalar inputs. */
18570 static void
18571 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
18573 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
18574 name, NULL_TREE);
18576 set_builtin_decl (fncode, fndecl, true);
18578 if (TARGET_DEBUG_BUILTIN)
18579 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
18581 return;
18584 /* Set up IEEE 128-bit floating point routines. Use different names if the
18585 arguments can be passed in a vector register. The historical PowerPC
18586 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18587 continue to use that if we aren't using vector registers to pass IEEE
18588 128-bit floating point. */
18590 static void
18591 init_float128_ieee (machine_mode mode)
18593 if (FLOAT128_VECTOR_P (mode))
18595 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. */
18596 if (mode == TFmode && TARGET_IEEEQUAD)
18598 built_in_function fncode_mul =
18599 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
18600 - MIN_MODE_COMPLEX_FLOAT);
18601 built_in_function fncode_div =
18602 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
18603 - MIN_MODE_COMPLEX_FLOAT);
18605 tree fntype = build_function_type_list (complex_long_double_type_node,
18606 long_double_type_node,
18607 long_double_type_node,
18608 long_double_type_node,
18609 long_double_type_node,
18610 NULL_TREE);
18612 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
18613 create_complex_muldiv ("__divkc3", fncode_div, fntype);
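/* Illustrative sketch, not part of the original source: the effect of the
   two decls just created, under -mabi=ieeelongdouble:

     _Complex long double cmul (_Complex long double a,
                                _Complex long double b)
     {
       return a * b;        // lowered to a call to __mulkc3
     }

   Division likewise reaches __divkc3, while the legacy __multc3/__divtc3
   names stay reserved for the IBM extended double format.  */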
18616 set_optab_libfunc (add_optab, mode, "__addkf3");
18617 set_optab_libfunc (sub_optab, mode, "__subkf3");
18618 set_optab_libfunc (neg_optab, mode, "__negkf2");
18619 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18620 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18621 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18622 set_optab_libfunc (abs_optab, mode, "__abstkf2");
18624 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18625 set_optab_libfunc (ne_optab, mode, "__nekf2");
18626 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18627 set_optab_libfunc (ge_optab, mode, "__gekf2");
18628 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18629 set_optab_libfunc (le_optab, mode, "__lekf2");
18630 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18632 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18633 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18634 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18635 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18637 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18638 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18639 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18641 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18642 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18643 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18645 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18646 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18647 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18648 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18649 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18650 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18652 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18653 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18654 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18655 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18657 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18658 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18659 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18660 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18662 if (TARGET_POWERPC64)
18664 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18665 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18666 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18667 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18671 else
18673 set_optab_libfunc (add_optab, mode, "_q_add");
18674 set_optab_libfunc (sub_optab, mode, "_q_sub");
18675 set_optab_libfunc (neg_optab, mode, "_q_neg");
18676 set_optab_libfunc (smul_optab, mode, "_q_mul");
18677 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18678 if (TARGET_PPC_GPOPT)
18679 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18681 set_optab_libfunc (eq_optab, mode, "_q_feq");
18682 set_optab_libfunc (ne_optab, mode, "_q_fne");
18683 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18684 set_optab_libfunc (ge_optab, mode, "_q_fge");
18685 set_optab_libfunc (lt_optab, mode, "_q_flt");
18686 set_optab_libfunc (le_optab, mode, "_q_fle");
18688 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18689 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18690 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18691 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18692 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18693 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18694 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18695 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
18699 static void
18700 rs6000_init_libfuncs (void)
18702 /* __float128 support. */
18703 if (TARGET_FLOAT128_TYPE)
18705 init_float128_ibm (IFmode);
18706 init_float128_ieee (KFmode);
18709 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18710 if (TARGET_LONG_DOUBLE_128)
18712 if (!TARGET_IEEEQUAD)
18713 init_float128_ibm (TFmode);
18715 /* IEEE 128-bit, including 32-bit SVR4 quad floating point routines. */
18716 else
18717 init_float128_ieee (TFmode);
18721 /* Emit a potentially record-form instruction, setting DST from SRC.
18722 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18723 signed comparison of DST with zero. If DOT is 1, the generated RTL
18724 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18725 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18726 a separate COMPARE. */
18728 void
18729 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18731 if (dot == 0)
18733 emit_move_insn (dst, src);
18734 return;
18737 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18739 emit_move_insn (dst, src);
18740 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18741 return;
18744 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18745 if (dot == 1)
18747 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18748 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18750 else
18752 rtx set = gen_rtx_SET (dst, src);
18753 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
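/* Illustrative note, not part of the original source: "record form" means
   the instruction also sets CR0 from a signed compare of its result with
   zero, e.g. for an AND:

     and   3,4,5      ; dot == 0: result only
     and.  3,4,5      ; dot != 0 and CCREG is CR0: result plus CR0

   With dot == 1 the PARALLEL clobbers DST, so the register allocator may
   discard the arithmetic result; and when CCREG is not CR0, the separate
   SET and COMPARE above are emitted, since dot insns can only set CR0.  */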
18758 /* A validation routine: say whether CODE, a condition code, and MODE
18759 match. The other alternatives either don't make sense or should
18760 never be generated. */
18762 void
18763 validate_condition_mode (enum rtx_code code, machine_mode mode)
18765 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18766 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18767 && GET_MODE_CLASS (mode) == MODE_CC);
18769 /* These don't make sense. */
18770 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18771 || mode != CCUNSmode);
18773 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18774 || mode == CCUNSmode);
18776 gcc_assert (mode == CCFPmode
18777 || (code != ORDERED && code != UNORDERED
18778 && code != UNEQ && code != LTGT
18779 && code != UNGT && code != UNLT
18780 && code != UNGE && code != UNLE));
18782 /* These should never be generated except for
18783 flag_finite_math_only. */
18784 gcc_assert (mode != CCFPmode
18785 || flag_finite_math_only
18786 || (code != LE && code != GE
18787 && code != UNEQ && code != LTGT
18788 && code != UNGT && code != UNLT));
18790 /* These are invalid; the information is not there. */
18791 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
18795 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18796 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18797 not zero, store there the bit offset (counted from the right) where
18798 the single stretch of 1 bits begins; and similarly for B, the bit
18799 offset where it ends. */
18801 bool
18802 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18804 unsigned HOST_WIDE_INT val = INTVAL (mask);
18805 unsigned HOST_WIDE_INT bit;
18806 int nb, ne;
18807 int n = GET_MODE_PRECISION (mode);
18809 if (mode != DImode && mode != SImode)
18810 return false;
18812 if (INTVAL (mask) >= 0)
18814 bit = val & -val;
18815 ne = exact_log2 (bit);
18816 nb = exact_log2 (val + bit);
18818 else if (val + 1 == 0)
18820 nb = n;
18821 ne = 0;
18823 else if (val & 1)
18825 val = ~val;
18826 bit = val & -val;
18827 nb = exact_log2 (bit);
18828 ne = exact_log2 (val + bit);
18830 else
18832 bit = val & -val;
18833 ne = exact_log2 (bit);
18834 if (val + bit == 0)
18835 nb = n;
18836 else
18837 nb = 0;
18840 nb--;
18842 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18843 return false;
18845 if (b)
18846 *b = nb;
18847 if (e)
18848 *e = ne;
18850 return true;
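/* Worked example (editorial, values invented): in SImode, 0x00fff000 has a
   single run of 1 bits from bit 12 through bit 23, so the function stores
   *e = 12 and *b = 23 and returns true.  The wrap-around constant
   0xff0000ff is also accepted, with *e = 24 and *b = 7; such masks are
   usable only by the rotate forms, which the callers below check for.  */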
18853 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18854 or rldicr instruction, to implement an AND with it in mode MODE. */
18856 bool
18857 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18859 int nb, ne;
18861 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18862 return false;
18864 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
18865 does not wrap. */
18866 if (mode == DImode)
18867 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18869 /* For SImode, rlwinm can do everything. */
18870 if (mode == SImode)
18871 return (nb < 32 && ne < 32);
18873 return false;
18876 /* Return the instruction template for an AND with mask in mode MODE, with
18877 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18879 const char *
18880 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18882 int nb, ne;
18884 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18885 gcc_unreachable ();
18887 if (mode == DImode && ne == 0)
18889 operands[3] = GEN_INT (63 - nb);
18890 if (dot)
18891 return "rldicl. %0,%1,0,%3";
18892 return "rldicl %0,%1,0,%3";
18895 if (mode == DImode && nb == 63)
18897 operands[3] = GEN_INT (63 - ne);
18898 if (dot)
18899 return "rldicr. %0,%1,0,%3";
18900 return "rldicr %0,%1,0,%3";
18903 if (nb < 32 && ne < 32)
18905 operands[3] = GEN_INT (31 - nb);
18906 operands[4] = GEN_INT (31 - ne);
18907 if (dot)
18908 return "rlwinm. %0,%1,0,%3,%4";
18909 return "rlwinm %0,%1,0,%3,%4";
18912 gcc_unreachable ();
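/* Worked example (editorial): a DImode AND with 0x00000000ffffffff has
   ne = 0 and nb = 31, so the first arm applies, operands[3] becomes 32,
   and the template is "rldicl %0,%1,0,32" (clear the upper 32 bits), or
   "rldicl." in record form.  */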
18915 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18916 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18917 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18919 bool
18920 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18922 int nb, ne;
18924 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18925 return false;
18927 int n = GET_MODE_PRECISION (mode);
18928 int sh = -1;
18930 if (CONST_INT_P (XEXP (shift, 1)))
18932 sh = INTVAL (XEXP (shift, 1));
18933 if (sh < 0 || sh >= n)
18934 return false;
18937 rtx_code code = GET_CODE (shift);
18939 /* Convert any shift by 0 to a rotate, to simplify below code. */
18940 if (sh == 0)
18941 code = ROTATE;
18943 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18944 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18945 code = ASHIFT;
18946 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18948 code = LSHIFTRT;
18949 sh = n - sh;
18952 /* DImode rotates need rld*. */
18953 if (mode == DImode && code == ROTATE)
18954 return (nb == 63 || ne == 0 || ne == sh);
18956 /* SImode rotates need rlw*. */
18957 if (mode == SImode && code == ROTATE)
18958 return (nb < 32 && ne < 32 && sh < 32);
18960 /* Wrap-around masks are only okay for rotates. */
18961 if (ne > nb)
18962 return false;
18964 /* Variable shifts are only okay for rotates. */
18965 if (sh < 0)
18966 return false;
18968 /* Don't allow ASHIFT if the mask is wrong for that. */
18969 if (code == ASHIFT && ne < sh)
18970 return false;
18972 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18973 if the mask is wrong for that. */
18974 if (nb < 32 && ne < 32 && sh < 32
18975 && !(code == LSHIFTRT && nb >= 32 - sh))
18976 return true;
18978 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18979 if the mask is wrong for that. */
18980 if (code == LSHIFTRT)
18981 sh = 64 - sh;
18982 if (nb == 63 || ne == 0 || ne == sh)
18983 return !(code == LSHIFTRT && nb >= sh);
18985 return false;
18988 /* Return the instruction template for a shift with mask in mode MODE, with
18989 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18991 const char *
18992 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18994 int nb, ne;
18996 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18997 gcc_unreachable ();
18999 if (mode == DImode && ne == 0)
19001 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19002 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
19003 operands[3] = GEN_INT (63 - nb);
19004 if (dot)
19005 return "rld%I2cl. %0,%1,%2,%3";
19006 return "rld%I2cl %0,%1,%2,%3";
19009 if (mode == DImode && nb == 63)
19011 operands[3] = GEN_INT (63 - ne);
19012 if (dot)
19013 return "rld%I2cr. %0,%1,%2,%3";
19014 return "rld%I2cr %0,%1,%2,%3";
19017 if (mode == DImode
19018 && GET_CODE (operands[4]) != LSHIFTRT
19019 && CONST_INT_P (operands[2])
19020 && ne == INTVAL (operands[2]))
19022 operands[3] = GEN_INT (63 - nb);
19023 if (dot)
19024 return "rld%I2c. %0,%1,%2,%3";
19025 return "rld%I2c %0,%1,%2,%3";
19028 if (nb < 32 && ne < 32)
19030 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19031 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19032 operands[3] = GEN_INT (31 - nb);
19033 operands[4] = GEN_INT (31 - ne);
19034 /* This insn can also be a 64-bit rotate with mask that really makes
19035 it just a shift right (with mask); the %h below are to adjust for
19036 that situation (shift count is >= 32 in that case). */
19037 if (dot)
19038 return "rlw%I2nm. %0,%1,%h2,%3,%4";
19039 return "rlw%I2nm %0,%1,%h2,%3,%4";
19042 gcc_unreachable ();
19045 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
19046 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
19047 ASHIFT, or LSHIFTRT) in mode MODE. */
19049 bool
19050 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
19052 int nb, ne;
19054 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19055 return false;
19057 int n = GET_MODE_PRECISION (mode);
19059 int sh = INTVAL (XEXP (shift, 1));
19060 if (sh < 0 || sh >= n)
19061 return false;
19063 rtx_code code = GET_CODE (shift);
19065 /* Convert any shift by 0 to a rotate, to simplify below code. */
19066 if (sh == 0)
19067 code = ROTATE;
19069 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19070 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19071 code = ASHIFT;
19072 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19074 code = LSHIFTRT;
19075 sh = n - sh;
19078 /* DImode rotates need rldimi. */
19079 if (mode == DImode && code == ROTATE)
19080 return (ne == sh);
19082 /* SImode rotates need rlwimi. */
19083 if (mode == SImode && code == ROTATE)
19084 return (nb < 32 && ne < 32 && sh < 32);
19086 /* Wrap-around masks are only okay for rotates. */
19087 if (ne > nb)
19088 return false;
19090 /* Don't allow ASHIFT if the mask is wrong for that. */
19091 if (code == ASHIFT && ne < sh)
19092 return false;
19094 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
19095 if the mask is wrong for that. */
19096 if (nb < 32 && ne < 32 && sh < 32
19097 && !(code == LSHIFTRT && nb >= 32 - sh))
19098 return true;
19100 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
19101 if the mask is wrong for that. */
19102 if (code == LSHIFTRT)
19103 sh = 64 - sh;
19104 if (ne == sh)
19105 return !(code == LSHIFTRT && nb >= sh);
19107 return false;
19110 /* Return the instruction template for an insert with mask in mode MODE, with
19111 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19113 const char *
19114 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
19116 int nb, ne;
19118 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19119 gcc_unreachable ();
19121 /* Prefer rldimi because rlwimi is cracked. */
19122 if (TARGET_POWERPC64
19123 && (!dot || mode == DImode)
19124 && GET_CODE (operands[4]) != LSHIFTRT
19125 && ne == INTVAL (operands[2]))
19127 operands[3] = GEN_INT (63 - nb);
19128 if (dot)
19129 return "rldimi. %0,%1,%2,%3";
19130 return "rldimi %0,%1,%2,%3";
19133 if (nb < 32 && ne < 32)
19135 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19136 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19137 operands[3] = GEN_INT (31 - nb);
19138 operands[4] = GEN_INT (31 - ne);
19139 if (dot)
19140 return "rlwimi. %0,%1,%2,%3,%4";
19141 return "rlwimi %0,%1,%2,%3,%4";
19144 gcc_unreachable ();
19147 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
19148 using two machine instructions. */
19150 bool
19151 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
19153 /* There are two kinds of AND we can handle with two insns:
19154 1) those we can do with two rl* insns;
19155 2) ori[s];xori[s].
19157 We do not handle that last case yet. */
19159 /* If there is just one stretch of ones, we can do it. */
19160 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
19161 return true;
19163 /* Otherwise, fill in the lowest "hole"; if we can do the result with
19164 one insn, we can do the whole thing with two. */
19165 unsigned HOST_WIDE_INT val = INTVAL (c);
19166 unsigned HOST_WIDE_INT bit1 = val & -val;
19167 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19168 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19169 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19170 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
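/* Worked example (editorial, value invented): c = 0xff0f is not a single
   run of 1 bits, so the hole-filling arithmetic computes

     bit1 = 0x0001   lowest 1 bit
     bit2 = 0x0010   lowest 0 bit above it (start of the hole)
     bit3 = 0x0100   lowest 1 bit above the hole

   and val + bit3 - bit2 = 0xffff, the hole filled in.  0xffff is a valid
   single mask, so this AND can be done in two insns.  */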
19173 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
19174 If EXPAND is true, split rotate-and-mask instructions we generate to
19175 their constituent parts as well (this is used during expand); if DOT
19176 is 1, make the last insn a record-form instruction clobbering the
19177 destination GPR and setting the CC reg (from operands[3]); if 2, set
19178 that GPR as well as the CC reg. */
19180 void
19181 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
19183 gcc_assert (!(expand && dot));
19185 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
19187 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19188 shift right. This generates better code than doing the masks without
19189 shifts, or shifting first right and then left. */
19190 int nb, ne;
19191 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
19193 gcc_assert (mode == DImode);
19195 int shift = 63 - nb;
19196 if (expand)
19198 rtx tmp1 = gen_reg_rtx (DImode);
19199 rtx tmp2 = gen_reg_rtx (DImode);
19200 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
19201 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
19202 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
19204 else
19206 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
19207 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
19208 emit_move_insn (operands[0], tmp);
19209 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
19210 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19212 return;
19215 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19216 that does the rest. */
19217 unsigned HOST_WIDE_INT bit1 = val & -val;
19218 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19219 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19220 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19222 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
19223 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
19225 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
19227 /* Two "no-rotate"-and-mask instructions, for SImode. */
19228 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
19230 gcc_assert (mode == SImode);
19232 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19233 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
19234 emit_move_insn (reg, tmp);
19235 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19236 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19237 return;
19240 gcc_assert (mode == DImode);
19242 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
19243 insns; we have to do the first in SImode, because it wraps. */
19244 if (mask2 <= 0xffffffff
19245 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
19247 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19248 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
19249 GEN_INT (mask1));
19250 rtx reg_low = gen_lowpart (SImode, reg);
19251 emit_move_insn (reg_low, tmp);
19252 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19253 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19254 return;
19257 /* Two rld* insns: rotate, clear the hole in the middle (which now is
19258 at the top end), rotate back and clear the other hole. */
19259 int right = exact_log2 (bit3);
19260 int left = 64 - right;
19262 /* Rotate the mask too. */
19263 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
19265 if (expand)
19267 rtx tmp1 = gen_reg_rtx (DImode);
19268 rtx tmp2 = gen_reg_rtx (DImode);
19269 rtx tmp3 = gen_reg_rtx (DImode);
19270 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
19271 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
19272 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
19273 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
19275 else
19277 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
19278 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
19279 emit_move_insn (operands[0], tmp);
19280 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
19281 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
19282 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19286 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
19287 for lfq and stfq insns iff the registers are hard registers. */
19289 int
19290 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
19292 /* We might have been passed a SUBREG. */
19293 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
19294 return 0;
19296 /* We might have been passed non-floating-point registers. */
19297 if (!FP_REGNO_P (REGNO (reg1))
19298 || !FP_REGNO_P (REGNO (reg2)))
19299 return 0;
19301 return (REGNO (reg1) == REGNO (reg2) - 1);
19304 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
19305 addr1 and addr2 must be in consecutive memory locations
19306 (addr2 == addr1 + 8). */
19308 int
19309 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
19311 rtx addr1, addr2;
19312 unsigned int reg1, reg2;
19313 int offset1, offset2;
19315 /* The mems cannot be volatile. */
19316 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
19317 return 0;
19319 addr1 = XEXP (mem1, 0);
19320 addr2 = XEXP (mem2, 0);
19322 /* Extract an offset (if used) from the first addr. */
19323 if (GET_CODE (addr1) == PLUS)
19325 /* If not a REG, return zero. */
19326 if (GET_CODE (XEXP (addr1, 0)) != REG)
19327 return 0;
19328 else
19330 reg1 = REGNO (XEXP (addr1, 0));
19331 /* The offset must be constant! */
19332 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
19333 return 0;
19334 offset1 = INTVAL (XEXP (addr1, 1));
19337 else if (GET_CODE (addr1) != REG)
19338 return 0;
19339 else
19341 reg1 = REGNO (addr1);
19342 /* This was a simple (mem (reg)) expression. Offset is 0. */
19343 offset1 = 0;
19346 /* And now for the second addr. */
19347 if (GET_CODE (addr2) == PLUS)
19349 /* If not a REG, return zero. */
19350 if (GET_CODE (XEXP (addr2, 0)) != REG)
19351 return 0;
19352 else
19354 reg2 = REGNO (XEXP (addr2, 0));
19355 /* The offset must be constant. */
19356 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
19357 return 0;
19358 offset2 = INTVAL (XEXP (addr2, 1));
19361 else if (GET_CODE (addr2) != REG)
19362 return 0;
19363 else
19365 reg2 = REGNO (addr2);
19366 /* This was a simple (mem (reg)) expression. Offset is 0. */
19367 offset2 = 0;
19370 /* Both of these must have the same base register. */
19371 if (reg1 != reg2)
19372 return 0;
19374 /* The offset for the second addr must be 8 more than the first addr. */
19375 if (offset2 != offset1 + 8)
19376 return 0;
19378 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
19379 instructions. */
19380 return 1;
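/* Illustrative note, not part of the original source: together with
   registers_ok_for_quad_peep, this validates a peephole that fuses
   adjacent doubleword accesses such as

     lfd 10,8(3)
     lfd 11,16(3)     ; same base reg, offset1 + 8, consecutive FPRs

   into a single lfq 10,8(3) on processors that implement lfq/stfq.  */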
19383 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
19384 need to use DDmode, in all other cases we can use the same mode. */
19385 static machine_mode
19386 rs6000_secondary_memory_needed_mode (machine_mode mode)
19388 if (lra_in_progress && mode == SDmode)
19389 return DDmode;
19390 return mode;
19393 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
19394 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
19395 only work on the traditional altivec registers, note if an altivec register
19396 was chosen. */
19398 static enum rs6000_reg_type
19399 register_to_reg_type (rtx reg, bool *is_altivec)
19401 HOST_WIDE_INT regno;
19402 enum reg_class rclass;
19404 if (GET_CODE (reg) == SUBREG)
19405 reg = SUBREG_REG (reg);
19407 if (!REG_P (reg))
19408 return NO_REG_TYPE;
19410 regno = REGNO (reg);
19411 if (regno >= FIRST_PSEUDO_REGISTER)
19413 if (!lra_in_progress && !reload_completed)
19414 return PSEUDO_REG_TYPE;
19416 regno = true_regnum (reg);
19417 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
19418 return PSEUDO_REG_TYPE;
19421 gcc_assert (regno >= 0);
19423 if (is_altivec && ALTIVEC_REGNO_P (regno))
19424 *is_altivec = true;
19426 rclass = rs6000_regno_regclass[regno];
19427 return reg_class_to_reg_type[(int)rclass];
19430 /* Helper function to return the cost of adding a TOC entry address. */
19432 static inline int
19433 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
19435 int ret;
19437 if (TARGET_CMODEL != CMODEL_SMALL)
19438 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
19440 else
19441 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
19443 return ret;
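/* Under -mcmodel=medium or -mcmodel=large the extra cost is thus 1 or 2
   depending on the class's offset-addressing support; under -mcmodel=small
   it is 3, or 6 with -mminimal-toc, which needs a longer sequence to
   address its indexed TOC. */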
19446 /* Helper function for rs6000_secondary_reload to determine whether the memory
19447 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
19448 needs reloading. Return negative if the memory is not handled by the memory
19449 helper functions (so a different reload method should be tried), 0 if no
19450 additional instructions are needed, and positive to give the extra cost of the
19451 memory. */
19453 static int
19454 rs6000_secondary_reload_memory (rtx addr,
19455 enum reg_class rclass,
19456 machine_mode mode)
19458 int extra_cost = 0;
19459 rtx reg, and_arg, plus_arg0, plus_arg1;
19460 addr_mask_type addr_mask;
19461 const char *type = NULL;
19462 const char *fail_msg = NULL;
19464 if (GPR_REG_CLASS_P (rclass))
19465 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19467 else if (rclass == FLOAT_REGS)
19468 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19470 else if (rclass == ALTIVEC_REGS)
19471 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19473 /* For the combined VSX_REGS, turn off Altivec AND -16. */
19474 else if (rclass == VSX_REGS)
19475 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
19476 & ~RELOAD_REG_AND_M16);
19478 /* If the register allocator hasn't made up its mind yet on the register
19479 class to use, settle on defaults. */
19480 else if (rclass == NO_REGS)
19482 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
19483 & ~RELOAD_REG_AND_M16);
19485 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
19486 addr_mask &= ~(RELOAD_REG_INDEXED
19487 | RELOAD_REG_PRE_INCDEC
19488 | RELOAD_REG_PRE_MODIFY);
19491 else
19492 addr_mask = 0;
19494 /* If the register isn't valid in this register class, just return now. */
19495 if ((addr_mask & RELOAD_REG_VALID) == 0)
19497 if (TARGET_DEBUG_ADDR)
19499 fprintf (stderr,
19500 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19501 "not valid in class\n",
19502 GET_MODE_NAME (mode), reg_class_names[rclass]);
19503 debug_rtx (addr);
19506 return -1;
19509 switch (GET_CODE (addr))
19511 /* Does the register class support auto update forms for this mode? We
19512 don't need a scratch register, since the powerpc only supports
19513 PRE_INC, PRE_DEC, and PRE_MODIFY. */
19514 case PRE_INC:
19515 case PRE_DEC:
19516 reg = XEXP (addr, 0);
19517 if (!base_reg_operand (reg, GET_MODE (reg)))
19519 fail_msg = "no base register #1";
19520 extra_cost = -1;
19523 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19525 extra_cost = 1;
19526 type = "update";
19528 break;
19530 case PRE_MODIFY:
19531 reg = XEXP (addr, 0);
19532 plus_arg1 = XEXP (addr, 1);
19533 if (!base_reg_operand (reg, GET_MODE (reg))
19534 || GET_CODE (plus_arg1) != PLUS
19535 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
19537 fail_msg = "bad PRE_MODIFY";
19538 extra_cost = -1;
19541 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19543 extra_cost = 1;
19544 type = "update";
19546 break;
19548 /* Do we need to simulate AND -16 to clear the bottom address bits used
19549 in VMX load/stores? Only allow the AND for vector sizes. */
19550 case AND:
19551 and_arg = XEXP (addr, 0);
19552 if (GET_MODE_SIZE (mode) != 16
19553 || GET_CODE (XEXP (addr, 1)) != CONST_INT
19554 || INTVAL (XEXP (addr, 1)) != -16)
19556 fail_msg = "bad Altivec AND #1";
19557 extra_cost = -1;
19560 if (rclass != ALTIVEC_REGS)
19562 if (legitimate_indirect_address_p (and_arg, false))
19563 extra_cost = 1;
19565 else if (legitimate_indexed_address_p (and_arg, false))
19566 extra_cost = 2;
19568 else
19570 fail_msg = "bad Altivec AND #2";
19571 extra_cost = -1;
19574 type = "and";
19576 break;
19578 /* If this is an indirect address, make sure it is a base register. */
19579 case REG:
19580 case SUBREG:
19581 if (!legitimate_indirect_address_p (addr, false))
19583 extra_cost = 1;
19584 type = "move";
19586 break;
19588 /* If this is an indexed address, make sure the register class can handle
19589 indexed addresses for this mode. */
19590 case PLUS:
19591 plus_arg0 = XEXP (addr, 0);
19592 plus_arg1 = XEXP (addr, 1);
19594 /* (plus (plus (reg) (constant)) (constant)) is generated during
19595 push_reload processing, so handle it now. */
19596 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
19598 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19600 extra_cost = 1;
19601 type = "offset";
19605 /* (plus (plus (reg) (constant)) (reg)) is also generated during
19606 push_reload processing, so handle it now. */
19607 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
19609 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19611 extra_cost = 1;
19612 type = "indexed #2";
19616 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
19618 fail_msg = "no base register #2";
19619 extra_cost = -1;
19622 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
19624 if ((addr_mask & RELOAD_REG_INDEXED) == 0
19625 || !legitimate_indexed_address_p (addr, false))
19627 extra_cost = 1;
19628 type = "indexed";
19632 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
19633 && CONST_INT_P (plus_arg1))
19635 if (!quad_address_offset_p (INTVAL (plus_arg1)))
19637 extra_cost = 1;
19638 type = "vector d-form offset";
19642 /* Make sure the register class can handle offset addresses. */
19643 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19645 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19647 extra_cost = 1;
19648 type = "offset #2";
19652 else
19654 fail_msg = "bad PLUS";
19655 extra_cost = -1;
19658 break;
19660 case LO_SUM:
19661 /* Quad offsets are restricted and can't handle normal addresses. */
19662 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19664 extra_cost = -1;
19665 type = "vector d-form lo_sum";
19668 else if (!legitimate_lo_sum_address_p (mode, addr, false))
19670 fail_msg = "bad LO_SUM";
19671 extra_cost = -1;
19674 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19676 extra_cost = 1;
19677 type = "lo_sum";
19679 break;
19681 /* Static addresses need to create a TOC entry. */
19682 case CONST:
19683 case SYMBOL_REF:
19684 case LABEL_REF:
19685 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19687 extra_cost = -1;
19688 type = "vector d-form lo_sum #2";
19691 else
19693 type = "address";
19694 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
19696 break;
19698 /* TOC references look like offsettable memory. */
19699 case UNSPEC:
19700 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
19702 fail_msg = "bad UNSPEC";
19703 extra_cost = -1;
19706 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19708 extra_cost = -1;
19709 type = "vector d-form lo_sum #3";
19712 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19714 extra_cost = 1;
19715 type = "toc reference";
19717 break;
19719 default:
19721 fail_msg = "bad address";
19722 extra_cost = -1;
19726 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
19728 if (extra_cost < 0)
19729 fprintf (stderr,
19730 "rs6000_secondary_reload_memory error: mode = %s, "
19731 "class = %s, addr_mask = '%s', %s\n",
19732 GET_MODE_NAME (mode),
19733 reg_class_names[rclass],
19734 rs6000_debug_addr_mask (addr_mask, false),
19735 (fail_msg != NULL) ? fail_msg : "<bad address>");
19737 else
19738 fprintf (stderr,
19739 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19740 "addr_mask = '%s', extra cost = %d, %s\n",
19741 GET_MODE_NAME (mode),
19742 reg_class_names[rclass],
19743 rs6000_debug_addr_mask (addr_mask, false),
19744 extra_cost,
19745 (type) ? type : "<none>");
19747 debug_rtx (addr);
19750 return extra_cost;
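/* For example, a V2DFmode access through the Altivec-style address
   (and (plus (reg) (reg)) (const_int -16)) reloaded for FLOAT_REGS yields
   extra_cost 2: the AND must first be computed into a scratch base
   register before the actual load or store can be emitted. */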
19753 /* Helper function for rs6000_secondary_reload to return true if a move to a
19754 different register class is really a simple move. */
19756 static bool
19757 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19758 enum rs6000_reg_type from_type,
19759 machine_mode mode)
19761 int size = GET_MODE_SIZE (mode);
19763 /* Add support for various direct moves available. In this function, we only
19764 look at cases where we don't need any extra registers, and one or more
19765 simple move insns are issued. Originally, small integers were not allowed
19766 in FPR/VSX registers. Single precision binary floating point is not a simple
19767 move because we need to convert to the single precision memory layout.
19768 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19769 need special direct move handling, which we do not support yet. */
19770 if (TARGET_DIRECT_MOVE
19771 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19772 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19774 if (TARGET_POWERPC64)
19776 /* ISA 2.07: MTVSRD or MFVSRD. */
19777 if (size == 8)
19778 return true;
19780 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
19781 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
19782 return true;
19785 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19786 if (TARGET_P8_VECTOR)
19788 if (mode == SImode)
19789 return true;
19791 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
19792 return true;
19795 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19796 if (mode == SDmode)
19797 return true;
19800 /* Power6+: MFTGPR or MFFGPR. */
19801 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19802 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19803 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19804 return true;
19806 /* Move to/from SPR. */
19807 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19808 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19809 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19810 return true;
19812 return false;
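/* For example, a DImode move between a GPR and a VSX register on a 64-bit
   ISA 2.07 target is a simple move (one mtvsrd or mfvsrd), so no scratch
   register or secondary memory is needed. */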
19815 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
19816 special direct moves that involve allocating an extra register. Return true
19817 if there is a helper insn for the move, recording its insn code and extra
19818 cost in SRI; return false if not. */
19820 static bool
19821 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19822 enum rs6000_reg_type from_type,
19823 machine_mode mode,
19824 secondary_reload_info *sri,
19825 bool altivec_p)
19827 bool ret = false;
19828 enum insn_code icode = CODE_FOR_nothing;
19829 int cost = 0;
19830 int size = GET_MODE_SIZE (mode);
19832 if (TARGET_POWERPC64 && size == 16)
19834 /* Handle moving 128-bit values from GPRs to VSX registers on
19835 ISA 2.07 (power8, power9) when running in 64-bit mode using
19836 XXPERMDI to glue the two 64-bit values back together. */
19837 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19839 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19840 icode = reg_addr[mode].reload_vsx_gpr;
19843 /* Handle moving 128-bit values from VSX registers to GPRs on
19844 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19845 bottom 64-bit value. */
19846 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19848 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19849 icode = reg_addr[mode].reload_gpr_vsx;
19853 else if (TARGET_POWERPC64 && mode == SFmode)
19855 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19857 cost = 3; /* xscvdpspn, mfvsrd, and. */
19858 icode = reg_addr[mode].reload_gpr_vsx;
19861 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19863 cost = 2; /* mtvsrz, xscvspdpn. */
19864 icode = reg_addr[mode].reload_vsx_gpr;
19868 else if (!TARGET_POWERPC64 && size == 8)
19870 /* Handle moving 64-bit values from GPRs to floating point registers on
19871 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19872 32-bit values back together. Altivec register classes must be handled
19873 specially since a different instruction is used, and the secondary
19874 reload support requires a single register class in the scratch
19875 register constraint. However, right now TFmode is not allowed in
19876 Altivec registers, so the pattern will never match. */
19877 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19879 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19880 icode = reg_addr[mode].reload_fpr_gpr;
19884 if (icode != CODE_FOR_nothing)
19886 ret = true;
19887 if (sri)
19889 sri->icode = icode;
19890 sri->extra_cost = cost;
19894 return ret;
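/* For example, moving a TImode value from a GPR pair into a VSX register
   on a 64-bit ISA 2.07 target selects the reload_vsx_gpr pattern: two
   mtvsrd insns plus one xxpermdi to glue the halves together, matching the
   cost of 3 recorded above. */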
19897 /* Return whether a move between two register classes can be done either
19898 directly (simple move) or via a pattern that uses a single extra temporary
19899 (using ISA 2.07's direct move in this case). */
19901 static bool
19902 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19903 enum rs6000_reg_type from_type,
19904 machine_mode mode,
19905 secondary_reload_info *sri,
19906 bool altivec_p)
19908 /* Fall back to load/store reloads if either type is not a register. */
19909 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19910 return false;
19912 /* If we haven't allocated registers yet, assume the move can be done for the
19913 standard register types. */
19914 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19915 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19916 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19917 return true;
19919 /* A move within the same set of registers is a simple move for non-specialized
19920 registers. */
19921 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19922 return true;
19924 /* Check whether a simple move can be done directly. */
19925 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19927 if (sri)
19929 sri->icode = CODE_FOR_nothing;
19930 sri->extra_cost = 0;
19932 return true;
19935 /* Now check if we can do it in a few steps. */
19936 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19937 altivec_p);
19940 /* Inform reload about cases where moving X with a mode MODE to a register in
19941 RCLASS requires an extra scratch or immediate register. Return the class
19942 needed for the immediate register.
19944 For VSX and Altivec, we may need a register to convert sp+offset into
19945 reg+sp.
19947 For misaligned 64-bit gpr loads and stores we need a register to
19948 convert an offset address to indirect. */
19950 static reg_class_t
19951 rs6000_secondary_reload (bool in_p,
19952 rtx x,
19953 reg_class_t rclass_i,
19954 machine_mode mode,
19955 secondary_reload_info *sri)
19957 enum reg_class rclass = (enum reg_class) rclass_i;
19958 reg_class_t ret = ALL_REGS;
19959 enum insn_code icode;
19960 bool default_p = false;
19961 bool done_p = false;
19963 /* Allow subreg of memory before/during reload. */
19964 bool memory_p = (MEM_P (x)
19965 || (!reload_completed && GET_CODE (x) == SUBREG
19966 && MEM_P (SUBREG_REG (x))));
19968 sri->icode = CODE_FOR_nothing;
19969 sri->t_icode = CODE_FOR_nothing;
19970 sri->extra_cost = 0;
19971 icode = ((in_p)
19972 ? reg_addr[mode].reload_load
19973 : reg_addr[mode].reload_store);
19975 if (REG_P (x) || register_operand (x, mode))
19977 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19978 bool altivec_p = (rclass == ALTIVEC_REGS);
19979 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19981 if (!in_p)
19982 std::swap (to_type, from_type);
19984 /* Can we do a direct move of some sort? */
19985 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19986 altivec_p))
19988 icode = (enum insn_code)sri->icode;
19989 default_p = false;
19990 done_p = true;
19991 ret = NO_REGS;
19995 /* Make sure 0.0 is not reloaded or forced into memory. */
19996 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19998 ret = NO_REGS;
19999 default_p = false;
20000 done_p = true;
20003 /* If this is a scalar floating point value and we want to load it into the
20004 traditional Altivec registers, do it via a traditional floating
20005 point register, unless we have D-form addressing. Also make sure that
20006 non-zero constants use an FPR. */
20007 if (!done_p && reg_addr[mode].scalar_in_vmx_p
20008 && !mode_supports_vmx_dform (mode)
20009 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20010 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
20012 ret = FLOAT_REGS;
20013 default_p = false;
20014 done_p = true;
20017 /* Handle reload of load/stores if we have reload helper functions. */
20018 if (!done_p && icode != CODE_FOR_nothing && memory_p)
20020 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
20021 mode);
20023 if (extra_cost >= 0)
20025 done_p = true;
20026 ret = NO_REGS;
20027 if (extra_cost > 0)
20029 sri->extra_cost = extra_cost;
20030 sri->icode = icode;
20035 /* Handle unaligned loads and stores of integer registers. */
20036 if (!done_p && TARGET_POWERPC64
20037 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20038 && memory_p
20039 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
20041 rtx addr = XEXP (x, 0);
20042 rtx off = address_offset (addr);
20044 if (off != NULL_RTX)
20046 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20047 unsigned HOST_WIDE_INT offset = INTVAL (off);
20049 /* We need a secondary reload when our legitimate_address_p
20050 says the address is good (as otherwise the entire address
20051 will be reloaded), and the offset is not a multiple of
20052 four or we have an address wrap. Address wrap will only
20053 occur for LO_SUMs since legitimate_offset_address_p
20054 rejects addresses for 16-byte mems that will wrap. */
20055 if (GET_CODE (addr) == LO_SUM
20056 ? (1 /* legitimate_address_p allows any offset for lo_sum */
20057 && ((offset & 3) != 0
20058 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
20059 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
20060 && (offset & 3) != 0))
20062 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
20063 if (in_p)
20064 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
20065 : CODE_FOR_reload_di_load);
20066 else
20067 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
20068 : CODE_FOR_reload_di_store);
20069 sri->extra_cost = 2;
20070 ret = NO_REGS;
20071 done_p = true;
20073 else
20074 default_p = true;
20076 else
20077 default_p = true;
20080 if (!done_p && !TARGET_POWERPC64
20081 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20082 && memory_p
20083 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
20085 rtx addr = XEXP (x, 0);
20086 rtx off = address_offset (addr);
20088 if (off != NULL_RTX)
20090 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20091 unsigned HOST_WIDE_INT offset = INTVAL (off);
20093 /* We need a secondary reload when our legitimate_address_p
20094 says the address is good (as otherwise the entire address
20095 will be reloaded), and we have a wrap.
20097 legitimate_lo_sum_address_p allows LO_SUM addresses to
20098 have any offset so test for wrap in the low 16 bits.
20100 legitimate_offset_address_p checks for the range
20101 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
20102 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
20103 [0x7ff4,0x7fff] respectively, so test for the
20104 intersection of these ranges, [0x7ffc,0x7fff] and
20105 [0x7ff4,0x7ff7] respectively.
20107 Note that the address we see here may have been
20108 manipulated by legitimize_reload_address. */
20109 if (GET_CODE (addr) == LO_SUM
20110 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
20111 : offset - (0x8000 - extra) < UNITS_PER_WORD)
20113 if (in_p)
20114 sri->icode = CODE_FOR_reload_si_load;
20115 else
20116 sri->icode = CODE_FOR_reload_si_store;
20117 sri->extra_cost = 2;
20118 ret = NO_REGS;
20119 done_p = true;
20121 else
20122 default_p = true;
20124 else
20125 default_p = true;
20128 if (!done_p)
20129 default_p = true;
20131 if (default_p)
20132 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
20134 gcc_assert (ret != ALL_REGS);
20136 if (TARGET_DEBUG_ADDR)
20138 fprintf (stderr,
20139 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20140 "mode = %s",
20141 reg_class_names[ret],
20142 in_p ? "true" : "false",
20143 reg_class_names[rclass],
20144 GET_MODE_NAME (mode));
20146 if (reload_completed)
20147 fputs (", after reload", stderr);
20149 if (!done_p)
20150 fputs (", done_p not set", stderr);
20152 if (default_p)
20153 fputs (", default secondary reload", stderr);
20155 if (sri->icode != CODE_FOR_nothing)
20156 fprintf (stderr, ", reload func = %s, extra cost = %d",
20157 insn_data[sri->icode].name, sri->extra_cost);
20159 else if (sri->extra_cost > 0)
20160 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
20162 fputs ("\n", stderr);
20163 debug_rtx (x);
20166 return ret;
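/* For example, (mem:DI (plus (reg) (const_int 5))) in a GPR class needs a
   secondary reload: the DS-form ld/std instructions require the offset to
   be a multiple of 4, so CODE_FOR_reload_di_load (or the _si_ variant for
   -m32 -mpowerpc64) rewrites the address through a scratch register. */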
20169 /* Better tracing for rs6000_secondary_reload_inner. */
20171 static void
20172 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
20173 bool store_p)
20175 rtx set, clobber;
20177 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
20179 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
20180 store_p ? "store" : "load");
20182 if (store_p)
20183 set = gen_rtx_SET (mem, reg);
20184 else
20185 set = gen_rtx_SET (reg, mem);
20187 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
20188 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
20191 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
20192 ATTRIBUTE_NORETURN;
20194 static void
20195 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
20196 bool store_p)
20198 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
20199 gcc_unreachable ();
20202 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
20203 reload helper functions. These were identified in
20204 rs6000_secondary_reload_memory, and if reload decided to use the secondary
20205 reload, it calls the insns:
20206 reload_<RELOAD:mode>_<P:mptrsize>_store
20207 reload_<RELOAD:mode>_<P:mptrsize>_load
20209 which in turn calls this function, to do whatever is necessary to create
20210 valid addresses. */
20212 void
20213 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
20215 int regno = true_regnum (reg);
20216 machine_mode mode = GET_MODE (reg);
20217 addr_mask_type addr_mask;
20218 rtx addr;
20219 rtx new_addr;
20220 rtx op_reg, op0, op1;
20221 rtx and_op;
20222 rtx cc_clobber;
20223 rtvec rv;
20225 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
20226 || !base_reg_operand (scratch, GET_MODE (scratch)))
20227 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20229 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
20230 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20232 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
20233 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20235 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
20236 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20238 else
20239 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20241 /* Make sure the mode is valid in this register class. */
20242 if ((addr_mask & RELOAD_REG_VALID) == 0)
20243 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20245 if (TARGET_DEBUG_ADDR)
20246 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
20248 new_addr = addr = XEXP (mem, 0);
20249 switch (GET_CODE (addr))
20251 /* Does the register class support auto update forms for this mode? If
20252 not, do the update now. We don't need a scratch register, since the
20253 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
20254 case PRE_INC:
20255 case PRE_DEC:
20256 op_reg = XEXP (addr, 0);
20257 if (!base_reg_operand (op_reg, Pmode))
20258 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20260 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20262 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
20263 new_addr = op_reg;
20265 break;
20267 case PRE_MODIFY:
20268 op0 = XEXP (addr, 0);
20269 op1 = XEXP (addr, 1);
20270 if (!base_reg_operand (op0, Pmode)
20271 || GET_CODE (op1) != PLUS
20272 || !rtx_equal_p (op0, XEXP (op1, 0)))
20273 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20275 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20277 emit_insn (gen_rtx_SET (op0, op1));
20278 new_addr = reg;
20280 break;
20282 /* Do we need to simulate AND -16 to clear the bottom address bits used
20283 in VMX load/stores? */
20284 case AND:
20285 op0 = XEXP (addr, 0);
20286 op1 = XEXP (addr, 1);
20287 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
20289 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
20290 op_reg = op0;
20292 else if (GET_CODE (op1) == PLUS)
20294 emit_insn (gen_rtx_SET (scratch, op1));
20295 op_reg = scratch;
20298 else
20299 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20301 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
20302 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
20303 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
20304 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
20305 new_addr = scratch;
20307 break;
20309 /* If this is an indirect address, make sure it is a base register. */
20310 case REG:
20311 case SUBREG:
20312 if (!base_reg_operand (addr, GET_MODE (addr)))
20314 emit_insn (gen_rtx_SET (scratch, addr));
20315 new_addr = scratch;
20317 break;
20319 /* If this is an indexed address, make sure the register class can handle
20320 indexed addresses for this mode. */
20321 case PLUS:
20322 op0 = XEXP (addr, 0);
20323 op1 = XEXP (addr, 1);
20324 if (!base_reg_operand (op0, Pmode))
20325 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20327 else if (int_reg_operand (op1, Pmode))
20329 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20331 emit_insn (gen_rtx_SET (scratch, addr));
20332 new_addr = scratch;
20336 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
20338 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
20339 || !quad_address_p (addr, mode, false))
20341 emit_insn (gen_rtx_SET (scratch, addr));
20342 new_addr = scratch;
20346 /* Make sure the register class can handle offset addresses. */
20347 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20349 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20351 emit_insn (gen_rtx_SET (scratch, addr));
20352 new_addr = scratch;
20356 else
20357 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20359 break;
20361 case LO_SUM:
20362 op0 = XEXP (addr, 0);
20363 op1 = XEXP (addr, 1);
20364 if (!base_reg_operand (op0, Pmode))
20365 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20367 else if (int_reg_operand (op1, Pmode))
20369 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20371 emit_insn (gen_rtx_SET (scratch, addr));
20372 new_addr = scratch;
20376 /* Quad offsets are restricted and can't handle normal addresses. */
20377 else if (mode_supports_vsx_dform_quad (mode))
20379 emit_insn (gen_rtx_SET (scratch, addr));
20380 new_addr = scratch;
20383 /* Make sure the register class can handle offset addresses. */
20384 else if (legitimate_lo_sum_address_p (mode, addr, false))
20386 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20388 emit_insn (gen_rtx_SET (scratch, addr));
20389 new_addr = scratch;
20393 else
20394 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20396 break;
20398 case SYMBOL_REF:
20399 case CONST:
20400 case LABEL_REF:
20401 rs6000_emit_move (scratch, addr, Pmode);
20402 new_addr = scratch;
20403 break;
20405 default:
20406 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20409 /* Adjust the address if it changed. */
20410 if (addr != new_addr)
20412 mem = replace_equiv_address_nv (mem, new_addr);
20413 if (TARGET_DEBUG_ADDR)
20414 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
20417 /* Now create the move. */
20418 if (store_p)
20419 emit_insn (gen_rtx_SET (mem, reg));
20420 else
20421 emit_insn (gen_rtx_SET (reg, mem));
20423 return;
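/* For example, given (mem:V4SI (and (reg 9) (const_int -16))) and a class
   that has no real AND form in its addressing, the code above emits
   scratch = reg 9 & -16 (with a condition-register scratch clobber to
   match the and pattern) and rewrites the MEM to use the scratch register
   as its address. */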
20426 /* Convert reloads involving 64-bit gprs and misaligned offset
20427 addressing, or multiple 32-bit gprs and offsets that are too large,
20428 to use indirect addressing. */
20430 void
20431 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
20433 int regno = true_regnum (reg);
20434 enum reg_class rclass;
20435 rtx addr;
20436 rtx scratch_or_premodify = scratch;
20438 if (TARGET_DEBUG_ADDR)
20440 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
20441 store_p ? "store" : "load");
20442 fprintf (stderr, "reg:\n");
20443 debug_rtx (reg);
20444 fprintf (stderr, "mem:\n");
20445 debug_rtx (mem);
20446 fprintf (stderr, "scratch:\n");
20447 debug_rtx (scratch);
20450 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
20451 gcc_assert (GET_CODE (mem) == MEM);
20452 rclass = REGNO_REG_CLASS (regno);
20453 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
20454 addr = XEXP (mem, 0);
20456 if (GET_CODE (addr) == PRE_MODIFY)
20458 gcc_assert (REG_P (XEXP (addr, 0))
20459 && GET_CODE (XEXP (addr, 1)) == PLUS
20460 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
20461 scratch_or_premodify = XEXP (addr, 0);
20462 if (!HARD_REGISTER_P (scratch_or_premodify))
20463 /* If we have a pseudo here then reload will have arranged
20464 to have it replaced, but only in the original insn.
20465 Use the replacement here too. */
20466 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
20468 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
20469 expressions from the original insn, without unsharing them.
20470 Any RTL that points into the original insn will of course
20471 have register replacements applied. That is why we don't
20472 need to look for replacements under the PLUS. */
20473 addr = XEXP (addr, 1);
20475 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
20477 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
20479 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
20481 /* Now create the move. */
20482 if (store_p)
20483 emit_insn (gen_rtx_SET (mem, reg));
20484 else
20485 emit_insn (gen_rtx_SET (reg, mem));
20487 return;
20490 /* Given an rtx X being reloaded into a reg required to be
20491 in class CLASS, return the class of reg to actually use.
20492 In general this is just CLASS; but on some machines
20493 in some cases it is preferable to use a more restrictive class.
20495 On the RS/6000, we have to return NO_REGS when we want to reload a
20496 floating-point CONST_DOUBLE to force it to be copied to memory.
20498 We also don't want to reload integer values into floating-point
20499 registers if we can at all help it. In fact, this can
20500 cause reload to die, if it tries to generate a reload of CTR
20501 into a FP register and discovers it doesn't have the memory location
20502 required.
20504 ??? Would it be a good idea to have reload do the converse, that is
20505 try to reload floating modes into FP registers if possible? */
20508 static enum reg_class
20509 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
20511 machine_mode mode = GET_MODE (x);
20512 bool is_constant = CONSTANT_P (x);
20514 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
20515 reload class for it. */
20516 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20517 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
20518 return NO_REGS;
20520 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
20521 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
20522 return NO_REGS;
20524 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
20525 the reloading of address expressions using PLUS into floating point
20526 registers. */
20527 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
20529 if (is_constant)
20531 /* Zero is always allowed in all VSX registers. */
20532 if (x == CONST0_RTX (mode))
20533 return rclass;
20535 /* If this is a vector constant that can be formed with a few Altivec
20536 instructions, we want altivec registers. */
20537 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
20538 return ALTIVEC_REGS;
20540 /* If this is an integer constant that can easily be loaded into
20541 vector registers, allow it. */
20542 if (CONST_INT_P (x))
20544 HOST_WIDE_INT value = INTVAL (x);
20546 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
20547 2.06 can generate it in the Altivec registers with
20548 VSPLTI<x>. */
20549 if (value == -1)
20551 if (TARGET_P8_VECTOR)
20552 return rclass;
20553 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20554 return ALTIVEC_REGS;
20555 else
20556 return NO_REGS;
20559 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
20560 a sign extend in the Altivec registers. */
20561 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
20562 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
20563 return ALTIVEC_REGS;
20566 /* Force constant to memory. */
20567 return NO_REGS;
20570 /* D-form addressing can easily reload the value. */
20571 if (mode_supports_vmx_dform (mode)
20572 || mode_supports_vsx_dform_quad (mode))
20573 return rclass;
20575 /* If this is a scalar floating point value and we don't have D-form
20576 addressing, prefer the traditional floating point registers so that we
20577 can use D-form (register+offset) addressing. */
20578 if (rclass == VSX_REGS
20579 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
20580 return FLOAT_REGS;
20582 /* Prefer the Altivec registers if Altivec is handling the vector
20583 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
20584 loads. */
20585 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
20586 || mode == V1TImode)
20587 return ALTIVEC_REGS;
20589 return rclass;
20592 if (is_constant || GET_CODE (x) == PLUS)
20594 if (reg_class_subset_p (GENERAL_REGS, rclass))
20595 return GENERAL_REGS;
20596 if (reg_class_subset_p (BASE_REGS, rclass))
20597 return BASE_REGS;
20598 return NO_REGS;
20601 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
20602 return GENERAL_REGS;
20604 return rclass;
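/* For example, the constant -1 keeps the full VSX_REGS class on ISA 2.07,
   where xxlorc can build all-ones in any VSX register, but is narrowed to
   ALTIVEC_REGS on ISA 2.06, where only VSPLTI<x> can generate it. */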
20607 /* Debug version of rs6000_preferred_reload_class. */
20608 static enum reg_class
20609 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
20611 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
20613 fprintf (stderr,
20614 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
20615 "mode = %s, x:\n",
20616 reg_class_names[ret], reg_class_names[rclass],
20617 GET_MODE_NAME (GET_MODE (x)));
20618 debug_rtx (x);
20620 return ret;
20623 /* If we are copying between FP or AltiVec registers and anything else, we need
20624 a memory location. The exception is when we are targeting ppc64 and the
20625 fpr-to-gpr and gpr-to-fpr move instructions are available. Also, under VSX, you
20626 can copy vector registers from the FP register set to the Altivec register
20627 set and vice versa. */
20629 static bool
20630 rs6000_secondary_memory_needed (machine_mode mode,
20631 reg_class_t from_class,
20632 reg_class_t to_class)
20634 enum rs6000_reg_type from_type, to_type;
20635 bool altivec_p = ((from_class == ALTIVEC_REGS)
20636 || (to_class == ALTIVEC_REGS));
20638 /* If a simple/direct move is available, we don't need secondary memory. */
20639 from_type = reg_class_to_reg_type[(int)from_class];
20640 to_type = reg_class_to_reg_type[(int)to_class];
20642 if (rs6000_secondary_reload_move (to_type, from_type, mode,
20643 (secondary_reload_info *)0, altivec_p))
20644 return false;
20646 /* If we have a floating point or vector register class, we need to use
20647 memory to transfer the data. */
20648 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
20649 return true;
20651 return false;
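/* For example, copying DFmode between FLOAT_REGS and GENERAL_REGS without
   direct-move support must bounce through a stack slot, whereas on a
   64-bit ISA 2.07 target the same copy is a single mtvsrd/mfvsrd and needs
   no memory at all. */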
20654 /* Debug version of rs6000_secondary_memory_needed. */
20655 static bool
20656 rs6000_debug_secondary_memory_needed (machine_mode mode,
20657 reg_class_t from_class,
20658 reg_class_t to_class)
20660 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
20662 fprintf (stderr,
20663 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
20664 "to_class = %s, mode = %s\n",
20665 ret ? "true" : "false",
20666 reg_class_names[from_class],
20667 reg_class_names[to_class],
20668 GET_MODE_NAME (mode));
20670 return ret;
20673 /* Return the register class of a scratch register needed to copy IN into
20674 or out of a register in RCLASS in MODE. If it can be done directly,
20675 NO_REGS is returned. */
20677 static enum reg_class
20678 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
20679 rtx in)
20681 int regno;
20683 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
20684 #if TARGET_MACHO
20685 && MACHOPIC_INDIRECT
20686 #endif
20689 /* We cannot copy a symbolic operand directly into anything
20690 other than BASE_REGS for TARGET_ELF. So indicate that a
20691 register from BASE_REGS is needed as an intermediate
20692 register.
20694 On Darwin, pic addresses require a load from memory, which
20695 needs a base register. */
20696 if (rclass != BASE_REGS
20697 && (GET_CODE (in) == SYMBOL_REF
20698 || GET_CODE (in) == HIGH
20699 || GET_CODE (in) == LABEL_REF
20700 || GET_CODE (in) == CONST))
20701 return BASE_REGS;
20704 if (GET_CODE (in) == REG)
20706 regno = REGNO (in);
20707 if (regno >= FIRST_PSEUDO_REGISTER)
20709 regno = true_regnum (in);
20710 if (regno >= FIRST_PSEUDO_REGISTER)
20711 regno = -1;
20714 else if (GET_CODE (in) == SUBREG)
20716 regno = true_regnum (in);
20717 if (regno >= FIRST_PSEUDO_REGISTER)
20718 regno = -1;
20720 else
20721 regno = -1;
20723 /* If we have VSX register moves, prefer moving scalar values between
20724 Altivec registers and GPRs by going via an FPR (and then via memory)
20725 instead of reloading the secondary memory address for Altivec moves. */
20726 if (TARGET_VSX
20727 && GET_MODE_SIZE (mode) < 16
20728 && !mode_supports_vmx_dform (mode)
20729 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
20730 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
20731 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20732 && (regno >= 0 && INT_REGNO_P (regno)))))
20733 return FLOAT_REGS;
20735 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20736 into anything. */
20737 if (rclass == GENERAL_REGS || rclass == BASE_REGS
20738 || (regno >= 0 && INT_REGNO_P (regno)))
20739 return NO_REGS;
20741 /* Constants, memory, and VSX registers can go into VSX registers (both the
20742 traditional floating point and the altivec registers). */
20743 if (rclass == VSX_REGS
20744 && (regno == -1 || VSX_REGNO_P (regno)))
20745 return NO_REGS;
20747 /* Constants, memory, and FP registers can go into FP registers. */
20748 if ((regno == -1 || FP_REGNO_P (regno))
20749 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
20750 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20752 /* Memory and AltiVec registers can go into AltiVec registers. */
20753 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20754 && rclass == ALTIVEC_REGS)
20755 return NO_REGS;
20757 /* We can copy among the CR registers. */
20758 if ((rclass == CR_REGS || rclass == CR0_REGS)
20759 && regno >= 0 && CR_REGNO_P (regno))
20760 return NO_REGS;
20762 /* Otherwise, we need GENERAL_REGS. */
20763 return GENERAL_REGS;
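/* For example, copying a SYMBOL_REF into FLOAT_REGS on an ELF target needs
   a BASE_REGS intermediate to form the address, while a copy between two
   GPRs, two FPRs, or two Altivec registers needs no scratch at all
   (NO_REGS). */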
20766 /* Debug version of rs6000_secondary_reload_class. */
20767 static enum reg_class
20768 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20769 machine_mode mode, rtx in)
20771 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20772 fprintf (stderr,
20773 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20774 "mode = %s, input rtx:\n",
20775 reg_class_names[ret], reg_class_names[rclass],
20776 GET_MODE_NAME (mode));
20777 debug_rtx (in);
20779 return ret;
20782 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
20784 static bool
20785 rs6000_can_change_mode_class (machine_mode from,
20786 machine_mode to,
20787 reg_class_t rclass)
20789 unsigned from_size = GET_MODE_SIZE (from);
20790 unsigned to_size = GET_MODE_SIZE (to);
20792 if (from_size != to_size)
20794 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20796 if (reg_classes_intersect_p (xclass, rclass))
20798 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
20799 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
20800 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20801 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20803 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20804 single register under VSX because the scalar part of the register
20805 is in the upper 64-bits, and not the lower 64-bits. Types like
20806 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
20807 IEEE floating point can't overlap, and neither can small
20808 values. */
20810 if (to_float128_vector_p && from_float128_vector_p)
20811 return true;
20813 else if (to_float128_vector_p || from_float128_vector_p)
20814 return false;
20816 /* TDmode in floating-mode registers must always go into a register
20817 pair with the most significant word in the even-numbered register
20818 to match ISA requirements. In little-endian mode, this does not
20819 match subreg numbering, so we cannot allow subregs. */
20820 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20821 return false;
20823 if (from_size < 8 || to_size < 8)
20824 return false;
20826 if (from_size == 8 && (8 * to_nregs) != to_size)
20827 return false;
20829 if (to_size == 8 && (8 * from_nregs) != from_size)
20830 return false;
20832 return true;
20834 else
20835 return true;
20838 /* Since the VSX register set includes traditional floating point registers
20839 and altivec registers, just check for the size being different instead of
20840 trying to check whether the modes are vector modes. Otherwise it won't
20841 allow, say, DF and DI to change classes. For types like TFmode and TDmode
20842 that take 2 64-bit registers, rather than a single 128-bit register, don't
20843 allow subregs of those types to other 128 bit types. */
20844 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20846 unsigned num_regs = (from_size + 15) / 16;
20847 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
20848 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
20849 return false;
20851 return (from_size == 8 || from_size == 16);
20854 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20855 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20856 return false;
20858 return true;
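/* For example, (subreg:DI (reg:V2DI)) is rejected for VSX classes because
   the DImode scalar lives in the upper 64 bits of the register, not where
   subreg numbering expects it, whereas DFmode and DImode (both 8 bytes,
   both scalar) may change mode freely. */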
20861 /* Debug version of rs6000_can_change_mode_class. */
20862 static bool
20863 rs6000_debug_can_change_mode_class (machine_mode from,
20864 machine_mode to,
20865 reg_class_t rclass)
20867 bool ret = rs6000_can_change_mode_class (from, to, rclass);
20869 fprintf (stderr,
20870 "rs6000_can_change_mode_class, return %s, from = %s, "
20871 "to = %s, rclass = %s\n",
20872 ret ? "true" : "false",
20873 GET_MODE_NAME (from), GET_MODE_NAME (to),
20874 reg_class_names[rclass]);
20876 return ret;
20879 /* Return a string to do a move operation of 128 bits of data. */
20881 const char *
20882 rs6000_output_move_128bit (rtx operands[])
20884 rtx dest = operands[0];
20885 rtx src = operands[1];
20886 machine_mode mode = GET_MODE (dest);
20887 int dest_regno;
20888 int src_regno;
20889 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20890 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20892 if (REG_P (dest))
20894 dest_regno = REGNO (dest);
20895 dest_gpr_p = INT_REGNO_P (dest_regno);
20896 dest_fp_p = FP_REGNO_P (dest_regno);
20897 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20898 dest_vsx_p = dest_fp_p | dest_vmx_p;
20900 else
20902 dest_regno = -1;
20903 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20906 if (REG_P (src))
20908 src_regno = REGNO (src);
20909 src_gpr_p = INT_REGNO_P (src_regno);
20910 src_fp_p = FP_REGNO_P (src_regno);
20911 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20912 src_vsx_p = src_fp_p | src_vmx_p;
20914 else
20916 src_regno = -1;
20917 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20920 /* Register moves. */
20921 if (dest_regno >= 0 && src_regno >= 0)
20923 if (dest_gpr_p)
20925 if (src_gpr_p)
20926 return "#";
20928 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20929 return (WORDS_BIG_ENDIAN
20930 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20931 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20933 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20934 return "#";
20937 else if (TARGET_VSX && dest_vsx_p)
20939 if (src_vsx_p)
20940 return "xxlor %x0,%x1,%x1";
20942 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20943 return (WORDS_BIG_ENDIAN
20944 ? "mtvsrdd %x0,%1,%L1"
20945 : "mtvsrdd %x0,%L1,%1");
20947 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20948 return "#";
20951 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20952 return "vor %0,%1,%1";
20954 else if (dest_fp_p && src_fp_p)
20955 return "#";
20958 /* Loads. */
20959 else if (dest_regno >= 0 && MEM_P (src))
20961 if (dest_gpr_p)
20963 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20964 return "lq %0,%1";
20965 else
20966 return "#";
20969 else if (TARGET_ALTIVEC && dest_vmx_p
20970 && altivec_indexed_or_indirect_operand (src, mode))
20971 return "lvx %0,%y1";
20973 else if (TARGET_VSX && dest_vsx_p)
20975 if (mode_supports_vsx_dform_quad (mode)
20976 && quad_address_p (XEXP (src, 0), mode, true))
20977 return "lxv %x0,%1";
20979 else if (TARGET_P9_VECTOR)
20980 return "lxvx %x0,%y1";
20982 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20983 return "lxvw4x %x0,%y1";
20985 else
20986 return "lxvd2x %x0,%y1";
20989 else if (TARGET_ALTIVEC && dest_vmx_p)
20990 return "lvx %0,%y1";
20992 else if (dest_fp_p)
20993 return "#";
20996 /* Stores. */
20997 else if (src_regno >= 0 && MEM_P (dest))
20999 if (src_gpr_p)
21001 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21002 return "stq %1,%0";
21003 else
21004 return "#";
21007 else if (TARGET_ALTIVEC && src_vmx_p
21008 && altivec_indexed_or_indirect_operand (dest, mode))
21009 return "stvx %1,%y0";
21011 else if (TARGET_VSX && src_vsx_p)
21013 if (mode_supports_vsx_dform_quad (mode)
21014 && quad_address_p (XEXP (dest, 0), mode, true))
21015 return "stxv %x1,%0";
21017 else if (TARGET_P9_VECTOR)
21018 return "stxvx %x1,%y0";
21020 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21021 return "stxvw4x %x1,%y0";
21023 else
21024 return "stxvd2x %x1,%y0";
21027 else if (TARGET_ALTIVEC && src_vmx_p)
21028 return "stvx %1,%y0";
21030 else if (src_fp_p)
21031 return "#";
21034 /* Constants. */
21035 else if (dest_regno >= 0
21036 && (GET_CODE (src) == CONST_INT
21037 || GET_CODE (src) == CONST_WIDE_INT
21038 || GET_CODE (src) == CONST_DOUBLE
21039 || GET_CODE (src) == CONST_VECTOR))
21041 if (dest_gpr_p)
21042 return "#";
21044 else if ((dest_vmx_p && TARGET_ALTIVEC)
21045 || (dest_vsx_p && TARGET_VSX))
21046 return output_vec_const_move (operands);
21049 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
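/* For example, a TImode move from a GPR pair into a VSX register with ISA
   3.0 direct moves prints "mtvsrdd %x0,%1,%L1" on big-endian, where %L
   names the second GPR of the pair; the same move with only ISA 2.07
   direct moves prints "#" and is split into separate direct-move insns
   later. */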
21052 /* Validate a 128-bit move. */
21053 bool
21054 rs6000_move_128bit_ok_p (rtx operands[])
21056 machine_mode mode = GET_MODE (operands[0]);
21057 return (gpc_reg_operand (operands[0], mode)
21058 || gpc_reg_operand (operands[1], mode));
21061 /* Return true if a 128-bit move needs to be split. */
21062 bool
21063 rs6000_split_128bit_ok_p (rtx operands[])
21065 if (!reload_completed)
21066 return false;
21068 if (!gpr_or_gpr_p (operands[0], operands[1]))
21069 return false;
21071 if (quad_load_store_p (operands[0], operands[1]))
21072 return false;
21074 return true;
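/* For example, a TImode GPR-to-GPR move that cannot use lq/stq is left
   whole until reload completes (so the allocator sees one value) and is
   then split into two doubleword moves, or four word moves on 32-bit. */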
21078 /* Given a comparison operation, return the bit number in CCR to test. We
21079 know this is a valid comparison.
21081 SCC_P is 1 if this is for an scc. That means that %D will have been
21082 used instead of %C, so the bits will be in different places.
21084 Return -1 if OP isn't a valid comparison for some reason. */
21086 int
21087 ccr_bit (rtx op, int scc_p)
21089 enum rtx_code code = GET_CODE (op);
21090 machine_mode cc_mode;
21091 int cc_regnum;
21092 int base_bit;
21093 rtx reg;
21095 if (!COMPARISON_P (op))
21096 return -1;
21098 reg = XEXP (op, 0);
21100 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
21102 cc_mode = GET_MODE (reg);
21103 cc_regnum = REGNO (reg);
21104 base_bit = 4 * (cc_regnum - CR0_REGNO);
21106 validate_condition_mode (code, cc_mode);
21108 /* When generating a sCOND operation, only positive conditions are
21109 allowed. */
21110 gcc_assert (!scc_p
21111 || code == EQ || code == GT || code == LT || code == UNORDERED
21112 || code == GTU || code == LTU);
21114 switch (code)
21116 case NE:
21117 return scc_p ? base_bit + 3 : base_bit + 2;
21118 case EQ:
21119 return base_bit + 2;
21120 case GT: case GTU: case UNLE:
21121 return base_bit + 1;
21122 case LT: case LTU: case UNGE:
21123 return base_bit;
21124 case ORDERED: case UNORDERED:
21125 return base_bit + 3;
21127 case GE: case GEU:
21128 /* If scc, we will have done a cror to put the bit in the
21129 unordered position. So test that bit. For integer, this is ! LT
21130 unless this is an scc insn. */
21131 return scc_p ? base_bit + 3 : base_bit;
21133 case LE: case LEU:
21134 return scc_p ? base_bit + 3 : base_bit + 1;
21136 default:
21137 gcc_unreachable ();
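/* For example, a GT comparison in CR field 1 has base_bit 4 and yields
   bit 5: each 4-bit CR field holds, in order, the LT, GT, EQ, and
   SO/UNORDERED bits. */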
21141 /* Return the GOT register. */
21143 rtx
21144 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
21146 /* The second flow pass currently (June 1999) can't update
21147 regs_ever_live without disturbing other parts of the compiler, so
21148 update it here to make the prolog/epilogue code happy. */
21149 if (!can_create_pseudo_p ()
21150 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21151 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
21153 crtl->uses_pic_offset_table = 1;
21155 return pic_offset_table_rtx;
21158 static rs6000_stack_t stack_info;
21160 /* Function to init struct machine_function.
21161 This will be called, via a pointer variable,
21162 from push_function_context. */
21164 static struct machine_function *
21165 rs6000_init_machine_status (void)
21167 stack_info.reload_completed = 0;
21168 return ggc_cleared_alloc<machine_function> ();
21171 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21173 /* Write out a function code label. */
21175 void
21176 rs6000_output_function_entry (FILE *file, const char *fname)
21178 if (fname[0] != '.')
21180 switch (DEFAULT_ABI)
21182 default:
21183 gcc_unreachable ();
21185 case ABI_AIX:
21186 if (DOT_SYMBOLS)
21187 putc ('.', file);
21188 else
21189 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
21190 break;
21192 case ABI_ELFv2:
21193 case ABI_V4:
21194 case ABI_DARWIN:
21195 break;
21199 RS6000_OUTPUT_BASENAME (file, fname);
21202 /* Print an operand. Recognize special options, documented below. */
21204 #if TARGET_ELF
21205 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
21206 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
21207 #else
21208 #define SMALL_DATA_RELOC "sda21"
21209 #define SMALL_DATA_REG 0
21210 #endif
21212 void
21213 print_operand (FILE *file, rtx x, int code)
21215 int i;
21216 unsigned HOST_WIDE_INT uval;
21218 switch (code)
21220 /* %a is output_address. */
21222 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
21223 output_operand. */
21225 case 'D':
21226 /* Like 'J' but get to the GT bit only. */
21227 gcc_assert (REG_P (x));
21229 /* Bit 1 is GT bit. */
21230 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
21232 /* Add one for shift count in rlinm for scc. */
21233 fprintf (file, "%d", i + 1);
21234 return;
21236 case 'e':
21237 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
21238 if (! INT_P (x))
21240 output_operand_lossage ("invalid %%e value");
21241 return;
21244 uval = INTVAL (x);
21245 if ((uval & 0xffff) == 0 && uval != 0)
21246 putc ('s', file);
21247 return;
21249 case 'E':
21250 /* X is a CR register. Print the number of the EQ bit of the CR. */
21251 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21252 output_operand_lossage ("invalid %%E value");
21253 else
21254 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
21255 return;
21257 case 'f':
21258 /* X is a CR register. Print the shift count needed to move it
21259 to the high-order four bits. */
21260 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21261 output_operand_lossage ("invalid %%f value");
21262 else
21263 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
21264 return;
21266 case 'F':
21267 /* Similar, but print the count for the rotate in the opposite
21268 direction. */
21269 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21270 output_operand_lossage ("invalid %%F value");
21271 else
21272 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
21273 return;
21275 case 'G':
21276 /* X is a constant integer. If it is negative, print "m",
21277 otherwise print "z". This is to make an aze or ame insn. */
21278 if (GET_CODE (x) != CONST_INT)
21279 output_operand_lossage ("invalid %%G value");
21280 else if (INTVAL (x) >= 0)
21281 putc ('z', file);
21282 else
21283 putc ('m', file);
21284 return;
21286 case 'h':
21287 /* If constant, output low-order five bits. Otherwise, write
21288 normally. */
21289 if (INT_P (x))
21290 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
21291 else
21292 print_operand (file, x, 0);
21293 return;
21295 case 'H':
21296 /* If constant, output low-order six bits. Otherwise, write
21297 normally. */
21298 if (INT_P (x))
21299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
21300 else
21301 print_operand (file, x, 0);
21302 return;
21304 case 'I':
21305 /* Print `i' if this is a constant, else nothing. */
21306 if (INT_P (x))
21307 putc ('i', file);
21308 return;
21310 case 'j':
21311 /* Write the bit number in CCR for jump. */
21312 i = ccr_bit (x, 0);
21313 if (i == -1)
21314 output_operand_lossage ("invalid %%j code");
21315 else
21316 fprintf (file, "%d", i);
21317 return;
21319 case 'J':
21320 /* Similar, but add one for shift count in rlinm for scc and pass
21321 scc flag to `ccr_bit'. */
21322 i = ccr_bit (x, 1);
21323 if (i == -1)
21324 output_operand_lossage ("invalid %%J code");
21325 else
21326 /* If we want bit 31, write a shift count of zero, not 32. */
21327 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21328 return;
21330 case 'k':
21331 /* X must be a constant. Write the 1's complement of the
21332 constant. */
21333 if (! INT_P (x))
21334 output_operand_lossage ("invalid %%k value");
21335 else
21336 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
21337 return;
21339 case 'K':
21340 /* X must be a symbolic constant on ELF. Write an
21341 expression suitable for an 'addi' that adds in the low 16
21342 bits of the MEM. */
21343 if (GET_CODE (x) == CONST)
21345 if (GET_CODE (XEXP (x, 0)) != PLUS
21346 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
21347 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
21348 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
21349 output_operand_lossage ("invalid %%K value");
21351 print_operand_address (file, x);
21352 fputs ("@l", file);
21353 return;
21355 /* %l is output_asm_label. */
21357 case 'L':
21358 /* Write second word of DImode or DFmode reference. Works on register
21359 or non-indexed memory only. */
21360 if (REG_P (x))
21361 fputs (reg_names[REGNO (x) + 1], file);
21362 else if (MEM_P (x))
21364 machine_mode mode = GET_MODE (x);
21365 /* Handle possible auto-increment. Since it is pre-increment and
21366 we have already done it, we can just use an offset of one word. */
21367 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21368 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21369 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21370 UNITS_PER_WORD));
21371 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21372 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21373 UNITS_PER_WORD));
21374 else
21375 output_address (mode, XEXP (adjust_address_nv (x, SImode,
21376 UNITS_PER_WORD),
21377 0));
21379 if (small_data_operand (x, GET_MODE (x)))
21380 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21381 reg_names[SMALL_DATA_REG]);
21383 return;
21385 case 'N': /* Unused */
21386 /* Write the number of elements in the vector times 4. */
21387 if (GET_CODE (x) != PARALLEL)
21388 output_operand_lossage ("invalid %%N value");
21389 else
21390 fprintf (file, "%d", XVECLEN (x, 0) * 4);
21391 return;
21393 case 'O': /* Unused */
21394 /* Similar, but subtract 1 first. */
21395 if (GET_CODE (x) != PARALLEL)
21396 output_operand_lossage ("invalid %%O value");
21397 else
21398 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
21399 return;
21401 case 'p':
21402 /* X is a CONST_INT that is a power of two. Output the logarithm. */
21403 if (! INT_P (x)
21404 || INTVAL (x) < 0
21405 || (i = exact_log2 (INTVAL (x))) < 0)
21406 output_operand_lossage ("invalid %%p value");
21407 else
21408 fprintf (file, "%d", i);
21409 return;
21411 case 'P':
21412 /* The operand must be an indirect memory reference. The result
21413 is the register name. */
21414 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
21415 || REGNO (XEXP (x, 0)) >= 32)
21416 output_operand_lossage ("invalid %%P value");
21417 else
21418 fputs (reg_names[REGNO (XEXP (x, 0))], file);
21419 return;
21421 case 'q':
21422 /* This outputs the logical code corresponding to a boolean
21423 expression. The expression may have one or both operands
21424 negated (if one, only the first one). For condition register
21425 logical operations, it will also treat the negated
21426 CR codes as NOTs, but not handle NOTs of them. */
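	/* For example, (and (not a) b) prints "andc" (AND with
	   complement), while (and (not a) (not b)) prints "nor", since
	   ~a & ~b == ~(a | b) by De Morgan's law.  */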
21428 const char *const *t = 0;
21429 const char *s;
21430 enum rtx_code code = GET_CODE (x);
21431 static const char * const tbl[3][3] = {
21432 { "and", "andc", "nor" },
21433 { "or", "orc", "nand" },
21434 { "xor", "eqv", "xor" } };
21436 if (code == AND)
21437 t = tbl[0];
21438 else if (code == IOR)
21439 t = tbl[1];
21440 else if (code == XOR)
21441 t = tbl[2];
21442 else
21443 output_operand_lossage ("invalid %%q value");
21445 if (GET_CODE (XEXP (x, 0)) != NOT)
21446 s = t[0];
21447 else
21449 if (GET_CODE (XEXP (x, 1)) == NOT)
21450 s = t[2];
21451 else
21452 s = t[1];
21455 fputs (s, file);
21457 return;
21459 case 'Q':
21460 if (! TARGET_MFCRF)
21461 return;
21462 fputc (',', file);
21463 /* FALLTHRU */
21465 case 'R':
21466 /* X is a CR register. Print the mask for `mtcrf'. */
21467 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21468 output_operand_lossage ("invalid %%R value");
21469 else
21470 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
21471 return;
21473 case 's':
21474 /* Low 5 bits of 32 - value */
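	/* E.g. a shift count of 5 prints as 27 ((32 - 5) & 31), the
	   complementary rotate count.  */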
21475 if (! INT_P (x))
21476 output_operand_lossage ("invalid %%s value");
21477 else
21478 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
21479 return;
21481 case 't':
21482 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
21483 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
21485 /* Bit 3 is OV bit. */
21486 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
21488 /* If we want bit 31, write a shift count of zero, not 32. */
21489 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21490 return;
21492 case 'T':
21493 /* Print the symbolic name of a branch target register. */
21494 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
21495 && REGNO (x) != CTR_REGNO))
21496 output_operand_lossage ("invalid %%T value");
21497 else if (REGNO (x) == LR_REGNO)
21498 fputs ("lr", file);
21499 else
21500 fputs ("ctr", file);
21501 return;
21503 case 'u':
21504 /* High-order or low-order 16 bits of constant, whichever is non-zero,
21505 for use in unsigned operand. */
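	/* E.g. 0x12340000 prints as 0x1234 (the high halfword is used
	   because the low one is zero), while 0x1234 prints as 0x1234
	   directly.  */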
21506 if (! INT_P (x))
21508 output_operand_lossage ("invalid %%u value");
21509 return;
21512 uval = INTVAL (x);
21513 if ((uval & 0xffff) == 0)
21514 uval >>= 16;
21516 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
21517 return;
21519 case 'v':
21520 /* High-order 16 bits of constant for use in signed operand. */
21521 if (! INT_P (x))
21522 output_operand_lossage ("invalid %%v value");
21523 else
21524 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
21525 (INTVAL (x) >> 16) & 0xffff);
21526 return;
21528 case 'U':
21529 /* Print `u' if this has an auto-increment or auto-decrement. */
21530 if (MEM_P (x)
21531 && (GET_CODE (XEXP (x, 0)) == PRE_INC
21532 || GET_CODE (XEXP (x, 0)) == PRE_DEC
21533 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
21534 putc ('u', file);
21535 return;
21537 case 'V':
21538 /* Print the trap code for this operand. */
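	/* The numbers in the comments below are the corresponding TO-field
	   encodings: lt = 16, gt = 8, eq = 4, llt = 2, lgt = 1, so that
	   e.g. ne = lt|gt = 24.  */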
21539 switch (GET_CODE (x))
21541 case EQ:
21542 fputs ("eq", file); /* 4 */
21543 break;
21544 case NE:
21545 fputs ("ne", file); /* 24 */
21546 break;
21547 case LT:
21548 fputs ("lt", file); /* 16 */
21549 break;
21550 case LE:
21551 fputs ("le", file); /* 20 */
21552 break;
21553 case GT:
21554 fputs ("gt", file); /* 8 */
21555 break;
21556 case GE:
21557 fputs ("ge", file); /* 12 */
21558 break;
21559 case LTU:
21560 fputs ("llt", file); /* 2 */
21561 break;
21562 case LEU:
21563 fputs ("lle", file); /* 6 */
21564 break;
21565 case GTU:
21566 fputs ("lgt", file); /* 1 */
21567 break;
21568 case GEU:
21569 fputs ("lge", file); /* 5 */
21570 break;
21571 default:
21572 gcc_unreachable ();
21574 break;
21576 case 'w':
21577 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
21578 normally. */
21579 if (INT_P (x))
21580 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
21581 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
21582 else
21583 print_operand (file, x, 0);
21584 return;
21586 case 'x':
21587 /* X is a FPR or Altivec register used in a VSX context. */
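	/* FPR n maps to VSX register n, and Altivec register n maps to
	   VSX register 32 + n; e.g. Altivec register 2 prints as 34
	   (or "%vs34" under -mregnames).  */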
21588 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
21589 output_operand_lossage ("invalid %%x value");
21590 else
21592 int reg = REGNO (x);
21593 int vsx_reg = (FP_REGNO_P (reg)
21594 ? reg - 32
21595 : reg - FIRST_ALTIVEC_REGNO + 32);
21597 #ifdef TARGET_REGNAMES
21598 if (TARGET_REGNAMES)
21599 fprintf (file, "%%vs%d", vsx_reg);
21600 else
21601 #endif
21602 fprintf (file, "%d", vsx_reg);
21604 return;
21606 case 'X':
21607 if (MEM_P (x)
21608 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
21609 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
21610 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
21611 putc ('x', file);
21612 return;
21614 case 'Y':
21615 /* Like 'L', for third word of TImode/PTImode */
21616 if (REG_P (x))
21617 fputs (reg_names[REGNO (x) + 2], file);
21618 else if (MEM_P (x))
21620 machine_mode mode = GET_MODE (x);
21621 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21622 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21623 output_address (mode, plus_constant (Pmode,
21624 XEXP (XEXP (x, 0), 0), 8));
21625 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21626 output_address (mode, plus_constant (Pmode,
21627 XEXP (XEXP (x, 0), 0), 8));
21628 else
21629 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
21630 if (small_data_operand (x, GET_MODE (x)))
21631 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21632 reg_names[SMALL_DATA_REG]);
21634 return;
21636 case 'z':
21637 /* X is a SYMBOL_REF. Write out the name preceded by a
21638 period and without any trailing data in brackets. Used for function
21639 names. If we are configured for System V (or the embedded ABI) on
21640 the PowerPC, do not emit the period, since those systems do not use
21641 TOCs and the like. */
21642 gcc_assert (GET_CODE (x) == SYMBOL_REF);
21644 /* For macho, check to see if we need a stub. */
21645 if (TARGET_MACHO)
21647 const char *name = XSTR (x, 0);
21648 #if TARGET_MACHO
21649 if (darwin_emit_branch_islands
21650 && MACHOPIC_INDIRECT
21651 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
21652 name = machopic_indirection_name (x, /*stub_p=*/true);
21653 #endif
21654 assemble_name (file, name);
21656 else if (!DOT_SYMBOLS)
21657 assemble_name (file, XSTR (x, 0));
21658 else
21659 rs6000_output_function_entry (file, XSTR (x, 0));
21660 return;
21662 case 'Z':
21663 /* Like 'L', for last word of TImode/PTImode. */
21664 if (REG_P (x))
21665 fputs (reg_names[REGNO (x) + 3], file);
21666 else if (MEM_P (x))
21668 machine_mode mode = GET_MODE (x);
21669 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21670 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21671 output_address (mode, plus_constant (Pmode,
21672 XEXP (XEXP (x, 0), 0), 12));
21673 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21674 output_address (mode, plus_constant (Pmode,
21675 XEXP (XEXP (x, 0), 0), 12));
21676 else
21677 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
21678 if (small_data_operand (x, GET_MODE (x)))
21679 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21680 reg_names[SMALL_DATA_REG]);
21682 return;
21684 /* Print AltiVec memory operand. */
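    /* With the default (numeric) register names this prints e.g. "0,9"
       for a plain register base or "9,10" for an indexed address, i.e.
       the RA,RB operand pair of lvx/stvx-style instructions.  */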
21685 case 'y':
21687 rtx tmp;
21689 gcc_assert (MEM_P (x));
21691 tmp = XEXP (x, 0);
21693 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
21694 && GET_CODE (tmp) == AND
21695 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
21696 && INTVAL (XEXP (tmp, 1)) == -16)
21697 tmp = XEXP (tmp, 0);
21698 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
21699 && GET_CODE (tmp) == PRE_MODIFY)
21700 tmp = XEXP (tmp, 1);
21701 if (REG_P (tmp))
21702 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
21703 else
21705 if (GET_CODE (tmp) != PLUS
21706 || !REG_P (XEXP (tmp, 0))
21707 || !REG_P (XEXP (tmp, 1)))
21709 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
21710 break;
21713 if (REGNO (XEXP (tmp, 0)) == 0)
21714 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
21715 reg_names[ REGNO (XEXP (tmp, 0)) ]);
21716 else
21717 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
21718 reg_names[ REGNO (XEXP (tmp, 1)) ]);
21720 break;
21723 case 0:
21724 if (REG_P (x))
21725 fprintf (file, "%s", reg_names[REGNO (x)]);
21726 else if (MEM_P (x))
21728 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21729 know the width from the mode. */
21730 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
21731 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
21732 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21733 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
21734 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
21735 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21736 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21737 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
21738 else
21739 output_address (GET_MODE (x), XEXP (x, 0));
21741 else
21743 if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21744 /* This hack along with a corresponding hack in
21745 rs6000_output_addr_const_extra arranges to output addends
21746 where the assembler expects to find them, e.g.
21747 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21748 without this hack would be output as "x@toc+4". We
21749 want "x+4@toc". */
21750 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21751 else
21752 output_addr_const (file, x);
21754 return;
21756 case '&':
21757 if (const char *name = get_some_local_dynamic_name ())
21758 assemble_name (file, name);
21759 else
21760 output_operand_lossage ("'%%&' used without any "
21761 "local dynamic TLS references");
21762 return;
21764 default:
21765 output_operand_lossage ("invalid %%xn code");
21769 /* Print the address of an operand. */
21771 void
21772 print_operand_address (FILE *file, rtx x)
21774 if (REG_P (x))
21775 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21776 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
21777 || GET_CODE (x) == LABEL_REF)
21779 output_addr_const (file, x);
21780 if (small_data_operand (x, GET_MODE (x)))
21781 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21782 reg_names[SMALL_DATA_REG]);
21783 else
21784 gcc_assert (!TARGET_TOC);
21786 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21787 && REG_P (XEXP (x, 1)))
21789 if (REGNO (XEXP (x, 0)) == 0)
21790 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21791 reg_names[ REGNO (XEXP (x, 0)) ]);
21792 else
21793 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21794 reg_names[ REGNO (XEXP (x, 1)) ]);
21796 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21797 && GET_CODE (XEXP (x, 1)) == CONST_INT)
21798 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21799 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
21800 #if TARGET_MACHO
21801 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21802 && CONSTANT_P (XEXP (x, 1)))
21804 fprintf (file, "lo16(");
21805 output_addr_const (file, XEXP (x, 1));
21806 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21808 #endif
21809 #if TARGET_ELF
21810 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21811 && CONSTANT_P (XEXP (x, 1)))
21813 output_addr_const (file, XEXP (x, 1));
21814 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21816 #endif
21817 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21819 /* This hack along with a corresponding hack in
21820 rs6000_output_addr_const_extra arranges to output addends
22821 where the assembler expects to find them, e.g.
21822 (lo_sum (reg 9)
21823 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21824 without this hack would be output as "x@toc+8@l(9)". We
21825 want "x+8@toc@l(9)". */
21826 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21827 if (GET_CODE (x) == LO_SUM)
21828 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21829 else
21830 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21832 else
21833 gcc_unreachable ();
21836 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21838 static bool
21839 rs6000_output_addr_const_extra (FILE *file, rtx x)
21841 if (GET_CODE (x) == UNSPEC)
21842 switch (XINT (x, 1))
21844 case UNSPEC_TOCREL:
21845 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
21846 && REG_P (XVECEXP (x, 0, 1))
21847 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21848 output_addr_const (file, XVECEXP (x, 0, 0));
21849 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21851 if (INTVAL (tocrel_offset_oac) >= 0)
21852 fprintf (file, "+");
21853 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21855 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21857 putc ('-', file);
21858 assemble_name (file, toc_label_name);
21859 need_toc_init = 1;
21861 else if (TARGET_ELF)
21862 fputs ("@toc", file);
21863 return true;
21865 #if TARGET_MACHO
21866 case UNSPEC_MACHOPIC_OFFSET:
21867 output_addr_const (file, XVECEXP (x, 0, 0));
21868 putc ('-', file);
21869 machopic_output_function_base_name (file);
21870 return true;
21871 #endif
21873 return false;
21876 /* Target hook for assembling integer objects. The PowerPC version has
21877 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21878 is defined. It also needs to handle DI-mode objects on 64-bit
21879 targets. */
21881 static bool
21882 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21884 #ifdef RELOCATABLE_NEEDS_FIXUP
21885 /* Special handling for SI values. */
21886 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21888 static int recurse = 0;
21890 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21891 the .fixup section. Since the TOC section is already relocated, we
21892 don't need to mark it here. We used to skip the text section, but it
21893 should never be valid for relocated addresses to be placed in the text
21894 section. */
21895 if (DEFAULT_ABI == ABI_V4
21896 && (TARGET_RELOCATABLE || flag_pic > 1)
21897 && in_section != toc_section
21898 && !recurse
21899 && !CONST_SCALAR_INT_P (x)
21900 && CONSTANT_P (x))
21902 char buf[256];
21904 recurse = 1;
21905 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21906 fixuplabelno++;
21907 ASM_OUTPUT_LABEL (asm_out_file, buf);
21908 fprintf (asm_out_file, "\t.long\t(");
21909 output_addr_const (asm_out_file, x);
21910 fprintf (asm_out_file, ")@fixup\n");
21911 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21912 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21913 fprintf (asm_out_file, "\t.long\t");
21914 assemble_name (asm_out_file, buf);
21915 fprintf (asm_out_file, "\n\t.previous\n");
21916 recurse = 0;
21917 return true;
21919 /* Remove initial .'s to turn a -mcall-aixdesc function
21920 address into the address of the descriptor, not the function
21921 itself. */
21922 else if (GET_CODE (x) == SYMBOL_REF
21923 && XSTR (x, 0)[0] == '.'
21924 && DEFAULT_ABI == ABI_AIX)
21926 const char *name = XSTR (x, 0);
21927 while (*name == '.')
21928 name++;
21930 fprintf (asm_out_file, "\t.long\t%s\n", name);
21931 return true;
21934 #endif /* RELOCATABLE_NEEDS_FIXUP */
21935 return default_assemble_integer (x, size, aligned_p);
21938 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21939 /* Emit an assembler directive to set symbol visibility for DECL to
21940 VISIBILITY_TYPE. */
21942 static void
21943 rs6000_assemble_visibility (tree decl, int vis)
21945 if (TARGET_XCOFF)
21946 return;
21948 /* Functions need to have their entry point symbol visibility set as
21949 well as their descriptor symbol visibility. */
21950 if (DEFAULT_ABI == ABI_AIX
21951 && DOT_SYMBOLS
21952 && TREE_CODE (decl) == FUNCTION_DECL)
21954 static const char * const visibility_types[] = {
21955 NULL, "protected", "hidden", "internal"
21958 const char *name, *type;
21960 name = ((* targetm.strip_name_encoding)
21961 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21962 type = visibility_types[vis];
21964 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21965 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21967 else
21968 default_assemble_visibility (decl, vis);
21970 #endif
21972 enum rtx_code
21973 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21975 /* Reversal of FP compares needs care -- an ordered compare
21976 becomes an unordered compare and vice versa. */
21977 if (mode == CCFPmode
21978 && (!flag_finite_math_only
21979 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21980 || code == UNEQ || code == LTGT))
21981 return reverse_condition_maybe_unordered (code);
21982 else
21983 return reverse_condition (code);
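/* For example, when NaNs are honored, reversing LT must yield UNGE
   rather than GE: both LT and GE are false for unordered operands, so
   plain reverse_condition would not produce a true complement.  */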
21986 /* Generate a compare for CODE. Return a brand-new rtx that
21987 represents the result of the compare. */
21989 static rtx
21990 rs6000_generate_compare (rtx cmp, machine_mode mode)
21992 machine_mode comp_mode;
21993 rtx compare_result;
21994 enum rtx_code code = GET_CODE (cmp);
21995 rtx op0 = XEXP (cmp, 0);
21996 rtx op1 = XEXP (cmp, 1);
21998 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21999 comp_mode = CCmode;
22000 else if (FLOAT_MODE_P (mode))
22001 comp_mode = CCFPmode;
22002 else if (code == GTU || code == LTU
22003 || code == GEU || code == LEU)
22004 comp_mode = CCUNSmode;
22005 else if ((code == EQ || code == NE)
22006 && unsigned_reg_p (op0)
22007 && (unsigned_reg_p (op1)
22008 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
22009 /* These are unsigned values, perhaps there will be a later
22010 ordering compare that can be shared with this one. */
22011 comp_mode = CCUNSmode;
22012 else
22013 comp_mode = CCmode;
22015 /* If we have an unsigned compare, make sure we don't have a signed value as
22016 an immediate. */
22017 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
22018 && INTVAL (op1) < 0)
22020 op0 = copy_rtx_if_shared (op0);
22021 op1 = force_reg (GET_MODE (op0), op1);
22022 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
22025 /* First, the compare. */
22026 compare_result = gen_reg_rtx (comp_mode);
22028 /* IEEE 128-bit support in VSX registers when we do not have hardware
22029 support. */
22030 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22032 rtx libfunc = NULL_RTX;
22033 bool check_nan = false;
22034 rtx dest;
22036 switch (code)
22038 case EQ:
22039 case NE:
22040 libfunc = optab_libfunc (eq_optab, mode);
22041 break;
22043 case GT:
22044 case GE:
22045 libfunc = optab_libfunc (ge_optab, mode);
22046 break;
22048 case LT:
22049 case LE:
22050 libfunc = optab_libfunc (le_optab, mode);
22051 break;
22053 case UNORDERED:
22054 case ORDERED:
22055 libfunc = optab_libfunc (unord_optab, mode);
22056 code = (code == UNORDERED) ? NE : EQ;
22057 break;
22059 case UNGE:
22060 case UNGT:
22061 check_nan = true;
22062 libfunc = optab_libfunc (ge_optab, mode);
22063 code = (code == UNGE) ? GE : GT;
22064 break;
22066 case UNLE:
22067 case UNLT:
22068 check_nan = true;
22069 libfunc = optab_libfunc (le_optab, mode);
22070 code = (code == UNLE) ? LE : LT;
22071 break;
22073 case UNEQ:
22074 case LTGT:
22075 check_nan = true;
22076 libfunc = optab_libfunc (eq_optab, mode);
22077 code = (code == UNEQ) ? EQ : NE;
22078 break;
22080 default:
22081 gcc_unreachable ();
22084 gcc_assert (libfunc);
22086 if (!check_nan)
22087 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22088 SImode, op0, mode, op1, mode);
22090 /* The library signals an exception for signalling NaNs, so we need to
22091 handle isgreater, etc. by first checking isordered. */
22092 else
22094 rtx ne_rtx, normal_dest, unord_dest;
22095 rtx unord_func = optab_libfunc (unord_optab, mode);
22096 rtx join_label = gen_label_rtx ();
22097 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
22098 rtx unord_cmp = gen_reg_rtx (comp_mode);
22101 /* Test for either value being a NaN. */
22102 gcc_assert (unord_func);
22103 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
22104 SImode, op0, mode, op1, mode);
22106 /* Set the result to 1 if either value is a NaN (the unordered
22107 predicates are all true for NaNs), and jump to the join label. */
22108 dest = gen_reg_rtx (SImode);
22109 emit_move_insn (dest, const1_rtx);
22110 emit_insn (gen_rtx_SET (unord_cmp,
22111 gen_rtx_COMPARE (comp_mode, unord_dest,
22112 const0_rtx)));
22114 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
22115 emit_jump_insn (gen_rtx_SET (pc_rtx,
22116 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
22117 join_ref,
22118 pc_rtx)));
22120 /* Do the normal comparison, knowing that the values are not
22121 NaNs. */
22122 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22123 SImode, op0, mode, op1, mode);
22125 emit_insn (gen_cstoresi4 (dest,
22126 gen_rtx_fmt_ee (code, SImode, normal_dest,
22127 const0_rtx),
22128 normal_dest, const0_rtx));
22130 /* Join NaN and non-NaN paths. Compare dest against 0. */
22131 emit_label (join_label);
22132 code = NE;
22135 emit_insn (gen_rtx_SET (compare_result,
22136 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
22139 else
22141 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
22142 CLOBBERs to match cmptf_internal2 pattern. */
22143 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
22144 && FLOAT128_IBM_P (GET_MODE (op0))
22145 && TARGET_HARD_FLOAT)
22146 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22147 gen_rtvec (10,
22148 gen_rtx_SET (compare_result,
22149 gen_rtx_COMPARE (comp_mode, op0, op1)),
22150 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22151 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22152 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22153 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22154 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22155 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22156 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22157 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22158 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
22159 else if (GET_CODE (op1) == UNSPEC
22160 && XINT (op1, 1) == UNSPEC_SP_TEST)
22162 rtx op1b = XVECEXP (op1, 0, 0);
22163 comp_mode = CCEQmode;
22164 compare_result = gen_reg_rtx (CCEQmode);
22165 if (TARGET_64BIT)
22166 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
22167 else
22168 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
22170 else
22171 emit_insn (gen_rtx_SET (compare_result,
22172 gen_rtx_COMPARE (comp_mode, op0, op1)));
22175 /* Some kinds of FP comparisons need an OR operation;
22176 under flag_finite_math_only we don't bother. */
22177 if (FLOAT_MODE_P (mode)
22178 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
22179 && !flag_finite_math_only
22180 && (code == LE || code == GE
22181 || code == UNEQ || code == LTGT
22182 || code == UNGT || code == UNLT))
22184 enum rtx_code or1, or2;
22185 rtx or1_rtx, or2_rtx, compare2_rtx;
22186 rtx or_result = gen_reg_rtx (CCEQmode);
22188 switch (code)
22190 case LE: or1 = LT; or2 = EQ; break;
22191 case GE: or1 = GT; or2 = EQ; break;
22192 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
22193 case LTGT: or1 = LT; or2 = GT; break;
22194 case UNGT: or1 = UNORDERED; or2 = GT; break;
22195 case UNLT: or1 = UNORDERED; or2 = LT; break;
22196 default: gcc_unreachable ();
22198 validate_condition_mode (or1, comp_mode);
22199 validate_condition_mode (or2, comp_mode);
22200 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
22201 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
22202 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
22203 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
22204 const_true_rtx);
22205 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
22207 compare_result = or_result;
22208 code = EQ;
22211 validate_condition_mode (code, GET_MODE (compare_result));
22213 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
22217 /* Return the diagnostic message string if the binary operation OP is
22218 not permitted on TYPE1 and TYPE2, NULL otherwise. */
22220 static const char*
22221 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
22222 const_tree type1,
22223 const_tree type2)
22225 machine_mode mode1 = TYPE_MODE (type1);
22226 machine_mode mode2 = TYPE_MODE (type2);
22228 /* For complex modes, use the inner type. */
22229 if (COMPLEX_MODE_P (mode1))
22230 mode1 = GET_MODE_INNER (mode1);
22232 if (COMPLEX_MODE_P (mode2))
22233 mode2 = GET_MODE_INNER (mode2);
22235 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
22236 double to intermix unless -mfloat128-convert. */
22237 if (mode1 == mode2)
22238 return NULL;
22240 if (!TARGET_FLOAT128_CVT)
22242 if ((mode1 == KFmode && mode2 == IFmode)
22243 || (mode1 == IFmode && mode2 == KFmode))
22244 return N_("__float128 and __ibm128 cannot be used in the same "
22245 "expression");
22247 if (TARGET_IEEEQUAD
22248 && ((mode1 == IFmode && mode2 == TFmode)
22249 || (mode1 == TFmode && mode2 == IFmode)))
22250 return N_("__ibm128 and long double cannot be used in the same "
22251 "expression");
22253 if (!TARGET_IEEEQUAD
22254 && ((mode1 == KFmode && mode2 == TFmode)
22255 || (mode1 == TFmode && mode2 == KFmode)))
22256 return N_("__float128 and long double cannot be used in the same "
22257 "expression");
22260 return NULL;
22264 /* Expand floating point conversion to/from __float128 and __ibm128. */
22266 void
22267 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
22269 machine_mode dest_mode = GET_MODE (dest);
22270 machine_mode src_mode = GET_MODE (src);
22271 convert_optab cvt = unknown_optab;
22272 bool do_move = false;
22273 rtx libfunc = NULL_RTX;
22274 rtx dest2;
22275 typedef rtx (*rtx_2func_t) (rtx, rtx);
22276 rtx_2func_t hw_convert = (rtx_2func_t)0;
22277 size_t kf_or_tf;
22279 struct hw_conv_t {
22280 rtx_2func_t from_df;
22281 rtx_2func_t from_sf;
22282 rtx_2func_t from_si_sign;
22283 rtx_2func_t from_si_uns;
22284 rtx_2func_t from_di_sign;
22285 rtx_2func_t from_di_uns;
22286 rtx_2func_t to_df;
22287 rtx_2func_t to_sf;
22288 rtx_2func_t to_si_sign;
22289 rtx_2func_t to_si_uns;
22290 rtx_2func_t to_di_sign;
22291 rtx_2func_t to_di_uns;
22292 } hw_conversions[2] = {
22293 /* conversions to/from KFmode */
22295 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
22296 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
22297 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
22298 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
22299 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
22300 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
22301 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
22302 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
22303 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
22304 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
22305 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
22306 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
22309 /* conversions to/from TFmode */
22311 gen_extenddftf2_hw, /* TFmode <- DFmode. */
22312 gen_extendsftf2_hw, /* TFmode <- SFmode. */
22313 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
22314 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
22315 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
22316 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
22317 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
22318 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
22319 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
22320 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
22321 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
22322 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
22326 if (dest_mode == src_mode)
22327 gcc_unreachable ();
22329 /* Eliminate memory operations. */
22330 if (MEM_P (src))
22331 src = force_reg (src_mode, src);
22333 if (MEM_P (dest))
22335 rtx tmp = gen_reg_rtx (dest_mode);
22336 rs6000_expand_float128_convert (tmp, src, unsigned_p);
22337 rs6000_emit_move (dest, tmp, dest_mode);
22338 return;
22341 /* Convert to IEEE 128-bit floating point. */
22342 if (FLOAT128_IEEE_P (dest_mode))
22344 if (dest_mode == KFmode)
22345 kf_or_tf = 0;
22346 else if (dest_mode == TFmode)
22347 kf_or_tf = 1;
22348 else
22349 gcc_unreachable ();
22351 switch (src_mode)
22353 case E_DFmode:
22354 cvt = sext_optab;
22355 hw_convert = hw_conversions[kf_or_tf].from_df;
22356 break;
22358 case E_SFmode:
22359 cvt = sext_optab;
22360 hw_convert = hw_conversions[kf_or_tf].from_sf;
22361 break;
22363 case E_KFmode:
22364 case E_IFmode:
22365 case E_TFmode:
22366 if (FLOAT128_IBM_P (src_mode))
22367 cvt = sext_optab;
22368 else
22369 do_move = true;
22370 break;
22372 case E_SImode:
22373 if (unsigned_p)
22375 cvt = ufloat_optab;
22376 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
22378 else
22380 cvt = sfloat_optab;
22381 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
22383 break;
22385 case E_DImode:
22386 if (unsigned_p)
22388 cvt = ufloat_optab;
22389 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
22391 else
22393 cvt = sfloat_optab;
22394 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
22396 break;
22398 default:
22399 gcc_unreachable ();
22403 /* Convert from IEEE 128-bit floating point. */
22404 else if (FLOAT128_IEEE_P (src_mode))
22406 if (src_mode == KFmode)
22407 kf_or_tf = 0;
22408 else if (src_mode == TFmode)
22409 kf_or_tf = 1;
22410 else
22411 gcc_unreachable ();
22413 switch (dest_mode)
22415 case E_DFmode:
22416 cvt = trunc_optab;
22417 hw_convert = hw_conversions[kf_or_tf].to_df;
22418 break;
22420 case E_SFmode:
22421 cvt = trunc_optab;
22422 hw_convert = hw_conversions[kf_or_tf].to_sf;
22423 break;
22425 case E_KFmode:
22426 case E_IFmode:
22427 case E_TFmode:
22428 if (FLOAT128_IBM_P (dest_mode))
22429 cvt = trunc_optab;
22430 else
22431 do_move = true;
22432 break;
22434 case E_SImode:
22435 if (unsigned_p)
22437 cvt = ufix_optab;
22438 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22440 else
22442 cvt = sfix_optab;
22443 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22445 break;
22447 case E_DImode:
22448 if (unsigned_p)
22450 cvt = ufix_optab;
22451 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22453 else
22455 cvt = sfix_optab;
22456 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22458 break;
22460 default:
22461 gcc_unreachable ();
22465 /* Both IBM format. */
22466 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22467 do_move = true;
22469 else
22470 gcc_unreachable ();
22472 /* Handle conversion between TFmode/KFmode. */
22473 if (do_move)
22474 emit_move_insn (dest, gen_lowpart (dest_mode, src));
22476 /* Handle conversion if we have hardware support. */
22477 else if (TARGET_FLOAT128_HW && hw_convert)
22478 emit_insn ((hw_convert) (dest, src));
22480 /* Call an external function to do the conversion. */
22481 else if (cvt != unknown_optab)
22483 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22484 gcc_assert (libfunc != NULL_RTX);
22486 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
22487 src, src_mode);
22489 gcc_assert (dest2 != NULL_RTX);
22490 if (!rtx_equal_p (dest, dest2))
22491 emit_move_insn (dest, dest2);
22494 else
22495 gcc_unreachable ();
22497 return;
22501 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22502 can be used as that dest register. Return the dest register. */
22505 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22507 if (op2 == const0_rtx)
22508 return op1;
22510 if (GET_CODE (scratch) == SCRATCH)
22511 scratch = gen_reg_rtx (mode);
22513 if (logical_operand (op2, mode))
22514 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22515 else
22516 emit_insn (gen_rtx_SET (scratch,
22517 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
22519 return scratch;
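/* For instance, testing x == 0x1234 emits scratch = x ^ 0x1234 (the
   constant qualifies as a logical_operand), whereas x == 0x12345678
   emits scratch = x + (-0x12345678); either way scratch is zero exactly
   when the operands are equal.  */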
22522 void
22523 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22525 rtx condition_rtx;
22526 machine_mode op_mode;
22527 enum rtx_code cond_code;
22528 rtx result = operands[0];
22530 condition_rtx = rs6000_generate_compare (operands[1], mode);
22531 cond_code = GET_CODE (condition_rtx);
22533 if (cond_code == NE
22534 || cond_code == GE || cond_code == LE
22535 || cond_code == GEU || cond_code == LEU
22536 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22538 rtx not_result = gen_reg_rtx (CCEQmode);
22539 rtx not_op, rev_cond_rtx;
22540 machine_mode cc_mode;
22542 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22544 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22545 SImode, XEXP (condition_rtx, 0), const0_rtx);
22546 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22547 emit_insn (gen_rtx_SET (not_result, not_op));
22548 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22551 op_mode = GET_MODE (XEXP (operands[1], 0));
22552 if (op_mode == VOIDmode)
22553 op_mode = GET_MODE (XEXP (operands[1], 1));
22555 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22557 PUT_MODE (condition_rtx, DImode);
22558 convert_move (result, condition_rtx, 0);
22560 else
22562 PUT_MODE (condition_rtx, SImode);
22563 emit_insn (gen_rtx_SET (result, condition_rtx));
22567 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison in OPERANDS[0]. */
22569 void
22570 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22572 rtx condition_rtx, loc_ref;
22574 condition_rtx = rs6000_generate_compare (operands[0], mode);
22575 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22576 emit_jump_insn (gen_rtx_SET (pc_rtx,
22577 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22578 loc_ref, pc_rtx)));
22581 /* Return the string to output a conditional branch to LABEL, which is
22582 the operand template of the label, or NULL if the branch is really a
22583 conditional return.
22585 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22586 condition code register and its mode specifies what kind of
22587 comparison we made.
22589 REVERSED is nonzero if we should reverse the sense of the comparison.
22591 INSN is the insn. */
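/* For example, a predicted-taken equality branch on cr7 might come out
   as "beq+ 7,.L24", while a branch whose target is out of conditional
   range emits the reversed test plus an unconditional hop, e.g.
   "bne- 7,$+8" followed by "b .L24".  */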
22593 char *
22594 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22596 static char string[64];
22597 enum rtx_code code = GET_CODE (op);
22598 rtx cc_reg = XEXP (op, 0);
22599 machine_mode mode = GET_MODE (cc_reg);
22600 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22601 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22602 int really_reversed = reversed ^ need_longbranch;
22603 char *s = string;
22604 const char *ccode;
22605 const char *pred;
22606 rtx note;
22608 validate_condition_mode (code, mode);
22610 /* Work out which way this really branches. We could use
22611 reverse_condition_maybe_unordered here always but this
22612 makes the resulting assembler clearer. */
22613 if (really_reversed)
22615 /* Reversal of FP compares needs care -- an ordered compare
22616 becomes an unordered compare and vice versa. */
22617 if (mode == CCFPmode)
22618 code = reverse_condition_maybe_unordered (code);
22619 else
22620 code = reverse_condition (code);
22623 switch (code)
22625 /* Not all of these are actually distinct opcodes, but
22626 we distinguish them for clarity of the resulting assembler. */
22627 case NE: case LTGT:
22628 ccode = "ne"; break;
22629 case EQ: case UNEQ:
22630 ccode = "eq"; break;
22631 case GE: case GEU:
22632 ccode = "ge"; break;
22633 case GT: case GTU: case UNGT:
22634 ccode = "gt"; break;
22635 case LE: case LEU:
22636 ccode = "le"; break;
22637 case LT: case LTU: case UNLT:
22638 ccode = "lt"; break;
22639 case UNORDERED: ccode = "un"; break;
22640 case ORDERED: ccode = "nu"; break;
22641 case UNGE: ccode = "nl"; break;
22642 case UNLE: ccode = "ng"; break;
22643 default:
22644 gcc_unreachable ();
22647 /* Maybe we have a guess as to how likely the branch is. */
22648 pred = "";
22649 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22650 if (note != NULL_RTX)
22652 /* PROB is the difference from 50%. */
22653 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
22654 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
22656 /* Only hint for highly probable/improbable branches on newer cpus when
22657 we have real profile data, as static prediction overrides processor
22658 dynamic prediction. For older cpus we may as well always hint, but
22659 assume not taken for branches that are very close to 50% as a
22660 mispredicted taken branch is more expensive than a
22661 mispredicted not-taken branch. */
22662 if (rs6000_always_hint
22663 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22664 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22665 && br_prob_note_reliable_p (note)))
22667 if (abs (prob) > REG_BR_PROB_BASE / 20
22668 && ((prob > 0) ^ need_longbranch))
22669 pred = "+";
22670 else
22671 pred = "-";
22675 if (label == NULL)
22676 s += sprintf (s, "b%slr%s ", ccode, pred);
22677 else
22678 s += sprintf (s, "b%s%s ", ccode, pred);
22680 /* We need to escape any '%' characters in the reg_names string.
22681 Assume they'd only be the first character.... */
22682 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22683 *s++ = '%';
22684 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22686 if (label != NULL)
22688 /* If the branch distance was too far, we may have to use an
22689 unconditional branch to go the distance. */
22690 if (need_longbranch)
22691 s += sprintf (s, ",$+8\n\tb %s", label);
22692 else
22693 s += sprintf (s, ",%s", label);
22696 return string;
22699 /* Return insn for VSX or Altivec comparisons. */
22701 static rtx
22702 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22704 rtx mask;
22705 machine_mode mode = GET_MODE (op0);
22707 switch (code)
22709 default:
22710 break;
22712 case GE:
22713 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22714 return NULL_RTX;
22715 /* FALLTHRU */
22717 case EQ:
22718 case GT:
22719 case GTU:
22720 case ORDERED:
22721 case UNORDERED:
22722 case UNEQ:
22723 case LTGT:
22724 mask = gen_reg_rtx (mode);
22725 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22726 return mask;
22729 return NULL_RTX;
22732 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22733 DMODE is expected destination mode. This is a recursive function. */
22735 static rtx
22736 rs6000_emit_vector_compare (enum rtx_code rcode,
22737 rtx op0, rtx op1,
22738 machine_mode dmode)
22740 rtx mask;
22741 bool swap_operands = false;
22742 bool try_again = false;
22744 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22745 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22747 /* See if the comparison works as is. */
22748 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22749 if (mask)
22750 return mask;
22752 switch (rcode)
22754 case LT:
22755 rcode = GT;
22756 swap_operands = true;
22757 try_again = true;
22758 break;
22759 case LTU:
22760 rcode = GTU;
22761 swap_operands = true;
22762 try_again = true;
22763 break;
22764 case NE:
22765 case UNLE:
22766 case UNLT:
22767 case UNGE:
22768 case UNGT:
22769 /* Invert condition and try again.
22770 e.g., A != B becomes ~(A==B). */
22772 enum rtx_code rev_code;
22773 enum insn_code nor_code;
22774 rtx mask2;
22776 rev_code = reverse_condition_maybe_unordered (rcode);
22777 if (rev_code == UNKNOWN)
22778 return NULL_RTX;
22780 nor_code = optab_handler (one_cmpl_optab, dmode);
22781 if (nor_code == CODE_FOR_nothing)
22782 return NULL_RTX;
22784 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22785 if (!mask2)
22786 return NULL_RTX;
22788 mask = gen_reg_rtx (dmode);
22789 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22790 return mask;
22792 break;
22793 case GE:
22794 case GEU:
22795 case LE:
22796 case LEU:
22797 /* Try GT/GTU/LT/LTU OR EQ */
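	/* E.g. (a >= b) is computed as (a > b) | (a == b), since only
	   the strict comparisons and equality exist as vector
	   instructions.  */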
22799 rtx c_rtx, eq_rtx;
22800 enum insn_code ior_code;
22801 enum rtx_code new_code;
22803 switch (rcode)
22805 case GE:
22806 new_code = GT;
22807 break;
22809 case GEU:
22810 new_code = GTU;
22811 break;
22813 case LE:
22814 new_code = LT;
22815 break;
22817 case LEU:
22818 new_code = LTU;
22819 break;
22821 default:
22822 gcc_unreachable ();
22825 ior_code = optab_handler (ior_optab, dmode);
22826 if (ior_code == CODE_FOR_nothing)
22827 return NULL_RTX;
22829 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22830 if (!c_rtx)
22831 return NULL_RTX;
22833 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22834 if (!eq_rtx)
22835 return NULL_RTX;
22837 mask = gen_reg_rtx (dmode);
22838 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22839 return mask;
22841 break;
22842 default:
22843 return NULL_RTX;
22846 if (try_again)
22848 if (swap_operands)
22849 std::swap (op0, op1);
22851 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22852 if (mask)
22853 return mask;
22856 /* You only get two chances. */
22857 return NULL_RTX;
22860 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22861 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22862 operands for the relation operation COND. */
22865 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22866 rtx cond, rtx cc_op0, rtx cc_op1)
22868 machine_mode dest_mode = GET_MODE (dest);
22869 machine_mode mask_mode = GET_MODE (cc_op0);
22870 enum rtx_code rcode = GET_CODE (cond);
22871 machine_mode cc_mode = CCmode;
22872 rtx mask;
22873 rtx cond2;
22874 bool invert_move = false;
22876 if (VECTOR_UNIT_NONE_P (dest_mode))
22877 return 0;
22879 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22880 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22882 switch (rcode)
22884 /* Swap operands if we can, and fall back to doing the operation as
22885 specified, and doing a NOR to invert the test. */
22886 case NE:
22887 case UNLE:
22888 case UNLT:
22889 case UNGE:
22890 case UNGT:
22891 /* Invert condition and try again.
22892 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22893 invert_move = true;
22894 rcode = reverse_condition_maybe_unordered (rcode);
22895 if (rcode == UNKNOWN)
22896 return 0;
22897 break;
22899 case GE:
22900 case LE:
22901 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22903 /* Invert condition to avoid compound test. */
22904 invert_move = true;
22905 rcode = reverse_condition (rcode);
22907 break;
22909 case GTU:
22910 case GEU:
22911 case LTU:
22912 case LEU:
22913 /* Mark unsigned tests with CCUNSmode. */
22914 cc_mode = CCUNSmode;
22916 /* Invert condition to avoid compound test if necessary. */
22917 if (rcode == GEU || rcode == LEU)
22919 invert_move = true;
22920 rcode = reverse_condition (rcode);
22922 break;
22924 default:
22925 break;
22928 /* Get the vector mask for the given relational operations. */
22929 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22931 if (!mask)
22932 return 0;
22934 if (invert_move)
22935 std::swap (op_true, op_false);
22937 /* Optimize the case where the true/false values are the constant -1/0 vectors, since the comparison mask itself is already -1/0 in each element. */
22938 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22939 && (GET_CODE (op_true) == CONST_VECTOR
22940 || GET_CODE (op_false) == CONST_VECTOR))
22942 rtx constant_0 = CONST0_RTX (dest_mode);
22943 rtx constant_m1 = CONSTM1_RTX (dest_mode);
22945 if (op_true == constant_m1 && op_false == constant_0)
22947 emit_move_insn (dest, mask);
22948 return 1;
22951 else if (op_true == constant_0 && op_false == constant_m1)
22953 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22954 return 1;
22957 /* If we can't use the vector comparison directly, perhaps we can use
22958 the mask for the true or false fields, instead of loading up a
22959 constant. */
22960 if (op_true == constant_m1)
22961 op_true = mask;
22963 if (op_false == constant_0)
22964 op_false = mask;
22967 if (!REG_P (op_true) && !SUBREG_P (op_true))
22968 op_true = force_reg (dest_mode, op_true);
22970 if (!REG_P (op_false) && !SUBREG_P (op_false))
22971 op_false = force_reg (dest_mode, op_false);
22973 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22974 CONST0_RTX (dest_mode));
22975 emit_insn (gen_rtx_SET (dest,
22976 gen_rtx_IF_THEN_ELSE (dest_mode,
22977 cond2,
22978 op_true,
22979 op_false)));
22980 return 1;
22983 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
22984 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands
22985 of the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
22986 hardware has no such operation. */
22988 static int
22989 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22991 enum rtx_code code = GET_CODE (op);
22992 rtx op0 = XEXP (op, 0);
22993 rtx op1 = XEXP (op, 1);
22994 machine_mode compare_mode = GET_MODE (op0);
22995 machine_mode result_mode = GET_MODE (dest);
22996 bool max_p = false;
22998 if (result_mode != compare_mode)
22999 return 0;
23001 if (code == GE || code == GT)
23002 max_p = true;
23003 else if (code == LE || code == LT)
23004 max_p = false;
23005 else
23006 return 0;
23008 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
23011 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
23012 max_p = !max_p;
23014 else
23015 return 0;
23017 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
23018 return 1;
23021 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
23022 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
23023 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
23024 zero/false. Return 0 if the hardware has no such operation. */
23026 static int
23027 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23029 enum rtx_code code = GET_CODE (op);
23030 rtx op0 = XEXP (op, 0);
23031 rtx op1 = XEXP (op, 1);
23032 machine_mode result_mode = GET_MODE (dest);
23033 rtx compare_rtx;
23034 rtx cmove_rtx;
23035 rtx clobber_rtx;
23037 if (!can_create_pseudo_p ())
23038 return 0;
23040 switch (code)
23042 case EQ:
23043 case GE:
23044 case GT:
23045 break;
23047 case NE:
23048 case LT:
23049 case LE:
23050 code = swap_condition (code);
23051 std::swap (op0, op1);
23052 break;
23054 default:
23055 return 0;
23058 /* Generate: [(parallel [(set (dest)
23059 (if_then_else (op (cmp1) (cmp2))
23060 (true)
23061 (false)))
23062 (clobber (scratch))])]. */
23064 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
23065 cmove_rtx = gen_rtx_SET (dest,
23066 gen_rtx_IF_THEN_ELSE (result_mode,
23067 compare_rtx,
23068 true_cond,
23069 false_cond));
23071 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
23072 emit_insn (gen_rtx_PARALLEL (VOIDmode,
23073 gen_rtvec (2, cmove_rtx, clobber_rtx)));
23075 return 1;
23078 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
23079 operands of the last comparison is nonzero/true, FALSE_COND if it
23080 is zero/false. Return 0 if the hardware has no such operation. */
23083 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23085 enum rtx_code code = GET_CODE (op);
23086 rtx op0 = XEXP (op, 0);
23087 rtx op1 = XEXP (op, 1);
23088 machine_mode compare_mode = GET_MODE (op0);
23089 machine_mode result_mode = GET_MODE (dest);
23090 rtx temp;
23091 bool is_against_zero;
23093 /* These modes should always match. */
23094 if (GET_MODE (op1) != compare_mode
23095 /* In the isel case however, we can use a compare immediate, so
23096 op1 may be a small constant. */
23097 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
23098 return 0;
23099 if (GET_MODE (true_cond) != result_mode)
23100 return 0;
23101 if (GET_MODE (false_cond) != result_mode)
23102 return 0;
23104 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
23105 if (TARGET_P9_MINMAX
23106 && (compare_mode == SFmode || compare_mode == DFmode)
23107 && (result_mode == SFmode || result_mode == DFmode))
23109 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
23110 return 1;
23112 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
23113 return 1;
23116 /* Don't allow using floating point comparisons for integer results for
23117 now. */
23118 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
23119 return 0;
23121 /* First, work out if the hardware can do this at all, or
23122 if it's too slow.... */
23123 if (!FLOAT_MODE_P (compare_mode))
23125 if (TARGET_ISEL)
23126 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
23127 return 0;
23130 is_against_zero = op1 == CONST0_RTX (compare_mode);
23132 /* A floating-point subtract might overflow, underflow, or produce
23133 an inexact result, thus changing the floating-point flags, so it
23134 can't be generated if we care about that. It's safe if one side
23135 of the construct is zero, since then no subtract will be
23136 generated. */
23137 if (SCALAR_FLOAT_MODE_P (compare_mode)
23138 && flag_trapping_math && ! is_against_zero)
23139 return 0;
23141 /* Eliminate half of the comparisons by switching operands, this
23142 makes the remaining code simpler. */
23143 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
23144 || code == LTGT || code == LT || code == UNLE)
23146 code = reverse_condition_maybe_unordered (code);
23147 temp = true_cond;
23148 true_cond = false_cond;
23149 false_cond = temp;
23152 /* UNEQ and LTGT take four instructions for a comparison with zero,
23153 so it'll probably be faster to use a branch here too. */
23154 if (code == UNEQ && HONOR_NANS (compare_mode))
23155 return 0;
23157 /* We're going to try to implement comparisons by performing
23158 a subtract, then comparing against zero. Unfortunately,
23159 Inf - Inf is NaN which is not zero, and so if we don't
23160 know that the operand is finite and the comparison
23161 would treat EQ different to UNORDERED, we can't do it. */
23162 if (HONOR_INFINITIES (compare_mode)
23163 && code != GT && code != UNGE
23164 && (GET_CODE (op1) != CONST_DOUBLE
23165 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
23166 /* Constructs of the form (a OP b ? a : b) are safe. */
23167 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
23168 || (! rtx_equal_p (op0, true_cond)
23169 && ! rtx_equal_p (op1, true_cond))))
23170 return 0;
23172 /* At this point we know we can use fsel. */
23174 /* Reduce the comparison to a comparison against zero. */
23175 if (! is_against_zero)
23177 temp = gen_reg_rtx (compare_mode);
23178 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
23179 op0 = temp;
23180 op1 = CONST0_RTX (compare_mode);
23183 /* If we don't care about NaNs we can reduce some of the comparisons
23184 down to faster ones. */
23185 if (! HONOR_NANS (compare_mode))
23186 switch (code)
23188 case GT:
23189 code = LE;
23190 temp = true_cond;
23191 true_cond = false_cond;
23192 false_cond = temp;
23193 break;
23194 case UNGE:
23195 code = GE;
23196 break;
23197 case UNEQ:
23198 code = EQ;
23199 break;
23200 default:
23201 break;
23204 /* Now, reduce everything down to a GE. */
23205 switch (code)
23207 case GE:
23208 break;
23210 case LE:
23211 temp = gen_reg_rtx (compare_mode);
23212 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23213 op0 = temp;
23214 break;
23216 case ORDERED:
23217 temp = gen_reg_rtx (compare_mode);
23218 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
23219 op0 = temp;
23220 break;
23222 case EQ:
23223 temp = gen_reg_rtx (compare_mode);
23224 emit_insn (gen_rtx_SET (temp,
23225 gen_rtx_NEG (compare_mode,
23226 gen_rtx_ABS (compare_mode, op0))));
23227 op0 = temp;
23228 break;
23230 case UNGE:
23231 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
23232 temp = gen_reg_rtx (result_mode);
23233 emit_insn (gen_rtx_SET (temp,
23234 gen_rtx_IF_THEN_ELSE (result_mode,
23235 gen_rtx_GE (VOIDmode,
23236 op0, op1),
23237 true_cond, false_cond)));
23238 false_cond = true_cond;
23239 true_cond = temp;
23241 temp = gen_reg_rtx (compare_mode);
23242 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23243 op0 = temp;
23244 break;
23246 case GT:
23247 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
23248 temp = gen_reg_rtx (result_mode);
23249 emit_insn (gen_rtx_SET (temp,
23250 gen_rtx_IF_THEN_ELSE (result_mode,
23251 gen_rtx_GE (VOIDmode,
23252 op0, op1),
23253 true_cond, false_cond)));
23254 true_cond = false_cond;
23255 false_cond = temp;
23257 temp = gen_reg_rtx (compare_mode);
23258 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23259 op0 = temp;
23260 break;
23262 default:
23263 gcc_unreachable ();
23266 emit_insn (gen_rtx_SET (dest,
23267 gen_rtx_IF_THEN_ELSE (result_mode,
23268 gen_rtx_GE (VOIDmode,
23269 op0, op1),
23270 true_cond, false_cond)));
23271 return 1;
23274 /* Same as above, but for ints (isel). */
23277 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23279 rtx condition_rtx, cr;
23280 machine_mode mode = GET_MODE (dest);
23281 enum rtx_code cond_code;
23282 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
23283 bool signedp;
23285 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
23286 return 0;
23288 /* We still have to do the compare, because isel doesn't do a
23289 compare, it just looks at the CRx bits set by a previous compare
23290 instruction. */
23291 condition_rtx = rs6000_generate_compare (op, mode);
23292 cond_code = GET_CODE (condition_rtx);
23293 cr = XEXP (condition_rtx, 0);
23294 signedp = GET_MODE (cr) == CCmode;
23296 isel_func = (mode == SImode
23297 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
23298 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
23300 switch (cond_code)
23302 case LT: case GT: case LTU: case GTU: case EQ:
23303 /* isel handles these directly. */
23304 break;
23306 default:
23307 /* We need to swap the sense of the comparison. */
23309 std::swap (false_cond, true_cond);
23310 PUT_CODE (condition_rtx, reverse_condition (cond_code));
23312 break;
23315 false_cond = force_reg (mode, false_cond);
23316 if (true_cond != const0_rtx)
23317 true_cond = force_reg (mode, true_cond);
23319 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
23321 return 1;
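/* Emit the min/max operation CODE (SMIN, SMAX, UMIN or UMAX) of OP0 and
   OP1 into DEST, either with a direct vector/VSX instruction or via a
   conditional move, e.g. SMAX as "DEST = (OP0 >= OP1) ? OP0 : OP1".  */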
23324 void
23325 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23327 machine_mode mode = GET_MODE (op0);
23328 enum rtx_code c;
23329 rtx target;
23331 /* VSX/altivec have direct min/max insns. */
23332 if ((code == SMAX || code == SMIN)
23333 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
23334 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
23336 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
23337 return;
23340 if (code == SMAX || code == SMIN)
23341 c = GE;
23342 else
23343 c = GEU;
23345 if (code == SMAX || code == UMAX)
23346 target = emit_conditional_move (dest, c, op0, op1, mode,
23347 op0, op1, mode, 0);
23348 else
23349 target = emit_conditional_move (dest, c, op0, op1, mode,
23350 op1, op0, mode, 0);
23351 gcc_assert (target);
23352 if (target != dest)
23353 emit_move_insn (dest, target);
23356 /* A subroutine of the atomic operation splitters. Jump to LABEL if
23357 COND is true. Mark the jump as unlikely to be taken. */
23359 static void
23360 emit_unlikely_jump (rtx cond, rtx label)
23362 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
23363 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
23364 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
23367 /* A subroutine of the atomic operation splitters. Emit a load-locked
23368 instruction in MODE. For QI/HImode, possibly use a pattern than includes
23369 the zero_extend operation. */
23371 static void
23372 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
23374 rtx (*fn) (rtx, rtx) = NULL;
23376 switch (mode)
23378 case E_QImode:
23379 fn = gen_load_lockedqi;
23380 break;
23381 case E_HImode:
23382 fn = gen_load_lockedhi;
23383 break;
23384 case E_SImode:
23385 if (GET_MODE (mem) == QImode)
23386 fn = gen_load_lockedqi_si;
23387 else if (GET_MODE (mem) == HImode)
23388 fn = gen_load_lockedhi_si;
23389 else
23390 fn = gen_load_lockedsi;
23391 break;
23392 case E_DImode:
23393 fn = gen_load_lockeddi;
23394 break;
23395 case E_TImode:
23396 fn = gen_load_lockedti;
23397 break;
23398 default:
23399 gcc_unreachable ();
23401 emit_insn (fn (reg, mem));
23404 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23405 instruction in MODE. */
23407 static void
23408 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
23410 rtx (*fn) (rtx, rtx, rtx) = NULL;
23412 switch (mode)
23414 case E_QImode:
23415 fn = gen_store_conditionalqi;
23416 break;
23417 case E_HImode:
23418 fn = gen_store_conditionalhi;
23419 break;
23420 case E_SImode:
23421 fn = gen_store_conditionalsi;
23422 break;
23423 case E_DImode:
23424 fn = gen_store_conditionaldi;
23425 break;
23426 case E_TImode:
23427 fn = gen_store_conditionalti;
23428 break;
23429 default:
23430 gcc_unreachable ();
23433 /* Emit sync before stwcx. to address PPC405 Erratum. */
23434 if (PPC405_ERRATUM77)
23435 emit_insn (gen_hwsync ());
23437 emit_insn (fn (res, mem, val));
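/* emit_load_locked and emit_store_conditional together provide the
   l{b,h,w,d,q}arx / st{b,h,w,d,q}cx. pairing used by all of the
   atomic expanders below; an illustrative SImode skeleton is:

	loop:	lwarx	rT,0,rMEM	# load and reserve
		...			# compute the new value in rN
		stwcx.	rN,0,rMEM	# store iff reservation still held
		bne-	loop		# retry if the reservation was lost  */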
23440 /* Expand barriers before and after a load_locked/store_cond sequence. */
23442 static rtx
23443 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23445 rtx addr = XEXP (mem, 0);
23447 if (!legitimate_indirect_address_p (addr, reload_completed)
23448 && !legitimate_indexed_address_p (addr, reload_completed))
23450 addr = force_reg (Pmode, addr);
23451 mem = replace_equiv_address_nv (mem, addr);
23454 switch (model)
23456 case MEMMODEL_RELAXED:
23457 case MEMMODEL_CONSUME:
23458 case MEMMODEL_ACQUIRE:
23459 break;
23460 case MEMMODEL_RELEASE:
23461 case MEMMODEL_ACQ_REL:
23462 emit_insn (gen_lwsync ());
23463 break;
23464 case MEMMODEL_SEQ_CST:
23465 emit_insn (gen_hwsync ());
23466 break;
23467 default:
23468 gcc_unreachable ();
23470 return mem;
23473 static void
23474 rs6000_post_atomic_barrier (enum memmodel model)
23476 switch (model)
23478 case MEMMODEL_RELAXED:
23479 case MEMMODEL_CONSUME:
23480 case MEMMODEL_RELEASE:
23481 break;
23482 case MEMMODEL_ACQUIRE:
23483 case MEMMODEL_ACQ_REL:
23484 case MEMMODEL_SEQ_CST:
23485 emit_insn (gen_isync ());
23486 break;
23487 default:
23488 gcc_unreachable ();
23492 /* A subroutine of the various atomic expanders. For sub-word operations,
23493 we must adjust things to operate on SImode. Given the original MEM,
23494 return a new aligned memory. Also build and return the quantities by
23495 which to shift and mask. */
23497 static rtx
23498 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23500 rtx addr, align, shift, mask, mem;
23501 HOST_WIDE_INT shift_mask;
23502 machine_mode mode = GET_MODE (orig_mem);
23504 /* For smaller modes, we have to implement this via SImode. */
23505 shift_mask = (mode == QImode ? 0x18 : 0x10);
23507 addr = XEXP (orig_mem, 0);
23508 addr = force_reg (GET_MODE (addr), addr);
23510 /* Aligned memory containing subword. Generate a new memory. We
23511 do not want any of the existing MEM_ATTR data, as we're now
23512 accessing memory outside the original object. */
23513 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23514 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23515 mem = gen_rtx_MEM (SImode, align);
23516 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23517 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23518 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23520 /* Shift amount for subword relative to aligned word. */
23521 shift = gen_reg_rtx (SImode);
23522 addr = gen_lowpart (SImode, addr);
23523 rtx tmp = gen_reg_rtx (SImode);
23524 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23525 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23526 if (BYTES_BIG_ENDIAN)
23527 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23528 shift, 1, OPTAB_LIB_WIDEN);
23529 *pshift = shift;
23531 /* Mask for insertion. */
23532 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23533 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23534 *pmask = mask;
23536 return mem;
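/* Worked example: for a QImode access at address A, the containing
   SImode word is at A & -4 and the bit offset starts as (A & 3) * 8;
   on big-endian it is then XORed with 0x18 so that the lowest
   address maps to the most significant byte.  The insertion mask is
   0xff << shift (0xffff for HImode).  */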
23539 /* A subroutine of the various atomic expanders. For sub-word operands,
23540 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
23542 static rtx
23543 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
23545 rtx x;
23547 x = gen_reg_rtx (SImode);
23548 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23549 gen_rtx_NOT (SImode, mask),
23550 oldval)));
23552 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23554 return x;
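/* I.e. the merged word is (OLDVAL & ~MASK) | NEWVAL, with NEWVAL
   already shifted into position by the caller.  */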
23557 /* A subroutine of the various atomic expanders. For sub-word operands,
23558 extract WIDE to NARROW via SHIFT. */
23560 static void
23561 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
23563 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23564 wide, 1, OPTAB_LIB_WIDEN);
23565 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23568 /* Expand an atomic compare and swap operation. */
23570 void
23571 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23573 rtx boolval, retval, mem, oldval, newval, cond;
23574 rtx label1, label2, x, mask, shift;
23575 machine_mode mode, orig_mode;
23576 enum memmodel mod_s, mod_f;
23577 bool is_weak;
23579 boolval = operands[0];
23580 retval = operands[1];
23581 mem = operands[2];
23582 oldval = operands[3];
23583 newval = operands[4];
23584 is_weak = (INTVAL (operands[5]) != 0);
23585 mod_s = memmodel_base (INTVAL (operands[6]));
23586 mod_f = memmodel_base (INTVAL (operands[7]));
23587 orig_mode = mode = GET_MODE (mem);
23589 mask = shift = NULL_RTX;
23590 if (mode == QImode || mode == HImode)
23592 /* Before power8, we didn't have access to lbarx/lharx, so generate
23593 lwarx and shift/mask operations. With power8, we need to do the
23594 comparison in SImode, but the store is still done in QI/HImode. */
23595 oldval = convert_modes (SImode, mode, oldval, 1);
23597 if (!TARGET_SYNC_HI_QI)
23599 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23601 /* Shift and mask OLDVAL into position within the word. */
23602 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23603 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23605 /* Shift and mask NEWVAL into position within the word. */
23606 newval = convert_modes (SImode, mode, newval, 1);
23607 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23608 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23611 /* Prepare to adjust the return value. */
23612 retval = gen_reg_rtx (SImode);
23613 mode = SImode;
23615 else if (reg_overlap_mentioned_p (retval, oldval))
23616 oldval = copy_to_reg (oldval);
23618 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23619 oldval = copy_to_mode_reg (mode, oldval);
23621 if (reg_overlap_mentioned_p (retval, newval))
23622 newval = copy_to_reg (newval);
23624 mem = rs6000_pre_atomic_barrier (mem, mod_s);
23626 label1 = NULL_RTX;
23627 if (!is_weak)
23629 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23630 emit_label (XEXP (label1, 0));
23632 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23634 emit_load_locked (mode, retval, mem);
23636 x = retval;
23637 if (mask)
23638 x = expand_simple_binop (SImode, AND, retval, mask,
23639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23641 cond = gen_reg_rtx (CCmode);
23642 /* If we have TImode, synthesize a comparison. */
23643 if (mode != TImode)
23644 x = gen_rtx_COMPARE (CCmode, x, oldval);
23645 else
23647 rtx xor1_result = gen_reg_rtx (DImode);
23648 rtx xor2_result = gen_reg_rtx (DImode);
23649 rtx or_result = gen_reg_rtx (DImode);
23650 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23651 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23652 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23653 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23655 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23656 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23657 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23658 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23661 emit_insn (gen_rtx_SET (cond, x));
23663 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23664 emit_unlikely_jump (x, label2);
23666 x = newval;
23667 if (mask)
23668 x = rs6000_mask_atomic_subword (retval, newval, mask);
23670 emit_store_conditional (orig_mode, cond, mem, x);
23672 if (!is_weak)
23674 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23675 emit_unlikely_jump (x, label1);
23678 if (!is_mm_relaxed (mod_f))
23679 emit_label (XEXP (label2, 0));
23681 rs6000_post_atomic_barrier (mod_s);
23683 if (is_mm_relaxed (mod_f))
23684 emit_label (XEXP (label2, 0));
23686 if (shift)
23687 rs6000_finish_atomic_subword (operands[1], retval, shift);
23688 else if (mode != GET_MODE (operands[1]))
23689 convert_move (operands[1], retval, 1);
23691 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23692 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23693 emit_insn (gen_rtx_SET (boolval, x));
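/* For reference, the strong SImode compare-and-swap emitted above is
   roughly (a sketch ignoring barriers and subword handling):

	loop:	lwarx	ret,0,mem
		cmpw	cr0,ret,oldval
		bne-	fail
		stwcx.	newval,0,mem
		bne-	loop
	fail:				# CR0 has EQ on success, NE on failure  */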
23696 /* Expand an atomic exchange operation. */
23698 void
23699 rs6000_expand_atomic_exchange (rtx operands[])
23701 rtx retval, mem, val, cond;
23702 machine_mode mode;
23703 enum memmodel model;
23704 rtx label, x, mask, shift;
23706 retval = operands[0];
23707 mem = operands[1];
23708 val = operands[2];
23709 model = memmodel_base (INTVAL (operands[3]));
23710 mode = GET_MODE (mem);
23712 mask = shift = NULL_RTX;
23713 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23715 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23717 /* Shift and mask VAL into position within the word. */
23718 val = convert_modes (SImode, mode, val, 1);
23719 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23720 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23722 /* Prepare to adjust the return value. */
23723 retval = gen_reg_rtx (SImode);
23724 mode = SImode;
23727 mem = rs6000_pre_atomic_barrier (mem, model);
23729 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23730 emit_label (XEXP (label, 0));
23732 emit_load_locked (mode, retval, mem);
23734 x = val;
23735 if (mask)
23736 x = rs6000_mask_atomic_subword (retval, val, mask);
23738 cond = gen_reg_rtx (CCmode);
23739 emit_store_conditional (mode, cond, mem, x);
23741 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23742 emit_unlikely_jump (x, label);
23744 rs6000_post_atomic_barrier (model);
23746 if (shift)
23747 rs6000_finish_atomic_subword (operands[0], retval, shift);
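/* The full-word exchange loop is simply (sketch):

	loop:	lwarx	ret,0,mem
		stwcx.	val,0,mem
		bne-	loop  */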
23750 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23751 to perform. MEM is the memory on which to operate. VAL is the second
23752 operand of the binary operator. BEFORE and AFTER are optional locations to
23753 return the value of MEM either before or after the operation. MODEL_RTX
23754 is a CONST_INT containing the memory model to use. */
23756 void
23757 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23758 rtx orig_before, rtx orig_after, rtx model_rtx)
23760 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23761 machine_mode mode = GET_MODE (mem);
23762 machine_mode store_mode = mode;
23763 rtx label, x, cond, mask, shift;
23764 rtx before = orig_before, after = orig_after;
23766 mask = shift = NULL_RTX;
23767 /* On power8, we want to use SImode for the operation. On previous systems,
23768 perform the operation on a subword and shift/mask to get the proper byte or
23769 halfword. */
23770 if (mode == QImode || mode == HImode)
23772 if (TARGET_SYNC_HI_QI)
23774 val = convert_modes (SImode, mode, val, 1);
23776 /* Prepare to adjust the return value. */
23777 before = gen_reg_rtx (SImode);
23778 if (after)
23779 after = gen_reg_rtx (SImode);
23780 mode = SImode;
23782 else
23784 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23786 /* Shift and mask VAL into position within the word. */
23787 val = convert_modes (SImode, mode, val, 1);
23788 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23789 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23791 switch (code)
23793 case IOR:
23794 case XOR:
23795 /* We've already zero-extended VAL. That is sufficient to
23796 make certain that it does not affect other bits. */
23797 mask = NULL;
23798 break;
23800 case AND:
23801 /* If we make certain that all of the other bits in VAL are
23802 set, that will be sufficient to not affect other bits. */
23803 x = gen_rtx_NOT (SImode, mask);
23804 x = gen_rtx_IOR (SImode, x, val);
23805 emit_insn (gen_rtx_SET (val, x));
23806 mask = NULL;
23807 break;
23809 case NOT:
23810 case PLUS:
23811 case MINUS:
23812 /* These will all affect bits outside the field and need
23813 adjustment via MASK within the loop. */
23814 break;
23816 default:
23817 gcc_unreachable ();
23820 /* Prepare to adjust the return value. */
23821 before = gen_reg_rtx (SImode);
23822 if (after)
23823 after = gen_reg_rtx (SImode);
23824 store_mode = mode = SImode;
23828 mem = rs6000_pre_atomic_barrier (mem, model);
23830 label = gen_label_rtx ();
23831 emit_label (label);
23832 label = gen_rtx_LABEL_REF (VOIDmode, label);
23834 if (before == NULL_RTX)
23835 before = gen_reg_rtx (mode);
23837 emit_load_locked (mode, before, mem);
23839 if (code == NOT)
23841 x = expand_simple_binop (mode, AND, before, val,
23842 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23843 after = expand_simple_unop (mode, NOT, x, after, 1);
23845 else
23847 after = expand_simple_binop (mode, code, before, val,
23848 after, 1, OPTAB_LIB_WIDEN);
23851 x = after;
23852 if (mask)
23854 x = expand_simple_binop (SImode, AND, after, mask,
23855 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23856 x = rs6000_mask_atomic_subword (before, x, mask);
23858 else if (store_mode != mode)
23859 x = convert_modes (store_mode, mode, x, 1);
23861 cond = gen_reg_rtx (CCmode);
23862 emit_store_conditional (store_mode, cond, mem, x);
23864 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23865 emit_unlikely_jump (x, label);
23867 rs6000_post_atomic_barrier (model);
23869 if (shift)
23871 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23872 then do the calculations in an SImode register. */
23873 if (orig_before)
23874 rs6000_finish_atomic_subword (orig_before, before, shift);
23875 if (orig_after)
23876 rs6000_finish_atomic_subword (orig_after, after, shift);
23878 else if (store_mode != mode)
23880 /* QImode/HImode on machines with lbarx/lharx where we do the native
23881 operation and then do the calculations in an SImode register. */
23882 if (orig_before)
23883 convert_move (orig_before, before, 1);
23884 if (orig_after)
23885 convert_move (orig_after, after, 1);
23887 else if (orig_after && after != orig_after)
23888 emit_move_insn (orig_after, after);
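/* E.g. a full-word __atomic_fetch_add expands to roughly (a sketch
   with barriers elided):

	loop:	lwarx	before,0,mem
		add	after,before,val
		stwcx.	after,0,mem
		bne-	loop  */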
23891 /* Emit instructions to move SRC to DST. Called by splitters for
23892 multi-register moves. It will emit at most one instruction for
23893 each register that is accessed; that is, it won't emit li/lis pairs
23894 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23895 register. */
23897 void
23898 rs6000_split_multireg_move (rtx dst, rtx src)
23900 /* The register number of the first register being moved. */
23901 int reg;
23902 /* The mode that is to be moved. */
23903 machine_mode mode;
23904 /* The mode that the move is being done in, and its size. */
23905 machine_mode reg_mode;
23906 int reg_mode_size;
23907 /* The number of registers that will be moved. */
23908 int nregs;
23910 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23911 mode = GET_MODE (dst);
23912 nregs = hard_regno_nregs (reg, mode);
23913 if (FP_REGNO_P (reg))
23914 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23915 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
23916 else if (ALTIVEC_REGNO_P (reg))
23917 reg_mode = V16QImode;
23918 else
23919 reg_mode = word_mode;
23920 reg_mode_size = GET_MODE_SIZE (reg_mode);
23922 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23924 /* TDmode residing in FP registers is special, since the ISA requires that
23925 the lower-numbered word of a register pair is always the most significant
23926 word, even in little-endian mode. This does not match the usual subreg
23927 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23928 the appropriate constituent registers "by hand" in little-endian mode.
23930 Note we do not need to check for destructive overlap here since TDmode
23931 can only reside in even/odd register pairs. */
23932 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23934 rtx p_src, p_dst;
23935 int i;
23937 for (i = 0; i < nregs; i++)
23939 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23940 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23941 else
23942 p_src = simplify_gen_subreg (reg_mode, src, mode,
23943 i * reg_mode_size);
23945 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23946 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23947 else
23948 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23949 i * reg_mode_size);
23951 emit_insn (gen_rtx_SET (p_dst, p_src));
23954 return;
23957 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23959 /* Move register range backwards, if we might have destructive
23960 overlap. */
23961 int i;
23962 for (i = nregs - 1; i >= 0; i--)
23963 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23964 i * reg_mode_size),
23965 simplify_gen_subreg (reg_mode, src, mode,
23966 i * reg_mode_size)));
23968 else
23970 int i;
23971 int j = -1;
23972 bool used_update = false;
23973 rtx restore_basereg = NULL_RTX;
23975 if (MEM_P (src) && INT_REGNO_P (reg))
23977 rtx breg;
23979 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23980 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23982 rtx delta_rtx;
23983 breg = XEXP (XEXP (src, 0), 0);
23984 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23985 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23986 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23987 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23988 src = replace_equiv_address (src, breg);
23990 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
23992 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23994 rtx basereg = XEXP (XEXP (src, 0), 0);
23995 if (TARGET_UPDATE)
23997 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23998 emit_insn (gen_rtx_SET (ndst,
23999 gen_rtx_MEM (reg_mode,
24000 XEXP (src, 0))));
24001 used_update = true;
24003 else
24004 emit_insn (gen_rtx_SET (basereg,
24005 XEXP (XEXP (src, 0), 1)));
24006 src = replace_equiv_address (src, basereg);
24008 else
24010 rtx basereg = gen_rtx_REG (Pmode, reg);
24011 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
24012 src = replace_equiv_address (src, basereg);
24016 breg = XEXP (src, 0);
24017 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
24018 breg = XEXP (breg, 0);
24020 /* If the base register we are using to address memory is
24021 also a destination reg, then change that register last. */
24022 if (REG_P (breg)
24023 && REGNO (breg) >= REGNO (dst)
24024 && REGNO (breg) < REGNO (dst) + nregs)
24025 j = REGNO (breg) - REGNO (dst);
24027 else if (MEM_P (dst) && INT_REGNO_P (reg))
24029 rtx breg;
24031 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
24032 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
24034 rtx delta_rtx;
24035 breg = XEXP (XEXP (dst, 0), 0);
24036 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
24037 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
24038 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
24040 /* We have to update the breg before doing the store.
24041 Use store with update, if available. */
24043 if (TARGET_UPDATE)
24045 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24046 emit_insn (TARGET_32BIT
24047 ? (TARGET_POWERPC64
24048 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
24049 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
24050 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
24051 used_update = true;
24053 else
24054 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24055 dst = replace_equiv_address (dst, breg);
24057 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
24058 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
24060 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
24062 rtx basereg = XEXP (XEXP (dst, 0), 0);
24063 if (TARGET_UPDATE)
24065 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24066 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
24067 XEXP (dst, 0)),
24068 nsrc));
24069 used_update = true;
24071 else
24072 emit_insn (gen_rtx_SET (basereg,
24073 XEXP (XEXP (dst, 0), 1)));
24074 dst = replace_equiv_address (dst, basereg);
24076 else
24078 rtx basereg = XEXP (XEXP (dst, 0), 0);
24079 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
24080 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
24081 && REG_P (basereg)
24082 && REG_P (offsetreg)
24083 && REGNO (basereg) != REGNO (offsetreg));
24084 if (REGNO (basereg) == 0)
24086 rtx tmp = offsetreg;
24087 offsetreg = basereg;
24088 basereg = tmp;
24090 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
24091 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
24092 dst = replace_equiv_address (dst, basereg);
24095 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
24096 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
24099 for (i = 0; i < nregs; i++)
24101 /* Calculate index to next subword. */
24102 ++j;
24103 if (j == nregs)
24104 j = 0;
24106 /* If compiler already emitted move of first word by
24107 store with update, no need to do anything. */
24108 if (j == 0 && used_update)
24109 continue;
24111 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24112 j * reg_mode_size),
24113 simplify_gen_subreg (reg_mode, src, mode,
24114 j * reg_mode_size)));
24116 if (restore_basereg != NULL_RTX)
24117 emit_insn (restore_basereg);
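/* For example, a TImode GPR-to-GPR move on 64-bit splits into two
   DImode moves, emitted highest-numbered register first whenever the
   source and destination ranges overlap destructively, so that no
   input register is clobbered before it has been read.  */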
24122 /* This page contains routines that are used to determine what the
24123 function prologue and epilogue code will do and write them out. */
24125 /* Determine whether the REG is really used. */
24127 static bool
24128 save_reg_p (int reg)
24130 /* We need to mark the PIC offset register live for the same conditions
24131 as it is set up, otherwise it won't be saved before we clobber it. */
24133 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
24135 /* When calling eh_return, we must return true for all the cases
24136 where conditional_register_usage marks the PIC offset reg
24137 call used. */
24138 if (TARGET_TOC && TARGET_MINIMAL_TOC
24139 && (crtl->calls_eh_return
24140 || df_regs_ever_live_p (reg)
24141 || !constant_pool_empty_p ()))
24142 return true;
24144 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
24145 && flag_pic)
24146 return true;
24149 return !call_used_regs[reg] && df_regs_ever_live_p (reg);
24152 /* Return the first fixed-point register that is required to be
24153 saved. 32 if none. */
24155 static int
24156 first_reg_to_save (void)
24158 int first_reg;
24160 /* Find lowest numbered live register. */
24161 for (first_reg = 13; first_reg <= 31; first_reg++)
24162 if (save_reg_p (first_reg))
24163 break;
24165 #if TARGET_MACHO
24166 if (flag_pic
24167 && crtl->uses_pic_offset_table
24168 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
24169 return RS6000_PIC_OFFSET_TABLE_REGNUM;
24170 #endif
24172 return first_reg;
24175 /* Similar, for FP regs. */
24177 static int
24178 first_fp_reg_to_save (void)
24180 int first_reg;
24182 /* Find lowest numbered live register. */
24183 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
24184 if (save_reg_p (first_reg))
24185 break;
24187 return first_reg;
24190 /* Similar, for AltiVec regs. */
24192 static int
24193 first_altivec_reg_to_save (void)
24195 int i;
24197 /* Stack frame remains as is unless we are in AltiVec ABI. */
24198 if (! TARGET_ALTIVEC_ABI)
24199 return LAST_ALTIVEC_REGNO + 1;
24201 /* On Darwin, the unwind routines are compiled without
24202 TARGET_ALTIVEC, and use save_world to save/restore the
24203 altivec registers when necessary. */
24204 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24205 && ! TARGET_ALTIVEC)
24206 return FIRST_ALTIVEC_REGNO + 20;
24208 /* Find lowest numbered live register. */
24209 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
24210 if (save_reg_p (i))
24211 break;
24213 return i;
24216 /* Return a 32-bit mask of the AltiVec registers we need to set in
24217 VRSAVE. Bit n of the return value is 1 if Vn is live; bits are
24218 numbered from the MSB of the 32-bit word, so V0 is bit 0. */
24220 static unsigned int
24221 compute_vrsave_mask (void)
24223 unsigned int i, mask = 0;
24225 /* On Darwin, the unwind routines are compiled without
24226 TARGET_ALTIVEC, and use save_world to save/restore the
24227 call-saved altivec registers when necessary. */
24228 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24229 && ! TARGET_ALTIVEC)
24230 mask |= 0xFFF;
24232 /* First, find out if we use _any_ altivec registers. */
24233 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24234 if (df_regs_ever_live_p (i))
24235 mask |= ALTIVEC_REG_BIT (i);
24237 if (mask == 0)
24238 return mask;
24240 /* Next, remove the argument registers from the set. These must
24241 be in the VRSAVE mask set by the caller, so we don't need to add
24242 them in again. More importantly, the mask we compute here is
24243 used to generate CLOBBERs in the set_vrsave insn, and we do not
24244 wish the argument registers to die. */
24245 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
24246 mask &= ~ALTIVEC_REG_BIT (i);
24248 /* Similarly, remove the return value from the set. */
24250 bool yes = false;
24251 diddle_return_value (is_altivec_return_reg, &yes);
24252 if (yes)
24253 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
24256 return mask;
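/* Example, per the bit numbering described above: a function whose
   body uses only V20 (and where V20 is neither an argument nor the
   return-value register) yields a mask of 0x80000000 >> 20, i.e.
   0x00000800.  */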
24259 /* For a very restricted set of circumstances, we can cut down the
24260 size of prologues/epilogues by calling our own save/restore-the-world
24261 routines. */
24263 static void
24264 compute_save_world_info (rs6000_stack_t *info)
24266 info->world_save_p = 1;
24267 info->world_save_p
24268 = (WORLD_SAVE_P (info)
24269 && DEFAULT_ABI == ABI_DARWIN
24270 && !cfun->has_nonlocal_label
24271 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
24272 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
24273 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
24274 && info->cr_save_p);
24276 /* This will not work in conjunction with sibcalls. Make sure there
24277 are none. (This check is expensive, but seldom executed.) */
24278 if (WORLD_SAVE_P (info))
24280 rtx_insn *insn;
24281 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
24282 if (CALL_P (insn) && SIBLING_CALL_P (insn))
24284 info->world_save_p = 0;
24285 break;
24289 if (WORLD_SAVE_P (info))
24291 /* Even if we're not touching VRsave, make sure there's room on the
24292 stack for it, if it looks like we're calling SAVE_WORLD, which
24293 will attempt to save it. */
24294 info->vrsave_size = 4;
24296 /* If we are going to save the world, we need to save the link register too. */
24297 info->lr_save_p = 1;
24299 /* "Save" the VRsave register too if we're saving the world. */
24300 if (info->vrsave_mask == 0)
24301 info->vrsave_mask = compute_vrsave_mask ();
24303 /* Because the Darwin register save/restore routines only handle
24304 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
24305 check. */
24306 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
24307 && (info->first_altivec_reg_save
24308 >= FIRST_SAVED_ALTIVEC_REGNO));
24311 return;
24315 static void
24316 is_altivec_return_reg (rtx reg, void *xyes)
24318 bool *yes = (bool *) xyes;
24319 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
24320 *yes = true;
24324 /* Return whether REG is a global user reg or has been specified by
24325 -ffixed-REG. We should not restore these, and so cannot use
24326 lmw or out-of-line restore functions if there are any. We also
24327 can't save them (well, emit frame notes for them), because frame
24328 unwinding during exception handling will restore saved registers. */
24330 static bool
24331 fixed_reg_p (int reg)
24333 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
24334 backend sets it, overriding anything the user might have given. */
24335 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
24336 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
24337 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24338 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
24339 return false;
24341 return fixed_regs[reg];
24344 /* Determine the strategy for savings/restoring registers. */
24346 enum {
24347 SAVE_MULTIPLE = 0x1,
24348 SAVE_INLINE_GPRS = 0x2,
24349 SAVE_INLINE_FPRS = 0x4,
24350 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
24351 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
24352 SAVE_INLINE_VRS = 0x20,
24353 REST_MULTIPLE = 0x100,
24354 REST_INLINE_GPRS = 0x200,
24355 REST_INLINE_FPRS = 0x400,
24356 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
24357 REST_INLINE_VRS = 0x1000
24360 static int
24361 rs6000_savres_strategy (rs6000_stack_t *info,
24362 bool using_static_chain_p)
24364 int strategy = 0;
24366 /* Select between in-line and out-of-line save and restore of regs.
24367 First, all the obvious cases where we don't use out-of-line. */
24368 if (crtl->calls_eh_return
24369 || cfun->machine->ra_need_lr)
24370 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
24371 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
24372 | SAVE_INLINE_VRS | REST_INLINE_VRS);
24374 if (info->first_gp_reg_save == 32)
24375 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24377 if (info->first_fp_reg_save == 64
24378 /* The out-of-line FP routines use double-precision stores;
24379 we can't use those routines if we don't have such stores. */
24380 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
24381 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24383 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
24384 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24386 /* Define cutoff for using out-of-line functions to save registers. */
24387 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
24389 if (!optimize_size)
24391 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24392 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24393 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24395 else
24397 /* Prefer out-of-line restore if it will exit. */
24398 if (info->first_fp_reg_save > 61)
24399 strategy |= SAVE_INLINE_FPRS;
24400 if (info->first_gp_reg_save > 29)
24402 if (info->first_fp_reg_save == 64)
24403 strategy |= SAVE_INLINE_GPRS;
24404 else
24405 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24407 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
24408 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24411 else if (DEFAULT_ABI == ABI_DARWIN)
24413 if (info->first_fp_reg_save > 60)
24414 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24415 if (info->first_gp_reg_save > 29)
24416 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24417 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24419 else
24421 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24422 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
24423 || info->first_fp_reg_save > 61)
24424 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24425 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24426 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24429 /* Don't bother to try to save things out-of-line if r11 is occupied
24430 by the static chain. It would require too much fiddling and the
24431 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
24432 pointer on Darwin, and AIX uses r1 or r12. */
24433 if (using_static_chain_p
24434 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
24435 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
24436 | SAVE_INLINE_GPRS
24437 | SAVE_INLINE_VRS);
24439 /* Don't ever restore fixed regs. That means we can't use the
24440 out-of-line register restore functions if a fixed reg is in the
24441 range of regs restored. */
24442 if (!(strategy & REST_INLINE_FPRS))
24443 for (int i = info->first_fp_reg_save; i < 64; i++)
24444 if (fixed_regs[i])
24446 strategy |= REST_INLINE_FPRS;
24447 break;
24450 /* We can only use the out-of-line routines to restore fprs if we've
24451 saved all the registers from first_fp_reg_save in the prologue.
24452 Otherwise, we risk loading garbage. Of course, if we have saved
24453 out-of-line then we know we haven't skipped any fprs. */
24454 if ((strategy & SAVE_INLINE_FPRS)
24455 && !(strategy & REST_INLINE_FPRS))
24456 for (int i = info->first_fp_reg_save; i < 64; i++)
24457 if (!save_reg_p (i))
24459 strategy |= REST_INLINE_FPRS;
24460 break;
24463 /* Similarly, for altivec regs. */
24464 if (!(strategy & REST_INLINE_VRS))
24465 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24466 if (fixed_regs[i])
24468 strategy |= REST_INLINE_VRS;
24469 break;
24472 if ((strategy & SAVE_INLINE_VRS)
24473 && !(strategy & REST_INLINE_VRS))
24474 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24475 if (!save_reg_p (i))
24477 strategy |= REST_INLINE_VRS;
24478 break;
24481 /* info->lr_save_p isn't yet set if the only reason lr needs to be
24482 saved is an out-of-line save or restore. Set up the value for
24483 the next test (excluding out-of-line gprs). */
24484 bool lr_save_p = (info->lr_save_p
24485 || !(strategy & SAVE_INLINE_FPRS)
24486 || !(strategy & SAVE_INLINE_VRS)
24487 || !(strategy & REST_INLINE_FPRS)
24488 || !(strategy & REST_INLINE_VRS));
24490 if (TARGET_MULTIPLE
24491 && !TARGET_POWERPC64
24492 && info->first_gp_reg_save < 31
24493 && !(flag_shrink_wrap
24494 && flag_shrink_wrap_separate
24495 && optimize_function_for_speed_p (cfun)))
24497 int count = 0;
24498 for (int i = info->first_gp_reg_save; i < 32; i++)
24499 if (save_reg_p (i))
24500 count++;
24502 if (count <= 1)
24503 /* Don't use store multiple if only one reg needs to be
24504 saved. This can occur for example when the ABI_V4 pic reg
24505 (r30) needs to be saved to make calls, but r31 is not
24506 used. */
24507 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24508 else
24510 /* Prefer store multiple for saves over out-of-line
24511 routines, since the store-multiple instruction will
24512 always be smaller. */
24513 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
24515 /* The situation is more complicated with load multiple.
24516 We'd prefer to use the out-of-line routines for restores,
24517 since the "exit" out-of-line routines can handle the
24518 restore of LR and the frame teardown. However, it doesn't
24519 make sense to use the out-of-line routine if that is the
24520 only reason we'd need to save LR, and we can't use the
24521 "exit" out-of-line gpr restore if we have saved some
24522 fprs; in those cases it is advantageous to use load
24523 multiple when available. */
24524 if (info->first_fp_reg_save != 64 || !lr_save_p)
24525 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
24529 /* Using the "exit" out-of-line routine does not improve code size
24530 if using it would require LR to be saved and we are only saving
24531 one or two gprs. */
24532 else if (!lr_save_p && info->first_gp_reg_save > 29)
24533 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24535 /* Don't ever restore fixed regs. */
24536 if ((strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
24537 for (int i = info->first_gp_reg_save; i < 32; i++)
24538 if (fixed_reg_p (i))
24540 strategy |= REST_INLINE_GPRS;
24541 strategy &= ~REST_MULTIPLE;
24542 break;
24545 /* We can only use load multiple or the out-of-line routines to
24546 restore gprs if we've saved all the registers from
24547 first_gp_reg_save. Otherwise, we risk loading garbage.
24548 Of course, if we have saved out-of-line or used stmw then we know
24549 we haven't skipped any gprs. */
24550 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
24551 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
24552 for (int i = info->first_gp_reg_save; i < 32; i++)
24553 if (!save_reg_p (i))
24555 strategy |= REST_INLINE_GPRS;
24556 strategy &= ~REST_MULTIPLE;
24557 break;
24560 if (TARGET_ELF && TARGET_64BIT)
24562 if (!(strategy & SAVE_INLINE_FPRS))
24563 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24564 else if (!(strategy & SAVE_INLINE_GPRS)
24565 && info->first_fp_reg_save == 64)
24566 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
24568 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
24569 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
24571 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
24572 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24574 return strategy;
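/* For instance, a 32-bit SVR4 function compiled at -Os that must
   save r28..r31 typically gets SAVE_INLINE_GPRS plus SAVE_MULTIPLE
   (a single stmw), and REST_MULTIPLE (a single lmw) when FPR saves
   or an unsaved LR rule out the "exit" out-of-line restore.  */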
24577 /* Calculate the stack information for the current function. This is
24578 complicated by having two separate calling sequences, the AIX calling
24579 sequence and the V.4 calling sequence.
24581 AIX (and Darwin/Mac OS X) stack frames look like:
24582 32-bit 64-bit
24583 SP----> +---------------------------------------+
24584 | back chain to caller | 0 0
24585 +---------------------------------------+
24586 | saved CR | 4 8 (8-11)
24587 +---------------------------------------+
24588 | saved LR | 8 16
24589 +---------------------------------------+
24590 | reserved for compilers | 12 24
24591 +---------------------------------------+
24592 | reserved for binders | 16 32
24593 +---------------------------------------+
24594 | saved TOC pointer | 20 40
24595 +---------------------------------------+
24596 | Parameter save area (+padding*) (P) | 24 48
24597 +---------------------------------------+
24598 | Alloca space (A) | 24+P etc.
24599 +---------------------------------------+
24600 | Local variable space (L) | 24+P+A
24601 +---------------------------------------+
24602 | Float/int conversion temporary (X) | 24+P+A+L
24603 +---------------------------------------+
24604 | Save area for AltiVec registers (W) | 24+P+A+L+X
24605 +---------------------------------------+
24606 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
24607 +---------------------------------------+
24608 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
24609 +---------------------------------------+
24610 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
24611 +---------------------------------------+
24612 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
24613 +---------------------------------------+
24614 old SP->| back chain to caller's caller |
24615 +---------------------------------------+
24617 * If the alloca area is present, the parameter save area is
24618 padded so that the alloca area starts 16-byte aligned.
24620 The required alignment for AIX configurations is two words (i.e., 8
24621 or 16 bytes).
24623 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
24625 SP----> +---------------------------------------+
24626 | Back chain to caller | 0
24627 +---------------------------------------+
24628 | Save area for CR | 8
24629 +---------------------------------------+
24630 | Saved LR | 16
24631 +---------------------------------------+
24632 | Saved TOC pointer | 24
24633 +---------------------------------------+
24634 | Parameter save area (+padding*) (P) | 32
24635 +---------------------------------------+
24636 | Alloca space (A) | 32+P
24637 +---------------------------------------+
24638 | Local variable space (L) | 32+P+A
24639 +---------------------------------------+
24640 | Save area for AltiVec registers (W) | 32+P+A+L
24641 +---------------------------------------+
24642 | AltiVec alignment padding (Y) | 32+P+A+L+W
24643 +---------------------------------------+
24644 | Save area for GP registers (G) | 32+P+A+L+W+Y
24645 +---------------------------------------+
24646 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
24647 +---------------------------------------+
24648 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
24649 +---------------------------------------+
24651 * If the alloca area is present, the parameter save area is
24652 padded so that the alloca area starts 16-byte aligned.
24654 V.4 stack frames look like:
24656 SP----> +---------------------------------------+
24657 | back chain to caller | 0
24658 +---------------------------------------+
24659 | caller's saved LR | 4
24660 +---------------------------------------+
24661 | Parameter save area (+padding*) (P) | 8
24662 +---------------------------------------+
24663 | Alloca space (A) | 8+P
24664 +---------------------------------------+
24665 | Varargs save area (V) | 8+P+A
24666 +---------------------------------------+
24667 | Local variable space (L) | 8+P+A+V
24668 +---------------------------------------+
24669 | Float/int conversion temporary (X) | 8+P+A+V+L
24670 +---------------------------------------+
24671 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
24672 +---------------------------------------+
24673 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
24674 +---------------------------------------+
24675 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
24676 +---------------------------------------+
24677 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
24678 +---------------------------------------+
24679 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
24680 +---------------------------------------+
24681 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
24682 +---------------------------------------+
24683 old SP->| back chain to caller's caller |
24684 +---------------------------------------+
24686 * If the alloca area is present and the required alignment is
24687 16 bytes, the parameter save area is padded so that the
24688 alloca area starts 16-byte aligned.
24690 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
24691 given. (But note below and in sysv4.h that we require only 8 and
24692 may round up the size of our stack frame anyway. The historical
24693 reason is early versions of powerpc-linux which didn't properly
24694 align the stack at program startup. A happy side-effect is that
24695 -mno-eabi libraries can be used with -meabi programs.)
24697 The EABI configuration defaults to the V.4 layout. However,
24698 the stack alignment requirements may differ. If -mno-eabi is not
24699 given, the required stack alignment is 8 bytes; if -mno-eabi is
24700 given, the required alignment is 16 bytes. (But see V.4 comment
24701 above.) */
24703 #ifndef ABI_STACK_BOUNDARY
24704 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
24705 #endif
24707 static rs6000_stack_t *
24708 rs6000_stack_info (void)
24710 /* We should never be called for thunks; we are not set up for that. */
24711 gcc_assert (!cfun->is_thunk);
24713 rs6000_stack_t *info = &stack_info;
24714 int reg_size = TARGET_32BIT ? 4 : 8;
24715 int ehrd_size;
24716 int ehcr_size;
24717 int save_align;
24718 int first_gp;
24719 HOST_WIDE_INT non_fixed_size;
24720 bool using_static_chain_p;
24722 if (reload_completed && info->reload_completed)
24723 return info;
24725 memset (info, 0, sizeof (*info));
24726 info->reload_completed = reload_completed;
24728 /* Select which calling sequence. */
24729 info->abi = DEFAULT_ABI;
24731 /* Calculate which registers need to be saved & save area size. */
24732 info->first_gp_reg_save = first_reg_to_save ();
24733 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
24734 even if it currently looks like we won't. Reload may need it to
24735 get at a constant; if so, it will have already created a constant
24736 pool entry for it. */
24737 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
24738 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
24739 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
24740 && crtl->uses_const_pool
24741 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
24742 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
24743 else
24744 first_gp = info->first_gp_reg_save;
24746 info->gp_size = reg_size * (32 - first_gp);
24748 info->first_fp_reg_save = first_fp_reg_to_save ();
24749 info->fp_size = 8 * (64 - info->first_fp_reg_save);
24751 info->first_altivec_reg_save = first_altivec_reg_to_save ();
24752 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
24753 - info->first_altivec_reg_save);
24755 /* Does this function call anything? */
24756 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
24758 /* Determine if we need to save the condition code registers. */
24759 if (save_reg_p (CR2_REGNO)
24760 || save_reg_p (CR3_REGNO)
24761 || save_reg_p (CR4_REGNO))
24763 info->cr_save_p = 1;
24764 if (DEFAULT_ABI == ABI_V4)
24765 info->cr_size = reg_size;
24768 /* If the current function calls __builtin_eh_return, then we need
24769 to allocate stack space for registers that will hold data for
24770 the exception handler. */
24771 if (crtl->calls_eh_return)
24773 unsigned int i;
24774 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
24775 continue;
24777 ehrd_size = i * UNITS_PER_WORD;
24779 else
24780 ehrd_size = 0;
24782 /* In the ELFv2 ABI, we also need to allocate space for separate
24783 CR field save areas if the function calls __builtin_eh_return. */
24784 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24786 /* This hard-codes that we have three call-saved CR fields. */
24787 ehcr_size = 3 * reg_size;
24788 /* We do *not* use the regular CR save mechanism. */
24789 info->cr_save_p = 0;
24791 else
24792 ehcr_size = 0;
24794 /* Determine various sizes. */
24795 info->reg_size = reg_size;
24796 info->fixed_size = RS6000_SAVE_AREA;
24797 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
24798 if (cfun->calls_alloca)
24799 info->parm_size =
24800 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
24801 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
24802 else
24803 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
24804 TARGET_ALTIVEC ? 16 : 8);
24805 if (FRAME_GROWS_DOWNWARD)
24806 info->vars_size
24807 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
24808 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
24809 - (info->fixed_size + info->vars_size + info->parm_size);
24811 if (TARGET_ALTIVEC_ABI)
24812 info->vrsave_mask = compute_vrsave_mask ();
24814 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
24815 info->vrsave_size = 4;
24817 compute_save_world_info (info);
24819 /* Calculate the offsets. */
24820 switch (DEFAULT_ABI)
24822 case ABI_NONE:
24823 default:
24824 gcc_unreachable ();
24826 case ABI_AIX:
24827 case ABI_ELFv2:
24828 case ABI_DARWIN:
24829 info->fp_save_offset = -info->fp_size;
24830 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24832 if (TARGET_ALTIVEC_ABI)
24834 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
24836 /* Align stack so vector save area is on a quadword boundary.
24837 The padding goes above the vectors. */
24838 if (info->altivec_size != 0)
24839 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
24841 info->altivec_save_offset = info->vrsave_save_offset
24842 - info->altivec_padding_size
24843 - info->altivec_size;
24844 gcc_assert (info->altivec_size == 0
24845 || info->altivec_save_offset % 16 == 0);
24847 /* Adjust for AltiVec case. */
24848 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
24850 else
24851 info->ehrd_offset = info->gp_save_offset - ehrd_size;
24853 info->ehcr_offset = info->ehrd_offset - ehcr_size;
24854 info->cr_save_offset = reg_size; /* first word when 64-bit. */
24855 info->lr_save_offset = 2*reg_size;
24856 break;
24858 case ABI_V4:
24859 info->fp_save_offset = -info->fp_size;
24860 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24861 info->cr_save_offset = info->gp_save_offset - info->cr_size;
24863 if (TARGET_ALTIVEC_ABI)
24865 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
24867 /* Align stack so vector save area is on a quadword boundary. */
24868 if (info->altivec_size != 0)
24869 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
24871 info->altivec_save_offset = info->vrsave_save_offset
24872 - info->altivec_padding_size
24873 - info->altivec_size;
24875 /* Adjust for AltiVec case. */
24876 info->ehrd_offset = info->altivec_save_offset;
24878 else
24879 info->ehrd_offset = info->cr_save_offset;
24881 info->ehrd_offset -= ehrd_size;
24882 info->lr_save_offset = reg_size;
24885 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
24886 info->save_size = RS6000_ALIGN (info->fp_size
24887 + info->gp_size
24888 + info->altivec_size
24889 + info->altivec_padding_size
24890 + ehrd_size
24891 + ehcr_size
24892 + info->cr_size
24893 + info->vrsave_size,
24894 save_align);
24896 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
24898 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
24899 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
24901 /* Determine if we need to save the link register. */
24902 if (info->calls_p
24903 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24904 && crtl->profile
24905 && !TARGET_PROFILE_KERNEL)
24906 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
24907 #ifdef TARGET_RELOCATABLE
24908 || (DEFAULT_ABI == ABI_V4
24909 && (TARGET_RELOCATABLE || flag_pic > 1)
24910 && !constant_pool_empty_p ())
24911 #endif
24912 || rs6000_ra_ever_killed ())
24913 info->lr_save_p = 1;
24915 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24916 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24917 && call_used_regs[STATIC_CHAIN_REGNUM]);
24918 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
24920 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
24921 || !(info->savres_strategy & SAVE_INLINE_FPRS)
24922 || !(info->savres_strategy & SAVE_INLINE_VRS)
24923 || !(info->savres_strategy & REST_INLINE_GPRS)
24924 || !(info->savres_strategy & REST_INLINE_FPRS)
24925 || !(info->savres_strategy & REST_INLINE_VRS))
24926 info->lr_save_p = 1;
24928 if (info->lr_save_p)
24929 df_set_regs_ever_live (LR_REGNO, true);
24931 /* Determine if we need to allocate any stack frame:
24933 For AIX we need to push the stack if a frame pointer is needed
24934 (because the stack might be dynamically adjusted), if we are
24935 debugging, if we make calls, or if the sum of fp_save, gp_save,
24936 and local variables is more than the space needed to save all
24937 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
24938 + 18*8 = 288 (GPR13 reserved).
24940 For V.4 we don't have the stack cushion that AIX uses, but assume
24941 that the debugger can handle stackless frames. */
24943 if (info->calls_p)
24944 info->push_p = 1;
24946 else if (DEFAULT_ABI == ABI_V4)
24947 info->push_p = non_fixed_size != 0;
24949 else if (frame_pointer_needed)
24950 info->push_p = 1;
24952 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
24953 info->push_p = 1;
24955 else
24956 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
24958 return info;
24961 static void
24962 debug_stack_info (rs6000_stack_t *info)
24964 const char *abi_string;
24966 if (! info)
24967 info = rs6000_stack_info ();
24969 fprintf (stderr, "\nStack information for function %s:\n",
24970 ((current_function_decl && DECL_NAME (current_function_decl))
24971 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
24972 : "<unknown>"));
24974 switch (info->abi)
24976 default: abi_string = "Unknown"; break;
24977 case ABI_NONE: abi_string = "NONE"; break;
24978 case ABI_AIX: abi_string = "AIX"; break;
24979 case ABI_ELFv2: abi_string = "ELFv2"; break;
24980 case ABI_DARWIN: abi_string = "Darwin"; break;
24981 case ABI_V4: abi_string = "V.4"; break;
24984 fprintf (stderr, "\tABI = %5s\n", abi_string);
24986 if (TARGET_ALTIVEC_ABI)
24987 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
24989 if (info->first_gp_reg_save != 32)
24990 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
24992 if (info->first_fp_reg_save != 64)
24993 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
24995 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
24996 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
24997 info->first_altivec_reg_save);
24999 if (info->lr_save_p)
25000 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
25002 if (info->cr_save_p)
25003 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
25005 if (info->vrsave_mask)
25006 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
25008 if (info->push_p)
25009 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
25011 if (info->calls_p)
25012 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
25014 if (info->gp_size)
25015 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
25017 if (info->fp_size)
25018 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
25020 if (info->altivec_size)
25021 fprintf (stderr, "\taltivec_save_offset = %5d\n",
25022 info->altivec_save_offset);
25024 if (info->vrsave_size)
25025 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
25026 info->vrsave_save_offset);
25028 if (info->lr_save_p)
25029 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
25031 if (info->cr_save_p)
25032 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
25034 if (info->varargs_save_offset)
25035 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
25037 if (info->total_size)
25038 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25039 info->total_size);
25041 if (info->vars_size)
25042 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25043 info->vars_size);
25045 if (info->parm_size)
25046 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
25048 if (info->fixed_size)
25049 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
25051 if (info->gp_size)
25052 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
25054 if (info->fp_size)
25055 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
25057 if (info->altivec_size)
25058 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
25060 if (info->vrsave_size)
25061 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
25063 if (info->altivec_padding_size)
25064 fprintf (stderr, "\taltivec_padding_size= %5d\n",
25065 info->altivec_padding_size);
25067 if (info->cr_size)
25068 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
25070 if (info->save_size)
25071 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
25073 if (info->reg_size != 4)
25074 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
25076 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
25078 fprintf (stderr, "\n");
25081 rtx
25082 rs6000_return_addr (int count, rtx frame)
25084 /* We can't use get_hard_reg_initial_val for LR when count == 0 if LR
25085 is trashed by the prologue, as it is for PIC on ABI_V4 and Darwin. */
25086 if (count != 0
25087 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
25089 cfun->machine->ra_needs_full_frame = 1;
25091 if (count == 0)
25092 /* FRAME is set to frame_pointer_rtx by the generic code, but that
25093 is good for loading 0(r1) only when !FRAME_GROWS_DOWNWARD. */
25094 frame = stack_pointer_rtx;
25095 rtx prev_frame_addr = memory_address (Pmode, frame);
25096 rtx prev_frame = copy_to_reg (gen_rtx_MEM (Pmode, prev_frame_addr));
25097 rtx lr_save_off = plus_constant (Pmode,
25098 prev_frame, RETURN_ADDRESS_OFFSET);
25099 rtx lr_save_addr = memory_address (Pmode, lr_save_off);
25100 return gen_rtx_MEM (Pmode, lr_save_addr);
25103 cfun->machine->ra_need_lr = 1;
25104 return get_hard_reg_initial_val (Pmode, LR_REGNO);
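/* Illustratively, the non-trivial path above loads the back chain
   word at 0(FRAME) to find the previous frame, then returns a MEM
   for the LR save word at RETURN_ADDRESS_OFFSET within it; this is
   why ra_needs_full_frame is set for those cases.  */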
25107 /* Say whether a function is a candidate for sibcall handling or not. */
25109 static bool
25110 rs6000_function_ok_for_sibcall (tree decl, tree exp)
25112 tree fntype;
25114 if (decl)
25115 fntype = TREE_TYPE (decl);
25116 else
25117 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
25119 /* We can't do it if the called function has more vector parameters
25120 than the current function; there's nowhere to put the VRsave code. */
25121 if (TARGET_ALTIVEC_ABI
25122 && TARGET_ALTIVEC_VRSAVE
25123 && !(decl && decl == current_function_decl))
25125 function_args_iterator args_iter;
25126 tree type;
25127 int nvreg = 0;
25129 /* Functions with vector parameters are required to have a
25130 prototype, so the argument type info must be available
25131 here. */
25132 FOREACH_FUNCTION_ARGS (fntype, type, args_iter)
25133 if (TREE_CODE (type) == VECTOR_TYPE
25134 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25135 nvreg++;
25137 FOREACH_FUNCTION_ARGS (TREE_TYPE (current_function_decl), type, args_iter)
25138 if (TREE_CODE (type) == VECTOR_TYPE
25139 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25140 nvreg--;
25142 if (nvreg > 0)
25143 return false;
25146 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
25147 functions, because the callee may have a different TOC pointer from
25148 the caller's and there's no way to ensure we restore the TOC when
25149 we return. With the secure-plt SYSV ABI we can't make non-local
25150 calls when -fpic/PIC because the plt call stubs use r30. */
25151 if (DEFAULT_ABI == ABI_DARWIN
25152 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25153 && decl
25154 && !DECL_EXTERNAL (decl)
25155 && !DECL_WEAK (decl)
25156 && (*targetm.binds_local_p) (decl))
25157 || (DEFAULT_ABI == ABI_V4
25158 && (!TARGET_SECURE_PLT
25159 || !flag_pic
25160 || (decl
25161 && (*targetm.binds_local_p) (decl)))))
25163 tree attr_list = TYPE_ATTRIBUTES (fntype);
25165 if (!lookup_attribute ("longcall", attr_list)
25166 || lookup_attribute ("shortcall", attr_list))
25167 return true;
25170 return false;
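/* For illustration (editor's sketch, not original code): a local tail
   call such as

       static int add1 (int x) { return x + 1; }
       int f (int x) { return add1 (x); }

   satisfies the AIX/ELFv2 test above because add1 binds locally, so no
   TOC restore is needed and the call may become a sibcall (a plain
   branch). The same call to a DECL_EXTERNAL function in another module
   would be rejected. */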
25173 static int
25174 rs6000_ra_ever_killed (void)
25176 rtx_insn *top;
25177 rtx reg;
25178 rtx_insn *insn;
25180 if (cfun->is_thunk)
25181 return 0;
25183 if (cfun->machine->lr_save_state)
25184 return cfun->machine->lr_save_state - 1;
25186 /* regs_ever_live has LR marked as used if any sibcalls are present,
25187 but this should not force saving and restoring in the
25188 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
25189 clobbers LR, so that is inappropriate. */
25191 /* Also, the prologue can generate a store into LR that
25192 doesn't really count, like this:
25194 move LR->R0
25195 bcl to set PIC register
25196 move LR->R31
25197 move R0->LR
25199 When we're called from the epilogue, we need to avoid counting
25200 this as a store. */
25202 push_topmost_sequence ();
25203 top = get_insns ();
25204 pop_topmost_sequence ();
25205 reg = gen_rtx_REG (Pmode, LR_REGNO);
25207 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
25209 if (INSN_P (insn))
25211 if (CALL_P (insn))
25213 if (!SIBLING_CALL_P (insn))
25214 return 1;
25216 else if (find_regno_note (insn, REG_INC, LR_REGNO))
25217 return 1;
25218 else if (set_of (reg, insn) != NULL_RTX
25219 && !prologue_epilogue_contains (insn))
25220 return 1;
25223 return 0;
25226 /* Emit instructions needed to load the TOC register.
25227 This is needed only when TARGET_TOC and TARGET_MINIMAL_TOC are set
25228 and there is a constant pool, or for SVR4 -fpic. */
25230 void
25231 rs6000_emit_load_toc_table (int fromprolog)
25233 rtx dest;
25234 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25236 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
25238 char buf[30];
25239 rtx lab, tmp1, tmp2, got;
25241 lab = gen_label_rtx ();
25242 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
25243 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25244 if (flag_pic == 2)
25246 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25247 need_toc_init = 1;
25249 else
25250 got = rs6000_got_sym ();
25251 tmp1 = tmp2 = dest;
25252 if (!fromprolog)
25254 tmp1 = gen_reg_rtx (Pmode);
25255 tmp2 = gen_reg_rtx (Pmode);
25257 emit_insn (gen_load_toc_v4_PIC_1 (lab));
25258 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
25259 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
25260 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
25262 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
25264 emit_insn (gen_load_toc_v4_pic_si ());
25265 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25267 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
25269 char buf[30];
25270 rtx temp0 = (fromprolog
25271 ? gen_rtx_REG (Pmode, 0)
25272 : gen_reg_rtx (Pmode));
25274 if (fromprolog)
25276 rtx symF, symL;
25278 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
25279 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25281 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
25282 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25284 emit_insn (gen_load_toc_v4_PIC_1 (symF));
25285 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25286 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
25288 else
25290 rtx tocsym, lab;
25292 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25293 need_toc_init = 1;
25294 lab = gen_label_rtx ();
25295 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
25296 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25297 if (TARGET_LINK_STACK)
25298 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
25299 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
25301 emit_insn (gen_addsi3 (dest, temp0, dest));
25303 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
25305 /* This is for AIX code running in non-PIC ELF32. */
25306 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25308 need_toc_init = 1;
25309 emit_insn (gen_elf_high (dest, realsym));
25310 emit_insn (gen_elf_low (dest, dest, realsym));
25312 else
25314 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25316 if (TARGET_32BIT)
25317 emit_insn (gen_load_toc_aix_si (dest));
25318 else
25319 emit_insn (gen_load_toc_aix_di (dest));
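/* For illustration (editor's sketch; exact labels and registers vary):
   the -msecure-plt -fPIC case above typically assembles to a
   PC-relative computation of the GOT address, roughly

       bcl 20,31,.L1
   .L1: mflr 30
       addis 30,30,_GLOBAL_OFFSET_TABLE_-.L1@ha
       addi 30,30,_GLOBAL_OFFSET_TABLE_-.L1@l
*/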
25323 /* Emit instructions to restore the link register after determining where
25324 its value has been stored. */
25326 void
25327 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
25329 rs6000_stack_t *info = rs6000_stack_info ();
25330 rtx operands[2];
25332 operands[0] = source;
25333 operands[1] = scratch;
25335 if (info->lr_save_p)
25337 rtx frame_rtx = stack_pointer_rtx;
25338 HOST_WIDE_INT sp_offset = 0;
25339 rtx tmp;
25341 if (frame_pointer_needed
25342 || cfun->calls_alloca
25343 || info->total_size > 32767)
25345 tmp = gen_frame_mem (Pmode, frame_rtx);
25346 emit_move_insn (operands[1], tmp);
25347 frame_rtx = operands[1];
25349 else if (info->push_p)
25350 sp_offset = info->total_size;
25352 tmp = plus_constant (Pmode, frame_rtx,
25353 info->lr_save_offset + sp_offset);
25354 tmp = gen_frame_mem (Pmode, tmp);
25355 emit_move_insn (tmp, operands[0]);
25357 else
25358 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
25360 /* Freeze lr_save_p. We've just emitted rtl that depends on the
25361 state of lr_save_p so any change from here on would be a bug. In
25362 particular, stop rs6000_ra_ever_killed from considering the SET
25363 of lr we may have added just above. */
25364 cfun->machine->lr_save_state = info->lr_save_p + 1;
25367 static GTY(()) alias_set_type set = -1;
25369 alias_set_type
25370 get_TOC_alias_set (void)
25372 if (set == -1)
25373 set = new_alias_set ();
25374 return set;
25377 /* This returns nonzero if the current function uses the TOC. This is
25378 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
25379 is generated by the ABI_V4 load_toc_* patterns.
25380 Return 2 instead of 1 if the load_toc_* pattern is in the function
25381 partition that doesn't start the function. */
25382 #if TARGET_ELF
25383 static int
25384 uses_TOC (void)
25386 rtx_insn *insn;
25387 int ret = 1;
25389 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25391 if (INSN_P (insn))
25393 rtx pat = PATTERN (insn);
25394 int i;
25396 if (GET_CODE (pat) == PARALLEL)
25397 for (i = 0; i < XVECLEN (pat, 0); i++)
25399 rtx sub = XVECEXP (pat, 0, i);
25400 if (GET_CODE (sub) == USE)
25402 sub = XEXP (sub, 0);
25403 if (GET_CODE (sub) == UNSPEC
25404 && XINT (sub, 1) == UNSPEC_TOC)
25405 return ret;
25409 else if (crtl->has_bb_partition
25410 && NOTE_P (insn)
25411 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
25412 ret = 2;
25414 return 0;
25416 #endif
25418 rtx
25419 create_TOC_reference (rtx symbol, rtx largetoc_reg)
25421 rtx tocrel, tocreg, hi;
25423 if (TARGET_DEBUG_ADDR)
25425 if (GET_CODE (symbol) == SYMBOL_REF)
25426 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
25427 XSTR (symbol, 0));
25428 else
25430 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
25431 GET_RTX_NAME (GET_CODE (symbol)));
25432 debug_rtx (symbol);
25436 if (!can_create_pseudo_p ())
25437 df_set_regs_ever_live (TOC_REGISTER, true);
25439 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
25440 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
25441 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
25442 return tocrel;
25444 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
25445 if (largetoc_reg != NULL)
25447 emit_move_insn (largetoc_reg, hi);
25448 hi = largetoc_reg;
25450 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
25453 /* Issue assembly directives that create a reference to the given DWARF
25454 FRAME_TABLE_LABEL from the current function section. */
25455 void
25456 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
25458 fprintf (asm_out_file, "\t.ref %s\n",
25459 (* targetm.strip_name_encoding) (frame_table_label));
25462 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
25463 and the change to the stack pointer. */
25465 static void
25466 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
25468 rtvec p;
25469 int i;
25470 rtx regs[3];
25472 i = 0;
25473 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25474 if (hard_frame_needed)
25475 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
25476 if (!(REGNO (fp) == STACK_POINTER_REGNUM
25477 || (hard_frame_needed
25478 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
25479 regs[i++] = fp;
25481 p = rtvec_alloc (i);
25482 while (--i >= 0)
25484 rtx mem = gen_frame_mem (BLKmode, regs[i]);
25485 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
25488 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
25491 /* Allocate SIZE_INT bytes on the stack using a store with update style insn
25492 and set the appropriate attributes for the generated insn. Return the
25493 first insn which adjusts the stack pointer or the last insn before
25494 the stack adjustment loop.
25496 SIZE_INT is used to create the CFI note for the allocation.
25498 SIZE_RTX is an rtx containing the size of the adjustment. Note that,
25499 since the stack grows to lower addresses, its runtime value is -SIZE_INT.
25501 ORIG_SP contains the backchain value that must be stored at *sp. */
25503 static rtx_insn *
25504 rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
25506 rtx_insn *insn;
25508 rtx size_rtx = GEN_INT (-size_int);
25509 if (size_int > 32767)
25511 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25512 /* Need a note here so that try_split doesn't get confused. */
25513 if (get_last_insn () == NULL_RTX)
25514 emit_note (NOTE_INSN_DELETED);
25515 insn = emit_move_insn (tmp_reg, size_rtx);
25516 try_split (PATTERN (insn), insn, 0);
25517 size_rtx = tmp_reg;
25520 if (Pmode == SImode)
25521 insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
25522 stack_pointer_rtx,
25523 size_rtx,
25524 orig_sp));
25525 else
25526 insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
25527 stack_pointer_rtx,
25528 size_rtx,
25529 orig_sp));
25530 rtx par = PATTERN (insn);
25531 gcc_assert (GET_CODE (par) == PARALLEL);
25532 rtx set = XVECEXP (par, 0, 0);
25533 gcc_assert (GET_CODE (set) == SET);
25534 rtx mem = SET_DEST (set);
25535 gcc_assert (MEM_P (mem));
25536 MEM_NOTRAP_P (mem) = 1;
25537 set_mem_alias_set (mem, get_frame_alias_set ());
25539 RTX_FRAME_RELATED_P (insn) = 1;
25540 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25541 gen_rtx_SET (stack_pointer_rtx,
25542 gen_rtx_PLUS (Pmode,
25543 stack_pointer_rtx,
25544 GEN_INT (-size_int))));
25546 /* Emit a blockage to ensure the allocation/probing insns are
25547 not optimized, combined, removed, etc. Add REG_STACK_CHECK
25548 note for similar reasons. */
25549 if (flag_stack_clash_protection)
25551 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
25552 emit_insn (gen_blockage ());
25555 return insn;
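/* For illustration (editor's note; sizes illustrative): when ORIG_SP is
   the stack pointer itself, the store-with-update insn emitted above is
   the classic atomic allocate-and-link sequence, e.g. on 64-bit

       stdu 1,-144(1)    # *(r1 - 144) = old r1 (backchain); r1 -= 144

   ("stwu" on 32-bit). For sizes above 32767 the negated size is first
   loaded into r0 and an indexed update form is used instead. */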
25558 static HOST_WIDE_INT
25559 get_stack_clash_protection_probe_interval (void)
25561 return (HOST_WIDE_INT_1U
25562 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
25565 static HOST_WIDE_INT
25566 get_stack_clash_protection_guard_size (void)
25568 return (HOST_WIDE_INT_1U
25569 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
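/* For illustration (editor's note, assuming the generic defaults of 12
   for both --param stack-clash-protection-probe-interval and
   --param stack-clash-protection-guard-size): the two functions above
   return 1 << 12 == 4096, i.e. one probe per 4 KiB page and a 4 KiB
   guard area. */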
25572 /* Allocate ORIG_SIZE bytes on the stack and probe the newly
25573 allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
25575 COPY_REG, if non-null, should contain a copy of the original
25576 stack pointer at exit from this function.
25578 This is subtly different from the Ada probing in that it tries hard to
25579 prevent attacks that jump the stack guard. Thus it is never allowed to
25580 allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
25581 space without a suitable probe. */
25582 static rtx_insn *
25583 rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
25584 rtx copy_reg)
25586 rtx orig_sp = copy_reg;
25588 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25590 /* Round the size down to a multiple of PROBE_INTERVAL. */
25591 HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
25593 /* If a copy was explicitly requested,
25594 or the rounded size is not the same as the original size,
25595 or the rounded size is greater than the probe interval,
25596 then we will need a copy of the original stack pointer. */
25597 if (rounded_size != orig_size
25598 || rounded_size > probe_interval
25599 || copy_reg)
25601 /* If the caller did not request a copy of the incoming stack
25602 pointer, then we use r0 to hold the copy. */
25603 if (!copy_reg)
25604 orig_sp = gen_rtx_REG (Pmode, 0);
25605 emit_move_insn (orig_sp, stack_pointer_rtx);
25608 /* There are three cases here.
25610 The first is a single probe, which is the most common and the most
25611 efficiently implemented since it does not need a copy of the original
25612 stack pointer if there are no residuals.
25614 The second is unrolled allocation/probes, which we use if there are
25615 just a few of them. It needs to save the original stack pointer into a
25616 temporary for use as a source register in the allocation/probe.
25618 The last is a loop. This is the least common case and the least efficient. */
25619 rtx_insn *retval = NULL;
25620 if (rounded_size == probe_interval)
25622 retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
25624 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
25626 else if (rounded_size <= 8 * probe_interval)
25628 /* The ABI requires using the store-with-update insns to allocate
25629 space and store the backchain into the stack.
25631 So we save the current stack pointer into a temporary, then
25632 emit the store-with-update insns to store the saved stack pointer
25633 into the right location in each new page. */
25634 for (int i = 0; i < rounded_size; i += probe_interval)
25636 rtx_insn *insn
25637 = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
25639 /* Save the first stack adjustment in RETVAL. */
25640 if (i == 0)
25641 retval = insn;
25644 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
25646 else
25648 /* Compute the ending address. */
25649 rtx end_addr
25650 = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
25651 rtx rs = GEN_INT (-rounded_size);
25652 rtx_insn *insn;
25653 if (add_operand (rs, Pmode))
25654 insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
25655 else
25657 emit_move_insn (end_addr, GEN_INT (-rounded_size));
25658 insn = emit_insn (gen_add3_insn (end_addr, end_addr,
25659 stack_pointer_rtx));
25660 /* Describe the effect of INSN to the CFI engine. */
25661 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25662 gen_rtx_SET (end_addr,
25663 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25664 rs)));
25666 RTX_FRAME_RELATED_P (insn) = 1;
25668 /* Emit the loop. */
25669 if (TARGET_64BIT)
25670 retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
25671 stack_pointer_rtx, orig_sp,
25672 end_addr));
25673 else
25674 retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
25675 stack_pointer_rtx, orig_sp,
25676 end_addr));
25677 RTX_FRAME_RELATED_P (retval) = 1;
25678 /* Describe the effect of INSN to the CFI engine. */
25679 add_reg_note (retval, REG_FRAME_RELATED_EXPR,
25680 gen_rtx_SET (stack_pointer_rtx, end_addr));
25682 /* Emit a blockage to ensure the allocation/probing insns are
25683 not optimized, combined, removed, etc. Other cases handle this
25684 within their call to rs6000_emit_allocate_stack_1. */
25685 emit_insn (gen_blockage ());
25687 dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
25690 if (orig_size != rounded_size)
25692 /* Allocate (and implicitly probe) any residual space. */
25693 HOST_WIDE_INT residual = orig_size - rounded_size;
25695 rtx_insn *insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
25697 /* If the residual was the only allocation, then we can return the
25698 allocating insn. */
25699 if (!retval)
25700 retval = insn;
25703 return retval;
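/* A worked example of the three cases above (editor's note, assuming a
   4 KiB probe interval): a 4096-byte frame takes the single-probe path;
   a 20480-byte frame (20480 <= 8 * 4096) becomes five unrolled
   store-with-update probes; a 65536-byte frame uses the probe loop.
   A request for 20680 bytes rounds down to 20480, and the 200-byte
   residual is then allocated (and implicitly probed) by one final
   store-with-update. */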
25706 /* Emit the correct code for allocating stack space, as insns.
25707 If COPY_REG, make sure a copy of the old frame is left there.
25708 The generated code may use hard register 0 as a temporary. */
25710 static rtx_insn *
25711 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
25713 rtx_insn *insn;
25714 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25715 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25716 rtx todec = gen_int_mode (-size, Pmode);
25718 if (INTVAL (todec) != -size)
25720 warning (0, "stack frame too large");
25721 emit_insn (gen_trap ());
25722 return 0;
25725 if (crtl->limit_stack)
25727 if (REG_P (stack_limit_rtx)
25728 && REGNO (stack_limit_rtx) > 1
25729 && REGNO (stack_limit_rtx) <= 31)
25731 rtx_insn *insn
25732 = gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
25733 gcc_assert (insn);
25734 emit_insn (insn);
25735 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
25737 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
25738 && TARGET_32BIT
25739 && DEFAULT_ABI == ABI_V4
25740 && !flag_pic)
25742 rtx toload = gen_rtx_CONST (VOIDmode,
25743 gen_rtx_PLUS (Pmode,
25744 stack_limit_rtx,
25745 GEN_INT (size)));
25747 emit_insn (gen_elf_high (tmp_reg, toload));
25748 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25749 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25750 const0_rtx));
25752 else
25753 warning (0, "stack limit expression is not supported");
25756 if (flag_stack_clash_protection)
25758 if (size < get_stack_clash_protection_guard_size ())
25759 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
25760 else
25762 rtx_insn *insn = rs6000_emit_probe_stack_range_stack_clash (size,
25763 copy_reg);
25765 /* If we asked for a copy with an offset, then we still need add in
25766 the offset. */
25767 if (copy_reg && copy_off)
25768 emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
25769 return insn;
25773 if (copy_reg)
25775 if (copy_off != 0)
25776 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25777 else
25778 emit_move_insn (copy_reg, stack_reg);
25781 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25782 it now and set the alias set/attributes. The above gen_*_update
25783 calls will generate a PARALLEL with the MEM set being the first
25784 operation. */
25785 insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
25786 return insn;
25789 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25791 #if PROBE_INTERVAL > 32768
25792 #error Cannot use indexed addressing mode for stack probing
25793 #endif
25795 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25796 inclusive. These are offsets from the current stack pointer. */
25798 static void
25799 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25801 /* See if we have a constant small number of probes to generate. If so,
25802 that's the easy case. */
25803 if (first + size <= 32768)
25805 HOST_WIDE_INT i;
25807 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25808 it exceeds SIZE. If only one probe is needed, this will not
25809 generate any code. Then probe at FIRST + SIZE. */
25810 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25811 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25812 -(first + i)));
25814 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25815 -(first + size)));
25818 /* Otherwise, do the same as above, but in a loop. Note that we must be
25819 extra careful with variables wrapping around because we might be at
25820 the very top (or the very bottom) of the address space and we have
25821 to be able to handle this case properly; in particular, we use an
25822 equality test for the loop condition. */
25823 else
25825 HOST_WIDE_INT rounded_size;
25826 rtx r12 = gen_rtx_REG (Pmode, 12);
25827 rtx r0 = gen_rtx_REG (Pmode, 0);
25829 /* Sanity check for the addressing mode we're going to use. */
25830 gcc_assert (first <= 32768);
25832 /* Step 1: round SIZE to the previous multiple of the interval. */
25834 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25837 /* Step 2: compute initial and final value of the loop counter. */
25839 /* TEST_ADDR = SP + FIRST. */
25840 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25841 -first)));
25843 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25844 if (rounded_size > 32768)
25846 emit_move_insn (r0, GEN_INT (-rounded_size));
25847 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25849 else
25850 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25851 -rounded_size)));
25854 /* Step 3: the loop
25858 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25859 probe at TEST_ADDR
25861 while (TEST_ADDR != LAST_ADDR)
25863 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25864 until it is equal to ROUNDED_SIZE. */
25866 if (TARGET_64BIT)
25867 emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
25868 else
25869 emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
25872 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25873 that SIZE is equal to ROUNDED_SIZE. */
25875 if (size != rounded_size)
25876 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
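/* A worked example (editor's note, assuming PROBE_INTERVAL == 4096):
   with FIRST == 0 and SIZE == 10000, the small-case path above probes
   at sp-4096, sp-8192 and sp-10000. With SIZE == 100000, rounded_size
   is 98304 (24 * 4096), so r12 = sp, r0 = sp - 98304, the loop probes
   every 4096 bytes until r12 == r0, and a final probe covers the
   residual at r12 - 1696 (== sp - 100000). */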
25880 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25881 addresses, not offsets. */
25883 static const char *
25884 output_probe_stack_range_1 (rtx reg1, rtx reg2)
25886 static int labelno = 0;
25887 char loop_lab[32];
25888 rtx xops[2];
25890 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25892 /* Loop. */
25893 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25895 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25896 xops[0] = reg1;
25897 xops[1] = GEN_INT (-PROBE_INTERVAL);
25898 output_asm_insn ("addi %0,%0,%1", xops);
25900 /* Probe at TEST_ADDR. */
25901 xops[1] = gen_rtx_REG (Pmode, 0);
25902 output_asm_insn ("stw %1,0(%0)", xops);
25904 /* Test if TEST_ADDR == LAST_ADDR. */
25905 xops[1] = reg2;
25906 if (TARGET_64BIT)
25907 output_asm_insn ("cmpd 0,%0,%1", xops);
25908 else
25909 output_asm_insn ("cmpw 0,%0,%1", xops);
25911 /* Branch. */
25912 fputs ("\tbne 0,", asm_out_file);
25913 assemble_name_raw (asm_out_file, loop_lab);
25914 fputc ('\n', asm_out_file);
25916 return "";
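/* For illustration (editor's note; reg1/reg2 are r12/r0 when called via
   rs6000_emit_probe_stack_range, and 4096 assumes the default probe
   interval): the 32-bit loop above assembles to roughly

   .LPSRL0:
       addi 12,12,-4096
       stw 0,0(12)
       cmpw 0,12,0
       bne 0,.LPSRL0
*/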
25919 /* This function is called when rs6000_frame_related is processing
25920 SETs within a PARALLEL, and returns whether the REGNO save ought to
25921 be marked RTX_FRAME_RELATED_P. The PARALLELs involved are those
25922 for out-of-line register save functions, store multiple, and the
25923 Darwin world_save. They may contain registers that don't really
25924 need saving. */
25926 static bool
25927 interesting_frame_related_regno (unsigned int regno)
25929 /* What look like saves of r0 are actually saving LR. It doesn't make
25930 sense to substitute the regno here to test save_reg_p (LR_REGNO).
25931 We *know* LR needs saving, and dwarf2cfi.c is able to deduce that
25932 (set (mem) (r0)) is saving LR from a prior (set (r0) (lr)) marked
25933 as frame related. */
25934 if (regno == 0)
25935 return true;
25936 /* If we see CR2 then we are here on a Darwin world save. Saves of
25937 CR2 signify the whole CR is being saved. This is a long-standing
25938 ABI wart fixed by ELFv2. As for r0/lr there is no need to check
25939 that CR needs to be saved. */
25940 if (regno == CR2_REGNO)
25941 return true;
25942 /* Omit frame info for any user-defined global regs. If frame info
25943 is supplied for them, frame unwinding will restore a user reg.
25944 Also omit frame info for any reg we don't need to save, as that
25945 bloats frame info and can cause problems with shrink wrapping.
25946 Since global regs won't be seen as needing to be saved, both of
25947 these conditions are covered by save_reg_p. */
25948 return save_reg_p (regno);
25951 /* Probe a range of stack addresses from REG1 to REG3 inclusive. These are
25952 addresses, not offsets.
25954 REG2 contains the backchain that must be stored into *sp at each allocation.
25956 This is subtly different from the Ada probing above in that it tries hard
25957 to prevent attacks that jump the stack guard. Thus, it is never allowed
25958 to allocate more than PROBE_INTERVAL bytes of stack space without a
25959 suitable probe. */
25961 static const char *
25962 output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
25964 static int labelno = 0;
25965 char loop_lab[32];
25966 rtx xops[3];
25968 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25970 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25972 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25974 /* This allocates and probes. */
25975 xops[0] = reg1;
25976 xops[1] = reg2;
25977 xops[2] = GEN_INT (-probe_interval);
25978 if (TARGET_64BIT)
25979 output_asm_insn ("stdu %1,%2(%0)", xops);
25980 else
25981 output_asm_insn ("stwu %1,%2(%0)", xops);
25983 /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */
25984 xops[0] = reg1;
25985 xops[1] = reg3;
25986 if (TARGET_64BIT)
25987 output_asm_insn ("cmpd 0,%0,%1", xops);
25988 else
25989 output_asm_insn ("cmpw 0,%0,%1", xops);
25991 fputs ("\tbne 0,", asm_out_file);
25992 assemble_name_raw (asm_out_file, loop_lab);
25993 fputc ('\n', asm_out_file);
25995 return "";
25998 /* Wrapper around the output_probe_stack_range routines. */
25999 const char *
26000 output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
26002 if (flag_stack_clash_protection)
26003 return output_probe_stack_range_stack_clash (reg1, reg2, reg3);
26004 else
26005 return output_probe_stack_range_1 (reg1, reg3);
26008 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
26009 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
26010 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
26011 deduce these equivalences by itself so it wasn't necessary to hold
26012 its hand so much. Don't be tempted to always supply d2_f_d_e with
26013 the actual cfa register, i.e. r31 when we are using a hard frame
26014 pointer. That fails when saving regs off r1, and sched moves the
26015 r31 setup past the reg saves. */
26017 static rtx_insn *
26018 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
26019 rtx reg2, rtx repl2)
26021 rtx repl;
26023 if (REGNO (reg) == STACK_POINTER_REGNUM)
26025 gcc_checking_assert (val == 0);
26026 repl = NULL_RTX;
26028 else
26029 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26030 GEN_INT (val));
26032 rtx pat = PATTERN (insn);
26033 if (!repl && !reg2)
26035 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
26036 if (GET_CODE (pat) == PARALLEL)
26037 for (int i = 0; i < XVECLEN (pat, 0); i++)
26038 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26040 rtx set = XVECEXP (pat, 0, i);
26042 if (!REG_P (SET_SRC (set))
26043 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
26044 RTX_FRAME_RELATED_P (set) = 1;
26046 RTX_FRAME_RELATED_P (insn) = 1;
26047 return insn;
26050 /* We expect that 'pat' is either a SET or a PARALLEL containing
26051 SETs (and possibly other stuff). In a PARALLEL, all the SETs
26052 are important so they all have to be marked RTX_FRAME_RELATED_P.
26053 Call simplify_replace_rtx on the SETs rather than the whole insn
26054 so as to leave the other stuff alone (for example USE of r12). */
26056 set_used_flags (pat);
26057 if (GET_CODE (pat) == SET)
26059 if (repl)
26060 pat = simplify_replace_rtx (pat, reg, repl);
26061 if (reg2)
26062 pat = simplify_replace_rtx (pat, reg2, repl2);
26064 else if (GET_CODE (pat) == PARALLEL)
26066 pat = shallow_copy_rtx (pat);
26067 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
26069 for (int i = 0; i < XVECLEN (pat, 0); i++)
26070 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26072 rtx set = XVECEXP (pat, 0, i);
26074 if (repl)
26075 set = simplify_replace_rtx (set, reg, repl);
26076 if (reg2)
26077 set = simplify_replace_rtx (set, reg2, repl2);
26078 XVECEXP (pat, 0, i) = set;
26080 if (!REG_P (SET_SRC (set))
26081 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
26082 RTX_FRAME_RELATED_P (set) = 1;
26085 else
26086 gcc_unreachable ();
26088 RTX_FRAME_RELATED_P (insn) = 1;
26089 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
26091 return insn;
26094 /* Returns an insn that has a vrsave set operation with the
26095 appropriate CLOBBERs. */
26097 static rtx
26098 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
26100 int nclobs, i;
26101 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
26102 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26104 clobs[0]
26105 = gen_rtx_SET (vrsave,
26106 gen_rtx_UNSPEC_VOLATILE (SImode,
26107 gen_rtvec (2, reg, vrsave),
26108 UNSPECV_SET_VRSAVE));
26110 nclobs = 1;
26112 /* We need to clobber the registers in the mask so the scheduler
26113 does not move sets to VRSAVE before sets of AltiVec registers.
26115 However, if the function receives nonlocal gotos, reload will set
26116 all call saved registers live. We will end up with:
26118 (set (reg 999) (mem))
26119 (parallel [ (set (reg vrsave) (unspec blah))
26120 (clobber (reg 999))])
26122 The clobber will cause the store into reg 999 to be dead, and
26123 flow will attempt to delete an epilogue insn. In this case, we
26124 need an unspec use/set of the register. */
26126 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
26127 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26129 if (!epiloguep || call_used_regs [i])
26130 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
26131 gen_rtx_REG (V4SImode, i));
26132 else
26134 rtx reg = gen_rtx_REG (V4SImode, i);
26136 clobs[nclobs++]
26137 = gen_rtx_SET (reg,
26138 gen_rtx_UNSPEC (V4SImode,
26139 gen_rtvec (1, reg), 27));
26143 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
26145 for (i = 0; i < nclobs; ++i)
26146 XVECEXP (insn, 0, i) = clobs[i];
26148 return insn;
26151 static rtx
26152 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
26154 rtx addr, mem;
26156 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
26157 mem = gen_frame_mem (GET_MODE (reg), addr);
26158 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
26161 static rtx
26162 gen_frame_load (rtx reg, rtx frame_reg, int offset)
26164 return gen_frame_set (reg, frame_reg, offset, false);
26167 static rtx
26168 gen_frame_store (rtx reg, rtx frame_reg, int offset)
26170 return gen_frame_set (reg, frame_reg, offset, true);
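/* For illustration (editor's note; modes and numbers illustrative):
   gen_frame_store (reg, frame_reg, offset) builds RTL of the form

       (set (mem/c:DI (plus:DI (reg:DI 1) (const_int 16)))
            (reg:DI 0))

   and gen_frame_load is the same SET with the operands swapped. */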
26173 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
26174 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
26176 static rtx_insn *
26177 emit_frame_save (rtx frame_reg, machine_mode mode,
26178 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
26180 rtx reg;
26182 /* Some cases that need register indexed addressing. */
26183 gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
26184 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
26186 reg = gen_rtx_REG (mode, regno);
26187 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
26188 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
26189 NULL_RTX, NULL_RTX);
26192 /* Emit an offset memory reference suitable for a frame store, while
26193 converting to a valid addressing mode. */
26195 static rtx
26196 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
26198 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
26201 #ifndef TARGET_FIX_AND_CONTINUE
26202 #define TARGET_FIX_AND_CONTINUE 0
26203 #endif
26205 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */
26206 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
26207 #define LAST_SAVRES_REGISTER 31
26208 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
26210 enum {
26211 SAVRES_LR = 0x1,
26212 SAVRES_SAVE = 0x2,
26213 SAVRES_REG = 0x0c,
26214 SAVRES_GPR = 0,
26215 SAVRES_FPR = 4,
26216 SAVRES_VR = 8
26219 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
26221 /* Temporary holding space for an out-of-line register save/restore
26222 routine name. */
26223 static char savres_routine_name[30];
26225 /* Return the name for an out-of-line register save/restore routine.
26226 We are saving/restoring GPRs if GPR is true. */
26228 static char *
26229 rs6000_savres_routine_name (int regno, int sel)
26231 const char *prefix = "";
26232 const char *suffix = "";
26234 /* Different targets are supposed to define
26235 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
26236 routine name could be defined with:
26238 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
26240 This is a nice idea in theory, but in practice, things are
26241 complicated in several ways:
26243 - ELF targets have save/restore routines for GPRs.
26245 - PPC64 ELF targets have routines for save/restore of GPRs that
26246 differ in what they do with the link register, so having a set
26247 prefix doesn't work. (We only use one of the save routines at
26248 the moment, though.)
26250 - PPC32 elf targets have "exit" versions of the restore routines
26251 that restore the link register and can save some extra space.
26252 These require an extra suffix. (There are also "tail" versions
26253 of the restore routines and "GOT" versions of the save routines,
26254 but we don't generate those at present. Same problems apply,
26255 though.)
26257 We deal with all this by synthesizing our own prefix/suffix and
26258 using that for the simple sprintf call shown above. */
26259 if (DEFAULT_ABI == ABI_V4)
26261 if (TARGET_64BIT)
26262 goto aix_names;
26264 if ((sel & SAVRES_REG) == SAVRES_GPR)
26265 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
26266 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26267 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
26268 else if ((sel & SAVRES_REG) == SAVRES_VR)
26269 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26270 else
26271 abort ();
26273 if ((sel & SAVRES_LR))
26274 suffix = "_x";
26276 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26278 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
26279 /* No out-of-line save/restore routines for GPRs on AIX. */
26280 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
26281 #endif
26283 aix_names:
26284 if ((sel & SAVRES_REG) == SAVRES_GPR)
26285 prefix = ((sel & SAVRES_SAVE)
26286 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
26287 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
26288 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26290 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
26291 if ((sel & SAVRES_LR))
26292 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
26293 else
26294 #endif
26296 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
26297 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
26300 else if ((sel & SAVRES_REG) == SAVRES_VR)
26301 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26302 else
26303 abort ();
26306 if (DEFAULT_ABI == ABI_DARWIN)
26308 /* The Darwin approach is (slightly) different, in order to be
26309 compatible with code generated by the system toolchain. There is a
26310 single symbol for the start of save sequence, and the code here
26311 embeds an offset into that code on the basis of the first register
26312 to be saved. */
26313 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
26314 if ((sel & SAVRES_REG) == SAVRES_GPR)
26315 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
26316 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
26317 (regno - 13) * 4, prefix, regno);
26318 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26319 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
26320 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
26321 else if ((sel & SAVRES_REG) == SAVRES_VR)
26322 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
26323 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
26324 else
26325 abort ();
26327 else
26328 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
26330 return savres_routine_name;
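/* Some example names the function above produces (editor's note):
   32-bit SysV restore of r29..r31 that also restores LR ->
   "_restgpr_29_x"; ELFv2/AIX save of r29..r31 with LR saved ->
   "_savegpr0_29"; Darwin save of f20..f31 ->
   "*saveFP+24 ; save f20-f31". */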
26333 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
26334 We are saving/restoring GPRs if GPR is true. */
26336 static rtx
26337 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
26339 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
26340 ? info->first_gp_reg_save
26341 : (sel & SAVRES_REG) == SAVRES_FPR
26342 ? info->first_fp_reg_save - 32
26343 : (sel & SAVRES_REG) == SAVRES_VR
26344 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
26345 : -1);
26346 rtx sym;
26347 int select = sel;
26349 /* Don't generate bogus routine names. */
26350 gcc_assert (FIRST_SAVRES_REGISTER <= regno
26351 && regno <= LAST_SAVRES_REGISTER
26352 && select >= 0 && select <= 12);
26354 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
26356 if (sym == NULL)
26358 char *name;
26360 name = rs6000_savres_routine_name (regno, sel);
26362 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
26363 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
26364 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
26367 return sym;
26370 /* Emit a sequence of insns, including a stack tie if needed, for
26371 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
26372 reset the stack pointer, but move the base of the frame into
26373 reg UPDT_REGNO for use by out-of-line register restore routines. */
26375 static rtx
26376 rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
26377 unsigned updt_regno)
26379 /* If there is nothing to do, don't do anything. */
26380 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
26381 return NULL_RTX;
26383 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
26385 /* This blockage is needed so that sched doesn't decide to move
26386 the sp change before the register restores. */
26387 if (DEFAULT_ABI == ABI_V4)
26388 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
26389 GEN_INT (frame_off)));
26391 /* If we are restoring registers out-of-line, we will be using the
26392 "exit" variants of the restore routines, which will reset the
26393 stack for us. But we do need to point updt_reg into the
26394 right place for those routines. */
26395 if (frame_off != 0)
26396 return emit_insn (gen_add3_insn (updt_reg_rtx,
26397 frame_reg_rtx, GEN_INT (frame_off)));
26398 else
26399 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
26401 return NULL_RTX;
26404 /* Return the register number used as a pointer by out-of-line
26405 save/restore functions. */
26407 static inline unsigned
26408 ptr_regno_for_savres (int sel)
26410 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26411 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
26412 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
26415 /* Construct a parallel rtx describing the effect of a call to an
26416 out-of-line register save/restore routine, and emit the insn
26417 or jump_insn as appropriate. */
26419 static rtx_insn *
26420 rs6000_emit_savres_rtx (rs6000_stack_t *info,
26421 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
26422 machine_mode reg_mode, int sel)
26424 int i;
26425 int offset, start_reg, end_reg, n_regs, use_reg;
26426 int reg_size = GET_MODE_SIZE (reg_mode);
26427 rtx sym;
26428 rtvec p;
26429 rtx par;
26430 rtx_insn *insn;
26432 offset = 0;
26433 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26434 ? info->first_gp_reg_save
26435 : (sel & SAVRES_REG) == SAVRES_FPR
26436 ? info->first_fp_reg_save
26437 : (sel & SAVRES_REG) == SAVRES_VR
26438 ? info->first_altivec_reg_save
26439 : -1);
26440 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26441 ? 32
26442 : (sel & SAVRES_REG) == SAVRES_FPR
26443 ? 64
26444 : (sel & SAVRES_REG) == SAVRES_VR
26445 ? LAST_ALTIVEC_REGNO + 1
26446 : -1);
26447 n_regs = end_reg - start_reg;
26448 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
26449 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
26450 + n_regs);
26452 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26453 RTVEC_ELT (p, offset++) = ret_rtx;
26455 RTVEC_ELT (p, offset++)
26456 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
26458 sym = rs6000_savres_routine_sym (info, sel);
26459 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
26461 use_reg = ptr_regno_for_savres (sel);
26462 if ((sel & SAVRES_REG) == SAVRES_VR)
26464 /* Vector regs are saved/restored using [reg+reg] addressing. */
26465 RTVEC_ELT (p, offset++)
26466 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26467 RTVEC_ELT (p, offset++)
26468 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
26470 else
26471 RTVEC_ELT (p, offset++)
26472 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26474 for (i = 0; i < end_reg - start_reg; i++)
26475 RTVEC_ELT (p, i + offset)
26476 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
26477 frame_reg_rtx, save_area_offset + reg_size * i,
26478 (sel & SAVRES_SAVE) != 0);
26480 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26481 RTVEC_ELT (p, i + offset)
26482 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
26484 par = gen_rtx_PARALLEL (VOIDmode, p);
26486 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26488 insn = emit_jump_insn (par);
26489 JUMP_LABEL (insn) = ret_rtx;
26491 else
26492 insn = emit_insn (par);
26493 return insn;
26496 /* Emit prologue code to store CR fields that need to be saved into REG. This
26497 function should only be called when moving the non-volatile CRs to REG, it
26498 is not a general purpose routine to move the entire set of CRs to REG.
26499 Specifically, gen_prologue_movesi_from_cr() does not contain uses of the
26500 volatile CRs. */
26502 static void
26503 rs6000_emit_prologue_move_from_cr (rtx reg)
26505 /* Only the ELFv2 ABI allows storing only selected fields. */
26506 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
26508 int i, cr_reg[8], count = 0;
26510 /* Collect CR fields that must be saved. */
26511 for (i = 0; i < 8; i++)
26512 if (save_reg_p (CR0_REGNO + i))
26513 cr_reg[count++] = i;
26515 /* If it's just a single one, use mfcrf. */
26516 if (count == 1)
26518 rtvec p = rtvec_alloc (1);
26519 rtvec r = rtvec_alloc (2);
26520 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
26521 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
26522 RTVEC_ELT (p, 0)
26523 = gen_rtx_SET (reg,
26524 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
26526 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26527 return;
26530 /* ??? It might be better to handle count == 2 / 3 cases here
26531 as well, using logical operations to combine the values. */
26534 emit_insn (gen_prologue_movesi_from_cr (reg));
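/* For illustration (editor's note): in the single-field ELFv2 case
   above, if only CR2 needs saving the emitted insn is an
   "mfocrf reg,0x20" (field mask 1 << (7 - 2)), which reads just that
   one CR field rather than the whole condition register. */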
26537 /* Return whether the split-stack arg pointer (r12) is used. */
26539 static bool
26540 split_stack_arg_pointer_used_p (void)
26542 /* If the pseudo holding the arg pointer is no longer a pseudo,
26543 then the arg pointer is used. */
26544 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
26545 && (!REG_P (cfun->machine->split_stack_arg_pointer)
26546 || (REGNO (cfun->machine->split_stack_arg_pointer)
26547 < FIRST_PSEUDO_REGISTER)))
26548 return true;
26550 /* Unfortunately we also need to do some code scanning, since
26551 r12 may have been substituted for the pseudo. */
26552 rtx_insn *insn;
26553 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
26554 FOR_BB_INSNS (bb, insn)
26555 if (NONDEBUG_INSN_P (insn))
26557 /* A call destroys r12. */
26558 if (CALL_P (insn))
26559 return false;
26561 df_ref use;
26562 FOR_EACH_INSN_USE (use, insn)
26564 rtx x = DF_REF_REG (use);
26565 if (REG_P (x) && REGNO (x) == 12)
26566 return true;
26568 df_ref def;
26569 FOR_EACH_INSN_DEF (def, insn)
26571 rtx x = DF_REF_REG (def);
26572 if (REG_P (x) && REGNO (x) == 12)
26573 return false;
26576 return bitmap_bit_p (DF_LR_OUT (bb), 12);
26579 /* Return whether we need to emit an ELFv2 global entry point prologue. */
26581 static bool
26582 rs6000_global_entry_point_needed_p (void)
26584 /* Only needed for the ELFv2 ABI. */
26585 if (DEFAULT_ABI != ABI_ELFv2)
26586 return false;
26588 /* With -msingle-pic-base, we assume the whole program shares the same
26589 TOC, so no global entry point prologues are needed anywhere. */
26590 if (TARGET_SINGLE_PIC_BASE)
26591 return false;
26593 /* Ensure we have a global entry point for thunks. ??? We could
26594 avoid that if the target routine doesn't need a global entry point,
26595 but we do not know whether this is the case at this point. */
26596 if (cfun->is_thunk)
26597 return true;
26599 /* For regular functions, rs6000_emit_prologue sets this flag if the
26600 routine ever uses the TOC pointer. */
26601 return cfun->machine->r2_setup_needed;
26604 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
26605 static sbitmap
26606 rs6000_get_separate_components (void)
26608 rs6000_stack_t *info = rs6000_stack_info ();
26610 if (WORLD_SAVE_P (info))
26611 return NULL;
26613 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
26614 && !(info->savres_strategy & REST_MULTIPLE));
26616 /* Component 0 is the save/restore of LR (done via GPR0).
26617 Component 2 is the save of the TOC (GPR2).
26618 Components 13..31 are the save/restore of GPR13..GPR31.
26619 Components 46..63 are the save/restore of FPR14..FPR31. */
26621 cfun->machine->n_components = 64;
26623 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26624 bitmap_clear (components);
26626 int reg_size = TARGET_32BIT ? 4 : 8;
26627 int fp_reg_size = 8;
26629 /* The GPRs we need saved to the frame. */
26630 if ((info->savres_strategy & SAVE_INLINE_GPRS)
26631 && (info->savres_strategy & REST_INLINE_GPRS))
26633 int offset = info->gp_save_offset;
26634 if (info->push_p)
26635 offset += info->total_size;
26637 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26639 if (IN_RANGE (offset, -0x8000, 0x7fff)
26640 && save_reg_p (regno))
26641 bitmap_set_bit (components, regno);
26643 offset += reg_size;
26647 /* Don't mess with the hard frame pointer. */
26648 if (frame_pointer_needed)
26649 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
26651 /* Don't mess with the fixed TOC register. */
26652 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
26653 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
26654 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
26655 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
26657 /* The FPRs we need saved to the frame. */
26658 if ((info->savres_strategy & SAVE_INLINE_FPRS)
26659 && (info->savres_strategy & REST_INLINE_FPRS))
26661 int offset = info->fp_save_offset;
26662 if (info->push_p)
26663 offset += info->total_size;
26665 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26667 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
26668 bitmap_set_bit (components, regno);
26670 offset += fp_reg_size;
26674 /* Optimize LR save and restore if we can. This is component 0. Any
26675 out-of-line register save/restore routines need LR. */
26676 if (info->lr_save_p
26677 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
26678 && (info->savres_strategy & SAVE_INLINE_GPRS)
26679 && (info->savres_strategy & REST_INLINE_GPRS)
26680 && (info->savres_strategy & SAVE_INLINE_FPRS)
26681 && (info->savres_strategy & REST_INLINE_FPRS)
26682 && (info->savres_strategy & SAVE_INLINE_VRS)
26683 && (info->savres_strategy & REST_INLINE_VRS))
26685 int offset = info->lr_save_offset;
26686 if (info->push_p)
26687 offset += info->total_size;
26688 if (IN_RANGE (offset, -0x8000, 0x7fff))
26689 bitmap_set_bit (components, 0);
26692 /* Optimize saving the TOC. This is component 2. */
26693 if (cfun->machine->save_toc_in_prologue)
26694 bitmap_set_bit (components, 2);
26696 return components;
26699 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
26700 static sbitmap
26701 rs6000_components_for_bb (basic_block bb)
26703 rs6000_stack_t *info = rs6000_stack_info ();
26705 bitmap in = DF_LIVE_IN (bb);
26706 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
26707 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
26709 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26710 bitmap_clear (components);
26712 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
26714 /* GPRs. */
26715 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26716 if (bitmap_bit_p (in, regno)
26717 || bitmap_bit_p (gen, regno)
26718 || bitmap_bit_p (kill, regno))
26719 bitmap_set_bit (components, regno);
26721 /* FPRs. */
26722 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26723 if (bitmap_bit_p (in, regno)
26724 || bitmap_bit_p (gen, regno)
26725 || bitmap_bit_p (kill, regno))
26726 bitmap_set_bit (components, regno);
26728 /* The link register. */
26729 if (bitmap_bit_p (in, LR_REGNO)
26730 || bitmap_bit_p (gen, LR_REGNO)
26731 || bitmap_bit_p (kill, LR_REGNO))
26732 bitmap_set_bit (components, 0);
26734 /* The TOC save. */
26735 if (bitmap_bit_p (in, TOC_REGNUM)
26736 || bitmap_bit_p (gen, TOC_REGNUM)
26737 || bitmap_bit_p (kill, TOC_REGNUM))
26738 bitmap_set_bit (components, 2);
26740 return components;
26743 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
26744 static void
26745 rs6000_disqualify_components (sbitmap components, edge e,
26746 sbitmap edge_components, bool /*is_prologue*/)
26748 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
26749 live where we want to place that code. */
26750 if (bitmap_bit_p (edge_components, 0)
26751 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
26753 if (dump_file)
26754 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
26755 "on entry to bb %d\n", e->dest->index);
26756 bitmap_clear_bit (components, 0);
26760 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
26761 static void
26762 rs6000_emit_prologue_components (sbitmap components)
26764 rs6000_stack_t *info = rs6000_stack_info ();
26765 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26766 ? HARD_FRAME_POINTER_REGNUM
26767 : STACK_POINTER_REGNUM);
26769 machine_mode reg_mode = Pmode;
26770 int reg_size = TARGET_32BIT ? 4 : 8;
26771 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26772 ? DFmode : SFmode;
26773 int fp_reg_size = 8;
26775 /* Prologue for LR. */
26776 if (bitmap_bit_p (components, 0))
26778 rtx reg = gen_rtx_REG (reg_mode, 0);
26779 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
26780 RTX_FRAME_RELATED_P (insn) = 1;
26781 add_reg_note (insn, REG_CFA_REGISTER, NULL);
26783 int offset = info->lr_save_offset;
26784 if (info->push_p)
26785 offset += info->total_size;
26787 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26788 RTX_FRAME_RELATED_P (insn) = 1;
26789 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
26790 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
26791 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
26794 /* Prologue for TOC. */
26795 if (bitmap_bit_p (components, 2))
26797 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
26798 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26799 emit_insn (gen_frame_store (reg, sp_reg, RS6000_TOC_SAVE_SLOT));
26802 /* Prologue for the GPRs. */
26803 int offset = info->gp_save_offset;
26804 if (info->push_p)
26805 offset += info->total_size;
26807 for (int i = info->first_gp_reg_save; i < 32; i++)
26809 if (bitmap_bit_p (components, i))
26811 rtx reg = gen_rtx_REG (reg_mode, i);
26812 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26813 RTX_FRAME_RELATED_P (insn) = 1;
26814 rtx set = copy_rtx (single_set (insn));
26815 add_reg_note (insn, REG_CFA_OFFSET, set);
26818 offset += reg_size;
26821 /* Prologue for the FPRs. */
26822 offset = info->fp_save_offset;
26823 if (info->push_p)
26824 offset += info->total_size;
26826 for (int i = info->first_fp_reg_save; i < 64; i++)
26828 if (bitmap_bit_p (components, i))
26830 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26831 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26832 RTX_FRAME_RELATED_P (insn) = 1;
26833 rtx set = copy_rtx (single_set (insn));
26834 add_reg_note (insn, REG_CFA_OFFSET, set);
26837 offset += fp_reg_size;
26841 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
26842 static void
26843 rs6000_emit_epilogue_components (sbitmap components)
26845 rs6000_stack_t *info = rs6000_stack_info ();
26846 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26847 ? HARD_FRAME_POINTER_REGNUM
26848 : STACK_POINTER_REGNUM);
26850 machine_mode reg_mode = Pmode;
26851 int reg_size = TARGET_32BIT ? 4 : 8;
26853 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26854 ? DFmode : SFmode;
26855 int fp_reg_size = 8;
26857 /* Epilogue for the FPRs. */
26858 int offset = info->fp_save_offset;
26859 if (info->push_p)
26860 offset += info->total_size;
26862 for (int i = info->first_fp_reg_save; i < 64; i++)
26864 if (bitmap_bit_p (components, i))
26866 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26867 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26868 RTX_FRAME_RELATED_P (insn) = 1;
26869 add_reg_note (insn, REG_CFA_RESTORE, reg);
26872 offset += fp_reg_size;
26875 /* Epilogue for the GPRs. */
26876 offset = info->gp_save_offset;
26877 if (info->push_p)
26878 offset += info->total_size;
26880 for (int i = info->first_gp_reg_save; i < 32; i++)
26882 if (bitmap_bit_p (components, i))
26884 rtx reg = gen_rtx_REG (reg_mode, i);
26885 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26886 RTX_FRAME_RELATED_P (insn) = 1;
26887 add_reg_note (insn, REG_CFA_RESTORE, reg);
26890 offset += reg_size;
26893 /* Epilogue for LR. */
26894 if (bitmap_bit_p (components, 0))
26896 int offset = info->lr_save_offset;
26897 if (info->push_p)
26898 offset += info->total_size;
26900 rtx reg = gen_rtx_REG (reg_mode, 0);
26901 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26903 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26904 insn = emit_move_insn (lr, reg);
26905 RTX_FRAME_RELATED_P (insn) = 1;
26906 add_reg_note (insn, REG_CFA_RESTORE, lr);
26910 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
26911 static void
26912 rs6000_set_handled_components (sbitmap components)
26914 rs6000_stack_t *info = rs6000_stack_info ();
26916 for (int i = info->first_gp_reg_save; i < 32; i++)
26917 if (bitmap_bit_p (components, i))
26918 cfun->machine->gpr_is_wrapped_separately[i] = true;
26920 for (int i = info->first_fp_reg_save; i < 64; i++)
26921 if (bitmap_bit_p (components, i))
26922 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
26924 if (bitmap_bit_p (components, 0))
26925 cfun->machine->lr_is_wrapped_separately = true;
26927 if (bitmap_bit_p (components, 2))
26928 cfun->machine->toc_is_wrapped_separately = true;
26931 /* VRSAVE is a bit vector representing which AltiVec registers
26932 are used. The OS uses this to determine which vector
26933 registers to save on a context switch. We need to save
26934 VRSAVE on the stack frame, add whatever AltiVec registers we
26935 used in this function, and do the corresponding magic in the
26936 epilogue. */
26937 static void
26938 emit_vrsave_prologue (rs6000_stack_t *info, int save_regno,
26939 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26941 /* Get VRSAVE into a GPR. */
26942 rtx reg = gen_rtx_REG (SImode, save_regno);
26943 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26944 if (TARGET_MACHO)
26945 emit_insn (gen_get_vrsave_internal (reg));
26946 else
26947 emit_insn (gen_rtx_SET (reg, vrsave));
26949 /* Save VRSAVE. */
26950 int offset = info->vrsave_save_offset + frame_off;
26951 emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26953 /* Include the registers in the mask. */
26954 emit_insn (gen_iorsi3 (reg, reg, GEN_INT (info->vrsave_mask)));
26956 emit_insn (generate_set_vrsave (reg, info, 0));
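/* Putting the pieces together, a typical emitted sequence is roughly
	mfvrsave 0		(the usual extended mnemonic for SPR 256)
	stw 0,<vrsave_save_offset>(1)
	oris 0,0,<vrsave_mask high half>
	mtvrsave 0
   with the scratch GPR chosen by the caller; this is a sketch for
   illustration, not the exact output on every target.  */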
26959 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
26960 called, it left the arg pointer to the old stack in r29. Otherwise, the
26961 arg pointer is the top of the current frame. */
26962 static void
26963 emit_split_stack_prologue (rs6000_stack_t *info, rtx_insn *sp_adjust,
26964 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26966 cfun->machine->split_stack_argp_used = true;
26968 if (sp_adjust)
26970 rtx r12 = gen_rtx_REG (Pmode, 12);
26971 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26972 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26973 emit_insn_before (set_r12, sp_adjust);
26975 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
26977 rtx r12 = gen_rtx_REG (Pmode, 12);
26978 if (frame_off == 0)
26979 emit_move_insn (r12, frame_reg_rtx);
26980 else
26981 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
26984 if (info->push_p)
26986 rtx r12 = gen_rtx_REG (Pmode, 12);
26987 rtx r29 = gen_rtx_REG (Pmode, 29);
26988 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26989 rtx not_more = gen_label_rtx ();
26990 rtx jump;
26992 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26993 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26994 gen_rtx_LABEL_REF (VOIDmode, not_more),
26995 pc_rtx);
26996 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26997 JUMP_LABEL (jump) = not_more;
26998 LABEL_NUSES (not_more) += 1;
26999 emit_move_insn (r12, r29);
27000 emit_label (not_more);
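/* A sketch of the branch emitted just above: cr7 was set by the
   split-stack check, and the copy runs only when __morestack was
   actually called, e.g.
	bge 7,0f		r12 is already the arg pointer
	mr 12,29		use the arg pointer __morestack saved
   0:
   Register numbers follow the code above.  */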
27004 /* Emit function prologue as insns. */
27006 void
27007 rs6000_emit_prologue (void)
27009 rs6000_stack_t *info = rs6000_stack_info ();
27010 machine_mode reg_mode = Pmode;
27011 int reg_size = TARGET_32BIT ? 4 : 8;
27012 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
27013 ? DFmode : SFmode;
27014 int fp_reg_size = 8;
27015 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27016 rtx frame_reg_rtx = sp_reg_rtx;
27017 unsigned int cr_save_regno;
27018 rtx cr_save_rtx = NULL_RTX;
27019 rtx_insn *insn;
27020 int strategy;
27021 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27022 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27023 && call_used_regs[STATIC_CHAIN_REGNUM]);
27024 int using_split_stack = (flag_split_stack
27025 && (lookup_attribute ("no_split_stack",
27026 DECL_ATTRIBUTES (cfun->decl))
27027 == NULL));
27029 /* Offset to top of frame for frame_reg and sp respectively. */
27030 HOST_WIDE_INT frame_off = 0;
27031 HOST_WIDE_INT sp_off = 0;
27032 /* sp_adjust is the stack adjusting instruction, tracked so that the
27033 insn setting up the split-stack arg pointer can be emitted just
27034 prior to it, when r12 is not used here for other purposes. */
27035 rtx_insn *sp_adjust = 0;
27037 #if CHECKING_P
27038 /* Track and check usage of r0, r11, r12. */
27039 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
27040 #define START_USE(R) do \
27042 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27043 reg_inuse |= 1 << (R); \
27044 } while (0)
27045 #define END_USE(R) do \
27047 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
27048 reg_inuse &= ~(1 << (R)); \
27049 } while (0)
27050 #define NOT_INUSE(R) do \
27052 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27053 } while (0)
27054 #else
27055 #define START_USE(R) do {} while (0)
27056 #define END_USE(R) do {} while (0)
27057 #define NOT_INUSE(R) do {} while (0)
27058 #endif
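/* Usage sketch for the bracketing macros above (checking builds only):
	START_USE (0);		claim r0 -- must currently be free
	... emit insns that use r0 ...
	END_USE (0);		release r0
	NOT_INUSE (11);		assert r11 is free without claiming it
   They exist purely to catch prologue bugs where two code paths pick
   the same scratch register.  */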
27060 if (DEFAULT_ABI == ABI_ELFv2
27061 && !TARGET_SINGLE_PIC_BASE)
27063 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
27065 /* With -mminimal-toc we may generate an extra use of r2 below. */
27066 if (TARGET_TOC && TARGET_MINIMAL_TOC
27067 && !constant_pool_empty_p ())
27068 cfun->machine->r2_setup_needed = true;
27072 if (flag_stack_usage_info)
27073 current_function_static_stack_size = info->total_size;
27075 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
27077 HOST_WIDE_INT size = info->total_size;
27079 if (crtl->is_leaf && !cfun->calls_alloca)
27081 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27082 rs6000_emit_probe_stack_range (get_stack_check_protect (),
27083 size - get_stack_check_protect ());
27085 else if (size > 0)
27086 rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
27089 if (TARGET_FIX_AND_CONTINUE)
27091 /* gdb on darwin arranges to forward a function from the old
27092 address by modifying the first 5 instructions of the function
27093 to branch to the overriding function. This is necessary to
27094 permit function pointers that point to the old function to
27095 actually forward to the new function. */
27096 emit_insn (gen_nop ());
27097 emit_insn (gen_nop ());
27098 emit_insn (gen_nop ());
27099 emit_insn (gen_nop ());
27100 emit_insn (gen_nop ());
27103 /* Handle world saves specially here. */
27104 if (WORLD_SAVE_P (info))
27106 int i, j, sz;
27107 rtx treg;
27108 rtvec p;
27109 rtx reg0;
27111 /* save_world expects lr in r0. */
27112 reg0 = gen_rtx_REG (Pmode, 0);
27113 if (info->lr_save_p)
27115 insn = emit_move_insn (reg0,
27116 gen_rtx_REG (Pmode, LR_REGNO));
27117 RTX_FRAME_RELATED_P (insn) = 1;
27120 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
27121 assumptions about the offsets of various bits of the stack
27122 frame. */
27123 gcc_assert (info->gp_save_offset == -220
27124 && info->fp_save_offset == -144
27125 && info->lr_save_offset == 8
27126 && info->cr_save_offset == 4
27127 && info->push_p
27128 && info->lr_save_p
27129 && (!crtl->calls_eh_return
27130 || info->ehrd_offset == -432)
27131 && info->vrsave_save_offset == -224
27132 && info->altivec_save_offset == -416);
27134 treg = gen_rtx_REG (SImode, 11);
27135 emit_move_insn (treg, GEN_INT (-info->total_size));
27137 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
27138 in R11. It also clobbers R12, so beware! */
27140 /* Preserve CR2 for save_world prologues. */
27141 sz = 5;
27142 sz += 32 - info->first_gp_reg_save;
27143 sz += 64 - info->first_fp_reg_save;
27144 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
27145 p = rtvec_alloc (sz);
27146 j = 0;
27147 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
27148 gen_rtx_REG (SImode,
27149 LR_REGNO));
27150 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
27151 gen_rtx_SYMBOL_REF (Pmode,
27152 "*save_world"));
27153 /* We do floats first so that the instruction pattern matches
27154 properly. */
27155 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27156 RTVEC_ELT (p, j++)
27157 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27158 ? DFmode : SFmode,
27159 info->first_fp_reg_save + i),
27160 frame_reg_rtx,
27161 info->fp_save_offset + frame_off + 8 * i);
27162 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27163 RTVEC_ELT (p, j++)
27164 = gen_frame_store (gen_rtx_REG (V4SImode,
27165 info->first_altivec_reg_save + i),
27166 frame_reg_rtx,
27167 info->altivec_save_offset + frame_off + 16 * i);
27168 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27169 RTVEC_ELT (p, j++)
27170 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27171 frame_reg_rtx,
27172 info->gp_save_offset + frame_off + reg_size * i);
27174 /* CR register traditionally saved as CR2. */
27175 RTVEC_ELT (p, j++)
27176 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
27177 frame_reg_rtx, info->cr_save_offset + frame_off);
27178 /* Explain the use of R0. */
27179 if (info->lr_save_p)
27180 RTVEC_ELT (p, j++)
27181 = gen_frame_store (reg0,
27182 frame_reg_rtx, info->lr_save_offset + frame_off);
27183 /* Explain what happens to the stack pointer. */
27185 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
27186 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
27189 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27190 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27191 treg, GEN_INT (-info->total_size));
27192 sp_off = frame_off = info->total_size;
27195 strategy = info->savres_strategy;
27197 /* For V.4, update stack before we do any saving and set back pointer. */
27198 if (! WORLD_SAVE_P (info)
27199 && info->push_p
27200 && (DEFAULT_ABI == ABI_V4
27201 || crtl->calls_eh_return))
27203 bool need_r11 = (!(strategy & SAVE_INLINE_FPRS)
27204 || !(strategy & SAVE_INLINE_GPRS)
27205 || !(strategy & SAVE_INLINE_VRS));
27206 int ptr_regno = -1;
27207 rtx ptr_reg = NULL_RTX;
27208 int ptr_off = 0;
27210 if (info->total_size < 32767)
27211 frame_off = info->total_size;
27212 else if (need_r11)
27213 ptr_regno = 11;
27214 else if (info->cr_save_p
27215 || info->lr_save_p
27216 || info->first_fp_reg_save < 64
27217 || info->first_gp_reg_save < 32
27218 || info->altivec_size != 0
27219 || info->vrsave_size != 0
27220 || crtl->calls_eh_return)
27221 ptr_regno = 12;
27222 else
27224 /* The prologue won't be saving any regs so there is no need
27225 to set up a frame register to access any frame save area.
27226 We also won't be using frame_off anywhere below, but set
27227 the correct value anyway to protect against future
27228 changes to this function. */
27229 frame_off = info->total_size;
27231 if (ptr_regno != -1)
27233 /* Set up the frame offset to that needed by the first
27234 out-of-line save function. */
27235 START_USE (ptr_regno);
27236 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27237 frame_reg_rtx = ptr_reg;
27238 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
27239 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
27240 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
27241 ptr_off = info->gp_save_offset + info->gp_size;
27242 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
27243 ptr_off = info->altivec_save_offset + info->altivec_size;
27244 frame_off = -ptr_off;
27246 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27247 ptr_reg, ptr_off);
27248 if (REGNO (frame_reg_rtx) == 12)
27249 sp_adjust = 0;
27250 sp_off = info->total_size;
27251 if (frame_reg_rtx != sp_reg_rtx)
27252 rs6000_emit_stack_tie (frame_reg_rtx, false);
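/* For a small constant frame, rs6000_emit_allocate_stack typically
   boils down to a single update-form store such as
	stdu 1,-<size>(1)	(stwu on 32-bit)
   which allocates the frame and writes the back chain in one insn;
   larger frames first load the size into a scratch register.  This is
   a sketch of the usual output, not a guarantee.  */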
27255 /* If we use the link register, get it into r0. */
27256 if (!WORLD_SAVE_P (info) && info->lr_save_p
27257 && !cfun->machine->lr_is_wrapped_separately)
27259 rtx addr, reg, mem;
27261 reg = gen_rtx_REG (Pmode, 0);
27262 START_USE (0);
27263 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27264 RTX_FRAME_RELATED_P (insn) = 1;
27266 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
27267 | SAVE_NOINLINE_FPRS_SAVES_LR)))
27269 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27270 GEN_INT (info->lr_save_offset + frame_off));
27271 mem = gen_rtx_MEM (Pmode, addr);
27272 /* This MEM should not be in rs6000_sr_alias_set, because of
27273 __builtin_return_address. */
27275 insn = emit_move_insn (mem, reg);
27276 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27277 NULL_RTX, NULL_RTX);
27278 END_USE (0);
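/* On a 64-bit ELF target the two moves above typically assemble to
	mflr 0
	std 0,16(1)
   16 being the customary lr_save_offset there; whether the store is
   emitted here at all depends on the strategy bits checked above.  */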
27282 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
27283 r12 will be needed by out-of-line gpr restore. */
27284 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27285 && !(strategy & (SAVE_INLINE_GPRS
27286 | SAVE_NOINLINE_GPRS_SAVES_LR))
27287 ? 11 : 12);
27288 if (!WORLD_SAVE_P (info)
27289 && info->cr_save_p
27290 && REGNO (frame_reg_rtx) != cr_save_regno
27291 && !(using_static_chain_p && cr_save_regno == 11)
27292 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
27294 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
27295 START_USE (cr_save_regno);
27296 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
27299 /* Do any required saving of FPRs. If only one or two to save, do
27300 it ourselves; otherwise, call an out-of-line function. */
27301 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
27303 int offset = info->fp_save_offset + frame_off;
27304 for (int i = info->first_fp_reg_save; i < 64; i++)
27306 if (save_reg_p (i)
27307 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
27308 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
27309 sp_off - frame_off);
27311 offset += fp_reg_size;
27314 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
27316 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27317 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27318 unsigned ptr_regno = ptr_regno_for_savres (sel);
27319 rtx ptr_reg = frame_reg_rtx;
27321 if (REGNO (frame_reg_rtx) == ptr_regno)
27322 gcc_checking_assert (frame_off == 0);
27323 else
27325 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27326 NOT_INUSE (ptr_regno);
27327 emit_insn (gen_add3_insn (ptr_reg,
27328 frame_reg_rtx, GEN_INT (frame_off)));
27330 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27331 info->fp_save_offset,
27332 info->lr_save_offset,
27333 DFmode, sel);
27334 rs6000_frame_related (insn, ptr_reg, sp_off,
27335 NULL_RTX, NULL_RTX);
27336 if (lr)
27337 END_USE (0);
27340 /* Save GPRs. This is done as a PARALLEL if we are using
27341 the store-multiple instructions. */
27342 if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
27344 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
27345 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
27346 unsigned ptr_regno = ptr_regno_for_savres (sel);
27347 rtx ptr_reg = frame_reg_rtx;
27348 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
27349 int end_save = info->gp_save_offset + info->gp_size;
27350 int ptr_off;
27352 if (ptr_regno == 12)
27353 sp_adjust = 0;
27354 if (!ptr_set_up)
27355 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27357 /* Need to adjust r11 (r12) if we saved any FPRs. */
27358 if (end_save + frame_off != 0)
27360 rtx offset = GEN_INT (end_save + frame_off);
27362 if (ptr_set_up)
27363 frame_off = -end_save;
27364 else
27365 NOT_INUSE (ptr_regno);
27366 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27368 else if (!ptr_set_up)
27370 NOT_INUSE (ptr_regno);
27371 emit_move_insn (ptr_reg, frame_reg_rtx);
27373 ptr_off = -end_save;
27374 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27375 info->gp_save_offset + ptr_off,
27376 info->lr_save_offset + ptr_off,
27377 reg_mode, sel);
27378 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
27379 NULL_RTX, NULL_RTX);
27380 if (lr)
27381 END_USE (0);
27383 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
27385 rtvec p;
27386 int i;
27387 p = rtvec_alloc (32 - info->first_gp_reg_save);
27388 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27389 RTVEC_ELT (p, i)
27390 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27391 frame_reg_rtx,
27392 info->gp_save_offset + frame_off + reg_size * i);
27393 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27394 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27395 NULL_RTX, NULL_RTX);
27397 else if (!WORLD_SAVE_P (info))
27399 int offset = info->gp_save_offset + frame_off;
27400 for (int i = info->first_gp_reg_save; i < 32; i++)
27402 if (save_reg_p (i)
27403 && !cfun->machine->gpr_is_wrapped_separately[i])
27404 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
27405 sp_off - frame_off);
27407 offset += reg_size;
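/* To illustrate the three GPR strategies dispatched above (register
   numbers, offsets and routine names are examples only):
     out-of-line save:	bl _savegpr0_28
     store-multiple:	stmw 28,-16(1)
     inline stores:	stw 28,-16(1); stw 29,-12(1); ...
   The out-of-line routines rely on the pointer register set up
   beforehand.  */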
27411 if (crtl->calls_eh_return)
27413 unsigned int i;
27414 rtvec p;
27416 for (i = 0; ; ++i)
27418 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27419 if (regno == INVALID_REGNUM)
27420 break;
27423 p = rtvec_alloc (i);
27425 for (i = 0; ; ++i)
27427 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27428 if (regno == INVALID_REGNUM)
27429 break;
27431 rtx set
27432 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
27433 sp_reg_rtx,
27434 info->ehrd_offset + sp_off + reg_size * (int) i);
27435 RTVEC_ELT (p, i) = set;
27436 RTX_FRAME_RELATED_P (set) = 1;
27439 insn = emit_insn (gen_blockage ());
27440 RTX_FRAME_RELATED_P (insn) = 1;
27441 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
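/* On rs6000 the EH data registers are conventionally r3..r6, i.e.
   EH_RETURN_DATA_REGNO (I) yields 3+I for I < 4 (stated here for
   illustration), so P ends up describing four frame stores hung off
   the blockage insn's REG_FRAME_RELATED_EXPR note.  */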
27444 /* In AIX ABI we need to make sure r2 is really saved. */
27445 if (TARGET_AIX && crtl->calls_eh_return)
27447 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
27448 rtx join_insn, note;
27449 rtx_insn *save_insn;
27450 long toc_restore_insn;
27452 tmp_reg = gen_rtx_REG (Pmode, 11);
27453 tmp_reg_si = gen_rtx_REG (SImode, 11);
27454 if (using_static_chain_p)
27456 START_USE (0);
27457 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
27459 else
27460 START_USE (11);
27461 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
27462 /* Peek at the instruction to which this function returns. If it's
27463 restoring r2, then we know we've already saved r2. We can't
27464 unconditionally save r2 because the value we have will already
27465 be updated if we arrived at this function via a plt call or
27466 toc adjusting stub. */
27467 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
27468 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
27469 + RS6000_TOC_SAVE_SLOT);
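/* Decoding the magic numbers: 0xE8410000 is the instruction image of
	ld 2,0(1)
   (opcode 58, RT=2, RA=1) and 0x80410000 likewise encodes
	lwz 2,0(1)
   so adding RS6000_TOC_SAVE_SLOT yields exactly the "reload r2 from
   its save slot" insn that a toc-adjusting stub places after the
   call.  */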
27470 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
27471 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
27472 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
27473 validate_condition_mode (EQ, CCUNSmode);
27474 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
27475 emit_insn (gen_rtx_SET (compare_result,
27476 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
27477 toc_save_done = gen_label_rtx ();
27478 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27479 gen_rtx_EQ (VOIDmode, compare_result,
27480 const0_rtx),
27481 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
27482 pc_rtx);
27483 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27484 JUMP_LABEL (jump) = toc_save_done;
27485 LABEL_NUSES (toc_save_done) += 1;
27487 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
27488 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
27489 sp_off - frame_off);
27491 emit_label (toc_save_done);
27493 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
27494 have a CFG that has different saves along different paths.
27495 Move the note to a dummy blockage insn, which describes that
27496 R2 is unconditionally saved after the label. */
27497 /* ??? An alternate representation might be a special insn pattern
27498 containing both the branch and the store. That might give the
27499 code that minimizes the number of DW_CFA_advance opcodes greater
27500 freedom in placing the annotations. */
27501 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
27502 if (note)
27503 remove_note (save_insn, note);
27504 else
27505 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
27506 copy_rtx (PATTERN (save_insn)), NULL_RTX);
27507 RTX_FRAME_RELATED_P (save_insn) = 0;
27509 join_insn = emit_insn (gen_blockage ());
27510 REG_NOTES (join_insn) = note;
27511 RTX_FRAME_RELATED_P (join_insn) = 1;
27513 if (using_static_chain_p)
27515 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
27516 END_USE (0);
27518 else
27519 END_USE (11);
27522 /* Save CR if we use any that must be preserved. */
27523 if (!WORLD_SAVE_P (info) && info->cr_save_p)
27525 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27526 GEN_INT (info->cr_save_offset + frame_off));
27527 rtx mem = gen_frame_mem (SImode, addr);
27529 /* If we didn't copy cr before, do so now using r0. */
27530 if (cr_save_rtx == NULL_RTX)
27532 START_USE (0);
27533 cr_save_rtx = gen_rtx_REG (SImode, 0);
27534 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
27537 /* Saving CR requires a two-instruction sequence: one instruction
27538 to move the CR to a general-purpose register, and a second
27539 instruction that stores the GPR to memory.
27541 We do not emit any DWARF CFI records for the first of these,
27542 because we cannot properly represent the fact that CR is saved in
27543 a register. One reason is that we cannot express that multiple
27544 CR fields are saved; another reason is that on 64-bit, the size
27545 of the CR register in DWARF (4 bytes) differs from the size of
27546 a general-purpose register.
27548 This means if any intervening instruction were to clobber one of
27549 the call-saved CR fields, we'd have incorrect CFI. To prevent
27550 this from happening, we mark the store to memory as a use of
27551 those CR fields, which prevents any such instruction from being
27552 scheduled in between the two instructions. */
27553 rtx crsave_v[9];
27554 int n_crsave = 0;
27555 int i;
27557 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
27558 for (i = 0; i < 8; i++)
27559 if (save_reg_p (CR0_REGNO + i))
27560 crsave_v[n_crsave++]
27561 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27563 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
27564 gen_rtvec_v (n_crsave, crsave_v)));
27565 END_USE (REGNO (cr_save_rtx));
27567 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
27568 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
27569 so we need to construct a frame expression manually. */
27570 RTX_FRAME_RELATED_P (insn) = 1;
27572 /* Update address to be stack-pointer relative, like
27573 rs6000_frame_related would do. */
27574 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27575 GEN_INT (info->cr_save_offset + sp_off));
27576 mem = gen_frame_mem (SImode, addr);
27578 if (DEFAULT_ABI == ABI_ELFv2)
27580 /* In the ELFv2 ABI we generate separate CFI records for each
27581 CR field that was actually saved. They all point to the
27582 same 32-bit stack slot. */
27583 rtx crframe[8];
27584 int n_crframe = 0;
27586 for (i = 0; i < 8; i++)
27587 if (save_reg_p (CR0_REGNO + i))
27589 crframe[n_crframe]
27590 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
27592 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
27593 n_crframe++;
27596 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27597 gen_rtx_PARALLEL (VOIDmode,
27598 gen_rtvec_v (n_crframe, crframe)));
27600 else
27602 /* In other ABIs, by convention, we use a single CR regnum to
27603 represent the fact that all call-saved CR fields are saved.
27604 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
27605 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
27606 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
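/* The CR save thus materializes as a pair along the lines of
	mfcr 12
	stw 12,8(1)
   (register and offset are illustrative), with the USEs above pinning
   the store to the mfcr so that nothing clobbering a saved CR field
   can be scheduled between them.  */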
27610 /* In the ELFv2 ABI we need to save all call-saved CR fields into
27611 *separate* slots if the routine calls __builtin_eh_return, so
27612 that they can be independently restored by the unwinder. */
27613 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27615 int i, cr_off = info->ehcr_offset;
27616 rtx crsave;
27618 /* ??? We might get better performance by using multiple mfocrf
27619 instructions. */
27620 crsave = gen_rtx_REG (SImode, 0);
27621 emit_insn (gen_prologue_movesi_from_cr (crsave));
27623 for (i = 0; i < 8; i++)
27624 if (!call_used_regs[CR0_REGNO + i])
27626 rtvec p = rtvec_alloc (2);
27627 RTVEC_ELT (p, 0)
27628 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
27629 RTVEC_ELT (p, 1)
27630 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27632 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27634 RTX_FRAME_RELATED_P (insn) = 1;
27635 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27636 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
27637 sp_reg_rtx, cr_off + sp_off));
27639 cr_off += reg_size;
27643 /* If we are emitting stack probes, but allocate no stack, then
27644 just note that in the dump file. */
27645 if (flag_stack_clash_protection
27646 && dump_file
27647 && !info->push_p)
27648 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
27650 /* Update stack and set back pointer unless this is V.4,
27651 for which it was done previously. */
27652 if (!WORLD_SAVE_P (info) && info->push_p
27653 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
27655 rtx ptr_reg = NULL;
27656 int ptr_off = 0;
27658 /* If saving altivec regs we need to be able to address all save
27659 locations using a 16-bit offset. */
27660 if ((strategy & SAVE_INLINE_VRS) == 0
27661 || (info->altivec_size != 0
27662 && (info->altivec_save_offset + info->altivec_size - 16
27663 + info->total_size - frame_off) > 32767)
27664 || (info->vrsave_size != 0
27665 && (info->vrsave_save_offset
27666 + info->total_size - frame_off) > 32767))
27668 int sel = SAVRES_SAVE | SAVRES_VR;
27669 unsigned ptr_regno = ptr_regno_for_savres (sel);
27671 if (using_static_chain_p
27672 && ptr_regno == STATIC_CHAIN_REGNUM)
27673 ptr_regno = 12;
27674 if (REGNO (frame_reg_rtx) != ptr_regno)
27675 START_USE (ptr_regno);
27676 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27677 frame_reg_rtx = ptr_reg;
27678 ptr_off = info->altivec_save_offset + info->altivec_size;
27679 frame_off = -ptr_off;
27681 else if (REGNO (frame_reg_rtx) == 1)
27682 frame_off = info->total_size;
27683 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27684 ptr_reg, ptr_off);
27685 if (REGNO (frame_reg_rtx) == 12)
27686 sp_adjust = 0;
27687 sp_off = info->total_size;
27688 if (frame_reg_rtx != sp_reg_rtx)
27689 rs6000_emit_stack_tie (frame_reg_rtx, false);
27692 /* Set frame pointer, if needed. */
27693 if (frame_pointer_needed)
27695 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
27696 sp_reg_rtx);
27697 RTX_FRAME_RELATED_P (insn) = 1;
27700 /* Save AltiVec registers if needed. Save here because the red zone does
27701 not always include AltiVec registers. */
27702 if (!WORLD_SAVE_P (info)
27703 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
27705 int end_save = info->altivec_save_offset + info->altivec_size;
27706 int ptr_off;
27707 /* Oddly, the vector save/restore functions point r0 at the end
27708 of the save area, then use r11 or r12 to load offsets for
27709 [reg+reg] addressing. */
27710 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27711 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
27712 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27714 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27715 NOT_INUSE (0);
27716 if (scratch_regno == 12)
27717 sp_adjust = 0;
27718 if (end_save + frame_off != 0)
27720 rtx offset = GEN_INT (end_save + frame_off);
27722 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27724 else
27725 emit_move_insn (ptr_reg, frame_reg_rtx);
27727 ptr_off = -end_save;
27728 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27729 info->altivec_save_offset + ptr_off,
27730 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
27731 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
27732 NULL_RTX, NULL_RTX);
27733 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27735 /* The oddity mentioned above clobbered our frame reg. */
27736 emit_move_insn (frame_reg_rtx, ptr_reg);
27737 frame_off = ptr_off;
27740 else if (!WORLD_SAVE_P (info)
27741 && info->altivec_size != 0)
27743 int i;
27745 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27746 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27748 rtx areg, savereg, mem;
27749 HOST_WIDE_INT offset;
27751 offset = (info->altivec_save_offset + frame_off
27752 + 16 * (i - info->first_altivec_reg_save));
27754 savereg = gen_rtx_REG (V4SImode, i);
27756 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27758 mem = gen_frame_mem (V4SImode,
27759 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27760 GEN_INT (offset)));
27761 insn = emit_insn (gen_rtx_SET (mem, savereg));
27762 areg = NULL_RTX;
27764 else
27766 NOT_INUSE (0);
27767 areg = gen_rtx_REG (Pmode, 0);
27768 emit_move_insn (areg, GEN_INT (offset));
27770 /* AltiVec addressing mode is [reg+reg]. */
27771 mem = gen_frame_mem (V4SImode,
27772 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27774 /* Rather than emitting a generic move, force use of the stvx
27775 instruction, which we always want on ISA 2.07 (power8) systems.
27776 In particular we don't want xxpermdi/stxvd2x for little
27777 endian. */
27778 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
27781 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27782 areg, GEN_INT (offset));
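/* One iteration of the inline loop above, for an illustrative offset
   of -16 from the frame register, comes out conceptually as
	li 0,-16
	stvx 31,1,0		EA = r1 + r0, per [reg+reg] addressing
   while the TARGET_P9_VECTOR branch uses a direct offset-form vector
   store when the offset qualifies.  */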
27786 /* VRSAVE is a bit vector representing which AltiVec registers
27787 are used. The OS uses this to determine which vector
27788 registers to save on a context switch. We need to save
27789 VRSAVE on the stack frame, add whatever AltiVec registers we
27790 used in this function, and do the corresponding magic in the
27791 epilogue. */
27793 if (!WORLD_SAVE_P (info) && info->vrsave_size != 0)
27795 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
27796 be using r12 as frame_reg_rtx and r11 as the static chain
27797 pointer for nested functions. */
27798 int save_regno = 12;
27799 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27800 && !using_static_chain_p)
27801 save_regno = 11;
27802 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27804 save_regno = 11;
27805 if (using_static_chain_p)
27806 save_regno = 0;
27808 NOT_INUSE (save_regno);
27810 emit_vrsave_prologue (info, save_regno, frame_off, frame_reg_rtx);
27813 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27814 if (!TARGET_SINGLE_PIC_BASE
27815 && ((TARGET_TOC && TARGET_MINIMAL_TOC
27816 && !constant_pool_empty_p ())
27817 || (DEFAULT_ABI == ABI_V4
27818 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27819 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27821 /* If emit_load_toc_table will use the link register, we need to save
27822 it. We use R12 for this purpose because emit_load_toc_table
27823 can use register 0. This allows us to use a plain 'blr' to return
27824 from the procedure more often. */
27825 int save_LR_around_toc_setup = (TARGET_ELF
27826 && DEFAULT_ABI == ABI_V4
27827 && flag_pic
27828 && ! info->lr_save_p
27829 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27830 if (save_LR_around_toc_setup)
27832 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27833 rtx tmp = gen_rtx_REG (Pmode, 12);
27835 sp_adjust = 0;
27836 insn = emit_move_insn (tmp, lr);
27837 RTX_FRAME_RELATED_P (insn) = 1;
27839 rs6000_emit_load_toc_table (TRUE);
27841 insn = emit_move_insn (lr, tmp);
27842 add_reg_note (insn, REG_CFA_RESTORE, lr);
27843 RTX_FRAME_RELATED_P (insn) = 1;
27845 else
27846 rs6000_emit_load_toc_table (TRUE);
27849 #if TARGET_MACHO
27850 if (!TARGET_SINGLE_PIC_BASE
27851 && DEFAULT_ABI == ABI_DARWIN
27852 && flag_pic && crtl->uses_pic_offset_table)
27854 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27855 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27857 /* Save and restore LR locally around this call (in R0). */
27858 if (!info->lr_save_p)
27859 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27861 emit_insn (gen_load_macho_picbase (src));
27863 emit_move_insn (gen_rtx_REG (Pmode,
27864 RS6000_PIC_OFFSET_TABLE_REGNUM),
27865 lr);
27867 if (!info->lr_save_p)
27868 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27870 #endif
27872 /* If we need to, save the TOC register after doing the stack setup.
27873 Do not emit eh frame info for this save. The unwinder wants info,
27874 conceptually attached to instructions in this function, about
27875 register values in the caller of this function. This R2 may have
27876 already been changed from the value in the caller.
27877 We don't attempt to write accurate DWARF EH frame info for R2
27878 because code emitted by gcc for a (non-pointer) function call
27879 doesn't save and restore R2. Instead, R2 is managed out-of-line
27880 by a linker generated plt call stub when the function resides in
27881 a shared library. This behavior is costly to describe in DWARF,
27882 both in terms of the size of DWARF info and the time taken in the
27883 unwinder to interpret it. R2 changes, apart from the
27884 calls_eh_return case earlier in this function, are handled by
27885 linux-unwind.h frob_update_context. */
27886 if (rs6000_save_toc_in_prologue_p ()
27887 && !cfun->machine->toc_is_wrapped_separately)
27889 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27890 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
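/* On 64-bit targets this is a plain store such as
	std 2,24(1)
   24 being the ELFv2 TOC slot (40 under ELFv1/AIX); the ABI-correct
   offset comes from RS6000_TOC_SAVE_SLOT, so the numbers here are
   only illustrative of the usual layouts.  */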
27893 /* Set up the arg pointer (r12) for -fsplit-stack code. */
27894 if (using_split_stack && split_stack_arg_pointer_used_p ())
27895 emit_split_stack_prologue (info, sp_adjust, frame_off, frame_reg_rtx);
27898 /* Output .extern statements for the save/restore routines we use. */
27900 static void
27901 rs6000_output_savres_externs (FILE *file)
27903 rs6000_stack_t *info = rs6000_stack_info ();
27905 if (TARGET_DEBUG_STACK)
27906 debug_stack_info (info);
27908 /* Write .extern for any function we will call to save and restore
27909 fp values. */
27910 if (info->first_fp_reg_save < 64
27911 && !TARGET_MACHO
27912 && !TARGET_ELF)
27914 char *name;
27915 int regno = info->first_fp_reg_save - 32;
27917 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27919 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27920 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27921 name = rs6000_savres_routine_name (regno, sel);
27922 fprintf (file, "\t.extern %s\n", name);
27924 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27926 bool lr = (info->savres_strategy
27927 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27928 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27929 name = rs6000_savres_routine_name (regno, sel);
27930 fprintf (file, "\t.extern %s\n", name);
27935 /* Write function prologue. */
27937 static void
27938 rs6000_output_function_prologue (FILE *file)
27940 if (!cfun->is_thunk)
27941 rs6000_output_savres_externs (file);
27943 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27944 immediately after the global entry point label. */
27945 if (rs6000_global_entry_point_needed_p ())
27947 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27949 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27951 if (TARGET_CMODEL != CMODEL_LARGE)
27953 /* In the small and medium code models, we assume the TOC is less
27954 than 2 GB away from the text section, so it can be computed via the
27955 following two-instruction sequence. */
27956 char buf[256];
27958 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27959 fprintf (file, "0:\taddis 2,12,.TOC.-");
27960 assemble_name (file, buf);
27961 fprintf (file, "@ha\n");
27962 fprintf (file, "\taddi 2,2,.TOC.-");
27963 assemble_name (file, buf);
27964 fprintf (file, "@l\n");
27966 else
27968 /* In the large code model, we allow arbitrary offsets between the
27969 TOC and the text section, so we have to load the offset from
27970 memory. The data field is emitted directly before the global
27971 entry point in rs6000_elf_declare_function_name. */
27972 char buf[256];
27974 #ifdef HAVE_AS_ENTRY_MARKERS
27975 /* If supported by the linker, emit a marker relocation. If the
27976 total code size of the final executable or shared library
27977 happens to fit into 2 GB after all, the linker will replace
27978 this code sequence with the sequence for the small or medium
27979 code model. */
27980 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
27981 #endif
27982 fprintf (file, "\tld 2,");
27983 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27984 assemble_name (file, buf);
27985 fprintf (file, "-");
27986 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27987 assemble_name (file, buf);
27988 fprintf (file, "(12)\n");
27989 fprintf (file, "\tadd 2,2,12\n");
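/* As a concrete illustration, the medium code model path above emits
   roughly
	0:	addis 2,12,.TOC.-0b@ha
		addi 2,2,.TOC.-0b@l
   while the large code model instead loads the TOC offset from the
   data word emitted before the entry point:
	ld 2,.LCL0-.LCF0(12)
	add 2,2,12
   (label names are examples); both are followed by the .localentry
   directive below.  */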
27992 fputs ("\t.localentry\t", file);
27993 assemble_name (file, name);
27994 fputs (",.-", file);
27995 assemble_name (file, name);
27996 fputs ("\n", file);
27999 /* Output -mprofile-kernel code. This needs to be done here instead of
28000 in output_function_profile since it must go after the ELFv2 ABI
28001 local entry point. */
28002 if (TARGET_PROFILE_KERNEL && crtl->profile)
28004 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28005 gcc_assert (!TARGET_32BIT);
28007 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
28009 /* In the ELFv2 ABI we have no compiler stack word. It must be
28010 the responsibility of _mcount to preserve the static chain
28011 register if required. */
28012 if (DEFAULT_ABI != ABI_ELFv2
28013 && cfun->static_chain_decl != NULL)
28015 asm_fprintf (file, "\tstd %s,24(%s)\n",
28016 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
28017 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
28018 asm_fprintf (file, "\tld %s,24(%s)\n",
28019 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
28021 else
28022 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
28025 rs6000_pic_labelno++;
28028 /* -mprofile-kernel code calls mcount before the function prologue,
28029 so a profiled leaf function should stay a leaf function. */
28030 static bool
28031 rs6000_keep_leaf_when_profiled ()
28033 return TARGET_PROFILE_KERNEL;
28036 /* Non-zero if vmx regs are restored before the frame pop, zero if
28037 we restore after the pop when possible. */
28038 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
28040 /* Restoring cr is a two-step process: loading a reg from the frame
28041 save, then moving the reg to cr. For ABI_V4 we must let the
28042 unwinder know that the stack location is no longer valid at or
28043 before the stack deallocation, but we can't emit a cfa_restore for
28044 cr at the stack deallocation like we do for other registers.
28045 The trouble is that it is possible for the move to cr to be
28046 scheduled after the stack deallocation. So say exactly where cr
28047 is located on each of the two insns. */
28049 static rtx
28050 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
28052 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
28053 rtx reg = gen_rtx_REG (SImode, regno);
28054 rtx_insn *insn = emit_move_insn (reg, mem);
28056 if (!exit_func && DEFAULT_ABI == ABI_V4)
28058 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
28059 rtx set = gen_rtx_SET (reg, cr);
28061 add_reg_note (insn, REG_CFA_REGISTER, set);
28062 RTX_FRAME_RELATED_P (insn) = 1;
28064 return reg;
28067 /* Reload CR from REG. */
28069 static void
28070 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
28072 int count = 0;
28073 int i;
28075 if (using_mfcr_multiple)
28077 for (i = 0; i < 8; i++)
28078 if (save_reg_p (CR0_REGNO + i))
28079 count++;
28080 gcc_assert (count);
28083 if (using_mfcr_multiple && count > 1)
28085 rtx_insn *insn;
28086 rtvec p;
28087 int ndx;
28089 p = rtvec_alloc (count);
28091 ndx = 0;
28092 for (i = 0; i < 8; i++)
28093 if (save_reg_p (CR0_REGNO + i))
28095 rtvec r = rtvec_alloc (2);
28096 RTVEC_ELT (r, 0) = reg;
28097 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
28098 RTVEC_ELT (p, ndx) =
28099 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
28100 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
28101 ndx++;
28103 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28104 gcc_assert (ndx == count);
28106 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
28107 CR field separately. */
28108 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
28110 for (i = 0; i < 8; i++)
28111 if (save_reg_p (CR0_REGNO + i))
28112 add_reg_note (insn, REG_CFA_RESTORE,
28113 gen_rtx_REG (SImode, CR0_REGNO + i));
28115 RTX_FRAME_RELATED_P (insn) = 1;
28118 else
28119 for (i = 0; i < 8; i++)
28120 if (save_reg_p (CR0_REGNO + i))
28122 rtx insn = emit_insn (gen_movsi_to_cr_one
28123 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28125 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
28126 CR field separately, attached to the insn that in fact
28127 restores this particular CR field. */
28128 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
28130 add_reg_note (insn, REG_CFA_RESTORE,
28131 gen_rtx_REG (SImode, CR0_REGNO + i));
28133 RTX_FRAME_RELATED_P (insn) = 1;
28137 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
28138 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
28139 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
28141 rtx_insn *insn = get_last_insn ();
28142 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
28144 add_reg_note (insn, REG_CFA_RESTORE, cr);
28145 RTX_FRAME_RELATED_P (insn) = 1;
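/* Worked example: if CR2, CR3 and CR4 were saved, their masks are
   1<<5, 1<<4 and 1<<3, so the PARALLEL path collapses to the single
	mtcrf 0x38,12
   whereas the fallback loop emits one mtcrf per field (masks 0x20,
   0x10, 0x08).  Register 12 is illustrative; REG is whatever the
   caller loaded the save word into.  */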
28149 /* Like cr, the move to lr instruction can be scheduled after the
28150 stack deallocation, but unlike cr, its stack frame save is still
28151 valid. So we only need to emit the cfa_restore on the correct
28152 instruction. */
28154 static void
28155 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
28157 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
28158 rtx reg = gen_rtx_REG (Pmode, regno);
28160 emit_move_insn (reg, mem);
28163 static void
28164 restore_saved_lr (int regno, bool exit_func)
28166 rtx reg = gen_rtx_REG (Pmode, regno);
28167 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28168 rtx_insn *insn = emit_move_insn (lr, reg);
28170 if (!exit_func && flag_shrink_wrap)
28172 add_reg_note (insn, REG_CFA_RESTORE, lr);
28173 RTX_FRAME_RELATED_P (insn) = 1;
28177 static rtx
28178 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
28180 if (DEFAULT_ABI == ABI_ELFv2)
28182 int i;
28183 for (i = 0; i < 8; i++)
28184 if (save_reg_p (CR0_REGNO + i))
28186 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
28187 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
28188 cfa_restores);
28191 else if (info->cr_save_p)
28192 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28193 gen_rtx_REG (SImode, CR2_REGNO),
28194 cfa_restores);
28196 if (info->lr_save_p)
28197 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28198 gen_rtx_REG (Pmode, LR_REGNO),
28199 cfa_restores);
28200 return cfa_restores;
28203 /* Return true if OFFSET from the stack pointer can be clobbered by
28204 signals. V.4 doesn't have any stack cushion; the AIX-style ABIs have
28205 220 or 288 bytes below the stack pointer not clobbered by signals. */
28207 static inline bool
28208 offset_below_red_zone_p (HOST_WIDE_INT offset)
28210 return offset < (DEFAULT_ABI == ABI_V4
28211 ? 0
28212 : TARGET_32BIT ? -220 : -288);
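/* Worked example: under the 64-bit AIX-style ABIs the cushion is 288
   bytes, so an offset of -300 returns true (a signal handler may
   clobber that slot) while -100 returns false; under V.4 any negative
   offset is unprotected.  */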
28215 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
28217 static void
28218 emit_cfa_restores (rtx cfa_restores)
28220 rtx_insn *insn = get_last_insn ();
28221 rtx *loc = &REG_NOTES (insn);
28223 while (*loc)
28224 loc = &XEXP (*loc, 1);
28225 *loc = cfa_restores;
28226 RTX_FRAME_RELATED_P (insn) = 1;
28229 /* Emit function epilogue as insns. */
28231 void
28232 rs6000_emit_epilogue (int sibcall)
28234 rs6000_stack_t *info;
28235 int restoring_GPRs_inline;
28236 int restoring_FPRs_inline;
28237 int using_load_multiple;
28238 int using_mtcr_multiple;
28239 int use_backchain_to_restore_sp;
28240 int restore_lr;
28241 int strategy;
28242 HOST_WIDE_INT frame_off = 0;
28243 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
28244 rtx frame_reg_rtx = sp_reg_rtx;
28245 rtx cfa_restores = NULL_RTX;
28246 rtx insn;
28247 rtx cr_save_reg = NULL_RTX;
28248 machine_mode reg_mode = Pmode;
28249 int reg_size = TARGET_32BIT ? 4 : 8;
28250 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
28251 ? DFmode : SFmode;
28252 int fp_reg_size = 8;
28253 int i;
28254 bool exit_func;
28255 unsigned ptr_regno;
28257 info = rs6000_stack_info ();
28259 strategy = info->savres_strategy;
28260 using_load_multiple = strategy & REST_MULTIPLE;
28261 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
28262 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
28263 using_mtcr_multiple = (rs6000_tune == PROCESSOR_PPC601
28264 || rs6000_tune == PROCESSOR_PPC603
28265 || rs6000_tune == PROCESSOR_PPC750
28266 || optimize_size);
28267 /* Restore via the backchain when we have a large frame, since this
28268 is more efficient than an addis, addi pair. The second condition
28269 here will not trigger at the moment; we don't actually need a
28270 frame pointer for alloca, but the generic parts of the compiler
28271 give us one anyway. */
28272 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
28273 ? info->lr_save_offset
28274 : 0) > 32767
28275 || (cfun->calls_alloca
28276 && !frame_pointer_needed));
28277 restore_lr = (info->lr_save_p
28278 && (restoring_FPRs_inline
28279 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
28280 && (restoring_GPRs_inline
28281 || info->first_fp_reg_save < 64)
28282 && !cfun->machine->lr_is_wrapped_separately);
28285 if (WORLD_SAVE_P (info))
28287 int i, j;
28288 char rname[30];
28289 const char *alloc_rname;
28290 rtvec p;
28292 /* eh_rest_world_r10 will return to the location saved in the LR
28293 stack slot (which is not likely to be our caller).
28294 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
28295 rest_world is similar, except any R10 parameter is ignored.
28296 The exception-handling stuff that was here in 2.95 is no
28297 longer necessary. */
28299 p = rtvec_alloc (9
28300 + 32 - info->first_gp_reg_save
28301 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
28302 + 63 + 1 - info->first_fp_reg_save);
28304 strcpy (rname, ((crtl->calls_eh_return) ?
28305 "*eh_rest_world_r10" : "*rest_world"));
28306 alloc_rname = ggc_strdup (rname);
28308 j = 0;
28309 RTVEC_ELT (p, j++) = ret_rtx;
28310 RTVEC_ELT (p, j++)
28311 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
28312 /* The instruction pattern requires a clobber here;
28313 it is shared with the restVEC helper. */
28314 RTVEC_ELT (p, j++)
28315 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
28318 /* CR register traditionally saved as CR2. */
28319 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
28320 RTVEC_ELT (p, j++)
28321 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
28322 if (flag_shrink_wrap)
28324 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28325 gen_rtx_REG (Pmode, LR_REGNO),
28326 cfa_restores);
28327 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28331 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28333 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
28334 RTVEC_ELT (p, j++)
28335 = gen_frame_load (reg,
28336 frame_reg_rtx, info->gp_save_offset + reg_size * i);
28337 if (flag_shrink_wrap
28338 && save_reg_p (info->first_gp_reg_save + i))
28339 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28341 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28343 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
28344 RTVEC_ELT (p, j++)
28345 = gen_frame_load (reg,
28346 frame_reg_rtx, info->altivec_save_offset + 16 * i);
28347 if (flag_shrink_wrap
28348 && save_reg_p (info->first_altivec_reg_save + i))
28349 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28351 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
28353 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28354 ? DFmode : SFmode),
28355 info->first_fp_reg_save + i);
28356 RTVEC_ELT (p, j++)
28357 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
28358 if (flag_shrink_wrap
28359 && save_reg_p (info->first_fp_reg_save + i))
28360 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28362 RTVEC_ELT (p, j++)
28363 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
28364 RTVEC_ELT (p, j++)
28365 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
28366 RTVEC_ELT (p, j++)
28367 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
28368 RTVEC_ELT (p, j++)
28369 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
28370 RTVEC_ELT (p, j++)
28371 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
28372 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28374 if (flag_shrink_wrap)
28376 REG_NOTES (insn) = cfa_restores;
28377 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28378 RTX_FRAME_RELATED_P (insn) = 1;
28380 return;
28383 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
28384 if (info->push_p)
28385 frame_off = info->total_size;
28387 /* Restore AltiVec registers if we must do so before adjusting the
28388 stack. */
28389 if (info->altivec_size != 0
28390 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28391 || (DEFAULT_ABI != ABI_V4
28392 && offset_below_red_zone_p (info->altivec_save_offset))))
28394 int i;
28395 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28397 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28398 if (use_backchain_to_restore_sp)
28400 int frame_regno = 11;
28402 if ((strategy & REST_INLINE_VRS) == 0)
28404 /* Of r11 and r12, select the one not clobbered by an
28405 out-of-line restore function for the frame register. */
28406 frame_regno = 11 + 12 - scratch_regno;
28408 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
28409 emit_move_insn (frame_reg_rtx,
28410 gen_rtx_MEM (Pmode, sp_reg_rtx));
28411 frame_off = 0;
28413 else if (frame_pointer_needed)
28414 frame_reg_rtx = hard_frame_pointer_rtx;
28416 if ((strategy & REST_INLINE_VRS) == 0)
28418 int end_save = info->altivec_save_offset + info->altivec_size;
28419 int ptr_off;
28420 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28421 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28423 if (end_save + frame_off != 0)
28425 rtx offset = GEN_INT (end_save + frame_off);
28427 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28429 else
28430 emit_move_insn (ptr_reg, frame_reg_rtx);
28432 ptr_off = -end_save;
28433 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28434 info->altivec_save_offset + ptr_off,
28435 0, V4SImode, SAVRES_VR);
28437 else
28439 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28440 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28442 rtx addr, areg, mem, insn;
28443 rtx reg = gen_rtx_REG (V4SImode, i);
28444 HOST_WIDE_INT offset
28445 = (info->altivec_save_offset + frame_off
28446 + 16 * (i - info->first_altivec_reg_save));
28448 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
28450 mem = gen_frame_mem (V4SImode,
28451 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28452 GEN_INT (offset)));
28453 insn = gen_rtx_SET (reg, mem);
28455 else
28457 areg = gen_rtx_REG (Pmode, 0);
28458 emit_move_insn (areg, GEN_INT (offset));
28460 /* AltiVec addressing mode is [reg+reg]. */
28461 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28462 mem = gen_frame_mem (V4SImode, addr);
28464 /* Rather than emitting a generic move, force use of the
28465 lvx instruction, which we always want. In particular we
28466 don't want lxvd2x/xxpermdi for little endian. */
28467 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28470 (void) emit_insn (insn);
28474 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28475 if (((strategy & REST_INLINE_VRS) == 0
28476 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28477 && (flag_shrink_wrap
28478 || (offset_below_red_zone_p
28479 (info->altivec_save_offset
28480 + 16 * (i - info->first_altivec_reg_save))))
28481 && save_reg_p (i))
28483 rtx reg = gen_rtx_REG (V4SImode, i);
28484 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28488 /* Restore VRSAVE if we must do so before adjusting the stack. */
28489 if (info->vrsave_size != 0
28490 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28491 || (DEFAULT_ABI != ABI_V4
28492 && offset_below_red_zone_p (info->vrsave_save_offset))))
28494 rtx reg;
28496 if (frame_reg_rtx == sp_reg_rtx)
28498 if (use_backchain_to_restore_sp)
28500 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28501 emit_move_insn (frame_reg_rtx,
28502 gen_rtx_MEM (Pmode, sp_reg_rtx));
28503 frame_off = 0;
28505 else if (frame_pointer_needed)
28506 frame_reg_rtx = hard_frame_pointer_rtx;
28509 reg = gen_rtx_REG (SImode, 12);
28510 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28511 info->vrsave_save_offset + frame_off));
28513 emit_insn (generate_set_vrsave (reg, info, 1));
28516 insn = NULL_RTX;
28517 /* If we have a large stack frame, restore the old stack pointer
28518 using the backchain. */
28519 if (use_backchain_to_restore_sp)
28521 if (frame_reg_rtx == sp_reg_rtx)
28523 /* Under V.4, don't reset the stack pointer until after we're done
28524 loading the saved registers. */
28525 if (DEFAULT_ABI == ABI_V4)
28526 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28528 insn = emit_move_insn (frame_reg_rtx,
28529 gen_rtx_MEM (Pmode, sp_reg_rtx));
28530 frame_off = 0;
28532 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28533 && DEFAULT_ABI == ABI_V4)
28534 /* frame_reg_rtx has been set up by the altivec restore. */
28536 else
28538 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
28539 frame_reg_rtx = sp_reg_rtx;
28542 /* If we have a frame pointer, we can restore the old stack pointer
28543 from it. */
28544 else if (frame_pointer_needed)
28546 frame_reg_rtx = sp_reg_rtx;
28547 if (DEFAULT_ABI == ABI_V4)
28548 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28549 /* Prevent reordering memory accesses against stack pointer restore. */
28550 else if (cfun->calls_alloca
28551 || offset_below_red_zone_p (-info->total_size))
28552 rs6000_emit_stack_tie (frame_reg_rtx, true);
28554 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
28555 GEN_INT (info->total_size)));
28556 frame_off = 0;
28558 else if (info->push_p
28559 && DEFAULT_ABI != ABI_V4
28560 && !crtl->calls_eh_return)
28562 /* Prevent reordering memory accesses against stack pointer restore. */
28563 if (cfun->calls_alloca
28564 || offset_below_red_zone_p (-info->total_size))
28565 rs6000_emit_stack_tie (frame_reg_rtx, false);
28566 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
28567 GEN_INT (info->total_size)));
28568 frame_off = 0;
28570 if (insn && frame_reg_rtx == sp_reg_rtx)
28572 if (cfa_restores)
28574 REG_NOTES (insn) = cfa_restores;
28575 cfa_restores = NULL_RTX;
28577 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28578 RTX_FRAME_RELATED_P (insn) = 1;
28581 /* Restore AltiVec registers if we have not done so already. */
28582 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28583 && info->altivec_size != 0
28584 && (DEFAULT_ABI == ABI_V4
28585 || !offset_below_red_zone_p (info->altivec_save_offset)))
28587 int i;
28589 if ((strategy & REST_INLINE_VRS) == 0)
28591 int end_save = info->altivec_save_offset + info->altivec_size;
28592 int ptr_off;
28593 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28594 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28595 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28597 if (end_save + frame_off != 0)
28599 rtx offset = GEN_INT (end_save + frame_off);
28601 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28603 else
28604 emit_move_insn (ptr_reg, frame_reg_rtx);
28606 ptr_off = -end_save;
28607 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28608 info->altivec_save_offset + ptr_off,
28609 0, V4SImode, SAVRES_VR);
28610 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28612 /* Frame reg was clobbered by out-of-line save. Restore it
28613 from ptr_reg, and if we are calling out-of-line gpr or
28614 fpr restore, set up the correct pointer and offset. */
28615 unsigned newptr_regno = 1;
28616 if (!restoring_GPRs_inline)
28618 bool lr = info->gp_save_offset + info->gp_size == 0;
28619 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28620 newptr_regno = ptr_regno_for_savres (sel);
28621 end_save = info->gp_save_offset + info->gp_size;
28623 else if (!restoring_FPRs_inline)
28625 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
28626 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28627 newptr_regno = ptr_regno_for_savres (sel);
28628 end_save = info->fp_save_offset + info->fp_size;
28631 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
28632 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
28634 if (end_save + ptr_off != 0)
28636 rtx offset = GEN_INT (end_save + ptr_off);
28638 frame_off = -end_save;
28639 if (TARGET_32BIT)
28640 emit_insn (gen_addsi3_carry (frame_reg_rtx,
28641 ptr_reg, offset));
28642 else
28643 emit_insn (gen_adddi3_carry (frame_reg_rtx,
28644 ptr_reg, offset));
28646 else
28648 frame_off = ptr_off;
28649 emit_move_insn (frame_reg_rtx, ptr_reg);
28653 else
28655 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28656 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28658 rtx addr, areg, mem, insn;
28659 rtx reg = gen_rtx_REG (V4SImode, i);
28660 HOST_WIDE_INT offset
28661 = (info->altivec_save_offset + frame_off
28662 + 16 * (i - info->first_altivec_reg_save));
28664 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
28666 mem = gen_frame_mem (V4SImode,
28667 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28668 GEN_INT (offset)));
28669 insn = gen_rtx_SET (reg, mem);
28671 else
28673 areg = gen_rtx_REG (Pmode, 0);
28674 emit_move_insn (areg, GEN_INT (offset));
28676 /* AltiVec addressing mode is [reg+reg]. */
28677 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28678 mem = gen_frame_mem (V4SImode, addr);
28680 /* Rather than emitting a generic move, force use of the
28681 lvx instruction, which we always want. In particular we
28682 don't want lxvd2x/xxpermdi for little endian. */
28683 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28686 (void) emit_insn (insn);
28690 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28691 if (((strategy & REST_INLINE_VRS) == 0
28692 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28693 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28694 && save_reg_p (i))
28696 rtx reg = gen_rtx_REG (V4SImode, i);
28697 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28701 /* Restore VRSAVE if we have not done so already. */
28702 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28703 && info->vrsave_size != 0
28704 && (DEFAULT_ABI == ABI_V4
28705 || !offset_below_red_zone_p (info->vrsave_save_offset)))
28707 rtx reg;
28709 reg = gen_rtx_REG (SImode, 12);
28710 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28711 info->vrsave_save_offset + frame_off));
28713 emit_insn (generate_set_vrsave (reg, info, 1));
28716 /* If we exit by an out-of-line restore function on ABI_V4 then that
28717 function will deallocate the stack, so we don't need to worry
28718 about the unwinder restoring cr from an invalid stack frame
28719 location. */
28720 exit_func = (!restoring_FPRs_inline
28721 || (!restoring_GPRs_inline
28722 && info->first_fp_reg_save == 64));
28724 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28725 *separate* slots if the routine calls __builtin_eh_return, so
28726 that they can be independently restored by the unwinder. */
28727 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28729 int i, cr_off = info->ehcr_offset;
28731 for (i = 0; i < 8; i++)
28732 if (!call_used_regs[CR0_REGNO + i])
28734 rtx reg = gen_rtx_REG (SImode, 0);
28735 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28736 cr_off + frame_off));
28738 insn = emit_insn (gen_movsi_to_cr_one
28739 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28741 if (!exit_func && flag_shrink_wrap)
28743 add_reg_note (insn, REG_CFA_RESTORE,
28744 gen_rtx_REG (SImode, CR0_REGNO + i));
28746 RTX_FRAME_RELATED_P (insn) = 1;
28749 cr_off += reg_size;
28753 /* Get the old lr if we saved it. If we are restoring registers
28754 out-of-line, then the out-of-line routines can do this for us. */
28755 if (restore_lr && restoring_GPRs_inline)
28756 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28758 /* Get the old cr if we saved it. */
28759 if (info->cr_save_p)
28761 unsigned cr_save_regno = 12;
28763 if (!restoring_GPRs_inline)
28765 /* Ensure we don't use the register used by the out-of-line
28766 gpr register restore below. */
28767 bool lr = info->gp_save_offset + info->gp_size == 0;
28768 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28769 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28771 if (gpr_ptr_regno == 12)
28772 cr_save_regno = 11;
28773 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28775 else if (REGNO (frame_reg_rtx) == 12)
28776 cr_save_regno = 11;
28778 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28779 info->cr_save_offset + frame_off,
28780 exit_func);
28783 /* Set LR here to try to overlap restores below. */
28784 if (restore_lr && restoring_GPRs_inline)
28785 restore_saved_lr (0, exit_func);
28787 /* Load exception handler data registers, if needed. */
28788 if (crtl->calls_eh_return)
28790 unsigned int i, regno;
28792 if (TARGET_AIX)
28794 rtx reg = gen_rtx_REG (reg_mode, 2);
28795 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28796 frame_off + RS6000_TOC_SAVE_SLOT));
28799 for (i = 0; ; ++i)
28801 rtx mem;
28803 regno = EH_RETURN_DATA_REGNO (i);
28804 if (regno == INVALID_REGNUM)
28805 break;
28807 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28808 info->ehrd_offset + frame_off
28809 + reg_size * (int) i);
28811 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28815 /* Restore GPRs. This is done as a PARALLEL if we are using
28816 the load-multiple instructions. */
28817 if (!restoring_GPRs_inline)
28819 /* We are jumping to an out-of-line function. */
28820 rtx ptr_reg;
28821 int end_save = info->gp_save_offset + info->gp_size;
28822 bool can_use_exit = end_save == 0;
28823 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28824 int ptr_off;
28826 /* Emit stack reset code if we need it. */
28827 ptr_regno = ptr_regno_for_savres (sel);
28828 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28829 if (can_use_exit)
28830 rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28831 else if (end_save + frame_off != 0)
28832 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28833 GEN_INT (end_save + frame_off)));
28834 else if (REGNO (frame_reg_rtx) != ptr_regno)
28835 emit_move_insn (ptr_reg, frame_reg_rtx);
28836 if (REGNO (frame_reg_rtx) == ptr_regno)
28837 frame_off = -end_save;
28839 if (can_use_exit && info->cr_save_p)
28840 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28842 ptr_off = -end_save;
28843 rs6000_emit_savres_rtx (info, ptr_reg,
28844 info->gp_save_offset + ptr_off,
28845 info->lr_save_offset + ptr_off,
28846 reg_mode, sel);
28848 else if (using_load_multiple)
28850 rtvec p;
28851 p = rtvec_alloc (32 - info->first_gp_reg_save);
28852 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28853 RTVEC_ELT (p, i)
28854 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28855 frame_reg_rtx,
28856 info->gp_save_offset + frame_off + reg_size * i);
28857 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28859 else
28861 int offset = info->gp_save_offset + frame_off;
28862 for (i = info->first_gp_reg_save; i < 32; i++)
28864 if (save_reg_p (i)
28865 && !cfun->machine->gpr_is_wrapped_separately[i])
28867 rtx reg = gen_rtx_REG (reg_mode, i);
28868 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28871 offset += reg_size;
28875 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28877 /* If the frame pointer was used then we can't delay emitting
28878 a REG_CFA_DEF_CFA note. This must happen on the insn that
28879 restores the frame pointer, r31. We may have already emitted
28880 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
28881 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28882 be harmless if emitted. */
28883 if (frame_pointer_needed)
28885 insn = get_last_insn ();
28886 add_reg_note (insn, REG_CFA_DEF_CFA,
28887 plus_constant (Pmode, frame_reg_rtx, frame_off));
28888 RTX_FRAME_RELATED_P (insn) = 1;
28891 /* Set up cfa_restores. We always need these when
28892 shrink-wrapping. If not shrink-wrapping then we only need
28893 the cfa_restore when the stack location is no longer valid.
28894 The cfa_restores must be emitted on or before the insn that
28895 invalidates the stack, and of course must not be emitted
28896 before the insn that actually does the restore. The latter
28897 is why it is a bad idea to emit the cfa_restores as a group
28898 on the last instruction here that actually does a restore:
28899 that insn may be reordered with respect to others doing
28900 restores. */
28901 if (flag_shrink_wrap
28902 && !restoring_GPRs_inline
28903 && info->first_fp_reg_save == 64)
28904 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28906 for (i = info->first_gp_reg_save; i < 32; i++)
28907 if (save_reg_p (i)
28908 && !cfun->machine->gpr_is_wrapped_separately[i])
28910 rtx reg = gen_rtx_REG (reg_mode, i);
28911 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28915 if (!restoring_GPRs_inline
28916 && info->first_fp_reg_save == 64)
28918 /* We are jumping to an out-of-line function. */
28919 if (cfa_restores)
28920 emit_cfa_restores (cfa_restores);
28921 return;
28924 if (restore_lr && !restoring_GPRs_inline)
28926 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28927 restore_saved_lr (0, exit_func);
28930 /* Restore fpr's if we need to do it without calling a function. */
28931 if (restoring_FPRs_inline)
28933 int offset = info->fp_save_offset + frame_off;
28934 for (i = info->first_fp_reg_save; i < 64; i++)
28936 if (save_reg_p (i)
28937 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
28939 rtx reg = gen_rtx_REG (fp_reg_mode, i);
28940 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28941 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28942 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28943 cfa_restores);
28946 offset += fp_reg_size;
28950 /* If we saved cr, restore it here. Just those that were used. */
28951 if (info->cr_save_p)
28952 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28954 /* If this is V.4, unwind the stack pointer after all of the loads
28955 have been done, or set up r11 if we are restoring fp out of line. */
28956 ptr_regno = 1;
28957 if (!restoring_FPRs_inline)
28959 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28960 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28961 ptr_regno = ptr_regno_for_savres (sel);
28964 insn = rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28965 if (REGNO (frame_reg_rtx) == ptr_regno)
28966 frame_off = 0;
28968 if (insn && restoring_FPRs_inline)
28970 if (cfa_restores)
28972 REG_NOTES (insn) = cfa_restores;
28973 cfa_restores = NULL_RTX;
28975 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28976 RTX_FRAME_RELATED_P (insn) = 1;
28979 if (crtl->calls_eh_return)
28981 rtx sa = EH_RETURN_STACKADJ_RTX;
28982 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28985 if (!sibcall && restoring_FPRs_inline)
28987 if (cfa_restores)
28989 /* We can't hang the cfa_restores off a simple return,
28990 since the shrink-wrap code sometimes uses an existing
28991 return. This means there might be a path from
28992 pre-prologue code to this return, and dwarf2cfi code
28993 wants the eh_frame unwinder state to be the same on
28994 all paths to any point. So we need to emit the
28995 cfa_restores before the return. For -m64 we really
28996 don't need epilogue cfa_restores at all, except for
28997 this irritating dwarf2cfi-with-shrink-wrap
28998 requirement; the stack red-zone means eh_frame info
28999 from the prologue telling the unwinder to restore
29000 from the stack is perfectly good right to the end of
29001 the function. */
29002 emit_insn (gen_blockage ());
29003 emit_cfa_restores (cfa_restores);
29004 cfa_restores = NULL_RTX;
29007 emit_jump_insn (targetm.gen_simple_return ());
29010 if (!sibcall && !restoring_FPRs_inline)
29012 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
29013 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
29014 int elt = 0;
29015 RTVEC_ELT (p, elt++) = ret_rtx;
29016 if (lr)
29017 RTVEC_ELT (p, elt++)
29018 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
29020 /* We have to restore more than two FP registers, so branch to the
29021 restore function. It will return to our caller. */
29022 int i;
29023 int reg;
29024 rtx sym;
29026 if (flag_shrink_wrap)
29027 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
29029 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
29030 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
29031 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
29032 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
29034 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29036 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
29038 RTVEC_ELT (p, elt++)
29039 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
29040 if (flag_shrink_wrap
29041 && save_reg_p (info->first_fp_reg_save + i))
29042 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29045 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
29048 if (cfa_restores)
29050 if (sibcall)
29051 /* Ensure the cfa_restores are hung off an insn that won't
29052 be reordered above other restores. */
29053 emit_insn (gen_blockage ());
29055 emit_cfa_restores (cfa_restores);
29059 /* Write function epilogue. */
29061 static void
29062 rs6000_output_function_epilogue (FILE *file)
29064 #if TARGET_MACHO
29065 macho_branch_islands ();
29068 rtx_insn *insn = get_last_insn ();
29069 rtx_insn *deleted_debug_label = NULL;
29071 /* Mach-O doesn't support labels at the end of objects, so if
29072 it looks like we might want one, take special action.
29074 First, collect any sequence of deleted debug labels. */
29075 while (insn
29076 && NOTE_P (insn)
29077 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
29079 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert
29080 a nop; instead set their CODE_LABEL_NUMBER to -1,
29081 otherwise there would be code generation differences
29082 between -g and -g0. */
29083 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
29084 deleted_debug_label = insn;
29085 insn = PREV_INSN (insn);
29088 /* Second, if we have:
29089 label:
29090 barrier
29091 then this needs to be detected, so skip past the barrier. */
29093 if (insn && BARRIER_P (insn))
29094 insn = PREV_INSN (insn);
29096 /* Up to now we've only seen notes or barriers. */
29097 if (insn)
29099 if (LABEL_P (insn)
29100 || (NOTE_P (insn)
29101 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
29102 /* Trailing label: <barrier>. */
29103 fputs ("\tnop\n", file);
29104 else
29106 /* Lastly, see if we have a completely empty function body. */
29107 while (insn && ! INSN_P (insn))
29108 insn = PREV_INSN (insn);
29109 /* If we don't find any insns, we've got an empty function body;
29110 i.e. completely empty - without a return or branch. This is
29111 taken as the case where a function body has been removed
29112 because it contains an inline __builtin_unreachable(). GCC
29113 states that reaching __builtin_unreachable() means UB, so we're
29114 not obliged to do anything special; however, we want
29115 non-zero-sized function bodies. To meet this, and help the
29116 user out, let's trap the case. */
29117 if (insn == NULL)
29118 fputs ("\ttrap\n", file);
29121 else if (deleted_debug_label)
29122 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
29123 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
29124 CODE_LABEL_NUMBER (insn) = -1;
29126 #endif
29128 /* Output a traceback table here. See /usr/include/sys/debug.h for info
29129 on its format.
29131 We don't output a traceback table if -finhibit-size-directive was
29132 used. The documentation for -finhibit-size-directive reads
29133 ``don't output a @code{.size} assembler directive, or anything
29134 else that would cause trouble if the function is split in the
29135 middle, and the two halves are placed at locations far apart in
29136 memory.'' The traceback table has this property, since it
29137 includes the offset from the start of the function to the
29138 traceback table itself.
29140 System V.4 PowerPC targets (and the embedded ABI derived from them) use a
29141 different traceback table. */
29142 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29143 && ! flag_inhibit_size_directive
29144 && rs6000_traceback != traceback_none && !cfun->is_thunk)
29146 const char *fname = NULL;
29147 const char *language_string = lang_hooks.name;
29148 int fixed_parms = 0, float_parms = 0, parm_info = 0;
29149 int i;
29150 int optional_tbtab;
29151 rs6000_stack_t *info = rs6000_stack_info ();
29153 if (rs6000_traceback == traceback_full)
29154 optional_tbtab = 1;
29155 else if (rs6000_traceback == traceback_part)
29156 optional_tbtab = 0;
29157 else
29158 optional_tbtab = !optimize_size && !TARGET_ELF;
29160 if (optional_tbtab)
29162 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
29163 while (*fname == '.') /* V.4 encodes . in the name */
29164 fname++;
29166 /* Need label immediately before tbtab, so we can compute
29167 its offset from the function start. */
29168 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29169 ASM_OUTPUT_LABEL (file, fname);
29172 /* The .tbtab pseudo-op can only be used for the first eight
29173 expressions, since it can't handle the possibly variable
29174 length fields that follow. However, if you omit the optional
29175 fields, the assembler outputs zeros for all optional fields
29176 anyway, giving each variable-length field its minimum length
29177 (as defined in sys/debug.h). Thus we cannot use the .tbtab
29178 pseudo-op at all. */
29180 /* An all-zero word flags the start of the tbtab, for debuggers
29181 that have to find it by searching forward from the entry
29182 point or from the current pc. */
29183 fputs ("\t.long 0\n", file);
29185 /* Tbtab format type. Use format type 0. */
29186 fputs ("\t.byte 0,", file);
29188 /* Language type. Unfortunately, there does not seem to be any
29189 official way to discover the language being compiled, so we
29190 use language_string.
29191 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
29192 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
29193 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
29194 either, so for now use 0. */
29195 if (lang_GNU_C ()
29196 || ! strcmp (language_string, "GNU GIMPLE")
29197 || ! strcmp (language_string, "GNU Go")
29198 || ! strcmp (language_string, "libgccjit"))
29199 i = 0;
29200 else if (! strcmp (language_string, "GNU F77")
29201 || lang_GNU_Fortran ())
29202 i = 1;
29203 else if (! strcmp (language_string, "GNU Pascal"))
29204 i = 2;
29205 else if (! strcmp (language_string, "GNU Ada"))
29206 i = 3;
29207 else if (lang_GNU_CXX ()
29208 || ! strcmp (language_string, "GNU Objective-C++"))
29209 i = 9;
29210 else if (! strcmp (language_string, "GNU Java"))
29211 i = 13;
29212 else if (! strcmp (language_string, "GNU Objective-C"))
29213 i = 14;
29214 else
29215 gcc_unreachable ();
29216 fprintf (file, "%d,", i);
29218 /* 8 single bit fields: global linkage (not set for C extern linkage,
29219 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
29220 from start of procedure stored in tbtab, internal function, function
29221 has controlled storage, function has no toc, function uses fp,
29222 function logs/aborts fp operations. */
29223 /* Assume that fp operations are used if any fp reg must be saved. */
29224 fprintf (file, "%d,",
29225 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
29227 /* 6 bitfields: function is interrupt handler, name present in
29228 proc table, function calls alloca, on condition directives
29229 (controls stack walks, 3 bits), saves condition reg, saves
29230 link reg. */
29231 /* The `function calls alloca' bit seems to be set whenever reg 31 is
29232 set up as a frame pointer, even when there is no alloca call. */
29233 fprintf (file, "%d,",
29234 ((optional_tbtab << 6)
29235 | ((optional_tbtab & frame_pointer_needed) << 5)
29236 | (info->cr_save_p << 1)
29237 | (info->lr_save_p)));
29239 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
29240 (6 bits). */
29241 fprintf (file, "%d,",
29242 (info->push_p << 7) | (64 - info->first_fp_reg_save));
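/* Illustrative worked example (values hypothetical, not from the
   source): with info->push_p = 1 and info->first_fp_reg_save = 50
   (i.e. f18..f31 saved), the byte emitted above is
   (1 << 7) | (64 - 50) = 0x80 | 14 = 142; the fixup-code bit in
   between stays 0.  */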
29244 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
29245 fprintf (file, "%d,", (32 - first_reg_to_save ()));
29247 if (optional_tbtab)
29249 /* Compute the parameter info from the function decl argument
29250 list. */
29251 tree decl;
29252 int next_parm_info_bit = 31;
29254 for (decl = DECL_ARGUMENTS (current_function_decl);
29255 decl; decl = DECL_CHAIN (decl))
29257 rtx parameter = DECL_INCOMING_RTL (decl);
29258 machine_mode mode = GET_MODE (parameter);
29260 if (GET_CODE (parameter) == REG)
29262 if (SCALAR_FLOAT_MODE_P (mode))
29264 int bits;
29266 float_parms++;
29268 switch (mode)
29270 case E_SFmode:
29271 case E_SDmode:
29272 bits = 0x2;
29273 break;
29275 case E_DFmode:
29276 case E_DDmode:
29277 case E_TFmode:
29278 case E_TDmode:
29279 case E_IFmode:
29280 case E_KFmode:
29281 bits = 0x3;
29282 break;
29284 default:
29285 gcc_unreachable ();
29288 /* If only one bit will fit, don't or in this entry. */
29289 if (next_parm_info_bit > 0)
29290 parm_info |= (bits << (next_parm_info_bit - 1));
29291 next_parm_info_bit -= 2;
29293 else
29295 fixed_parms += ((GET_MODE_SIZE (mode)
29296 + (UNITS_PER_WORD - 1))
29297 / UNITS_PER_WORD);
29298 next_parm_info_bit -= 1;
29304 /* Number of fixed point parameters. */
29305 /* This is actually the number of words of fixed point parameters; thus
29306 an 8-byte struct counts as 2, and the maximum value is 8. */
29307 fprintf (file, "%d,", fixed_parms);
29309 /* 2 bitfields: number of floating point parameters (7 bits), parameters
29310 all on stack. */
29311 /* This is actually the number of fp registers that hold parameters;
29312 and thus the maximum value is 13. */
29313 /* Set parameters on stack bit if parameters are not in their original
29314 registers, regardless of whether they are on the stack? Xlc
29315 seems to set the bit when not optimizing. */
29316 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
29318 if (optional_tbtab)
29320 /* Optional fields follow. Some are variable length. */
29322 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
29323 float, 11 double float. */
29324 /* There is an entry for each parameter in a register, in the order
29325 that they occur in the parameter list. Any intervening arguments
29326 on the stack are ignored. If the list overflows a long (max
29327 possible length 34 bits) then completely leave off all elements
29328 that don't fit. */
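/* Illustrative worked example (hypothetical signature): for
   void f (int a, double b, float c), the encoding described above
   starts at bit 31: 'a' takes one 0 bit (bit 31), 'b' places 11 at
   bits 30-29, and 'c' places 10 at bits 28-27, so parm_info ends up
   as (0x3 << 29) | (0x2 << 27) = 0x70000000.  */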
29329 /* Only emit this long if there was at least one parameter. */
29330 if (fixed_parms || float_parms)
29331 fprintf (file, "\t.long %d\n", parm_info);
29333 /* Offset from start of code to tb table. */
29334 fputs ("\t.long ", file);
29335 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29336 RS6000_OUTPUT_BASENAME (file, fname);
29337 putc ('-', file);
29338 rs6000_output_function_entry (file, fname);
29339 putc ('\n', file);
29341 /* Interrupt handler mask. */
29342 /* Omit this long, since we never set the interrupt handler bit
29343 above. */
29345 /* Number of CTL (controlled storage) anchors. */
29346 /* Omit this long, since the has_ctl bit is never set above. */
29348 /* Displacement into stack of each CTL anchor. */
29349 /* Omit this list of longs, because there are no CTL anchors. */
29351 /* Length of function name. */
29352 if (*fname == '*')
29353 ++fname;
29354 fprintf (file, "\t.short %d\n", (int) strlen (fname));
29356 /* Function name. */
29357 assemble_string (fname, strlen (fname));
29359 /* Register for alloca automatic storage; this is always reg 31.
29360 Only emit this if the alloca bit was set above. */
29361 if (frame_pointer_needed)
29362 fputs ("\t.byte 31\n", file);
29364 fputs ("\t.align 2\n", file);
29368 /* Arrange to define .LCTOC1 label, if not already done. */
29369 if (need_toc_init)
29371 need_toc_init = 0;
29372 if (!toc_initialized)
29374 switch_to_section (toc_section);
29375 switch_to_section (current_function_section ());
29380 /* -fsplit-stack support. */
29382 /* A SYMBOL_REF for __morestack. */
29383 static GTY(()) rtx morestack_ref;
29385 static rtx
29386 gen_add3_const (rtx rt, rtx ra, long c)
29388 if (TARGET_64BIT)
29389 return gen_adddi3 (rt, ra, GEN_INT (c));
29390 else
29391 return gen_addsi3 (rt, ra, GEN_INT (c));
29394 /* Emit -fsplit-stack prologue, which goes before the regular function
29395 prologue (at local entry point in the case of ELFv2). */
29397 void
29398 rs6000_expand_split_stack_prologue (void)
29400 rs6000_stack_t *info = rs6000_stack_info ();
29401 unsigned HOST_WIDE_INT allocate;
29402 long alloc_hi, alloc_lo;
29403 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
29404 rtx_insn *insn;
29406 gcc_assert (flag_split_stack && reload_completed);
29408 if (!info->push_p)
29409 return;
29411 if (global_regs[29])
29413 error ("%qs uses register r29", "-fsplit-stack");
29414 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
29415 "conflicts with %qD", global_regs_decl[29]);
29418 allocate = info->total_size;
29419 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
29421 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
29422 return;
29424 if (morestack_ref == NULL_RTX)
29426 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
29427 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
29428 | SYMBOL_FLAG_FUNCTION);
29431 r0 = gen_rtx_REG (Pmode, 0);
29432 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29433 r12 = gen_rtx_REG (Pmode, 12);
29434 emit_insn (gen_load_split_stack_limit (r0));
29435 /* Always emit two insns here to calculate the requested stack,
29436 so that the linker can edit them when adjusting size for calling
29437 non-split-stack code. */
29438 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
29439 alloc_lo = -allocate - alloc_hi;
29440 if (alloc_hi != 0)
29442 emit_insn (gen_add3_const (r12, r1, alloc_hi));
29443 if (alloc_lo != 0)
29444 emit_insn (gen_add3_const (r12, r12, alloc_lo));
29445 else
29446 emit_insn (gen_nop ());
29448 else
29450 emit_insn (gen_add3_const (r12, r1, alloc_lo));
29451 emit_insn (gen_nop ());
29454 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
29455 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
29456 ok_label = gen_label_rtx ();
29457 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29458 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
29459 gen_rtx_LABEL_REF (VOIDmode, ok_label),
29460 pc_rtx);
29461 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29462 JUMP_LABEL (insn) = ok_label;
29463 /* Mark the jump as very likely to be taken. */
29464 add_reg_br_prob_note (insn, profile_probability::very_likely ());
29466 lr = gen_rtx_REG (Pmode, LR_REGNO);
29467 insn = emit_move_insn (r0, lr);
29468 RTX_FRAME_RELATED_P (insn) = 1;
29469 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
29470 RTX_FRAME_RELATED_P (insn) = 1;
29472 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
29473 const0_rtx, const0_rtx));
29474 call_fusage = NULL_RTX;
29475 use_reg (&call_fusage, r12);
29476 /* Say the call uses r0, even though it doesn't, to stop regrename
29477 from twiddling with the insns saving lr, trashing args for cfun.
29478 The insns restoring lr are similarly protected by making
29479 split_stack_return use r0. */
29480 use_reg (&call_fusage, r0);
29481 add_function_usage_to (insn, call_fusage);
29482 /* Indicate that this function can't jump to non-local gotos. */
29483 make_reg_eh_region_note_nothrow_nononlocal (insn);
29484 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
29485 insn = emit_move_insn (lr, r0);
29486 add_reg_note (insn, REG_CFA_RESTORE, lr);
29487 RTX_FRAME_RELATED_P (insn) = 1;
29488 emit_insn (gen_split_stack_return ());
29490 emit_label (ok_label);
29491 LABEL_NUSES (ok_label) = 1;
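/* Minimal standalone sketch (not part of GCC) of the two-insn frame
   size split emitted above: round the high part so the low part fits
   a signed 16-bit addi immediate.  E.g. allocate = 0x12340 gives
   hi = -0x10000 and lo = -0x2340, and hi + lo == -allocate.  */
static void
split_frame_adjustment (long allocate, long *hi, long *lo)
{
  *hi = (-allocate + 0x8000) & ~0xffffL;	/* addis-reachable part.  */
  *lo = -allocate - *hi;			/* fits in 16 signed bits.  */
}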
29494 /* Return the internal arg pointer used for function incoming
29495 arguments. When -fsplit-stack, the arg pointer is r12 so we need
29496 to copy it to a pseudo in order for it to be preserved over calls
29497 and suchlike. We'd really like to use a pseudo here for the
29498 internal arg pointer but data-flow analysis is not prepared to
29499 accept pseudos as live at the beginning of a function. */
29501 static rtx
29502 rs6000_internal_arg_pointer (void)
29504 if (flag_split_stack
29505 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
29506 == NULL))
29509 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
29511 rtx pat;
29513 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
29514 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
29516 /* Put the pseudo initialization right after the note at the
29517 beginning of the function. */
29518 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
29519 gen_rtx_REG (Pmode, 12));
29520 push_topmost_sequence ();
29521 emit_insn_after (pat, get_insns ());
29522 pop_topmost_sequence ();
29524 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
29525 FIRST_PARM_OFFSET (current_function_decl));
29526 return copy_to_reg (ret);
29528 return virtual_incoming_args_rtx;
29531 /* We may have to tell the dataflow pass that the split stack prologue
29532 is initializing a register. */
29534 static void
29535 rs6000_live_on_entry (bitmap regs)
29537 if (flag_split_stack)
29538 bitmap_set_bit (regs, 12);
29541 /* Emit -fsplit-stack dynamic stack allocation space check. */
29543 void
29544 rs6000_split_stack_space_check (rtx size, rtx label)
29546 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29547 rtx limit = gen_reg_rtx (Pmode);
29548 rtx requested = gen_reg_rtx (Pmode);
29549 rtx cmp = gen_reg_rtx (CCUNSmode);
29550 rtx jump;
29552 emit_insn (gen_load_split_stack_limit (limit));
29553 if (CONST_INT_P (size))
29554 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
29555 else
29557 size = force_reg (Pmode, size);
29558 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
29560 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
29561 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29562 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
29563 gen_rtx_LABEL_REF (VOIDmode, label),
29564 pc_rtx);
29565 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29566 JUMP_LABEL (jump) = label;
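/* Minimal sketch (plain C, hypothetical names, not the RTL above) of
   the test just emitted: the branch to LABEL is taken when the
   requested allocation still fits above the split-stack limit, using
   an unsigned comparison as CCUNSmode does.  */
static int
split_stack_fits (unsigned long sp, unsigned long limit,
		  unsigned long size)
{
  unsigned long requested = sp - size;	/* Stack grows downward.  */
  return requested >= limit;		/* True: jump to LABEL.  */
}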
29569 /* A C compound statement that outputs the assembler code for a thunk
29570 function, used to implement C++ virtual function calls with
29571 multiple inheritance. The thunk acts as a wrapper around a virtual
29572 function, adjusting the implicit object parameter before handing
29573 control off to the real function.
29575 First, emit code to add the integer DELTA to the location that
29576 contains the incoming first argument. Assume that this argument
29577 contains a pointer, and is the one used to pass the `this' pointer
29578 in C++. This is the incoming argument *before* the function
29579 prologue, e.g. `%o0' on a sparc. The addition must preserve the
29580 values of all other incoming arguments.
29582 After the addition, emit code to jump to FUNCTION, which is a
29583 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
29584 not touch the return address. Hence returning from FUNCTION will
29585 return to whoever called the current `thunk'.
29587 The effect must be as if FUNCTION had been called directly with the
29588 adjusted first argument. This macro is responsible for emitting
29589 all of the code for a thunk function; output_function_prologue()
29590 and output_function_epilogue() are not invoked.
29592 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
29593 been extracted from it.) It might possibly be useful on some
29594 targets, but probably not.
29596 If you do not define this macro, the target-independent code in the
29597 C++ frontend will generate a less efficient heavyweight thunk that
29598 calls FUNCTION instead of jumping to it. The generic approach does
29599 not support varargs. */
29601 static void
29602 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
29603 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
29604 tree function)
29606 rtx this_rtx, funexp;
29607 rtx_insn *insn;
29609 reload_completed = 1;
29610 epilogue_completed = 1;
29612 /* Mark the end of the (empty) prologue. */
29613 emit_note (NOTE_INSN_PROLOGUE_END);
29615 /* Find the "this" pointer. If the function returns a structure,
29616 the structure return pointer is in r3. */
29617 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
29618 this_rtx = gen_rtx_REG (Pmode, 4);
29619 else
29620 this_rtx = gen_rtx_REG (Pmode, 3);
29622 /* Apply the constant offset, if required. */
29623 if (delta)
29624 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
29626 /* Apply the offset from the vtable, if required. */
29627 if (vcall_offset)
29629 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
29630 rtx tmp = gen_rtx_REG (Pmode, 12);
29632 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
29633 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
29635 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
29636 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
29638 else
29640 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
29642 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
29644 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
29647 /* Generate a tail call to the target function. */
29648 if (!TREE_USED (function))
29650 assemble_external (function);
29651 TREE_USED (function) = 1;
29653 funexp = XEXP (DECL_RTL (function), 0);
29654 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29656 #if TARGET_MACHO
29657 if (MACHOPIC_INDIRECT)
29658 funexp = machopic_indirect_call_target (funexp);
29659 #endif
29661 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
29662 generate sibcall RTL explicitly. */
29663 insn = emit_call_insn (
29664 gen_rtx_PARALLEL (VOIDmode,
29665 gen_rtvec (3,
29666 gen_rtx_CALL (VOIDmode,
29667 funexp, const0_rtx),
29668 gen_rtx_USE (VOIDmode, const0_rtx),
29669 simple_return_rtx)));
29670 SIBLING_CALL_P (insn) = 1;
29671 emit_barrier ();
29673 /* Run just enough of rest_of_compilation to get the insns emitted.
29674 There's not really enough bulk here to make other passes such as
29675 instruction scheduling worthwhile. Note that use_thunk calls
29676 assemble_start_function and assemble_end_function. */
29677 insn = get_insns ();
29678 shorten_branches (insn);
29679 final_start_function (insn, file, 1);
29680 final (insn, file, 1);
29681 final_end_function ();
29683 reload_completed = 0;
29684 epilogue_completed = 0;
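/* Minimal sketch (hypothetical C, not the emitted RTL) of the pointer
   adjustment the thunk above performs before tail-calling FUNCTION.  */
static void *
thunk_adjust_this (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;	/* Constant adjustment.  */
  if (vcall_offset)
    {
      char *vtable = *(char **) p;	/* Load the vtable pointer.  */
      p += *(long *) (vtable + vcall_offset);
    }
  return p;				/* Handed on to FUNCTION.  */
}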
29687 /* A quick summary of the various types of 'constant-pool tables'
29688 under PowerPC:
29690 Target       Flags           Name             One table per
29691 AIX          (none)          AIX TOC          object file
29692 AIX          -mfull-toc      AIX TOC          object file
29693 AIX          -mminimal-toc   AIX minimal TOC  translation unit
29694 SVR4/EABI    (none)          SVR4 SDATA       object file
29695 SVR4/EABI    -fpic           SVR4 pic         object file
29696 SVR4/EABI    -fPIC           SVR4 PIC         translation unit
29697 SVR4/EABI    -mrelocatable   EABI TOC         function
29698 SVR4/EABI    -maix           AIX TOC          object file
29699 SVR4/EABI    -maix -mminimal-toc
29700                              AIX minimal TOC  translation unit
29702 Name             Reg.  Set by   entries  contains:
29703                        made by  addrs?   fp?      sum?
29705 AIX TOC          2     crt0     as       Y        option   option
29706 AIX minimal TOC  30    prolog   gcc      Y        Y        option
29707 SVR4 SDATA       13    crt0     gcc      N        Y        N
29708 SVR4 pic         30    prolog   ld       Y        not yet  N
29709 SVR4 PIC         30    prolog   gcc      Y        option   option
29710 EABI TOC         30    prolog   gcc      Y        option   option
29714 /* Hash functions for the hash table. */
29716 static unsigned
29717 rs6000_hash_constant (rtx k)
29719 enum rtx_code code = GET_CODE (k);
29720 machine_mode mode = GET_MODE (k);
29721 unsigned result = (code << 3) ^ mode;
29722 const char *format;
29723 int flen, fidx;
29725 format = GET_RTX_FORMAT (code);
29726 flen = strlen (format);
29727 fidx = 0;
29729 switch (code)
29731 case LABEL_REF:
29732 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
29734 case CONST_WIDE_INT:
29736 int i;
29737 flen = CONST_WIDE_INT_NUNITS (k);
29738 for (i = 0; i < flen; i++)
29739 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
29740 return result;
29743 case CONST_DOUBLE:
29744 if (mode != VOIDmode)
29745 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
29746 flen = 2;
29747 break;
29749 case CODE_LABEL:
29750 fidx = 3;
29751 break;
29753 default:
29754 break;
29757 for (; fidx < flen; fidx++)
29758 switch (format[fidx])
29760 case 's':
29762 unsigned i, len;
29763 const char *str = XSTR (k, fidx);
29764 len = strlen (str);
29765 result = result * 613 + len;
29766 for (i = 0; i < len; i++)
29767 result = result * 613 + (unsigned) str[i];
29768 break;
29770 case 'u':
29771 case 'e':
29772 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29773 break;
29774 case 'i':
29775 case 'n':
29776 result = result * 613 + (unsigned) XINT (k, fidx);
29777 break;
29778 case 'w':
29779 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29780 result = result * 613 + (unsigned) XWINT (k, fidx);
29781 else
29783 size_t i;
29784 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29785 result = result * 613 + (unsigned) (XWINT (k, fidx)
29786 >> CHAR_BIT * i);
29788 break;
29789 case '0':
29790 break;
29791 default:
29792 gcc_unreachable ();
29795 return result;
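/* Standalone sketch (not part of GCC) of the multiplicative mixing
   used above; the string case folds each byte into the result with
   the small prime 613, while sub-rtxes are folded in with 1231.  */
static unsigned
hash_bytes_613 (const char *str)
{
  unsigned result = 0;
  for (; *str; str++)
    result = result * 613 + (unsigned char) *str;
  return result;
}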
29798 hashval_t
29799 toc_hasher::hash (toc_hash_struct *thc)
29801 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29804 /* Compare H1 and H2 for equivalence. */
29806 bool
29807 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29809 rtx r1 = h1->key;
29810 rtx r2 = h2->key;
29812 if (h1->key_mode != h2->key_mode)
29813 return 0;
29815 return rtx_equal_p (r1, r2);
29818 /* These are the names given by the C++ front-end to vtables, and
29819 vtable-like objects. Ideally, this logic should not be here;
29820 instead, there should be some programmatic way of inquiring as
29821 to whether or not an object is a vtable. */
29823 #define VTABLE_NAME_P(NAME) \
29824 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
29825 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
29826 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
29827 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
29828 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
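/* Illustrative use (hypothetical symbols): VTABLE_NAME_P matches the
   Itanium-ABI mangled prefixes, so "_ZTV3Foo" (vtable for Foo) and
   "_ZTI3Foo" (typeinfo for Foo) are accepted, while an ordinary
   function such as "_Z3barv" is not.  Note the expansion tests the
   variable `name' from the enclosing scope, not the macro argument.  */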
29830 #ifdef NO_DOLLAR_IN_LABEL
29831 /* Return a GGC-allocated character string translating dollar signs in
29832 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
29834 const char *
29835 rs6000_xcoff_strip_dollar (const char *name)
29837 char *strip, *p;
29838 const char *q;
29839 size_t len;
29841 q = (const char *) strchr (name, '$');
29843 if (q == 0 || q == name)
29844 return name;
29846 len = strlen (name);
29847 strip = XALLOCAVEC (char, len + 1);
29848 strcpy (strip, name);
29849 p = strip + (q - name);
29850 while (p)
29852 *p = '_';
29853 p = strchr (p + 1, '$');
29856 return ggc_alloc_string (strip, len);
29858 #endif
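/* Illustrative example (hypothetical input): rs6000_xcoff_strip_dollar
   ("foo$bar$baz") returns "foo_bar_baz"; a name without '$', or one
   whose first character is '$', is returned unchanged.  */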
29860 void
29861 rs6000_output_symbol_ref (FILE *file, rtx x)
29863 const char *name = XSTR (x, 0);
29865 /* Currently C++ toc references to vtables can be emitted before it
29866 is decided whether the vtable is public or private. If this is
29867 the case, then the linker will eventually complain that there is
29868 a reference to an unknown section. Thus, for vtables only,
29869 we emit the TOC reference to reference the identifier and not the
29870 symbol. */
29871 if (VTABLE_NAME_P (name))
29873 RS6000_OUTPUT_BASENAME (file, name);
29875 else
29876 assemble_name (file, name);
29879 /* Output a TOC entry. We derive the entry name from what is being
29880 written. */
29882 void
29883 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29885 char buf[256];
29886 const char *name = buf;
29887 rtx base = x;
29888 HOST_WIDE_INT offset = 0;
29890 gcc_assert (!TARGET_NO_TOC);
29892 /* When the linker won't eliminate them, don't output duplicate
29893 TOC entries (this happens on AIX if there is any kind of TOC,
29894 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29895 CODE_LABELs. */
29896 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29898 struct toc_hash_struct *h;
29900 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29901 time because GGC is not initialized at that point. */
29902 if (toc_hash_table == NULL)
29903 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29905 h = ggc_alloc<toc_hash_struct> ();
29906 h->key = x;
29907 h->key_mode = mode;
29908 h->labelno = labelno;
29910 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29911 if (*found == NULL)
29912 *found = h;
29913 else /* This is indeed a duplicate.
29914 Set this label equal to that label. */
29916 fputs ("\t.set ", file);
29917 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29918 fprintf (file, "%d,", labelno);
29919 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29920 fprintf (file, "%d\n", ((*found)->labelno));
29922 #ifdef HAVE_AS_TLS
29923 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
29924 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29925 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29927 fputs ("\t.set ", file);
29928 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29929 fprintf (file, "%d,", labelno);
29930 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29931 fprintf (file, "%d\n", ((*found)->labelno));
29933 #endif
29934 return;
29938 /* If we're going to put a double constant in the TOC, make sure it's
29939 aligned properly when strict alignment is on. */
29940 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29941 && STRICT_ALIGNMENT
29942 && GET_MODE_BITSIZE (mode) >= 64
29943 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
29944 ASM_OUTPUT_ALIGN (file, 3);
29947 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29949 /* Handle FP constants specially. Note that if we have a minimal
29950 TOC, things we put here aren't actually in the TOC, so we can allow
29951 FP constants. */
29952 if (GET_CODE (x) == CONST_DOUBLE &&
29953 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29954 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29956 long k[4];
29958 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29959 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29960 else
29961 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29963 if (TARGET_64BIT)
29965 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29966 fputs (DOUBLE_INT_ASM_OP, file);
29967 else
29968 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29969 k[0] & 0xffffffff, k[1] & 0xffffffff,
29970 k[2] & 0xffffffff, k[3] & 0xffffffff);
29971 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29972 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29973 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29974 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29975 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29976 return;
29978 else
29980 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29981 fputs ("\t.long ", file);
29982 else
29983 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29984 k[0] & 0xffffffff, k[1] & 0xffffffff,
29985 k[2] & 0xffffffff, k[3] & 0xffffffff);
29986 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29987 k[0] & 0xffffffff, k[1] & 0xffffffff,
29988 k[2] & 0xffffffff, k[3] & 0xffffffff);
29989 return;
29992 else if (GET_CODE (x) == CONST_DOUBLE &&
29993 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29995 long k[2];
29997 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29998 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29999 else
30000 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
30002 if (TARGET_64BIT)
30004 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30005 fputs (DOUBLE_INT_ASM_OP, file);
30006 else
30007 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
30008 k[0] & 0xffffffff, k[1] & 0xffffffff);
30009 fprintf (file, "0x%lx%08lx\n",
30010 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
30011 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
30012 return;
30014 else
30016 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30017 fputs ("\t.long ", file);
30018 else
30019 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
30020 k[0] & 0xffffffff, k[1] & 0xffffffff);
30021 fprintf (file, "0x%lx,0x%lx\n",
30022 k[0] & 0xffffffff, k[1] & 0xffffffff);
30023 return;
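/* Illustrative worked example (assuming IEEE double): the DFmode
   constant 1.0 has the image 0x3ff0000000000000, so on a big-endian
   target k[0] = 0x3ff00000 and k[1] = 0, and a 64-bit ELF target
   emits the single doubleword 0x3ff0000000000000.  */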
30026 else if (GET_CODE (x) == CONST_DOUBLE &&
30027 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
30029 long l;
30031 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30032 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
30033 else
30034 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
30036 if (TARGET_64BIT)
30038 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30039 fputs (DOUBLE_INT_ASM_OP, file);
30040 else
30041 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
30042 if (WORDS_BIG_ENDIAN)
30043 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
30044 else
30045 fprintf (file, "0x%lx\n", l & 0xffffffff);
30046 return;
30048 else
30050 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30051 fputs ("\t.long ", file);
30052 else
30053 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
30054 fprintf (file, "0x%lx\n", l & 0xffffffff);
30055 return;
30058 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
30060 unsigned HOST_WIDE_INT low;
30061 HOST_WIDE_INT high;
30063 low = INTVAL (x) & 0xffffffff;
30064 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
30066 /* TOC entries are always Pmode-sized, so when big-endian,
30067 smaller integer constants in the TOC need to be padded.
30068 (This is still a win over putting the constants in
30069 a separate constant pool, because then we'd have
30070 to have both a TOC entry _and_ the actual constant.)
30072 For a 32-bit target, CONST_INT values are loaded and shifted
30073 entirely within `low' and can be stored in one TOC entry. */
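/* Illustrative worked example: an SImode constant 0x1234 in a 64-bit
   big-endian TOC is shifted left by POINTER_SIZE (64) minus
   GET_MODE_BITSIZE (32), emitting 0x0000123400000000, so the value
   occupies the high-order half of the Pmode-sized slot.  */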
30075 /* It would be easy to make this work, but it doesn't now. */
30076 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
30078 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
30080 low |= high << 32;
30081 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
30082 high = (HOST_WIDE_INT) low >> 32;
30083 low &= 0xffffffff;
30086 if (TARGET_64BIT)
30088 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30089 fputs (DOUBLE_INT_ASM_OP, file);
30090 else
30091 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
30092 (long) high & 0xffffffff, (long) low & 0xffffffff);
30093 fprintf (file, "0x%lx%08lx\n",
30094 (long) high & 0xffffffff, (long) low & 0xffffffff);
30095 return;
30097 else
30099 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
30101 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30102 fputs ("\t.long ", file);
30103 else
30104 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
30105 (long) high & 0xffffffff, (long) low & 0xffffffff);
30106 fprintf (file, "0x%lx,0x%lx\n",
30107 (long) high & 0xffffffff, (long) low & 0xffffffff);
30109 else
30111 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30112 fputs ("\t.long ", file);
30113 else
30114 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
30115 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
30117 return;
30121 if (GET_CODE (x) == CONST)
30123 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
30124 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
30126 base = XEXP (XEXP (x, 0), 0);
30127 offset = INTVAL (XEXP (XEXP (x, 0), 1));
30130 switch (GET_CODE (base))
30132 case SYMBOL_REF:
30133 name = XSTR (base, 0);
30134 break;
30136 case LABEL_REF:
30137 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
30138 CODE_LABEL_NUMBER (XEXP (base, 0)));
30139 break;
30141 case CODE_LABEL:
30142 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
30143 break;
30145 default:
30146 gcc_unreachable ();
30149 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30150 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
30151 else
30153 fputs ("\t.tc ", file);
30154 RS6000_OUTPUT_BASENAME (file, name);
30156 if (offset < 0)
30157 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
30158 else if (offset)
30159 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
30161 /* Mark large TOC symbols on AIX with [TE] so they are mapped
30162 after other TOC symbols, reducing overflow of small TOC access
30163 to [TC] symbols. */
30164 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
30165 ? "[TE]," : "[TC],", file);
30168 /* Currently C++ toc references to vtables can be emitted before it
30169 is decided whether the vtable is public or private. If this is
30170 the case, then the linker will eventually complain that there is
30171 a TOC reference to an unknown section. Thus, for vtables only,
30172 we emit the TOC reference to reference the symbol and not the
30173 section. */
30174 if (VTABLE_NAME_P (name))
30176 RS6000_OUTPUT_BASENAME (file, name);
30177 if (offset < 0)
30178 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
30179 else if (offset > 0)
30180 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
30182 else
30183 output_addr_const (file, x);
30185 #if HAVE_AS_TLS
30186 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
30188 switch (SYMBOL_REF_TLS_MODEL (base))
30190 case 0:
30191 break;
30192 case TLS_MODEL_LOCAL_EXEC:
30193 fputs ("@le", file);
30194 break;
30195 case TLS_MODEL_INITIAL_EXEC:
30196 fputs ("@ie", file);
30197 break;
30198 /* Use global-dynamic for local-dynamic. */
30199 case TLS_MODEL_GLOBAL_DYNAMIC:
30200 case TLS_MODEL_LOCAL_DYNAMIC:
30201 putc ('\n', file);
30202 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
30203 fputs ("\t.tc .", file);
30204 RS6000_OUTPUT_BASENAME (file, name);
30205 fputs ("[TC],", file);
30206 output_addr_const (file, x);
30207 fputs ("@m", file);
30208 break;
30209 default:
30210 gcc_unreachable ();
30213 #endif
30215 putc ('\n', file);
30218 /* Output an assembler pseudo-op to write an ASCII string of N characters
30219 starting at P to FILE.
30221 On the RS/6000, we have to do this using the .byte operation and
30222 write out special characters outside the quoted string.
30223 Also, the assembler is broken; very long strings are truncated,
30224 so we must artificially break them up early. */
30226 void
30227 output_ascii (FILE *file, const char *p, int n)
30229 char c;
30230 int i, count_string;
30231 const char *for_string = "\t.byte \"";
30232 const char *for_decimal = "\t.byte ";
30233 const char *to_close = NULL;
30235 count_string = 0;
30236 for (i = 0; i < n; i++)
30238 c = *p++;
30239 if (c >= ' ' && c < 0177)
30241 if (for_string)
30242 fputs (for_string, file);
30243 putc (c, file);
30245 /* Write two quotes to get one. */
30246 if (c == '"')
30248 putc (c, file);
30249 ++count_string;
30252 for_string = NULL;
30253 for_decimal = "\"\n\t.byte ";
30254 to_close = "\"\n";
30255 ++count_string;
30257 if (count_string >= 512)
30259 fputs (to_close, file);
30261 for_string = "\t.byte \"";
30262 for_decimal = "\t.byte ";
30263 to_close = NULL;
30264 count_string = 0;
30267 else
30269 if (for_decimal)
30270 fputs (for_decimal, file);
30271 fprintf (file, "%d", c);
30273 for_string = "\n\t.byte \"";
30274 for_decimal = ", ";
30275 to_close = "\n";
30276 count_string = 0;
30280 /* Now close the string if we have written one. Then end the line. */
30281 if (to_close)
30282 fputs (to_close, file);
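/* Illustrative example (hypothetical input): for the three bytes
   'H', 'i', '\n' the rules above produce
	.byte "Hi"
	.byte 10
   printable characters stay inside a quoted string (with '"'
   doubled), everything else becomes a decimal .byte value.  */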
30285 /* Generate a unique section name for FILENAME for a section type
30286 represented by SECTION_DESC. Output goes into BUF.
30288 SECTION_DESC can be any string, as long as it is different for each
30289 possible section type.
30291 We name the section in the same manner as xlc. The name begins with an
30292 underscore followed by the filename (after stripping any leading directory
30293 names) with the last period replaced by the string SECTION_DESC. If
30294 FILENAME does not contain a period, SECTION_DESC is appended to the end of
30295 the name. */
30297 void
30298 rs6000_gen_section_name (char **buf, const char *filename,
30299 const char *section_desc)
30301 const char *q, *after_last_slash, *last_period = 0;
30302 char *p;
30303 int len;
30305 after_last_slash = filename;
30306 for (q = filename; *q; q++)
30308 if (*q == '/')
30309 after_last_slash = q + 1;
30310 else if (*q == '.')
30311 last_period = q;
30314 len = strlen (after_last_slash) + strlen (section_desc) + 2;
30315 *buf = (char *) xmalloc (len);
30317 p = *buf;
30318 *p++ = '_';
30320 for (q = after_last_slash; *q; q++)
30322 if (q == last_period)
30324 strcpy (p, section_desc);
30325 p += strlen (section_desc);
30326 break;
30329 else if (ISALNUM (*q))
30330 *p++ = *q;
30333 if (last_period == 0)
30334 strcpy (p, section_desc);
30335 else
30336 *p = '\0';
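/* Illustrative example (hypothetical arguments): for FILENAME
   "dir/foo.c" and SECTION_DESC "bss_" the buffer becomes "_foobss_":
   the directory prefix is dropped, non-alphanumeric characters are
   skipped, and the last period and everything after it are replaced
   by SECTION_DESC.  */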
30339 /* Emit profile function. */
30341 void
30342 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
30344 /* Non-standard profiling for kernels, which just saves LR then calls
30345 _mcount without worrying about arg saves. The idea is to change
30346 the function prologue as little as possible as it isn't easy to
30347 account for arg save/restore code added just for _mcount. */
30348 if (TARGET_PROFILE_KERNEL)
30349 return;
30351 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30353 #ifndef NO_PROFILE_COUNTERS
30354 # define NO_PROFILE_COUNTERS 0
30355 #endif
30356 if (NO_PROFILE_COUNTERS)
30357 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30358 LCT_NORMAL, VOIDmode);
30359 else
30361 char buf[30];
30362 const char *label_name;
30363 rtx fun;
30365 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30366 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
30367 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
30369 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30370 LCT_NORMAL, VOIDmode, fun, Pmode);
30373 else if (DEFAULT_ABI == ABI_DARWIN)
30375 const char *mcount_name = RS6000_MCOUNT;
30376 int caller_addr_regno = LR_REGNO;
30378 /* Be conservative and always set this, at least for now. */
30379 crtl->uses_pic_offset_table = 1;
30381 #if TARGET_MACHO
30382 /* For PIC code, set up a stub and collect the caller's address
30383 from r0, which is where the prologue puts it. */
30384 if (MACHOPIC_INDIRECT
30385 && crtl->uses_pic_offset_table)
30386 caller_addr_regno = 0;
30387 #endif
30388 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
30389 LCT_NORMAL, VOIDmode,
30390 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
30394 /* Write function profiler code. */
30396 void
30397 output_function_profiler (FILE *file, int labelno)
30399 char buf[100];
30401 switch (DEFAULT_ABI)
30403 default:
30404 gcc_unreachable ();
30406 case ABI_V4:
30407 if (!TARGET_32BIT)
30409 warning (0, "no profiling of 64-bit code for this ABI");
30410 return;
30412 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30413 fprintf (file, "\tmflr %s\n", reg_names[0]);
30414 if (NO_PROFILE_COUNTERS)
30416 asm_fprintf (file, "\tstw %s,4(%s)\n",
30417 reg_names[0], reg_names[1]);
30419 else if (TARGET_SECURE_PLT && flag_pic)
30421 if (TARGET_LINK_STACK)
30423 char name[32];
30424 get_ppc476_thunk_name (name);
30425 asm_fprintf (file, "\tbl %s\n", name);
30427 else
30428 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
30429 asm_fprintf (file, "\tstw %s,4(%s)\n",
30430 reg_names[0], reg_names[1]);
30431 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30432 asm_fprintf (file, "\taddis %s,%s,",
30433 reg_names[12], reg_names[12]);
30434 assemble_name (file, buf);
30435 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
30436 assemble_name (file, buf);
30437 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
30439 else if (flag_pic == 1)
30441 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
30442 asm_fprintf (file, "\tstw %s,4(%s)\n",
30443 reg_names[0], reg_names[1]);
30444 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30445 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
30446 assemble_name (file, buf);
30447 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
30449 else if (flag_pic > 1)
30451 asm_fprintf (file, "\tstw %s,4(%s)\n",
30452 reg_names[0], reg_names[1]);
30453 /* Now, we need to get the address of the label. */
30454 if (TARGET_LINK_STACK)
30456 char name[32];
30457 get_ppc476_thunk_name (name);
30458 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
30459 assemble_name (file, buf);
30460 fputs ("-.\n1:", file);
30461 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30462 asm_fprintf (file, "\taddi %s,%s,4\n",
30463 reg_names[11], reg_names[11]);
30465 else
30467 fputs ("\tbcl 20,31,1f\n\t.long ", file);
30468 assemble_name (file, buf);
30469 fputs ("-.\n1:", file);
30470 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30472 asm_fprintf (file, "\tlwz %s,0(%s)\n",
30473 reg_names[0], reg_names[11]);
30474 asm_fprintf (file, "\tadd %s,%s,%s\n",
30475 reg_names[0], reg_names[0], reg_names[11]);
30477 else
30479 asm_fprintf (file, "\tlis %s,", reg_names[12]);
30480 assemble_name (file, buf);
30481 fputs ("@ha\n", file);
30482 asm_fprintf (file, "\tstw %s,4(%s)\n",
30483 reg_names[0], reg_names[1]);
30484 asm_fprintf (file, "\tla %s,", reg_names[0]);
30485 assemble_name (file, buf);
30486 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
30489 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
30490 fprintf (file, "\tbl %s%s\n",
30491 RS6000_MCOUNT, flag_pic ? "@plt" : "");
30492 break;
30494 case ABI_AIX:
30495 case ABI_ELFv2:
30496 case ABI_DARWIN:
30497 /* Don't do anything, done in output_profile_hook (). */
30498 break;
30504 /* The following variable value is the last issued insn. */
30506 static rtx_insn *last_scheduled_insn;
30508 /* The following variable helps to balance issuing of load and
30509 store instructions. */
30511 static int load_store_pendulum;
30513 /* The following variable helps pair divide insns during scheduling. */
30514 static int divide_cnt;
30515 /* The following variable helps pair and alternate vector and vector load
30516 insns during scheduling. */
30517 static int vec_pairing;
30520 /* Power4 load update and store update instructions are cracked into a
30521 load or store and an integer insn which are executed in the same cycle.
30522 Branches have their own dispatch slot which does not count against the
30523 GCC issue rate, but it changes the program flow so there are no other
30524 instructions to issue in this cycle. */
30526 static int
30527 rs6000_variable_issue_1 (rtx_insn *insn, int more)
30529 last_scheduled_insn = insn;
30530 if (GET_CODE (PATTERN (insn)) == USE
30531 || GET_CODE (PATTERN (insn)) == CLOBBER)
30533 cached_can_issue_more = more;
30534 return cached_can_issue_more;
30537 if (insn_terminates_group_p (insn, current_group))
30539 cached_can_issue_more = 0;
30540 return cached_can_issue_more;
30543 /* If the insn has no reservation, leave the issue count unchanged.  */
30544 if (recog_memoized (insn) < 0)
30545 return more;
30547 if (rs6000_sched_groups)
30549 if (is_microcoded_insn (insn))
30550 cached_can_issue_more = 0;
30551 else if (is_cracked_insn (insn))
30552 cached_can_issue_more = more > 2 ? more - 2 : 0;
30553 else
30554 cached_can_issue_more = more - 1;
30556 return cached_can_issue_more;
30559 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
30560 return 0;
30562 cached_can_issue_more = more - 1;
30563 return cached_can_issue_more;
30566 static int
30567 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
30569 int r = rs6000_variable_issue_1 (insn, more);
30570 if (verbose)
30571 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
30572 return r;
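/* An illustrative, standalone sketch (not part of this port): the slot
   accounting that rs6000_variable_issue_1 performs above for the dispatch
   group processors, with the attribute tests replaced by plain flags.
   All names here are invented for illustration.  */
#if 0
static int
model_variable_issue (int more, int microcoded_p, int cracked_p)
{
  if (microcoded_p)
    return 0;				/* A microcoded insn ends the group.  */
  if (cracked_p)
    return more > 2 ? more - 2 : 0;	/* A cracked insn takes two slots.  */
  return more - 1;			/* An ordinary insn takes one slot.  */
}
#endif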
30575 /* Adjust the cost of a scheduling dependency. Return the new cost of
30576 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
30578 static int
30579 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
30580 unsigned int)
30582 enum attr_type attr_type;
30584 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
30585 return cost;
30587 switch (dep_type)
30589 case REG_DEP_TRUE:
30591 /* Data dependency; DEP_INSN writes a register that INSN reads
30592 some cycles later. */
30594 /* Separate a load from a narrower, dependent store. */
30595 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9)
30596 && GET_CODE (PATTERN (insn)) == SET
30597 && GET_CODE (PATTERN (dep_insn)) == SET
30598 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
30599 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
30600 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
30601 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
30602 return cost + 14;
30604 attr_type = get_attr_type (insn);
30606 switch (attr_type)
30608 case TYPE_JMPREG:
30609 /* Tell the first scheduling pass about the latency between
30610 a mtctr and bctr (and mtlr and br/blr). The first
30611 scheduling pass will not know about this latency since
30612 the mtctr instruction, which has the latency associated
30613 to it, will be generated by reload. */
30614 return 4;
30615 case TYPE_BRANCH:
30616 /* Leave some extra cycles between a compare and its
30617 dependent branch, to inhibit expensive mispredicts. */
30618 if ((rs6000_tune == PROCESSOR_PPC603
30619 || rs6000_tune == PROCESSOR_PPC604
30620 || rs6000_tune == PROCESSOR_PPC604e
30621 || rs6000_tune == PROCESSOR_PPC620
30622 || rs6000_tune == PROCESSOR_PPC630
30623 || rs6000_tune == PROCESSOR_PPC750
30624 || rs6000_tune == PROCESSOR_PPC7400
30625 || rs6000_tune == PROCESSOR_PPC7450
30626 || rs6000_tune == PROCESSOR_PPCE5500
30627 || rs6000_tune == PROCESSOR_PPCE6500
30628 || rs6000_tune == PROCESSOR_POWER4
30629 || rs6000_tune == PROCESSOR_POWER5
30630 || rs6000_tune == PROCESSOR_POWER7
30631 || rs6000_tune == PROCESSOR_POWER8
30632 || rs6000_tune == PROCESSOR_POWER9
30633 || rs6000_tune == PROCESSOR_CELL)
30634 && recog_memoized (dep_insn)
30635 && (INSN_CODE (dep_insn) >= 0))
30637 switch (get_attr_type (dep_insn))
30639 case TYPE_CMP:
30640 case TYPE_FPCOMPARE:
30641 case TYPE_CR_LOGICAL:
30642 return cost + 2;
30643 case TYPE_EXTS:
30644 case TYPE_MUL:
30645 if (get_attr_dot (dep_insn) == DOT_YES)
30646 return cost + 2;
30647 else
30648 break;
30649 case TYPE_SHIFT:
30650 if (get_attr_dot (dep_insn) == DOT_YES
30651 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
30652 return cost + 2;
30653 else
30654 break;
30655 default:
30656 break;
30658 break;
30660 case TYPE_STORE:
30661 case TYPE_FPSTORE:
30662 if ((rs6000_tune == PROCESSOR_POWER6)
30663 && recog_memoized (dep_insn)
30664 && (INSN_CODE (dep_insn) >= 0))
30667 if (GET_CODE (PATTERN (insn)) != SET)
30668 /* If this happens, we have to extend this to schedule
30669 optimally. Return default for now. */
30670 return cost;
30672 /* Adjust the cost for the case where the value written
30673 by a fixed point operation is used as the address
30674 gen value on a store. */
30675 switch (get_attr_type (dep_insn))
30677 case TYPE_LOAD:
30678 case TYPE_CNTLZ:
30680 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30681 return get_attr_sign_extend (dep_insn)
30682 == SIGN_EXTEND_YES ? 6 : 4;
30683 break;
30685 case TYPE_SHIFT:
30687 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30688 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30689 6 : 3;
30690 break;
30692 case TYPE_INTEGER:
30693 case TYPE_ADD:
30694 case TYPE_LOGICAL:
30695 case TYPE_EXTS:
30696 case TYPE_INSERT:
30698 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30699 return 3;
30700 break;
30702 case TYPE_STORE:
30703 case TYPE_FPLOAD:
30704 case TYPE_FPSTORE:
30706 if (get_attr_update (dep_insn) == UPDATE_YES
30707 && ! rs6000_store_data_bypass_p (dep_insn, insn))
30708 return 3;
30709 break;
30711 case TYPE_MUL:
30713 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30714 return 17;
30715 break;
30717 case TYPE_DIV:
30719 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30720 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30721 break;
30723 default:
30724 break;
30727 break;
30729 case TYPE_LOAD:
30730 if ((rs6000_tune == PROCESSOR_POWER6)
30731 && recog_memoized (dep_insn)
30732 && (INSN_CODE (dep_insn) >= 0))
30735 /* Adjust the cost for the case where the value written
30736 by a fixed point instruction is used within the address
30737 gen portion of a subsequent load(u)(x).  */
30738 switch (get_attr_type (dep_insn))
30740 case TYPE_LOAD:
30741 case TYPE_CNTLZ:
30743 if (set_to_load_agen (dep_insn, insn))
30744 return get_attr_sign_extend (dep_insn)
30745 == SIGN_EXTEND_YES ? 6 : 4;
30746 break;
30748 case TYPE_SHIFT:
30750 if (set_to_load_agen (dep_insn, insn))
30751 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30752 6 : 3;
30753 break;
30755 case TYPE_INTEGER:
30756 case TYPE_ADD:
30757 case TYPE_LOGICAL:
30758 case TYPE_EXTS:
30759 case TYPE_INSERT:
30761 if (set_to_load_agen (dep_insn, insn))
30762 return 3;
30763 break;
30765 case TYPE_STORE:
30766 case TYPE_FPLOAD:
30767 case TYPE_FPSTORE:
30769 if (get_attr_update (dep_insn) == UPDATE_YES
30770 && set_to_load_agen (dep_insn, insn))
30771 return 3;
30772 break;
30774 case TYPE_MUL:
30776 if (set_to_load_agen (dep_insn, insn))
30777 return 17;
30778 break;
30780 case TYPE_DIV:
30782 if (set_to_load_agen (dep_insn, insn))
30783 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30784 break;
30786 default:
30787 break;
30790 break;
30792 case TYPE_FPLOAD:
30793 if ((rs6000_tune == PROCESSOR_POWER6)
30794 && get_attr_update (insn) == UPDATE_NO
30795 && recog_memoized (dep_insn)
30796 && (INSN_CODE (dep_insn) >= 0)
30797 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30798 return 2;
30800 default:
30801 break;
30804 /* Fall out to return default cost. */
30806 break;
30808 case REG_DEP_OUTPUT:
30809 /* Output dependency; DEP_INSN writes a register that INSN writes some
30810 cycles later. */
30811 if ((rs6000_tune == PROCESSOR_POWER6)
30812 && recog_memoized (dep_insn)
30813 && (INSN_CODE (dep_insn) >= 0))
30815 attr_type = get_attr_type (insn);
30817 switch (attr_type)
30819 case TYPE_FP:
30820 case TYPE_FPSIMPLE:
30821 if (get_attr_type (dep_insn) == TYPE_FP
30822 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30823 return 1;
30824 break;
30825 case TYPE_FPLOAD:
30826 if (get_attr_update (insn) == UPDATE_NO
30827 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30828 return 2;
30829 break;
30830 default:
30831 break;
30834 /* Fall through, no cost for output dependency. */
30835 /* FALLTHRU */
30837 case REG_DEP_ANTI:
30838 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30839 cycles later. */
30840 return 0;
30842 default:
30843 gcc_unreachable ();
30846 return cost;
30849 /* Debug version of rs6000_adjust_cost. */
30851 static int
30852 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
30853 int cost, unsigned int dw)
30855 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
30857 if (ret != cost)
30859 const char *dep;
30861 switch (dep_type)
30863 default: dep = "unknown dependency"; break;
30864 case REG_DEP_TRUE: dep = "data dependency"; break;
30865 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30866 case REG_DEP_ANTI: dep = "anti dependency"; break;
30869 fprintf (stderr,
30870 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30871 "%s, insn:\n", ret, cost, dep);
30873 debug_rtx (insn);
30876 return ret;
30879 /* Return true if INSN is microcoded.
30880 Return false otherwise.  */
30882 static bool
30883 is_microcoded_insn (rtx_insn *insn)
30885 if (!insn || !NONDEBUG_INSN_P (insn)
30886 || GET_CODE (PATTERN (insn)) == USE
30887 || GET_CODE (PATTERN (insn)) == CLOBBER)
30888 return false;
30890 if (rs6000_tune == PROCESSOR_CELL)
30891 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
30893 if (rs6000_sched_groups
30894 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30896 enum attr_type type = get_attr_type (insn);
30897 if ((type == TYPE_LOAD
30898 && get_attr_update (insn) == UPDATE_YES
30899 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30900 || ((type == TYPE_LOAD || type == TYPE_STORE)
30901 && get_attr_update (insn) == UPDATE_YES
30902 && get_attr_indexed (insn) == INDEXED_YES)
30903 || type == TYPE_MFCR)
30904 return true;
30907 return false;
30910 /* The function returns true if INSN is cracked into 2 instructions
30911 by the processor (and therefore occupies 2 issue slots). */
30913 static bool
30914 is_cracked_insn (rtx_insn *insn)
30916 if (!insn || !NONDEBUG_INSN_P (insn)
30917 || GET_CODE (PATTERN (insn)) == USE
30918 || GET_CODE (PATTERN (insn)) == CLOBBER)
30919 return false;
30921 if (rs6000_sched_groups
30922 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30924 enum attr_type type = get_attr_type (insn);
30925 if ((type == TYPE_LOAD
30926 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30927 && get_attr_update (insn) == UPDATE_NO)
30928 || (type == TYPE_LOAD
30929 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30930 && get_attr_update (insn) == UPDATE_YES
30931 && get_attr_indexed (insn) == INDEXED_NO)
30932 || (type == TYPE_STORE
30933 && get_attr_update (insn) == UPDATE_YES
30934 && get_attr_indexed (insn) == INDEXED_NO)
30935 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30936 && get_attr_update (insn) == UPDATE_YES)
30937 || (type == TYPE_CR_LOGICAL
30938 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
30939 || (type == TYPE_EXTS
30940 && get_attr_dot (insn) == DOT_YES)
30941 || (type == TYPE_SHIFT
30942 && get_attr_dot (insn) == DOT_YES
30943 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30944 || (type == TYPE_MUL
30945 && get_attr_dot (insn) == DOT_YES)
30946 || type == TYPE_DIV
30947 || (type == TYPE_INSERT
30948 && get_attr_size (insn) == SIZE_32))
30949 return true;
30952 return false;
30955 /* The function returns true if INSN can be issued only from
30956 the branch slot. */
30958 static bool
30959 is_branch_slot_insn (rtx_insn *insn)
30961 if (!insn || !NONDEBUG_INSN_P (insn)
30962 || GET_CODE (PATTERN (insn)) == USE
30963 || GET_CODE (PATTERN (insn)) == CLOBBER)
30964 return false;
30966 if (rs6000_sched_groups)
30968 enum attr_type type = get_attr_type (insn);
30969 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30970 return true;
30971 return false;
30974 return false;
30977 /* Return true if OUT_INSN sets a value that is
30978 used in the address generation computation of IN_INSN.  */
30979 static bool
30980 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30982 rtx out_set, in_set;
30984 /* For performance reasons, only handle the simple case where
30985 both loads are a single_set. */
30986 out_set = single_set (out_insn);
30987 if (out_set)
30989 in_set = single_set (in_insn);
30990 if (in_set)
30991 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30994 return false;
30997 /* Try to determine the base/offset/size parts of the given MEM.
30998 Return true if successful, false if any of the values couldn't
30999 be determined.
31001 This function only looks for REG or REG+CONST address forms.
31002 REG+REG address form will return false. */
31004 static bool
31005 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
31006 HOST_WIDE_INT *size)
31008 rtx addr_rtx;
31009 if (MEM_SIZE_KNOWN_P (mem))
31010 *size = MEM_SIZE (mem);
31011 else
31012 return false;
31014 addr_rtx = (XEXP (mem, 0));
31015 if (GET_CODE (addr_rtx) == PRE_MODIFY)
31016 addr_rtx = XEXP (addr_rtx, 1);
31018 *offset = 0;
31019 while (GET_CODE (addr_rtx) == PLUS
31020 && CONST_INT_P (XEXP (addr_rtx, 1)))
31022 *offset += INTVAL (XEXP (addr_rtx, 1));
31023 addr_rtx = XEXP (addr_rtx, 0);
31025 if (!REG_P (addr_rtx))
31026 return false;
31028 *base = addr_rtx;
31029 return true;
31032 /* The function returns true if the target storage location of
31033 mem1 is adjacent to the target storage location of mem2 */
31034 /* Return 1 if memory locations are adjacent. */
31036 static bool
31037 adjacent_mem_locations (rtx mem1, rtx mem2)
31039 rtx reg1, reg2;
31040 HOST_WIDE_INT off1, size1, off2, size2;
31042 if (get_memref_parts (mem1, &reg1, &off1, &size1)
31043 && get_memref_parts (mem2, &reg2, &off2, &size2))
31044 return ((REGNO (reg1) == REGNO (reg2))
31045 && ((off1 + size1 == off2)
31046 || (off2 + size2 == off1)));
31048 return false;
31051 /* This function returns true if it can be determined that the two MEM
31052 locations overlap by at least 1 byte based on base reg/offset/size. */
31054 static bool
31055 mem_locations_overlap (rtx mem1, rtx mem2)
31057 rtx reg1, reg2;
31058 HOST_WIDE_INT off1, size1, off2, size2;
31060 if (get_memref_parts (mem1, &reg1, &off1, &size1)
31061 && get_memref_parts (mem2, &reg2, &off2, &size2))
31062 return ((REGNO (reg1) == REGNO (reg2))
31063 && (((off1 <= off2) && (off1 + size1 > off2))
31064 || ((off2 <= off1) && (off2 + size2 > off1))));
31066 return false;
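/* An illustrative, standalone sketch (not part of this port): the
   base/offset/size interval arithmetic used by adjacent_mem_locations
   and mem_locations_overlap above, on plain integers so it can be
   compiled and tested in isolation.  */
#if 0
#include <assert.h>

static int
adjacent_p (long off1, long size1, long off2, long size2)
{
  return off1 + size1 == off2 || off2 + size2 == off1;
}

static int
overlap_p (long off1, long size1, long off2, long size2)
{
  return (off1 <= off2 && off1 + size1 > off2)
	 || (off2 <= off1 && off2 + size2 > off1);
}

int
main (void)
{
  assert (adjacent_p (0, 4, 4, 4));	/* [0,4) followed by [4,8).  */
  assert (!overlap_p (0, 4, 4, 4));	/* Adjacent, but no overlap.  */
  assert (overlap_p (0, 8, 4, 4));	/* [0,8) covers [4,8).  */
  return 0;
}
#endif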
31069 /* Update the integer scheduling
31070 priority INSN_PRIORITY (INSN).  Increase the priority to execute
31071 INSN earlier, reduce the priority to execute INSN later.  Do not
31072 define this hook if you do not need to adjust the scheduling
31073 priorities of insns.  */
31075 static int
31076 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
31078 rtx load_mem, str_mem;
31079 /* On machines (like the 750) which have asymmetric integer units,
31080 where one integer unit can do multiply and divides and the other
31081 can't, reduce the priority of multiply/divide so it is scheduled
31082 before other integer operations. */
31084 #if 0
31085 if (! INSN_P (insn))
31086 return priority;
31088 if (GET_CODE (PATTERN (insn)) == USE)
31089 return priority;
31091 switch (rs6000_tune) {
31092 case PROCESSOR_PPC750:
31093 switch (get_attr_type (insn))
31095 default:
31096 break;
31098 case TYPE_MUL:
31099 case TYPE_DIV:
31100 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
31101 priority, priority);
31102 if (priority >= 0 && priority < 0x01000000)
31103 priority >>= 3;
31104 break;
31107 #endif
31109 if (insn_must_be_first_in_group (insn)
31110 && reload_completed
31111 && current_sched_info->sched_max_insns_priority
31112 && rs6000_sched_restricted_insns_priority)
31115 /* Prioritize insns that can be dispatched only in the first
31116 dispatch slot. */
31117 if (rs6000_sched_restricted_insns_priority == 1)
31118 /* Attach highest priority to insn. This means that in
31119 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
31120 precede 'priority' (critical path) considerations. */
31121 return current_sched_info->sched_max_insns_priority;
31122 else if (rs6000_sched_restricted_insns_priority == 2)
31123 /* Increase priority of insn by a minimal amount. This means that in
31124 haifa-sched.c:ready_sort(), only 'priority' (critical path)
31125 considerations precede dispatch-slot restriction considerations. */
31126 return (priority + 1);
31129 if (rs6000_tune == PROCESSOR_POWER6
31130 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
31131 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
31132 /* Attach highest priority to insn if the scheduler has just issued two
31133 stores and this instruction is a load, or two loads and this instruction
31134 is a store. Power6 wants loads and stores scheduled alternately
31135 when possible.  */
31136 return current_sched_info->sched_max_insns_priority;
31138 return priority;
31141 /* Return true if the instruction is nonpipelined on the Cell. */
31142 static bool
31143 is_nonpipeline_insn (rtx_insn *insn)
31145 enum attr_type type;
31146 if (!insn || !NONDEBUG_INSN_P (insn)
31147 || GET_CODE (PATTERN (insn)) == USE
31148 || GET_CODE (PATTERN (insn)) == CLOBBER)
31149 return false;
31151 type = get_attr_type (insn);
31152 if (type == TYPE_MUL
31153 || type == TYPE_DIV
31154 || type == TYPE_SDIV
31155 || type == TYPE_DDIV
31156 || type == TYPE_SSQRT
31157 || type == TYPE_DSQRT
31158 || type == TYPE_MFCR
31159 || type == TYPE_MFCRF
31160 || type == TYPE_MFJMPR)
31162 return true;
31164 return false;
31168 /* Return how many instructions the machine can issue per cycle. */
31170 static int
31171 rs6000_issue_rate (void)
31173 /* Unless scheduling for register pressure, use issue rate of 1 for
31174 first scheduling pass to decrease degradation. */
31175 if (!reload_completed && !flag_sched_pressure)
31176 return 1;
31178 switch (rs6000_tune) {
31179 case PROCESSOR_RS64A:
31180 case PROCESSOR_PPC601: /* ? */
31181 case PROCESSOR_PPC7450:
31182 return 3;
31183 case PROCESSOR_PPC440:
31184 case PROCESSOR_PPC603:
31185 case PROCESSOR_PPC750:
31186 case PROCESSOR_PPC7400:
31187 case PROCESSOR_PPC8540:
31188 case PROCESSOR_PPC8548:
31189 case PROCESSOR_CELL:
31190 case PROCESSOR_PPCE300C2:
31191 case PROCESSOR_PPCE300C3:
31192 case PROCESSOR_PPCE500MC:
31193 case PROCESSOR_PPCE500MC64:
31194 case PROCESSOR_PPCE5500:
31195 case PROCESSOR_PPCE6500:
31196 case PROCESSOR_TITAN:
31197 return 2;
31198 case PROCESSOR_PPC476:
31199 case PROCESSOR_PPC604:
31200 case PROCESSOR_PPC604e:
31201 case PROCESSOR_PPC620:
31202 case PROCESSOR_PPC630:
31203 return 4;
31204 case PROCESSOR_POWER4:
31205 case PROCESSOR_POWER5:
31206 case PROCESSOR_POWER6:
31207 case PROCESSOR_POWER7:
31208 return 5;
31209 case PROCESSOR_POWER8:
31210 return 7;
31211 case PROCESSOR_POWER9:
31212 return 6;
31213 default:
31214 return 1;
31218 /* Return how many instructions to look ahead for better insn
31219 scheduling. */
31221 static int
31222 rs6000_use_sched_lookahead (void)
31224 switch (rs6000_tune)
31226 case PROCESSOR_PPC8540:
31227 case PROCESSOR_PPC8548:
31228 return 4;
31230 case PROCESSOR_CELL:
31231 return (reload_completed ? 8 : 0);
31233 default:
31234 return 0;
31238 /* We are choosing an insn from the ready queue.  Return zero if INSN can be
31239 chosen.  */
31240 static int
31241 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
31243 if (ready_index == 0)
31244 return 0;
31246 if (rs6000_tune != PROCESSOR_CELL)
31247 return 0;
31249 gcc_assert (insn != NULL_RTX && INSN_P (insn));
31251 if (!reload_completed
31252 || is_nonpipeline_insn (insn)
31253 || is_microcoded_insn (insn))
31254 return 1;
31256 return 0;
31259 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
31260 and return true. */
31262 static bool
31263 find_mem_ref (rtx pat, rtx *mem_ref)
31265 const char * fmt;
31266 int i, j;
31268 /* stack_tie does not produce any real memory traffic. */
31269 if (tie_operand (pat, VOIDmode))
31270 return false;
31272 if (GET_CODE (pat) == MEM)
31274 *mem_ref = pat;
31275 return true;
31278 /* Recursively process the pattern. */
31279 fmt = GET_RTX_FORMAT (GET_CODE (pat));
31281 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
31283 if (fmt[i] == 'e')
31285 if (find_mem_ref (XEXP (pat, i), mem_ref))
31286 return true;
31288 else if (fmt[i] == 'E')
31289 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
31291 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
31292 return true;
31296 return false;
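/* An illustrative, standalone sketch (not part of this port): the shape
   of the recursive search find_mem_ref performs above, on a toy
   expression tree instead of RTL.  The struct and names are invented
   for illustration.  */
#if 0
struct node { int is_mem; int n_kids; struct node *kids[4]; };

static struct node *
find_mem (struct node *pat)
{
  int i;
  if (pat->is_mem)
    return pat;			/* Found a memory reference leaf.  */
  for (i = pat->n_kids - 1; i >= 0; i--)
    {
      struct node *m = find_mem (pat->kids[i]);
      if (m)
	return m;		/* First hit wins, as in the RTL walk.  */
    }
  return 0;
}
#endif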
31299 /* Determine if PAT is a PATTERN of a load insn. */
31301 static bool
31302 is_load_insn1 (rtx pat, rtx *load_mem)
31304 if (!pat)
31305 return false;
31307 if (GET_CODE (pat) == SET)
31308 return find_mem_ref (SET_SRC (pat), load_mem);
31310 if (GET_CODE (pat) == PARALLEL)
31312 int i;
31314 for (i = 0; i < XVECLEN (pat, 0); i++)
31315 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
31316 return true;
31319 return false;
31322 /* Determine if INSN loads from memory. */
31324 static bool
31325 is_load_insn (rtx insn, rtx *load_mem)
31327 if (!insn || !INSN_P (insn))
31328 return false;
31330 if (CALL_P (insn))
31331 return false;
31333 return is_load_insn1 (PATTERN (insn), load_mem);
31336 /* Determine if PAT is a PATTERN of a store insn. */
31338 static bool
31339 is_store_insn1 (rtx pat, rtx *str_mem)
31341 if (!pat)
31342 return false;
31344 if (GET_CODE (pat) == SET)
31345 return find_mem_ref (SET_DEST (pat), str_mem);
31347 if (GET_CODE (pat) == PARALLEL)
31349 int i;
31351 for (i = 0; i < XVECLEN (pat, 0); i++)
31352 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
31353 return true;
31356 return false;
31359 /* Determine if INSN stores to memory. */
31361 static bool
31362 is_store_insn (rtx insn, rtx *str_mem)
31364 if (!insn || !INSN_P (insn))
31365 return false;
31367 return is_store_insn1 (PATTERN (insn), str_mem);
31370 /* Return whether TYPE is a Power9 pairable vector instruction type. */
31372 static bool
31373 is_power9_pairable_vec_type (enum attr_type type)
31375 switch (type)
31377 case TYPE_VECSIMPLE:
31378 case TYPE_VECCOMPLEX:
31379 case TYPE_VECDIV:
31380 case TYPE_VECCMP:
31381 case TYPE_VECPERM:
31382 case TYPE_VECFLOAT:
31383 case TYPE_VECFDIV:
31384 case TYPE_VECDOUBLE:
31385 return true;
31386 default:
31387 break;
31389 return false;
31392 /* Returns whether the dependence between INSN and NEXT is considered
31393 costly by the given target. */
31395 static bool
31396 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
31398 rtx insn;
31399 rtx next;
31400 rtx load_mem, str_mem;
31402 /* If the flag is not enabled - no dependence is considered costly;
31403 allow all dependent insns in the same group.
31404 This is the most aggressive option. */
31405 if (rs6000_sched_costly_dep == no_dep_costly)
31406 return false;
31408 /* If the flag is set to 1 - a dependence is always considered costly;
31409 do not allow dependent instructions in the same group.
31410 This is the most conservative option. */
31411 if (rs6000_sched_costly_dep == all_deps_costly)
31412 return true;
31414 insn = DEP_PRO (dep);
31415 next = DEP_CON (dep);
31417 if (rs6000_sched_costly_dep == store_to_load_dep_costly
31418 && is_load_insn (next, &load_mem)
31419 && is_store_insn (insn, &str_mem))
31420 /* Prevent load after store in the same group. */
31421 return true;
31423 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
31424 && is_load_insn (next, &load_mem)
31425 && is_store_insn (insn, &str_mem)
31426 && DEP_TYPE (dep) == REG_DEP_TRUE
31427 && mem_locations_overlap (str_mem, load_mem))
31428 /* Prevent load after store in the same group if it is a true
31429 dependence. */
31430 return true;
31432 /* The flag is set to X; dependences with latency >= X are considered costly,
31433 and will not be scheduled in the same group. */
31434 if (rs6000_sched_costly_dep <= max_dep_latency
31435 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
31436 return true;
31438 return false;
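/* An illustrative, standalone sketch (not part of this port): the
   decision ladder of rs6000_is_costly_dependence above, reduced to an
   explicit policy enum, a flag, and a latency threshold.  In the real
   code the policies and the threshold share one numeric option value;
   the names here are invented for illustration.  */
#if 0
enum costly_policy { NEVER_COSTLY, ALWAYS_COSTLY, STORE_TO_LOAD, LATENCY };

static int
costly_dep_p (enum costly_policy policy, int threshold,
	      int store_to_load_p, int cost, int distance)
{
  if (policy == NEVER_COSTLY)
    return 0;			/* Most aggressive: group everything.  */
  if (policy == ALWAYS_COSTLY)
    return 1;			/* Most conservative: group nothing.  */
  if (policy == STORE_TO_LOAD)
    return store_to_load_p;	/* Keep a load out of its store's group.  */
  return cost - distance >= threshold;
}
#endif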
31441 /* Return the next insn after INSN that is found before TAIL is reached,
31442 skipping any "non-active" insns - insns that will not actually occupy
31443 an issue slot. Return NULL_RTX if such an insn is not found. */
31445 static rtx_insn *
31446 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
31448 if (insn == NULL_RTX || insn == tail)
31449 return NULL;
31451 while (1)
31453 insn = NEXT_INSN (insn);
31454 if (insn == NULL_RTX || insn == tail)
31455 return NULL;
31457 if (CALL_P (insn)
31458 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
31459 || (NONJUMP_INSN_P (insn)
31460 && GET_CODE (PATTERN (insn)) != USE
31461 && GET_CODE (PATTERN (insn)) != CLOBBER
31462 && INSN_CODE (insn) != CODE_FOR_stack_tie))
31463 break;
31465 return insn;
31468 /* Do Power9 specific sched_reorder2 reordering of ready list. */
31470 static int
31471 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
31473 int pos;
31474 int i;
31475 rtx_insn *tmp;
31476 enum attr_type type, type2;
31478 type = get_attr_type (last_scheduled_insn);
31480 /* Try to issue fixed point divides back-to-back in pairs so they will be
31481 routed to separate execution units and execute in parallel. */
31482 if (type == TYPE_DIV && divide_cnt == 0)
31484 /* First divide has been scheduled. */
31485 divide_cnt = 1;
31487 /* Scan the ready list looking for another divide, if found move it
31488 to the end of the list so it is chosen next. */
31489 pos = lastpos;
31490 while (pos >= 0)
31492 if (recog_memoized (ready[pos]) >= 0
31493 && get_attr_type (ready[pos]) == TYPE_DIV)
31495 tmp = ready[pos];
31496 for (i = pos; i < lastpos; i++)
31497 ready[i] = ready[i + 1];
31498 ready[lastpos] = tmp;
31499 break;
31501 pos--;
31504 else
31506 /* Last insn was the 2nd divide or not a divide, reset the counter. */
31507 divide_cnt = 0;
31509 /* The best dispatch throughput for vector and vector load insns can be
31510 achieved by interleaving a vector and vector load such that they'll
31511 dispatch to the same superslice. If this pairing cannot be achieved
31512 then it is best to pair vector insns together and vector load insns
31513 together.
31515 To aid in this pairing, vec_pairing maintains the current state with
31516 the following values:
31518 0 : Initial state, no vecload/vector pairing has been started.
31520 1 : A vecload or vector insn has been issued and a candidate for
31521 pairing has been found and moved to the end of the ready
31522 list. */
31523 if (type == TYPE_VECLOAD)
31525 /* Issued a vecload. */
31526 if (vec_pairing == 0)
31528 int vecload_pos = -1;
31529 /* We issued a single vecload, look for a vector insn to pair it
31530 with. If one isn't found, try to pair another vecload. */
31531 pos = lastpos;
31532 while (pos >= 0)
31534 if (recog_memoized (ready[pos]) >= 0)
31536 type2 = get_attr_type (ready[pos]);
31537 if (is_power9_pairable_vec_type (type2))
31539 /* Found a vector insn to pair with, move it to the
31540 end of the ready list so it is scheduled next. */
31541 tmp = ready[pos];
31542 for (i = pos; i < lastpos; i++)
31543 ready[i] = ready[i + 1];
31544 ready[lastpos] = tmp;
31545 vec_pairing = 1;
31546 return cached_can_issue_more;
31548 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
31549 /* Remember position of first vecload seen. */
31550 vecload_pos = pos;
31552 pos--;
31554 if (vecload_pos >= 0)
31556 /* Didn't find a vector to pair with but did find a vecload,
31557 move it to the end of the ready list. */
31558 tmp = ready[vecload_pos];
31559 for (i = vecload_pos; i < lastpos; i++)
31560 ready[i] = ready[i + 1];
31561 ready[lastpos] = tmp;
31562 vec_pairing = 1;
31563 return cached_can_issue_more;
31567 else if (is_power9_pairable_vec_type (type))
31569 /* Issued a vector operation. */
31570 if (vec_pairing == 0)
31572 int vec_pos = -1;
31573 /* We issued a single vector insn, look for a vecload to pair it
31574 with. If one isn't found, try to pair another vector. */
31575 pos = lastpos;
31576 while (pos >= 0)
31578 if (recog_memoized (ready[pos]) >= 0)
31580 type2 = get_attr_type (ready[pos]);
31581 if (type2 == TYPE_VECLOAD)
31583 /* Found a vecload insn to pair with, move it to the
31584 end of the ready list so it is scheduled next. */
31585 tmp = ready[pos];
31586 for (i = pos; i < lastpos; i++)
31587 ready[i] = ready[i + 1];
31588 ready[lastpos] = tmp;
31589 vec_pairing = 1;
31590 return cached_can_issue_more;
31592 else if (is_power9_pairable_vec_type (type2)
31593 && vec_pos == -1)
31594 /* Remember position of first vector insn seen. */
31595 vec_pos = pos;
31597 pos--;
31599 if (vec_pos >= 0)
31601 /* Didn't find a vecload to pair with but did find a vector
31602 insn, move it to the end of the ready list. */
31603 tmp = ready[vec_pos];
31604 for (i = vec_pos; i < lastpos; i++)
31605 ready[i] = ready[i + 1];
31606 ready[lastpos] = tmp;
31607 vec_pairing = 1;
31608 return cached_can_issue_more;
31613 /* We've either finished a vec/vecload pair, couldn't find an insn to
31614 continue the current pair, or the last insn had nothing to do
31615 with pairing.  In any case, reset the state.  */
31616 vec_pairing = 0;
31619 return cached_can_issue_more;
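/* An illustrative, standalone sketch (not part of this port): the
   "move the candidate to the end of the ready list" rotation that
   power9_sched_reorder2 repeats above.  The scheduler issues from the
   end of the ready array, so rotating an insn to LASTPOS makes it the
   next one chosen.  */
#if 0
static void
rotate_to_end (void **ready, int pos, int lastpos)
{
  void *tmp = ready[pos];
  int i;
  for (i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];	/* Shift the tail down by one slot...  */
  ready[lastpos] = tmp;		/* ...and put the candidate last.  */
}
#endif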
31622 /* We are about to begin issuing insns for this clock cycle. */
31624 static int
31625 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
31626 rtx_insn **ready ATTRIBUTE_UNUSED,
31627 int *pn_ready ATTRIBUTE_UNUSED,
31628 int clock_var ATTRIBUTE_UNUSED)
31630 int n_ready = *pn_ready;
31632 if (sched_verbose)
31633 fprintf (dump, "// rs6000_sched_reorder :\n");
31635 /* Reorder the ready list, if the second to last ready insn
31636 is a nonpipelined insn.  */
31637 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
31639 if (is_nonpipeline_insn (ready[n_ready - 1])
31640 && (recog_memoized (ready[n_ready - 2]) > 0))
31641 /* Simply swap first two insns. */
31642 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
31645 if (rs6000_tune == PROCESSOR_POWER6)
31646 load_store_pendulum = 0;
31648 return rs6000_issue_rate ();
31651 /* Like rs6000_sched_reorder, but called after issuing each insn. */
31653 static int
31654 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
31655 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
31657 if (sched_verbose)
31658 fprintf (dump, "// rs6000_sched_reorder2 :\n");
31660 /* For Power6, we need to handle some special cases to try and keep the
31661 store queue from overflowing and triggering expensive flushes.
31663 This code monitors how load and store instructions are being issued
31664 and skews the ready list one way or the other to increase the likelihood
31665 that a desired instruction is issued at the proper time.
31667 A couple of things are done. First, we maintain a "load_store_pendulum"
31668 to track the current state of load/store issue.
31670 - If the pendulum is at zero, then no loads or stores have been
31671 issued in the current cycle so we do nothing.
31673 - If the pendulum is 1, then a single load has been issued in this
31674 cycle and we attempt to locate another load in the ready list to
31675 issue with it.
31677 - If the pendulum is -2, then two stores have already been
31678 issued in this cycle, so we increase the priority of the first load
31679 in the ready list to increase its likelihood of being chosen first
31680 in the next cycle.
31682 - If the pendulum is -1, then a single store has been issued in this
31683 cycle and we attempt to locate another store in the ready list to
31684 issue with it, preferring a store to an adjacent memory location to
31685 facilitate store pairing in the store queue.
31687 - If the pendulum is 2, then two loads have already been
31688 issued in this cycle, so we increase the priority of the first store
31689 in the ready list to increase its likelihood of being chosen first
31690 in the next cycle.
31692 - If the pendulum < -2 or > 2, then do nothing.
31694 Note: This code covers the most common scenarios.  There exist
31695 non-load/store instructions which make use of the LSU and which
31696 would need to be accounted for to strictly model the behavior
31697 of the machine. Those instructions are currently unaccounted
31698 for to help minimize compile time overhead of this code.
31700 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
31702 int pos;
31703 int i;
31704 rtx_insn *tmp;
31705 rtx load_mem, str_mem;
31707 if (is_store_insn (last_scheduled_insn, &str_mem))
31708 /* Issuing a store, swing the load_store_pendulum to the left */
31709 load_store_pendulum--;
31710 else if (is_load_insn (last_scheduled_insn, &load_mem))
31711 /* Issuing a load, swing the load_store_pendulum to the right */
31712 load_store_pendulum++;
31713 else
31714 return cached_can_issue_more;
31716 /* If the pendulum is balanced, or there is only one instruction on
31717 the ready list, then all is well, so return. */
31718 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
31719 return cached_can_issue_more;
31721 if (load_store_pendulum == 1)
31723 /* A load has been issued in this cycle. Scan the ready list
31724 for another load to issue with it */
31725 pos = *pn_ready - 1;
31727 while (pos >= 0)
31729 if (is_load_insn (ready[pos], &load_mem))
31731 /* Found a load. Move it to the head of the ready list,
31732 and adjust its priority so that it is more likely to
31733 stay there.  */
31734 tmp = ready[pos];
31735 for (i = pos; i < *pn_ready - 1; i++)
31736 ready[i] = ready[i + 1];
31737 ready[*pn_ready-1] = tmp;
31739 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31740 INSN_PRIORITY (tmp)++;
31741 break;
31743 pos--;
31746 else if (load_store_pendulum == -2)
31748 /* Two stores have been issued in this cycle. Increase the
31749 priority of the first load in the ready list to favor it for
31750 issuing in the next cycle. */
31751 pos = *pn_ready - 1;
31753 while (pos >= 0)
31755 if (is_load_insn (ready[pos], &load_mem)
31756 && !sel_sched_p ()
31757 && INSN_PRIORITY_KNOWN (ready[pos]))
31759 INSN_PRIORITY (ready[pos])++;
31761 /* Adjust the pendulum to account for the fact that a load
31762 was found and increased in priority. This is to prevent
31763 increasing the priority of multiple loads */
31764 load_store_pendulum--;
31766 break;
31768 pos--;
31771 else if (load_store_pendulum == -1)
31773 /* A store has been issued in this cycle. Scan the ready list for
31774 another store to issue with it, preferring a store to an adjacent
31775 memory location */
31776 int first_store_pos = -1;
31778 pos = *pn_ready - 1;
31780 while (pos >= 0)
31782 if (is_store_insn (ready[pos], &str_mem))
31784 rtx str_mem2;
31785 /* Maintain the index of the first store found on the
31786 list */
31787 if (first_store_pos == -1)
31788 first_store_pos = pos;
31790 if (is_store_insn (last_scheduled_insn, &str_mem2)
31791 && adjacent_mem_locations (str_mem, str_mem2))
31793 /* Found an adjacent store. Move it to the head of the
31794 ready list, and adjust its priority so that it is
31795 more likely to stay there.  */
31796 tmp = ready[pos];
31797 for (i = pos; i < *pn_ready - 1; i++)
31798 ready[i] = ready[i + 1];
31799 ready[*pn_ready-1] = tmp;
31801 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31802 INSN_PRIORITY (tmp)++;
31804 first_store_pos = -1;
31806 break;
31809 pos--;
31812 if (first_store_pos >= 0)
31814 /* An adjacent store wasn't found, but a non-adjacent store was,
31815 so move the non-adjacent store to the front of the ready
31816 list, and adjust its priority so that it is more likely to
31817 stay there. */
31818 tmp = ready[first_store_pos];
31819 for (i = first_store_pos; i < *pn_ready - 1; i++)
31820 ready[i] = ready[i + 1];
31821 ready[*pn_ready-1] = tmp;
31822 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31823 INSN_PRIORITY (tmp)++;
31826 else if (load_store_pendulum == 2)
31828 /* Two loads have been issued in this cycle. Increase the priority
31829 of the first store in the ready list to favor it for issuing in
31830 the next cycle. */
31831 pos = *pn_ready - 1;
31833 while (pos >= 0)
31835 if (is_store_insn (ready[pos], &str_mem)
31836 && !sel_sched_p ()
31837 && INSN_PRIORITY_KNOWN (ready[pos]))
31839 INSN_PRIORITY (ready[pos])++;
31841 /* Adjust the pendulum to account for the fact that a store
31842 was found and increased in priority. This is to prevent
31843 increasing the priority of multiple stores */
31844 load_store_pendulum++;
31846 break;
31848 pos--;
31853 /* Do Power9 dependent reordering if necessary. */
31854 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
31855 && recog_memoized (last_scheduled_insn) >= 0)
31856 return power9_sched_reorder2 (ready, *pn_ready - 1);
31858 return cached_can_issue_more;
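/* An illustrative, standalone sketch (not part of this port): the POWER6
   load/store pendulum update described in the big comment above.
   Positive values count loads issued in the current cycle and negative
   values count stores; the function name is invented for illustration.
   At +1 the code looks for a second load, at -1 for a second (ideally
   adjacent) store, and at +2/-2 it boosts the first insn of the
   opposite kind so the next cycle rebalances.  */
#if 0
static int
pendulum_update (int pendulum, int load_p, int store_p)
{
  if (store_p)
    return pendulum - 1;	/* Swing left for a store.  */
  if (load_p)
    return pendulum + 1;	/* Swing right for a load.  */
  return pendulum;		/* Other insns leave it untouched.  */
}
#endif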
31861 /* Return whether the presence of INSN causes a dispatch group termination
31862 of group WHICH_GROUP.
31864 If WHICH_GROUP == current_group, this function will return true if INSN
31865 causes the termination of the current group (i.e., the dispatch group to
31866 which INSN belongs). This means that INSN will be the last insn in the
31867 group it belongs to.
31869 If WHICH_GROUP == previous_group, this function will return true if INSN
31870 causes the termination of the previous group (i.e., the dispatch group
31871 that precedes the group to which INSN belongs).  This means that INSN
31872 will be the first insn in the group it belongs to.  */
31874 static bool
31875 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
31877 bool first, last;
31879 if (! insn)
31880 return false;
31882 first = insn_must_be_first_in_group (insn);
31883 last = insn_must_be_last_in_group (insn);
31885 if (first && last)
31886 return true;
31888 if (which_group == current_group)
31889 return last;
31890 else if (which_group == previous_group)
31891 return first;
31893 return false;
31897 static bool
31898 insn_must_be_first_in_group (rtx_insn *insn)
31900 enum attr_type type;
31902 if (!insn
31903 || NOTE_P (insn)
31904 || DEBUG_INSN_P (insn)
31905 || GET_CODE (PATTERN (insn)) == USE
31906 || GET_CODE (PATTERN (insn)) == CLOBBER)
31907 return false;
31909 switch (rs6000_tune)
31911 case PROCESSOR_POWER5:
31912 if (is_cracked_insn (insn))
31913 return true;
31914 /* FALLTHRU */
31915 case PROCESSOR_POWER4:
31916 if (is_microcoded_insn (insn))
31917 return true;
31919 if (!rs6000_sched_groups)
31920 return false;
31922 type = get_attr_type (insn);
31924 switch (type)
31926 case TYPE_MFCR:
31927 case TYPE_MFCRF:
31928 case TYPE_MTCR:
31929 case TYPE_CR_LOGICAL:
31930 case TYPE_MTJMPR:
31931 case TYPE_MFJMPR:
31932 case TYPE_DIV:
31933 case TYPE_LOAD_L:
31934 case TYPE_STORE_C:
31935 case TYPE_ISYNC:
31936 case TYPE_SYNC:
31937 return true;
31938 default:
31939 break;
31941 break;
31942 case PROCESSOR_POWER6:
31943 type = get_attr_type (insn);
31945 switch (type)
31947 case TYPE_EXTS:
31948 case TYPE_CNTLZ:
31949 case TYPE_TRAP:
31950 case TYPE_MUL:
31951 case TYPE_INSERT:
31952 case TYPE_FPCOMPARE:
31953 case TYPE_MFCR:
31954 case TYPE_MTCR:
31955 case TYPE_MFJMPR:
31956 case TYPE_MTJMPR:
31957 case TYPE_ISYNC:
31958 case TYPE_SYNC:
31959 case TYPE_LOAD_L:
31960 case TYPE_STORE_C:
31961 return true;
31962 case TYPE_SHIFT:
31963 if (get_attr_dot (insn) == DOT_NO
31964 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31965 return true;
31966 else
31967 break;
31968 case TYPE_DIV:
31969 if (get_attr_size (insn) == SIZE_32)
31970 return true;
31971 else
31972 break;
31973 case TYPE_LOAD:
31974 case TYPE_STORE:
31975 case TYPE_FPLOAD:
31976 case TYPE_FPSTORE:
31977 if (get_attr_update (insn) == UPDATE_YES)
31978 return true;
31979 else
31980 break;
31981 default:
31982 break;
31984 break;
31985 case PROCESSOR_POWER7:
31986 type = get_attr_type (insn);
31988 switch (type)
31990 case TYPE_CR_LOGICAL:
31991 case TYPE_MFCR:
31992 case TYPE_MFCRF:
31993 case TYPE_MTCR:
31994 case TYPE_DIV:
31995 case TYPE_ISYNC:
31996 case TYPE_LOAD_L:
31997 case TYPE_STORE_C:
31998 case TYPE_MFJMPR:
31999 case TYPE_MTJMPR:
32000 return true;
32001 case TYPE_MUL:
32002 case TYPE_SHIFT:
32003 case TYPE_EXTS:
32004 if (get_attr_dot (insn) == DOT_YES)
32005 return true;
32006 else
32007 break;
32008 case TYPE_LOAD:
32009 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32010 || get_attr_update (insn) == UPDATE_YES)
32011 return true;
32012 else
32013 break;
32014 case TYPE_STORE:
32015 case TYPE_FPLOAD:
32016 case TYPE_FPSTORE:
32017 if (get_attr_update (insn) == UPDATE_YES)
32018 return true;
32019 else
32020 break;
32021 default:
32022 break;
32024 break;
32025 case PROCESSOR_POWER8:
32026 type = get_attr_type (insn);
32028 switch (type)
32030 case TYPE_CR_LOGICAL:
32031 case TYPE_MFCR:
32032 case TYPE_MFCRF:
32033 case TYPE_MTCR:
32034 case TYPE_SYNC:
32035 case TYPE_ISYNC:
32036 case TYPE_LOAD_L:
32037 case TYPE_STORE_C:
32038 case TYPE_VECSTORE:
32039 case TYPE_MFJMPR:
32040 case TYPE_MTJMPR:
32041 return true;
32042 case TYPE_SHIFT:
32043 case TYPE_EXTS:
32044 case TYPE_MUL:
32045 if (get_attr_dot (insn) == DOT_YES)
32046 return true;
32047 else
32048 break;
32049 case TYPE_LOAD:
32050 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32051 || get_attr_update (insn) == UPDATE_YES)
32052 return true;
32053 else
32054 break;
32055 case TYPE_STORE:
32056 if (get_attr_update (insn) == UPDATE_YES
32057 && get_attr_indexed (insn) == INDEXED_YES)
32058 return true;
32059 else
32060 break;
32061 default:
32062 break;
32064 break;
32065 default:
32066 break;
32069 return false;
32072 static bool
32073 insn_must_be_last_in_group (rtx_insn *insn)
32075 enum attr_type type;
32077 if (!insn
32078 || NOTE_P (insn)
32079 || DEBUG_INSN_P (insn)
32080 || GET_CODE (PATTERN (insn)) == USE
32081 || GET_CODE (PATTERN (insn)) == CLOBBER)
32082 return false;
32084 switch (rs6000_tune) {
32085 case PROCESSOR_POWER4:
32086 case PROCESSOR_POWER5:
32087 if (is_microcoded_insn (insn))
32088 return true;
32090 if (is_branch_slot_insn (insn))
32091 return true;
32093 break;
32094 case PROCESSOR_POWER6:
32095 type = get_attr_type (insn);
32097 switch (type)
32099 case TYPE_EXTS:
32100 case TYPE_CNTLZ:
32101 case TYPE_TRAP:
32102 case TYPE_MUL:
32103 case TYPE_FPCOMPARE:
32104 case TYPE_MFCR:
32105 case TYPE_MTCR:
32106 case TYPE_MFJMPR:
32107 case TYPE_MTJMPR:
32108 case TYPE_ISYNC:
32109 case TYPE_SYNC:
32110 case TYPE_LOAD_L:
32111 case TYPE_STORE_C:
32112 return true;
32113 case TYPE_SHIFT:
32114 if (get_attr_dot (insn) == DOT_NO
32115 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
32116 return true;
32117 else
32118 break;
32119 case TYPE_DIV:
32120 if (get_attr_size (insn) == SIZE_32)
32121 return true;
32122 else
32123 break;
32124 default:
32125 break;
32127 break;
32128 case PROCESSOR_POWER7:
32129 type = get_attr_type (insn);
32131 switch (type)
32133 case TYPE_ISYNC:
32134 case TYPE_SYNC:
32135 case TYPE_LOAD_L:
32136 case TYPE_STORE_C:
32137 return true;
32138 case TYPE_LOAD:
32139 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32140 && get_attr_update (insn) == UPDATE_YES)
32141 return true;
32142 else
32143 break;
32144 case TYPE_STORE:
32145 if (get_attr_update (insn) == UPDATE_YES
32146 && get_attr_indexed (insn) == INDEXED_YES)
32147 return true;
32148 else
32149 break;
32150 default:
32151 break;
32153 break;
32154 case PROCESSOR_POWER8:
32155 type = get_attr_type (insn);
32157 switch (type)
32159 case TYPE_MFCR:
32160 case TYPE_MTCR:
32161 case TYPE_ISYNC:
32162 case TYPE_SYNC:
32163 case TYPE_LOAD_L:
32164 case TYPE_STORE_C:
32165 return true;
32166 case TYPE_LOAD:
32167 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32168 && get_attr_update (insn) == UPDATE_YES)
32169 return true;
32170 else
32171 break;
32172 case TYPE_STORE:
32173 if (get_attr_update (insn) == UPDATE_YES
32174 && get_attr_indexed (insn) == INDEXED_YES)
32175 return true;
32176 else
32177 break;
32178 default:
32179 break;
32181 break;
32182 default:
32183 break;
32186 return false;
32189 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
32190 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
32192 static bool
32193 is_costly_group (rtx *group_insns, rtx next_insn)
32195 int i;
32196 int issue_rate = rs6000_issue_rate ();
32198 for (i = 0; i < issue_rate; i++)
32200 sd_iterator_def sd_it;
32201 dep_t dep;
32202 rtx insn = group_insns[i];
32204 if (!insn)
32205 continue;
32207 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
32209 rtx next = DEP_CON (dep);
32211 if (next == next_insn
32212 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
32213 return true;
32217 return false;
32220 /* Utility of the function redefine_groups.
32221 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
32222 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
32223 to keep it "far" (in a separate group) from GROUP_INSNS, following
32224 one of the following schemes, depending on the value of the flag
32225 -minsert-sched-nops = X:
32226 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
32227 in order to force NEXT_INSN into a separate group.
32228 (2) X < sched_finish_regroup_exact: insert exactly X nops.
32229 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
32230 insertion (has a group just ended, how many vacant issue slots remain in the
32231 last group, and how many dispatch groups were encountered so far). */
32233 static int
32234 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
32235 rtx_insn *next_insn, bool *group_end, int can_issue_more,
32236 int *group_count)
32238 rtx nop;
32239 bool force;
32240 int issue_rate = rs6000_issue_rate ();
32241 bool end = *group_end;
32242 int i;
32244 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
32245 return can_issue_more;
32247 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
32248 return can_issue_more;
32250 force = is_costly_group (group_insns, next_insn);
32251 if (!force)
32252 return can_issue_more;
32254 if (sched_verbose > 6)
32255 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
32256 *group_count ,can_issue_more);
32258 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
32260 if (*group_end)
32261 can_issue_more = 0;
32263 /* Since only a branch can be issued in the last issue_slot, it is
32264 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
32265 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
32266 in this case the last nop will start a new group and the branch
32267 will be forced to the new group. */
32268 if (can_issue_more && !is_branch_slot_insn (next_insn))
32269 can_issue_more--;
32271 /* Do we have a special group ending nop? */
32272 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
32273 || rs6000_tune == PROCESSOR_POWER8)
32275 nop = gen_group_ending_nop ();
32276 emit_insn_before (nop, next_insn);
32277 can_issue_more = 0;
32279 else
32280 while (can_issue_more > 0)
32282 nop = gen_nop ();
32283 emit_insn_before (nop, next_insn);
32284 can_issue_more--;
32287 *group_end = true;
32288 return 0;
32291 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
32293 int n_nops = rs6000_sched_insert_nops;
32295 /* Nops can't be issued from the branch slot, so the effective
32296 issue_rate for nops is 'issue_rate - 1'. */
32297 if (can_issue_more == 0)
32298 can_issue_more = issue_rate;
32299 can_issue_more--;
32300 if (can_issue_more == 0)
32302 can_issue_more = issue_rate - 1;
32303 (*group_count)++;
32304 end = true;
32305 for (i = 0; i < issue_rate; i++)
32307 group_insns[i] = 0;
32311 while (n_nops > 0)
32313 nop = gen_nop ();
32314 emit_insn_before (nop, next_insn);
32315 if (can_issue_more == issue_rate - 1) /* new group begins */
32316 end = false;
32317 can_issue_more--;
32318 if (can_issue_more == 0)
32320 can_issue_more = issue_rate - 1;
32321 (*group_count)++;
32322 end = true;
32323 for (i = 0; i < issue_rate; i++)
32325 group_insns[i] = 0;
32328 n_nops--;
32331 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
32332 can_issue_more++;
32334 /* Is next_insn going to start a new group? */
32335 *group_end
32336 = (end
32337 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32338 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32339 || (can_issue_more < issue_rate &&
32340 insn_terminates_group_p (next_insn, previous_group)));
32341 if (*group_end && end)
32342 (*group_count)--;
32344 if (sched_verbose > 6)
32345 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
32346 *group_count, can_issue_more);
32347 return can_issue_more;
32350 return can_issue_more;
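/* An illustrative, standalone sketch (not part of this port): how many
   nops the sched_finish_regroup_exact scheme above inserts to force
   NEXT_INSN into a new group, on processors without a special
   group-ending nop.  Only a branch can occupy the last issue slot, so a
   non-branch needs one nop fewer.  Names are invented for
   illustration.  */
#if 0
static int
nops_to_force_new_group (int can_issue_more, int next_is_branch_p)
{
  if (can_issue_more && !next_is_branch_p)
    can_issue_more--;		/* The branch slot need not be padded.  */
  return can_issue_more;	/* One nop per remaining vacant slot.  */
}
#endif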
32353 /* This function tries to synch the dispatch groups that the compiler "sees"
32354 with the dispatch groups that the processor dispatcher is expected to
32355 form in practice. It tries to achieve this synchronization by forcing the
32356 estimated processor grouping on the compiler (as opposed to the function
32357 'pad_groups', which tries to force the scheduler's grouping on the processor).
32359 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
32360 examines the (estimated) dispatch groups that will be formed by the processor
32361 dispatcher. It marks these group boundaries to reflect the estimated
32362 processor grouping, overriding the grouping that the scheduler had marked.
32363 Depending on the value of the flag '-minsert-sched-nops' this function can
32364 force certain insns into separate groups or force a certain distance between
32365 them by inserting nops, for example, if there exists a "costly dependence"
32366 between the insns.
32368 The function estimates the group boundaries that the processor will form as
32369 follows: It keeps track of how many vacant issue slots are available after
32370 each insn. A subsequent insn will start a new group if one of the following
32371 4 cases applies:
32372 - no more vacant issue slots remain in the current dispatch group.
32373 - only the last issue slot, which is the branch slot, is vacant, but the next
32374 insn is not a branch.
32375 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
32376 which means that a cracked insn (which occupies two issue slots) can't be
32377 issued in this group.
32378 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
32379 start a new group.  */
32381 static int
32382 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32383 rtx_insn *tail)
32385 rtx_insn *insn, *next_insn;
32386 int issue_rate;
32387 int can_issue_more;
32388 int slot, i;
32389 bool group_end;
32390 int group_count = 0;
32391 rtx *group_insns;
32393 /* Initialize. */
32394 issue_rate = rs6000_issue_rate ();
32395 group_insns = XALLOCAVEC (rtx, issue_rate);
32396 for (i = 0; i < issue_rate; i++)
32398 group_insns[i] = 0;
32400 can_issue_more = issue_rate;
32401 slot = 0;
32402 insn = get_next_active_insn (prev_head_insn, tail);
32403 group_end = false;
32405 while (insn != NULL_RTX)
32407 slot = (issue_rate - can_issue_more);
32408 group_insns[slot] = insn;
32409 can_issue_more =
32410 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32411 if (insn_terminates_group_p (insn, current_group))
32412 can_issue_more = 0;
32414 next_insn = get_next_active_insn (insn, tail);
32415 if (next_insn == NULL_RTX)
32416 return group_count + 1;
32418 /* Is next_insn going to start a new group? */
32419 group_end
32420 = (can_issue_more == 0
32421 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32422 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32423 || (can_issue_more < issue_rate &&
32424 insn_terminates_group_p (next_insn, previous_group)));
32426 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
32427 next_insn, &group_end, can_issue_more,
32428 &group_count);
32430 if (group_end)
32432 group_count++;
32433 can_issue_more = 0;
32434 for (i = 0; i < issue_rate; i++)
32436 group_insns[i] = 0;
32440 if (GET_MODE (next_insn) == TImode && can_issue_more)
32441 PUT_MODE (next_insn, VOIDmode);
32442 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
32443 PUT_MODE (next_insn, TImode);
32445 insn = next_insn;
32446 if (can_issue_more == 0)
32447 can_issue_more = issue_rate;
32448 } /* while */
32450 return group_count;
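/* An illustrative, standalone sketch (not part of this port): the four
   group-boundary cases listed above redefine_groups, as one predicate
   over plain flags.  Names are invented for illustration.  */
#if 0
static int
starts_new_group_p (int can_issue_more, int issue_rate,
		    int branch_p, int cracked_p, int must_be_first_p)
{
  return can_issue_more == 0			  /* No vacant slots.  */
	 || (can_issue_more == 1 && !branch_p)	  /* Only the branch slot.  */
	 || (can_issue_more <= 2 && cracked_p)	  /* Two slots needed.  */
	 || (can_issue_more < issue_rate && must_be_first_p);
}
#endif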
32453 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
32454 dispatch group boundaries that the scheduler had marked. Pad with nops
32455 any dispatch groups which have vacant issue slots, in order to force the
32456 scheduler's grouping on the processor dispatcher. The function
32457 returns the number of dispatch groups found. */
32459 static int
32460 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32461 rtx_insn *tail)
32463 rtx_insn *insn, *next_insn;
32464 rtx nop;
32465 int issue_rate;
32466 int can_issue_more;
32467 int group_end;
32468 int group_count = 0;
32470 /* Initialize issue_rate. */
32471 issue_rate = rs6000_issue_rate ();
32472 can_issue_more = issue_rate;
32474 insn = get_next_active_insn (prev_head_insn, tail);
32475 next_insn = get_next_active_insn (insn, tail);
32477 while (insn != NULL_RTX)
32479 can_issue_more =
32480 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32482 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
32484 if (next_insn == NULL_RTX)
32485 break;
32487 if (group_end)
32489 /* If the scheduler had marked group termination at this location
32490 (between insn and next_insn), and neither insn nor next_insn will
32491 force group termination, pad the group with nops to force group
32492 termination. */
32493 if (can_issue_more
32494 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
32495 && !insn_terminates_group_p (insn, current_group)
32496 && !insn_terminates_group_p (next_insn, previous_group))
32498 if (!is_branch_slot_insn (next_insn))
32499 can_issue_more--;
32501 while (can_issue_more)
32503 nop = gen_nop ();
32504 emit_insn_before (nop, next_insn);
32505 can_issue_more--;
32509 can_issue_more = issue_rate;
32510 group_count++;
32513 insn = next_insn;
32514 next_insn = get_next_active_insn (insn, tail);
32517 return group_count;
32520 /* We're beginning a new block. Initialize data structures as necessary. */
32522 static void
32523 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
32524 int sched_verbose ATTRIBUTE_UNUSED,
32525 int max_ready ATTRIBUTE_UNUSED)
32527 last_scheduled_insn = NULL;
32528 load_store_pendulum = 0;
32529 divide_cnt = 0;
32530 vec_pairing = 0;
32533 /* The following function is called at the end of scheduling BB.
32534 After reload, it inserts nops as needed to enforce insn group bundling.  */
32536 static void
32537 rs6000_sched_finish (FILE *dump, int sched_verbose)
32539 int n_groups;
32541 if (sched_verbose)
32542 fprintf (dump, "=== Finishing schedule.\n");
32544 if (reload_completed && rs6000_sched_groups)
32546 /* Do not run the sched_finish hook when selective scheduling is enabled.  */
32547 if (sel_sched_p ())
32548 return;
32550 if (rs6000_sched_insert_nops == sched_finish_none)
32551 return;
32553 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
32554 n_groups = pad_groups (dump, sched_verbose,
32555 current_sched_info->prev_head,
32556 current_sched_info->next_tail);
32557 else
32558 n_groups = redefine_groups (dump, sched_verbose,
32559 current_sched_info->prev_head,
32560 current_sched_info->next_tail);
32562 if (sched_verbose >= 6)
32564 fprintf (dump, "ngroups = %d\n", n_groups);
32565 print_rtl (dump, current_sched_info->prev_head);
32566 fprintf (dump, "Done finish_sched\n");
32571 struct rs6000_sched_context
32573 short cached_can_issue_more;
32574 rtx_insn *last_scheduled_insn;
32575 int load_store_pendulum;
32576 int divide_cnt;
32577 int vec_pairing;
32580 typedef struct rs6000_sched_context rs6000_sched_context_def;
32581 typedef rs6000_sched_context_def *rs6000_sched_context_t;
32583 /* Allocate storage for a new scheduling context.  */
32584 static void *
32585 rs6000_alloc_sched_context (void)
32587 return xmalloc (sizeof (rs6000_sched_context_def));
32590 /* If CLEAN_P is true, initialize _SC with clean data;
32591 otherwise, initialize it from the global context.  */
32592 static void
32593 rs6000_init_sched_context (void *_sc, bool clean_p)
32595 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32597 if (clean_p)
32599 sc->cached_can_issue_more = 0;
32600 sc->last_scheduled_insn = NULL;
32601 sc->load_store_pendulum = 0;
32602 sc->divide_cnt = 0;
32603 sc->vec_pairing = 0;
32605 else
32607 sc->cached_can_issue_more = cached_can_issue_more;
32608 sc->last_scheduled_insn = last_scheduled_insn;
32609 sc->load_store_pendulum = load_store_pendulum;
32610 sc->divide_cnt = divide_cnt;
32611 sc->vec_pairing = vec_pairing;
32615 /* Sets the global scheduling context to the one pointed to by _SC. */
32616 static void
32617 rs6000_set_sched_context (void *_sc)
32619 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32621 gcc_assert (sc != NULL);
32623 cached_can_issue_more = sc->cached_can_issue_more;
32624 last_scheduled_insn = sc->last_scheduled_insn;
32625 load_store_pendulum = sc->load_store_pendulum;
32626 divide_cnt = sc->divide_cnt;
32627 vec_pairing = sc->vec_pairing;
32630 /* Free _SC. */
32631 static void
32632 rs6000_free_sched_context (void *_sc)
32634 gcc_assert (_sc != NULL);
32636 free (_sc);
32639 static bool
32640 rs6000_sched_can_speculate_insn (rtx_insn *insn)
32642 switch (get_attr_type (insn))
32644 case TYPE_DIV:
32645 case TYPE_SDIV:
32646 case TYPE_DDIV:
32647 case TYPE_VECDIV:
32648 case TYPE_SSQRT:
32649 case TYPE_DSQRT:
32650 return false;
32652 default:
32653 return true;
32657 /* Length in units of the trampoline for entering a nested function. */
32659 int
32660 rs6000_trampoline_size (void)
32662 int ret = 0;
32664 switch (DEFAULT_ABI)
32666 default:
32667 gcc_unreachable ();
32669 case ABI_AIX:
32670 ret = (TARGET_32BIT) ? 12 : 24;
32671 break;
32673 case ABI_ELFv2:
32674 gcc_assert (!TARGET_32BIT);
32675 ret = 32;
32676 break;
32678 case ABI_DARWIN:
32679 case ABI_V4:
32680 ret = (TARGET_32BIT) ? 40 : 48;
32681 break;
32684 return ret;
32687 /* Emit RTL insns to initialize the variable parts of a trampoline.
32688 FNADDR is an RTX for the address of the function's pure code.
32689 CXT is an RTX for the static chain value for the function. */
32691 static void
32692 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
32694 int regsize = (TARGET_32BIT) ? 4 : 8;
32695 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
32696 rtx ctx_reg = force_reg (Pmode, cxt);
32697 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
32699 switch (DEFAULT_ABI)
32701 default:
32702 gcc_unreachable ();
32704 /* Under AIX, just build the 3-word function descriptor. */
32705 case ABI_AIX:
32707 rtx fnmem, fn_reg, toc_reg;
32709 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32710 error ("you cannot take the address of a nested function if you use "
32711 "the %qs option", "-mno-pointers-to-nested-functions");
32713 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
32714 fn_reg = gen_reg_rtx (Pmode);
32715 toc_reg = gen_reg_rtx (Pmode);
32717 /* Macro to shorten the code expansions below. */
32718 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32720 m_tramp = replace_equiv_address (m_tramp, addr);
32722 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
32723 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
32724 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
32725 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
32726 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
32728 # undef MEM_PLUS
32730 break;
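/* An illustrative sketch of the descriptor built above (the layout is
   implied by the three moves, not a separate specification): with
   REGSIZE-byte words,

       word 0: function entry address  (from FNADDR's own descriptor)
       word 1: TOC pointer             (copied likewise)
       word 2: static chain            (CXT)

   so e.g. a 64-bit AIX trampoline occupies the 24 bytes reported by
   rs6000_trampoline_size.  */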
32732 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32733 case ABI_ELFv2:
32734 case ABI_DARWIN:
32735 case ABI_V4:
32736 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
32737 LCT_NORMAL, VOIDmode,
32738 addr, Pmode,
32739 GEN_INT (rs6000_trampoline_size ()), SImode,
32740 fnaddr, Pmode,
32741 ctx_reg, Pmode);
32742 break;
32747 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32748 identifier as an argument, so the front end shouldn't look it up. */
32750 static bool
32751 rs6000_attribute_takes_identifier_p (const_tree attr_id)
32753 return is_attribute_p ("altivec", attr_id);
32756 /* Handle the "altivec" attribute. The attribute may have
32757 arguments as follows:
32759 __attribute__((altivec(vector__)))
32760 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32761 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32763 and may appear more than once (e.g., 'vector bool char') in a
32764 given declaration. */
32766 static tree
32767 rs6000_handle_altivec_attribute (tree *node,
32768 tree name ATTRIBUTE_UNUSED,
32769 tree args,
32770 int flags ATTRIBUTE_UNUSED,
32771 bool *no_add_attrs)
32773 tree type = *node, result = NULL_TREE;
32774 machine_mode mode;
32775 int unsigned_p;
32776 char altivec_type
32777 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32778 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32779 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
32780 : '?');
32782 while (POINTER_TYPE_P (type)
32783 || TREE_CODE (type) == FUNCTION_TYPE
32784 || TREE_CODE (type) == METHOD_TYPE
32785 || TREE_CODE (type) == ARRAY_TYPE)
32786 type = TREE_TYPE (type);
32788 mode = TYPE_MODE (type);
32790 /* Check for invalid AltiVec type qualifiers. */
32791 if (type == long_double_type_node)
32792 error ("use of %<long double%> in AltiVec types is invalid");
32793 else if (type == boolean_type_node)
32794 error ("use of boolean types in AltiVec types is invalid");
32795 else if (TREE_CODE (type) == COMPLEX_TYPE)
32796 error ("use of %<complex%> in AltiVec types is invalid");
32797 else if (DECIMAL_FLOAT_MODE_P (mode))
32798 error ("use of decimal floating point types in AltiVec types is invalid");
32799 else if (!TARGET_VSX)
32801 if (type == long_unsigned_type_node || type == long_integer_type_node)
32803 if (TARGET_64BIT)
32804 error ("use of %<long%> in AltiVec types is invalid for "
32805 "64-bit code without %qs", "-mvsx");
32806 else if (rs6000_warn_altivec_long)
32807 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32808 "use %<int%>");
32810 else if (type == long_long_unsigned_type_node
32811 || type == long_long_integer_type_node)
32812 error ("use of %<long long%> in AltiVec types is invalid without %qs",
32813 "-mvsx");
32814 else if (type == double_type_node)
32815 error ("use of %<double%> in AltiVec types is invalid without %qs",
32816 "-mvsx");
32819 switch (altivec_type)
32821 case 'v':
32822 unsigned_p = TYPE_UNSIGNED (type);
32823 switch (mode)
32825 case E_TImode:
32826 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32827 break;
32828 case E_DImode:
32829 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32830 break;
32831 case E_SImode:
32832 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32833 break;
32834 case E_HImode:
32835 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32836 break;
32837 case E_QImode:
32838 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32839 break;
32840 case E_SFmode: result = V4SF_type_node; break;
32841 case E_DFmode: result = V2DF_type_node; break;
32842 /* If the user says 'vector int bool', we may be handed the 'bool'
32843 attribute _before_ the 'vector' attribute, and so select the
32844 proper type in the 'b' case below. */
32845 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
32846 case E_V2DImode: case E_V2DFmode:
32847 result = type;
32848 default: break;
32850 break;
32851 case 'b':
32852 switch (mode)
32854 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
32855 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
32856 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
32857 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
32858 default: break;
32860 break;
32861 case 'p':
32862 switch (mode)
32864 case E_V8HImode: result = pixel_V8HI_type_node;
32865 default: break;
32867 default: break;
32870 /* Propagate qualifiers attached to the element type
32871 onto the vector type. */
32872 if (result && result != type && TYPE_QUALS (type))
32873 result = build_qualified_type (result, TYPE_QUALS (type));
32875 *no_add_attrs = true; /* No need to hang on to the attribute. */
32877 if (result)
32878 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32880 return NULL_TREE;
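/* A hedged usage sketch (the typedef names are hypothetical); in
   practice the 'vector', 'pixel' and 'bool' keywords from <altivec.h>
   expand to attribute forms along these lines:

     typedef int my_v4si __attribute__ ((altivec (vector__)));
     typedef unsigned short my_pixel
       __attribute__ ((altivec (pixel__), altivec (vector__)));
     typedef unsigned int my_b4si
       __attribute__ ((altivec (bool__), altivec (vector__)));

   matching the 'v', 'p' and 'b' cases handled above.  */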
32883 /* AltiVec defines four built-in scalar types that serve as vector
32884 elements; we must teach the compiler how to mangle them. */
32886 static const char *
32887 rs6000_mangle_type (const_tree type)
32889 type = TYPE_MAIN_VARIANT (type);
32891 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32892 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32893 return NULL;
32895 if (type == bool_char_type_node) return "U6__boolc";
32896 if (type == bool_short_type_node) return "U6__bools";
32897 if (type == pixel_type_node) return "u7__pixel";
32898 if (type == bool_int_type_node) return "U6__booli";
32899 if (type == bool_long_type_node) return "U6__booll";
32901 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
32902 "g" for IBM extended double, no matter whether it is long double (using
32903 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
32904 if (TARGET_FLOAT128_TYPE)
32906 if (type == ieee128_float_type_node)
32907 return "U10__float128";
32909 if (TARGET_LONG_DOUBLE_128)
32911 if (type == long_double_type_node)
32912 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
32914 if (type == ibm128_float_type_node)
32915 return "g";
32919 /* Mangle IBM extended float long double as `g' (__float128) on
32920 powerpc*-linux where long-double-64 previously was the default. */
32921 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
32922 && TARGET_ELF
32923 && TARGET_LONG_DOUBLE_128
32924 && !TARGET_IEEEQUAD)
32925 return "g";
32927 /* For all other types, use normal C++ mangling. */
32928 return NULL;
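/* Worked examples of the scheme above: a parameter of type
   'vector bool char' (element type __bool char) contributes "U6__boolc"
   to the mangled name, 'vector pixel' contributes "u7__pixel", and on
   ELF with 128-bit IBM long double, 'long double' mangles as "g".  */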
32931 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32932 struct attribute_spec.handler. */
32934 static tree
32935 rs6000_handle_longcall_attribute (tree *node, tree name,
32936 tree args ATTRIBUTE_UNUSED,
32937 int flags ATTRIBUTE_UNUSED,
32938 bool *no_add_attrs)
32940 if (TREE_CODE (*node) != FUNCTION_TYPE
32941 && TREE_CODE (*node) != FIELD_DECL
32942 && TREE_CODE (*node) != TYPE_DECL)
32944 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32945 name);
32946 *no_add_attrs = true;
32949 return NULL_TREE;
32952 /* Set longcall attributes on all functions declared when
32953 rs6000_default_long_calls is true. */
32954 static void
32955 rs6000_set_default_type_attributes (tree type)
32957 if (rs6000_default_long_calls
32958 && (TREE_CODE (type) == FUNCTION_TYPE
32959 || TREE_CODE (type) == METHOD_TYPE))
32960 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32961 NULL_TREE,
32962 TYPE_ATTRIBUTES (type));
32964 #if TARGET_MACHO
32965 darwin_set_default_type_attributes (type);
32966 #endif
32969 /* Return a reference suitable for calling a function with the
32970 longcall attribute. */
32972 rtx
32973 rs6000_longcall_ref (rtx call_ref)
32975 const char *call_name;
32976 tree node;
32978 if (GET_CODE (call_ref) != SYMBOL_REF)
32979 return call_ref;
32981 /* System V adds '.' to the internal name, so skip any leading dots. */
32982 call_name = XSTR (call_ref, 0);
32983 if (*call_name == '.')
32985 while (*call_name == '.')
32986 call_name++;
32988 node = get_identifier (call_name);
32989 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32992 return force_reg (Pmode, call_ref);
32995 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32996 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32997 #endif
32999 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
33000 struct attribute_spec.handler. */
33001 static tree
33002 rs6000_handle_struct_attribute (tree *node, tree name,
33003 tree args ATTRIBUTE_UNUSED,
33004 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
33006 tree *type = NULL;
33007 if (DECL_P (*node))
33009 if (TREE_CODE (*node) == TYPE_DECL)
33010 type = &TREE_TYPE (*node);
33012 else
33013 type = node;
33015 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
33016 || TREE_CODE (*type) == UNION_TYPE)))
33018 warning (OPT_Wattributes, "%qE attribute ignored", name);
33019 *no_add_attrs = true;
33022 else if ((is_attribute_p ("ms_struct", name)
33023 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
33024 || ((is_attribute_p ("gcc_struct", name)
33025 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
33027 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
33028 name);
33029 *no_add_attrs = true;
33032 return NULL_TREE;
33035 static bool
33036 rs6000_ms_bitfield_layout_p (const_tree record_type)
33038 return (TARGET_USE_MS_BITFIELD_LAYOUT
33039 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
33040 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
33043 #ifdef USING_ELFOS_H
33045 /* A get_unnamed_section callback, used for switching to toc_section. */
33047 static void
33048 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33050 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33051 && TARGET_MINIMAL_TOC)
33053 if (!toc_initialized)
33055 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
33056 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33057 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
33058 fprintf (asm_out_file, "\t.tc ");
33059 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
33060 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33061 fprintf (asm_out_file, "\n");
33063 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33064 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33065 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33066 fprintf (asm_out_file, " = .+32768\n");
33067 toc_initialized = 1;
33069 else
33070 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33072 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33074 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
33075 if (!toc_initialized)
33077 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33078 toc_initialized = 1;
33081 else
33083 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33084 if (!toc_initialized)
33086 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33087 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33088 fprintf (asm_out_file, " = .+32768\n");
33089 toc_initialized = 1;
33094 /* Implement TARGET_ASM_INIT_SECTIONS. */
33096 static void
33097 rs6000_elf_asm_init_sections (void)
33099 toc_section
33100 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
33102 sdata2_section
33103 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
33104 SDATA2_SECTION_ASM_OP);
33107 /* Implement TARGET_SELECT_RTX_SECTION. */
33109 static section *
33110 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
33111 unsigned HOST_WIDE_INT align)
33113 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33114 return toc_section;
33115 else
33116 return default_elf_select_rtx_section (mode, x, align);
33119 /* For a SYMBOL_REF, set generic flags and then perform some
33120 target-specific processing.
33122 When the AIX ABI is requested on a non-AIX system, replace the
33123 function name with the real name (with a leading .) rather than the
33124 function descriptor name. This avoids a lot of overriding code
33125 that would otherwise be needed to read the prefixes. */
33127 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
33128 static void
33129 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
33131 default_encode_section_info (decl, rtl, first);
33133 if (first
33134 && TREE_CODE (decl) == FUNCTION_DECL
33135 && !TARGET_AIX
33136 && DEFAULT_ABI == ABI_AIX)
33138 rtx sym_ref = XEXP (rtl, 0);
33139 size_t len = strlen (XSTR (sym_ref, 0));
33140 char *str = XALLOCAVEC (char, len + 2);
33141 str[0] = '.';
33142 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
33143 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
33147 static inline bool
33148 compare_section_name (const char *section, const char *templ)
33150 int len;
33152 len = strlen (templ);
33153 return (strncmp (section, templ, len) == 0
33154 && (section[len] == 0 || section[len] == '.'));
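/* For example, compare_section_name (".sdata.foo", ".sdata") and
   compare_section_name (".sdata", ".sdata") are true, while
   compare_section_name (".sdata2", ".sdata") is false because the
   character after the prefix is '2' rather than '.' or NUL.  */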
33157 bool
33158 rs6000_elf_in_small_data_p (const_tree decl)
33160 if (rs6000_sdata == SDATA_NONE)
33161 return false;
33163 /* We want to merge strings, so we never consider them small data. */
33164 if (TREE_CODE (decl) == STRING_CST)
33165 return false;
33167 /* Functions are never in the small data area. */
33168 if (TREE_CODE (decl) == FUNCTION_DECL)
33169 return false;
33171 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
33173 const char *section = DECL_SECTION_NAME (decl);
33174 if (compare_section_name (section, ".sdata")
33175 || compare_section_name (section, ".sdata2")
33176 || compare_section_name (section, ".gnu.linkonce.s")
33177 || compare_section_name (section, ".sbss")
33178 || compare_section_name (section, ".sbss2")
33179 || compare_section_name (section, ".gnu.linkonce.sb")
33180 || strcmp (section, ".PPC.EMB.sdata0") == 0
33181 || strcmp (section, ".PPC.EMB.sbss0") == 0)
33182 return true;
33184 else
33186 /* If we are told not to put readonly data in sdata, then don't. */
33187 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
33188 && !rs6000_readonly_in_sdata)
33189 return false;
33191 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
33193 if (size > 0
33194 && size <= g_switch_value
33195 /* If it's not public, and we're not going to reference it there,
33196 there's no need to put it in the small data section. */
33197 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
33198 return true;
33201 return false;
33204 #endif /* USING_ELFOS_H */
33206 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
33208 static bool
33209 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
33211 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
33214 /* Do not place thread-local symbols refs in the object blocks. */
33216 static bool
33217 rs6000_use_blocks_for_decl_p (const_tree decl)
33219 return !DECL_THREAD_LOCAL_P (decl);
33222 /* Return a REG that occurs in ADDR with coefficient 1.
33223 ADDR can be effectively incremented by incrementing REG.
33225 r0 is special and we must not select it as an address
33226 register by this routine since our caller will try to
33227 increment the returned register via an "la" instruction. */
33229 rtx
33230 find_addr_reg (rtx addr)
33232 while (GET_CODE (addr) == PLUS)
33234 if (GET_CODE (XEXP (addr, 0)) == REG
33235 && REGNO (XEXP (addr, 0)) != 0)
33236 addr = XEXP (addr, 0);
33237 else if (GET_CODE (XEXP (addr, 1)) == REG
33238 && REGNO (XEXP (addr, 1)) != 0)
33239 addr = XEXP (addr, 1);
33240 else if (CONSTANT_P (XEXP (addr, 0)))
33241 addr = XEXP (addr, 1);
33242 else if (CONSTANT_P (XEXP (addr, 1)))
33243 addr = XEXP (addr, 0);
33244 else
33245 gcc_unreachable ();
33247 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
33248 return addr;
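/* For example, given ADDR == (plus (reg 9) (const_int 8)) this returns
   (reg 9); register 0 is never chosen because "la" would read it as
   the literal value zero rather than as a base register.  */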
33251 void
33252 rs6000_fatal_bad_address (rtx op)
33254 fatal_insn ("bad address", op);
33257 #if TARGET_MACHO
33259 typedef struct branch_island_d {
33260 tree function_name;
33261 tree label_name;
33262 int line_number;
33263 } branch_island;
33266 static vec<branch_island, va_gc> *branch_islands;
33268 /* Remember to generate a branch island for far calls to the given
33269 function. */
33271 static void
33272 add_compiler_branch_island (tree label_name, tree function_name,
33273 int line_number)
33275 branch_island bi = {function_name, label_name, line_number};
33276 vec_safe_push (branch_islands, bi);
33279 /* Generate far-jump branch islands for everything recorded in
33280 branch_islands. Invoked immediately after the last instruction of
33281 the epilogue has been emitted; the branch islands must be appended
33282 to, and contiguous with, the function body. Mach-O stubs are
33283 generated in machopic_output_stub(). */
33285 static void
33286 macho_branch_islands (void)
33288 char tmp_buf[512];
33290 while (!vec_safe_is_empty (branch_islands))
33292 branch_island *bi = &branch_islands->last ();
33293 const char *label = IDENTIFIER_POINTER (bi->label_name);
33294 const char *name = IDENTIFIER_POINTER (bi->function_name);
33295 char name_buf[512];
33296 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
33297 if (name[0] == '*' || name[0] == '&')
33298 strcpy (name_buf, name+1);
33299 else
33301 name_buf[0] = '_';
33302 strcpy (name_buf+1, name);
33304 strcpy (tmp_buf, "\n");
33305 strcat (tmp_buf, label);
33306 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33307 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33308 dbxout_stabd (N_SLINE, bi->line_number);
33309 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33310 if (flag_pic)
33312 if (TARGET_LINK_STACK)
33314 char name[32];
33315 get_ppc476_thunk_name (name);
33316 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
33317 strcat (tmp_buf, name);
33318 strcat (tmp_buf, "\n");
33319 strcat (tmp_buf, label);
33320 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33322 else
33324 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
33325 strcat (tmp_buf, label);
33326 strcat (tmp_buf, "_pic\n");
33327 strcat (tmp_buf, label);
33328 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33331 strcat (tmp_buf, "\taddis r11,r11,ha16(");
33332 strcat (tmp_buf, name_buf);
33333 strcat (tmp_buf, " - ");
33334 strcat (tmp_buf, label);
33335 strcat (tmp_buf, "_pic)\n");
33337 strcat (tmp_buf, "\tmtlr r0\n");
33339 strcat (tmp_buf, "\taddi r12,r11,lo16(");
33340 strcat (tmp_buf, name_buf);
33341 strcat (tmp_buf, " - ");
33342 strcat (tmp_buf, label);
33343 strcat (tmp_buf, "_pic)\n");
33345 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
33347 else
33349 strcat (tmp_buf, ":\nlis r12,hi16(");
33350 strcat (tmp_buf, name_buf);
33351 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
33352 strcat (tmp_buf, name_buf);
33353 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
33355 output_asm_insn (tmp_buf, 0);
33356 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33357 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33358 dbxout_stabd (N_SLINE, bi->line_number);
33359 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33360 branch_islands->pop ();
33364 /* NO_PREVIOUS_DEF checks whether the function name is already in
33365 the list of recorded branch islands. */
33367 static int
33368 no_previous_def (tree function_name)
33370 branch_island *bi;
33371 unsigned ix;
33373 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33374 if (function_name == bi->function_name)
33375 return 0;
33376 return 1;
33379 /* GET_PREV_LABEL gets the label name from the previous definition of
33380 the function. */
33382 static tree
33383 get_prev_label (tree function_name)
33385 branch_island *bi;
33386 unsigned ix;
33388 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33389 if (function_name == bi->function_name)
33390 return bi->label_name;
33391 return NULL_TREE;
33394 /* INSN is either a function call or a millicode call. It may have an
33395 unconditional jump in its delay slot.
33397 CALL_DEST is the routine we are calling. */
33399 char *
33400 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
33401 int cookie_operand_number)
33403 static char buf[256];
33404 if (darwin_emit_branch_islands
33405 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
33406 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
33408 tree labelname;
33409 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
33411 if (no_previous_def (funname))
33413 rtx label_rtx = gen_label_rtx ();
33414 char *label_buf, temp_buf[256];
33415 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
33416 CODE_LABEL_NUMBER (label_rtx));
33417 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
33418 labelname = get_identifier (label_buf);
33419 add_compiler_branch_island (labelname, funname, insn_line (insn));
33421 else
33422 labelname = get_prev_label (funname);
33424 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
33425 instruction will reach 'foo', otherwise link as 'bl L42'".
33426 "L42" should be a 'branch island', that will do a far jump to
33427 'foo'. Branch islands are generated in
33428 macho_branch_islands(). */
33429 sprintf (buf, "jbsr %%z%d,%.246s",
33430 dest_operand_number, IDENTIFIER_POINTER (labelname));
33432 else
33433 sprintf (buf, "bl %%z%d", dest_operand_number);
33434 return buf;
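/* An illustrative result (the label number is hypothetical): a long
   call to foo yields "jbsr %z0,L42", and macho_branch_islands later
   emits the matching far-jump island at L42; a short call yields plain
   "bl %z0".  */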
33437 /* Generate PIC and indirect symbol stubs. */
33439 void
33440 machopic_output_stub (FILE *file, const char *symb, const char *stub)
33442 unsigned int length;
33443 char *symbol_name, *lazy_ptr_name;
33444 char *local_label_0;
33445 static int label = 0;
33447 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33448 symb = (*targetm.strip_name_encoding) (symb);
33451 length = strlen (symb);
33452 symbol_name = XALLOCAVEC (char, length + 32);
33453 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
33455 lazy_ptr_name = XALLOCAVEC (char, length + 32);
33456 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
33458 if (flag_pic == 2)
33459 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
33460 else
33461 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
33463 if (flag_pic == 2)
33465 fprintf (file, "\t.align 5\n");
33467 fprintf (file, "%s:\n", stub);
33468 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33470 label++;
33471 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
33472 sprintf (local_label_0, "\"L%011d$spb\"", label);
33474 fprintf (file, "\tmflr r0\n");
33475 if (TARGET_LINK_STACK)
33477 char name[32];
33478 get_ppc476_thunk_name (name);
33479 fprintf (file, "\tbl %s\n", name);
33480 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33482 else
33484 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
33485 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33487 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
33488 lazy_ptr_name, local_label_0);
33489 fprintf (file, "\tmtlr r0\n");
33490 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
33491 (TARGET_64BIT ? "ldu" : "lwzu"),
33492 lazy_ptr_name, local_label_0);
33493 fprintf (file, "\tmtctr r12\n");
33494 fprintf (file, "\tbctr\n");
33496 else
33498 fprintf (file, "\t.align 4\n");
33500 fprintf (file, "%s:\n", stub);
33501 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33503 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
33504 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
33505 (TARGET_64BIT ? "ldu" : "lwzu"),
33506 lazy_ptr_name);
33507 fprintf (file, "\tmtctr r12\n");
33508 fprintf (file, "\tbctr\n");
33511 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
33512 fprintf (file, "%s:\n", lazy_ptr_name);
33513 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33514 fprintf (file, "%sdyld_stub_binding_helper\n",
33515 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
33518 /* Legitimize PIC addresses. If the address is already
33519 position-independent, we return ORIG. Newly generated
33520 position-independent addresses go into a reg. This is REG if
33521 nonzero; otherwise we allocate register(s) as necessary. */
33523 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
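/* SMALL_INT accepts exactly the signed 16-bit immediates: -0x8000 and
   0x7fff pass (their biased values 0 and 0xffff are below 0x10000),
   while 0x8000 fails since 0x8000 + 0x8000 == 0x10000.  */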
33525 rtx
33526 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
33527 rtx reg)
33529 rtx base, offset;
33531 if (reg == NULL && !reload_completed)
33532 reg = gen_reg_rtx (Pmode);
33534 if (GET_CODE (orig) == CONST)
33536 rtx reg_temp;
33538 if (GET_CODE (XEXP (orig, 0)) == PLUS
33539 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
33540 return orig;
33542 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
33544 /* Use a different reg for the intermediate value, as
33545 it will be marked UNCHANGING. */
33546 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
33547 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
33548 Pmode, reg_temp);
33549 offset =
33550 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
33551 Pmode, reg);
33553 if (GET_CODE (offset) == CONST_INT)
33555 if (SMALL_INT (offset))
33556 return plus_constant (Pmode, base, INTVAL (offset));
33557 else if (!reload_completed)
33558 offset = force_reg (Pmode, offset);
33559 else
33561 rtx mem = force_const_mem (Pmode, orig);
33562 return machopic_legitimize_pic_address (mem, Pmode, reg);
33565 return gen_rtx_PLUS (Pmode, base, offset);
33568 /* Fall back on generic machopic code. */
33569 return machopic_legitimize_pic_address (orig, mode, reg);
33572 /* Output a .machine directive for the Darwin assembler, and call
33573 the generic start_file routine. */
33575 static void
33576 rs6000_darwin_file_start (void)
33578 static const struct
33580 const char *arg;
33581 const char *name;
33582 HOST_WIDE_INT if_set;
33583 } mapping[] = {
33584 { "ppc64", "ppc64", MASK_64BIT },
33585 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
33586 { "power4", "ppc970", 0 },
33587 { "G5", "ppc970", 0 },
33588 { "7450", "ppc7450", 0 },
33589 { "7400", "ppc7400", MASK_ALTIVEC },
33590 { "G4", "ppc7400", 0 },
33591 { "750", "ppc750", 0 },
33592 { "740", "ppc750", 0 },
33593 { "G3", "ppc750", 0 },
33594 { "604e", "ppc604e", 0 },
33595 { "604", "ppc604", 0 },
33596 { "603e", "ppc603", 0 },
33597 { "603", "ppc603", 0 },
33598 { "601", "ppc601", 0 },
33599 { NULL, "ppc", 0 } };
33600 const char *cpu_id = "";
33601 size_t i;
33603 rs6000_file_start ();
33604 darwin_file_start ();
33606 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
33608 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
33609 cpu_id = rs6000_default_cpu;
33611 if (global_options_set.x_rs6000_cpu_index)
33612 cpu_id = processor_target_table[rs6000_cpu_index].name;
33614 /* Look through the mapping array. Pick the first name that either
33615 matches the argument, has a bit set in IF_SET that is also set
33616 in the target flags, or has a NULL name. */
33618 i = 0;
33619 while (mapping[i].arg != NULL
33620 && strcmp (mapping[i].arg, cpu_id) != 0
33621 && (mapping[i].if_set & rs6000_isa_flags) == 0)
33622 i++;
33624 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
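/* For example, "-mcpu=G4" selects ".machine ppc7400", and a 64-bit
   compilation with no matching -mcpu string still reaches "ppc64"
   through the MASK_64BIT test in IF_SET.  */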
33627 #endif /* TARGET_MACHO */
33629 #if TARGET_ELF
33630 static int
33631 rs6000_elf_reloc_rw_mask (void)
33633 if (flag_pic)
33634 return 3;
33635 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33636 return 2;
33637 else
33638 return 0;
33641 /* Record an element in the table of global constructors. SYMBOL is
33642 a SYMBOL_REF of the function to be called; PRIORITY is a number
33643 between 0 and MAX_INIT_PRIORITY.
33645 This differs from default_named_section_asm_out_constructor in
33646 that we have special handling for -mrelocatable. */
33648 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
33649 static void
33650 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
33652 const char *section = ".ctors";
33653 char buf[18];
33655 if (priority != DEFAULT_INIT_PRIORITY)
33657 sprintf (buf, ".ctors.%.5u",
33658 /* Invert the numbering so the linker puts us in the proper
33659 order; constructors are run from right to left, and the
33660 linker sorts in increasing order. */
33661 MAX_INIT_PRIORITY - priority);
33662 section = buf;
33665 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33666 assemble_align (POINTER_SIZE);
33668 if (DEFAULT_ABI == ABI_V4
33669 && (TARGET_RELOCATABLE || flag_pic > 1))
33671 fputs ("\t.long (", asm_out_file);
33672 output_addr_const (asm_out_file, symbol);
33673 fputs (")@fixup\n", asm_out_file);
33675 else
33676 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
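/* As a worked example of the inversion: assuming MAX_INIT_PRIORITY is
   65535, a constructor registered with priority 101 lands in section
   ".ctors.65434", so the linker's increasing sort yields the intended
   right-to-left execution order.  */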
33679 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
33680 static void
33681 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
33683 const char *section = ".dtors";
33684 char buf[18];
33686 if (priority != DEFAULT_INIT_PRIORITY)
33688 sprintf (buf, ".dtors.%.5u",
33689 /* Invert the numbering so the linker puts us in the proper
33690 order; destructors are run from right to left, and the
33691 linker sorts in increasing order. */
33692 MAX_INIT_PRIORITY - priority);
33693 section = buf;
33696 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33697 assemble_align (POINTER_SIZE);
33699 if (DEFAULT_ABI == ABI_V4
33700 && (TARGET_RELOCATABLE || flag_pic > 1))
33702 fputs ("\t.long (", asm_out_file);
33703 output_addr_const (asm_out_file, symbol);
33704 fputs (")@fixup\n", asm_out_file);
33706 else
33707 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33710 void
33711 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
33713 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
33715 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
33716 ASM_OUTPUT_LABEL (file, name);
33717 fputs (DOUBLE_INT_ASM_OP, file);
33718 rs6000_output_function_entry (file, name);
33719 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
33720 if (DOT_SYMBOLS)
33722 fputs ("\t.size\t", file);
33723 assemble_name (file, name);
33724 fputs (",24\n\t.type\t.", file);
33725 assemble_name (file, name);
33726 fputs (",@function\n", file);
33727 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
33729 fputs ("\t.globl\t.", file);
33730 assemble_name (file, name);
33731 putc ('\n', file);
33734 else
33735 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33736 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33737 rs6000_output_function_entry (file, name);
33738 fputs (":\n", file);
33739 return;
33742 int uses_toc;
33743 if (DEFAULT_ABI == ABI_V4
33744 && (TARGET_RELOCATABLE || flag_pic > 1)
33745 && !TARGET_SECURE_PLT
33746 && (!constant_pool_empty_p () || crtl->profile)
33747 && (uses_toc = uses_TOC ()))
33749 char buf[256];
33751 if (uses_toc == 2)
33752 switch_to_other_text_partition ();
33753 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33755 fprintf (file, "\t.long ");
33756 assemble_name (file, toc_label_name);
33757 need_toc_init = 1;
33758 putc ('-', file);
33759 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33760 assemble_name (file, buf);
33761 putc ('\n', file);
33762 if (uses_toc == 2)
33763 switch_to_other_text_partition ();
33766 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33767 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33769 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
33771 char buf[256];
33773 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33775 fprintf (file, "\t.quad .TOC.-");
33776 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33777 assemble_name (file, buf);
33778 putc ('\n', file);
33781 if (DEFAULT_ABI == ABI_AIX)
33783 const char *desc_name, *orig_name;
33785 orig_name = (*targetm.strip_name_encoding) (name);
33786 desc_name = orig_name;
33787 while (*desc_name == '.')
33788 desc_name++;
33790 if (TREE_PUBLIC (decl))
33791 fprintf (file, "\t.globl %s\n", desc_name);
33793 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33794 fprintf (file, "%s:\n", desc_name);
33795 fprintf (file, "\t.long %s\n", orig_name);
33796 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
33797 fputs ("\t.long 0\n", file);
33798 fprintf (file, "\t.previous\n");
33800 ASM_OUTPUT_LABEL (file, name);
33803 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
33804 static void
33805 rs6000_elf_file_end (void)
33807 #ifdef HAVE_AS_GNU_ATTRIBUTE
33808 /* ??? The value emitted depends on options active at file end.
33809 Assume anyone using #pragma or attributes that might change
33810 options knows what they are doing. */
33811 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
33812 && rs6000_passes_float)
33814 int fp;
33816 if (TARGET_DF_FPR)
33817 fp = 1;
33818 else if (TARGET_SF_FPR)
33819 fp = 3;
33820 else
33821 fp = 2;
33822 if (rs6000_passes_long_double)
33824 if (!TARGET_LONG_DOUBLE_128)
33825 fp |= 2 * 4;
33826 else if (TARGET_IEEEQUAD)
33827 fp |= 3 * 4;
33828 else
33829 fp |= 1 * 4;
33831 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
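/* A sketch of how the attribute value decodes (mirroring the code
   above, not an external table): the low two bits describe scalar FP
   (1 = 64-bit hard float, 2 = soft float, 3 = single-precision hard
   float) and the next two describe long double (1 = 128-bit IBM
   extended, 2 = 64-bit, 3 = IEEE 128-bit); e.g. hard double-precision
   float with IBM extended long double emits ".gnu_attribute 4, 5".  */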
33833 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33835 if (rs6000_passes_vector)
33836 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33837 (TARGET_ALTIVEC_ABI ? 2 : 1));
33838 if (rs6000_returns_struct)
33839 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33840 aix_struct_return ? 2 : 1);
33842 #endif
33843 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33844 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33845 file_end_indicate_exec_stack ();
33846 #endif
33848 if (flag_split_stack)
33849 file_end_indicate_split_stack ();
33851 if (cpu_builtin_p)
33853 /* We have expanded a CPU builtin, so we need to emit a reference to
33854 the special symbol that LIBC uses to declare that it supports the
33855 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
33856 switch_to_section (data_section);
33857 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33858 fprintf (asm_out_file, "\t%s %s\n",
33859 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33862 #endif
33864 #if TARGET_XCOFF
33866 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33867 #define HAVE_XCOFF_DWARF_EXTRAS 0
33868 #endif
33870 static enum unwind_info_type
33871 rs6000_xcoff_debug_unwind_info (void)
33873 return UI_NONE;
33876 static void
33877 rs6000_xcoff_asm_output_anchor (rtx symbol)
33879 char buffer[100];
33881 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33882 SYMBOL_REF_BLOCK_OFFSET (symbol));
33883 fprintf (asm_out_file, "%s", SET_ASM_OP);
33884 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33885 fprintf (asm_out_file, ",");
33886 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33887 fprintf (asm_out_file, "\n");
33890 static void
33891 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33893 fputs (GLOBAL_ASM_OP, stream);
33894 RS6000_OUTPUT_BASENAME (stream, name);
33895 putc ('\n', stream);
33898 /* A get_unnamed_section callback, used for read-only sections.
33899 DIRECTIVE points to the section string variable. */
33901 static void
33902 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33904 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33905 *(const char *const *) directive,
33906 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33909 /* Likewise for read-write sections. */
33911 static void
33912 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33914 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33915 *(const char *const *) directive,
33916 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33919 static void
33920 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33922 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33923 *(const char *const *) directive,
33924 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33927 /* A get_unnamed_section callback, used for switching to toc_section. */
33929 static void
33930 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33932 if (TARGET_MINIMAL_TOC)
33934 /* toc_section is always selected at least once from
33935 rs6000_xcoff_file_start, so this is guaranteed to
33936 always be defined once and only once in each file. */
33937 if (!toc_initialized)
33939 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33940 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33941 toc_initialized = 1;
33943 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33944 (TARGET_32BIT ? "" : ",3"));
33946 else
33947 fputs ("\t.toc\n", asm_out_file);
33950 /* Implement TARGET_ASM_INIT_SECTIONS. */
33952 static void
33953 rs6000_xcoff_asm_init_sections (void)
33955 read_only_data_section
33956 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33957 &xcoff_read_only_section_name);
33959 private_data_section
33960 = get_unnamed_section (SECTION_WRITE,
33961 rs6000_xcoff_output_readwrite_section_asm_op,
33962 &xcoff_private_data_section_name);
33964 tls_data_section
33965 = get_unnamed_section (SECTION_TLS,
33966 rs6000_xcoff_output_tls_section_asm_op,
33967 &xcoff_tls_data_section_name);
33969 tls_private_data_section
33970 = get_unnamed_section (SECTION_TLS,
33971 rs6000_xcoff_output_tls_section_asm_op,
33972 &xcoff_private_data_section_name);
33974 read_only_private_data_section
33975 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33976 &xcoff_private_data_section_name);
33978 toc_section
33979 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33981 readonly_data_section = read_only_data_section;
33984 static int
33985 rs6000_xcoff_reloc_rw_mask (void)
33987 return 3;
33990 static void
33991 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33992 tree decl ATTRIBUTE_UNUSED)
33994 int smclass;
33995 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33997 if (flags & SECTION_EXCLUDE)
33998 smclass = 4;
33999 else if (flags & SECTION_DEBUG)
34001 fprintf (asm_out_file, "\t.dwsect %s\n", name);
34002 return;
34004 else if (flags & SECTION_CODE)
34005 smclass = 0;
34006 else if (flags & SECTION_TLS)
34007 smclass = 3;
34008 else if (flags & SECTION_WRITE)
34009 smclass = 2;
34010 else
34011 smclass = 1;
34013 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
34014 (flags & SECTION_CODE) ? "." : "",
34015 name, suffix[smclass], flags & SECTION_ENTSIZE);
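/* For instance, a writable named section "mydata" (name hypothetical)
   comes out as ".csect mydata[RW],N", where N is the log2 alignment
   stored in the SECTION_ENTSIZE bits by rs6000_xcoff_section_type_flags
   below, and a code section additionally gets a "." prefix with the
   "PR" mapping class.  */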
34018 #define IN_NAMED_SECTION(DECL) \
34019 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
34020 && DECL_SECTION_NAME (DECL) != NULL)
34022 static section *
34023 rs6000_xcoff_select_section (tree decl, int reloc,
34024 unsigned HOST_WIDE_INT align)
34026 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
34027 named section. */
34028 if (align > BIGGEST_ALIGNMENT)
34030 resolve_unique_section (decl, reloc, true);
34031 if (IN_NAMED_SECTION (decl))
34032 return get_named_section (decl, NULL, reloc);
34035 if (decl_readonly_section (decl, reloc))
34037 if (TREE_PUBLIC (decl))
34038 return read_only_data_section;
34039 else
34040 return read_only_private_data_section;
34042 else
34044 #if HAVE_AS_TLS
34045 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34047 if (TREE_PUBLIC (decl))
34048 return tls_data_section;
34049 else if (bss_initializer_p (decl))
34051 /* Convert to COMMON to emit in BSS. */
34052 DECL_COMMON (decl) = 1;
34053 return tls_comm_section;
34055 else
34056 return tls_private_data_section;
34058 else
34059 #endif
34060 if (TREE_PUBLIC (decl))
34061 return data_section;
34062 else
34063 return private_data_section;
34067 static void
34068 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
34070 const char *name;
34072 /* Use select_section for private data and uninitialized data with
34073 alignment <= BIGGEST_ALIGNMENT. */
34074 if (!TREE_PUBLIC (decl)
34075 || DECL_COMMON (decl)
34076 || (DECL_INITIAL (decl) == NULL_TREE
34077 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
34078 || DECL_INITIAL (decl) == error_mark_node
34079 || (flag_zero_initialized_in_bss
34080 && initializer_zerop (DECL_INITIAL (decl))))
34081 return;
34083 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34084 name = (*targetm.strip_name_encoding) (name);
34085 set_decl_section_name (decl, name);
34088 /* Select section for constant in constant pool.
34090 On RS/6000, all constants are in the private read-only data area.
34091 However, if this is being placed in the TOC it must be output as a
34092 toc entry. */
34094 static section *
34095 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
34096 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
34098 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
34099 return toc_section;
34100 else
34101 return read_only_private_data_section;
34104 /* Remove any trailing [DS] or the like from the symbol name. */
34106 static const char *
34107 rs6000_xcoff_strip_name_encoding (const char *name)
34109 size_t len;
34110 if (*name == '*')
34111 name++;
34112 len = strlen (name);
34113 if (name[len - 1] == ']')
34114 return ggc_alloc_string (name, len - 4);
34115 else
34116 return name;
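/* For instance, "foo[DS]" is returned as "foo" -- every XCOFF mapping
   class suffix ("[DS]", "[RW]", ...) is four characters, hence the
   fixed len - 4 -- and a leading '*' is dropped first.  */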
34119 /* Section attributes. AIX is always PIC. */
34121 static unsigned int
34122 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
34124 unsigned int align;
34125 unsigned int flags = default_section_type_flags (decl, name, reloc);
34127 /* Align to at least UNIT size. */
34128 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
34129 align = MIN_UNITS_PER_WORD;
34130 else
34131 /* Increase alignment of large objects if not already stricter. */
34132 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
34133 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
34134 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
34136 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
34139 /* Output at beginning of assembler file.
34141 Initialize the section names for the RS/6000 at this point.
34143 Specify filename, including full path, to assembler.
34145 We want to go into the TOC section so at least one .toc will be emitted.
34146 Also, in order to output proper .bs/.es pairs, we need at least one static
34147 [RW] section emitted.
34149 Finally, declare mcount when profiling to make the assembler happy. */
34151 static void
34152 rs6000_xcoff_file_start (void)
34154 rs6000_gen_section_name (&xcoff_bss_section_name,
34155 main_input_filename, ".bss_");
34156 rs6000_gen_section_name (&xcoff_private_data_section_name,
34157 main_input_filename, ".rw_");
34158 rs6000_gen_section_name (&xcoff_read_only_section_name,
34159 main_input_filename, ".ro_");
34160 rs6000_gen_section_name (&xcoff_tls_data_section_name,
34161 main_input_filename, ".tls_");
34162 rs6000_gen_section_name (&xcoff_tbss_section_name,
34163 main_input_filename, ".tbss_[UL]");
34165 fputs ("\t.file\t", asm_out_file);
34166 output_quoted_string (asm_out_file, main_input_filename);
34167 fputc ('\n', asm_out_file);
34168 if (write_symbols != NO_DEBUG)
34169 switch_to_section (private_data_section);
34170 switch_to_section (toc_section);
34171 switch_to_section (text_section);
34172 if (profile_flag)
34173 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
34174 rs6000_file_start ();
34177 /* Output at end of assembler file.
34178 On the RS/6000, referencing data should automatically pull in text. */
34180 static void
34181 rs6000_xcoff_file_end (void)
34183 switch_to_section (text_section);
34184 fputs ("_section_.text:\n", asm_out_file);
34185 switch_to_section (data_section);
34186 fputs (TARGET_32BIT
34187 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
34188 asm_out_file);
34191 struct declare_alias_data
34193 FILE *file;
34194 bool function_descriptor;
34197 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
34199 static bool
34200 rs6000_declare_alias (struct symtab_node *n, void *d)
34202 struct declare_alias_data *data = (struct declare_alias_data *)d;
34203 /* Main symbol is output specially, because varasm machinery does part of
34204 the job for us - we do not need to declare .globl/lglobs and such. */
34205 if (!n->alias || n->weakref)
34206 return false;
34208 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
34209 return false;
34211 /* Prevent assemble_alias from trying to use .set pseudo operation
34212 that does not behave as expected by the middle-end. */
34213 TREE_ASM_WRITTEN (n->decl) = true;
34215 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
34216 char *buffer = (char *) alloca (strlen (name) + 2);
34217 char *p;
34218 int dollar_inside = 0;
34220 strcpy (buffer, name);
34221 p = strchr (buffer, '$');
34222 while (p) {
34223 *p = '_';
34224 dollar_inside++;
34225 p = strchr (p + 1, '$');
34227 if (TREE_PUBLIC (n->decl))
34229 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
34231 if (dollar_inside) {
34232 if (data->function_descriptor)
34233 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34234 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34236 if (data->function_descriptor)
34238 fputs ("\t.globl .", data->file);
34239 RS6000_OUTPUT_BASENAME (data->file, buffer);
34240 putc ('\n', data->file);
34242 fputs ("\t.globl ", data->file);
34243 RS6000_OUTPUT_BASENAME (data->file, buffer);
34244 putc ('\n', data->file);
34246 #ifdef ASM_WEAKEN_DECL
34247 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
34248 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
34249 #endif
34251 else
34253 if (dollar_inside)
34255 if (data->function_descriptor)
34256 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34257 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34259 if (data->function_descriptor)
34261 fputs ("\t.lglobl .", data->file);
34262 RS6000_OUTPUT_BASENAME (data->file, buffer);
34263 putc ('\n', data->file);
34265 fputs ("\t.lglobl ", data->file);
34266 RS6000_OUTPUT_BASENAME (data->file, buffer);
34267 putc ('\n', data->file);
34269 if (data->function_descriptor)
34270 fputs (".", data->file);
34271 RS6000_OUTPUT_BASENAME (data->file, buffer);
34272 fputs (":\n", data->file);
34273 return false;
34277 #ifdef HAVE_GAS_HIDDEN
34278 /* Helper function to calculate visibility of a DECL
34279 and return the value as a const string. */
34281 static const char *
34282 rs6000_xcoff_visibility (tree decl)
34284 static const char * const visibility_types[] = {
34285 "", ",protected", ",hidden", ",internal"
34288 enum symbol_visibility vis = DECL_VISIBILITY (decl);
34290 if (TREE_CODE (decl) == FUNCTION_DECL
34291 && cgraph_node::get (decl)
34292 && cgraph_node::get (decl)->instrumentation_clone
34293 && cgraph_node::get (decl)->instrumented_version)
34294 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
34296 return visibility_types[vis];
34298 #endif
34301 /* This macro produces the initial definition of a function name.
34302 On the RS/6000, we need to place an extra '.' in the function name and
34303 output the function descriptor.
34304 Dollar signs are converted to underscores.
34306 The csect for the function will have already been created when
34307 text_section was selected. We do have to go back to that csect, however.
34309 The third and fourth parameters to the .function pseudo-op (16 and 044)
34310 are placeholders which no longer have any use.
34312 Because AIX assembler's .set command has unexpected semantics, we output
34313 all aliases as alternative labels in front of the definition. */
34315 void
34316 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
34318 char *buffer = (char *) alloca (strlen (name) + 1);
34319 char *p;
34320 int dollar_inside = 0;
34321 struct declare_alias_data data = {file, false};
34323 strcpy (buffer, name);
34324 p = strchr (buffer, '$');
34325 while (p) {
34326 *p = '_';
34327 dollar_inside++;
34328 p = strchr (p + 1, '$');
34330 if (TREE_PUBLIC (decl))
34332 if (!RS6000_WEAK || !DECL_WEAK (decl))
34334 if (dollar_inside) {
34335 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
34336 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
34338 fputs ("\t.globl .", file);
34339 RS6000_OUTPUT_BASENAME (file, buffer);
34340 #ifdef HAVE_GAS_HIDDEN
34341 fputs (rs6000_xcoff_visibility (decl), file);
34342 #endif
34343 putc ('\n', file);
34346 else
34348 if (dollar_inside) {
34349 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
34350 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
34352 fputs ("\t.lglobl .", file);
34353 RS6000_OUTPUT_BASENAME (file, buffer);
34354 putc ('\n', file);
34356 fputs ("\t.csect ", file);
34357 RS6000_OUTPUT_BASENAME (file, buffer);
34358 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
34359 RS6000_OUTPUT_BASENAME (file, buffer);
34360 fputs (":\n", file);
34361 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34362 &data, true);
34363 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
34364 RS6000_OUTPUT_BASENAME (file, buffer);
34365 fputs (", TOC[tc0], 0\n", file);
34366 in_section = NULL;
34367 switch_to_section (function_section (decl));
34368 putc ('.', file);
34369 RS6000_OUTPUT_BASENAME (file, buffer);
34370 fputs (":\n", file);
34371 data.function_descriptor = true;
34372 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34373 &data, true);
34374 if (!DECL_IGNORED_P (decl))
34376 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34377 xcoffout_declare_function (file, decl, buffer);
34378 else if (write_symbols == DWARF2_DEBUG)
34380 name = (*targetm.strip_name_encoding) (name);
34381 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
34384 return;
34388 /* Output assembly language to globalize a symbol from a DECL,
34389 possibly with visibility. */
34391 void
34392 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
34394 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
34395 fputs (GLOBAL_ASM_OP, stream);
34396 RS6000_OUTPUT_BASENAME (stream, name);
34397 #ifdef HAVE_GAS_HIDDEN
34398 fputs (rs6000_xcoff_visibility (decl), stream);
34399 #endif
34400 putc ('\n', stream);
34403 /* Output assembly language to define a symbol as COMMON from a DECL,
34404 possibly with visibility. */
34406 void
34407 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
34408 tree decl ATTRIBUTE_UNUSED,
34409 const char *name,
34410 unsigned HOST_WIDE_INT size,
34411 unsigned HOST_WIDE_INT align)
34413 unsigned HOST_WIDE_INT align2 = 2;
34415 if (align > 32)
34416 align2 = floor_log2 (align / BITS_PER_UNIT);
34417 else if (size > 4)
34418 align2 = 3;
34420 fputs (COMMON_ASM_OP, stream);
34421 RS6000_OUTPUT_BASENAME (stream, name);
34423 fprintf (stream,
34424 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
34425 size, align2);
34427 #ifdef HAVE_GAS_HIDDEN
34428 if (decl != NULL)
34429 fputs (rs6000_xcoff_visibility (decl), stream);
34430 #endif
34431 putc ('\n', stream);
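/* A worked example: ALIGN is in bits, so a 16-byte-aligned common
   symbol arrives with ALIGN == 128 and gets align2 ==
   floor_log2 (128 / 8) == 4, i.e. the directive requests 2**4 = 16
   byte alignment.  */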
34434 /* This macro produces the initial definition of an object (variable) name.
34435 Because AIX assembler's .set command has unexpected semantics, we output
34436 all aliases as alternative labels in front of the definition. */
34438 void
34439 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
34441 struct declare_alias_data data = {file, false};
34442 RS6000_OUTPUT_BASENAME (file, name);
34443 fputs (":\n", file);
34444 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34445 &data, true);
34448 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
34450 void
34451 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
34453 fputs (integer_asm_op (size, FALSE), file);
34454 assemble_name (file, label);
34455 fputs ("-$", file);
34458 /* Output a symbol offset relative to the dbase for the current object.
34459 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
34460 signed offsets.
34462 __gcc_unwind_dbase is embedded in all executables/libraries through
34463 libgcc/config/rs6000/crtdbase.S. */
34465 void
34466 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
34468 fputs (integer_asm_op (size, FALSE), file);
34469 assemble_name (file, label);
34470 fputs("-__gcc_unwind_dbase", file);
34473 #ifdef HAVE_AS_TLS
34474 static void
34475 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
34477 rtx symbol;
34478 int flags;
34479 const char *symname;
34481 default_encode_section_info (decl, rtl, first);
34483 /* Careful not to prod global register variables. */
34484 if (!MEM_P (rtl))
34485 return;
34486 symbol = XEXP (rtl, 0);
34487 if (GET_CODE (symbol) != SYMBOL_REF)
34488 return;
34490 flags = SYMBOL_REF_FLAGS (symbol);
34492 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34493 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
34495 SYMBOL_REF_FLAGS (symbol) = flags;
34497 /* Append mapping class to extern decls. */
34498 symname = XSTR (symbol, 0);
34499 if (decl /* sync condition with assemble_external () */
34500 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
34501 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
34502 || TREE_CODE (decl) == FUNCTION_DECL)
34503 && symname[strlen (symname) - 1] != ']')
34505 char *newname = (char *) alloca (strlen (symname) + 5);
34506 strcpy (newname, symname);
34507 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
34508 ? "[DS]" : "[UA]"));
34509 XSTR (symbol, 0) = ggc_strdup (newname);
34512 #endif /* HAVE_AS_TLS */
34513 #endif /* TARGET_XCOFF */
34515 void
34516 rs6000_asm_weaken_decl (FILE *stream, tree decl,
34517 const char *name, const char *val)
34519 fputs ("\t.weak\t", stream);
34520 RS6000_OUTPUT_BASENAME (stream, name);
34521 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34522 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34524 if (TARGET_XCOFF)
34525 fputs ("[DS]", stream);
34526 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34527 if (TARGET_XCOFF)
34528 fputs (rs6000_xcoff_visibility (decl), stream);
34529 #endif
34530 fputs ("\n\t.weak\t.", stream);
34531 RS6000_OUTPUT_BASENAME (stream, name);
34533 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34534 if (TARGET_XCOFF)
34535 fputs (rs6000_xcoff_visibility (decl), stream);
34536 #endif
34537 fputc ('\n', stream);
34538 if (val)
34540 #ifdef ASM_OUTPUT_DEF
34541 ASM_OUTPUT_DEF (stream, name, val);
34542 #endif
34543 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34544 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34546 fputs ("\t.set\t.", stream);
34547 RS6000_OUTPUT_BASENAME (stream, name);
34548 fputs (",.", stream);
34549 RS6000_OUTPUT_BASENAME (stream, val);
34550 fputc ('\n', stream);
34556 /* Return true if INSN should not be copied. */
34558 static bool
34559 rs6000_cannot_copy_insn_p (rtx_insn *insn)
34561 return recog_memoized (insn) >= 0
34562 && get_attr_cannot_copy (insn);
34565 /* Compute a (partial) cost for rtx X. Return true if the complete
34566 cost has been computed, and false if subexpressions should be
34567 scanned. In either case, *TOTAL contains the cost result. */
34569 static bool
34570 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
34571 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
34573 int code = GET_CODE (x);
34575 switch (code)
34577 /* On the RS/6000, if it is valid in the insn, it is free. */
34578 case CONST_INT:
34579 if (((outer_code == SET
34580 || outer_code == PLUS
34581 || outer_code == MINUS)
34582 && (satisfies_constraint_I (x)
34583 || satisfies_constraint_L (x)))
34584 || (outer_code == AND
34585 && (satisfies_constraint_K (x)
34586 || (mode == SImode
34587 ? satisfies_constraint_L (x)
34588 : satisfies_constraint_J (x))))
34589 || ((outer_code == IOR || outer_code == XOR)
34590 && (satisfies_constraint_K (x)
34591 || (mode == SImode
34592 ? satisfies_constraint_L (x)
34593 : satisfies_constraint_J (x))))
34594 || outer_code == ASHIFT
34595 || outer_code == ASHIFTRT
34596 || outer_code == LSHIFTRT
34597 || outer_code == ROTATE
34598 || outer_code == ROTATERT
34599 || outer_code == ZERO_EXTRACT
34600 || (outer_code == MULT
34601 && satisfies_constraint_I (x))
34602 || ((outer_code == DIV || outer_code == UDIV
34603 || outer_code == MOD || outer_code == UMOD)
34604 && exact_log2 (INTVAL (x)) >= 0)
34605 || (outer_code == COMPARE
34606 && (satisfies_constraint_I (x)
34607 || satisfies_constraint_K (x)))
34608 || ((outer_code == EQ || outer_code == NE)
34609 && (satisfies_constraint_I (x)
34610 || satisfies_constraint_K (x)
34611 || (mode == SImode
34612 ? satisfies_constraint_L (x)
34613 : satisfies_constraint_J (x))))
34614 || (outer_code == GTU
34615 && satisfies_constraint_I (x))
34616 || (outer_code == LTU
34617 && satisfies_constraint_P (x)))
34619 *total = 0;
34620 return true;
34622 else if ((outer_code == PLUS
34623 && reg_or_add_cint_operand (x, VOIDmode))
34624 || (outer_code == MINUS
34625 && reg_or_sub_cint_operand (x, VOIDmode))
34626 || ((outer_code == SET
34627 || outer_code == IOR
34628 || outer_code == XOR)
34629 && (INTVAL (x)
34630 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
34632 *total = COSTS_N_INSNS (1);
34633 return true;
34635 /* FALLTHRU */
34637 case CONST_DOUBLE:
34638 case CONST_WIDE_INT:
34639 case CONST:
34640 case HIGH:
34641 case SYMBOL_REF:
34642 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34643 return true;
34645 case MEM:
34646 /* When optimizing for size, MEM should be slightly more expensive
34647 than generating the address, e.g., (plus (reg) (const)).
34648 L1 cache latency is about two instructions. */
34649 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34650 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
34651 *total += COSTS_N_INSNS (100);
34652 return true;
34654 case LABEL_REF:
34655 *total = 0;
34656 return true;
34658 case PLUS:
34659 case MINUS:
34660 if (FLOAT_MODE_P (mode))
34661 *total = rs6000_cost->fp;
34662 else
34663 *total = COSTS_N_INSNS (1);
34664 return false;
34666 case MULT:
34667 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34668 && satisfies_constraint_I (XEXP (x, 1)))
34670 if (INTVAL (XEXP (x, 1)) >= -256
34671 && INTVAL (XEXP (x, 1)) <= 255)
34672 *total = rs6000_cost->mulsi_const9;
34673 else
34674 *total = rs6000_cost->mulsi_const;
34676 else if (mode == SFmode)
34677 *total = rs6000_cost->fp;
34678 else if (FLOAT_MODE_P (mode))
34679 *total = rs6000_cost->dmul;
34680 else if (mode == DImode)
34681 *total = rs6000_cost->muldi;
34682 else
34683 *total = rs6000_cost->mulsi;
34684 return false;
34686 case FMA:
34687 if (mode == SFmode)
34688 *total = rs6000_cost->fp;
34689 else
34690 *total = rs6000_cost->dmul;
34691 break;
34693 case DIV:
34694 case MOD:
34695 if (FLOAT_MODE_P (mode))
34697 *total = mode == DFmode ? rs6000_cost->ddiv
34698 : rs6000_cost->sdiv;
34699 return false;
34701 /* FALLTHRU */
34703 case UDIV:
34704 case UMOD:
34705 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34706 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
34708 if (code == DIV || code == MOD)
34709 /* Shift, addze */
34710 *total = COSTS_N_INSNS (2);
34711 else
34712 /* Shift */
34713 *total = COSTS_N_INSNS (1);
34715 else
34717 if (GET_MODE (XEXP (x, 1)) == DImode)
34718 *total = rs6000_cost->divdi;
34719 else
34720 *total = rs6000_cost->divsi;
34722 /* Add in shift and subtract for MOD unless we have a mod instruction. */
34723 if (!TARGET_MODULO && (code == MOD || code == UMOD))
34724 *total += COSTS_N_INSNS (2);
34725 return false;
34727 case CTZ:
34728 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
34729 return false;
34731 case FFS:
34732 *total = COSTS_N_INSNS (4);
34733 return false;
34735 case POPCOUNT:
34736 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
34737 return false;
34739 case PARITY:
34740 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
34741 return false;
34743 case NOT:
34744 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
34745 *total = 0;
34746 else
34747 *total = COSTS_N_INSNS (1);
34748 return false;
34750 case AND:
34751 if (CONST_INT_P (XEXP (x, 1)))
34753 rtx left = XEXP (x, 0);
34754 rtx_code left_code = GET_CODE (left);
34756 /* rotate-and-mask: 1 insn. */
34757 if ((left_code == ROTATE
34758 || left_code == ASHIFT
34759 || left_code == LSHIFTRT)
34760 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
34762 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
34763 if (!CONST_INT_P (XEXP (left, 1)))
34764 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
34765 *total += COSTS_N_INSNS (1);
34766 return true;
34769 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34770 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
34771 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34772 || (val & 0xffff) == val
34773 || (val & 0xffff0000) == val
34774 || ((val & 0xffff) == 0 && mode == SImode))
34776 *total = rtx_cost (left, mode, AND, 0, speed);
34777 *total += COSTS_N_INSNS (1);
34778 return true;
34781 /* 2 insns. */
34782 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34784 *total = rtx_cost (left, mode, AND, 0, speed);
34785 *total += COSTS_N_INSNS (2);
34786 return true;
34790 *total = COSTS_N_INSNS (1);
34791 return false;
34793 case IOR:
34794 /* FIXME */
34795 *total = COSTS_N_INSNS (1);
34796 return true;
34798 case CLZ:
34799 case XOR:
34800 case ZERO_EXTRACT:
34801 *total = COSTS_N_INSNS (1);
34802 return false;
34804 case ASHIFT:
34805 /* The EXTSWSLI instruction combines a sign extend and a shift; don't
34806 count the sign extend and the shift separately within the insn. */
34807 if (TARGET_EXTSWSLI && mode == DImode
34808 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34809 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34811 *total = 0;
34812 return false;
34814 /* fall through */
34816 case ASHIFTRT:
34817 case LSHIFTRT:
34818 case ROTATE:
34819 case ROTATERT:
34820 /* Handle mul_highpart. */
34821 if (outer_code == TRUNCATE
34822 && GET_CODE (XEXP (x, 0)) == MULT)
34824 if (mode == DImode)
34825 *total = rs6000_cost->muldi;
34826 else
34827 *total = rs6000_cost->mulsi;
34828 return true;
34830 else if (outer_code == AND)
34831 *total = 0;
34832 else
34833 *total = COSTS_N_INSNS (1);
34834 return false;
34836 case SIGN_EXTEND:
34837 case ZERO_EXTEND:
34838 if (GET_CODE (XEXP (x, 0)) == MEM)
34839 *total = 0;
34840 else
34841 *total = COSTS_N_INSNS (1);
34842 return false;
34844 case COMPARE:
34845 case NEG:
34846 case ABS:
34847 if (!FLOAT_MODE_P (mode))
34849 *total = COSTS_N_INSNS (1);
34850 return false;
34852 /* FALLTHRU */
34854 case FLOAT:
34855 case UNSIGNED_FLOAT:
34856 case FIX:
34857 case UNSIGNED_FIX:
34858 case FLOAT_TRUNCATE:
34859 *total = rs6000_cost->fp;
34860 return false;
34862 case FLOAT_EXTEND:
34863 if (mode == DFmode)
34864 *total = rs6000_cost->sfdf_convert;
34865 else
34866 *total = rs6000_cost->fp;
34867 return false;
34869 case UNSPEC:
34870 switch (XINT (x, 1))
34872 case UNSPEC_FRSP:
34873 *total = rs6000_cost->fp;
34874 return true;
34876 default:
34877 break;
34879 break;
34881 case CALL:
34882 case IF_THEN_ELSE:
34883 if (!speed)
34885 *total = COSTS_N_INSNS (1);
34886 return true;
34888 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
34890 *total = rs6000_cost->fp;
34891 return false;
34893 break;
34895 case NE:
34896 case EQ:
34897 case GTU:
34898 case LTU:
34899 /* Carry bit requires mode == Pmode.
34900 NEG or PLUS already counted so only add one. */
34901 if (mode == Pmode
34902 && (outer_code == NEG || outer_code == PLUS))
34904 *total = COSTS_N_INSNS (1);
34905 return true;
34907 /* FALLTHRU */
34909 case GT:
34910 case LT:
34911 case UNORDERED:
34912 if (outer_code == SET)
34914 if (XEXP (x, 1) == const0_rtx)
34916 *total = COSTS_N_INSNS (2);
34917 return true;
34919 else
34921 *total = COSTS_N_INSNS (3);
34922 return false;
34925 /* CC COMPARE. */
34926 if (outer_code == COMPARE)
34928 *total = 0;
34929 return true;
34931 break;
34933 default:
34934 break;
34937 return false;
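/* Illustrative sketch, not part of this file: the immediate ranges the
   CONST_INT case above tests, assuming the usual rs6000 constraint
   meanings (I: signed 16-bit; K: unsigned 16-bit; L: signed 16-bit
   shifted left 16 bits).  These helper names are hypothetical.

     #include <stdint.h>

     static int fits_I (int64_t v) { return v >= -32768 && v <= 32767; }
     static int fits_K (int64_t v) { return (v & ~(int64_t) 0xffff) == 0; }
     static int fits_L (int64_t v)
     { return (v & 0xffff) == 0 && fits_I (v >> 16); }

   A constant such as 100 feeding a PLUS therefore costs 0 above, while
   0x12345 fits none of these fields and takes the later, non-free
   cases.  */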
34940 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
34942 static bool
34943 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34944 int opno, int *total, bool speed)
34946 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34948 fprintf (stderr,
34949 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34950 "opno = %d, total = %d, speed = %s, x:\n",
34951 ret ? "complete" : "scan inner",
34952 GET_MODE_NAME (mode),
34953 GET_RTX_NAME (outer_code),
34954 opno,
34955 *total,
34956 speed ? "true" : "false");
34958 debug_rtx (x);
34960 return ret;
34963 static int
34964 rs6000_insn_cost (rtx_insn *insn, bool speed)
34966 if (recog_memoized (insn) < 0)
34967 return 0;
34969 if (!speed)
34970 return get_attr_length (insn);
34972 int cost = get_attr_cost (insn);
34973 if (cost > 0)
34974 return cost;
34976 int n = get_attr_length (insn) / 4;
34977 enum attr_type type = get_attr_type (insn);
34979 switch (type)
34981 case TYPE_LOAD:
34982 case TYPE_FPLOAD:
34983 case TYPE_VECLOAD:
34984 cost = COSTS_N_INSNS (n + 1);
34985 break;
34987 case TYPE_MUL:
34988 switch (get_attr_size (insn))
34990 case SIZE_8:
34991 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
34992 break;
34993 case SIZE_16:
34994 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
34995 break;
34996 case SIZE_32:
34997 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
34998 break;
34999 case SIZE_64:
35000 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
35001 break;
35002 default:
35003 gcc_unreachable ();
35005 break;
35006 case TYPE_DIV:
35007 switch (get_attr_size (insn))
35009 case SIZE_32:
35010 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
35011 break;
35012 case SIZE_64:
35013 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
35014 break;
35015 default:
35016 gcc_unreachable ();
35018 break;
35020 case TYPE_FP:
35021 cost = n * rs6000_cost->fp;
35022 break;
35023 case TYPE_DMUL:
35024 cost = n * rs6000_cost->dmul;
35025 break;
35026 case TYPE_SDIV:
35027 cost = n * rs6000_cost->sdiv;
35028 break;
35029 case TYPE_DDIV:
35030 cost = n * rs6000_cost->ddiv;
35031 break;
35033 case TYPE_SYNC:
35034 case TYPE_LOAD_L:
35035 case TYPE_MFCR:
35036 case TYPE_MFCRF:
35037 cost = COSTS_N_INSNS (n + 2);
35038 break;
35040 default:
35041 cost = COSTS_N_INSNS (n);
35044 return cost;
35047 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
35049 static int
35050 rs6000_debug_address_cost (rtx x, machine_mode mode,
35051 addr_space_t as, bool speed)
35053 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
35055 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
35056 ret, speed ? "true" : "false");
35057 debug_rtx (x);
35059 return ret;
35063 /* A C expression returning the cost of moving data from a register of class
35064 CLASS1 to one of CLASS2. */
35066 static int
35067 rs6000_register_move_cost (machine_mode mode,
35068 reg_class_t from, reg_class_t to)
35070 int ret;
35072 if (TARGET_DEBUG_COST)
35073 dbg_cost_ctrl++;
35075 /* Moves from/to GENERAL_REGS. */
35076 if (reg_classes_intersect_p (to, GENERAL_REGS)
35077 || reg_classes_intersect_p (from, GENERAL_REGS))
35079 reg_class_t rclass = from;
35081 if (! reg_classes_intersect_p (to, GENERAL_REGS))
35082 rclass = to;
35084 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
35085 ret = (rs6000_memory_move_cost (mode, rclass, false)
35086 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
35088 /* It's more expensive to move CR_REGS than CR0_REGS because of the
35089 shift. */
35090 else if (rclass == CR_REGS)
35091 ret = 4;
35093 /* For those processors that have slow LR/CTR moves, make them more
35094 expensive than memory in order to bias spills to memory. */
35095 else if ((rs6000_tune == PROCESSOR_POWER6
35096 || rs6000_tune == PROCESSOR_POWER7
35097 || rs6000_tune == PROCESSOR_POWER8
35098 || rs6000_tune == PROCESSOR_POWER9)
35099 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
35100 ret = 6 * hard_regno_nregs (0, mode);
35102 else
35103 /* A move will cost one instruction per GPR moved. */
35104 ret = 2 * hard_regno_nregs (0, mode);
35107 /* If we have VSX, we can easily move between FPR or Altivec registers. */
35108 else if (VECTOR_MEM_VSX_P (mode)
35109 && reg_classes_intersect_p (to, VSX_REGS)
35110 && reg_classes_intersect_p (from, VSX_REGS))
35111 ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
35113 /* Moving between two similar registers is just one instruction. */
35114 else if (reg_classes_intersect_p (to, from))
35115 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
35117 /* Everything else has to go through GENERAL_REGS. */
35118 else
35119 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
35120 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
35122 if (TARGET_DEBUG_COST)
35124 if (dbg_cost_ctrl == 1)
35125 fprintf (stderr,
35126 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
35127 ret, GET_MODE_NAME (mode), reg_class_names[from],
35128 reg_class_names[to]);
35129 dbg_cost_ctrl--;
35132 return ret;
35135 /* A C expression returning the cost of moving data of MODE from a register to
35136 or from memory. */
35138 static int
35139 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
35140 bool in ATTRIBUTE_UNUSED)
35142 int ret;
35144 if (TARGET_DEBUG_COST)
35145 dbg_cost_ctrl++;
35147 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
35148 ret = 4 * hard_regno_nregs (0, mode);
35149 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
35150 || reg_classes_intersect_p (rclass, VSX_REGS)))
35151 ret = 4 * hard_regno_nregs (32, mode);
35152 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
35153 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
35154 else
35155 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
35157 if (TARGET_DEBUG_COST)
35159 if (dbg_cost_ctrl == 1)
35160 fprintf (stderr,
35161 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
35162 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
35163 dbg_cost_ctrl--;
35166 return ret;
35169 /* Returns a code for a target-specific builtin that implements
35170 the reciprocal of the function, or NULL_TREE if not available. */
35172 static tree
35173 rs6000_builtin_reciprocal (tree fndecl)
35175 switch (DECL_FUNCTION_CODE (fndecl))
35177 case VSX_BUILTIN_XVSQRTDP:
35178 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
35179 return NULL_TREE;
35181 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
35183 case VSX_BUILTIN_XVSQRTSP:
35184 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
35185 return NULL_TREE;
35187 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
35189 default:
35190 return NULL_TREE;
35194 /* Load up a constant. If the mode is a vector mode, splat the value across
35195 all of the vector elements. */
35197 static rtx
35198 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
35200 rtx reg;
35202 if (mode == SFmode || mode == DFmode)
35204 rtx d = const_double_from_real_value (dconst, mode);
35205 reg = force_reg (mode, d);
35207 else if (mode == V4SFmode)
35209 rtx d = const_double_from_real_value (dconst, SFmode);
35210 rtvec v = gen_rtvec (4, d, d, d, d);
35211 reg = gen_reg_rtx (mode);
35212 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35214 else if (mode == V2DFmode)
35216 rtx d = const_double_from_real_value (dconst, DFmode);
35217 rtvec v = gen_rtvec (2, d, d);
35218 reg = gen_reg_rtx (mode);
35219 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35221 else
35222 gcc_unreachable ();
35224 return reg;
35227 /* Generate an FMA instruction. */
35229 static void
35230 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
35232 machine_mode mode = GET_MODE (target);
35233 rtx dst;
35235 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
35236 gcc_assert (dst != NULL);
35238 if (dst != target)
35239 emit_move_insn (target, dst);
35242 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
35244 static void
35245 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
35247 machine_mode mode = GET_MODE (dst);
35248 rtx r;
35250 /* This is a tad more complicated, since the fnma_optab is for
35251 a different expression: fma(-m1, m2, a), which is the same
35252 thing except in the case of signed zeros.
35254 Fortunately we know that if FMA is supported, then FNMSUB is
35255 also supported in the ISA. Just expand it directly. */
35257 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
35259 r = gen_rtx_NEG (mode, a);
35260 r = gen_rtx_FMA (mode, m1, m2, r);
35261 r = gen_rtx_NEG (mode, r);
35262 emit_insn (gen_rtx_SET (dst, r));
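/* Illustrative example, not part of this file: the signed-zero case in
   which -fma(m1, m2, -a) and fnma_optab's fma(-m1, m2, a) disagree,
   using fma () from <math.h> under round-to-nearest:

     m1 = 0.0, m2 = 5.0, a = 0.0:
       -fma (m1, m2, -a)  ==>  -(+0.0 + -0.0)  ==>  -0.0
       fma (-m1, m2, a)   ==>  (-0.0 + +0.0)   ==>  +0.0

   Both results are zero but with opposite signs, which is why FNMSUB is
   expanded directly here rather than through fnma_optab.  */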
35265 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
35266 add a reg_note saying that this was a division. Support both scalar and
35267 vector divide. Assumes no trapping math and finite arguments. */
35269 void
35270 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
35272 machine_mode mode = GET_MODE (dst);
35273 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
35274 int i;
35276 /* Low precision estimates guarantee 5 bits of accuracy. High
35277 precision estimates guarantee 14 bits of accuracy. SFmode
35278 requires 23 bits of accuracy. DFmode requires 52 bits of
35279 accuracy. Each pass at least doubles the accuracy, leading
35280 to the following. */
35281 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35282 if (mode == DFmode || mode == V2DFmode)
35283 passes++;
35285 enum insn_code code = optab_handler (smul_optab, mode);
35286 insn_gen_fn gen_mul = GEN_FCN (code);
35288 gcc_assert (code != CODE_FOR_nothing);
35290 one = rs6000_load_constant_and_splat (mode, dconst1);
35292 /* x0 = 1./d estimate */
35293 x0 = gen_reg_rtx (mode);
35294 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
35295 UNSPEC_FRES)));
35297 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
35298 if (passes > 1) {
35300 /* e0 = 1. - d * x0 */
35301 e0 = gen_reg_rtx (mode);
35302 rs6000_emit_nmsub (e0, d, x0, one);
35304 /* x1 = x0 + e0 * x0 */
35305 x1 = gen_reg_rtx (mode);
35306 rs6000_emit_madd (x1, e0, x0, x0);
35308 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
35309 ++i, xprev = xnext, eprev = enext) {
35311 /* enext = eprev * eprev */
35312 enext = gen_reg_rtx (mode);
35313 emit_insn (gen_mul (enext, eprev, eprev));
35315 /* xnext = xprev + enext * xprev */
35316 xnext = gen_reg_rtx (mode);
35317 rs6000_emit_madd (xnext, enext, xprev, xprev);
35320 } else
35321 xprev = x0;
35323 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
35325 /* u = n * xprev */
35326 u = gen_reg_rtx (mode);
35327 emit_insn (gen_mul (u, n, xprev));
35329 /* v = n - (d * u) */
35330 v = gen_reg_rtx (mode);
35331 rs6000_emit_nmsub (v, d, u, n);
35333 /* dst = (v * xprev) + u */
35334 rs6000_emit_madd (dst, v, xprev, u);
35336 if (note_p)
35337 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
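/* Illustrative sketch, not part of this file: a scalar C model of the
   division sequence emitted above.  fre_estimate () is a hypothetical
   stand-in for the hardware reciprocal-estimate instruction, and
   fma () from <math.h> models the madd/nmsub patterns (ignoring the
   signed-zero caveat discussed at rs6000_emit_nmsub).

     #include <math.h>

     static double fre_estimate (double d);  // hypothetical estimate

     static double
     swdiv_model (double n, double d, int passes)
     {
       double x = fre_estimate (d);          // x0 ~= 1/d
       if (passes > 1)
         {
           double e = fma (-d, x, 1.0);      // e0 = 1 - d*x0
           x = fma (e, x, x);                // x1 = x0 + e0*x0
           for (int i = 0; i < passes - 2; i++)
             {
               e = e * e;                    // enext = eprev*eprev
               x = fma (e, x, x);            // xnext = xprev + enext*xprev
             }
         }
       double u = n * x;                     // u = n * xprev
       double v = fma (-d, u, n);            // v = n - d*u
       return fma (v, x, u);                 // dst = v*xprev + u
     }

   With the 14-bit -mrecip-precision estimate this needs 1 pass for
   SFmode (28 bits >= 23) and 2 for DFmode (56 >= 52); the 5-bit
   low-precision estimate needs 3 and 4 passes respectively.  */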
35340 /* Goldschmidt's Algorithm for single/double-precision floating point
35341 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
35343 void
35344 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
35346 machine_mode mode = GET_MODE (src);
35347 rtx e = gen_reg_rtx (mode);
35348 rtx g = gen_reg_rtx (mode);
35349 rtx h = gen_reg_rtx (mode);
35351 /* Low precision estimates guarantee 5 bits of accuracy. High
35352 precision estimates guarantee 14 bits of accuracy. SFmode
35353 requires 23 bits of accuracy. DFmode requires 52 bits of
35354 accuracy. Each pass at least doubles the accuracy, leading
35355 to the following. */
35356 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35357 if (mode == DFmode || mode == V2DFmode)
35358 passes++;
35360 int i;
35361 rtx mhalf;
35362 enum insn_code code = optab_handler (smul_optab, mode);
35363 insn_gen_fn gen_mul = GEN_FCN (code);
35365 gcc_assert (code != CODE_FOR_nothing);
35367 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
35369 /* e = rsqrt estimate */
35370 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
35371 UNSPEC_RSQRT)));
35373 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
35374 if (!recip)
35376 rtx zero = force_reg (mode, CONST0_RTX (mode));
35378 if (mode == SFmode)
35380 rtx target = emit_conditional_move (e, GT, src, zero, mode,
35381 e, zero, mode, 0);
35382 if (target != e)
35383 emit_move_insn (e, target);
35385 else
35387 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
35388 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
35392 /* g = sqrt estimate. */
35393 emit_insn (gen_mul (g, e, src));
35394 /* h = 1/(2*sqrt) estimate. */
35395 emit_insn (gen_mul (h, e, mhalf));
35397 if (recip)
35399 if (passes == 1)
35401 rtx t = gen_reg_rtx (mode);
35402 rs6000_emit_nmsub (t, g, h, mhalf);
35403 /* Apply correction directly to 1/rsqrt estimate. */
35404 rs6000_emit_madd (dst, e, t, e);
35406 else
35408 for (i = 0; i < passes; i++)
35410 rtx t1 = gen_reg_rtx (mode);
35411 rtx g1 = gen_reg_rtx (mode);
35412 rtx h1 = gen_reg_rtx (mode);
35414 rs6000_emit_nmsub (t1, g, h, mhalf);
35415 rs6000_emit_madd (g1, g, t1, g);
35416 rs6000_emit_madd (h1, h, t1, h);
35418 g = g1;
35419 h = h1;
35421 /* Multiply by 2 for 1/rsqrt. */
35422 emit_insn (gen_add3_insn (dst, h, h));
35425 else
35427 rtx t = gen_reg_rtx (mode);
35428 rs6000_emit_nmsub (t, g, h, mhalf);
35429 rs6000_emit_madd (dst, g, t, g);
35432 return;
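/* Illustrative sketch, not part of this file: a scalar C model of the
   multi-pass rsqrt path above.  rsqrt_est () is a hypothetical stand-in
   for the hardware reciprocal-square-root estimate, and fma () from
   <math.h> models the madd/nmsub patterns.

     #include <math.h>

     static double rsqrt_est (double x);  // hypothetical estimate

     static double
     rsqrt_model (double x, int passes)
     {
       double e = rsqrt_est (x);          // e ~= 1/sqrt(x)
       double g = x * e;                  // g ~= sqrt(x)
       double h = e * 0.5;                // h ~= 1/(2*sqrt(x))
       for (int i = 0; i < passes; i++)
         {
           double t = fma (-g, h, 0.5);   // t = 1/2 - g*h
           g = fma (g, t, g);             // g = g + g*t
           h = fma (h, t, h);             // h = h + h*t
         }
       return h + h;                      // 2*h ~= 1/sqrt(x)
     }

   The invariant is that g*h converges to 1/2 as g converges to sqrt(x)
   and h to 1/(2*sqrt(x)); the sqrt path above instead returns g after
   one such correction, and the single-pass rsqrt path applies the
   correction directly to e.  */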
35435 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
35436 (Power7) targets. DST is the target, and SRC is the argument operand. */
35438 void
35439 rs6000_emit_popcount (rtx dst, rtx src)
35441 machine_mode mode = GET_MODE (dst);
35442 rtx tmp1, tmp2;
35444 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
35445 if (TARGET_POPCNTD)
35447 if (mode == SImode)
35448 emit_insn (gen_popcntdsi2 (dst, src));
35449 else
35450 emit_insn (gen_popcntddi2 (dst, src));
35451 return;
35454 tmp1 = gen_reg_rtx (mode);
35456 if (mode == SImode)
35458 emit_insn (gen_popcntbsi2 (tmp1, src));
35459 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
35460 NULL_RTX, 0);
35461 tmp2 = force_reg (SImode, tmp2);
35462 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
35464 else
35466 emit_insn (gen_popcntbdi2 (tmp1, src));
35467 tmp2 = expand_mult (DImode, tmp1,
35468 GEN_INT ((HOST_WIDE_INT)
35469 0x01010101 << 32 | 0x01010101),
35470 NULL_RTX, 0);
35471 tmp2 = force_reg (DImode, tmp2);
35472 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
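/* Illustrative sketch, not part of this file: the popcntb fallback
   above in scalar C for SImode.  popcntb_model () is a hypothetical
   emulation of the popcntb instruction (a population count of each
   byte); the multiply by 0x01010101 then accumulates the four byte
   counts into the most significant byte, which the shift by 24
   extracts.

     #include <stdint.h>

     static uint32_t
     popcntb_model (uint32_t x)
     {
       uint32_t r = 0;
       for (int i = 0; i < 32; i += 8)
         {
           uint32_t byte = (x >> i) & 0xff;
           uint32_t n = 0;
           while (byte)
             {
               n += byte & 1;
               byte >>= 1;
             }
           r |= n << i;
         }
       return r;
     }

     static uint32_t
     popcount_model (uint32_t x)
     {
       return (popcntb_model (x) * 0x01010101u) >> 24;
     }
*/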
35477 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
35478 target, and SRC is the argument operand. */
35480 void
35481 rs6000_emit_parity (rtx dst, rtx src)
35483 machine_mode mode = GET_MODE (dst);
35484 rtx tmp;
35486 tmp = gen_reg_rtx (mode);
35488 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
35489 if (TARGET_CMPB)
35491 if (mode == SImode)
35493 emit_insn (gen_popcntbsi2 (tmp, src));
35494 emit_insn (gen_paritysi2_cmpb (dst, tmp));
35496 else
35498 emit_insn (gen_popcntbdi2 (tmp, src));
35499 emit_insn (gen_paritydi2_cmpb (dst, tmp));
35501 return;
35504 if (mode == SImode)
35506 /* Is mult+shift >= shift+xor+shift+xor? */
35507 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
35509 rtx tmp1, tmp2, tmp3, tmp4;
35511 tmp1 = gen_reg_rtx (SImode);
35512 emit_insn (gen_popcntbsi2 (tmp1, src));
35514 tmp2 = gen_reg_rtx (SImode);
35515 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
35516 tmp3 = gen_reg_rtx (SImode);
35517 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
35519 tmp4 = gen_reg_rtx (SImode);
35520 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
35521 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
35523 else
35524 rs6000_emit_popcount (tmp, src);
35525 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
35527 else
35529 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
35530 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
35532 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
35534 tmp1 = gen_reg_rtx (DImode);
35535 emit_insn (gen_popcntbdi2 (tmp1, src));
35537 tmp2 = gen_reg_rtx (DImode);
35538 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
35539 tmp3 = gen_reg_rtx (DImode);
35540 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
35542 tmp4 = gen_reg_rtx (DImode);
35543 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
35544 tmp5 = gen_reg_rtx (DImode);
35545 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
35547 tmp6 = gen_reg_rtx (DImode);
35548 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
35549 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
35551 else
35552 rs6000_emit_popcount (tmp, src);
35553 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
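/* Illustrative sketch, not part of this file: the SImode shift/xor
   fallback above in scalar C, reusing popcntb_model () from the
   popcount sketch earlier.  The parity of a sum is the xor of the
   parities, so xor-folding the per-byte counts and taking the low bit
   gives the parity of the whole word.

     #include <stdint.h>

     static uint32_t
     parity_model (uint32_t x)
     {
       uint32_t t = popcntb_model (x);    // per-byte counts
       t ^= t >> 16;                      // fold halfwords together
       t ^= t >> 8;                       // fold bytes together
       return t & 1;                      // parity of all 32 bits
     }
*/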
35557 /* Expand an Altivec constant permutation for little endian mode.
35558 OP0 and OP1 are the input vectors and TARGET is the output vector.
35559 SEL specifies the constant permutation vector.
35561 There are two issues: First, the two input operands must be
35562 swapped so that together they form a double-wide array in LE
35563 order. Second, the vperm instruction has surprising behavior
35564 in LE mode: it interprets the elements of the source vectors
35565 in BE mode ("left to right") and interprets the elements of
35566 the destination vector in LE mode ("right to left"). To
35567 correct for this, we must subtract each element of the permute
35568 control vector from 31.
35570 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
35571 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
35572 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
35573 serve as the permute control vector. Then, in BE mode,
35575 vperm 9,10,11,12
35577 places the desired result in vr9. However, in LE mode the
35578 vector contents will be
35580 vr10 = 00000003 00000002 00000001 00000000
35581 vr11 = 00000007 00000006 00000005 00000004
35583 The result of the vperm using the same permute control vector is
35585 vr9 = 05000000 07000000 01000000 03000000
35587 That is, the leftmost 4 bytes of vr10 are interpreted as the
35588 source for the rightmost 4 bytes of vr9, and so on.
35590 If we change the permute control vector to
35592 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
35594 and issue
35596 vperm 9,11,10,12
35598 we get the desired
35600 vr9 = 00000006 00000004 00000002 00000000. */
35602 static void
35603 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
35604 const vec_perm_indices &sel)
35606 unsigned int i;
35607 rtx perm[16];
35608 rtx constv, unspec;
35610 /* Unpack and adjust the constant selector. */
35611 for (i = 0; i < 16; ++i)
35613 unsigned int elt = 31 - (sel[i] & 31);
35614 perm[i] = GEN_INT (elt);
35617 /* Expand to a permute, swapping the inputs and using the
35618 adjusted selector. */
35619 if (!REG_P (op0))
35620 op0 = force_reg (V16QImode, op0);
35621 if (!REG_P (op1))
35622 op1 = force_reg (V16QImode, op1);
35624 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
35625 constv = force_reg (V16QImode, constv);
35626 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
35627 UNSPEC_VPERM);
35628 if (!REG_P (target))
35630 rtx tmp = gen_reg_rtx (V16QImode);
35631 emit_move_insn (tmp, unspec);
35632 unspec = tmp;
35635 emit_move_insn (target, unspec);
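/* Illustrative sketch, not part of this file: the control-vector
   adjustment above as a standalone helper.  Each BE-numbered selector
   element is mirrored with 31 - sel[i], and the vperm is then issued
   with op0 and op1 swapped, which together restore the requested
   element order under the LE interpretation described above.

     static void
     adjust_sel_for_le (unsigned char perm_le[16],
                        const unsigned char sel[16])
     {
       for (int i = 0; i < 16; i++)
         perm_le[i] = 31 - (sel[i] & 31);
     }
*/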
35638 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
35639 permute control vector. But here it's not a constant, so we must
35640 generate a vector NAND or NOR to do the adjustment. */
35642 void
35643 altivec_expand_vec_perm_le (rtx operands[4])
35645 rtx notx, iorx, unspec;
35646 rtx target = operands[0];
35647 rtx op0 = operands[1];
35648 rtx op1 = operands[2];
35649 rtx sel = operands[3];
35650 rtx tmp = target;
35651 rtx norreg = gen_reg_rtx (V16QImode);
35652 machine_mode mode = GET_MODE (target);
35654 /* Get everything in regs so the pattern matches. */
35655 if (!REG_P (op0))
35656 op0 = force_reg (mode, op0);
35657 if (!REG_P (op1))
35658 op1 = force_reg (mode, op1);
35659 if (!REG_P (sel))
35660 sel = force_reg (V16QImode, sel);
35661 if (!REG_P (target))
35662 tmp = gen_reg_rtx (mode);
35664 if (TARGET_P9_VECTOR)
35666 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
35667 UNSPEC_VPERMR);
35669 else
35671 /* Invert the selector with a VNAND if available, else a VNOR.
35672 The VNAND is preferred for future fusion opportunities. */
35673 notx = gen_rtx_NOT (V16QImode, sel);
35674 iorx = (TARGET_P8_VECTOR
35675 ? gen_rtx_IOR (V16QImode, notx, notx)
35676 : gen_rtx_AND (V16QImode, notx, notx));
35677 emit_insn (gen_rtx_SET (norreg, iorx));
35679 /* Permute with operands reversed and adjusted selector. */
35680 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
35681 UNSPEC_VPERM);
35684 /* Copy into target, possibly by way of a register. */
35685 if (!REG_P (target))
35687 emit_move_insn (tmp, unspec);
35688 unspec = tmp;
35691 emit_move_insn (target, unspec);
35694 /* Expand an Altivec constant permutation. Return true if we match
35695 an efficient implementation; false to fall back to VPERM.
35697 OP0 and OP1 are the input vectors and TARGET is the output vector.
35698 SEL specifies the constant permutation vector. */
35700 static bool
35701 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
35702 const vec_perm_indices &sel)
35704 struct altivec_perm_insn {
35705 HOST_WIDE_INT mask;
35706 enum insn_code impl;
35707 unsigned char perm[16];
35709 static const struct altivec_perm_insn patterns[] = {
35710 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
35711 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
35712 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
35713 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
35714 { OPTION_MASK_ALTIVEC,
35715 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
35716 : CODE_FOR_altivec_vmrglb_direct),
35717 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
35718 { OPTION_MASK_ALTIVEC,
35719 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
35720 : CODE_FOR_altivec_vmrglh_direct),
35721 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
35722 { OPTION_MASK_ALTIVEC,
35723 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
35724 : CODE_FOR_altivec_vmrglw_direct),
35725 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
35726 { OPTION_MASK_ALTIVEC,
35727 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
35728 : CODE_FOR_altivec_vmrghb_direct),
35729 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
35730 { OPTION_MASK_ALTIVEC,
35731 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
35732 : CODE_FOR_altivec_vmrghh_direct),
35733 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
35734 { OPTION_MASK_ALTIVEC,
35735 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
35736 : CODE_FOR_altivec_vmrghw_direct),
35737 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35738 { OPTION_MASK_P8_VECTOR,
35739 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
35740 : CODE_FOR_p8_vmrgow_v4sf_direct),
35741 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35742 { OPTION_MASK_P8_VECTOR,
35743 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
35744 : CODE_FOR_p8_vmrgew_v4sf_direct),
35745 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35748 unsigned int i, j, elt, which;
35749 unsigned char perm[16];
35750 rtx x;
35751 bool one_vec;
35753 /* Unpack the constant selector. */
35754 for (i = which = 0; i < 16; ++i)
35756 elt = sel[i] & 31;
35757 which |= (elt < 16 ? 1 : 2);
35758 perm[i] = elt;
35761 /* Simplify the constant selector based on operands. */
35762 switch (which)
35764 default:
35765 gcc_unreachable ();
35767 case 3:
35768 one_vec = false;
35769 if (!rtx_equal_p (op0, op1))
35770 break;
35771 /* FALLTHRU */
35773 case 2:
35774 for (i = 0; i < 16; ++i)
35775 perm[i] &= 15;
35776 op0 = op1;
35777 one_vec = true;
35778 break;
35780 case 1:
35781 op1 = op0;
35782 one_vec = true;
35783 break;
35786 /* Look for splat patterns. */
35787 if (one_vec)
35789 elt = perm[0];
35791 for (i = 0; i < 16; ++i)
35792 if (perm[i] != elt)
35793 break;
35794 if (i == 16)
35796 if (!BYTES_BIG_ENDIAN)
35797 elt = 15 - elt;
35798 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
35799 return true;
35802 if (elt % 2 == 0)
35804 for (i = 0; i < 16; i += 2)
35805 if (perm[i] != elt || perm[i + 1] != elt + 1)
35806 break;
35807 if (i == 16)
35809 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35810 x = gen_reg_rtx (V8HImode);
35811 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35812 GEN_INT (field)));
35813 emit_move_insn (target, gen_lowpart (V16QImode, x));
35814 return true;
35818 if (elt % 4 == 0)
35820 for (i = 0; i < 16; i += 4)
35821 if (perm[i] != elt
35822 || perm[i + 1] != elt + 1
35823 || perm[i + 2] != elt + 2
35824 || perm[i + 3] != elt + 3)
35825 break;
35826 if (i == 16)
35828 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35829 x = gen_reg_rtx (V4SImode);
35830 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35831 GEN_INT (field)));
35832 emit_move_insn (target, gen_lowpart (V16QImode, x));
35833 return true;
35838 /* Look for merge and pack patterns. */
35839 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35841 bool swapped;
35843 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35844 continue;
35846 elt = patterns[j].perm[0];
35847 if (perm[0] == elt)
35848 swapped = false;
35849 else if (perm[0] == elt + 16)
35850 swapped = true;
35851 else
35852 continue;
35853 for (i = 1; i < 16; ++i)
35855 elt = patterns[j].perm[i];
35856 if (swapped)
35857 elt = (elt >= 16 ? elt - 16 : elt + 16);
35858 else if (one_vec && elt >= 16)
35859 elt -= 16;
35860 if (perm[i] != elt)
35861 break;
35863 if (i == 16)
35865 enum insn_code icode = patterns[j].impl;
35866 machine_mode omode = insn_data[icode].operand[0].mode;
35867 machine_mode imode = insn_data[icode].operand[1].mode;
35869 /* For little-endian, don't use vpkuwum and vpkuhum if the
35870 underlying vector type is not V4SI and V8HI, respectively.
35871 For example, using vpkuwum with a V8HI picks up the even
35872 halfwords (BE numbering) when the even halfwords (LE
35873 numbering) are what we need. */
35874 if (!BYTES_BIG_ENDIAN
35875 && icode == CODE_FOR_altivec_vpkuwum_direct
35876 && ((GET_CODE (op0) == REG
35877 && GET_MODE (op0) != V4SImode)
35878 || (GET_CODE (op0) == SUBREG
35879 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35880 continue;
35881 if (!BYTES_BIG_ENDIAN
35882 && icode == CODE_FOR_altivec_vpkuhum_direct
35883 && ((GET_CODE (op0) == REG
35884 && GET_MODE (op0) != V8HImode)
35885 || (GET_CODE (op0) == SUBREG
35886 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35887 continue;
35889 /* For little-endian, the two input operands must be swapped
35890 (or swapped back) to ensure proper right-to-left numbering
35891 from 0 to 2N-1. */
35892 if (swapped ^ !BYTES_BIG_ENDIAN)
35893 std::swap (op0, op1);
35894 if (imode != V16QImode)
35896 op0 = gen_lowpart (imode, op0);
35897 op1 = gen_lowpart (imode, op1);
35899 if (omode == V16QImode)
35900 x = target;
35901 else
35902 x = gen_reg_rtx (omode);
35903 emit_insn (GEN_FCN (icode) (x, op0, op1));
35904 if (omode != V16QImode)
35905 emit_move_insn (target, gen_lowpart (V16QImode, x));
35906 return true;
35910 if (!BYTES_BIG_ENDIAN)
35912 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
35913 return true;
35916 return false;
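/* Illustrative sketch, not part of this file: the byte-splat test used
   above, as a standalone predicate.  A selector whose 16 entries all
   name the same byte can be done with a single vspltb (with the
   element number mirrored to 15 - elt for little endian, as above).

     static int
     is_byte_splat (const unsigned char perm[16])
     {
       for (int i = 1; i < 16; i++)
         if (perm[i] != perm[0])
           return 0;
       return 1;
     }
*/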
35919 /* Expand a Paired Single or VSX Permute Doubleword constant permutation.
35920 Return true if we match an efficient implementation. */
35922 static bool
35923 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35924 unsigned char perm0, unsigned char perm1)
35926 rtx x;
35928 /* If both selectors come from the same operand, fold to single op. */
35929 if ((perm0 & 2) == (perm1 & 2))
35931 if (perm0 & 2)
35932 op0 = op1;
35933 else
35934 op1 = op0;
35936 /* If both operands are equal, fold to simpler permutation. */
35937 if (rtx_equal_p (op0, op1))
35939 perm0 = perm0 & 1;
35940 perm1 = (perm1 & 1) + 2;
35942 /* If the first selector comes from the second operand, swap. */
35943 else if (perm0 & 2)
35945 if (perm1 & 2)
35946 return false;
35947 perm0 -= 2;
35948 perm1 += 2;
35949 std::swap (op0, op1);
35951 /* If the second selector does not come from the second operand, fail. */
35952 else if ((perm1 & 2) == 0)
35953 return false;
35955 /* Success! */
35956 if (target != NULL)
35958 machine_mode vmode, dmode;
35959 rtvec v;
35961 vmode = GET_MODE (target);
35962 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
35963 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
35964 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35965 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35966 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35967 emit_insn (gen_rtx_SET (target, x));
35969 return true;
35972 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
35974 static bool
35975 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
35976 rtx op1, const vec_perm_indices &sel)
35978 bool testing_p = !target;
35980 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35981 if (TARGET_ALTIVEC && testing_p)
35982 return true;
35984 /* Check for ps_merge* or xxpermdi insns. */
35985 if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT)
35986 || ((vmode == V2DFmode || vmode == V2DImode)
35987 && VECTOR_MEM_VSX_P (vmode)))
35989 if (testing_p)
35991 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35992 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
35994 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
35995 return true;
35998 if (TARGET_ALTIVEC)
36000 /* Force the target-independent code to lower to V16QImode. */
36001 if (vmode != V16QImode)
36002 return false;
36003 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
36004 return true;
36007 return false;
36010 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
36011 OP0 and OP1 are the input vectors and TARGET is the output vector.
36012 PERM specifies the constant permutation vector. */
36014 static void
36015 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
36016 machine_mode vmode, const vec_perm_builder &perm)
36018 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
36019 if (x != target)
36020 emit_move_insn (target, x);
36023 /* Expand an extract even operation. */
36025 void
36026 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
36028 machine_mode vmode = GET_MODE (target);
36029 unsigned i, nelt = GET_MODE_NUNITS (vmode);
36030 vec_perm_builder perm (nelt, nelt, 1);
36032 for (i = 0; i < nelt; i++)
36033 perm.quick_push (i * 2);
36035 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
36038 /* Expand a vector interleave operation. */
36040 void
36041 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
36043 machine_mode vmode = GET_MODE (target);
36044 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
36045 vec_perm_builder perm (nelt, nelt, 1);
36047 high = (highp ? 0 : nelt / 2);
36048 for (i = 0; i < nelt / 2; i++)
36050 perm.quick_push (i + high);
36051 perm.quick_push (i + nelt + high);
36054 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
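/* Illustrative worked example, not part of this file: with a V4SI
   target (nelt = 4), rs6000_expand_extract_even builds the selector
   {0, 2, 4, 6}, while rs6000_expand_interleave builds {0, 4, 1, 5}
   for highp and {2, 6, 3, 7} otherwise, where indices 0..3 name the
   elements of op0 and 4..7 the elements of op1.  */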
36057 /* Scale a V2DF vector SRC by two raised to SCALE and place the result in TGT. */
36058 void
36059 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
36061 HOST_WIDE_INT hwi_scale (scale);
36062 REAL_VALUE_TYPE r_pow;
36063 rtvec v = rtvec_alloc (2);
36064 rtx elt;
36065 rtx scale_vec = gen_reg_rtx (V2DFmode);
36066 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
36067 elt = const_double_from_real_value (r_pow, DFmode);
36068 RTVEC_ELT (v, 0) = elt;
36069 RTVEC_ELT (v, 1) = elt;
36070 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
36071 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
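/* Illustrative sketch, not part of this file: the scalar equivalent of
   rs6000_scale_v2df, using ldexp () from <math.h> for the exact
   power-of-two factor:

     #include <math.h>

     static void
     scale_v2df_model (double tgt[2], const double src[2], int scale)
     {
       double f = ldexp (1.0, scale);   // 2**scale, exact in double
       tgt[0] = src[0] * f;
       tgt[1] = src[1] * f;
     }
*/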
36074 /* Return an RTX representing where to find the function value of a
36075 function returning MODE. */
36076 static rtx
36077 rs6000_complex_function_value (machine_mode mode)
36079 unsigned int regno;
36080 rtx r1, r2;
36081 machine_mode inner = GET_MODE_INNER (mode);
36082 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
36084 if (TARGET_FLOAT128_TYPE
36085 && (mode == KCmode
36086 || (mode == TCmode && TARGET_IEEEQUAD)))
36087 regno = ALTIVEC_ARG_RETURN;
36089 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
36090 regno = FP_ARG_RETURN;
36092 else
36094 regno = GP_ARG_RETURN;
36096 /* 32-bit is OK since it'll go in r3/r4. */
36097 if (TARGET_32BIT && inner_bytes >= 4)
36098 return gen_rtx_REG (mode, regno);
36101 if (inner_bytes >= 8)
36102 return gen_rtx_REG (mode, regno);
36104 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
36105 const0_rtx);
36106 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
36107 GEN_INT (inner_bytes));
36108 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
36111 /* Return an rtx describing a return value of MODE as a PARALLEL
36112 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
36113 stride REG_STRIDE. */
36115 static rtx
36116 rs6000_parallel_return (machine_mode mode,
36117 int n_elts, machine_mode elt_mode,
36118 unsigned int regno, unsigned int reg_stride)
36120 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
36122 int i;
36123 for (i = 0; i < n_elts; i++)
36125 rtx r = gen_rtx_REG (elt_mode, regno);
36126 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
36127 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
36128 regno += reg_stride;
36131 return par;
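/* Illustrative worked example, not part of this file: for DImode under
   -m32 -mpowerpc64, as used by rs6000_function_value and
   rs6000_libcall_value below, rs6000_parallel_return (DImode, 2,
   SImode, GP_ARG_RETURN, 1) builds a PARALLEL equivalent to

     (parallel [(expr_list (reg:SI 3) (const_int 0))
                (expr_list (reg:SI 4) (const_int 4))])

   i.e. the two word halves of the value in r3 and r4 at byte offsets
   0 and 4.  */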
36134 /* Target hook for TARGET_FUNCTION_VALUE.
36136 An integer value is in r3 and a floating-point value is in fp1,
36137 unless -msoft-float. */
36139 static rtx
36140 rs6000_function_value (const_tree valtype,
36141 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
36142 bool outgoing ATTRIBUTE_UNUSED)
36144 machine_mode mode;
36145 unsigned int regno;
36146 machine_mode elt_mode;
36147 int n_elts;
36149 /* Special handling for structs in darwin64. */
36150 if (TARGET_MACHO
36151 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
36153 CUMULATIVE_ARGS valcum;
36154 rtx valret;
36156 valcum.words = 0;
36157 valcum.fregno = FP_ARG_MIN_REG;
36158 valcum.vregno = ALTIVEC_ARG_MIN_REG;
36159 /* Do a trial code generation as if this were going to be passed as
36160 an argument; if any part goes in memory, we return NULL. */
36161 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
36162 if (valret)
36163 return valret;
36164 /* Otherwise fall through to standard ABI rules. */
36167 mode = TYPE_MODE (valtype);
36169 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
36170 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
36172 int first_reg, n_regs;
36174 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
36176 /* _Decimal128 must use even/odd register pairs. */
36177 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36178 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
36180 else
36182 first_reg = ALTIVEC_ARG_RETURN;
36183 n_regs = 1;
36186 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
36189 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64. */
36190 if (TARGET_32BIT && TARGET_POWERPC64)
36191 switch (mode)
36193 default:
36194 break;
36195 case E_DImode:
36196 case E_SCmode:
36197 case E_DCmode:
36198 case E_TCmode:
36199 int count = GET_MODE_SIZE (mode) / 4;
36200 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
36203 if ((INTEGRAL_TYPE_P (valtype)
36204 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
36205 || POINTER_TYPE_P (valtype))
36206 mode = TARGET_32BIT ? SImode : DImode;
36208 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
36209 /* _Decimal128 must use an even/odd register pair. */
36210 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36211 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
36212 && !FLOAT128_VECTOR_P (mode)
36213 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
36214 regno = FP_ARG_RETURN;
36215 else if (TREE_CODE (valtype) == COMPLEX_TYPE
36216 && targetm.calls.split_complex_arg)
36217 return rs6000_complex_function_value (mode);
36218 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36219 return register is used in both cases, and we won't see V2DImode/V2DFmode
36220 for pure altivec, combine the two cases. */
36221 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
36222 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
36223 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
36224 regno = ALTIVEC_ARG_RETURN;
36225 else
36226 regno = GP_ARG_RETURN;
36228 return gen_rtx_REG (mode, regno);
36231 /* Define how to find the value returned by a library function
36232 assuming the value has mode MODE. */
36233 static rtx
36234 rs6000_libcall_value (machine_mode mode)
36236 unsigned int regno;
36238 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
36239 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
36240 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
36242 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
36243 /* _Decimal128 must use an even/odd register pair. */
36244 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36245 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
36246 && TARGET_HARD_FLOAT
36247 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
36248 regno = FP_ARG_RETURN;
36249 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36250 return register is used in both cases, and we won't see V2DImode/V2DFmode
36251 for pure altivec, combine the two cases. */
36252 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
36253 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
36254 regno = ALTIVEC_ARG_RETURN;
36255 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
36256 return rs6000_complex_function_value (mode);
36257 else
36258 regno = GP_ARG_RETURN;
36260 return gen_rtx_REG (mode, regno);
36263 /* Compute register pressure classes. We implement the target hook to avoid
36264 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
36265 lead to incorrect estimates of the number of available registers and therefore
36266 increased register pressure/spill. */
36267 static int
36268 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
36270 int n;
36272 n = 0;
36273 pressure_classes[n++] = GENERAL_REGS;
36274 if (TARGET_VSX)
36275 pressure_classes[n++] = VSX_REGS;
36276 else
36278 if (TARGET_ALTIVEC)
36279 pressure_classes[n++] = ALTIVEC_REGS;
36280 if (TARGET_HARD_FLOAT)
36281 pressure_classes[n++] = FLOAT_REGS;
36283 pressure_classes[n++] = CR_REGS;
36284 pressure_classes[n++] = SPECIAL_REGS;
36286 return n;
36289 /* Given FROM and TO register numbers, say whether this elimination is allowed.
36290 Frame pointer elimination is automatically handled.
36292 For the RS/6000, if frame pointer elimination is being done, we would like
36293 to convert ap into fp, not sp.
36295 We need r30 if -mminimal-toc was specified, and there are constant pool
36296 references. */
36298 static bool
36299 rs6000_can_eliminate (const int from, const int to)
36301 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
36302 ? ! frame_pointer_needed
36303 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
36304 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
36305 || constant_pool_empty_p ()
36306 : true);
36309 /* Define the offset between two registers, FROM to be eliminated and its
36310 replacement TO, at the start of a routine. */
36311 HOST_WIDE_INT
36312 rs6000_initial_elimination_offset (int from, int to)
36314 rs6000_stack_t *info = rs6000_stack_info ();
36315 HOST_WIDE_INT offset;
36317 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36318 offset = info->push_p ? 0 : -info->total_size;
36319 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36321 offset = info->push_p ? 0 : -info->total_size;
36322 if (FRAME_GROWS_DOWNWARD)
36323 offset += info->fixed_size + info->vars_size + info->parm_size;
36325 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36326 offset = FRAME_GROWS_DOWNWARD
36327 ? info->fixed_size + info->vars_size + info->parm_size
36328 : 0;
36329 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36330 offset = info->total_size;
36331 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36332 offset = info->push_p ? info->total_size : 0;
36333 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
36334 offset = 0;
36335 else
36336 gcc_unreachable ();
36338 return offset;
36341 /* Fill in sizes of registers used by unwinder. */
36343 static void
36344 rs6000_init_dwarf_reg_sizes_extra (tree address)
36346 if (TARGET_MACHO && ! TARGET_ALTIVEC)
36348 int i;
36349 machine_mode mode = TYPE_MODE (char_type_node);
36350 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
36351 rtx mem = gen_rtx_MEM (BLKmode, addr);
36352 rtx value = gen_int_mode (16, mode);
36354 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
36355 The unwinder still needs to know the size of Altivec registers. */
36357 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
36359 int column = DWARF_REG_TO_UNWIND_COLUMN
36360 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
36361 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
36363 emit_move_insn (adjust_address (mem, mode, offset), value);
36368 /* Map internal gcc register numbers to debug format register numbers.
36369 FORMAT specifies the type of debug register number to use:
36370 0 -- debug information, except for frame-related sections
36371 1 -- DWARF .debug_frame section
36372 2 -- DWARF .eh_frame section */
36374 unsigned int
36375 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
36377 /* Except for the above, we use the internal number for non-DWARF
36378 debug information, and also for .eh_frame. */
36379 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
36380 return regno;
36382 /* On some platforms, we use the standard DWARF register
36383 numbering for .debug_info and .debug_frame. */
36384 #ifdef RS6000_USE_DWARF_NUMBERING
36385 if (regno <= 63)
36386 return regno;
36387 if (regno == LR_REGNO)
36388 return 108;
36389 if (regno == CTR_REGNO)
36390 return 109;
36391 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
36392 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
36393 The actual code emitted saves the whole of CR, so we map CR2_REGNO
36394 to the DWARF reg for CR. */
36395 if (format == 1 && regno == CR2_REGNO)
36396 return 64;
36397 if (CR_REGNO_P (regno))
36398 return regno - CR0_REGNO + 86;
36399 if (regno == CA_REGNO)
36400 return 101; /* XER */
36401 if (ALTIVEC_REGNO_P (regno))
36402 return regno - FIRST_ALTIVEC_REGNO + 1124;
36403 if (regno == VRSAVE_REGNO)
36404 return 356;
36405 if (regno == VSCR_REGNO)
36406 return 67;
36407 #endif
36408 return regno;
36411 /* Target hook for eh_return_filter_mode. */
36412 static scalar_int_mode
36413 rs6000_eh_return_filter_mode (void)
36415 return TARGET_32BIT ? SImode : word_mode;
36418 /* Target hook for scalar_mode_supported_p. */
36419 static bool
36420 rs6000_scalar_mode_supported_p (scalar_mode mode)
36422 /* -m32 does not support TImode. This is the default, from
36423 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
36424 same ABI as for -m32. But default_scalar_mode_supported_p allows
36425 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
36426 for -mpowerpc64. */
36427 if (TARGET_32BIT && mode == TImode)
36428 return false;
36430 if (DECIMAL_FLOAT_MODE_P (mode))
36431 return default_decimal_float_supported_p ();
36432 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
36433 return true;
36434 else
36435 return default_scalar_mode_supported_p (mode);
36438 /* Target hook for vector_mode_supported_p. */
36439 static bool
36440 rs6000_vector_mode_supported_p (machine_mode mode)
36443 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
36444 return true;
36446 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
36447 128-bit, the compiler might try to widen IEEE 128-bit to IBM
36448 double-double. */
36449 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
36450 return true;
36452 else
36453 return false;
36456 /* Target hook for floatn_mode. */
36457 static opt_scalar_float_mode
36458 rs6000_floatn_mode (int n, bool extended)
36460 if (extended)
36462 switch (n)
36464 case 32:
36465 return DFmode;
36467 case 64:
36468 if (TARGET_FLOAT128_TYPE)
36469 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36470 else
36471 return opt_scalar_float_mode ();
36473 case 128:
36474 return opt_scalar_float_mode ();
36476 default:
36477 /* Those are the only valid _FloatNx types. */
36478 gcc_unreachable ();
36481 else
36483 switch (n)
36485 case 32:
36486 return SFmode;
36488 case 64:
36489 return DFmode;
36491 case 128:
36492 if (TARGET_FLOAT128_TYPE)
36493 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36494 else
36495 return opt_scalar_float_mode ();
36497 default:
36498 return opt_scalar_float_mode ();
36504 /* Target hook for c_mode_for_suffix. */
36505 static machine_mode
36506 rs6000_c_mode_for_suffix (char suffix)
36508 if (TARGET_FLOAT128_TYPE)
36510 if (suffix == 'q' || suffix == 'Q')
36511 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36513 /* At the moment, we are not defining a suffix for IBM extended double.
36514 If/when the default for -mabi=ieeelongdouble is changed, and we want
36515 to support __ibm128 constants in legacy library code, we may need to
36516 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
36517 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
36518 __float80 constants. */
36521 return VOIDmode;
36524 /* Target hook for invalid_arg_for_unprototyped_fn. */
36525 static const char *
36526 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
36528 return (!rs6000_darwin64_abi
36529 && typelist == 0
36530 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
36531 && (funcdecl == NULL_TREE
36532 || (TREE_CODE (funcdecl) == FUNCTION_DECL
36533 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
36534 ? N_("AltiVec argument passed to unprototyped function")
36535 : NULL;
36538 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
36539 setup by calling the hidden function __stack_chk_fail_local instead
36540 of __stack_chk_fail directly. Otherwise it is better to call
36541 __stack_chk_fail directly. */
36543 static tree ATTRIBUTE_UNUSED
36544 rs6000_stack_protect_fail (void)
36546 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
36547 ? default_hidden_stack_protect_fail ()
36548 : default_external_stack_protect_fail ();
36551 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
36553 #if TARGET_ELF
36554 static unsigned HOST_WIDE_INT
36555 rs6000_asan_shadow_offset (void)
36557 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
36559 #endif
36561 /* Mask options that we want to support inside of attribute((target)) and
36562 #pragma GCC target operations. Note, we do not include things like
36563 64/32-bit, endianness, hard/soft floating point, etc. that would have
36564 different calling sequences. */
36566 struct rs6000_opt_mask {
36567 const char *name; /* option name */
36568 HOST_WIDE_INT mask; /* mask to set */
36569 bool invert; /* invert sense of mask */
36570 bool valid_target; /* option is a target option */
36573 static struct rs6000_opt_mask const rs6000_opt_masks[] =
36575 { "altivec", OPTION_MASK_ALTIVEC, false, true },
36576 { "cmpb", OPTION_MASK_CMPB, false, true },
36577 { "crypto", OPTION_MASK_CRYPTO, false, true },
36578 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
36579 { "dlmzb", OPTION_MASK_DLMZB, false, true },
36580 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
36581 false, true },
36582 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
36583 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
36584 { "fprnd", OPTION_MASK_FPRND, false, true },
36585 { "hard-dfp", OPTION_MASK_DFP, false, true },
36586 { "htm", OPTION_MASK_HTM, false, true },
36587 { "isel", OPTION_MASK_ISEL, false, true },
36588 { "mfcrf", OPTION_MASK_MFCRF, false, true },
36589 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
36590 { "modulo", OPTION_MASK_MODULO, false, true },
36591 { "mulhw", OPTION_MASK_MULHW, false, true },
36592 { "multiple", OPTION_MASK_MULTIPLE, false, true },
36593 { "popcntb", OPTION_MASK_POPCNTB, false, true },
36594 { "popcntd", OPTION_MASK_POPCNTD, false, true },
36595 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
36596 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
36597 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
36598 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
36599 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
36600 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
36601 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
36602 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
36603 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
36604 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
36605 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
36606 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
36607 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
36608 { "string", 0, false, true },
36609 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
36610 { "update", OPTION_MASK_NO_UPDATE, true, true },
36611 { "vsx", OPTION_MASK_VSX, false, true },
36612 #ifdef OPTION_MASK_64BIT
36613 #if TARGET_AIX_OS
36614 { "aix64", OPTION_MASK_64BIT, false, false },
36615 { "aix32", OPTION_MASK_64BIT, true, false },
36616 #else
36617 { "64", OPTION_MASK_64BIT, false, false },
36618 { "32", OPTION_MASK_64BIT, true, false },
36619 #endif
36620 #endif
36621 #ifdef OPTION_MASK_EABI
36622 { "eabi", OPTION_MASK_EABI, false, false },
36623 #endif
36624 #ifdef OPTION_MASK_LITTLE_ENDIAN
36625 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
36626 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
36627 #endif
36628 #ifdef OPTION_MASK_RELOCATABLE
36629 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
36630 #endif
36631 #ifdef OPTION_MASK_STRICT_ALIGN
36632 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
36633 #endif
36634 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
36635 { "string", 0, false, false },
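/* Illustrative usage (editorial, not part of rs6000.c): the names in the
   table above are what users write, minus the -m prefix, in target pragmas
   and attributes:

     #pragma GCC target ("vsx,no-multiple")
     __attribute__ ((target ("power9-vector,htm"))) void hot_loop (void);

   Entries with valid_target == false (e.g. "64"/"32", "big"/"little") are
   kept for printing but rejected with "is not allowed" when used here.  */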
36638 /* Builtin mask mapping for printing the flags. */
36639 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
36641 { "altivec", RS6000_BTM_ALTIVEC, false, false },
36642 { "vsx", RS6000_BTM_VSX, false, false },
36643 { "paired", RS6000_BTM_PAIRED, false, false },
36644 { "fre", RS6000_BTM_FRE, false, false },
36645 { "fres", RS6000_BTM_FRES, false, false },
36646 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
36647 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
36648 { "popcntd", RS6000_BTM_POPCNTD, false, false },
36649 { "cell", RS6000_BTM_CELL, false, false },
36650 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
36651 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
36652 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
36653 { "crypto", RS6000_BTM_CRYPTO, false, false },
36654 { "htm", RS6000_BTM_HTM, false, false },
36655 { "hard-dfp", RS6000_BTM_DFP, false, false },
36656 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
36657 { "long-double-128", RS6000_BTM_LDBL128, false, false },
36658 { "float128", RS6000_BTM_FLOAT128, false, false },
36659 { "float128-hw", RS6000_BTM_FLOAT128_HW, false, false },
36662 /* Option variables that we want to support inside attribute((target)) and
36663 #pragma GCC target operations. */
36665 struct rs6000_opt_var {
36666 const char *name; /* option name */
36667 size_t global_offset; /* offset of the option in global_options. */
36668 size_t target_offset; /* offset of the option in target options. */
36671 static struct rs6000_opt_var const rs6000_opt_vars[] =
36673 { "friz",
36674 offsetof (struct gcc_options, x_TARGET_FRIZ),
36675 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
36676 { "avoid-indexed-addresses",
36677 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
36678 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
36679 { "paired",
36680 offsetof (struct gcc_options, x_rs6000_paired_float),
36681 offsetof (struct cl_target_option, x_rs6000_paired_float), },
36682 { "longcall",
36683 offsetof (struct gcc_options, x_rs6000_default_long_calls),
36684 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
36685 { "optimize-swaps",
36686 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
36687 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
36688 { "allow-movmisalign",
36689 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
36690 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
36691 { "sched-groups",
36692 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
36693 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
36694 { "always-hint",
36695 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
36696 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
36697 { "align-branch-targets",
36698 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
36699 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
36700 { "tls-markers",
36701 offsetof (struct gcc_options, x_tls_markers),
36702 offsetof (struct cl_target_option, x_tls_markers), },
36703 { "sched-prolog",
36704 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36705 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36706 { "sched-epilog",
36707 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36708 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36709 { "speculate-indirect-jumps",
36710 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
36711 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
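/* Illustrative usage (editorial): variable options accept the same
   optional "no-" prefix as the mask options; the parser below writes 1 or
   0 through the global_offset recorded above:

     #pragma GCC target ("no-longcall,optimize-swaps")
*/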
36714 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
36715 parsing. Return true if there were no errors. */
36717 static bool
36718 rs6000_inner_target_options (tree args, bool attr_p)
36720 bool ret = true;
36722 if (args == NULL_TREE)
36725 else if (TREE_CODE (args) == STRING_CST)
36727 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36728 char *q;
36730 while ((q = strtok (p, ",")) != NULL)
36732 bool error_p = false;
36733 bool not_valid_p = false;
36734 const char *cpu_opt = NULL;
36736 p = NULL;
36737 if (strncmp (q, "cpu=", 4) == 0)
36739 int cpu_index = rs6000_cpu_name_lookup (q+4);
36740 if (cpu_index >= 0)
36741 rs6000_cpu_index = cpu_index;
36742 else
36744 error_p = true;
36745 cpu_opt = q+4;
36748 else if (strncmp (q, "tune=", 5) == 0)
36750 int tune_index = rs6000_cpu_name_lookup (q+5);
36751 if (tune_index >= 0)
36752 rs6000_tune_index = tune_index;
36753 else
36755 error_p = true;
36756 cpu_opt = q+5;
36759 else
36761 size_t i;
36762 bool invert = false;
36763 char *r = q;
36765 error_p = true;
36766 if (strncmp (r, "no-", 3) == 0)
36768 invert = true;
36769 r += 3;
36772 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
36773 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
36775 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
36777 if (!rs6000_opt_masks[i].valid_target)
36778 not_valid_p = true;
36779 else
36781 error_p = false;
36782 rs6000_isa_flags_explicit |= mask;
36784 /* VSX needs altivec, so -mvsx automagically sets
36785 altivec and disables -mavoid-indexed-addresses. */
36786 if (!invert)
36788 if (mask == OPTION_MASK_VSX)
36790 mask |= OPTION_MASK_ALTIVEC;
36791 TARGET_AVOID_XFORM = 0;
36795 if (rs6000_opt_masks[i].invert)
36796 invert = !invert;
36798 if (invert)
36799 rs6000_isa_flags &= ~mask;
36800 else
36801 rs6000_isa_flags |= mask;
36803 break;
36806 if (error_p && !not_valid_p)
36808 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36809 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36811 size_t j = rs6000_opt_vars[i].global_offset;
36812 *((int *) ((char *)&global_options + j)) = !invert;
36813 error_p = false;
36814 not_valid_p = false;
36815 break;
36820 if (error_p)
36822 const char *eprefix, *esuffix;
36824 ret = false;
36825 if (attr_p)
36827 eprefix = "__attribute__((__target__(";
36828 esuffix = ")))";
36830 else
36832 eprefix = "#pragma GCC target ";
36833 esuffix = "";
36836 if (cpu_opt)
36837 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
36838 q, esuffix);
36839 else if (not_valid_p)
36840 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
36841 else
36842 error ("%s%qs%s is invalid", eprefix, q, esuffix);
36847 else if (TREE_CODE (args) == TREE_LIST)
36851 tree value = TREE_VALUE (args);
36852 if (value)
36854 bool ret2 = rs6000_inner_target_options (value, attr_p);
36855 if (!ret2)
36856 ret = false;
36858 args = TREE_CHAIN (args);
36860 while (args != NULL_TREE);
36863 else
36865 error ("attribute %<target%> argument not a string");
36866 return false;
36869 return ret;
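/* Illustrative input (editorial, names hypothetical) exercising all three
   branches of the parser above -- cpu=, tune=, and the mask/variable
   tables:

     __attribute__ ((target ("cpu=power9,tune=power9,no-vsx,friz")))
     void f (void);

   An unknown token such as "frobnicate" reaches the final error branch as
   __attribute__((__target__("frobnicate"))) is invalid.  */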
36872 /* Print out the target options as a list for -mdebug=target. */
36874 static void
36875 rs6000_debug_target_options (tree args, const char *prefix)
36877 if (args == NULL_TREE)
36878 fprintf (stderr, "%s<NULL>", prefix);
36880 else if (TREE_CODE (args) == STRING_CST)
36882 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36883 char *q;
36885 while ((q = strtok (p, ",")) != NULL)
36887 p = NULL;
36888 fprintf (stderr, "%s\"%s\"", prefix, q);
36889 prefix = ", ";
36893 else if (TREE_CODE (args) == TREE_LIST)
36897 tree value = TREE_VALUE (args);
36898 if (value)
36900 rs6000_debug_target_options (value, prefix);
36901 prefix = ", ";
36903 args = TREE_CHAIN (args);
36905 while (args != NULL_TREE);
36908 else
36909 gcc_unreachable ();
36911 return;
36915 /* Hook to validate attribute((target("..."))). */
36917 static bool
36918 rs6000_valid_attribute_p (tree fndecl,
36919 tree ARG_UNUSED (name),
36920 tree args,
36921 int flags)
36923 struct cl_target_option cur_target;
36924 bool ret;
36925 tree old_optimize;
36926 tree new_target, new_optimize;
36927 tree func_optimize;
36929 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36931 if (TARGET_DEBUG_TARGET)
36933 tree tname = DECL_NAME (fndecl);
36934 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36935 if (tname)
36936 fprintf (stderr, "function: %.*s\n",
36937 (int) IDENTIFIER_LENGTH (tname),
36938 IDENTIFIER_POINTER (tname));
36939 else
36940 fprintf (stderr, "function: unknown\n");
36942 fprintf (stderr, "args:");
36943 rs6000_debug_target_options (args, " ");
36944 fprintf (stderr, "\n");
36946 if (flags)
36947 fprintf (stderr, "flags: 0x%x\n", flags);
36949 fprintf (stderr, "--------------------\n");
36952 /* attribute((target("default"))) does nothing, beyond
36953 affecting multi-versioning. */
36954 if (TREE_VALUE (args)
36955 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
36956 && TREE_CHAIN (args) == NULL_TREE
36957 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
36958 return true;
36960 old_optimize = build_optimization_node (&global_options);
36961 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36963 /* If the function changed the optimization levels as well as setting target
36964 options, start with the optimizations specified. */
36965 if (func_optimize && func_optimize != old_optimize)
36966 cl_optimization_restore (&global_options,
36967 TREE_OPTIMIZATION (func_optimize));
36969 /* The target attributes may also change some optimization flags, so update
36970 the optimization options if necessary. */
36971 cl_target_option_save (&cur_target, &global_options);
36972 rs6000_cpu_index = rs6000_tune_index = -1;
36973 ret = rs6000_inner_target_options (args, true);
36975 /* Set up any additional state. */
36976 if (ret)
36978 ret = rs6000_option_override_internal (false);
36979 new_target = build_target_option_node (&global_options);
36981 else
36982 new_target = NULL;
36984 new_optimize = build_optimization_node (&global_options);
36986 if (!new_target)
36987 ret = false;
36989 else if (fndecl)
36991 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36993 if (old_optimize != new_optimize)
36994 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
36997 cl_target_option_restore (&global_options, &cur_target);
36999 if (old_optimize != new_optimize)
37000 cl_optimization_restore (&global_options,
37001 TREE_OPTIMIZATION (old_optimize));
37003 return ret;
37007 /* Hook to validate the current #pragma GCC target and set the state, and
37008 update the macros based on what was changed. If ARGS is NULL, then
37009 POP_TARGET is used to reset the options. */
37011 bool
37012 rs6000_pragma_target_parse (tree args, tree pop_target)
37014 tree prev_tree = build_target_option_node (&global_options);
37015 tree cur_tree;
37016 struct cl_target_option *prev_opt, *cur_opt;
37017 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
37018 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
37020 if (TARGET_DEBUG_TARGET)
37022 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
37023 fprintf (stderr, "args:");
37024 rs6000_debug_target_options (args, " ");
37025 fprintf (stderr, "\n");
37027 if (pop_target)
37029 fprintf (stderr, "pop_target:\n");
37030 debug_tree (pop_target);
37032 else
37033 fprintf (stderr, "pop_target: <NULL>\n");
37035 fprintf (stderr, "--------------------\n");
37038 if (! args)
37040 cur_tree = ((pop_target)
37041 ? pop_target
37042 : target_option_default_node);
37043 cl_target_option_restore (&global_options,
37044 TREE_TARGET_OPTION (cur_tree));
37046 else
37048 rs6000_cpu_index = rs6000_tune_index = -1;
37049 if (!rs6000_inner_target_options (args, false)
37050 || !rs6000_option_override_internal (false)
37051 || (cur_tree = build_target_option_node (&global_options))
37052 == NULL_TREE)
37054 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
37055 fprintf (stderr, "invalid pragma\n");
37057 return false;
37061 target_option_current_node = cur_tree;
37062 rs6000_activate_target_options (target_option_current_node);
37064 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
37065 change the macros that are defined. */
37066 if (rs6000_target_modify_macros_ptr)
37068 prev_opt = TREE_TARGET_OPTION (prev_tree);
37069 prev_bumask = prev_opt->x_rs6000_builtin_mask;
37070 prev_flags = prev_opt->x_rs6000_isa_flags;
37072 cur_opt = TREE_TARGET_OPTION (cur_tree);
37073 cur_flags = cur_opt->x_rs6000_isa_flags;
37074 cur_bumask = cur_opt->x_rs6000_builtin_mask;
37076 diff_bumask = (prev_bumask ^ cur_bumask);
37077 diff_flags = (prev_flags ^ cur_flags);
37079 if ((diff_flags != 0) || (diff_bumask != 0))
37081 /* Delete old macros. */
37082 rs6000_target_modify_macros_ptr (false,
37083 prev_flags & diff_flags,
37084 prev_bumask & diff_bumask);
37086 /* Define new macros. */
37087 rs6000_target_modify_macros_ptr (true,
37088 cur_flags & diff_flags,
37089 cur_bumask & diff_bumask);
37093 return true;
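/* Illustrative usage (editorial): the args == NULL path above is the pop
   side of the standard pragma pair:

     #pragma GCC push_options
     #pragma GCC target ("power9-vector")
     double sum4 (const double *);   // compiled with the extra ISA bits
     #pragma GCC pop_options         // args == NULL; POP_TARGET restores
*/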
37097 /* Remember the last target of rs6000_set_current_function. */
37098 static GTY(()) tree rs6000_previous_fndecl;
37100 /* Restore target's globals from NEW_TREE and invalidate the
37101 rs6000_previous_fndecl cache. */
37103 void
37104 rs6000_activate_target_options (tree new_tree)
37106 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
37107 if (TREE_TARGET_GLOBALS (new_tree))
37108 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
37109 else if (new_tree == target_option_default_node)
37110 restore_target_globals (&default_target_globals);
37111 else
37112 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
37113 rs6000_previous_fndecl = NULL_TREE;
37116 /* Establish appropriate back-end context for processing the function
37117 FNDECL. The argument might be NULL to indicate processing at top
37118 level, outside of any function scope. */
37119 static void
37120 rs6000_set_current_function (tree fndecl)
37122 if (TARGET_DEBUG_TARGET)
37124 fprintf (stderr, "\n==================== rs6000_set_current_function");
37126 if (fndecl)
37127 fprintf (stderr, ", fndecl %s (%p)",
37128 (DECL_NAME (fndecl)
37129 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
37130 : "<unknown>"), (void *)fndecl);
37132 if (rs6000_previous_fndecl)
37133 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
37135 fprintf (stderr, "\n");
37138 /* Only change the context if the function changes. This hook is called
37139 several times in the course of compiling a function, and we don't want to
37140 slow things down too much or call target_reinit when it isn't safe. */
37141 if (fndecl == rs6000_previous_fndecl)
37142 return;
37144 tree old_tree;
37145 if (rs6000_previous_fndecl == NULL_TREE)
37146 old_tree = target_option_current_node;
37147 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
37148 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
37149 else
37150 old_tree = target_option_default_node;
37152 tree new_tree;
37153 if (fndecl == NULL_TREE)
37155 if (old_tree != target_option_current_node)
37156 new_tree = target_option_current_node;
37157 else
37158 new_tree = NULL_TREE;
37160 else
37162 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
37163 if (new_tree == NULL_TREE)
37164 new_tree = target_option_default_node;
37167 if (TARGET_DEBUG_TARGET)
37169 if (new_tree)
37171 fprintf (stderr, "\nnew fndecl target specific options:\n");
37172 debug_tree (new_tree);
37175 if (old_tree)
37177 fprintf (stderr, "\nold fndecl target specific options:\n");
37178 debug_tree (old_tree);
37181 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
37182 fprintf (stderr, "--------------------\n");
37185 if (new_tree && old_tree != new_tree)
37186 rs6000_activate_target_options (new_tree);
37188 if (fndecl)
37189 rs6000_previous_fndecl = fndecl;
37193 /* Save the current options */
37195 static void
37196 rs6000_function_specific_save (struct cl_target_option *ptr,
37197 struct gcc_options *opts)
37199 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
37200 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
37203 /* Restore the current options */
37205 static void
37206 rs6000_function_specific_restore (struct gcc_options *opts,
37207 struct cl_target_option *ptr)
37210 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
37211 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
37212 (void) rs6000_option_override_internal (false);
37215 /* Print the current options */
37217 static void
37218 rs6000_function_specific_print (FILE *file, int indent,
37219 struct cl_target_option *ptr)
37221 rs6000_print_isa_options (file, indent, "Isa options set",
37222 ptr->x_rs6000_isa_flags);
37224 rs6000_print_isa_options (file, indent, "Isa options explicit",
37225 ptr->x_rs6000_isa_flags_explicit);
37228 /* Helper function to print the current isa or misc options on a line. */
37230 static void
37231 rs6000_print_options_internal (FILE *file,
37232 int indent,
37233 const char *string,
37234 HOST_WIDE_INT flags,
37235 const char *prefix,
37236 const struct rs6000_opt_mask *opts,
37237 size_t num_elements)
37239 size_t i;
37240 size_t start_column = 0;
37241 size_t cur_column;
37242 size_t max_column = 120;
37243 size_t prefix_len = strlen (prefix);
37244 size_t comma_len = 0;
37245 const char *comma = "";
37247 if (indent)
37248 start_column += fprintf (file, "%*s", indent, "");
37250 if (!flags)
37252 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
37253 return;
37256 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
37258 /* Print the various mask options. */
37259 cur_column = start_column;
37260 for (i = 0; i < num_elements; i++)
37262 bool invert = opts[i].invert;
37263 const char *name = opts[i].name;
37264 const char *no_str = "";
37265 HOST_WIDE_INT mask = opts[i].mask;
37266 size_t len = comma_len + prefix_len + strlen (name);
37268 if (!invert)
37270 if ((flags & mask) == 0)
37272 no_str = "no-";
37273 len += sizeof ("no-") - 1;
37276 flags &= ~mask;
37279 else
37281 if ((flags & mask) != 0)
37283 no_str = "no-";
37284 len += sizeof ("no-") - 1;
37287 flags |= mask;
37290 cur_column += len;
37291 if (cur_column > max_column)
37293 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
37294 cur_column = start_column + len;
37295 comma = "";
37298 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
37299 comma = ", ";
37300 comma_len = sizeof (", ") - 1;
37303 fputs ("\n", file);
37306 /* Helper function to print the current isa options on a line. */
37308 static void
37309 rs6000_print_isa_options (FILE *file, int indent, const char *string,
37310 HOST_WIDE_INT flags)
37312 rs6000_print_options_internal (file, indent, string, flags, "-m",
37313 &rs6000_opt_masks[0],
37314 ARRAY_SIZE (rs6000_opt_masks));
37317 static void
37318 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
37319 HOST_WIDE_INT flags)
37321 rs6000_print_options_internal (file, indent, string, flags, "",
37322 &rs6000_builtin_mask_names[0],
37323 ARRAY_SIZE (rs6000_builtin_mask_names));
37326 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
37327 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
37328 -mupper-regs-df, etc.).
37330 If the user used -mno-power8-vector, we need to turn off all of the implicit
37331 ISA 2.07 and 3.0 options that relate to the vector unit.
37333 If the user used -mno-power9-vector, we need to turn off all of the implicit
37334 ISA 3.0 options that relate to the vector unit.
37336 This function does not handle explicit options such as the user specifying
37337 -mdirect-move. These are handled in rs6000_option_override_internal, and
37338 the appropriate error is given if needed.
37340 We return a mask of all of the implicit options that should not be enabled
37341 by default. */
37343 static HOST_WIDE_INT
37344 rs6000_disable_incompatible_switches (void)
37346 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
37347 size_t i, j;
37349 static const struct {
37350 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
37351 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
37352 const char *const name; /* name of the switch. */
37353 } flags[] = {
37354 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
37355 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
37356 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
37359 for (i = 0; i < ARRAY_SIZE (flags); i++)
37361 HOST_WIDE_INT no_flag = flags[i].no_flag;
37363 if ((rs6000_isa_flags & no_flag) == 0
37364 && (rs6000_isa_flags_explicit & no_flag) != 0)
37366 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
37367 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
37368 & rs6000_isa_flags
37369 & dep_flags);
37371 if (set_flags)
37373 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
37374 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
37376 set_flags &= ~rs6000_opt_masks[j].mask;
37377 error ("%<-mno-%s%> turns off %<-m%s%>",
37378 flags[i].name,
37379 rs6000_opt_masks[j].name);
37382 gcc_assert (!set_flags);
37385 rs6000_isa_flags &= ~dep_flags;
37386 ignore_masks |= no_flag | dep_flags;
37390 return ignore_masks;
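/* Illustrative diagnostics (editorial): with "-mpower9-vector -mno-vsx"
   both flags are explicit, so the loop above emits

     error: '-mno-vsx' turns off '-mpower9-vector'

   whereas dependent flags that were only implicitly on are cleared
   silently and folded into the returned ignore_masks.  */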
37394 /* Helper function for printing the function name when debugging. */
37396 static const char *
37397 get_decl_name (tree fn)
37399 tree name;
37401 if (!fn)
37402 return "<null>";
37404 name = DECL_NAME (fn);
37405 if (!name)
37406 return "<no-name>";
37408 return IDENTIFIER_POINTER (name);
37411 /* Return the clone id of the target we are compiling code for in a target
37412 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
37413 the priority list for the target clones (ordered from lowest to
37414 highest). */
37416 static int
37417 rs6000_clone_priority (tree fndecl)
37419 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
37420 HOST_WIDE_INT isa_masks;
37421 int ret = CLONE_DEFAULT;
37422 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
37423 const char *attrs_str = NULL;
37425 attrs = TREE_VALUE (TREE_VALUE (attrs));
37426 attrs_str = TREE_STRING_POINTER (attrs);
37428 /* Return priority zero for the default function. Return the ISA needed for the
37429 function if it is not the default. */
37430 if (strcmp (attrs_str, "default") != 0)
37432 if (fn_opts == NULL_TREE)
37433 fn_opts = target_option_default_node;
37435 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
37436 isa_masks = rs6000_isa_flags;
37437 else
37438 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
37440 for (ret = CLONE_MAX - 1; ret != 0; ret--)
37441 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
37442 break;
37445 if (TARGET_DEBUG_TARGET)
37446 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
37447 get_decl_name (fndecl), ret);
37449 return ret;
37452 /* This compares the priority of target features in function DECL1 and DECL2.
37453 It returns positive value if DECL1 is higher priority, negative value if
37454 DECL2 is higher priority and 0 if they are the same. Note, priorities are
37455 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
37457 static int
37458 rs6000_compare_version_priority (tree decl1, tree decl2)
37460 int priority1 = rs6000_clone_priority (decl1);
37461 int priority2 = rs6000_clone_priority (decl2);
37462 int ret = priority1 - priority2;
37464 if (TARGET_DEBUG_TARGET)
37465 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
37466 get_decl_name (decl1), get_decl_name (decl2), ret);
37468 return ret;
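/* Illustrative usage (editorial, function name hypothetical): these
   priorities drive target_clones dispatch, highest ISA winning:

     __attribute__ ((target_clones ("cpu=power9,cpu=power8,default")))
     double dot (const double *a, const double *b, int n);

   The "default" clone gets CLONE_DEFAULT (0); the others rank by their
   rs6000_clone_map ISA masks.  */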
37471 /* Make a dispatcher declaration for the multi-versioned function DECL.
37472 Calls to DECL function will be replaced with calls to the dispatcher
37473 by the front-end. Returns the decl of the dispatcher function. */
37475 static tree
37476 rs6000_get_function_versions_dispatcher (void *decl)
37478 tree fn = (tree) decl;
37479 struct cgraph_node *node = NULL;
37480 struct cgraph_node *default_node = NULL;
37481 struct cgraph_function_version_info *node_v = NULL;
37482 struct cgraph_function_version_info *first_v = NULL;
37484 tree dispatch_decl = NULL;
37486 struct cgraph_function_version_info *default_version_info = NULL;
37487 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
37489 if (TARGET_DEBUG_TARGET)
37490 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
37491 get_decl_name (fn));
37493 node = cgraph_node::get (fn);
37494 gcc_assert (node != NULL);
37496 node_v = node->function_version ();
37497 gcc_assert (node_v != NULL);
37499 if (node_v->dispatcher_resolver != NULL)
37500 return node_v->dispatcher_resolver;
37502 /* Find the default version and make it the first node. */
37503 first_v = node_v;
37504 /* Go to the beginning of the chain. */
37505 while (first_v->prev != NULL)
37506 first_v = first_v->prev;
37508 default_version_info = first_v;
37509 while (default_version_info != NULL)
37511 const tree decl2 = default_version_info->this_node->decl;
37512 if (is_function_default_version (decl2))
37513 break;
37514 default_version_info = default_version_info->next;
37517 /* If there is no default node, just return NULL. */
37518 if (default_version_info == NULL)
37519 return NULL;
37521 /* Make default info the first node. */
37522 if (first_v != default_version_info)
37524 default_version_info->prev->next = default_version_info->next;
37525 if (default_version_info->next)
37526 default_version_info->next->prev = default_version_info->prev;
37527 first_v->prev = default_version_info;
37528 default_version_info->next = first_v;
37529 default_version_info->prev = NULL;
37532 default_node = default_version_info->this_node;
37534 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
37535 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37536 "target_clones attribute needs GLIBC (2.23 and newer) that "
37537 "exports hardware capability bits");
37538 #else
37540 if (targetm.has_ifunc_p ())
37542 struct cgraph_function_version_info *it_v = NULL;
37543 struct cgraph_node *dispatcher_node = NULL;
37544 struct cgraph_function_version_info *dispatcher_version_info = NULL;
37546 /* Right now, the dispatching is done via ifunc. */
37547 dispatch_decl = make_dispatcher_decl (default_node->decl);
37549 dispatcher_node = cgraph_node::get_create (dispatch_decl);
37550 gcc_assert (dispatcher_node != NULL);
37551 dispatcher_node->dispatcher_function = 1;
37552 dispatcher_version_info
37553 = dispatcher_node->insert_new_function_version ();
37554 dispatcher_version_info->next = default_version_info;
37555 dispatcher_node->definition = 1;
37557 /* Set the dispatcher for all the versions. */
37558 it_v = default_version_info;
37559 while (it_v != NULL)
37561 it_v->dispatcher_resolver = dispatch_decl;
37562 it_v = it_v->next;
37565 else
37567 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37568 "multiversioning needs ifunc which is not supported "
37569 "on this target");
37571 #endif
37573 return dispatch_decl;
37576 /* Make the resolver function decl to dispatch the versions of a multi-
37577 versioned function, DEFAULT_DECL. Create an empty basic block in the
37578 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
37579 function. */
37581 static tree
37582 make_resolver_func (const tree default_decl,
37583 const tree dispatch_decl,
37584 basic_block *empty_bb)
37586 /* Make the resolver function static. The resolver function returns
37587 void *. */
37588 tree decl_name = clone_function_name (default_decl, "resolver");
37589 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
37590 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
37591 tree decl = build_fn_decl (resolver_name, type);
37592 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
37594 DECL_NAME (decl) = decl_name;
37595 TREE_USED (decl) = 1;
37596 DECL_ARTIFICIAL (decl) = 1;
37597 DECL_IGNORED_P (decl) = 0;
37598 TREE_PUBLIC (decl) = 0;
37599 DECL_UNINLINABLE (decl) = 1;
37601 /* Resolver is not external, body is generated. */
37602 DECL_EXTERNAL (decl) = 0;
37603 DECL_EXTERNAL (dispatch_decl) = 0;
37605 DECL_CONTEXT (decl) = NULL_TREE;
37606 DECL_INITIAL (decl) = make_node (BLOCK);
37607 DECL_STATIC_CONSTRUCTOR (decl) = 0;
37609 /* Build result decl and add to function_decl. */
37610 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37611 DECL_ARTIFICIAL (t) = 1;
37612 DECL_IGNORED_P (t) = 1;
37613 DECL_RESULT (decl) = t;
37615 gimplify_function_tree (decl);
37616 push_cfun (DECL_STRUCT_FUNCTION (decl));
37617 *empty_bb = init_lowered_empty_function (decl, false,
37618 profile_count::uninitialized ());
37620 cgraph_node::add_new_function (decl, true);
37621 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37623 pop_cfun ();
37625 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37626 DECL_ATTRIBUTES (dispatch_decl)
37627 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37629 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37631 return decl;
37634 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
37635 return a pointer to VERSION_DECL if we are running on a machine that
37636 supports the index CLONE_ISA hardware architecture bits. This function will
37637 be called during version dispatch to decide which function version to
37638 execute. It returns the basic block at the end, to which more conditions
37639 can be added. */
37641 static basic_block
37642 add_condition_to_bb (tree function_decl, tree version_decl,
37643 int clone_isa, basic_block new_bb)
37645 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
37647 gcc_assert (new_bb != NULL);
37648 gimple_seq gseq = bb_seq (new_bb);
37651 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
37652 build_fold_addr_expr (version_decl));
37653 tree result_var = create_tmp_var (ptr_type_node);
37654 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
37655 gimple *return_stmt = gimple_build_return (result_var);
37657 if (clone_isa == CLONE_DEFAULT)
37659 gimple_seq_add_stmt (&gseq, convert_stmt);
37660 gimple_seq_add_stmt (&gseq, return_stmt);
37661 set_bb_seq (new_bb, gseq);
37662 gimple_set_bb (convert_stmt, new_bb);
37663 gimple_set_bb (return_stmt, new_bb);
37664 pop_cfun ();
37665 return new_bb;
37668 tree bool_zero = build_int_cst (bool_int_type_node, 0);
37669 tree cond_var = create_tmp_var (bool_int_type_node);
37670 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
37671 const char *arg_str = rs6000_clone_map[clone_isa].name;
37672 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
37673 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
37674 gimple_call_set_lhs (call_cond_stmt, cond_var);
37676 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
37677 gimple_set_bb (call_cond_stmt, new_bb);
37678 gimple_seq_add_stmt (&gseq, call_cond_stmt);
37680 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
37681 NULL_TREE, NULL_TREE);
37682 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
37683 gimple_set_bb (if_else_stmt, new_bb);
37684 gimple_seq_add_stmt (&gseq, if_else_stmt);
37686 gimple_seq_add_stmt (&gseq, convert_stmt);
37687 gimple_seq_add_stmt (&gseq, return_stmt);
37688 set_bb_seq (new_bb, gseq);
37690 basic_block bb1 = new_bb;
37691 edge e12 = split_block (bb1, if_else_stmt);
37692 basic_block bb2 = e12->dest;
37693 e12->flags &= ~EDGE_FALLTHRU;
37694 e12->flags |= EDGE_TRUE_VALUE;
37696 edge e23 = split_block (bb2, return_stmt);
37697 gimple_set_bb (convert_stmt, bb2);
37698 gimple_set_bb (return_stmt, bb2);
37700 basic_block bb3 = e23->dest;
37701 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
37703 remove_edge (e23);
37704 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
37706 pop_cfun ();
37707 return bb3;
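/* Illustrative shape of the resolver body these conditions build up
   (editorial pseudo-C; clone names hypothetical):

     if (__builtin_cpu_supports ("arch_3_00"))   // e.g. the power9 clone
       return (void *) dot_power9;
     if (__builtin_cpu_supports ("arch_2_07"))   // e.g. the power8 clone
       return (void *) dot_power8;
     return (void *) dot_default;                // CLONE_DEFAULT, no test
*/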
37710 /* This function generates the dispatch function for multi-versioned functions.
37711 DISPATCH_DECL is the function which will contain the dispatch logic.
37712 FNDECLS are the function choices for dispatch, and is a tree chain.
37713 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
37714 code is generated. */
37716 static int
37717 dispatch_function_versions (tree dispatch_decl,
37718 void *fndecls_p,
37719 basic_block *empty_bb)
37721 int ix;
37722 tree ele;
37723 vec<tree> *fndecls;
37724 tree clones[CLONE_MAX];
37726 if (TARGET_DEBUG_TARGET)
37727 fputs ("dispatch_function_versions, top\n", stderr);
37729 gcc_assert (dispatch_decl != NULL
37730 && fndecls_p != NULL
37731 && empty_bb != NULL);
37733 /* fndecls_p is actually a vector. */
37734 fndecls = static_cast<vec<tree> *> (fndecls_p);
37736 /* At least one more version other than the default. */
37737 gcc_assert (fndecls->length () >= 2);
37739 /* The first version in the vector is the default decl. */
37740 memset ((void *) clones, '\0', sizeof (clones));
37741 clones[CLONE_DEFAULT] = (*fndecls)[0];
37743 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
37744 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
37745 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
37746 recent glibc. If we ever need to call __builtin_cpu_init, we would need
37747 to insert the code here to do the call. */
37749 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
37751 int priority = rs6000_clone_priority (ele);
37752 if (!clones[priority])
37753 clones[priority] = ele;
37756 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
37757 if (clones[ix])
37759 if (TARGET_DEBUG_TARGET)
37760 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
37761 ix, get_decl_name (clones[ix]));
37763 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
37764 *empty_bb);
37767 return 0;
37770 /* Generate the dispatching code body to dispatch multi-versioned function
37771 DECL. The target hook is called to process the "target" attributes and
37772 provide the code to dispatch the right function at run-time. NODE points
37773 to the dispatcher decl whose body will be created. */
37775 static tree
37776 rs6000_generate_version_dispatcher_body (void *node_p)
37778 tree resolver;
37779 basic_block empty_bb;
37780 struct cgraph_node *node = (cgraph_node *) node_p;
37781 struct cgraph_function_version_info *ninfo = node->function_version ();
37783 if (ninfo->dispatcher_resolver)
37784 return ninfo->dispatcher_resolver;
37786 /* node is going to be an alias, so remove the finalized bit. */
37787 node->definition = false;
37789 /* The first version in the chain corresponds to the default version. */
37790 ninfo->dispatcher_resolver = resolver
37791 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
37793 if (TARGET_DEBUG_TARGET)
37794 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
37795 get_decl_name (resolver));
37797 push_cfun (DECL_STRUCT_FUNCTION (resolver));
37798 auto_vec<tree, 2> fn_ver_vec;
37800 for (struct cgraph_function_version_info *vinfo = ninfo->next;
37801 vinfo;
37802 vinfo = vinfo->next)
37804 struct cgraph_node *version = vinfo->this_node;
37805 /* Check for virtual functions here again, as by this time it should
37806 have been determined if this function needs a vtable index or
37807 not. This happens for methods in derived classes that override
37808 virtual methods in base classes but are not explicitly marked as
37809 virtual. */
37810 if (DECL_VINDEX (version->decl))
37811 sorry ("Virtual function multiversioning not supported");
37813 fn_ver_vec.safe_push (version->decl);
37816 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
37817 cgraph_edge::rebuild_edges ();
37818 pop_cfun ();
37819 return resolver;
37823 /* Hook to determine if one function can safely inline another. */
37825 static bool
37826 rs6000_can_inline_p (tree caller, tree callee)
37828 bool ret = false;
37829 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
37830 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37832 /* If callee has no option attributes, then it is ok to inline. */
37833 if (!callee_tree)
37834 ret = true;
37836 /* If caller has no option attributes, but callee does then it is not ok to
37837 inline. */
37838 else if (!caller_tree)
37839 ret = false;
37841 else
37843 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37844 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37846 /* Callee's options should be a subset of the caller's, i.e. a vsx function
37847 can inline an altivec function but a non-vsx function can't inline a
37848 vsx function. */
37849 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37850 == callee_opts->x_rs6000_isa_flags)
37851 ret = true;
37854 if (TARGET_DEBUG_TARGET)
37855 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
37856 get_decl_name (caller), get_decl_name (callee),
37857 (ret ? "can" : "cannot"));
37859 return ret;
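/* Illustrative consequence (editorial) of the subset rule above:

     __attribute__ ((target ("altivec"))) static int g (void) { return 1; }
     __attribute__ ((target ("vsx")))     int f (void) { return g (); }

   g may inline into f, because -mvsx implies -maltivec and so the callee's
   ISA flags are a subset of the caller's; the reverse direction fails.  */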
37862 /* Allocate a stack temp and fixup the address so it meets the particular
37863 memory requirements (either offsettable or REG+REG addressing). */
37866 rs6000_allocate_stack_temp (machine_mode mode,
37867 bool offsettable_p,
37868 bool reg_reg_p)
37870 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37871 rtx addr = XEXP (stack, 0);
37872 int strict_p = reload_completed;
37874 if (!legitimate_indirect_address_p (addr, strict_p))
37876 if (offsettable_p
37877 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37878 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37880 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37881 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37884 return stack;
37887 /* Given a memory reference, if it does not use reg or reg+reg addressing, convert
37888 to such a form to deal with memory reference instructions like STFIWX that
37889 only take reg+reg addressing. */
37892 rs6000_address_for_fpconvert (rtx x)
37894 rtx addr;
37896 gcc_assert (MEM_P (x));
37897 addr = XEXP (x, 0);
37898 if (can_create_pseudo_p ()
37899 && ! legitimate_indirect_address_p (addr, reload_completed)
37900 && ! legitimate_indexed_address_p (addr, reload_completed))
37902 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37904 rtx reg = XEXP (addr, 0);
37905 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37906 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37907 gcc_assert (REG_P (reg));
37908 emit_insn (gen_add3_insn (reg, reg, size_rtx));
37909 addr = reg;
37911 else if (GET_CODE (addr) == PRE_MODIFY)
37913 rtx reg = XEXP (addr, 0);
37914 rtx expr = XEXP (addr, 1);
37915 gcc_assert (REG_P (reg));
37916 gcc_assert (GET_CODE (expr) == PLUS);
37917 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
37918 addr = reg;
37921 x = replace_equiv_address (x, copy_addr_to_reg (addr));
37924 return x;
37927 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37929 On the RS/6000, all integer constants are acceptable, most won't be valid
37930 for particular insns, though. Only easy FP constants are acceptable. */
37932 static bool
37933 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
37935 if (TARGET_ELF && tls_referenced_p (x))
37936 return false;
37938 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
37939 || GET_MODE (x) == VOIDmode
37940 || (TARGET_POWERPC64 && mode == DImode)
37941 || easy_fp_constant (x, mode)
37942 || easy_vector_constant (x, mode));
37946 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
37948 static bool
37949 chain_already_loaded (rtx_insn *last)
37951 for (; last != NULL; last = PREV_INSN (last))
37953 if (NONJUMP_INSN_P (last))
37955 rtx patt = PATTERN (last);
37957 if (GET_CODE (patt) == SET)
37959 rtx lhs = XEXP (patt, 0);
37961 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37962 return true;
37966 return false;
37969 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
37971 void
37972 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37974 const bool direct_call_p
37975 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
37976 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37977 rtx toc_load = NULL_RTX;
37978 rtx toc_restore = NULL_RTX;
37979 rtx func_addr;
37980 rtx abi_reg = NULL_RTX;
37981 rtx call[4];
37982 int n_call;
37983 rtx insn;
37985 /* Handle longcall attributes. */
37986 if (INTVAL (cookie) & CALL_LONG)
37987 func_desc = rs6000_longcall_ref (func_desc);
37989 /* Handle indirect calls. */
37990 if (GET_CODE (func_desc) != SYMBOL_REF
37991 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
37993 /* Save the TOC into its reserved slot before the call,
37994 and prepare to restore it after the call. */
37995 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
37996 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
37997 rtx stack_toc_mem = gen_frame_mem (Pmode,
37998 gen_rtx_PLUS (Pmode, stack_ptr,
37999 stack_toc_offset));
38000 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
38001 gen_rtvec (1, stack_toc_offset),
38002 UNSPEC_TOCSLOT);
38003 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
38005 /* Can we optimize saving the TOC in the prologue or
38006 do we need to do it at every call? */
38007 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
38008 cfun->machine->save_toc_in_prologue = true;
38009 else
38011 MEM_VOLATILE_P (stack_toc_mem) = 1;
38012 emit_move_insn (stack_toc_mem, toc_reg);
38015 if (DEFAULT_ABI == ABI_ELFv2)
38017 /* A function pointer in the ELFv2 ABI is just a plain address, but
38018 the ABI requires it to be loaded into r12 before the call. */
38019 func_addr = gen_rtx_REG (Pmode, 12);
38020 emit_move_insn (func_addr, func_desc);
38021 abi_reg = func_addr;
38023 else
38025 /* A function pointer under AIX is a pointer to a data area whose
38026 first word contains the actual address of the function, whose
38027 second word contains a pointer to its TOC, and whose third word
38028 contains a value to place in the static chain register (r11).
38029 Note that if we load the static chain, our "trampoline" need
38030 not have any executable code. */
38032 /* Load up address of the actual function. */
38033 func_desc = force_reg (Pmode, func_desc);
38034 func_addr = gen_reg_rtx (Pmode);
38035 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
38037 /* Prepare to load the TOC of the called function. Note that the
38038 TOC load must happen immediately before the actual call so
38039 that unwinding the TOC registers works correctly. See the
38040 comment in frob_update_context. */
38041 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
38042 rtx func_toc_mem = gen_rtx_MEM (Pmode,
38043 gen_rtx_PLUS (Pmode, func_desc,
38044 func_toc_offset));
38045 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
38047 /* If we have a static chain, load it up. But, if the call was
38048 originally direct, the 3rd word has not been written since no
38049 trampoline has been built, so we ought not to load it, lest we
38050 override a static chain value. */
38051 if (!direct_call_p
38052 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
38053 && !chain_already_loaded (get_current_sequence ()->next->last))
38055 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
38056 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
38057 rtx func_sc_mem = gen_rtx_MEM (Pmode,
38058 gen_rtx_PLUS (Pmode, func_desc,
38059 func_sc_offset));
38060 emit_move_insn (sc_reg, func_sc_mem);
38061 abi_reg = sc_reg;
38065 else
38067 /* Direct calls use the TOC: for local calls, the callee will
38068 assume the TOC register is set; for non-local calls, the
38069 PLT stub needs the TOC register. */
38070 abi_reg = toc_reg;
38071 func_addr = func_desc;
38074 /* Create the call. */
38075 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
38076 if (value != NULL_RTX)
38077 call[0] = gen_rtx_SET (value, call[0]);
38078 n_call = 1;
38080 if (toc_load)
38081 call[n_call++] = toc_load;
38082 if (toc_restore)
38083 call[n_call++] = toc_restore;
38085 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
38087 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
38088 insn = emit_call_insn (insn);
38090 /* Mention all registers defined by the ABI to hold information
38091 as uses in CALL_INSN_FUNCTION_USAGE. */
38092 if (abi_reg)
38093 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
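/* Illustrative layout (editorial, struct name hypothetical) of the
   AIX/ELFv1 function descriptor the indirect-call path above walks, in
   Pmode-sized words:

     struct aix_func_desc {
       void *entry;          // word 0: the real code address
       void *toc;            // word 1: callee's TOC, loaded just before the call
       void *static_chain;   // word 2: placed in r11 when needed
     };
*/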
38096 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
38098 void
38099 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38101 rtx call[2];
38102 rtx insn;
38104 gcc_assert (INTVAL (cookie) == 0);
38106 /* Create the call. */
38107 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
38108 if (value != NULL_RTX)
38109 call[0] = gen_rtx_SET (value, call[0]);
38111 call[1] = simple_return_rtx;
38113 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
38114 insn = emit_call_insn (insn);
38116 /* Note use of the TOC register. */
38117 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
38120 /* Return whether we need to always update the saved TOC pointer when we update
38121 the stack pointer. */
38123 static bool
38124 rs6000_save_toc_in_prologue_p (void)
38126 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
38129 #ifdef HAVE_GAS_HIDDEN
38130 # define USE_HIDDEN_LINKONCE 1
38131 #else
38132 # define USE_HIDDEN_LINKONCE 0
38133 #endif
38135 /* Fills in the label name that should be used for a 476 link stack thunk. */
38137 void
38138 get_ppc476_thunk_name (char name[32])
38140 gcc_assert (TARGET_LINK_STACK);
38142 if (USE_HIDDEN_LINKONCE)
38143 sprintf (name, "__ppc476.get_thunk");
38144 else
38145 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
38148 /* This function emits the simple thunk routine that is used to preserve
38149 the link stack on the 476 cpu. */
38151 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
38152 static void
38153 rs6000_code_end (void)
38155 char name[32];
38156 tree decl;
38158 if (!TARGET_LINK_STACK)
38159 return;
38161 get_ppc476_thunk_name (name);
38163 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
38164 build_function_type_list (void_type_node, NULL_TREE));
38165 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
38166 NULL_TREE, void_type_node);
38167 TREE_PUBLIC (decl) = 1;
38168 TREE_STATIC (decl) = 1;
38170 #if RS6000_WEAK
38171 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
38173 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
38174 targetm.asm_out.unique_section (decl, 0);
38175 switch_to_section (get_named_section (decl, NULL, 0));
38176 DECL_WEAK (decl) = 1;
38177 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
38178 targetm.asm_out.globalize_label (asm_out_file, name);
38179 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
38180 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
38182 else
38183 #endif
38185 switch_to_section (text_section);
38186 ASM_OUTPUT_LABEL (asm_out_file, name);
38189 DECL_INITIAL (decl) = make_node (BLOCK);
38190 current_function_decl = decl;
38191 allocate_struct_function (decl, false);
38192 init_function_start (decl);
38193 first_function_block_is_cold = false;
38194 /* Make sure unwind info is emitted for the thunk if needed. */
38195 final_start_function (emit_barrier (), asm_out_file, 1);
38197 fputs ("\tblr\n", asm_out_file);
38199 final_end_function ();
38200 init_insn_lengths ();
38201 free_after_compilation (cfun);
38202 set_cfun (NULL);
38203 current_function_decl = NULL;
38206 /* Add r30 to hard reg set if the prologue sets it up and it is not
38207 pic_offset_table_rtx. */
38209 static void
38210 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
38212 if (!TARGET_SINGLE_PIC_BASE
38213 && TARGET_TOC
38214 && TARGET_MINIMAL_TOC
38215 && !constant_pool_empty_p ())
38216 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
38217 if (cfun->machine->split_stack_argp_used)
38218 add_to_hard_reg_set (&set->set, Pmode, 12);
38220 /* Make sure the hard reg set doesn't include r2, which was possibly added
38221 via PIC_OFFSET_TABLE_REGNUM. */
38222 if (TARGET_TOC)
38223 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
38227 /* Helper function for rs6000_split_logical to emit a logical instruction after
38228 splitting the operation into single GPR registers.
38230 DEST is the destination register.
38231 OP1 and OP2 are the input source registers.
38232 CODE is the base operation (AND, IOR, XOR, NOT).
38233 MODE is the machine mode.
38234 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38235 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38236 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38238 static void
38239 rs6000_split_logical_inner (rtx dest,
38240 rtx op1,
38241 rtx op2,
38242 enum rtx_code code,
38243 machine_mode mode,
38244 bool complement_final_p,
38245 bool complement_op1_p,
38246 bool complement_op2_p)
38248 rtx bool_rtx;
38250 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38251 if (op2 && GET_CODE (op2) == CONST_INT
38252 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38253 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38255 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38256 HOST_WIDE_INT value = INTVAL (op2) & mask;
38258 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38259 if (code == AND)
38261 if (value == 0)
38263 emit_insn (gen_rtx_SET (dest, const0_rtx));
38264 return;
38267 else if (value == mask)
38269 if (!rtx_equal_p (dest, op1))
38270 emit_insn (gen_rtx_SET (dest, op1));
38271 return;
38275 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38276 into separate ORI/ORIS or XORI/XORIS instructions. */
38277 else if (code == IOR || code == XOR)
38279 if (value == 0)
38281 if (!rtx_equal_p (dest, op1))
38282 emit_insn (gen_rtx_SET (dest, op1));
38283 return;
38288 if (code == AND && mode == SImode
38289 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38291 emit_insn (gen_andsi3 (dest, op1, op2));
38292 return;
38295 if (complement_op1_p)
38296 op1 = gen_rtx_NOT (mode, op1);
38298 if (complement_op2_p)
38299 op2 = gen_rtx_NOT (mode, op2);
38301 /* For canonical RTL, if only one arm is inverted it is the first. */
38302 if (!complement_op1_p && complement_op2_p)
38303 std::swap (op1, op2);
38305 bool_rtx = ((code == NOT)
38306 ? gen_rtx_NOT (mode, op1)
38307 : gen_rtx_fmt_ee (code, mode, op1, op2));
38309 if (complement_final_p)
38310 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38312 emit_insn (gen_rtx_SET (dest, bool_rtx));
38315 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38316 operations are split immediately during RTL generation to allow for more
38317 optimizations of the AND/IOR/XOR.
38319 OPERANDS is an array containing the destination and two input operands.
38320 CODE is the base operation (AND, IOR, XOR, NOT).
38321 MODE is the machine mode.
38322 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38323 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38324 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
38325 CLOBBER_REG is either NULL or a scratch register of type CC to allow
38326 formation of the AND instructions. */
38328 static void
38329 rs6000_split_logical_di (rtx operands[3],
38330 enum rtx_code code,
38331 bool complement_final_p,
38332 bool complement_op1_p,
38333 bool complement_op2_p)
38335 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
38336 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
38337 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
38338 enum hi_lo { hi = 0, lo = 1 };
38339 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
38340 size_t i;
38342 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
38343 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
38344 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
38345 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
38347 if (code == NOT)
38348 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
38349 else
38351 if (GET_CODE (operands[2]) != CONST_INT)
38353 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
38354 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
38356 else
38358 HOST_WIDE_INT value = INTVAL (operands[2]);
38359 HOST_WIDE_INT value_hi_lo[2];
38361 gcc_assert (!complement_final_p);
38362 gcc_assert (!complement_op1_p);
38363 gcc_assert (!complement_op2_p);
38365 value_hi_lo[hi] = value >> 32;
38366 value_hi_lo[lo] = value & lower_32bits;
38368 for (i = 0; i < 2; i++)
38370 HOST_WIDE_INT sub_value = value_hi_lo[i];
38372 if (sub_value & sign_bit)
38373 sub_value |= upper_32bits;
38375 op2_hi_lo[i] = GEN_INT (sub_value);
38377 /* If this is an AND instruction, check to see if we need to load
38378 the value in a register. */
38379 if (code == AND && sub_value != -1 && sub_value != 0
38380 && !and_operand (op2_hi_lo[i], SImode))
38381 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
38386 for (i = 0; i < 2; i++)
38388 /* Split large IOR/XOR operations. */
38389 if ((code == IOR || code == XOR)
38390 && GET_CODE (op2_hi_lo[i]) == CONST_INT
38391 && !complement_final_p
38392 && !complement_op1_p
38393 && !complement_op2_p
38394 && !logical_const_operand (op2_hi_lo[i], SImode))
38396 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38397 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38398 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38399 rtx tmp = gen_reg_rtx (SImode);
38401 /* Make sure the constant is sign extended. */
38402 if ((hi_16bits & sign_bit) != 0)
38403 hi_16bits |= upper_32bits;
38405 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38406 code, SImode, false, false, false);
38408 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38409 code, SImode, false, false, false);
38411 else
38412 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38413 code, SImode, complement_final_p,
38414 complement_op1_p, complement_op2_p);
38417 return;
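/* Worked example (illustrative, not compiler output): on a 32-bit target,
   "dest = op1 | 0x12345678" has a low-word constant that is not a
   logical_const_operand, so the loop above splits it into

       tmp  = op1 | 0x12340000     (one "oris" instruction)
       dest = tmp | 0x00005678     (one "ori" instruction)

   while the high word, whose constant is 0, is handled by a single call
   to rs6000_split_logical_inner.  */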
38420 /* Split the insns that make up boolean operations operating on multiple GPR
38421 registers. The boolean MD patterns ensure that the inputs either are
38422 exactly the same as the output registers, or there is no overlap.
38424 OPERANDS is an array containing the destination and two input operands.
38425 CODE is the base operation (AND, IOR, XOR, NOT).
38426 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38427 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38428 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38430 void
38431 rs6000_split_logical (rtx operands[3],
38432 enum rtx_code code,
38433 bool complement_final_p,
38434 bool complement_op1_p,
38435 bool complement_op2_p)
38437 machine_mode mode = GET_MODE (operands[0]);
38438 machine_mode sub_mode;
38439 rtx op0, op1, op2;
38440 int sub_size, regno0, regno1, nregs, i;
38442 /* If this is DImode, use the specialized version that can run before
38443 register allocation. */
38444 if (mode == DImode && !TARGET_POWERPC64)
38446 rs6000_split_logical_di (operands, code, complement_final_p,
38447 complement_op1_p, complement_op2_p);
38448 return;
38451 op0 = operands[0];
38452 op1 = operands[1];
38453 op2 = (code == NOT) ? NULL_RTX : operands[2];
38454 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38455 sub_size = GET_MODE_SIZE (sub_mode);
38456 regno0 = REGNO (op0);
38457 regno1 = REGNO (op1);
38459 gcc_assert (reload_completed);
38460 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38461 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38463 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38464 gcc_assert (nregs > 1);
38466 if (op2 && REG_P (op2))
38467 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
38469 for (i = 0; i < nregs; i++)
38471 int offset = i * sub_size;
38472 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38473 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38474 rtx sub_op2 = ((code == NOT)
38475 ? NULL_RTX
38476 : simplify_subreg (sub_mode, op2, mode, offset));
38478 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38479 complement_final_p, complement_op1_p,
38480 complement_op2_p);
38483 return;
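/* Sketch of the split for a TImode XOR with -m64 (illustrative register
   numbers):

       (set (reg:TI 3) (xor:TI (reg:TI 3) (reg:TI 5)))

   becomes, after reload,

       (set (reg:DI 3) (xor:DI (reg:DI 3) (reg:DI 5)))
       (set (reg:DI 4) (xor:DI (reg:DI 4) (reg:DI 6)))  */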
38487 /* Return true if the peephole2 can combine an addis instruction and a load
38488 with an offset into a pair that can be fused together on a power8.  */
38491 bool
38492 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38493 rtx addis_value, /* addis value. */
38494 rtx target, /* target register that is loaded. */
38495 rtx mem) /* bottom part of the memory addr. */
38497 rtx addr;
38498 rtx base_reg;
38500 /* Validate arguments. */
38501 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38502 return false;
38504 if (!base_reg_operand (target, GET_MODE (target)))
38505 return false;
38507 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38508 return false;
38510 /* Allow sign/zero extension. */
38511 if (GET_CODE (mem) == ZERO_EXTEND
38512 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38513 mem = XEXP (mem, 0);
38515 if (!MEM_P (mem))
38516 return false;
38518 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
38519 return false;
38521 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38522 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
38523 return false;
38525 /* Validate that the register used to load the high value is either the
38526 register being loaded, or we can safely replace its use.
38528 This function is only called from the peephole2 pass.  We assume that
38529 there are 2 instructions in the peephole (addis and load), so we check
38530 that the target register is not used in the memory address and that the
38531 register holding the addis result is dead after the peephole.  */
38532 if (REGNO (addis_reg) != REGNO (target))
38534 if (reg_mentioned_p (target, mem))
38535 return false;
38537 if (!peep2_reg_dead_p (2, addis_reg))
38538 return false;
38540 /* If the target register being loaded is the stack pointer, we must
38541 avoid loading any other value into it, even temporarily. */
38542 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
38543 return false;
38546 base_reg = XEXP (addr, 0);
38547 return REGNO (addis_reg) == REGNO (base_reg);
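/* Typical candidate, sketched as the two insns the peephole2 matches
   (illustrative register numbers and offsets):

       (set (reg:DI 9) (plus:DI (reg:DI 2) <high part>))           ; addis
       (set (reg:DI 9) (mem:DI (plus:DI (reg:DI 9) <low part>)))   ; ld

   Here ADDIS_REG and TARGET are the same register, so the pair can be
   fused directly; when they differ, the checks above additionally require
   the addis result to be dead afterwards and unused in the address.  */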
38550 /* During the peephole2 pass, adjust and expand the insns for a load fusion
38551 sequence.  We adjust the addis register to use the target register.  If the
38552 load sign extends, we rewrite it as a zero-extending load followed by an
38553 explicit sign extension, since the fusion only covers zero-extending
38554 loads.
38556 The operands are:
38557 operands[0] register set with addis (to be replaced with target)
38558 operands[1] value set via addis
38559 operands[2] target register being loaded
38560 operands[3] D-form memory reference using operands[0]. */
38562 void
38563 expand_fusion_gpr_load (rtx *operands)
38565 rtx addis_value = operands[1];
38566 rtx target = operands[2];
38567 rtx orig_mem = operands[3];
38568 rtx new_addr, new_mem, orig_addr, offset;
38569 enum rtx_code plus_or_lo_sum;
38570 machine_mode target_mode = GET_MODE (target);
38571 machine_mode extend_mode = target_mode;
38572 machine_mode ptr_mode = Pmode;
38573 enum rtx_code extend = UNKNOWN;
38575 if (GET_CODE (orig_mem) == ZERO_EXTEND
38576 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
38578 extend = GET_CODE (orig_mem);
38579 orig_mem = XEXP (orig_mem, 0);
38580 target_mode = GET_MODE (orig_mem);
38583 gcc_assert (MEM_P (orig_mem));
38585 orig_addr = XEXP (orig_mem, 0);
38586 plus_or_lo_sum = GET_CODE (orig_addr);
38587 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38589 offset = XEXP (orig_addr, 1);
38590 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38591 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38593 if (extend != UNKNOWN)
38594 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
38596 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38597 UNSPEC_FUSION_GPR);
38598 emit_insn (gen_rtx_SET (target, new_mem));
38600 if (extend == SIGN_EXTEND)
38602 int sub_off = ((BYTES_BIG_ENDIAN)
38603 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
38604 : 0);
38605 rtx sign_reg
38606 = simplify_subreg (target_mode, target, extend_mode, sub_off);
38608 emit_insn (gen_rtx_SET (target,
38609 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
38612 return;
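/* For a sign-extending load, a sketch of what the above emits (power8
   fusion only covers zero extension, so the sign extension is explicit):

       (set (reg:DI target)
            (unspec:DI [(zero_extend:DI (mem:HI addr))] UNSPEC_FUSION_GPR))
       (set (reg:DI target)
            (sign_extend:DI (subreg:HI (reg:DI target) ...)))

   i.e. an lhz-style fused load followed by an extsh-style extension.  */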
38615 /* Emit the addis instruction that will be part of a fused instruction
38616 sequence. */
38618 void
38619 emit_fusion_addis (rtx target, rtx addis_value)
38621 rtx fuse_ops[10];
38622 const char *addis_str = NULL;
38624 /* Emit the addis instruction. */
38625 fuse_ops[0] = target;
38626 if (satisfies_constraint_L (addis_value))
38628 fuse_ops[1] = addis_value;
38629 addis_str = "lis %0,%v1";
38632 else if (GET_CODE (addis_value) == PLUS)
38634 rtx op0 = XEXP (addis_value, 0);
38635 rtx op1 = XEXP (addis_value, 1);
38637 if (REG_P (op0) && CONST_INT_P (op1)
38638 && satisfies_constraint_L (op1))
38640 fuse_ops[1] = op0;
38641 fuse_ops[2] = op1;
38642 addis_str = "addis %0,%1,%v2";
38646 else if (GET_CODE (addis_value) == HIGH)
38648 rtx value = XEXP (addis_value, 0);
38649 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
38651 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
38652 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
38653 if (TARGET_ELF)
38654 addis_str = "addis %0,%2,%1@toc@ha";
38656 else if (TARGET_XCOFF)
38657 addis_str = "addis %0,%1@u(%2)";
38659 else
38660 gcc_unreachable ();
38663 else if (GET_CODE (value) == PLUS)
38665 rtx op0 = XEXP (value, 0);
38666 rtx op1 = XEXP (value, 1);
38668 if (GET_CODE (op0) == UNSPEC
38669 && XINT (op0, 1) == UNSPEC_TOCREL
38670 && CONST_INT_P (op1))
38672 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
38673 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
38674 fuse_ops[3] = op1;
38675 if (TARGET_ELF)
38676 addis_str = "addis %0,%2,%1+%3@toc@ha";
38678 else if (TARGET_XCOFF)
38679 addis_str = "addis %0,%1+%3@u(%2)";
38681 else
38682 gcc_unreachable ();
38686 else if (satisfies_constraint_L (value))
38688 fuse_ops[1] = value;
38689 addis_str = "lis %0,%v1";
38692 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
38694 fuse_ops[1] = value;
38695 addis_str = "lis %0,%1@ha";
38699 if (!addis_str)
38700 fatal_insn ("Could not generate addis value for fusion", addis_value);
38702 output_asm_insn (addis_str, fuse_ops);
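/* Examples of the templates chosen above, with illustrative operands:

       lis   9,0x1234            <- 16-bit shifted constant
       addis 9,10,0x1234         <- register plus 16-bit shifted constant
       addis 9,2,sym@toc@ha      <- ELF TOC-relative high part
       lis   9,sym@ha            <- 32-bit ELF constant high part  */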
38705 /* Emit a D-form load or store instruction that is the second instruction
38706 of a fusion sequence. */
38708 void
38709 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
38710 const char *insn_str)
38712 rtx fuse_ops[10];
38713 char insn_template[80];
38715 fuse_ops[0] = load_store_reg;
38716 fuse_ops[1] = addis_reg;
38718 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
38720 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
38721 fuse_ops[2] = offset;
38722 output_asm_insn (insn_template, fuse_ops);
38725 else if (GET_CODE (offset) == UNSPEC
38726 && XINT (offset, 1) == UNSPEC_TOCREL)
38728 if (TARGET_ELF)
38729 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
38731 else if (TARGET_XCOFF)
38732 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38734 else
38735 gcc_unreachable ();
38737 fuse_ops[2] = XVECEXP (offset, 0, 0);
38738 output_asm_insn (insn_template, fuse_ops);
38741 else if (GET_CODE (offset) == PLUS
38742 && GET_CODE (XEXP (offset, 0)) == UNSPEC
38743 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
38744 && CONST_INT_P (XEXP (offset, 1)))
38746 rtx tocrel_unspec = XEXP (offset, 0);
38747 if (TARGET_ELF)
38748 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
38750 else if (TARGET_XCOFF)
38751 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
38753 else
38754 gcc_unreachable ();
38756 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
38757 fuse_ops[3] = XEXP (offset, 1);
38758 output_asm_insn (insn_template, fuse_ops);
38761 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
38763 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38765 fuse_ops[2] = offset;
38766 output_asm_insn (insn_template, fuse_ops);
38769 else
38770 fatal_insn ("Unable to generate load/store offset for fusion", offset);
38772 return;
38775 /* Wrap a TOC address that can be fused to indicate that special fusion
38776 processing is needed. */
38778 static rtx
38779 fusion_wrap_memory_address (rtx old_mem)
38781 rtx old_addr = XEXP (old_mem, 0);
38782 rtvec v = gen_rtvec (1, old_addr);
38783 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
38784 return replace_equiv_address_nv (old_mem, new_addr, false);
38787 /* Given an address, convert it into the addis and load offset parts. Addresses
38788 created during the peephole2 process look like:
38789 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38790 (unspec [(...)] UNSPEC_TOCREL))
38792 Addresses created via toc fusion look like:
38793 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
38795 static void
38796 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38798 rtx hi, lo;
38800 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
38802 lo = XVECEXP (addr, 0, 0);
38803 hi = gen_rtx_HIGH (Pmode, lo);
38805 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38807 hi = XEXP (addr, 0);
38808 lo = XEXP (addr, 1);
38810 else
38811 gcc_unreachable ();
38813 *p_hi = hi;
38814 *p_lo = lo;
38817 /* Return a string to fuse an addis instruction with a gpr load into the same
38818 register that the addis instruction set.  The address that is used
38819 is the logical address that was formed during peephole2:
38820 (lo_sum (high) (low-part))
38822 Or the address is the TOC address that is wrapped before register allocation:
38823 (unspec [(addr)] UNSPEC_FUSION_ADDIS)
38825 The code is complicated, so we call output_asm_insn directly, and just
38826 return "". */
38828 const char *
38829 emit_fusion_gpr_load (rtx target, rtx mem)
38831 rtx addis_value;
38832 rtx addr;
38833 rtx load_offset;
38834 const char *load_str = NULL;
38835 machine_mode mode;
38837 if (GET_CODE (mem) == ZERO_EXTEND)
38838 mem = XEXP (mem, 0);
38840 gcc_assert (REG_P (target) && MEM_P (mem));
38842 addr = XEXP (mem, 0);
38843 fusion_split_address (addr, &addis_value, &load_offset);
38845 /* Now emit the load instruction to the same register. */
38846 mode = GET_MODE (mem);
38847 switch (mode)
38849 case E_QImode:
38850 load_str = "lbz";
38851 break;
38853 case E_HImode:
38854 load_str = "lhz";
38855 break;
38857 case E_SImode:
38858 case E_SFmode:
38859 load_str = "lwz";
38860 break;
38862 case E_DImode:
38863 case E_DFmode:
38864 gcc_assert (TARGET_POWERPC64);
38865 load_str = "ld";
38866 break;
38868 default:
38869 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38872 /* Emit the addis instruction. */
38873 emit_fusion_addis (target, addis_value);
38875 /* Emit the D-form load instruction. */
38876 emit_fusion_load_store (target, target, load_offset, load_str);
38878 return "";
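/* Putting the two halves together, a fused TOC-relative DImode load on
   64-bit ELF would print as (illustrative symbol and register):

       addis 9,2,var@toc@ha
       ld    9,var@toc@l(9)

   with both instructions targeting the loaded register.  */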
38882 /* Return true if the peephole2 can combine an addis instruction with a load
38883 or store, using the fusion support that was added to the ISA 3.0 (power9)
38884 hardware.  */
38886 bool
38887 fusion_p9_p (rtx addis_reg, /* register set via addis. */
38888 rtx addis_value, /* addis value. */
38889 rtx dest, /* destination (memory or register). */
38890 rtx src) /* source (register or memory). */
38892 rtx addr, mem, offset;
38893 machine_mode mode = GET_MODE (src);
38895 /* Validate arguments. */
38896 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38897 return false;
38899 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38900 return false;
38902 /* Ignore extend operations that are part of the load. */
38903 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
38904 src = XEXP (src, 0);
38906 /* Test for memory<-register or register<-memory. */
38907 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
38909 if (!MEM_P (dest))
38910 return false;
38912 mem = dest;
38915 else if (MEM_P (src))
38917 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
38918 return false;
38920 mem = src;
38923 else
38924 return false;
38926 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38927 if (GET_CODE (addr) == PLUS)
38929 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38930 return false;
38932 return satisfies_constraint_I (XEXP (addr, 1));
38935 else if (GET_CODE (addr) == LO_SUM)
38937 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38938 return false;
38940 offset = XEXP (addr, 1);
38941 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
38942 return small_toc_ref (offset, GET_MODE (offset));
38944 else if (TARGET_ELF && !TARGET_POWERPC64)
38945 return CONSTANT_P (offset);
38948 return false;
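/* Illustrative power9 candidate; unlike the power8 gpr fusion, the
   loaded/stored register may differ from the addis base register:

       (set (reg:DI 10) (plus:DI (reg:DI 1) (const_int 65536)))
       (set (reg:DF 33) (mem:DF (plus:DI (reg:DI 10) (const_int -8))))

   The PLUS case above accepts any 16-bit signed offset (constraint "I").  */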
38951 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38952 load sequence.
38954 The operands are:
38955 operands[0] register set with addis
38956 operands[1] value set via addis
38957 operands[2] target register being loaded
38958 operands[3] D-form memory reference using operands[0].
38960 This is similar to the fusion introduced with power8, except it extends to
38961 both loads and stores and does not require the result register to be the
38962 same as the base register.  At the moment, we only do this if the register
38963 set with addis is dead.
38965 void
38966 expand_fusion_p9_load (rtx *operands)
38968 rtx tmp_reg = operands[0];
38969 rtx addis_value = operands[1];
38970 rtx target = operands[2];
38971 rtx orig_mem = operands[3];
38972 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
38973 enum rtx_code plus_or_lo_sum;
38974 machine_mode target_mode = GET_MODE (target);
38975 machine_mode extend_mode = target_mode;
38976 machine_mode ptr_mode = Pmode;
38977 enum rtx_code extend = UNKNOWN;
38979 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
38981 extend = GET_CODE (orig_mem);
38982 orig_mem = XEXP (orig_mem, 0);
38983 target_mode = GET_MODE (orig_mem);
38986 gcc_assert (MEM_P (orig_mem));
38988 orig_addr = XEXP (orig_mem, 0);
38989 plus_or_lo_sum = GET_CODE (orig_addr);
38990 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38992 offset = XEXP (orig_addr, 1);
38993 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38994 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38996 if (extend != UNKNOWN)
38997 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
38999 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
39000 UNSPEC_FUSION_P9);
39002 set = gen_rtx_SET (target, new_mem);
39003 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39004 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39005 emit_insn (insn);
39007 return;
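/* The emitted insn has this shape (a sketch); the clobber keeps the
   register that originally held the addis result visible, since
   emit_fusion_p9_load will rewrite the high part into it:

       (parallel [(set (reg target)
                       (unspec [(mem (plus addis_value offset))]
                               UNSPEC_FUSION_P9))
                  (clobber (reg tmp))])  */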
39010 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39011 store sequence.
39013 The operands are:
39014 operands[0] register set with addis
39015 operands[1] value set via addis
39016 operands[2] target D-form memory being stored to
39017 operands[3] register being stored
39019 This is similar to the fusion introduced with power8, except it extends to
39020 both loads and stores and does not require the result register to be the
39021 same as the base register.  At the moment, we only do this if the register
39022 set with addis is dead.
39024 void
39025 expand_fusion_p9_store (rtx *operands)
39027 rtx tmp_reg = operands[0];
39028 rtx addis_value = operands[1];
39029 rtx orig_mem = operands[2];
39030 rtx src = operands[3];
39031 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
39032 enum rtx_code plus_or_lo_sum;
39033 machine_mode target_mode = GET_MODE (orig_mem);
39034 machine_mode ptr_mode = Pmode;
39036 gcc_assert (MEM_P (orig_mem));
39038 orig_addr = XEXP (orig_mem, 0);
39039 plus_or_lo_sum = GET_CODE (orig_addr);
39040 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39042 offset = XEXP (orig_addr, 1);
39043 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39044 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39046 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
39047 UNSPEC_FUSION_P9);
39049 set = gen_rtx_SET (new_mem, new_src);
39050 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39051 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39052 emit_insn (insn);
39054 return;
39057 /* Return a string to fuse an addis instruction with a load using extended
39058 fusion. The address that is used is the logical address that was formed
39059 during peephole2: (lo_sum (high) (low-part))
39061 The code is complicated, so we call output_asm_insn directly, and just
39062 return "". */
39064 const char *
39065 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
39067 machine_mode mode = GET_MODE (reg);
39068 rtx hi;
39069 rtx lo;
39070 rtx addr;
39071 const char *load_string;
39072 int r;
39074 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
39076 mem = XEXP (mem, 0);
39077 mode = GET_MODE (mem);
39080 if (GET_CODE (reg) == SUBREG)
39082 gcc_assert (SUBREG_BYTE (reg) == 0);
39083 reg = SUBREG_REG (reg);
39086 if (!REG_P (reg))
39087 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
39089 r = REGNO (reg);
39090 if (FP_REGNO_P (r))
39092 if (mode == SFmode)
39093 load_string = "lfs";
39094 else if (mode == DFmode || mode == DImode)
39095 load_string = "lfd";
39096 else
39097 gcc_unreachable ();
39099 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
39101 if (mode == SFmode)
39102 load_string = "lxssp";
39103 else if (mode == DFmode || mode == DImode)
39104 load_string = "lxsd";
39105 else
39106 gcc_unreachable ();
39108 else if (INT_REGNO_P (r))
39110 switch (mode)
39112 case E_QImode:
39113 load_string = "lbz";
39114 break;
39115 case E_HImode:
39116 load_string = "lhz";
39117 break;
39118 case E_SImode:
39119 case E_SFmode:
39120 load_string = "lwz";
39121 break;
39122 case E_DImode:
39123 case E_DFmode:
39124 if (!TARGET_POWERPC64)
39125 gcc_unreachable ();
39126 load_string = "ld";
39127 break;
39128 default:
39129 gcc_unreachable ();
39132 else
39133 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
39135 if (!MEM_P (mem))
39136 fatal_insn ("emit_fusion_p9_load not MEM", mem);
39138 addr = XEXP (mem, 0);
39139 fusion_split_address (addr, &hi, &lo);
39141 /* Emit the addis instruction. */
39142 emit_fusion_addis (tmp_reg, hi);
39144 /* Emit the D-form load instruction. */
39145 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
39147 return "";
39150 /* Return a string to fuse an addis instruction with a store using extended
39151 fusion. The address that is used is the logical address that was formed
39152 during peephole2: (lo_sum (high) (low-part))
39154 The code is complicated, so we call output_asm_insn directly, and just
39155 return "". */
39157 const char *
39158 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
39160 machine_mode mode = GET_MODE (reg);
39161 rtx hi;
39162 rtx lo;
39163 rtx addr;
39164 const char *store_string;
39165 int r;
39167 if (GET_CODE (reg) == SUBREG)
39169 gcc_assert (SUBREG_BYTE (reg) == 0);
39170 reg = SUBREG_REG (reg);
39173 if (!REG_P (reg))
39174 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
39176 r = REGNO (reg);
39177 if (FP_REGNO_P (r))
39179 if (mode == SFmode)
39180 store_string = "stfs";
39181 else if (mode == DFmode)
39182 store_string = "stfd";
39183 else
39184 gcc_unreachable ();
39186 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
39188 if (mode == SFmode)
39189 store_string = "stxssp";
39190 else if (mode == DFmode || mode == DImode)
39191 store_string = "stxsd";
39192 else
39193 gcc_unreachable ();
39195 else if (INT_REGNO_P (r))
39197 switch (mode)
39199 case E_QImode:
39200 store_string = "stb";
39201 break;
39202 case E_HImode:
39203 store_string = "sth";
39204 break;
39205 case E_SImode:
39206 case E_SFmode:
39207 store_string = "stw";
39208 break;
39209 case E_DImode:
39210 case E_DFmode:
39211 if (!TARGET_POWERPC64)
39212 gcc_unreachable ();
39213 store_string = "std";
39214 break;
39215 default:
39216 gcc_unreachable ();
39219 else
39220 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
39222 if (!MEM_P (mem))
39223 fatal_insn ("emit_fusion_p9_store not MEM", mem);
39225 addr = XEXP (mem, 0);
39226 fusion_split_address (addr, &hi, &lo);
39228 /* Emit the addis instruction. */
39229 emit_fusion_addis (tmp_reg, hi);
39231 /* Emit the D-form store instruction.  */
39232 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
39234 return "";
39237 #ifdef RS6000_GLIBC_ATOMIC_FENV
39238 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
39239 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
39240 #endif
39242 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
39244 static void
39245 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
39247 if (!TARGET_HARD_FLOAT)
39249 #ifdef RS6000_GLIBC_ATOMIC_FENV
39250 if (atomic_hold_decl == NULL_TREE)
39252 atomic_hold_decl
39253 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39254 get_identifier ("__atomic_feholdexcept"),
39255 build_function_type_list (void_type_node,
39256 double_ptr_type_node,
39257 NULL_TREE));
39258 TREE_PUBLIC (atomic_hold_decl) = 1;
39259 DECL_EXTERNAL (atomic_hold_decl) = 1;
39262 if (atomic_clear_decl == NULL_TREE)
39264 atomic_clear_decl
39265 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39266 get_identifier ("__atomic_feclearexcept"),
39267 build_function_type_list (void_type_node,
39268 NULL_TREE));
39269 TREE_PUBLIC (atomic_clear_decl) = 1;
39270 DECL_EXTERNAL (atomic_clear_decl) = 1;
39273 tree const_double = build_qualified_type (double_type_node,
39274 TYPE_QUAL_CONST);
39275 tree const_double_ptr = build_pointer_type (const_double);
39276 if (atomic_update_decl == NULL_TREE)
39278 atomic_update_decl
39279 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39280 get_identifier ("__atomic_feupdateenv"),
39281 build_function_type_list (void_type_node,
39282 const_double_ptr,
39283 NULL_TREE));
39284 TREE_PUBLIC (atomic_update_decl) = 1;
39285 DECL_EXTERNAL (atomic_update_decl) = 1;
39288 tree fenv_var = create_tmp_var_raw (double_type_node);
39289 TREE_ADDRESSABLE (fenv_var) = 1;
39290 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
39292 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
39293 *clear = build_call_expr (atomic_clear_decl, 0);
39294 *update = build_call_expr (atomic_update_decl, 1,
39295 fold_convert (const_double_ptr, fenv_addr));
39296 #endif
39297 return;
39300 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
39301 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
39302 tree call_mffs = build_call_expr (mffs, 0);
39304 /* Generates the equivalent of feholdexcept (&fenv_var)
39306 *fenv_var = __builtin_mffs ();
39307 double fenv_hold;
39308 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
39309 __builtin_mtfsf (0xff, fenv_hold); */
39311 /* Mask to clear everything except for the rounding modes and non-IEEE
39312 arithmetic flag. */
39313 const unsigned HOST_WIDE_INT hold_exception_mask =
39314 HOST_WIDE_INT_C (0xffffffff00000007);
39316 tree fenv_var = create_tmp_var_raw (double_type_node);
39318 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
39320 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
39321 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
39322 build_int_cst (uint64_type_node,
39323 hold_exception_mask));
39325 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39326 fenv_llu_and);
39328 tree hold_mtfsf = build_call_expr (mtfsf, 2,
39329 build_int_cst (unsigned_type_node, 0xff),
39330 fenv_hold_mtfsf);
39332 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
39334 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
39336 double fenv_clear = __builtin_mffs ();
39337 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
39338 __builtin_mtfsf (0xff, fenv_clear); */
39340 /* Mask that zeroes the entire FPSCR image held in the low 32 bits,
39341 clearing all of the exception bits. */
39342 const unsigned HOST_WIDE_INT clear_exception_mask =
39343 HOST_WIDE_INT_C (0xffffffff00000000);
39345 tree fenv_clear = create_tmp_var_raw (double_type_node);
39347 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
39349 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
39350 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
39351 fenv_clean_llu,
39352 build_int_cst (uint64_type_node,
39353 clear_exception_mask));
39355 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39356 fenv_clear_llu_and);
39358 tree clear_mtfsf = build_call_expr (mtfsf, 2,
39359 build_int_cst (unsigned_type_node, 0xff),
39360 fenv_clear_mtfsf);
39362 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
39364 /* Generates the equivalent of feupdateenv (&fenv_var)
39366 double old_fenv = __builtin_mffs ();
39367 double fenv_update;
39368 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
39369 (*(uint64_t*)fenv_var & 0x1ff80fff);
39370 __builtin_mtfsf (0xff, fenv_update); */
39372 const unsigned HOST_WIDE_INT update_exception_mask =
39373 HOST_WIDE_INT_C (0xffffffff1fffff00);
39374 const unsigned HOST_WIDE_INT new_exception_mask =
39375 HOST_WIDE_INT_C (0x1ff80fff);
39377 tree old_fenv = create_tmp_var_raw (double_type_node);
39378 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
39380 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
39381 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
39382 build_int_cst (uint64_type_node,
39383 update_exception_mask));
39385 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
39386 build_int_cst (uint64_type_node,
39387 new_exception_mask));
39389 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
39390 old_llu_and, new_llu_and);
39392 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39393 new_llu_mask);
39395 tree update_mtfsf = build_call_expr (mtfsf, 2,
39396 build_int_cst (unsigned_type_node, 0xff),
39397 fenv_update_mtfsf);
39399 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
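/* Rough shape of how these three sequences are used when expanding an
   atomic floating-point compound assignment (a sketch of the
   TARGET_ATOMIC_ASSIGN_EXPAND_FENV contract, not code from this file):

       hold;                      // save env, clear flags, disable traps
       do
         {
           old = atomic-load (obj);
           new = old OP rhs;
           if (compare-and-swap (&obj, old, new))
             break;
           clear;                 // discard exceptions of the failed try
         }
       while (1);
       update;                    // merge new exceptions, restore env  */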
39402 void
39403 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
39405 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
39407 rtx_tmp0 = gen_reg_rtx (V2DFmode);
39408 rtx_tmp1 = gen_reg_rtx (V2DFmode);
39410 /* The destination of the vmrgew instruction layout is:
39411 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
39412 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
39413 vmrgew instruction will be correct. */
39414 if (VECTOR_ELT_ORDER_BIG)
39416 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
39417 GEN_INT (0)));
39418 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
39419 GEN_INT (3)));
39421 else
39423 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
39424 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
39427 rtx_tmp2 = gen_reg_rtx (V4SFmode);
39428 rtx_tmp3 = gen_reg_rtx (V4SFmode);
39430 emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
39431 emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));
39433 if (VECTOR_ELT_ORDER_BIG)
39434 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
39435 else
39436 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
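/* Net effect (illustrative): dst receives the four narrowed values

       dst = { (float) src1[0], (float) src1[1],
               (float) src2[0], (float) src2[1] }

   in vector-element order regardless of endianness; the two xxpermdi/vmrgew
   variants above only differ in how that order is realized.  */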
39439 void
39440 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
39442 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
39444 rtx_tmp0 = gen_reg_rtx (V2DImode);
39445 rtx_tmp1 = gen_reg_rtx (V2DImode);
39447 /* The destination of the vmrgew instruction layout is:
39448 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
39449 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
39450 vmrgew instruction will be correct. */
39451 if (VECTOR_ELT_ORDER_BIG)
39453 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
39454 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
39456 else
39458 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
39459 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
39462 rtx_tmp2 = gen_reg_rtx (V4SFmode);
39463 rtx_tmp3 = gen_reg_rtx (V4SFmode);
39465 if (signed_convert)
39467 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
39468 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
39470 else
39472 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
39473 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
39476 if (VECTOR_ELT_ORDER_BIG)
39477 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
39478 else
39479 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
39482 void
39483 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
39484 rtx src2)
39486 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
39488 rtx_tmp0 = gen_reg_rtx (V2DFmode);
39489 rtx_tmp1 = gen_reg_rtx (V2DFmode);
39491 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
39492 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
39494 rtx_tmp2 = gen_reg_rtx (V4SImode);
39495 rtx_tmp3 = gen_reg_rtx (V4SImode);
39497 if (signed_convert)
39499 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
39500 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
39502 else
39504 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
39505 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
39508 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
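/* Net effect (illustrative):

       dst = { (int) src1[0], (int) src1[1],
               (int) src2[0], (int) src2[1] }

   using signed or unsigned conversions according to SIGNED_CONVERT.  */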
39511 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
39513 static bool
39514 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
39515 optimization_type opt_type)
39517 switch (op)
39519 case rsqrt_optab:
39520 return (opt_type == OPTIMIZE_FOR_SPEED
39521 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
39523 default:
39524 return true;
39528 /* Implement TARGET_CONSTANT_ALIGNMENT. */
39530 static HOST_WIDE_INT
39531 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
39533 if (TREE_CODE (exp) == STRING_CST
39534 && (STRICT_ALIGNMENT || !optimize_size))
39535 return MAX (align, BITS_PER_WORD);
39536 return align;
39539 /* Implement TARGET_STARTING_FRAME_OFFSET. */
39541 static HOST_WIDE_INT
39542 rs6000_starting_frame_offset (void)
39544 if (FRAME_GROWS_DOWNWARD)
39545 return 0;
39546 return RS6000_STARTING_FRAME_OFFSET;
39549 struct gcc_target targetm = TARGET_INITIALIZER;
39551 #include "gt-rs6000.h"