/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))

static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);
/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;		/* size of SPE alignment padding */
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;	/* whether 64-bit SPE registers are used */
  int savres_strategy;		/* mask of save/restore strategy flags */
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
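/* Illustrative note (not part of the option machinery in this file):
   the -mrecip= argument is matched entry by entry against the table
   above, so e.g. -mrecip=divf,rsqrtd ORs in
   (RECIP_SF_DIV | RECIP_V4SF_DIV) and
   (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT), enabling only the
   single-precision divide and double-precision rsqrt estimates.  */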
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};
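/* For illustration: user code reaches the two tables above through the
   CPU builtins, whose string argument must match one of the entries at
   compile time, e.g.

     if (__builtin_cpu_is ("power9") || __builtin_cpu_supports ("vsx"))
       ... take a VSX code path ...
 */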
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in powerpcspe-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in powerpcspe-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the three
   register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* Quad offset is limited.  */
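/* For example, a mode/register-class pair whose mask is
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) is valid
   in that class and accepts both reg+reg and reg+offset addresses, but
   no auto-increment forms.  */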
/* Per-mode table of reload insns and of the valid addressing-mode masks for
   each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	/* INSN to reload for loading.  */
  enum insn_code reload_store;	/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;	/* INSN for fusing gpr ADDIS/loads.  */
  /* INSNs for fusing addi with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
  /* INSNs for fusing addis with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;		/* Scalar value can go in VMX.  */
  bool fused_toc;		/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
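/* Illustrative sketch (this helper is not part of the original file):
   any other addr_mask bit can be tested the same way, e.g. whether a
   mode allows reg+reg (indexed) addressing in any register class:

     static inline bool
     mode_supports_indexed_p (machine_mode mode)
     {
       return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY]
		& RELOAD_REG_INDEXED) != 0);
     }
 */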
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */
int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
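/* Note for readers: this predicate is typically used as the guard in
   define_bypass entries of the processor scheduling descriptions, in
   place of calling the generic store_data_bypass_p directly.  */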
/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
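/* Note for readers: COSTS_N_INSNS (N) is GCC's generic cost scale, N
   times the cost of one simple fast instruction (rtl.h defines it as
   (N) * 4), which is why the tables below are described as relative
   to an add.  */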
/* Processor costs (relative to an add).  */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  6,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "powerpcspe-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
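/* The macros above implement the usual "X macro" pattern: each
   RS6000_BUILTIN_<kind> (ENUM, NAME, MASK, ATTR, ICODE) entry in
   powerpcspe-builtin.def expands to one { NAME, ICODE, MASK, ATTR }
   initializer, so rs6000_builtin_info stays in sync with the .def
   file automatically.  */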
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
    "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
   "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
   "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
    "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
    "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
   "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
   "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",   "ctr",  "ap",
   "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
   "%rh8",  "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
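/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000
   (the bit for %v0) and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is
   0x00000001 (the bit for %v31).  */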
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1701 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1702 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1704 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1705 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1707 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1708 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1709 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1710 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1711 rs6000_builtin_support_vector_misalignment
1712 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1713 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1716 rs6000_builtin_vectorization_cost
1717 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1718 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1719 rs6000_preferred_simd_mode
1720 #undef TARGET_VECTORIZE_INIT_COST
1721 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1722 #undef TARGET_VECTORIZE_ADD_STMT_COST
1723 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1724 #undef TARGET_VECTORIZE_FINISH_COST
1725 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1726 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1727 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1729 #undef TARGET_INIT_BUILTINS
1730 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1731 #undef TARGET_BUILTIN_DECL
1732 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1734 #undef TARGET_FOLD_BUILTIN
1735 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1736 #undef TARGET_GIMPLE_FOLD_BUILTIN
1737 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1739 #undef TARGET_EXPAND_BUILTIN
1740 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1742 #undef TARGET_MANGLE_TYPE
1743 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1745 #undef TARGET_INIT_LIBFUNCS
1746 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1748 #if TARGET_MACHO
1749 #undef TARGET_BINDS_LOCAL_P
1750 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1751 #endif
1753 #undef TARGET_MS_BITFIELD_LAYOUT_P
1754 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1756 #undef TARGET_ASM_OUTPUT_MI_THUNK
1757 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1759 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1760 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1762 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1763 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1765 #undef TARGET_REGISTER_MOVE_COST
1766 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1767 #undef TARGET_MEMORY_MOVE_COST
1768 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1769 #undef TARGET_CANNOT_COPY_INSN_P
1770 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1771 #undef TARGET_RTX_COSTS
1772 #define TARGET_RTX_COSTS rs6000_rtx_costs
1773 #undef TARGET_ADDRESS_COST
1774 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1776 #undef TARGET_DWARF_REGISTER_SPAN
1777 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1779 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1780 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1782 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1783 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1785 #undef TARGET_PROMOTE_FUNCTION_MODE
1786 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1788 #undef TARGET_RETURN_IN_MEMORY
1789 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1791 #undef TARGET_RETURN_IN_MSB
1792 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1794 #undef TARGET_SETUP_INCOMING_VARARGS
1795 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1797 /* Always strict argument naming on rs6000. */
1798 #undef TARGET_STRICT_ARGUMENT_NAMING
1799 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1800 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1801 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1802 #undef TARGET_SPLIT_COMPLEX_ARG
1803 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1804 #undef TARGET_MUST_PASS_IN_STACK
1805 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1806 #undef TARGET_PASS_BY_REFERENCE
1807 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1808 #undef TARGET_ARG_PARTIAL_BYTES
1809 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1810 #undef TARGET_FUNCTION_ARG_ADVANCE
1811 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1812 #undef TARGET_FUNCTION_ARG
1813 #define TARGET_FUNCTION_ARG rs6000_function_arg
1814 #undef TARGET_FUNCTION_ARG_PADDING
1815 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1816 #undef TARGET_FUNCTION_ARG_BOUNDARY
1817 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1819 #undef TARGET_BUILD_BUILTIN_VA_LIST
1820 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1822 #undef TARGET_EXPAND_BUILTIN_VA_START
1823 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1825 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1826 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1828 #undef TARGET_EH_RETURN_FILTER_MODE
1829 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1831 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1832 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1834 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1835 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1837 #undef TARGET_FLOATN_MODE
1838 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1840 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1841 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1843 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1844 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1846 #undef TARGET_MD_ASM_ADJUST
1847 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1849 #undef TARGET_OPTION_OVERRIDE
1850 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1852 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1853 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1854 rs6000_builtin_vectorized_function
1856 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1857 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1858 rs6000_builtin_md_vectorized_function
1860 #undef TARGET_STACK_PROTECT_GUARD
1861 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1863 #if !TARGET_MACHO
1864 #undef TARGET_STACK_PROTECT_FAIL
1865 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1866 #endif
1868 #ifdef HAVE_AS_TLS
1869 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1870 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1871 #endif
1873 /* Use a 32-bit anchor range. This leads to sequences like:
1875 addis tmp,anchor,high
1876 add dest,tmp,low
1878 where tmp itself acts as an anchor, and can be shared between
1879 accesses to the same 64k page. */
1880 #undef TARGET_MIN_ANCHOR_OFFSET
1881 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1882 #undef TARGET_MAX_ANCHOR_OFFSET
1883 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
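/* Worked example (illustrative arithmetic only): an anchor-relative
   offset such as 0x1234abcd is split into an ADDIS high part and a
   signed 16-bit low part, rounding the high part up when bit 15 of the
   offset is set:

     high = (0x1234abcd + 0x8000) >> 16;    -- 0x1235
     low  = (short) (0x1234abcd & 0xffff);  -- -0x5433

   so 0x12350000 + (-0x5433) recovers 0x1234abcd, and every access in
   the same 64k page can reuse the "addis" result.  */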
1884 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1885 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1886 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1887 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1889 #undef TARGET_BUILTIN_RECIPROCAL
1890 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1892 #undef TARGET_EXPAND_TO_RTL_HOOK
1893 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1895 #undef TARGET_INSTANTIATE_DECLS
1896 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1898 #undef TARGET_SECONDARY_RELOAD
1899 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1900 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1901 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1903 #undef TARGET_LEGITIMATE_ADDRESS_P
1904 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1906 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1907 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1909 #undef TARGET_LRA_P
1910 #define TARGET_LRA_P rs6000_lra_p
1912 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1913 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1915 #undef TARGET_CAN_ELIMINATE
1916 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1918 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1919 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1921 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1922 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1924 #undef TARGET_TRAMPOLINE_INIT
1925 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1927 #undef TARGET_FUNCTION_VALUE
1928 #define TARGET_FUNCTION_VALUE rs6000_function_value
1930 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1931 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1933 #undef TARGET_OPTION_SAVE
1934 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1936 #undef TARGET_OPTION_RESTORE
1937 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1939 #undef TARGET_OPTION_PRINT
1940 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1942 #undef TARGET_CAN_INLINE_P
1943 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1945 #undef TARGET_SET_CURRENT_FUNCTION
1946 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1948 #undef TARGET_LEGITIMATE_CONSTANT_P
1949 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1951 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1952 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1954 #undef TARGET_CAN_USE_DOLOOP_P
1955 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1957 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1958 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1960 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1961 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1962 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1963 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1964 #undef TARGET_UNWIND_WORD_MODE
1965 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1967 #undef TARGET_OFFLOAD_OPTIONS
1968 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1970 #undef TARGET_C_MODE_FOR_SUFFIX
1971 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1973 #undef TARGET_INVALID_BINARY_OP
1974 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1976 #undef TARGET_OPTAB_SUPPORTED_P
1977 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1979 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1980 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1982 #undef TARGET_HARD_REGNO_NREGS
1983 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1984 #undef TARGET_HARD_REGNO_MODE_OK
1985 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1987 #undef TARGET_MODES_TIEABLE_P
1988 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1990 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1991 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1992 rs6000_hard_regno_call_part_clobbered
1994 #undef TARGET_SLOW_UNALIGNED_ACCESS
1995 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1998 /* Processor table. */
1999 struct rs6000_ptt
2000 {
2001 const char *const name; /* Canonical processor name. */
2002 const enum processor_type processor; /* Processor type enum value. */
2003 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
2004 };
2006 static struct rs6000_ptt const processor_target_table[] =
2007 {
2008 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
2009 #include "powerpcspe-cpus.def"
2010 #undef RS6000_CPU
2011 };
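/* The table above uses the classic X-macro idiom: RS6000_CPU is given a
   definition, the .def file is textually included so each of its
   RS6000_CPU (NAME, CPU, FLAGS) lines expands to one initializer, and
   the macro is then undefined.  A minimal standalone sketch of the same
   technique (hypothetical names, for illustration only):

     -- cpus.def contains lines such as:  CPU_ROW ("power8", 8)
     #define CPU_ROW(NAME, GEN) { NAME, GEN },
     static const struct { const char *name; int gen; } rows[] = {
     #include "cpus.def"
     };
     #undef CPU_ROW
*/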
2013 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
2014 name is invalid. */
2016 static int
2017 rs6000_cpu_name_lookup (const char *name)
2018 {
2019 size_t i;
2021 if (name != NULL)
2022 {
2023 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2024 if (! strcmp (name, processor_target_table[i].name))
2025 return (int)i;
2026 }
2028 return -1;
2029 }
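/* Usage sketch (hypothetical values): once the table is populated from
   powerpcspe-cpus.def, a call such as rs6000_cpu_name_lookup ("8540")
   returns that entry's index, while an unknown string like
   rs6000_cpu_name_lookup ("no-such-cpu") returns -1.  */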
2032 /* Return number of consecutive hard regs needed starting at reg REGNO
2033 to hold something of mode MODE.
2034 This is ordinarily the length in words of a value of mode MODE
2035 but can be less for certain modes in special long registers.
2037 For the SPE, GPRs are 64 bits but only 32 bits are visible in
2038 scalar instructions. The upper 32 bits are only available to the
2039 SIMD instructions.
2041 POWER and PowerPC GPRs hold 32 bits worth;
2042 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2044 static int
2045 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2046 {
2047 unsigned HOST_WIDE_INT reg_size;
2049 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2050 128-bit floating point that can go in vector registers, which has VSX
2051 memory addressing. */
2052 if (FP_REGNO_P (regno))
2053 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2054 ? UNITS_PER_VSX_WORD
2055 : UNITS_PER_FP_WORD);
2057 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2058 reg_size = UNITS_PER_SPE_WORD;
2060 else if (ALTIVEC_REGNO_P (regno))
2061 reg_size = UNITS_PER_ALTIVEC_WORD;
2063 /* The value returned for SCmode in the E500 double case is 2 for
2064 ABI compatibility; storing an SCmode value in a single register
2065 would require function_arg and rs6000_spe_function_arg to handle
2066 SCmode so as to pass the value correctly in a pair of
2067 registers. */
2068 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
2069 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
2070 reg_size = UNITS_PER_FP_WORD;
2072 else
2073 reg_size = UNITS_PER_WORD;
2075 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
2076 }
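/* Worked example (illustrative): the expression above is a ceiling
   division.  A 16-byte TFmode value in 8-byte FP registers needs
   (16 + 8 - 1) / 8 = 2 registers, while a 4-byte SImode value in
   8-byte PowerPC64 GPRs needs (4 + 8 - 1) / 8 = 1 register.  */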
2078 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2079 MODE. */
2080 static int
2081 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2082 {
2083 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2085 if (COMPLEX_MODE_P (mode))
2086 mode = GET_MODE_INNER (mode);
2088 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2089 register pairs, and PTImode is used where we need to deal with quad
2090 word memory operations. Don't allow quad words in the argument or frame
2091 pointer registers, just registers 0..31. */
2092 if (mode == PTImode)
2093 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2094 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2095 && ((regno & 1) == 0));
2097 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2098 implementations. Don't allow an item to be split between a FP register
2099 and an Altivec register. Allow TImode in all VSX registers if the user
2100 asked for it. */
2101 if (TARGET_VSX && VSX_REGNO_P (regno)
2102 && (VECTOR_MEM_VSX_P (mode)
2103 || FLOAT128_VECTOR_P (mode)
2104 || reg_addr[mode].scalar_in_vmx_p
2105 || (TARGET_VSX_TIMODE && mode == TImode)
2106 || (TARGET_VADDUQM && mode == V1TImode)))
2107 {
2108 if (FP_REGNO_P (regno))
2109 return FP_REGNO_P (last_regno);
2111 if (ALTIVEC_REGNO_P (regno))
2112 {
2113 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2114 return 0;
2116 return ALTIVEC_REGNO_P (last_regno);
2117 }
2118 }
2120 /* The GPRs can hold any mode, but values bigger than one register
2121 cannot go past R31. */
2122 if (INT_REGNO_P (regno))
2123 return INT_REGNO_P (last_regno);
2125 /* The float registers (except for VSX vector modes) can only hold floating
2126 modes and DImode. */
2127 if (FP_REGNO_P (regno))
2128 {
2129 if (FLOAT128_VECTOR_P (mode))
2130 return false;
2132 if (SCALAR_FLOAT_MODE_P (mode)
2133 && (mode != TDmode || (regno % 2) == 0)
2134 && FP_REGNO_P (last_regno))
2135 return 1;
2137 if (GET_MODE_CLASS (mode) == MODE_INT)
2138 {
2139 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2140 return 1;
2142 if (TARGET_VSX_SMALL_INTEGER)
2143 {
2144 if (mode == SImode)
2145 return 1;
2147 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
2148 return 1;
2149 }
2150 }
2152 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2153 && PAIRED_VECTOR_MODE (mode))
2154 return 1;
2156 return 0;
2157 }
2159 /* The CR register can only hold CC modes. */
2160 if (CR_REGNO_P (regno))
2161 return GET_MODE_CLASS (mode) == MODE_CC;
2163 if (CA_REGNO_P (regno))
2164 return mode == Pmode || mode == SImode;
2166 /* AltiVec modes only in AltiVec registers. */
2167 if (ALTIVEC_REGNO_P (regno))
2168 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2169 || mode == V1TImode);
2171 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2172 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2173 return 1;
2175 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
2176 and the value must be able to fit within the register set. */
2178 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2179 }
2181 /* Implement TARGET_HARD_REGNO_NREGS. */
2183 static unsigned int
2184 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2185 {
2186 return rs6000_hard_regno_nregs[mode][regno];
2187 }
2189 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2191 static bool
2192 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2193 {
2194 return rs6000_hard_regno_mode_ok_p[mode][regno];
2195 }
2197 /* Implement TARGET_MODES_TIEABLE_P.
2199 PTImode cannot tie with other modes because PTImode is restricted to even
2200 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2201 57744).
2203 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2204 128-bit floating point on VSX systems ties with other vectors. */
2206 static bool
2207 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2208 {
2209 if (mode1 == PTImode)
2210 return mode2 == PTImode;
2211 if (mode2 == PTImode)
2212 return false;
2214 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2215 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2216 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2217 return false;
2219 if (SCALAR_FLOAT_MODE_P (mode1))
2220 return SCALAR_FLOAT_MODE_P (mode2);
2221 if (SCALAR_FLOAT_MODE_P (mode2))
2222 return false;
2224 if (GET_MODE_CLASS (mode1) == MODE_CC)
2225 return GET_MODE_CLASS (mode2) == MODE_CC;
2226 if (GET_MODE_CLASS (mode2) == MODE_CC)
2227 return false;
2229 if (SPE_VECTOR_MODE (mode1))
2230 return SPE_VECTOR_MODE (mode2);
2231 if (SPE_VECTOR_MODE (mode2))
2232 return false;
2234 return true;
2235 }
2237 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
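/* A register is "part clobbered" when the ABI only preserves part of it
   across calls (a summary, not from the original sources): in 32-bit
   mode on a 64-bit CPU only the low 32 bits of callee-saved GPRs are
   saved, so anything wider than 4 bytes loses its high half; likewise
   only the FPR (low 64-bit) half of a VSX register is callee-saved, so
   wider VSX modes are part clobbered unless they live in two FPRs
   (FLOAT128_2REG_P).  */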
2239 static bool
2240 rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
2241 {
2242 if (TARGET_32BIT
2243 && TARGET_POWERPC64
2244 && GET_MODE_SIZE (mode) > 4
2245 && INT_REGNO_P (regno))
2246 return true;
2248 if (TARGET_VSX
2249 && FP_REGNO_P (regno)
2250 && GET_MODE_SIZE (mode) > 8
2251 && !FLOAT128_2REG_P (mode))
2252 return true;
2254 return false;
2255 }
2257 /* Print interesting facts about registers. */
2258 static void
2259 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2261 int r, m;
2263 for (r = first_regno; r <= last_regno; ++r)
2265 const char *comma = "";
2266 int len;
2268 if (first_regno == last_regno)
2269 fprintf (stderr, "%s:\t", reg_name);
2270 else
2271 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2273 len = 8;
2274 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2275 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2277 if (len > 70)
2279 fprintf (stderr, ",\n\t");
2280 len = 8;
2281 comma = "";
2284 if (rs6000_hard_regno_nregs[m][r] > 1)
2285 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2286 rs6000_hard_regno_nregs[m][r]);
2287 else
2288 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2290 comma = ", ";
2293 if (call_used_regs[r])
2295 if (len > 70)
2297 fprintf (stderr, ",\n\t");
2298 len = 8;
2299 comma = "";
2302 len += fprintf (stderr, "%s%s", comma, "call-used");
2303 comma = ", ";
2306 if (fixed_regs[r])
2308 if (len > 70)
2310 fprintf (stderr, ",\n\t");
2311 len = 8;
2312 comma = "";
2315 len += fprintf (stderr, "%s%s", comma, "fixed");
2316 comma = ", ";
2319 if (len > 70)
2321 fprintf (stderr, ",\n\t");
2322 comma = "";
2325 len += fprintf (stderr, "%sreg-class = %s", comma,
2326 reg_class_names[(int)rs6000_regno_regclass[r]]);
2327 comma = ", ";
2329 if (len > 70)
2331 fprintf (stderr, ",\n\t");
2332 comma = "";
2335 fprintf (stderr, "%sregno = %d\n", comma, r);
2339 static const char *
2340 rs6000_debug_vector_unit (enum rs6000_vector v)
2342 const char *ret;
2344 switch (v)
2346 case VECTOR_NONE: ret = "none"; break;
2347 case VECTOR_ALTIVEC: ret = "altivec"; break;
2348 case VECTOR_VSX: ret = "vsx"; break;
2349 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2350 case VECTOR_PAIRED: ret = "paired"; break;
2351 case VECTOR_SPE: ret = "spe"; break;
2352 case VECTOR_OTHER: ret = "other"; break;
2353 default: ret = "unknown"; break;
2356 return ret;
2359 /* Inner function printing just the address mask for a particular reload
2360 register class. */
2361 DEBUG_FUNCTION char *
2362 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2364 static char ret[8];
2365 char *p = ret;
2367 if ((mask & RELOAD_REG_VALID) != 0)
2368 *p++ = 'v';
2369 else if (keep_spaces)
2370 *p++ = ' ';
2372 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2373 *p++ = 'm';
2374 else if (keep_spaces)
2375 *p++ = ' ';
2377 if ((mask & RELOAD_REG_INDEXED) != 0)
2378 *p++ = 'i';
2379 else if (keep_spaces)
2380 *p++ = ' ';
2382 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2383 *p++ = 'O';
2384 else if ((mask & RELOAD_REG_OFFSET) != 0)
2385 *p++ = 'o';
2386 else if (keep_spaces)
2387 *p++ = ' ';
2389 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2390 *p++ = '+';
2391 else if (keep_spaces)
2392 *p++ = ' ';
2394 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2395 *p++ = '+';
2396 else if (keep_spaces)
2397 *p++ = ' ';
2399 if ((mask & RELOAD_REG_AND_M16) != 0)
2400 *p++ = '&';
2401 else if (keep_spaces)
2402 *p++ = ' ';
2404 *p = '\0';
2406 return ret;
2409 /* Print the address masks in a human readable fashion. */
2410 DEBUG_FUNCTION void
2411 rs6000_debug_print_mode (ssize_t m)
2413 ssize_t rc;
2414 int spaces = 0;
2415 bool fuse_extra_p;
2417 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2418 for (rc = 0; rc < N_RELOAD_REG; rc++)
2419 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2420 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2422 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2423 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2424 fprintf (stderr, " Reload=%c%c",
2425 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2426 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2427 else
2428 spaces += sizeof (" Reload=sl") - 1;
2430 if (reg_addr[m].scalar_in_vmx_p)
2432 fprintf (stderr, "%*s Upper=y", spaces, "");
2433 spaces = 0;
2435 else
2436 spaces += sizeof (" Upper=y") - 1;
2438 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2439 || reg_addr[m].fused_toc);
2440 if (!fuse_extra_p)
2442 for (rc = 0; rc < N_RELOAD_REG; rc++)
2444 if (rc != RELOAD_REG_ANY)
2446 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2448 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2449 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2450 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2452 fuse_extra_p = true;
2453 break;
2459 if (fuse_extra_p)
2461 fprintf (stderr, "%*s Fuse:", spaces, "");
2462 spaces = 0;
2464 for (rc = 0; rc < N_RELOAD_REG; rc++)
2466 if (rc != RELOAD_REG_ANY)
2468 char load, store;
2470 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2471 load = 'l';
2472 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2473 load = 'L';
2474 else
2475 load = '-';
2477 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2478 store = 's';
2479 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2480 store = 'S';
2481 else
2482 store = '-';
2484 if (load == '-' && store == '-')
2485 spaces += 5;
2486 else
2488 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2489 reload_reg_map[rc].name[0], load, store);
2490 spaces = 0;
2495 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2497 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2498 spaces = 0;
2500 else
2501 spaces += sizeof (" P8gpr") - 1;
2503 if (reg_addr[m].fused_toc)
2505 fprintf (stderr, "%*sToc", (spaces + 1), "");
2506 spaces = 0;
2508 else
2509 spaces += sizeof (" Toc") - 1;
2511 else
2512 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2514 if (rs6000_vector_unit[m] != VECTOR_NONE
2515 || rs6000_vector_mem[m] != VECTOR_NONE)
2517 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2518 spaces, "",
2519 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2520 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2523 fputs ("\n", stderr);
2526 #define DEBUG_FMT_ID "%-32s= "
2527 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2528 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2529 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
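/* Worked example (illustrative): DEBUG_FMT_ID left-justifies the key in
   a 32-column field, so fprintf (stderr, DEBUG_FMT_D, "tls_size", 32)
   prints "tls_size", 24 spaces of padding, then "= 32" and a newline,
   keeping the -mdebug=reg output aligned in two columns.  */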
2531 /* Print various interesting information with -mdebug=reg. */
2532 static void
2533 rs6000_debug_reg_global (void)
2535 static const char *const tf[2] = { "false", "true" };
2536 const char *nl = (const char *)0;
2537 int m;
2538 size_t m1, m2, v;
2539 char costly_num[20];
2540 char nop_num[20];
2541 char flags_buffer[40];
2542 const char *costly_str;
2543 const char *nop_str;
2544 const char *trace_str;
2545 const char *abi_str;
2546 const char *cmodel_str;
2547 struct cl_target_option cl_opts;
2549 /* Modes we want tieable information on. */
2550 static const machine_mode print_tieable_modes[] = {
2551 QImode,
2552 HImode,
2553 SImode,
2554 DImode,
2555 TImode,
2556 PTImode,
2557 SFmode,
2558 DFmode,
2559 TFmode,
2560 IFmode,
2561 KFmode,
2562 SDmode,
2563 DDmode,
2564 TDmode,
2565 V8QImode,
2566 V4HImode,
2567 V2SImode,
2568 V16QImode,
2569 V8HImode,
2570 V4SImode,
2571 V2DImode,
2572 V1TImode,
2573 V32QImode,
2574 V16HImode,
2575 V8SImode,
2576 V4DImode,
2577 V2TImode,
2578 V2SFmode,
2579 V4SFmode,
2580 V2DFmode,
2581 V8SFmode,
2582 V4DFmode,
2583 CCmode,
2584 CCUNSmode,
2585 CCEQmode,
2588 /* Virtual regs we are interested in. */
2589 static const struct {
2590 int regno; /* register number. */
2591 const char *name; /* register name. */
2592 } virtual_regs[] = {
2593 { STACK_POINTER_REGNUM, "stack pointer:" },
2594 { TOC_REGNUM, "toc: " },
2595 { STATIC_CHAIN_REGNUM, "static chain: " },
2596 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2597 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2598 { ARG_POINTER_REGNUM, "arg pointer: " },
2599 { FRAME_POINTER_REGNUM, "frame pointer:" },
2600 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2601 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2602 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2603 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2604 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2605 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2606 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2607 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2608 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2611 fputs ("\nHard register information:\n", stderr);
2612 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2613 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2614 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2615 LAST_ALTIVEC_REGNO,
2616 "vs");
2617 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2618 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2619 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2620 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2621 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2622 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2623 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2624 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2626 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2627 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2628 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2630 fprintf (stderr,
2631 "\n"
2632 "d reg_class = %s\n"
2633 "f reg_class = %s\n"
2634 "v reg_class = %s\n"
2635 "wa reg_class = %s\n"
2636 "wb reg_class = %s\n"
2637 "wd reg_class = %s\n"
2638 "we reg_class = %s\n"
2639 "wf reg_class = %s\n"
2640 "wg reg_class = %s\n"
2641 "wh reg_class = %s\n"
2642 "wi reg_class = %s\n"
2643 "wj reg_class = %s\n"
2644 "wk reg_class = %s\n"
2645 "wl reg_class = %s\n"
2646 "wm reg_class = %s\n"
2647 "wo reg_class = %s\n"
2648 "wp reg_class = %s\n"
2649 "wq reg_class = %s\n"
2650 "wr reg_class = %s\n"
2651 "ws reg_class = %s\n"
2652 "wt reg_class = %s\n"
2653 "wu reg_class = %s\n"
2654 "wv reg_class = %s\n"
2655 "ww reg_class = %s\n"
2656 "wx reg_class = %s\n"
2657 "wy reg_class = %s\n"
2658 "wz reg_class = %s\n"
2659 "wA reg_class = %s\n"
2660 "wH reg_class = %s\n"
2661 "wI reg_class = %s\n"
2662 "wJ reg_class = %s\n"
2663 "wK reg_class = %s\n"
2664 "\n",
2665 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2666 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2667 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2668 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2669 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2670 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2671 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2672 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2673 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2674 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2675 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2676 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2677 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2678 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2679 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2680 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2681 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2682 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2683 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2684 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2685 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2686 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2687 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2688 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2689 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2690 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2691 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2692 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2693 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2694 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2695 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2696 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2698 nl = "\n";
2699 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2700 rs6000_debug_print_mode (m);
2702 fputs ("\n", stderr);
2704 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2706 machine_mode mode1 = print_tieable_modes[m1];
2707 bool first_time = true;
2709 nl = (const char *)0;
2710 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2712 machine_mode mode2 = print_tieable_modes[m2];
2713 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2715 if (first_time)
2717 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2718 nl = "\n";
2719 first_time = false;
2722 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2726 if (!first_time)
2727 fputs ("\n", stderr);
2730 if (nl)
2731 fputs (nl, stderr);
2733 if (rs6000_recip_control)
2735 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2737 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2738 if (rs6000_recip_bits[m])
2740 fprintf (stderr,
2741 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2742 GET_MODE_NAME (m),
2743 (RS6000_RECIP_AUTO_RE_P (m)
2744 ? "auto"
2745 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2746 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2747 ? "auto"
2748 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2751 fputs ("\n", stderr);
2754 if (rs6000_cpu_index >= 0)
2756 const char *name = processor_target_table[rs6000_cpu_index].name;
2757 HOST_WIDE_INT flags
2758 = processor_target_table[rs6000_cpu_index].target_enable;
2760 sprintf (flags_buffer, "-mcpu=%s flags", name);
2761 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2763 else
2764 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2766 if (rs6000_tune_index >= 0)
2768 const char *name = processor_target_table[rs6000_tune_index].name;
2769 HOST_WIDE_INT flags
2770 = processor_target_table[rs6000_tune_index].target_enable;
2772 sprintf (flags_buffer, "-mtune=%s flags", name);
2773 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2775 else
2776 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2778 cl_target_option_save (&cl_opts, &global_options);
2779 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2780 rs6000_isa_flags);
2782 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2783 rs6000_isa_flags_explicit);
2785 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2786 rs6000_builtin_mask);
2788 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2790 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2791 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2793 switch (rs6000_sched_costly_dep)
2795 case max_dep_latency:
2796 costly_str = "max_dep_latency";
2797 break;
2799 case no_dep_costly:
2800 costly_str = "no_dep_costly";
2801 break;
2803 case all_deps_costly:
2804 costly_str = "all_deps_costly";
2805 break;
2807 case true_store_to_load_dep_costly:
2808 costly_str = "true_store_to_load_dep_costly";
2809 break;
2811 case store_to_load_dep_costly:
2812 costly_str = "store_to_load_dep_costly";
2813 break;
2815 default:
2816 costly_str = costly_num;
2817 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2818 break;
2821 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2823 switch (rs6000_sched_insert_nops)
2825 case sched_finish_regroup_exact:
2826 nop_str = "sched_finish_regroup_exact";
2827 break;
2829 case sched_finish_pad_groups:
2830 nop_str = "sched_finish_pad_groups";
2831 break;
2833 case sched_finish_none:
2834 nop_str = "sched_finish_none";
2835 break;
2837 default:
2838 nop_str = nop_num;
2839 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2840 break;
2843 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2845 switch (rs6000_sdata)
2847 default:
2848 case SDATA_NONE:
2849 break;
2851 case SDATA_DATA:
2852 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2853 break;
2855 case SDATA_SYSV:
2856 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2857 break;
2859 case SDATA_EABI:
2860 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2861 break;
2865 switch (rs6000_traceback)
2867 case traceback_default: trace_str = "default"; break;
2868 case traceback_none: trace_str = "none"; break;
2869 case traceback_part: trace_str = "part"; break;
2870 case traceback_full: trace_str = "full"; break;
2871 default: trace_str = "unknown"; break;
2874 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2876 switch (rs6000_current_cmodel)
2878 case CMODEL_SMALL: cmodel_str = "small"; break;
2879 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2880 case CMODEL_LARGE: cmodel_str = "large"; break;
2881 default: cmodel_str = "unknown"; break;
2884 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2886 switch (rs6000_current_abi)
2888 case ABI_NONE: abi_str = "none"; break;
2889 case ABI_AIX: abi_str = "aix"; break;
2890 case ABI_ELFv2: abi_str = "ELFv2"; break;
2891 case ABI_V4: abi_str = "V4"; break;
2892 case ABI_DARWIN: abi_str = "darwin"; break;
2893 default: abi_str = "unknown"; break;
2896 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2898 if (rs6000_altivec_abi)
2899 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2901 if (rs6000_spe_abi)
2902 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2904 if (rs6000_darwin64_abi)
2905 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2907 if (rs6000_float_gprs)
2908 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2910 fprintf (stderr, DEBUG_FMT_S, "fprs",
2911 (TARGET_FPRS ? "true" : "false"));
2913 fprintf (stderr, DEBUG_FMT_S, "single_float",
2914 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2916 fprintf (stderr, DEBUG_FMT_S, "double_float",
2917 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2919 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2920 (TARGET_SOFT_FLOAT ? "true" : "false"));
2922 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2923 (TARGET_E500_SINGLE ? "true" : "false"));
2925 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2926 (TARGET_E500_DOUBLE ? "true" : "false"));
2928 if (TARGET_LINK_STACK)
2929 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2931 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2933 if (TARGET_P8_FUSION)
2935 char options[80];
2937 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2938 if (TARGET_TOC_FUSION)
2939 strcat (options, ", toc");
2941 if (TARGET_P8_FUSION_SIGN)
2942 strcat (options, ", sign");
2944 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2947 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2948 TARGET_SECURE_PLT ? "secure" : "bss");
2949 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2950 aix_struct_return ? "aix" : "sysv");
2951 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2952 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2953 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2954 tf[!!rs6000_align_branch_targets]);
2955 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2956 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2957 rs6000_long_double_type_size);
2958 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2959 (int)rs6000_sched_restricted_insns_priority);
2960 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2961 (int)END_BUILTINS);
2962 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2963 (int)RS6000_BUILTIN_COUNT);
2965 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2966 (int)TARGET_FLOAT128_ENABLE_TYPE);
2968 if (TARGET_VSX)
2969 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2970 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2972 if (TARGET_DIRECT_MOVE_128)
2973 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2974 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2978 /* Update the addr mask bits in reg_addr to help the secondary reload and
2979 legitimate address support figure out the appropriate addressing to
2980 use. */
2982 static void
2983 rs6000_setup_reg_addr_masks (void)
2985 ssize_t rc, reg, m, nregs;
2986 addr_mask_type any_addr_mask, addr_mask;
2988 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2990 machine_mode m2 = (machine_mode) m;
2991 bool complex_p = false;
2992 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2993 size_t msize;
2995 if (COMPLEX_MODE_P (m2))
2997 complex_p = true;
2998 m2 = GET_MODE_INNER (m2);
3001 msize = GET_MODE_SIZE (m2);
3003 /* SDmode is special in that we want to access it only via REG+REG
3004 addressing on power7 and above, since we want to use the LFIWZX and
3005 STFIWZX instructions to load it. */
3006 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
3008 any_addr_mask = 0;
3009 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
3011 addr_mask = 0;
3012 reg = reload_reg_map[rc].reg;
3014 /* Can mode values go in the GPR/FPR/Altivec registers? */
3015 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
3017 bool small_int_vsx_p = (small_int_p
3018 && (rc == RELOAD_REG_FPR
3019 || rc == RELOAD_REG_VMX));
3021 nregs = rs6000_hard_regno_nregs[m][reg];
3022 addr_mask |= RELOAD_REG_VALID;
3024 /* Indicate if the mode takes more than 1 physical register. If
3025 it takes a single register, indicate it can do REG+REG
3026 addressing. Small integers in VSX registers can only do
3027 REG+REG addressing. */
3028 if (small_int_vsx_p)
3029 addr_mask |= RELOAD_REG_INDEXED;
3030 else if (nregs > 1 || m == BLKmode || complex_p)
3031 addr_mask |= RELOAD_REG_MULTIPLE;
3032 else
3033 addr_mask |= RELOAD_REG_INDEXED;
3035 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
3036 addressing. Restrict addressing on SPE for 64-bit types
3037 because of the SUBREG hackery used to address 64-bit floats in
3038 '32-bit' GPRs. If we allow scalars into Altivec registers,
3039 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
3041 if (TARGET_UPDATE
3042 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
3043 && msize <= 8
3044 && !VECTOR_MODE_P (m2)
3045 && !FLOAT128_VECTOR_P (m2)
3046 && !complex_p
3047 && !small_int_vsx_p
3048 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
3049 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
3050 && !(TARGET_E500_DOUBLE && msize == 8))
3052 addr_mask |= RELOAD_REG_PRE_INCDEC;
3054 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
3055 we don't allow PRE_MODIFY for some multi-register
3056 operations. */
3057 switch (m)
3059 default:
3060 addr_mask |= RELOAD_REG_PRE_MODIFY;
3061 break;
3063 case E_DImode:
3064 if (TARGET_POWERPC64)
3065 addr_mask |= RELOAD_REG_PRE_MODIFY;
3066 break;
3068 case E_DFmode:
3069 case E_DDmode:
3070 if (TARGET_DF_INSN)
3071 addr_mask |= RELOAD_REG_PRE_MODIFY;
3072 break;
3077 /* GPR and FPR registers can do REG+OFFSET addressing, except
3078 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
3079 for 64-bit scalars and 32-bit SFmode to altivec registers. */
3080 if ((addr_mask != 0) && !indexed_only_p
3081 && msize <= 8
3082 && (rc == RELOAD_REG_GPR
3083 || ((msize == 8 || m2 == SFmode)
3084 && (rc == RELOAD_REG_FPR
3085 || (rc == RELOAD_REG_VMX
3086 && TARGET_P9_DFORM_SCALAR)))))
3087 addr_mask |= RELOAD_REG_OFFSET;
3089 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
3090 instructions are enabled. The offset for 128-bit VSX registers is
3091 only 12-bits. While GPRs can handle the full offset range, VSX
3092 registers can only handle the restricted range. */
3093 else if ((addr_mask != 0) && !indexed_only_p
3094 && msize == 16 && TARGET_P9_DFORM_VECTOR
3095 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
3096 || (m2 == TImode && TARGET_VSX_TIMODE)))
3098 addr_mask |= RELOAD_REG_OFFSET;
3099 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
3100 addr_mask |= RELOAD_REG_QUAD_OFFSET;
3103 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3104 addressing on 128-bit types. */
3105 if (rc == RELOAD_REG_VMX && msize == 16
3106 && (addr_mask & RELOAD_REG_VALID) != 0)
3107 addr_mask |= RELOAD_REG_AND_M16;
3109 reg_addr[m].addr_mask[rc] = addr_mask;
3110 any_addr_mask |= addr_mask;
3113 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
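/* Illustrative example (the exact bits depend on the -mcpu flags in
   effect): for DImode on a 64-bit target the GPR entry would typically
   accumulate RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
   | RELOAD_REG_PRE_INCDEC | RELOAD_REG_PRE_MODIFY, which
   rs6000_debug_addr_mask renders as "vio++" with keep_spaces clear.  */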
3118 /* Initialize the various global tables that are based on register size. */
3119 static void
3120 rs6000_init_hard_regno_mode_ok (bool global_init_p)
3122 ssize_t r, m, c;
3123 int align64;
3124 int align32;
3126 /* Precalculate REGNO_REG_CLASS. */
3127 rs6000_regno_regclass[0] = GENERAL_REGS;
3128 for (r = 1; r < 32; ++r)
3129 rs6000_regno_regclass[r] = BASE_REGS;
3131 for (r = 32; r < 64; ++r)
3132 rs6000_regno_regclass[r] = FLOAT_REGS;
3134 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
3135 rs6000_regno_regclass[r] = NO_REGS;
3137 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3138 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3140 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3141 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3142 rs6000_regno_regclass[r] = CR_REGS;
3144 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3145 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3146 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3147 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3148 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3149 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
3150 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
3151 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3152 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3153 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3154 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3155 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3157 /* Precalculate register class to simpler reload register class. We don't
3158 need all of the register classes that are combinations of different
3159 classes, just the simple ones that have constraint letters. */
3160 for (c = 0; c < N_REG_CLASSES; c++)
3161 reg_class_to_reg_type[c] = NO_REG_TYPE;
3163 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3164 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3165 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3166 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3167 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3168 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3169 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3170 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3171 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3172 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3173 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
3174 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
3176 if (TARGET_VSX)
3178 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3179 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3181 else
3183 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3184 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3187 /* Precalculate the valid memory formats as well as the vector information;
3188 this must be set up before the rs6000_hard_regno_nregs_internal calls
3189 below. */
3190 gcc_assert ((int)VECTOR_NONE == 0);
3191 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3192 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
3194 gcc_assert ((int)CODE_FOR_nothing == 0);
3195 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3197 gcc_assert ((int)NO_REGS == 0);
3198 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3200 /* The VSX hardware allows native alignment for vectors; this controls whether
3201 the compiler believes it can use native alignment or must still use 128-bit alignment. */
3202 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3204 align64 = 64;
3205 align32 = 32;
3207 else
3209 align64 = 128;
3210 align32 = 128;
3213 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3214 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3215 if (TARGET_FLOAT128_TYPE)
3217 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3218 rs6000_vector_align[KFmode] = 128;
3220 if (FLOAT128_IEEE_P (TFmode))
3222 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3223 rs6000_vector_align[TFmode] = 128;
3227 /* V2DF mode, VSX only. */
3228 if (TARGET_VSX)
3230 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3231 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3232 rs6000_vector_align[V2DFmode] = align64;
3235 /* V4SF mode, either VSX or Altivec. */
3236 if (TARGET_VSX)
3238 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3239 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3240 rs6000_vector_align[V4SFmode] = align32;
3242 else if (TARGET_ALTIVEC)
3244 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3245 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3246 rs6000_vector_align[V4SFmode] = align32;
3249 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3250 and stores. */
3251 if (TARGET_ALTIVEC)
3253 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3254 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3255 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3256 rs6000_vector_align[V4SImode] = align32;
3257 rs6000_vector_align[V8HImode] = align32;
3258 rs6000_vector_align[V16QImode] = align32;
3260 if (TARGET_VSX)
3262 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3263 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3264 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3266 else
3268 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3269 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3270 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3274 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3275 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3276 if (TARGET_VSX)
3278 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3279 rs6000_vector_unit[V2DImode]
3280 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3281 rs6000_vector_align[V2DImode] = align64;
3283 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3284 rs6000_vector_unit[V1TImode]
3285 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3286 rs6000_vector_align[V1TImode] = 128;
3289 /* DFmode, see if we want to use the VSX unit. Memory is handled
3290 differently, so don't set rs6000_vector_mem. */
3291 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3293 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3294 rs6000_vector_align[DFmode] = 64;
3297 /* SFmode, see if we want to use the VSX unit. */
3298 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3300 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3301 rs6000_vector_align[SFmode] = 32;
3304 /* Allow TImode in VSX register and set the VSX memory macros. */
3305 if (TARGET_VSX && TARGET_VSX_TIMODE)
3307 rs6000_vector_mem[TImode] = VECTOR_VSX;
3308 rs6000_vector_align[TImode] = align64;
3311 /* TODO add SPE and paired floating point vector support. */
3313 /* Register class constraints for the constraints that depend on compile
3314 switches. When the VSX code was added, different constraints were added
3315 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3316 of the VSX registers are used. The register classes for scalar floating
3317 point types are set based on whether we allow that type into the upper
3318 (Altivec) registers. GCC has register classes to target the Altivec
3319 registers for load/store operations, to select using a VSX memory
3320 operation instead of the traditional floating point operation. The
3321 constraints are:
3323 d - Register class to use with traditional DFmode instructions.
3324 f - Register class to use with traditional SFmode instructions.
3325 v - Altivec register.
3326 wa - Any VSX register.
3327 wc - Reserved to represent individual CR bits (used in LLVM).
3328 wd - Preferred register class for V2DFmode.
3329 wf - Preferred register class for V4SFmode.
3330 wg - Float register for power6x move insns.
3331 wh - FP register for direct move instructions.
3332 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3333 wj - FP or VSX register to hold 64-bit integers for direct moves.
3334 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3335 wl - Float register if we can do 32-bit signed int loads.
3336 wm - VSX register for ISA 2.07 direct move operations.
3337 wn - always NO_REGS.
3338 wr - GPR if 64-bit mode is permitted.
3339 ws - Register class to do ISA 2.06 DF operations.
3340 wt - VSX register for TImode in VSX registers.
3341 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3342 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3343 ww - Register class to do SF conversions in with VSX operations.
3344 wx - Float register if we can do 32-bit int stores.
3345 wy - Register class to do ISA 2.07 SF operations.
3346 wz - Float register if we can do 32-bit unsigned int loads.
3347 wH - Altivec register if SImode is allowed in VSX registers.
3348 wI - VSX register if SImode is allowed in VSX registers.
3349 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3350 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
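/* Illustrative user-level sketch (not part of this file): once a
   constraint letter is mapped to a register class here, inline asm can
   use it, e.g. with -mvsx the "wa" constraint requests any VSX register
   and the %x output modifier prints the full VSX register number:

     __vector double x, y;
     __asm__ ("xvadddp %x0,%x1,%x1" : "=wa" (y) : "wa" (x));

   When the corresponding option is off, the entry stays NO_REGS and the
   constraint matches no registers.  */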
3352 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3353 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3355 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3356 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3358 if (TARGET_VSX)
3360 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3361 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3362 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3364 if (TARGET_VSX_TIMODE)
3365 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3367 if (TARGET_UPPER_REGS_DF) /* DFmode */
3369 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3370 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3372 else
3373 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3375 if (TARGET_UPPER_REGS_DI) /* DImode */
3376 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3377 else
3378 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3381 /* Add conditional constraints based on various options, to allow us to
3382 collapse multiple insn patterns. */
3383 if (TARGET_ALTIVEC)
3384 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3386 if (TARGET_MFPGPR) /* DFmode */
3387 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3389 if (TARGET_LFIWAX)
3390 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3392 if (TARGET_DIRECT_MOVE)
3394 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3395 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3396 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3397 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3398 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3399 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3402 if (TARGET_POWERPC64)
3404 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3405 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3408 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3410 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3411 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3412 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3414 else if (TARGET_P8_VECTOR)
3416 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3417 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3419 else if (TARGET_VSX)
3420 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3422 if (TARGET_STFIWX)
3423 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3425 if (TARGET_LFIWZX)
3426 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3428 if (TARGET_FLOAT128_TYPE)
3430 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3431 if (FLOAT128_IEEE_P (TFmode))
3432 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3435 /* Support for new D-form instructions. */
3436 if (TARGET_P9_DFORM_SCALAR)
3437 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3439 /* Support for ISA 3.0 (power9) vectors. */
3440 if (TARGET_P9_VECTOR)
3441 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3443 /* Support for new direct moves (ISA 3.0 + 64bit). */
3444 if (TARGET_DIRECT_MOVE_128)
3445 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3447 /* Support small integers in VSX registers. */
3448 if (TARGET_VSX_SMALL_INTEGER)
3450 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3451 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3452 if (TARGET_P9_VECTOR)
3454 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3455 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3459 /* Set up the reload helper and direct move functions. */
3460 if (TARGET_VSX || TARGET_ALTIVEC)
3462 if (TARGET_64BIT)
3464 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3465 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3466 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3467 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3468 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3469 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3470 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3471 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3472 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3473 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3474 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3475 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3476 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3477 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3478 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3479 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3480 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3481 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3482 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3483 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3485 if (FLOAT128_VECTOR_P (KFmode))
3487 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3488 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3491 if (FLOAT128_VECTOR_P (TFmode))
3493 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3494 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3497 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3498 available. */
3499 if (TARGET_NO_SDMODE_STACK)
3501 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3502 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3505 if (TARGET_VSX_TIMODE)
3507 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3508 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3511 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3513 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3514 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3515 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3516 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3517 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3518 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3519 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3520 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3521 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3523 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3524 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3525 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3526 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3527 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3528 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3529 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3530 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3531 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3533 if (FLOAT128_VECTOR_P (KFmode))
3535 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3536 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3539 if (FLOAT128_VECTOR_P (TFmode))
3541 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3542 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3546 else
3548 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3549 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3550 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3551 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3552 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3553 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3554 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3555 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3556 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3557 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3558 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3559 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3560 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3561 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3562 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3563 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3564 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3565 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3566 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3567 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3569 if (FLOAT128_VECTOR_P (KFmode))
3571 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3572 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3575 if (FLOAT128_IEEE_P (TFmode))
3577 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3578 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3581 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3582 available. */
3583 if (TARGET_NO_SDMODE_STACK)
3585 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3586 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3589 if (TARGET_VSX_TIMODE)
3591 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3592 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3595 if (TARGET_DIRECT_MOVE)
3597 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3598 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3599 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3603 if (TARGET_UPPER_REGS_DF)
3604 reg_addr[DFmode].scalar_in_vmx_p = true;
3606 if (TARGET_UPPER_REGS_DI)
3607 reg_addr[DImode].scalar_in_vmx_p = true;
3609 if (TARGET_UPPER_REGS_SF)
3610 reg_addr[SFmode].scalar_in_vmx_p = true;
3612 if (TARGET_VSX_SMALL_INTEGER)
3614 reg_addr[SImode].scalar_in_vmx_p = true;
3615 if (TARGET_P9_VECTOR)
3617 reg_addr[HImode].scalar_in_vmx_p = true;
3618 reg_addr[QImode].scalar_in_vmx_p = true;
3623 /* Set up the fusion operations. */
3624 if (TARGET_P8_FUSION)
3626 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3627 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3628 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3629 if (TARGET_64BIT)
3630 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
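/* Schematic example (made-up symbol and registers) of the addis/load pair
   these entries enable for power8, here an SImode TOC-relative load:

     addis 9,2,sym@toc@ha
     lwz   9,sym@toc@l(9)

   The hardware can fuse the two instructions when they are adjacent.  */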
3633 if (TARGET_P9_FUSION)
3635 struct fuse_insns {
3636 enum machine_mode mode; /* mode of the fused type. */
3637 enum machine_mode pmode; /* pointer mode. */
3638 enum rs6000_reload_reg_type rtype; /* register type. */
3639 enum insn_code load; /* load insn. */
3640 enum insn_code store; /* store insn. */
3643 static const struct fuse_insns addis_insns[] = {
3644 { E_SFmode, E_DImode, RELOAD_REG_FPR,
3645 CODE_FOR_fusion_vsx_di_sf_load,
3646 CODE_FOR_fusion_vsx_di_sf_store },
3648 { E_SFmode, E_SImode, RELOAD_REG_FPR,
3649 CODE_FOR_fusion_vsx_si_sf_load,
3650 CODE_FOR_fusion_vsx_si_sf_store },
3652 { E_DFmode, E_DImode, RELOAD_REG_FPR,
3653 CODE_FOR_fusion_vsx_di_df_load,
3654 CODE_FOR_fusion_vsx_di_df_store },
3656 { E_DFmode, E_SImode, RELOAD_REG_FPR,
3657 CODE_FOR_fusion_vsx_si_df_load,
3658 CODE_FOR_fusion_vsx_si_df_store },
3660 { E_DImode, E_DImode, RELOAD_REG_FPR,
3661 CODE_FOR_fusion_vsx_di_di_load,
3662 CODE_FOR_fusion_vsx_di_di_store },
3664 { E_DImode, E_SImode, RELOAD_REG_FPR,
3665 CODE_FOR_fusion_vsx_si_di_load,
3666 CODE_FOR_fusion_vsx_si_di_store },
3668 { E_QImode, E_DImode, RELOAD_REG_GPR,
3669 CODE_FOR_fusion_gpr_di_qi_load,
3670 CODE_FOR_fusion_gpr_di_qi_store },
3672 { E_QImode, E_SImode, RELOAD_REG_GPR,
3673 CODE_FOR_fusion_gpr_si_qi_load,
3674 CODE_FOR_fusion_gpr_si_qi_store },
3676 { E_HImode, E_DImode, RELOAD_REG_GPR,
3677 CODE_FOR_fusion_gpr_di_hi_load,
3678 CODE_FOR_fusion_gpr_di_hi_store },
3680 { E_HImode, E_SImode, RELOAD_REG_GPR,
3681 CODE_FOR_fusion_gpr_si_hi_load,
3682 CODE_FOR_fusion_gpr_si_hi_store },
3684 { E_SImode, E_DImode, RELOAD_REG_GPR,
3685 CODE_FOR_fusion_gpr_di_si_load,
3686 CODE_FOR_fusion_gpr_di_si_store },
3688 { E_SImode, E_SImode, RELOAD_REG_GPR,
3689 CODE_FOR_fusion_gpr_si_si_load,
3690 CODE_FOR_fusion_gpr_si_si_store },
3692 { E_SFmode, E_DImode, RELOAD_REG_GPR,
3693 CODE_FOR_fusion_gpr_di_sf_load,
3694 CODE_FOR_fusion_gpr_di_sf_store },
3696 { E_SFmode, E_SImode, RELOAD_REG_GPR,
3697 CODE_FOR_fusion_gpr_si_sf_load,
3698 CODE_FOR_fusion_gpr_si_sf_store },
3700 { E_DImode, E_DImode, RELOAD_REG_GPR,
3701 CODE_FOR_fusion_gpr_di_di_load,
3702 CODE_FOR_fusion_gpr_di_di_store },
3704 { E_DFmode, E_DImode, RELOAD_REG_GPR,
3705 CODE_FOR_fusion_gpr_di_df_load,
3706 CODE_FOR_fusion_gpr_di_df_store },
3709 machine_mode cur_pmode = Pmode;
3710 size_t i;
3712 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3714 machine_mode xmode = addis_insns[i].mode;
3715 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3717 if (addis_insns[i].pmode != cur_pmode)
3718 continue;
3720 if (rtype == RELOAD_REG_FPR
3721 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3722 continue;
3724 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3725 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3727 if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
3729 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3730 = addis_insns[i].load;
3731 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3732 = addis_insns[i].store;
3737 /* Note which types support fusing a TOC setup with a memory insn. We only
3738 do fused TOCs for medium/large code models. */
3739 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3740 && (TARGET_CMODEL != CMODEL_SMALL))
3742 reg_addr[QImode].fused_toc = true;
3743 reg_addr[HImode].fused_toc = true;
3744 reg_addr[SImode].fused_toc = true;
3745 reg_addr[DImode].fused_toc = true;
3746 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3748 if (TARGET_SINGLE_FLOAT)
3749 reg_addr[SFmode].fused_toc = true;
3750 if (TARGET_DOUBLE_FLOAT)
3751 reg_addr[DFmode].fused_toc = true;
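/* Schematic example (made-up symbol and registers) of a fused TOC
   reference for DFmode under -mcmodel=medium:

     addis 9,2,x@toc@ha
     lfd   1,x@toc@l(9)  */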
3755 /* Precalculate HARD_REGNO_NREGS. */
3756 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3757 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3758 rs6000_hard_regno_nregs[m][r]
3759 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3761 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3762 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3763 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3764 if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3765 rs6000_hard_regno_mode_ok_p[m][r] = true;
3767 /* Precalculate CLASS_MAX_NREGS sizes. */
3768 for (c = 0; c < LIM_REG_CLASSES; ++c)
3770 int reg_size;
3772 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3773 reg_size = UNITS_PER_VSX_WORD;
3775 else if (c == ALTIVEC_REGS)
3776 reg_size = UNITS_PER_ALTIVEC_WORD;
3778 else if (c == FLOAT_REGS)
3779 reg_size = UNITS_PER_FP_WORD;
3781 else
3782 reg_size = UNITS_PER_WORD;
3784 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3786 machine_mode m2 = (machine_mode)m;
3787 int reg_size2 = reg_size;
3789 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3790 in VSX. */
3791 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3792 reg_size2 = UNITS_PER_FP_WORD;
3794 rs6000_class_max_nregs[m][c]
3795 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
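/* Worked example: IBM extended TFmode is 16 bytes and FLOAT128_2REG_P,
   so even for a VSX register class reg_size2 is UNITS_PER_FP_WORD (8 on
   this port) and the entry is (16 + 8 - 1) / 8 = 2 registers.  */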
3799 if (TARGET_E500_DOUBLE)
3800 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3802 /* Calculate which modes to automatically generate code to use the
3803 reciprocal divide and square root instructions. In the future, possibly
3804 automatically generate the instructions even if the user did not specify
3805 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3806 not accurate enough. */
3807 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3808 if (TARGET_FRES)
3809 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3810 if (TARGET_FRE)
3811 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3812 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3813 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3814 if (VECTOR_UNIT_VSX_P (V2DFmode))
3815 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3817 if (TARGET_FRSQRTES)
3818 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3819 if (TARGET_FRSQRTE)
3820 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3821 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3822 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3823 if (VECTOR_UNIT_VSX_P (V2DFmode))
3824 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3826 if (rs6000_recip_control)
3828 if (!flag_finite_math_only)
3829 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3830 if (flag_trapping_math)
3831 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3832 if (!flag_reciprocal_math)
3833 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3834 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3836 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3837 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3838 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3840 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3841 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3842 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3844 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3845 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3846 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3848 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3849 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3850 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3852 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3853 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3854 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3856 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3857 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3858 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3860 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3861 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3862 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3864 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3865 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3866 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
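/* For reference: when the AUTO bits are set, the estimate instructions
   enabled above are refined with Newton-Raphson steps.  A reciprocal step
   computes x1 = x0 * (2 - d * x0); a reciprocal square root step computes
   x1 = x0 * (1.5 - 0.5 * d * x0 * x0).  The actual expansion is done by
   the software divide/rsqrt expanders elsewhere in this file.  */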
3870 /* Update the addr mask bits in reg_addr to help secondary reload and the
3871 legitimate address support figure out the appropriate addressing to
3872 use. */
3873 rs6000_setup_reg_addr_masks ();
3875 if (global_init_p || TARGET_DEBUG_TARGET)
3877 if (TARGET_DEBUG_REG)
3878 rs6000_debug_reg_global ();
3880 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3881 fprintf (stderr,
3882 "SImode variable mult cost = %d\n"
3883 "SImode constant mult cost = %d\n"
3884 "SImode short constant mult cost = %d\n"
3885 "DImode multipliciation cost = %d\n"
3886 "SImode division cost = %d\n"
3887 "DImode division cost = %d\n"
3888 "Simple fp operation cost = %d\n"
3889 "DFmode multiplication cost = %d\n"
3890 "SFmode division cost = %d\n"
3891 "DFmode division cost = %d\n"
3892 "cache line size = %d\n"
3893 "l1 cache size = %d\n"
3894 "l2 cache size = %d\n"
3895 "simultaneous prefetches = %d\n"
3896 "\n",
3897 rs6000_cost->mulsi,
3898 rs6000_cost->mulsi_const,
3899 rs6000_cost->mulsi_const9,
3900 rs6000_cost->muldi,
3901 rs6000_cost->divsi,
3902 rs6000_cost->divdi,
3903 rs6000_cost->fp,
3904 rs6000_cost->dmul,
3905 rs6000_cost->sdiv,
3906 rs6000_cost->ddiv,
3907 rs6000_cost->cache_line_size,
3908 rs6000_cost->l1_cache_size,
3909 rs6000_cost->l2_cache_size,
3910 rs6000_cost->simultaneous_prefetches);
3914 #if TARGET_MACHO
3915 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3917 static void
3918 darwin_rs6000_override_options (void)
3920 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3921 off. */
3922 rs6000_altivec_abi = 1;
3923 TARGET_ALTIVEC_VRSAVE = 1;
3924 rs6000_current_abi = ABI_DARWIN;
3926 if (DEFAULT_ABI == ABI_DARWIN
3927 && TARGET_64BIT)
3928 darwin_one_byte_bool = 1;
3930 if (TARGET_64BIT && ! TARGET_POWERPC64)
3932 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3933 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3935 if (flag_mkernel)
3937 rs6000_default_long_calls = 1;
3938 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3941 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3942 Altivec. */
3943 if (!flag_mkernel && !flag_apple_kext
3944 && TARGET_64BIT
3945 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3946 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3948 /* Unless the user (not the configurer) has explicitly overridden
3949 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3950 G4 unless targeting the kernel. */
3951 if (!flag_mkernel
3952 && !flag_apple_kext
3953 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3954 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3955 && ! global_options_set.x_rs6000_cpu_index)
3957 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3960 #endif
3962 /* If not otherwise specified by a target, make 'long double' equivalent to
3963 'double'. */
3965 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3966 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3967 #endif
3969 /* Return the builtin mask of the various options used that could affect
3970 which builtins are enabled. In the past we used target_flags, but we've
3971 run out of bits, and some options like SPE and PAIRED are no longer in
3972 target_flags. */
3974 HOST_WIDE_INT
3975 rs6000_builtin_mask_calculate (void)
3977 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3978 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3979 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3980 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3981 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3982 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3983 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3984 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3985 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3986 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3987 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3988 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3989 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3990 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3991 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3992 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3993 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3994 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3995 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3996 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3997 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3998 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
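/* Illustrative only (hypothetical helper, not used elsewhere in this
   file): a builtin that needs several RS6000_BTM_* features is available
   exactly when its required bits form a subset of the mask computed
   above.  */

static inline bool
rs6000_btm_available_p (HOST_WIDE_INT required, HOST_WIDE_INT enabled)
{
  /* Every feature bit the builtin requires must be enabled.  */
  return (required & enabled) == required;
}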
4001 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
4002 to clobber the XER[CA] bit because clobbering that bit without telling
4003 the compiler worked just fine with versions of GCC before GCC 5, and
4004 breaking a lot of older code in ways that are hard to track down is
4005 not such a great idea. */
4007 static rtx_insn *
4008 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
4009 vec<const char *> &/*constraints*/,
4010 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
4012 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
4013 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
4014 return NULL;
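/* For example (illustrative): pre-GCC-5 code such as

     asm ("addic %0,%1,-1\n\tsubfe %0,%0,%1" : "=r" (t) : "r" (x));

   modifies XER[CA] without declaring it; the implicit clobber added above
   keeps such asm statements working unchanged.  */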
4017 /* Override command line options.
4019 Combine build-specific configuration information with options
4020 specified on the command line to set various state variables which
4021 influence code generation, optimization, and expansion of built-in
4022 functions. Assure that command-line configuration preferences are
4023 compatible with each other and with the build configuration; issue
4024 warnings while adjusting configuration or error messages while
4025 rejecting configuration.
4027 Upon entry to this function:
4029 This function is called once at the beginning of
4030 compilation, and then again at the start and end of compiling
4031 each section of code that has a different configuration, as
4032 indicated, for example, by adding the
4034 __attribute__((__target__("cpu=power9")))
4036 qualifier to a function definition or, for example, by bracketing
4037 code between
4039 #pragma GCC target("altivec")
4041 and
4043 #pragma GCC reset_options
4045 directives. Parameter global_init_p is true for the initial
4046 invocation, which initializes global variables, and false for all
4047 subsequent invocations.
4050 Various global state information is assumed to be valid. This
4051 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
4052 default CPU specified at build configure time, TARGET_DEFAULT,
4053 representing the default set of option flags for the default
4054 target, and global_options_set.x_rs6000_isa_flags, representing
4055 which options were requested on the command line.
4057 Upon return from this function:
4059 rs6000_isa_flags_explicit has a non-zero bit for each flag that
4060 was set by name on the command line. Additionally, if certain
4061 attributes are automatically enabled or disabled by this function
4062 in order to assure compatibility between options and
4063 configuration, the flags associated with those attributes are
4064 also set. By setting these "explicit bits", we avoid the risk
4065 that other code might accidentally overwrite these particular
4066 attributes with "default values".
4068 The various bits of rs6000_isa_flags are set to indicate the
4069 target options that have been selected for the most current
4070 compilation efforts. This has the effect of also turning on the
4071 associated TARGET_XXX values since these are macros which are
4072 generally defined to test the corresponding bit of the
4073 rs6000_isa_flags variable.
4075 The variable rs6000_builtin_mask is set to represent the target
4076 options for the most current compilation efforts, consistent with
4077 the current contents of rs6000_isa_flags. This variable controls
4078 expansion of built-in functions.
4080 Various other global variables and fields of global structures
4081 (over 50 in all) are initialized to reflect the desired options
4082 for the most current compilation efforts. */
4084 static bool
4085 rs6000_option_override_internal (bool global_init_p)
4087 bool ret = true;
4088 bool have_cpu = false;
4090 /* The default cpu requested at configure time, if any. */
4091 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
4093 HOST_WIDE_INT set_masks;
4094 HOST_WIDE_INT ignore_masks;
4095 int cpu_index;
4096 int tune_index;
4097 struct cl_target_option *main_target_opt
4098 = ((global_init_p || target_option_default_node == NULL)
4099 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
4101 /* Print defaults. */
4102 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
4103 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
4105 /* Remember the explicit arguments. */
4106 if (global_init_p)
4107 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
4109 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4110 library functions, so warn about it. The flag may be useful for
4111 performance studies from time to time though, so don't disable it
4112 entirely. */
4113 if (global_options_set.x_rs6000_alignment_flags
4114 && rs6000_alignment_flags == MASK_ALIGN_POWER
4115 && DEFAULT_ABI == ABI_DARWIN
4116 && TARGET_64BIT)
4117 warning (0, "-malign-power is not supported for 64-bit Darwin;"
4118 " it is incompatible with the installed C and C++ libraries");
4120 /* Numerous experiments show that IRA based loop pressure
4121 calculation works better for RTL loop invariant motion on targets
4122 with enough (>= 32) registers. It is an expensive optimization,
4123 so it is enabled only when optimizing for peak performance. */
4124 if (optimize >= 3 && global_init_p
4125 && !global_options_set.x_flag_ira_loop_pressure)
4126 flag_ira_loop_pressure = 1;
4128 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4129 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
4130 options were already specified. */
4131 if (flag_sanitize & SANITIZE_USER_ADDRESS
4132 && !global_options_set.x_flag_asynchronous_unwind_tables)
4133 flag_asynchronous_unwind_tables = 1;
4135 /* Set the pointer size. */
4136 if (TARGET_64BIT)
4138 rs6000_pmode = DImode;
4139 rs6000_pointer_size = 64;
4141 else
4143 rs6000_pmode = SImode;
4144 rs6000_pointer_size = 32;
4147 /* Some OSs don't support saving the high part of 64-bit registers on context
4148 switch. Other OSs don't support saving Altivec registers. On those OSs,
4149 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4150 if the user wants either, the user must explicitly specify them and we
4151 won't interfere with the user's specification. */
4153 set_masks = POWERPC_MASKS;
4154 #ifdef OS_MISSING_POWERPC64
4155 if (OS_MISSING_POWERPC64)
4156 set_masks &= ~OPTION_MASK_POWERPC64;
4157 #endif
4158 #ifdef OS_MISSING_ALTIVEC
4159 if (OS_MISSING_ALTIVEC)
4160 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4161 | OTHER_VSX_VECTOR_MASKS);
4162 #endif
4164 /* Don't override by the processor default if given explicitly. */
4165 set_masks &= ~rs6000_isa_flags_explicit;
4167 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
4168 the cpu in a target attribute or pragma, but did not specify a tuning
4169 option, use the cpu for the tuning option rather than the option specified
4170 with -mtune on the command line. Process a '--with-cpu' configuration
4171 request as an implicit --cpu. */
4172 if (rs6000_cpu_index >= 0)
4174 cpu_index = rs6000_cpu_index;
4175 have_cpu = true;
4177 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4179 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
4180 have_cpu = true;
4182 else if (implicit_cpu)
4184 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
4185 have_cpu = true;
4187 else
4189 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4190 const char *default_cpu = ((!TARGET_POWERPC64)
4191 ? "powerpc"
4192 : ((BYTES_BIG_ENDIAN)
4193 ? "powerpc64"
4194 : "powerpc64le"));
4196 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4197 have_cpu = false;
4200 gcc_assert (cpu_index >= 0);
4202 if (have_cpu)
4204 #ifndef HAVE_AS_POWER9
4205 if (processor_target_table[rs6000_cpu_index].processor
4206 == PROCESSOR_POWER9)
4208 have_cpu = false;
4209 warning (0, "will not generate power9 instructions because "
4210 "assembler lacks power9 support");
4212 #endif
4213 #ifndef HAVE_AS_POWER8
4214 if (processor_target_table[rs6000_cpu_index].processor
4215 == PROCESSOR_POWER8)
4217 have_cpu = false;
4218 warning (0, "will not generate power8 instructions because "
4219 "assembler lacks power8 support");
4221 #endif
4222 #ifndef HAVE_AS_POPCNTD
4223 if (processor_target_table[rs6000_cpu_index].processor
4224 == PROCESSOR_POWER7)
4226 have_cpu = false;
4227 warning (0, "will not generate power7 instructions because "
4228 "assembler lacks power7 support");
4230 #endif
4231 #ifndef HAVE_AS_DFP
4232 if (processor_target_table[rs6000_cpu_index].processor
4233 == PROCESSOR_POWER6)
4235 have_cpu = false;
4236 warning (0, "will not generate power6 instructions because "
4237 "assembler lacks power6 support");
4239 #endif
4240 #ifndef HAVE_AS_POPCNTB
4241 if (processor_target_table[rs6000_cpu_index].processor
4242 == PROCESSOR_POWER5)
4244 have_cpu = false;
4245 warning (0, "will not generate power5 instructions because "
4246 "assembler lacks power5 support");
4248 #endif
4250 if (!have_cpu)
4252 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4253 const char *default_cpu = (!TARGET_POWERPC64
4254 ? "powerpc"
4255 : (BYTES_BIG_ENDIAN
4256 ? "powerpc64"
4257 : "powerpc64le"));
4259 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4263 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4264 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4265 with those from the cpu, except for options that were explicitly set. If
4266 we don't have a cpu, do not override the target bits set in
4267 TARGET_DEFAULT. */
4268 if (have_cpu)
4270 rs6000_isa_flags &= ~set_masks;
4271 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4272 & set_masks);
4274 else
4276 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4277 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4278 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
4279 to using rs6000_isa_flags, we need to do the initialization here.
4281 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4282 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4283 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
4284 : processor_target_table[cpu_index].target_enable);
4285 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4288 if (rs6000_tune_index >= 0)
4289 tune_index = rs6000_tune_index;
4290 else if (have_cpu)
4291 rs6000_tune_index = tune_index = cpu_index;
4292 else
4294 size_t i;
4295 enum processor_type tune_proc
4296 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4298 tune_index = -1;
4299 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4300 if (processor_target_table[i].processor == tune_proc)
4302 rs6000_tune_index = tune_index = i;
4303 break;
4307 gcc_assert (tune_index >= 0);
4308 rs6000_cpu = processor_target_table[tune_index].processor;
4310 /* Pick defaults for SPE related control flags. Do this early to make sure
4311 that the TARGET_ macros are representative ASAP. */
4313 int spe_capable_cpu =
4314 (rs6000_cpu == PROCESSOR_PPC8540
4315 || rs6000_cpu == PROCESSOR_PPC8548);
4317 if (!global_options_set.x_rs6000_spe_abi)
4318 rs6000_spe_abi = spe_capable_cpu;
4320 if (!global_options_set.x_rs6000_spe)
4321 rs6000_spe = spe_capable_cpu;
4323 if (!global_options_set.x_rs6000_float_gprs)
4324 rs6000_float_gprs =
4325 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
4326 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
4327 : 0);
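/* Here 1 corresponds to -mfloat-gprs=single (the e500v1 core in the
   PPC8540) and 2 to -mfloat-gprs=double (the e500v2 core in the
   PPC8548).  */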
4330 if (global_options_set.x_rs6000_spe_abi
4331 && rs6000_spe_abi
4332 && !TARGET_SPE_ABI)
4333 error ("not configured for SPE ABI");
4335 if (global_options_set.x_rs6000_spe
4336 && rs6000_spe
4337 && !TARGET_SPE)
4338 error ("not configured for SPE instruction set");
4340 if (main_target_opt != NULL
4341 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
4342 || (main_target_opt->x_rs6000_spe != rs6000_spe)
4343 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
4344 error ("target attribute or pragma changes SPE ABI");
4346 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4347 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4348 || rs6000_cpu == PROCESSOR_PPCE5500)
4350 if (TARGET_ALTIVEC)
4351 error ("AltiVec not supported in this target");
4352 if (TARGET_SPE)
4353 error ("SPE not supported in this target");
4355 if (rs6000_cpu == PROCESSOR_PPCE6500)
4357 if (TARGET_SPE)
4358 error ("SPE not supported in this target");
4361 /* Disable Cell microcode if we are optimizing for the Cell
4362 and not optimizing for size. */
4363 if (rs6000_gen_cell_microcode == -1)
4364 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4365 && !optimize_size);
4367 /* If we are optimizing big endian systems for space and it's OK to
4368 use instructions that would be microcoded on the Cell, use the
4369 load/store multiple and string instructions. */
4370 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4371 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4372 | OPTION_MASK_STRING);
4374 /* Don't allow -mmultiple or -mstring on little endian systems
4375 unless the cpu is a 750, because the hardware doesn't support the
4376 instructions used in little endian mode, and using them causes an
4377 alignment trap. The 750 does not cause an alignment trap (except
4378 when the target is unaligned). */
4380 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4382 if (TARGET_MULTIPLE)
4384 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4385 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4386 warning (0, "-mmultiple is not supported on little endian systems");
4389 if (TARGET_STRING)
4391 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4392 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4393 warning (0, "-mstring is not supported on little endian systems");
4397 /* If little-endian, default to -mstrict-align on older processors.
4398 Testing for htm matches power8 and later. */
4399 if (!BYTES_BIG_ENDIAN
4400 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4401 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4403 /* -maltivec={le,be} implies -maltivec. */
4404 if (rs6000_altivec_element_order != 0)
4405 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4407 /* Disallow -maltivec=le in big endian mode for now. This is not
4408 known to be useful for anyone. */
4409 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4411 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4412 rs6000_altivec_element_order = 0;
4415 /* Add some warnings for VSX. */
4416 if (TARGET_VSX)
4418 const char *msg = NULL;
4419 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4420 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4422 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4423 msg = N_("-mvsx requires hardware floating point");
4424 else
4426 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4427 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4430 else if (TARGET_PAIRED_FLOAT)
4431 msg = N_("-mvsx and -mpaired are incompatible");
4432 else if (TARGET_AVOID_XFORM > 0)
4433 msg = N_("-mvsx needs indexed addressing");
4434 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4435 & OPTION_MASK_ALTIVEC))
4437 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4438 msg = N_("-mvsx and -mno-altivec are incompatible");
4439 else
4440 msg = N_("-mno-altivec disables vsx");
4443 if (msg)
4445 warning (0, msg);
4446 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4447 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4451 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4452 the -mcpu setting to enable options that conflict. */
4453 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4454 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4455 | OPTION_MASK_ALTIVEC
4456 | OPTION_MASK_VSX)) != 0)
4457 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4458 | OPTION_MASK_DIRECT_MOVE)
4459 & ~rs6000_isa_flags_explicit);
4461 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4462 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4464 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4465 off all of the options that depend on those flags. */
4466 ignore_masks = rs6000_disable_incompatible_switches ();
4468 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4469 unless the user explicitly used the -mno-<option> to disable the code. */
4470 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4471 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0)
4472 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4473 else if (TARGET_P9_MINMAX)
4475 if (have_cpu)
4477 if (cpu_index == PROCESSOR_POWER9)
4479 /* Legacy behavior: allow -mcpu=power9 with certain
4480 capabilities explicitly disabled. */
4481 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4482 /* However, reject this automatic fix if certain
4483 capabilities required for TARGET_P9_MINMAX support
4484 have been explicitly disabled. */
4485 if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4486 | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags)
4487 != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4488 | OPTION_MASK_UPPER_REGS_DF))
4489 error ("-mpower9-minmax incompatible with explicitly disabled options");
4491 else
4492 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4493 "<xxx> less than power9");
4495 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4496 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4497 & rs6000_isa_flags_explicit))
4498 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4499 were explicitly cleared. */
4500 error ("-mpower9-minmax incompatible with explicitly disabled options");
4501 else
4502 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4504 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4505 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4506 else if (TARGET_VSX)
4507 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4508 else if (TARGET_POPCNTD)
4509 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4510 else if (TARGET_DFP)
4511 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4512 else if (TARGET_CMPB)
4513 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4514 else if (TARGET_FPRND)
4515 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4516 else if (TARGET_POPCNTB)
4517 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4518 else if (TARGET_ALTIVEC)
4519 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
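/* For example (illustrative): plain -mvsx pulls in the ISA 2.06 server
   defaults via ISA_2_6_MASKS_SERVER, but any feature in that set that the
   user explicitly disabled with its -mno-<option> form stays off, since
   its bit is part of ignore_masks.  */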
4521 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4523 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4524 error ("-mcrypto requires -maltivec");
4525 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4528 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4530 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4531 error ("-mdirect-move requires -mvsx");
4532 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4535 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4537 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4538 error ("-mpower8-vector requires -maltivec");
4539 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4542 if (TARGET_P8_VECTOR && !TARGET_VSX)
4544 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4545 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4546 error ("-mpower8-vector requires -mvsx");
4547 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4549 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4550 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4551 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4553 else
4555 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4556 not explicit. */
4557 rs6000_isa_flags |= OPTION_MASK_VSX;
4558 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4562 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4564 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4565 error ("-mvsx-timode requires -mvsx");
4566 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4569 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4571 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4572 error ("-mhard-dfp requires -mhard-float");
4573 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4576 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4577 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4578 set the individual option. */
4579 if (TARGET_UPPER_REGS > 0)
4581 if (TARGET_VSX
4582 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4584 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4585 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4587 if (TARGET_VSX
4588 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4590 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4591 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4593 if (TARGET_P8_VECTOR
4594 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4596 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4597 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4600 else if (TARGET_UPPER_REGS == 0)
4602 if (TARGET_VSX
4603 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4605 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4606 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4608 if (TARGET_VSX
4609 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4611 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4612 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4614 if (TARGET_P8_VECTOR
4615 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4617 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4618 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4622 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4624 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4625 error ("-mupper-regs-df requires -mvsx");
4626 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4629 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4631 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4632 error ("-mupper-regs-di requires -mvsx");
4633 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4636 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4638 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4639 error ("-mupper-regs-sf requires -mpower8-vector");
4640 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4643 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4644 silently turn off quad memory mode. */
4645 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4647 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4648 warning (0, N_("-mquad-memory requires 64-bit mode"));
4650 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4651 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4653 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4654 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4657 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4658 the words are reversed, but atomic operations can still be done by
4659 swapping the words. */
4660 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4662 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4663 warning (0, N_("-mquad-memory is not available in little endian mode"));
4665 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4668 /* Assume if the user asked for normal quad memory instructions, they want
4669 the atomic versions as well, unless they explicitly told us not to use quad
4670 word atomic instructions. */
4671 if (TARGET_QUAD_MEMORY
4672 && !TARGET_QUAD_MEMORY_ATOMIC
4673 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4674 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4676 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4677 generating power8 instructions. */
4678 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4679 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4680 & OPTION_MASK_P8_FUSION);
4682 /* Setting additional fusion flags turns on base fusion. */
4683 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4685 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4687 if (TARGET_P8_FUSION_SIGN)
4688 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4690 if (TARGET_TOC_FUSION)
4691 error ("-mtoc-fusion requires -mpower8-fusion");
4693 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4695 else
4696 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4699 /* Power9 fusion is a superset of power8 fusion. */
4700 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4702 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4704 /* We prefer to not mention undocumented options in
4705 error messages. However, if users have managed to select
4706 power9-fusion without selecting power8-fusion, they
4707 already know about undocumented flags. */
4708 error ("-mpower9-fusion requires -mpower8-fusion");
4709 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4711 else
4712 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4715 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4716 generating power9 instructions. */
4717 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4718 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4719 & OPTION_MASK_P9_FUSION);
4721 /* Power8 does not fuse sign extended loads with the addis. If we are
4722 optimizing at high levels for speed, convert a sign extended load into a
4723 zero extending load, and an explicit sign extension. */
4724 if (TARGET_P8_FUSION
4725 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4726 && optimize_function_for_speed_p (cfun)
4727 && optimize >= 3)
4728 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
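/* Illustrative effect (schematic, made-up registers): at -O3 a
   sign-extending halfword load that would have been

     lha 9,0(3)

   is instead emitted as a fusable zero-extending load plus an explicit
   sign extension:

     lhz   9,0(3)
     extsh 9,9  */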
4730 /* TOC fusion requires 64-bit and medium/large code model. */
4731 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4733 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4734 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4735 warning (0, N_("-mtoc-fusion requires 64-bit"));
4738 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4740 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4741 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4742 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4745 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4746 model. */
4747 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4748 && (TARGET_CMODEL != CMODEL_SMALL)
4749 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4750 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4752 /* ISA 3.0 vector instructions include ISA 2.07. */
4753 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4755 /* We prefer to not mention undocumented options in
4756 error messages. However, if users have managed to select
4757 power9-vector without selecting power8-vector, they
4758 already know about undocumented flags. */
4759 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
4760 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4761 error ("-mpower9-vector requires -mpower8-vector");
4762 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4764 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4765 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4766 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4768 else
4770 /* OPTION_MASK_P9_VECTOR is explicit and
4771 OPTION_MASK_P8_VECTOR is not explicit. */
4772 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4773 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4777 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4778 -mpower9-dform-vector. */
4779 if (TARGET_P9_DFORM_BOTH > 0)
4781 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4782 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4784 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4785 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4787 else if (TARGET_P9_DFORM_BOTH == 0)
4789 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4790 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4792 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4793 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4796 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4797 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4799 /* We prefer to not mention undocumented options in
4800 error messages. However, if users have managed to select
4801 power9-dform without selecting power9-vector, they
4802 already know about undocumented flags. */
4803 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4804 && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR
4805 | OPTION_MASK_P9_DFORM_VECTOR)))
4806 error ("-mpower9-dform requires -mpower9-vector");
4807 else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4809 rs6000_isa_flags &=
4810 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4811 rs6000_isa_flags_explicit |=
4812 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4814 else
4816 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4817 OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
4818 may be explicit. */
4819 rs6000_isa_flags |= OPTION_MASK_P9_VECTOR;
4820 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4824 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
4825 && !TARGET_DIRECT_MOVE)
4827 /* We prefer to not mention undocumented options in
4828 error messages. However, if users have managed to select
4829 power9-dform without selecting direct-move, they
4830 already know about undocumented flags. */
4831 if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4832 && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) ||
4833 (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) ||
4834 (TARGET_P9_DFORM_BOTH == 1)))
4835 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4836 " require -mdirect-move");
4837 else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
4839 rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
4840 rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
4842 else
4844 rs6000_isa_flags &=
4845 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4846 rs6000_isa_flags_explicit |=
4847 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4851 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4853 /* We prefer to not mention undocumented options in
4854 error messages. However, if users have managed to select
4855 power9-dform without selecting upper-regs-df, they
4856 already know about undocumented flags. */
4857 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4858 error ("-mpower9-dform requires -mupper-regs-df");
4859 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4862 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4864 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4865 error ("-mpower9-dform requires -mupper-regs-sf");
4866 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4869 /* Enable LRA by default. */
4870 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4871 rs6000_isa_flags |= OPTION_MASK_LRA;
4873 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4874 but do show up with -mno-lra. Given that -mlra will become the default
4875 once PR 69847 is fixed, turn off the options with problems by default if
4876 -mno-lra was used, and warn if the user explicitly asked for the option.
4878 Enable -mpower9-dform-vector by default if LRA and the other power9
4879 options are enabled. Enable -mvsx-timode by default if LRA and VSX. */
4880 if (!TARGET_LRA)
4882 if (TARGET_VSX_TIMODE)
4884 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4885 warning (0, "-mvsx-timode might need -mlra");
4887 else
4888 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4892 else
4894 if (TARGET_VSX && !TARGET_VSX_TIMODE
4895 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4896 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4899 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4900 support. If we only have ISA 2.06 support, and the user did not specify
4901 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4902 but we don't enable the full vectorization support. */
4903 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4904 TARGET_ALLOW_MOVMISALIGN = 1;
4906 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4908 if (TARGET_ALLOW_MOVMISALIGN > 0
4909 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4910 error ("-mallow-movmisalign requires -mvsx");
4912 TARGET_ALLOW_MOVMISALIGN = 0;
4915 /* Determine when unaligned vector accesses are permitted, and when
4916 they are preferred over masked Altivec loads. Note that if
4917 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4918 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4919 not true. */
4920 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4922 if (!TARGET_VSX)
4924 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4925 error ("-mefficient-unaligned-vsx requires -mvsx");
4927 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4930 else if (!TARGET_ALLOW_MOVMISALIGN)
4932 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4933 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4935 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4939 /* Check whether we should allow small integers into VSX registers. We
4940 require direct move to prevent the register allocator from having to move
4941 variables through memory. SImode can be used on ISA 2.07,
4942 while HImode and QImode require ISA 3.0. */
4943 if (TARGET_VSX_SMALL_INTEGER
4944 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4946 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4947 error ("-mvsx-small-integer requires -mpower8-vector, "
4948 "-mupper-regs-di, and -mdirect-move");
4950 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4953 /* Set long double size before the IEEE 128-bit tests. */
4954 if (!global_options_set.x_rs6000_long_double_type_size)
4956 if (main_target_opt != NULL
4957 && (main_target_opt->x_rs6000_long_double_type_size
4958 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4959 error ("target attribute or pragma changes long double size");
4960 else
4961 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4964 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4965 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4966 pick up this default. */
4967 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4968 if (!global_options_set.x_rs6000_ieeequad)
4969 rs6000_ieeequad = 1;
4970 #endif
4972 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4973 systems, but don't enable the __float128 keyword. */
4974 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4975 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4976 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4977 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4979 /* IEEE 128-bit floating point requires VSX support. */
4980 if (!TARGET_VSX)
4982 if (TARGET_FLOAT128_KEYWORD)
4984 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4985 error ("-mfloat128 requires VSX support");
4987 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4988 | OPTION_MASK_FLOAT128_KEYWORD
4989 | OPTION_MASK_FLOAT128_HW);
4992 else if (TARGET_FLOAT128_TYPE)
4994 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4995 error ("-mfloat128-type requires VSX support");
4997 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4998 | OPTION_MASK_FLOAT128_KEYWORD
4999 | OPTION_MASK_FLOAT128_HW);
5003 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
5004 128-bit floating point support to be enabled. */
5005 if (!TARGET_FLOAT128_TYPE)
5007 if (TARGET_FLOAT128_KEYWORD)
5009 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
5011 error ("-mfloat128 requires -mfloat128-type");
5012 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
5013 | OPTION_MASK_FLOAT128_KEYWORD
5014 | OPTION_MASK_FLOAT128_HW);
5016 else
5017 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
5020 if (TARGET_FLOAT128_HW)
5022 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5024 error ("-mfloat128-hardware requires -mfloat128-type");
5025 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5027 else
5028 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
5029 | OPTION_MASK_FLOAT128_KEYWORD
5030 | OPTION_MASK_FLOAT128_HW);
5034 /* If we have -mfloat128-type and full ISA 3.0 support, enable
5035 -mfloat128-hardware by default. However, don't enable the __float128
5036 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
5037 -mfloat128 option as well if it was not already set. */
5038 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
5039 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
5040 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
5041 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
5043 if (TARGET_FLOAT128_HW
5044 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
5046 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5047 error ("-mfloat128-hardware requires full ISA 3.0 support");
5049 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5052 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
5054 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5055 error ("-mfloat128-hardware requires -m64");
5057 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5060 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
5061 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
5062 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
5063 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
5065 /* Print the options after updating the defaults. */
5066 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5067 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
5069 /* E500mc does "better" if we inline more aggressively. Respect the
5070 user's opinion, though. */
5071 if (rs6000_block_move_inline_limit == 0
5072 && (rs6000_cpu == PROCESSOR_PPCE500MC
5073 || rs6000_cpu == PROCESSOR_PPCE500MC64
5074 || rs6000_cpu == PROCESSOR_PPCE5500
5075 || rs6000_cpu == PROCESSOR_PPCE6500))
5076 rs6000_block_move_inline_limit = 128;
5078 /* store_one_arg depends on expand_block_move to handle at least the
5079 size of reg_parm_stack_space. */
5080 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
5081 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
5083 if (global_init_p)
5085       /* If the appropriate debug option is enabled, replace the target hooks
5086	 with debug versions that call the real version and then print
5087	 debugging information.  */
5088 if (TARGET_DEBUG_COST)
5090 targetm.rtx_costs = rs6000_debug_rtx_costs;
5091 targetm.address_cost = rs6000_debug_address_cost;
5092 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
5095 if (TARGET_DEBUG_ADDR)
5097 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
5098 targetm.legitimize_address = rs6000_debug_legitimize_address;
5099 rs6000_secondary_reload_class_ptr
5100 = rs6000_debug_secondary_reload_class;
5101 rs6000_secondary_memory_needed_ptr
5102 = rs6000_debug_secondary_memory_needed;
5103 rs6000_cannot_change_mode_class_ptr
5104 = rs6000_debug_cannot_change_mode_class;
5105 rs6000_preferred_reload_class_ptr
5106 = rs6000_debug_preferred_reload_class;
5107 rs6000_legitimize_reload_address_ptr
5108 = rs6000_debug_legitimize_reload_address;
5109 rs6000_mode_dependent_address_ptr
5110 = rs6000_debug_mode_dependent_address;
5113 if (rs6000_veclibabi_name)
5115 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
5116 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
5117 else
5119 error ("unknown vectorization library ABI type (%s) for "
5120 "-mveclibabi= switch", rs6000_veclibabi_name);
5121 ret = false;
5126 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
5127 target attribute or pragma which automatically enables both options,
5128 unless the altivec ABI was set. This is set by default for 64-bit, but
5129 not for 32-bit. */
5130 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5131 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
5132 | OPTION_MASK_FLOAT128_TYPE
5133 | OPTION_MASK_FLOAT128_KEYWORD)
5134 & ~rs6000_isa_flags_explicit);
5136 /* Enable Altivec ABI for AIX -maltivec. */
5137 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
5139 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5140 error ("target attribute or pragma changes AltiVec ABI");
5141 else
5142 rs6000_altivec_abi = 1;
5145 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
5146 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
5147 be explicitly overridden in either case. */
5148 if (TARGET_ELF)
5150 if (!global_options_set.x_rs6000_altivec_abi
5151 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
5153 if (main_target_opt != NULL &&
5154 !main_target_opt->x_rs6000_altivec_abi)
5155 error ("target attribute or pragma changes AltiVec ABI");
5156 else
5157 rs6000_altivec_abi = 1;
5161   /* Set the Darwin64 ABI as default for 64-bit Darwin.
5162      So far, the only darwin64 targets are also Mach-O.  */
5163 if (TARGET_MACHO
5164 && DEFAULT_ABI == ABI_DARWIN
5165 && TARGET_64BIT)
5167 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
5168 error ("target attribute or pragma changes darwin64 ABI");
5169 else
5171 rs6000_darwin64_abi = 1;
5172 /* Default to natural alignment, for better performance. */
5173 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
5177   /* Place FP constants in the constant pool instead of the TOC
5178      if section anchors are enabled.  */
5179 if (flag_section_anchors
5180 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
5181 TARGET_NO_FP_IN_TOC = 1;
5183 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5184 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
5186 #ifdef SUBTARGET_OVERRIDE_OPTIONS
5187 SUBTARGET_OVERRIDE_OPTIONS;
5188 #endif
5189 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
5190 SUBSUBTARGET_OVERRIDE_OPTIONS;
5191 #endif
5192 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
5193 SUB3TARGET_OVERRIDE_OPTIONS;
5194 #endif
5196 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5197 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
5199   /* For the E500 family of cores, reset the single/double FP flags to let us
5200      check that they remain constant across attributes or pragmas.  Also
5201      clear a possible request for string instructions, which are not
5202      supported on E500 and which we might have silently turned on above
5203      for -Os.
5204      For other families, clear ISEL in case it was set implicitly.  */
5207 switch (rs6000_cpu)
5209 case PROCESSOR_PPC8540:
5210 case PROCESSOR_PPC8548:
5211 case PROCESSOR_PPCE500MC:
5212 case PROCESSOR_PPCE500MC64:
5213 case PROCESSOR_PPCE5500:
5214 case PROCESSOR_PPCE6500:
5216 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
5217 rs6000_double_float = TARGET_E500_DOUBLE;
5219 rs6000_isa_flags &= ~OPTION_MASK_STRING;
5221 break;
5223 default:
5225 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
5226 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
5228 break;
5231 if (main_target_opt)
5233 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
5234 error ("target attribute or pragma changes single precision floating "
5235 "point");
5236 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
5237 error ("target attribute or pragma changes double precision floating "
5238 "point");
5241 /* Detect invalid option combinations with E500. */
5242 CHECK_E500_OPTIONS;
5244 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
5245 && rs6000_cpu != PROCESSOR_POWER5
5246 && rs6000_cpu != PROCESSOR_POWER6
5247 && rs6000_cpu != PROCESSOR_POWER7
5248 && rs6000_cpu != PROCESSOR_POWER8
5249 && rs6000_cpu != PROCESSOR_POWER9
5250 && rs6000_cpu != PROCESSOR_PPCA2
5251 && rs6000_cpu != PROCESSOR_CELL
5252 && rs6000_cpu != PROCESSOR_PPC476);
5253 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
5254 || rs6000_cpu == PROCESSOR_POWER5
5255 || rs6000_cpu == PROCESSOR_POWER7
5256 || rs6000_cpu == PROCESSOR_POWER8);
5257 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
5258 || rs6000_cpu == PROCESSOR_POWER5
5259 || rs6000_cpu == PROCESSOR_POWER6
5260 || rs6000_cpu == PROCESSOR_POWER7
5261 || rs6000_cpu == PROCESSOR_POWER8
5262 || rs6000_cpu == PROCESSOR_POWER9
5263 || rs6000_cpu == PROCESSOR_PPCE500MC
5264 || rs6000_cpu == PROCESSOR_PPCE500MC64
5265 || rs6000_cpu == PROCESSOR_PPCE5500
5266 || rs6000_cpu == PROCESSOR_PPCE6500);
5268 /* Allow debug switches to override the above settings. These are set to -1
5269 in powerpcspe.opt to indicate the user hasn't directly set the switch. */
5270 if (TARGET_ALWAYS_HINT >= 0)
5271 rs6000_always_hint = TARGET_ALWAYS_HINT;
5273 if (TARGET_SCHED_GROUPS >= 0)
5274 rs6000_sched_groups = TARGET_SCHED_GROUPS;
5276 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
5277 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
5279 rs6000_sched_restricted_insns_priority
5280 = (rs6000_sched_groups ? 1 : 0);
5282 /* Handle -msched-costly-dep option. */
5283 rs6000_sched_costly_dep
5284 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
5286 if (rs6000_sched_costly_dep_str)
5288 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
5289 rs6000_sched_costly_dep = no_dep_costly;
5290 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
5291 rs6000_sched_costly_dep = all_deps_costly;
5292 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
5293 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
5294 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
5295 rs6000_sched_costly_dep = store_to_load_dep_costly;
5296 else
5297 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
5298 atoi (rs6000_sched_costly_dep_str));
5301 /* Handle -minsert-sched-nops option. */
5302 rs6000_sched_insert_nops
5303 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
5305 if (rs6000_sched_insert_nops_str)
5307 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
5308 rs6000_sched_insert_nops = sched_finish_none;
5309 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
5310 rs6000_sched_insert_nops = sched_finish_pad_groups;
5311 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
5312 rs6000_sched_insert_nops = sched_finish_regroup_exact;
5313 else
5314 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
5315 atoi (rs6000_sched_insert_nops_str));
5318 /* Handle stack protector */
5319 if (!global_options_set.x_rs6000_stack_protector_guard)
5320 #ifdef TARGET_THREAD_SSP_OFFSET
5321 rs6000_stack_protector_guard = SSP_TLS;
5322 #else
5323 rs6000_stack_protector_guard = SSP_GLOBAL;
5324 #endif
5326 #ifdef TARGET_THREAD_SSP_OFFSET
5327 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
5328 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
5329 #endif
5331 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
5333 char *endp;
5334 const char *str = rs6000_stack_protector_guard_offset_str;
5336 errno = 0;
5337 long offset = strtol (str, &endp, 0);
5338 if (!*str || *endp || errno)
5339 error ("%qs is not a valid number "
5340 "in -mstack-protector-guard-offset=", str);
5342 if (!IN_RANGE (offset, -0x8000, 0x7fff)
5343 || (TARGET_64BIT && (offset & 3)))
5344 error ("%qs is not a valid offset "
5345 "in -mstack-protector-guard-offset=", str);
5347 rs6000_stack_protector_guard_offset = offset;
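/* The validation above is the usual three-way strtol check: an empty
   string, trailing junk after the digits, and ERANGE overflow are all
   rejected before the range test.  A minimal standalone sketch of the
   same idiom; parse_guard_offset is a hypothetical helper, not part of
   this file, and it omits the extra (offset & 3) word-alignment test
   the 64-bit path also applies:

     #include <errno.h>
     #include <stdlib.h>

     static int
     parse_guard_offset (const char *str, long *out)
     {
       char *endp;
       errno = 0;
       long offset = strtol (str, &endp, 0);   // base 0 also accepts 0x...
       if (!*str || *endp || errno)
         return 0;                             // not a (complete) number
       if (offset < -0x8000 || offset > 0x7fff)
         return 0;                             // must fit a 16-bit displacement
       *out = offset;
       return 1;
     }
*/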
5350 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
5352 const char *str = rs6000_stack_protector_guard_reg_str;
5353 int reg = decode_reg_name (str);
5355 if (!IN_RANGE (reg, 1, 31))
5356 error ("%qs is not a valid base register "
5357 "in -mstack-protector-guard-reg=", str);
5359 rs6000_stack_protector_guard_reg = reg;
5362 if (rs6000_stack_protector_guard == SSP_TLS
5363 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
5364 error ("-mstack-protector-guard=tls needs a valid base register");
5366 if (global_init_p)
5368 #ifdef TARGET_REGNAMES
5369 /* If the user desires alternate register names, copy in the
5370 alternate names now. */
5371 if (TARGET_REGNAMES)
5372 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
5373 #endif
5375 /* Set aix_struct_return last, after the ABI is determined.
5376 If -maix-struct-return or -msvr4-struct-return was explicitly
5377 used, don't override with the ABI default. */
5378 if (!global_options_set.x_aix_struct_return)
5379 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
5381 #if 0
5382 /* IBM XL compiler defaults to unsigned bitfields. */
5383 if (TARGET_XL_COMPAT)
5384 flag_signed_bitfields = 0;
5385 #endif
5387 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
5388 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
5390 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
5392 /* We can only guarantee the availability of DI pseudo-ops when
5393 assembling for 64-bit targets. */
5394 if (!TARGET_64BIT)
5396 targetm.asm_out.aligned_op.di = NULL;
5397 targetm.asm_out.unaligned_op.di = NULL;
5401 /* Set branch target alignment, if not optimizing for size. */
5402 if (!optimize_size)
5404       /* Cell wants 8-byte alignment for dual issue.  Titan wants 8-byte
5405	 alignment to avoid misprediction by the branch predictor.  */
5406 if (rs6000_cpu == PROCESSOR_TITAN
5407 || rs6000_cpu == PROCESSOR_CELL)
5409 if (align_functions <= 0)
5410 align_functions = 8;
5411 if (align_jumps <= 0)
5412 align_jumps = 8;
5413 if (align_loops <= 0)
5414 align_loops = 8;
5416 if (rs6000_align_branch_targets)
5418 if (align_functions <= 0)
5419 align_functions = 16;
5420 if (align_jumps <= 0)
5421 align_jumps = 16;
5422 if (align_loops <= 0)
5424 can_override_loop_align = 1;
5425 align_loops = 16;
5428 if (align_jumps_max_skip <= 0)
5429 align_jumps_max_skip = 15;
5430 if (align_loops_max_skip <= 0)
5431 align_loops_max_skip = 15;
5434 /* Arrange to save and restore machine status around nested functions. */
5435 init_machine_status = rs6000_init_machine_status;
5437 /* We should always be splitting complex arguments, but we can't break
5438 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5439 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5440 targetm.calls.split_complex_arg = NULL;
5442 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5443 if (DEFAULT_ABI == ABI_AIX)
5444 targetm.calls.custom_function_descriptors = 0;
5447 /* Initialize rs6000_cost with the appropriate target costs. */
5448 if (optimize_size)
5449 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5450 else
5451 switch (rs6000_cpu)
5453 case PROCESSOR_RS64A:
5454 rs6000_cost = &rs64a_cost;
5455 break;
5457 case PROCESSOR_MPCCORE:
5458 rs6000_cost = &mpccore_cost;
5459 break;
5461 case PROCESSOR_PPC403:
5462 rs6000_cost = &ppc403_cost;
5463 break;
5465 case PROCESSOR_PPC405:
5466 rs6000_cost = &ppc405_cost;
5467 break;
5469 case PROCESSOR_PPC440:
5470 rs6000_cost = &ppc440_cost;
5471 break;
5473 case PROCESSOR_PPC476:
5474 rs6000_cost = &ppc476_cost;
5475 break;
5477 case PROCESSOR_PPC601:
5478 rs6000_cost = &ppc601_cost;
5479 break;
5481 case PROCESSOR_PPC603:
5482 rs6000_cost = &ppc603_cost;
5483 break;
5485 case PROCESSOR_PPC604:
5486 rs6000_cost = &ppc604_cost;
5487 break;
5489 case PROCESSOR_PPC604e:
5490 rs6000_cost = &ppc604e_cost;
5491 break;
5493 case PROCESSOR_PPC620:
5494 rs6000_cost = &ppc620_cost;
5495 break;
5497 case PROCESSOR_PPC630:
5498 rs6000_cost = &ppc630_cost;
5499 break;
5501 case PROCESSOR_CELL:
5502 rs6000_cost = &ppccell_cost;
5503 break;
5505 case PROCESSOR_PPC750:
5506 case PROCESSOR_PPC7400:
5507 rs6000_cost = &ppc750_cost;
5508 break;
5510 case PROCESSOR_PPC7450:
5511 rs6000_cost = &ppc7450_cost;
5512 break;
5514 case PROCESSOR_PPC8540:
5515 case PROCESSOR_PPC8548:
5516 rs6000_cost = &ppc8540_cost;
5517 break;
5519 case PROCESSOR_PPCE300C2:
5520 case PROCESSOR_PPCE300C3:
5521 rs6000_cost = &ppce300c2c3_cost;
5522 break;
5524 case PROCESSOR_PPCE500MC:
5525 rs6000_cost = &ppce500mc_cost;
5526 break;
5528 case PROCESSOR_PPCE500MC64:
5529 rs6000_cost = &ppce500mc64_cost;
5530 break;
5532 case PROCESSOR_PPCE5500:
5533 rs6000_cost = &ppce5500_cost;
5534 break;
5536 case PROCESSOR_PPCE6500:
5537 rs6000_cost = &ppce6500_cost;
5538 break;
5540 case PROCESSOR_TITAN:
5541 rs6000_cost = &titan_cost;
5542 break;
5544 case PROCESSOR_POWER4:
5545 case PROCESSOR_POWER5:
5546 rs6000_cost = &power4_cost;
5547 break;
5549 case PROCESSOR_POWER6:
5550 rs6000_cost = &power6_cost;
5551 break;
5553 case PROCESSOR_POWER7:
5554 rs6000_cost = &power7_cost;
5555 break;
5557 case PROCESSOR_POWER8:
5558 rs6000_cost = &power8_cost;
5559 break;
5561 case PROCESSOR_POWER9:
5562 rs6000_cost = &power9_cost;
5563 break;
5565 case PROCESSOR_PPCA2:
5566 rs6000_cost = &ppca2_cost;
5567 break;
5569 default:
5570 gcc_unreachable ();
5573 if (global_init_p)
5575 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5576 rs6000_cost->simultaneous_prefetches,
5577 global_options.x_param_values,
5578 global_options_set.x_param_values);
5579 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5580 global_options.x_param_values,
5581 global_options_set.x_param_values);
5582 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5583 rs6000_cost->cache_line_size,
5584 global_options.x_param_values,
5585 global_options_set.x_param_values);
5586 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5587 global_options.x_param_values,
5588 global_options_set.x_param_values);
5590 /* Increase loop peeling limits based on performance analysis. */
5591 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5592 global_options.x_param_values,
5593 global_options_set.x_param_values);
5594 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5595 global_options.x_param_values,
5596 global_options_set.x_param_values);
5598 /* Use the 'model' -fsched-pressure algorithm by default. */
5599 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5600 SCHED_PRESSURE_MODEL,
5601 global_options.x_param_values,
5602 global_options_set.x_param_values);
5604 /* If using typedef char *va_list, signal that
5605 __builtin_va_start (&ap, 0) can be optimized to
5606 ap = __builtin_next_arg (0). */
5607 if (DEFAULT_ABI != ABI_V4)
5608 targetm.expand_builtin_va_start = NULL;
5611 /* Set up single/double float flags.
5612 If TARGET_HARD_FLOAT is set, but neither single or double is set,
5613 then set both flags. */
5614 if (TARGET_HARD_FLOAT && TARGET_FPRS
5615 && rs6000_single_float == 0 && rs6000_double_float == 0)
5616 rs6000_single_float = rs6000_double_float = 1;
5618 /* If not explicitly specified via option, decide whether to generate indexed
5619 load/store instructions. A value of -1 indicates that the
5620 initial value of this variable has not been overwritten. During
5621 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5622 if (TARGET_AVOID_XFORM == -1)
5623 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5624 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5625 need indexed accesses and the type used is the scalar type of the element
5626 being loaded or stored. */
5627 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5628 && !TARGET_ALTIVEC);
5630 /* Set the -mrecip options. */
5631 if (rs6000_recip_name)
5633 char *p = ASTRDUP (rs6000_recip_name);
5634 char *q;
5635 unsigned int mask, i;
5636 bool invert;
5638 while ((q = strtok (p, ",")) != NULL)
5640 p = NULL;
5641 if (*q == '!')
5643 invert = true;
5644 q++;
5646 else
5647 invert = false;
5649 if (!strcmp (q, "default"))
5650 mask = ((TARGET_RECIP_PRECISION)
5651 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5652 else
5654 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5655 if (!strcmp (q, recip_options[i].string))
5657 mask = recip_options[i].mask;
5658 break;
5661 if (i == ARRAY_SIZE (recip_options))
5663 error ("unknown option for -mrecip=%s", q);
5664 invert = false;
5665 mask = 0;
5666 ret = false;
5670 if (invert)
5671 rs6000_recip_control &= ~mask;
5672 else
5673 rs6000_recip_control |= mask;
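/* Example of how the comma list above is consumed, assuming a
   hypothetical command line -mrecip=default,!divf: the first token
   selects the default mask (high or low precision depending on
   -mrecip-precision) and the second clears the divf bit again, because
   the leading '!' sets INVERT before the option table is searched.
   The strtok idiom, sketched standalone on a writable copy (as ASTRDUP
   provides above):

     #include <stdbool.h>
     #include <string.h>

     char buf[] = "default,!divf";
     for (char *q = strtok (buf, ","); q != NULL; q = strtok (NULL, ","))
       {
         bool invert = (*q == '!');
         if (invert)
           q++;        // skip the '!' before matching the option name
         // look up q in the option table, then set or clear its mask
       }
*/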
5677 /* Set the builtin mask of the various options used that could affect which
5678 builtins were used. In the past we used target_flags, but we've run out
5679 of bits, and some options like SPE and PAIRED are no longer in
5680 target_flags. */
5681 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5682 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5683 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5684 rs6000_builtin_mask);
5686 /* Initialize all of the registers. */
5687 rs6000_init_hard_regno_mode_ok (global_init_p);
5689   /* Save the initial options in case the user does function-specific options.  */
5690 if (global_init_p)
5691 target_option_default_node = target_option_current_node
5692 = build_target_option_node (&global_options);
5694   /* If not explicitly specified via option, decide whether to generate the
5695      extra blr's required to preserve the link stack on some cpus (e.g., 476).  */
5696 if (TARGET_LINK_STACK == -1)
5697 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5699 return ret;
5702 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5703 define the target cpu type. */
5705 static void
5706 rs6000_option_override (void)
5708 (void) rs6000_option_override_internal (true);
5712 /* Implement targetm.vectorize.builtin_mask_for_load. */
5713 static tree
5714 rs6000_builtin_mask_for_load (void)
5716 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5717 if ((TARGET_ALTIVEC && !TARGET_VSX)
5718 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5719 return altivec_builtin_mask_for_load;
5720 else
5721 return 0;
5724 /* Implement LOOP_ALIGN. */
5725 int
5726 rs6000_loop_align (rtx label)
5728 basic_block bb;
5729 int ninsns;
5731 /* Don't override loop alignment if -falign-loops was specified. */
5732 if (!can_override_loop_align)
5733 return align_loops_log;
5735 bb = BLOCK_FOR_INSN (label);
5736   ninsns = num_loop_insns (bb->loop_father);
5738 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5739 if (ninsns > 4 && ninsns <= 8
5740 && (rs6000_cpu == PROCESSOR_POWER4
5741 || rs6000_cpu == PROCESSOR_POWER5
5742 || rs6000_cpu == PROCESSOR_POWER6
5743 || rs6000_cpu == PROCESSOR_POWER7
5744 || rs6000_cpu == PROCESSOR_POWER8
5745 || rs6000_cpu == PROCESSOR_POWER9))
5746 return 5;
5747 else
5748 return align_loops_log;
5751 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5752 static int
5753 rs6000_loop_align_max_skip (rtx_insn *label)
5755 return (1 << rs6000_loop_align (label)) - 1;
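/* Worked example of the pair above: for a loop of 5 to 8 insns on the
   POWER4..POWER9 cpus listed, rs6000_loop_align returns 5, i.e. a
   1 << 5 = 32-byte boundary (one icache sector), and
   rs6000_loop_align_max_skip then permits at most (1 << 5) - 1 = 31
   bytes of padding to reach that boundary.  */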
5758 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5759    after applying N iterations.  This routine does not determine how many
5760    iterations are required to reach the desired alignment.  */
5762 static bool
5763 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5765 if (is_packed)
5766 return false;
5768 if (TARGET_32BIT)
5770 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5771 return true;
5773 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5774 return true;
5776 return false;
5778 else
5780 if (TARGET_MACHO)
5781 return false;
5783 /* Assuming that all other types are naturally aligned. CHECKME! */
5784 return true;
5788 /* Return true if the vector misalignment factor is supported by the
5789 target. */
5790 static bool
5791 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5792 const_tree type,
5793 int misalignment,
5794 bool is_packed)
5796 if (TARGET_VSX)
5798 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5799 return true;
5801       /* Return false if the movmisalign pattern is not supported for this mode.  */
5802 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5803 return false;
5805 if (misalignment == -1)
5807 /* Misalignment factor is unknown at compile time but we know
5808 it's word aligned. */
5809 if (rs6000_vector_alignment_reachable (type, is_packed))
5811 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5813 if (element_size == 64 || element_size == 32)
5814 return true;
5817 return false;
5820       /* VSX supports word-aligned vectors.  */
5821 if (misalignment % 4 == 0)
5822 return true;
5824 return false;
5827 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5828 static int
5829 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5830 tree vectype, int misalign)
5832 unsigned elements;
5833 tree elem_type;
5835 switch (type_of_cost)
5837 case scalar_stmt:
5838 case scalar_load:
5839 case scalar_store:
5840 case vector_stmt:
5841 case vector_load:
5842 case vector_store:
5843 case vec_to_scalar:
5844 case scalar_to_vec:
5845 case cond_branch_not_taken:
5846 return 1;
5848 case vec_perm:
5849 if (TARGET_VSX)
5850 return 3;
5851 else
5852 return 1;
5854 case vec_promote_demote:
5855 if (TARGET_VSX)
5856 return 4;
5857 else
5858 return 1;
5860 case cond_branch_taken:
5861 return 3;
5863 case unaligned_load:
5864 if (TARGET_P9_VECTOR)
5865 return 3;
5867 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5868 return 1;
5870 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5872 elements = TYPE_VECTOR_SUBPARTS (vectype);
5873 if (elements == 2)
5874 /* Double word aligned. */
5875 return 2;
5877 if (elements == 4)
5879 switch (misalign)
5881 case 8:
5882 /* Double word aligned. */
5883 return 2;
5885 case -1:
5886 /* Unknown misalignment. */
5887 case 4:
5888 case 12:
5889 /* Word aligned. */
5890 return 22;
5892 default:
5893 gcc_unreachable ();
5898 if (TARGET_ALTIVEC)
5899 /* Misaligned loads are not supported. */
5900 gcc_unreachable ();
5902 return 2;
5904 case unaligned_store:
5905 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5906 return 1;
5908 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5910 elements = TYPE_VECTOR_SUBPARTS (vectype);
5911 if (elements == 2)
5912 /* Double word aligned. */
5913 return 2;
5915 if (elements == 4)
5917 switch (misalign)
5919 case 8:
5920 /* Double word aligned. */
5921 return 2;
5923 case -1:
5924 /* Unknown misalignment. */
5925 case 4:
5926 case 12:
5927 /* Word aligned. */
5928 return 23;
5930 default:
5931 gcc_unreachable ();
5936 if (TARGET_ALTIVEC)
5937 /* Misaligned stores are not supported. */
5938 gcc_unreachable ();
5940 return 2;
5942 case vec_construct:
5943 /* This is a rough approximation assuming non-constant elements
5944 constructed into a vector via element insertion. FIXME:
5945 vec_construct is not granular enough for uniformly good
5946 decisions. If the initialization is a splat, this is
5947 cheaper than we estimate. Improve this someday. */
5948 elem_type = TREE_TYPE (vectype);
5949 /* 32-bit vectors loaded into registers are stored as double
5950 precision, so we need 2 permutes, 2 converts, and 1 merge
5951 to construct a vector of short floats from them. */
5952 if (SCALAR_FLOAT_TYPE_P (elem_type)
5953 && TYPE_PRECISION (elem_type) == 32)
5954 return 5;
5955 /* On POWER9, integer vector types are built up in GPRs and then
5956 use a direct move (2 cycles). For POWER8 this is even worse,
5957 as we need two direct moves and a merge, and the direct moves
5958 are five cycles. */
5959 else if (INTEGRAL_TYPE_P (elem_type))
5961 if (TARGET_P9_VECTOR)
5962 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5963 else
5964 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11;
5966 else
5967 /* V2DFmode doesn't need a direct move. */
5968 return 2;
5970 default:
5971 gcc_unreachable ();
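/* Worked examples of the vec_construct estimates above: building a
   V4SF from 32-bit floats costs 5 (2 permutes + 2 converts + 1 merge);
   an integral V4SI costs TYPE_VECTOR_SUBPARTS - 1 + 2 = 5 on POWER9
   but 4 - 1 + 11 = 14 elsewhere, reflecting the two five-cycle direct
   moves plus a merge; and V2DF stays at 2, since no direct move is
   needed.  */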
5975 /* Implement targetm.vectorize.preferred_simd_mode. */
5977 static machine_mode
5978 rs6000_preferred_simd_mode (scalar_mode mode)
5980 if (TARGET_VSX)
5981 switch (mode)
5983 case E_DFmode:
5984 return V2DFmode;
5985 default:;
5987 if (TARGET_ALTIVEC || TARGET_VSX)
5988 switch (mode)
5990 case E_SFmode:
5991 return V4SFmode;
5992 case E_TImode:
5993 return V1TImode;
5994 case E_DImode:
5995 return V2DImode;
5996 case E_SImode:
5997 return V4SImode;
5998 case E_HImode:
5999 return V8HImode;
6000 case E_QImode:
6001 return V16QImode;
6002 default:;
6004 if (TARGET_SPE)
6005 switch (mode)
6007 case E_SFmode:
6008 return V2SFmode;
6009 case E_SImode:
6010 return V2SImode;
6011 default:;
6013 if (TARGET_PAIRED_FLOAT
6014 && mode == SFmode)
6015 return V2SFmode;
6016 return word_mode;
6019 typedef struct _rs6000_cost_data
6021 struct loop *loop_info;
6022 unsigned cost[3];
6023 } rs6000_cost_data;
6025 /* Test for likely overcommitment of vector hardware resources. If a
6026 loop iteration is relatively large, and too large a percentage of
6027 instructions in the loop are vectorized, the cost model may not
6028 adequately reflect delays from unavailable vector resources.
6029 Penalize the loop body cost for this case. */
6031 static void
6032 rs6000_density_test (rs6000_cost_data *data)
6034 const int DENSITY_PCT_THRESHOLD = 85;
6035 const int DENSITY_SIZE_THRESHOLD = 70;
6036 const int DENSITY_PENALTY = 10;
6037 struct loop *loop = data->loop_info;
6038 basic_block *bbs = get_loop_body (loop);
6039 int nbbs = loop->num_nodes;
6040 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
6041 int i, density_pct;
6043 for (i = 0; i < nbbs; i++)
6045 basic_block bb = bbs[i];
6046 gimple_stmt_iterator gsi;
6048 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6050 gimple *stmt = gsi_stmt (gsi);
6051 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6053 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6054 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
6055 not_vec_cost++;
6059 free (bbs);
6060 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
6062 if (density_pct > DENSITY_PCT_THRESHOLD
6063 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
6065 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
6066 if (dump_enabled_p ())
6067 dump_printf_loc (MSG_NOTE, vect_location,
6068 "density %d%%, cost %d exceeds threshold, penalizing "
6069 "loop body cost by %d%%", density_pct,
6070 vec_cost + not_vec_cost, DENSITY_PENALTY);
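/* Worked example with made-up costs: vec_cost = 90 and not_vec_cost =
   10 give density_pct = 9000 / 100 = 90, above the 85% threshold, and
   a total size of 100, above the 70 threshold, so the body cost is
   inflated to 90 * (100 + 10) / 100 = 99.  */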
6074 /* Implement targetm.vectorize.init_cost. */
6076 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
6077 instruction is needed by the vectorization. */
6078 static bool rs6000_vect_nonmem;
6080 static void *
6081 rs6000_init_cost (struct loop *loop_info)
6083 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
6084 data->loop_info = loop_info;
6085 data->cost[vect_prologue] = 0;
6086 data->cost[vect_body] = 0;
6087 data->cost[vect_epilogue] = 0;
6088 rs6000_vect_nonmem = false;
6089 return data;
6092 /* Implement targetm.vectorize.add_stmt_cost. */
6094 static unsigned
6095 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6096 struct _stmt_vec_info *stmt_info, int misalign,
6097 enum vect_cost_model_location where)
6099 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6100 unsigned retval = 0;
6102 if (flag_vect_cost_model)
6104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6105 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
6106 misalign);
6107 /* Statements in an inner loop relative to the loop being
6108 vectorized are weighted more heavily. The value here is
6109 arbitrary and could potentially be improved with analysis. */
6110 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6111 count *= 50; /* FIXME. */
6113 retval = (unsigned) (count * stmt_cost);
6114 cost_data->cost[where] += retval;
6116 /* Check whether we're doing something other than just a copy loop.
6117 Not all such loops may be profitably vectorized; see
6118 rs6000_finish_cost. */
6119 if ((kind == vec_to_scalar || kind == vec_perm
6120 || kind == vec_promote_demote || kind == vec_construct
6121 || kind == scalar_to_vec)
6122 || (where == vect_body && kind == vector_stmt))
6123 rs6000_vect_nonmem = true;
6126 return retval;
6129 /* Implement targetm.vectorize.finish_cost. */
6131 static void
6132 rs6000_finish_cost (void *data, unsigned *prologue_cost,
6133 unsigned *body_cost, unsigned *epilogue_cost)
6135 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6137 if (cost_data->loop_info)
6138 rs6000_density_test (cost_data);
6140 /* Don't vectorize minimum-vectorization-factor, simple copy loops
6141 that require versioning for any reason. The vectorization is at
6142 best a wash inside the loop, and the versioning checks make
6143 profitability highly unlikely and potentially quite harmful. */
6144 if (cost_data->loop_info)
6146 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
6147 if (!rs6000_vect_nonmem
6148 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
6149 && LOOP_REQUIRES_VERSIONING (vec_info))
6150 cost_data->cost[vect_body] += 10000;
6153 *prologue_cost = cost_data->cost[vect_prologue];
6154 *body_cost = cost_data->cost[vect_body];
6155 *epilogue_cost = cost_data->cost[vect_epilogue];
6158 /* Implement targetm.vectorize.destroy_cost_data. */
6160 static void
6161 rs6000_destroy_cost_data (void *data)
6163 free (data);
6166 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
6167 library with vectorized intrinsics. */
6169 static tree
6170 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
6171 tree type_in)
6173 char name[32];
6174 const char *suffix = NULL;
6175 tree fntype, new_fndecl, bdecl = NULL_TREE;
6176 int n_args = 1;
6177 const char *bname;
6178 machine_mode el_mode, in_mode;
6179 int n, in_n;
6181 /* Libmass is suitable for unsafe math only as it does not correctly support
6182 parts of IEEE with the required precision such as denormals. Only support
6183 it if we have VSX to use the simd d2 or f4 functions.
6184 XXX: Add variable length support. */
6185 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
6186 return NULL_TREE;
6188 el_mode = TYPE_MODE (TREE_TYPE (type_out));
6189 n = TYPE_VECTOR_SUBPARTS (type_out);
6190 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6191 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6192 if (el_mode != in_mode
6193 || n != in_n)
6194 return NULL_TREE;
6196 switch (fn)
6198 CASE_CFN_ATAN2:
6199 CASE_CFN_HYPOT:
6200 CASE_CFN_POW:
6201 n_args = 2;
6202 gcc_fallthrough ();
6204 CASE_CFN_ACOS:
6205 CASE_CFN_ACOSH:
6206 CASE_CFN_ASIN:
6207 CASE_CFN_ASINH:
6208 CASE_CFN_ATAN:
6209 CASE_CFN_ATANH:
6210 CASE_CFN_CBRT:
6211 CASE_CFN_COS:
6212 CASE_CFN_COSH:
6213 CASE_CFN_ERF:
6214 CASE_CFN_ERFC:
6215 CASE_CFN_EXP2:
6216 CASE_CFN_EXP:
6217 CASE_CFN_EXPM1:
6218 CASE_CFN_LGAMMA:
6219 CASE_CFN_LOG10:
6220 CASE_CFN_LOG1P:
6221 CASE_CFN_LOG2:
6222 CASE_CFN_LOG:
6223 CASE_CFN_SIN:
6224 CASE_CFN_SINH:
6225 CASE_CFN_SQRT:
6226 CASE_CFN_TAN:
6227 CASE_CFN_TANH:
6228 if (el_mode == DFmode && n == 2)
6230 bdecl = mathfn_built_in (double_type_node, fn);
6231 suffix = "d2"; /* pow -> powd2 */
6233 else if (el_mode == SFmode && n == 4)
6235 bdecl = mathfn_built_in (float_type_node, fn);
6236 suffix = "4"; /* powf -> powf4 */
6238 else
6239 return NULL_TREE;
6240 if (!bdecl)
6241 return NULL_TREE;
6242 break;
6244 default:
6245 return NULL_TREE;
6248 gcc_assert (suffix != NULL);
6249 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
6250 if (!bname)
6251 return NULL_TREE;
6253 strcpy (name, bname + sizeof ("__builtin_") - 1);
6254 strcat (name, suffix);
6256 if (n_args == 1)
6257 fntype = build_function_type_list (type_out, type_in, NULL);
6258 else if (n_args == 2)
6259 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
6260 else
6261 gcc_unreachable ();
6263 /* Build a function declaration for the vectorized function. */
6264 new_fndecl = build_decl (BUILTINS_LOCATION,
6265 FUNCTION_DECL, get_identifier (name), fntype);
6266 TREE_PUBLIC (new_fndecl) = 1;
6267 DECL_EXTERNAL (new_fndecl) = 1;
6268 DECL_IS_NOVOPS (new_fndecl) = 1;
6269 TREE_READONLY (new_fndecl) = 1;
6271 return new_fndecl;
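/* Example of the name mangling above, taking CFN_POW on V2DFmode:
   bdecl resolves to __builtin_pow, the strcpy skips the "__builtin_"
   prefix (sizeof ("__builtin_") - 1 == 10 characters), and the "d2"
   suffix yields "powd2"; the SFmode/V4SF case starts from
   __builtin_powf and yields "powf4".  The string surgery, standalone:

     #include <string.h>

     char name[32];
     const char *bname = "__builtin_pow";
     strcpy (name, bname + sizeof ("__builtin_") - 1);  // name = "pow"
     strcat (name, "d2");                               // name = "powd2"
*/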
6274 /* Returns a function decl for a vectorized version of the builtin function
6275 with builtin function code FN and the result vector type TYPE, or NULL_TREE
6276 if it is not available. */
6278 static tree
6279 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
6280 tree type_in)
6282 machine_mode in_mode, out_mode;
6283 int in_n, out_n;
6285 if (TARGET_DEBUG_BUILTIN)
6286 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6287 combined_fn_name (combined_fn (fn)),
6288 GET_MODE_NAME (TYPE_MODE (type_out)),
6289 GET_MODE_NAME (TYPE_MODE (type_in)));
6291 if (TREE_CODE (type_out) != VECTOR_TYPE
6292 || TREE_CODE (type_in) != VECTOR_TYPE
6293 || !TARGET_VECTORIZE_BUILTINS)
6294 return NULL_TREE;
6296 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6297 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6298 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6299 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6301 switch (fn)
6303 CASE_CFN_COPYSIGN:
6304 if (VECTOR_UNIT_VSX_P (V2DFmode)
6305 && out_mode == DFmode && out_n == 2
6306 && in_mode == DFmode && in_n == 2)
6307 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
6308 if (VECTOR_UNIT_VSX_P (V4SFmode)
6309 && out_mode == SFmode && out_n == 4
6310 && in_mode == SFmode && in_n == 4)
6311 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
6312 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6313 && out_mode == SFmode && out_n == 4
6314 && in_mode == SFmode && in_n == 4)
6315 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
6316 break;
6317 CASE_CFN_CEIL:
6318 if (VECTOR_UNIT_VSX_P (V2DFmode)
6319 && out_mode == DFmode && out_n == 2
6320 && in_mode == DFmode && in_n == 2)
6321 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
6322 if (VECTOR_UNIT_VSX_P (V4SFmode)
6323 && out_mode == SFmode && out_n == 4
6324 && in_mode == SFmode && in_n == 4)
6325 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
6326 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6327 && out_mode == SFmode && out_n == 4
6328 && in_mode == SFmode && in_n == 4)
6329 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
6330 break;
6331 CASE_CFN_FLOOR:
6332 if (VECTOR_UNIT_VSX_P (V2DFmode)
6333 && out_mode == DFmode && out_n == 2
6334 && in_mode == DFmode && in_n == 2)
6335 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
6336 if (VECTOR_UNIT_VSX_P (V4SFmode)
6337 && out_mode == SFmode && out_n == 4
6338 && in_mode == SFmode && in_n == 4)
6339 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
6340 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6341 && out_mode == SFmode && out_n == 4
6342 && in_mode == SFmode && in_n == 4)
6343 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
6344 break;
6345 CASE_CFN_FMA:
6346 if (VECTOR_UNIT_VSX_P (V2DFmode)
6347 && out_mode == DFmode && out_n == 2
6348 && in_mode == DFmode && in_n == 2)
6349 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
6350 if (VECTOR_UNIT_VSX_P (V4SFmode)
6351 && out_mode == SFmode && out_n == 4
6352 && in_mode == SFmode && in_n == 4)
6353 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
6354 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6355 && out_mode == SFmode && out_n == 4
6356 && in_mode == SFmode && in_n == 4)
6357 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
6358 break;
6359 CASE_CFN_TRUNC:
6360 if (VECTOR_UNIT_VSX_P (V2DFmode)
6361 && out_mode == DFmode && out_n == 2
6362 && in_mode == DFmode && in_n == 2)
6363 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
6364 if (VECTOR_UNIT_VSX_P (V4SFmode)
6365 && out_mode == SFmode && out_n == 4
6366 && in_mode == SFmode && in_n == 4)
6367 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
6368 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6369 && out_mode == SFmode && out_n == 4
6370 && in_mode == SFmode && in_n == 4)
6371 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
6372 break;
6373 CASE_CFN_NEARBYINT:
6374 if (VECTOR_UNIT_VSX_P (V2DFmode)
6375 && flag_unsafe_math_optimizations
6376 && out_mode == DFmode && out_n == 2
6377 && in_mode == DFmode && in_n == 2)
6378 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
6379 if (VECTOR_UNIT_VSX_P (V4SFmode)
6380 && flag_unsafe_math_optimizations
6381 && out_mode == SFmode && out_n == 4
6382 && in_mode == SFmode && in_n == 4)
6383 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
6384 break;
6385 CASE_CFN_RINT:
6386 if (VECTOR_UNIT_VSX_P (V2DFmode)
6387 && !flag_trapping_math
6388 && out_mode == DFmode && out_n == 2
6389 && in_mode == DFmode && in_n == 2)
6390 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
6391 if (VECTOR_UNIT_VSX_P (V4SFmode)
6392 && !flag_trapping_math
6393 && out_mode == SFmode && out_n == 4
6394 && in_mode == SFmode && in_n == 4)
6395 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
6396 break;
6397 default:
6398 break;
6401 /* Generate calls to libmass if appropriate. */
6402 if (rs6000_veclib_handler)
6403 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
6405 return NULL_TREE;
6408 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6410 static tree
6411 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
6412 tree type_in)
6414 machine_mode in_mode, out_mode;
6415 int in_n, out_n;
6417 if (TARGET_DEBUG_BUILTIN)
6418 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6419 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6420 GET_MODE_NAME (TYPE_MODE (type_out)),
6421 GET_MODE_NAME (TYPE_MODE (type_in)));
6423 if (TREE_CODE (type_out) != VECTOR_TYPE
6424 || TREE_CODE (type_in) != VECTOR_TYPE
6425 || !TARGET_VECTORIZE_BUILTINS)
6426 return NULL_TREE;
6428 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6429 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6430 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6431 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6433 enum rs6000_builtins fn
6434 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6435 switch (fn)
6437 case RS6000_BUILTIN_RSQRTF:
6438 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6439 && out_mode == SFmode && out_n == 4
6440 && in_mode == SFmode && in_n == 4)
6441 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6442 break;
6443 case RS6000_BUILTIN_RSQRT:
6444 if (VECTOR_UNIT_VSX_P (V2DFmode)
6445 && out_mode == DFmode && out_n == 2
6446 && in_mode == DFmode && in_n == 2)
6447 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6448 break;
6449 case RS6000_BUILTIN_RECIPF:
6450 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6451 && out_mode == SFmode && out_n == 4
6452 && in_mode == SFmode && in_n == 4)
6453 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6454 break;
6455 case RS6000_BUILTIN_RECIP:
6456 if (VECTOR_UNIT_VSX_P (V2DFmode)
6457 && out_mode == DFmode && out_n == 2
6458 && in_mode == DFmode && in_n == 2)
6459 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6460 break;
6461 default:
6462 break;
6464 return NULL_TREE;
6467 /* Default CPU string for rs6000*_file_start functions. */
6468 static const char *rs6000_default_cpu;
6470 /* Do anything needed at the start of the asm file. */
6472 static void
6473 rs6000_file_start (void)
6475 char buffer[80];
6476 const char *start = buffer;
6477 FILE *file = asm_out_file;
6479 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6481 default_file_start ();
6483 if (flag_verbose_asm)
6485 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6487 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6489 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6490 start = "";
6493 if (global_options_set.x_rs6000_cpu_index)
6495 fprintf (file, "%s -mcpu=%s", start,
6496 processor_target_table[rs6000_cpu_index].name);
6497 start = "";
6500 if (global_options_set.x_rs6000_tune_index)
6502 fprintf (file, "%s -mtune=%s", start,
6503 processor_target_table[rs6000_tune_index].name);
6504 start = "";
6507 if (PPC405_ERRATUM77)
6509 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6510 start = "";
6513 #ifdef USING_ELFOS_H
6514 switch (rs6000_sdata)
6516 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6517 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6518 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6519 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6522 if (rs6000_sdata && g_switch_value)
6524 fprintf (file, "%s -G %d", start,
6525 g_switch_value);
6526 start = "";
6528 #endif
6530 if (*start == '\0')
6531 putc ('\n', file);
6534 #ifdef USING_ELFOS_H
6535 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6536 && !global_options_set.x_rs6000_cpu_index)
6538 fputs ("\t.machine ", asm_out_file);
6539 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6540 fputs ("power9\n", asm_out_file);
6541 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6542 fputs ("power8\n", asm_out_file);
6543 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6544 fputs ("power7\n", asm_out_file);
6545 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6546 fputs ("power6\n", asm_out_file);
6547 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6548 fputs ("power5\n", asm_out_file);
6549 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6550 fputs ("power4\n", asm_out_file);
6551 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6552 fputs ("ppc64\n", asm_out_file);
6553 else
6554 fputs ("ppc\n", asm_out_file);
6556 #endif
6558 if (DEFAULT_ABI == ABI_ELFv2)
6559 fprintf (file, "\t.abiversion 2\n");
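/* Illustrative output: on an ELF target where neither a --with-cpu
   default nor an explicit -mcpu pins a processor, the highest enabled
   ISA flag picks the directive, so a flag set including
   OPTION_MASK_DIRECT_MOVE but not OPTION_MASK_MODULO would begin the
   assembly file with

	.machine power8

   and an ELFv2 target additionally emits "\t.abiversion 2".  */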
6563 /* Return nonzero if this function is known to have a null epilogue. */
6565 int
6566 direct_return (void)
6568 if (reload_completed)
6570 rs6000_stack_t *info = rs6000_stack_info ();
6572 if (info->first_gp_reg_save == 32
6573 && info->first_fp_reg_save == 64
6574 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6575 && ! info->lr_save_p
6576 && ! info->cr_save_p
6577 && info->vrsave_size == 0
6578 && ! info->push_p)
6579 return 1;
6582 return 0;
6585 /* Return the number of instructions it takes to form a constant in an
6586 integer register. */
6588 int
6589 num_insns_constant_wide (HOST_WIDE_INT value)
6591 /* signed constant loadable with addi */
6592 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6593 return 1;
6595 /* constant loadable with addis */
6596 else if ((value & 0xffff) == 0
6597 && (value >> 31 == -1 || value >> 31 == 0))
6598 return 1;
6600 else if (TARGET_POWERPC64)
6602 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6603 HOST_WIDE_INT high = value >> 31;
6605 if (high == 0 || high == -1)
6606 return 2;
6608 high >>= 1;
6610 if (low == 0)
6611 return num_insns_constant_wide (high) + 1;
6612 else if (high == 0)
6613 return num_insns_constant_wide (low) + 1;
6614 else
6615 return (num_insns_constant_wide (high)
6616 + num_insns_constant_wide (low) + 1);
6619 else
6620 return 2;
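/* Worked examples of the tests above, assuming a 64-bit HOST_WIDE_INT:
   0x7fff fits the signed 16-bit addi range, so 1 insn; 0x12340000 has
   a clear low halfword and fits the sign-extended 32-bit addis range,
   so 1 insn; 0x12345678 needs lis + ori, so 2 insns; and a full 64-bit
   constant such as 0x1234567890abcdef recurses as
   num_insns_constant_wide (high) + num_insns_constant_wide (low) + 1
   = 2 + 2 + 1 = 5, the final + 1 being the insn that shifts and merges
   the two halves.  */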
6623 int
6624 num_insns_constant (rtx op, machine_mode mode)
6626 HOST_WIDE_INT low, high;
6628 switch (GET_CODE (op))
6630 case CONST_INT:
6631 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6632 && rs6000_is_valid_and_mask (op, mode))
6633 return 2;
6634 else
6635 return num_insns_constant_wide (INTVAL (op));
6637 case CONST_WIDE_INT:
6639 int i;
6640 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6641 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6642 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6643 return ins;
6646 case CONST_DOUBLE:
6647 if (mode == SFmode || mode == SDmode)
6649 long l;
6651 if (DECIMAL_FLOAT_MODE_P (mode))
6652 REAL_VALUE_TO_TARGET_DECIMAL32
6653 (*CONST_DOUBLE_REAL_VALUE (op), l);
6654 else
6655 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6656 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6659 long l[2];
6660 if (DECIMAL_FLOAT_MODE_P (mode))
6661 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6662 else
6663 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6664 high = l[WORDS_BIG_ENDIAN == 0];
6665 low = l[WORDS_BIG_ENDIAN != 0];
6667 if (TARGET_32BIT)
6668 return (num_insns_constant_wide (low)
6669 + num_insns_constant_wide (high));
6670 else
6672 if ((high == 0 && low >= 0)
6673 || (high == -1 && low < 0))
6674 return num_insns_constant_wide (low);
6676 else if (rs6000_is_valid_and_mask (op, mode))
6677 return 2;
6679 else if (low == 0)
6680 return num_insns_constant_wide (high) + 1;
6682 else
6683 return (num_insns_constant_wide (high)
6684 + num_insns_constant_wide (low) + 1);
6687 default:
6688 gcc_unreachable ();
6692 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6693 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6694 corresponding element of the vector, but for V4SFmode and V2SFmode,
6695 the corresponding "float" is interpreted as an SImode integer. */
6697 HOST_WIDE_INT
6698 const_vector_elt_as_int (rtx op, unsigned int elt)
6700 rtx tmp;
6702 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6703 gcc_assert (GET_MODE (op) != V2DImode
6704 && GET_MODE (op) != V2DFmode);
6706 tmp = CONST_VECTOR_ELT (op, elt);
6707 if (GET_MODE (op) == V4SFmode
6708 || GET_MODE (op) == V2SFmode)
6709 tmp = gen_lowpart (SImode, tmp);
6710 return INTVAL (tmp);
6713 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6714 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6715 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6716 all items are set to the same value and contain COPIES replicas of the
6717 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6718 operand and the others are set to the value of the operand's msb. */
6720 static bool
6721 vspltis_constant (rtx op, unsigned step, unsigned copies)
6723 machine_mode mode = GET_MODE (op);
6724 machine_mode inner = GET_MODE_INNER (mode);
6726 unsigned i;
6727 unsigned nunits;
6728 unsigned bitsize;
6729 unsigned mask;
6731 HOST_WIDE_INT val;
6732 HOST_WIDE_INT splat_val;
6733 HOST_WIDE_INT msb_val;
6735 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6736 return false;
6738 nunits = GET_MODE_NUNITS (mode);
6739 bitsize = GET_MODE_BITSIZE (inner);
6740 mask = GET_MODE_MASK (inner);
6742 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6743 splat_val = val;
6744 msb_val = val >= 0 ? 0 : -1;
6746 /* Construct the value to be splatted, if possible. If not, return 0. */
6747 for (i = 2; i <= copies; i *= 2)
6749 HOST_WIDE_INT small_val;
6750 bitsize /= 2;
6751 small_val = splat_val >> bitsize;
6752 mask >>= bitsize;
6753 if (splat_val != ((HOST_WIDE_INT)
6754 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6755 | (small_val & mask)))
6756 return false;
6757 splat_val = small_val;
6760 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6761 if (EASY_VECTOR_15 (splat_val))
6764   /* Also check if we can splat, and then add the result to itself.  Do so if
6765      the value is positive, or if the splat instruction is using OP's mode;
6766      for splat_val < 0, the splat and the add should use the same mode.  */
6767 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6768 && (splat_val >= 0 || (step == 1 && copies == 1)))
6771   /* Also check if we are loading up the most significant bit, which can be
6772      done by loading up -1 and shifting the value left by -1.  */
6773 else if (EASY_VECTOR_MSB (splat_val, inner))
6776 else
6777 return false;
6779 /* Check if VAL is present in every STEP-th element, and the
6780 other elements are filled with its most significant bit. */
6781 for (i = 1; i < nunits; ++i)
6783 HOST_WIDE_INT desired_val;
6784 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6785 if ((i & (step - 1)) == 0)
6786 desired_val = val;
6787 else
6788 desired_val = msb_val;
6790 if (desired_val != const_vector_elt_as_int (op, elt))
6791 return false;
6794 return true;
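/* Worked examples of the STEP/COPIES contract above: a V8HImode
   constant whose halfwords alternate 5 and 0 (with the 5 in the
   position the splat targets) passes with step 2, copies 1; it is a
   vspltisw of 5 viewed as eight halfwords, the in-between elements
   holding the msb (0) of the positive splat value.  A V8HImode vector
   of eight 0x0303 halfwords passes with step 1, copies 2: the folding
   loop halves BITSIZE to 8, verifies that 0x0303 is two replicas of
   0x03, and ends up testing a vspltisb of 3.  */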
6797 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6798 instruction, filling in the bottom elements with 0 or -1.
6800 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6801 for the number of zeroes to shift in, or negative for the number of 0xff
6802 bytes to shift in.
6804 OP is a CONST_VECTOR. */
6806 int
6807 vspltis_shifted (rtx op)
6809 machine_mode mode = GET_MODE (op);
6810 machine_mode inner = GET_MODE_INNER (mode);
6812 unsigned i, j;
6813 unsigned nunits;
6814 unsigned mask;
6816 HOST_WIDE_INT val;
6818 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6819 return false;
6821 /* We need to create pseudo registers to do the shift, so don't recognize
6822 shift vector constants after reload. */
6823 if (!can_create_pseudo_p ())
6824 return false;
6826 nunits = GET_MODE_NUNITS (mode);
6827 mask = GET_MODE_MASK (inner);
6829 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6831 /* Check if the value can really be the operand of a vspltis[bhw]. */
6832 if (EASY_VECTOR_15 (val))
6835 /* Also check if we are loading up the most significant bit which can be done
6836 by loading up -1 and shifting the value left by -1. */
6837 else if (EASY_VECTOR_MSB (val, inner))
6840 else
6841 return 0;
6843 /* Check if VAL is present in every STEP-th element until we find elements
6844 that are 0 or all 1 bits. */
6845 for (i = 1; i < nunits; ++i)
6847 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6848 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6850 /* If the value isn't the splat value, check for the remaining elements
6851 being 0/-1. */
6852 if (val != elt_val)
6854 if (elt_val == 0)
6856 for (j = i+1; j < nunits; ++j)
6858 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6859 if (const_vector_elt_as_int (op, elt2) != 0)
6860 return 0;
6863 return (nunits - i) * GET_MODE_SIZE (inner);
6866 else if ((elt_val & mask) == mask)
6868 for (j = i+1; j < nunits; ++j)
6870 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6871 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6872 return 0;
6875 return -((nunits - i) * GET_MODE_SIZE (inner));
6878 else
6879 return 0;
6883   /* If all elements are equal, we don't need to do VSLDOI.  */
6884 return 0;
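/* Worked examples of the return convention above, on a big-endian
   V4SImode target: {5, 5, 0, 0} splats 5 and shifts in two zero words,
   so the result is (4 - 2) * 4 = 8 bytes of zeroes; {7, 7, 7, -1} ends
   in an all-ones word, so the result is -((4 - 3) * 4) = -4, i.e. four
   0xff bytes to shift in; any other trailing element makes the
   constant ineligible and 0 is returned.  */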
6888 /* Return true if OP is of the given MODE and can be synthesized
6889 with a vspltisb, vspltish or vspltisw. */
6891 bool
6892 easy_altivec_constant (rtx op, machine_mode mode)
6894 unsigned step, copies;
6896 if (mode == VOIDmode)
6897 mode = GET_MODE (op);
6898 else if (mode != GET_MODE (op))
6899 return false;
6901 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6902 constants. */
6903 if (mode == V2DFmode)
6904 return zero_constant (op, mode);
6906 else if (mode == V2DImode)
6908 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6909 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6910 return false;
6912 if (zero_constant (op, mode))
6913 return true;
6915 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6916 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6917 return true;
6919 return false;
6922 /* V1TImode is a special container for TImode. Ignore for now. */
6923 else if (mode == V1TImode)
6924 return false;
6926 /* Start with a vspltisw. */
6927 step = GET_MODE_NUNITS (mode) / 4;
6928 copies = 1;
6930 if (vspltis_constant (op, step, copies))
6931 return true;
6933 /* Then try with a vspltish. */
6934 if (step == 1)
6935 copies <<= 1;
6936 else
6937 step >>= 1;
6939 if (vspltis_constant (op, step, copies))
6940 return true;
6942 /* And finally a vspltisb. */
6943 if (step == 1)
6944 copies <<= 1;
6945 else
6946 step >>= 1;
6948 if (vspltis_constant (op, step, copies))
6949 return true;
6951 if (vspltis_shifted (op) != 0)
6952 return true;
6954 return false;
6957 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6958 result is OP. Abort if it is not possible. */
6960 rtx
6961 gen_easy_altivec_constant (rtx op)
6963 machine_mode mode = GET_MODE (op);
6964 int nunits = GET_MODE_NUNITS (mode);
6965 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6966 unsigned step = nunits / 4;
6967 unsigned copies = 1;
6969 /* Start with a vspltisw. */
6970 if (vspltis_constant (op, step, copies))
6971 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6973 /* Then try with a vspltish. */
6974 if (step == 1)
6975 copies <<= 1;
6976 else
6977 step >>= 1;
6979 if (vspltis_constant (op, step, copies))
6980 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6982 /* And finally a vspltisb. */
6983 if (step == 1)
6984 copies <<= 1;
6985 else
6986 step >>= 1;
6988 if (vspltis_constant (op, step, copies))
6989 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6991 gcc_unreachable ();
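/* For instance (a sketch): for the V8HImode constant
   { 3, 3, 3, 3, 3, 3, 3, 3 } the vspltish test succeeds and the function
   returns (vec_duplicate:V8HI (const_int 3)), which the caller can
   output as "vspltish %0,3".  */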
6994 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6995 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6997 Return the number of instructions needed (1 or 2) in the location pointed
6998 to by NUM_INSNS_PTR.
7000 Return the constant to be loaded in the location pointed to by CONSTANT_PTR. */
7002 bool
7003 xxspltib_constant_p (rtx op,
7004 machine_mode mode,
7005 int *num_insns_ptr,
7006 int *constant_ptr)
7008 size_t nunits = GET_MODE_NUNITS (mode);
7009 size_t i;
7010 HOST_WIDE_INT value;
7011 rtx element;
7013 /* Set the returned values to out of bound values. */
7014 *num_insns_ptr = -1;
7015 *constant_ptr = 256;
7017 if (!TARGET_P9_VECTOR)
7018 return false;
7020 if (mode == VOIDmode)
7021 mode = GET_MODE (op);
7023 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
7024 return false;
7026 /* Handle (vec_duplicate <constant>). */
7027 if (GET_CODE (op) == VEC_DUPLICATE)
7029 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
7030 && mode != V2DImode)
7031 return false;
7033 element = XEXP (op, 0);
7034 if (!CONST_INT_P (element))
7035 return false;
7037 value = INTVAL (element);
7038 if (!IN_RANGE (value, -128, 127))
7039 return false;
7042 /* Handle (const_vector [...]). */
7043 else if (GET_CODE (op) == CONST_VECTOR)
7045 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
7046 && mode != V2DImode)
7047 return false;
7049 element = CONST_VECTOR_ELT (op, 0);
7050 if (!CONST_INT_P (element))
7051 return false;
7053 value = INTVAL (element);
7054 if (!IN_RANGE (value, -128, 127))
7055 return false;
7057 for (i = 1; i < nunits; i++)
7059 element = CONST_VECTOR_ELT (op, i);
7060 if (!CONST_INT_P (element))
7061 return false;
7063 if (value != INTVAL (element))
7064 return false;
7068 /* Handle integer constants being loaded into the upper part of the VSX
7069 register as a scalar. If the value isn't 0/-1, only allow it if the mode
7070 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
7071 else if (CONST_INT_P (op))
7073 if (!SCALAR_INT_MODE_P (mode))
7074 return false;
7076 value = INTVAL (op);
7077 if (!IN_RANGE (value, -128, 127))
7078 return false;
7080 if (!IN_RANGE (value, -1, 0))
7082 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
7083 return false;
7085 if (EASY_VECTOR_15 (value))
7086 return false;
7090 else
7091 return false;
7093 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
7094 sign extend. Special case 0/-1 to allow getting any VSX register instead
7095 of an Altivec register. */
7096 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
7097 && EASY_VECTOR_15 (value))
7098 return false;
7100 /* Return # of instructions and the constant byte for XXSPLTIB. */
7101 if (mode == V16QImode)
7102 *num_insns_ptr = 1;
7104 else if (IN_RANGE (value, -1, 0))
7105 *num_insns_ptr = 1;
7107 else
7108 *num_insns_ptr = 2;
7110 *constant_ptr = (int) value;
7111 return true;
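/* Worked example (a sketch, not from the original sources): the V4SImode
   constant { 100, 100, 100, 100 } is outside the vspltisw range but fits
   in a signed byte, so this returns true with *NUM_INSNS_PTR = 2 and
   *CONSTANT_PTR = 100; the expected sequence is roughly an
   "xxspltib vT,100" followed by a byte-to-word sign extension such as
   vextsb2w.  A splat of 7 would instead return false, since
   EASY_VECTOR_15 (7) holds and a single vspltisw is preferred.  */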
7114 const char *
7115 output_vec_const_move (rtx *operands)
7117 int cst, cst2, shift;
7118 machine_mode mode;
7119 rtx dest, vec;
7121 dest = operands[0];
7122 vec = operands[1];
7123 mode = GET_MODE (dest);
7125 if (TARGET_VSX)
7127 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
7128 int xxspltib_value = 256;
7129 int num_insns = -1;
7131 if (zero_constant (vec, mode))
7133 if (TARGET_P9_VECTOR)
7134 return "xxspltib %x0,0";
7136 else if (dest_vmx_p)
7137 return "vspltisw %0,0";
7139 else
7140 return "xxlxor %x0,%x0,%x0";
7143 if (all_ones_constant (vec, mode))
7145 if (TARGET_P9_VECTOR)
7146 return "xxspltib %x0,255";
7148 else if (dest_vmx_p)
7149 return "vspltisw %0,-1";
7151 else if (TARGET_P8_VECTOR)
7152 return "xxlorc %x0,%x0,%x0";
7154 else
7155 gcc_unreachable ();
7158 if (TARGET_P9_VECTOR
7159 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
7161 if (num_insns == 1)
7163 operands[2] = GEN_INT (xxspltib_value & 0xff);
7164 return "xxspltib %x0,%2";
7167 return "#";
7171 if (TARGET_ALTIVEC)
7173 rtx splat_vec;
7175 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
7176 if (zero_constant (vec, mode))
7177 return "vspltisw %0,0";
7179 if (all_ones_constant (vec, mode))
7180 return "vspltisw %0,-1";
7182 /* Do we need to construct a value using VSLDOI? */
7183 shift = vspltis_shifted (vec);
7184 if (shift != 0)
7185 return "#";
7187 splat_vec = gen_easy_altivec_constant (vec);
7188 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
7189 operands[1] = XEXP (splat_vec, 0);
7190 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
7191 return "#";
7193 switch (GET_MODE (splat_vec))
7195 case E_V4SImode:
7196 return "vspltisw %0,%1";
7198 case E_V8HImode:
7199 return "vspltish %0,%1";
7201 case E_V16QImode:
7202 return "vspltisb %0,%1";
7204 default:
7205 gcc_unreachable ();
7209 gcc_assert (TARGET_SPE);
7211 /* Vector constant 0 is handled as a splitter of V2SI, and in the
7212 pattern of V1DI, V4HI, and V2SF.
7214 FIXME: We should probably return # and add post reload
7215 splitters for these, but this way is so easy ;-). */
7216 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
7217 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
7218 operands[1] = CONST_VECTOR_ELT (vec, 0);
7219 operands[2] = CONST_VECTOR_ELT (vec, 1);
7220 if (cst == cst2)
7221 return "li %0,%1\n\tevmergelo %0,%0,%0";
7222 else if (WORDS_BIG_ENDIAN)
7223 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
7224 else
7225 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
7228 /* Initialize the paired-float vector TARGET to VALS. */
7230 void
7231 paired_expand_vector_init (rtx target, rtx vals)
7233 machine_mode mode = GET_MODE (target);
7234 int n_elts = GET_MODE_NUNITS (mode);
7235 int n_var = 0;
7236 rtx x, new_rtx, tmp, constant_op, op1, op2;
7237 int i;
7239 for (i = 0; i < n_elts; ++i)
7241 x = XVECEXP (vals, 0, i);
7242 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7243 ++n_var;
7245 if (n_var == 0)
7247 /* Load from constant pool. */
7248 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
7249 return;
7252 if (n_var == 2)
7254 /* The vector is initialized only with non-constants. */
7255 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
7256 XVECEXP (vals, 0, 1));
7258 emit_move_insn (target, new_rtx);
7259 return;
7262 /* One field is non-constant and the other one is a constant. Load the
7263 constant from the constant pool and use ps_merge instruction to
7264 construct the whole vector. */
7265 op1 = XVECEXP (vals, 0, 0);
7266 op2 = XVECEXP (vals, 0, 1);
7268 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
7270 tmp = gen_reg_rtx (GET_MODE (constant_op));
7271 emit_move_insn (tmp, constant_op);
7273 if (CONSTANT_P (op1))
7274 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
7275 else
7276 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
7278 emit_move_insn (target, new_rtx);
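/* Example of the mixed case above (illustrative): for V2SFmode
   { x, 2.0f } with X in a register, the constant 2.0f is loaded from the
   constant pool into a temporary and the function emits
   (set (reg:V2SF target) (vec_concat:V2SF x tmp)), leaving the actual
   ps_merge-style instruction selection to the machine description.  */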
7281 void
7282 paired_expand_vector_move (rtx operands[])
7284 rtx op0 = operands[0], op1 = operands[1];
7286 emit_move_insn (op0, op1);
7289 /* Emit vector compare for code RCODE. DEST is the destination, OP0 and
7290 OP1 are the two VEC_COND_EXPR operands, and CC_OP0 and CC_OP1 are the
7291 two operands of the relational operation RCODE. This is a recursive
7292 function. */
7294 static void
7295 paired_emit_vector_compare (enum rtx_code rcode,
7296 rtx dest, rtx op0, rtx op1,
7297 rtx cc_op0, rtx cc_op1)
7299 rtx tmp = gen_reg_rtx (V2SFmode);
7300 rtx tmp1, max, min;
7302 gcc_assert (TARGET_PAIRED_FLOAT);
7303 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
7305 switch (rcode)
7307 case LT:
7308 case LTU:
7309 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7310 return;
7311 case GE:
7312 case GEU:
7313 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7314 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
7315 return;
7316 case LE:
7317 case LEU:
7318 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
7319 return;
7320 case GT:
7321 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7322 return;
7323 case EQ:
7324 tmp1 = gen_reg_rtx (V2SFmode);
7325 max = gen_reg_rtx (V2SFmode);
7326 min = gen_reg_rtx (V2SFmode);
7329 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7330 emit_insn (gen_selv2sf4
7331 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7332 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
7333 emit_insn (gen_selv2sf4
7334 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7335 emit_insn (gen_subv2sf3 (tmp1, min, max));
7336 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
7337 return;
7338 case NE:
7339 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
7340 return;
7341 case UNLE:
7342 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7343 return;
7344 case UNLT:
7345 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
7346 return;
7347 case UNGE:
7348 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7349 return;
7350 case UNGT:
7351 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
7352 return;
7353 default:
7354 gcc_unreachable ();
7357 return;
7360 /* Emit vector conditional expression.
7361 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7362 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
7364 int
7365 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
7366 rtx cond, rtx cc_op0, rtx cc_op1)
7368 enum rtx_code rcode = GET_CODE (cond);
7370 if (!TARGET_PAIRED_FLOAT)
7371 return 0;
7373 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
7375 return 1;
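/* Recursion sketch: an LT comparison is handled by rewriting
   "dest = (a < b) ? x : y" as "dest = (a >= b) ? y : x" (swapping the
   value operands), which then maps onto the subtract + ps_sel sequence
   emitted for GE; NE similarly swaps the value operands and recurses
   as EQ.  */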
7378 /* Initialize vector TARGET to VALS. */
7380 void
7381 rs6000_expand_vector_init (rtx target, rtx vals)
7383 machine_mode mode = GET_MODE (target);
7384 machine_mode inner_mode = GET_MODE_INNER (mode);
7385 int n_elts = GET_MODE_NUNITS (mode);
7386 int n_var = 0, one_var = -1;
7387 bool all_same = true, all_const_zero = true;
7388 rtx x, mem;
7389 int i;
7391 for (i = 0; i < n_elts; ++i)
7393 x = XVECEXP (vals, 0, i);
7394 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7395 ++n_var, one_var = i;
7396 else if (x != CONST0_RTX (inner_mode))
7397 all_const_zero = false;
7399 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7400 all_same = false;
7403 if (n_var == 0)
7405 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7406 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
7407 if ((int_vector_p || TARGET_VSX) && all_const_zero)
7409 /* Zero register. */
7410 emit_move_insn (target, CONST0_RTX (mode));
7411 return;
7413 else if (int_vector_p && easy_vector_constant (const_vec, mode))
7415 /* Splat immediate. */
7416 emit_insn (gen_rtx_SET (target, const_vec));
7417 return;
7419 else
7421 /* Load from constant pool. */
7422 emit_move_insn (target, const_vec);
7423 return;
7427 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7428 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7430 rtx op[2];
7431 size_t i;
7432 size_t num_elements = all_same ? 1 : 2;
7433 for (i = 0; i < num_elements; i++)
7435 op[i] = XVECEXP (vals, 0, i);
7436 /* Just in case there is a SUBREG with a smaller mode, do a
7437 conversion. */
7438 if (GET_MODE (op[i]) != inner_mode)
7440 rtx tmp = gen_reg_rtx (inner_mode);
7441 convert_move (tmp, op[i], 0);
7442 op[i] = tmp;
7444 /* Allow load with splat double word. */
7445 else if (MEM_P (op[i]))
7447 if (!all_same)
7448 op[i] = force_reg (inner_mode, op[i]);
7450 else if (!REG_P (op[i]))
7451 op[i] = force_reg (inner_mode, op[i]);
7454 if (all_same)
7456 if (mode == V2DFmode)
7457 emit_insn (gen_vsx_splat_v2df (target, op[0]));
7458 else
7459 emit_insn (gen_vsx_splat_v2di (target, op[0]));
7461 else
7463 if (mode == V2DFmode)
7464 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7465 else
7466 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7468 return;
7471 /* Special case initializing vector int if we are on 64-bit systems with
7472 direct move or we have the ISA 3.0 instructions. */
7473 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7474 && TARGET_DIRECT_MOVE_64BIT)
7476 if (all_same)
7478 rtx element0 = XVECEXP (vals, 0, 0);
7479 if (MEM_P (element0))
7480 element0 = rs6000_address_for_fpconvert (element0);
7481 else
7482 element0 = force_reg (SImode, element0);
7484 if (TARGET_P9_VECTOR)
7485 emit_insn (gen_vsx_splat_v4si (target, element0));
7486 else
7488 rtx tmp = gen_reg_rtx (DImode);
7489 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7490 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7492 return;
7494 else
7496 rtx elements[4];
7497 size_t i;
7499 for (i = 0; i < 4; i++)
7501 elements[i] = XVECEXP (vals, 0, i);
7502 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7503 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7506 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7507 elements[2], elements[3]));
7508 return;
7512 /* With single-precision floating point on VSX, note that internally single
7513 precision is actually represented as a double. Either make two V2DF
7514 vectors and convert those vectors to single precision, or do one
7515 conversion and splat the result to the other elements. */
7516 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7518 if (all_same)
7520 rtx element0 = XVECEXP (vals, 0, 0);
7522 if (TARGET_P9_VECTOR)
7524 if (MEM_P (element0))
7525 element0 = rs6000_address_for_fpconvert (element0);
7527 emit_insn (gen_vsx_splat_v4sf (target, element0));
7530 else
7532 rtx freg = gen_reg_rtx (V4SFmode);
7533 rtx sreg = force_reg (SFmode, element0);
7534 rtx cvt = (TARGET_XSCVDPSPN
7535 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7536 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7538 emit_insn (cvt);
7539 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7540 const0_rtx));
7543 else
7545 rtx dbl_even = gen_reg_rtx (V2DFmode);
7546 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7547 rtx flt_even = gen_reg_rtx (V4SFmode);
7548 rtx flt_odd = gen_reg_rtx (V4SFmode);
7549 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7550 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7551 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7552 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7554 /* Use VMRGEW if we can instead of doing a permute. */
7555 if (TARGET_P8_VECTOR)
7557 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7558 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7559 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7560 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7561 if (BYTES_BIG_ENDIAN)
7562 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7563 else
7564 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7566 else
7568 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7569 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7570 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7571 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7572 rs6000_expand_extract_even (target, flt_even, flt_odd);
7575 return;
7578 /* Special case initializing vector short/char that are splats if we are on
7579 64-bit systems with direct move. */
7580 if (all_same && TARGET_DIRECT_MOVE_64BIT
7581 && (mode == V16QImode || mode == V8HImode))
7583 rtx op0 = XVECEXP (vals, 0, 0);
7584 rtx di_tmp = gen_reg_rtx (DImode);
7586 if (!REG_P (op0))
7587 op0 = force_reg (GET_MODE_INNER (mode), op0);
7589 if (mode == V16QImode)
7591 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7592 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7593 return;
7596 if (mode == V8HImode)
7598 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7599 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7600 return;
7604 /* Store value to stack temp. Load vector element. Splat. However, splat
7605 of 64-bit items is not supported on Altivec. */
7606 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7608 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7609 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7610 XVECEXP (vals, 0, 0));
7611 x = gen_rtx_UNSPEC (VOIDmode,
7612 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7613 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7614 gen_rtvec (2,
7615 gen_rtx_SET (target, mem),
7616 x)));
7617 x = gen_rtx_VEC_SELECT (inner_mode, target,
7618 gen_rtx_PARALLEL (VOIDmode,
7619 gen_rtvec (1, const0_rtx)));
7620 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7621 return;
7624 /* One field is non-constant. Load constant then overwrite
7625 varying field. */
7626 if (n_var == 1)
7628 rtx copy = copy_rtx (vals);
7630 /* Load constant part of vector, substitute neighboring value for
7631 varying element. */
7632 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7633 rs6000_expand_vector_init (target, copy);
7635 /* Insert variable. */
7636 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7637 return;
7640 /* Construct the vector in memory one field at a time
7641 and load the whole vector. */
7642 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7643 for (i = 0; i < n_elts; i++)
7644 emit_move_insn (adjust_address_nv (mem, inner_mode,
7645 i * GET_MODE_SIZE (inner_mode)),
7646 XVECEXP (vals, 0, i));
7647 emit_move_insn (target, mem);
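/* Memory fallback sketch: for a V4SImode init { a, b, c, d } with more
   than one variable element and none of the special cases above, each
   SImode value is stored into a 16-byte stack slot at byte offsets 0, 4,
   8 and 12, and the vector is then loaded from the slot with a single
   move.  */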
7650 /* Set field ELT of TARGET to VAL. */
7652 void
7653 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7655 machine_mode mode = GET_MODE (target);
7656 machine_mode inner_mode = GET_MODE_INNER (mode);
7657 rtx reg = gen_reg_rtx (mode);
7658 rtx mask, mem, x;
7659 int width = GET_MODE_SIZE (inner_mode);
7660 int i;
7662 val = force_reg (GET_MODE (val), val);
7664 if (VECTOR_MEM_VSX_P (mode))
7666 rtx insn = NULL_RTX;
7667 rtx elt_rtx = GEN_INT (elt);
7669 if (mode == V2DFmode)
7670 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7672 else if (mode == V2DImode)
7673 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7675 else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
7676 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
7678 if (mode == V4SImode)
7679 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7680 else if (mode == V8HImode)
7681 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7682 else if (mode == V16QImode)
7683 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7686 if (insn)
7688 emit_insn (insn);
7689 return;
7693 /* Simplify setting single element vectors like V1TImode. */
7694 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7696 emit_move_insn (target, gen_lowpart (mode, val));
7697 return;
7700 /* Load single variable value. */
7701 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7702 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7703 x = gen_rtx_UNSPEC (VOIDmode,
7704 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7705 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7706 gen_rtvec (2,
7707 gen_rtx_SET (reg, mem),
7708 x)));
7710 /* Linear sequence. */
7711 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7712 for (i = 0; i < 16; ++i)
7713 XVECEXP (mask, 0, i) = GEN_INT (i);
7715 /* Set permute mask to insert element into target. */
7716 for (i = 0; i < width; ++i)
7717 XVECEXP (mask, 0, elt*width + i)
7718 = GEN_INT (i + 0x10);
7719 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7721 if (BYTES_BIG_ENDIAN)
7722 x = gen_rtx_UNSPEC (mode,
7723 gen_rtvec (3, target, reg,
7724 force_reg (V16QImode, x)),
7725 UNSPEC_VPERM);
7726 else
7728 if (TARGET_P9_VECTOR)
7729 x = gen_rtx_UNSPEC (mode,
7730 gen_rtvec (3, target, reg,
7731 force_reg (V16QImode, x)),
7732 UNSPEC_VPERMR);
7733 else
7735 /* Invert selector. We prefer to generate VNAND on P8 so
7736 that future fusion opportunities can kick in, but must
7737 generate VNOR elsewhere. */
7738 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7739 rtx iorx = (TARGET_P8_VECTOR
7740 ? gen_rtx_IOR (V16QImode, notx, notx)
7741 : gen_rtx_AND (V16QImode, notx, notx));
7742 rtx tmp = gen_reg_rtx (V16QImode);
7743 emit_insn (gen_rtx_SET (tmp, iorx));
7745 /* Permute with operands reversed and adjusted selector. */
7746 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7747 UNSPEC_VPERM);
7751 emit_insn (gen_rtx_SET (target, x));
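/* Permute-mask sketch (an illustration, under the big-endian ordering):
   inserting a 4-byte element at ELT = 2 of a V4SImode vector builds the
   V16QImode selector { 0,1,2,3, 4,5,6,7, 16,17,18,19, 12,13,14,15 }, so
   bytes 8..11 of the result are taken from the second vperm input (REG,
   holding the new value) and all other bytes from TARGET.  */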
7754 /* Extract field ELT from VEC into TARGET. */
7756 void
7757 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7759 machine_mode mode = GET_MODE (vec);
7760 machine_mode inner_mode = GET_MODE_INNER (mode);
7761 rtx mem;
7763 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7765 switch (mode)
7767 default:
7768 break;
7769 case E_V1TImode:
7770 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7771 emit_move_insn (target, gen_lowpart (TImode, vec));
7772 break;
7773 case E_V2DFmode:
7774 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7775 return;
7776 case E_V2DImode:
7777 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7778 return;
7779 case E_V4SFmode:
7780 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7781 return;
7782 case E_V16QImode:
7783 if (TARGET_DIRECT_MOVE_64BIT)
7785 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7786 return;
7788 else
7789 break;
7790 case E_V8HImode:
7791 if (TARGET_DIRECT_MOVE_64BIT)
7793 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7794 return;
7796 else
7797 break;
7798 case E_V4SImode:
7799 if (TARGET_DIRECT_MOVE_64BIT)
7801 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7802 return;
7804 break;
7807 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7808 && TARGET_DIRECT_MOVE_64BIT)
7810 if (GET_MODE (elt) != DImode)
7812 rtx tmp = gen_reg_rtx (DImode);
7813 convert_move (tmp, elt, 0);
7814 elt = tmp;
7816 else if (!REG_P (elt))
7817 elt = force_reg (DImode, elt);
7819 switch (mode)
7821 case E_V2DFmode:
7822 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7823 return;
7825 case E_V2DImode:
7826 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7827 return;
7829 case E_V4SFmode:
7830 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7831 return;
7833 case E_V4SImode:
7834 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7835 return;
7837 case E_V8HImode:
7838 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7839 return;
7841 case E_V16QImode:
7842 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7843 return;
7845 default:
7846 gcc_unreachable ();
7850 gcc_assert (CONST_INT_P (elt));
7852 /* Allocate mode-sized buffer. */
7853 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7855 emit_move_insn (mem, vec);
7857 /* Add offset to field within buffer matching vector element. */
7858 mem = adjust_address_nv (mem, inner_mode,
7859 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7861 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
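/* Fallback sketch: extracting constant element 3 of a V8HImode vector
   without a matching VSX pattern stores the whole vector to a 16-byte
   stack temporary and reloads the HImode value at byte offset
   3 * GET_MODE_SIZE (HImode) = 6.  */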
7864 /* Helper function to return the register number of a RTX. */
7865 static inline int
7866 regno_or_subregno (rtx op)
7868 if (REG_P (op))
7869 return REGNO (op);
7870 else if (SUBREG_P (op))
7871 return subreg_regno (op);
7872 else
7873 gcc_unreachable ();
7876 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7877 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7878 temporary (BASE_TMP) to fixup the address. Return the new memory address
7879 that is valid for reads or writes to a given register (SCALAR_REG). */
7881 static rtx
7882 rs6000_adjust_vec_address (rtx scalar_reg,
7883 rtx mem,
7884 rtx element,
7885 rtx base_tmp,
7886 machine_mode scalar_mode)
7888 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7889 rtx addr = XEXP (mem, 0);
7890 rtx element_offset;
7891 rtx new_addr;
7892 bool valid_addr_p;
7894 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7895 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7897 /* Calculate what we need to add to the address to get the element
7898 address. */
7899 if (CONST_INT_P (element))
7900 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7901 else
7903 int byte_shift = exact_log2 (scalar_size);
7904 gcc_assert (byte_shift >= 0);
7906 if (byte_shift == 0)
7907 element_offset = element;
7909 else
7911 if (TARGET_POWERPC64)
7912 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7913 else
7914 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7916 element_offset = base_tmp;
7920 /* Create the new address pointing to the element within the vector. If we
7921 are adding 0, we don't have to change the address. */
7922 if (element_offset == const0_rtx)
7923 new_addr = addr;
7925 /* A simple indirect address can be converted into a reg + offset
7926 address. */
7927 else if (REG_P (addr) || SUBREG_P (addr))
7928 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7930 /* Optimize D-FORM addresses with a constant offset and a constant element,
7931 to include the element offset in the address directly. */
7932 else if (GET_CODE (addr) == PLUS)
7934 rtx op0 = XEXP (addr, 0);
7935 rtx op1 = XEXP (addr, 1);
7936 rtx insn;
7938 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7939 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7941 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7942 rtx offset_rtx = GEN_INT (offset);
7944 if (IN_RANGE (offset, -32768, 32767)
7945 && (scalar_size < 8 || (offset & 0x3) == 0))
7946 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7947 else
7949 emit_move_insn (base_tmp, offset_rtx);
7950 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7953 else
7955 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7956 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7958 /* Note, ADDI requires the register being added to be a base
7959 register. If the register was R0, load it up into the temporary
7960 and do the add. */
7961 if (op1_reg_p
7962 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7964 insn = gen_add3_insn (base_tmp, op1, element_offset);
7965 gcc_assert (insn != NULL_RTX);
7966 emit_insn (insn);
7969 else if (ele_reg_p
7970 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7972 insn = gen_add3_insn (base_tmp, element_offset, op1);
7973 gcc_assert (insn != NULL_RTX);
7974 emit_insn (insn);
7977 else
7979 emit_move_insn (base_tmp, op1);
7980 emit_insn (gen_add2_insn (base_tmp, element_offset));
7983 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7987 else
7989 emit_move_insn (base_tmp, addr);
7990 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7993 /* If we have a PLUS, we need to see whether the particular register class
7994 allows for D-FORM or X-FORM addressing. */
7995 if (GET_CODE (new_addr) == PLUS)
7997 rtx op1 = XEXP (new_addr, 1);
7998 addr_mask_type addr_mask;
7999 int scalar_regno = regno_or_subregno (scalar_reg);
8001 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
8002 if (INT_REGNO_P (scalar_regno))
8003 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
8005 else if (FP_REGNO_P (scalar_regno))
8006 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
8008 else if (ALTIVEC_REGNO_P (scalar_regno))
8009 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
8011 else
8012 gcc_unreachable ();
8014 if (REG_P (op1) || SUBREG_P (op1))
8015 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
8016 else
8017 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
8020 else if (REG_P (new_addr) || SUBREG_P (new_addr))
8021 valid_addr_p = true;
8023 else
8024 valid_addr_p = false;
8026 if (!valid_addr_p)
8028 emit_move_insn (base_tmp, new_addr);
8029 new_addr = base_tmp;
8032 return change_address (mem, scalar_mode, new_addr);
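/* Worked example (a sketch): for a V4SImode MEM at (plus r9 32) and the
   constant ELEMENT 2, the SImode element lives at (plus r9 40), which is
   used directly since 40 fits the 16-bit displacement.  With a variable
   ELEMENT in a register, the element number is first shifted left by 2
   into BASE_TMP and the sum is formed there, falling back to
   materializing the full address in BASE_TMP whenever the register
   class of SCALAR_REG only supports X-FORM (reg+reg) addressing.  */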
8035 /* Split a variable vec_extract operation into the component instructions. */
8037 void
8038 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
8039 rtx tmp_altivec)
8041 machine_mode mode = GET_MODE (src);
8042 machine_mode scalar_mode = GET_MODE (dest);
8043 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
8044 int byte_shift = exact_log2 (scalar_size);
8046 gcc_assert (byte_shift >= 0);
8048 /* If we are given a memory address, optimize to load just the element. We
8049 don't have to adjust the vector element number on little endian
8050 systems. */
8051 if (MEM_P (src))
8053 gcc_assert (REG_P (tmp_gpr));
8054 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
8055 tmp_gpr, scalar_mode));
8056 return;
8059 else if (REG_P (src) || SUBREG_P (src))
8061 int bit_shift = byte_shift + 3;
8062 rtx element2;
8063 int dest_regno = regno_or_subregno (dest);
8064 int src_regno = regno_or_subregno (src);
8065 int element_regno = regno_or_subregno (element);
8067 gcc_assert (REG_P (tmp_gpr));
8069 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
8070 a general purpose register. */
8071 if (TARGET_P9_VECTOR
8072 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
8073 && INT_REGNO_P (dest_regno)
8074 && ALTIVEC_REGNO_P (src_regno)
8075 && INT_REGNO_P (element_regno))
8077 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
8078 rtx element_si = gen_rtx_REG (SImode, element_regno);
8080 if (mode == V16QImode)
8081 emit_insn (VECTOR_ELT_ORDER_BIG
8082 ? gen_vextublx (dest_si, element_si, src)
8083 : gen_vextubrx (dest_si, element_si, src));
8085 else if (mode == V8HImode)
8087 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8088 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
8089 emit_insn (VECTOR_ELT_ORDER_BIG
8090 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
8091 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
8095 else
8097 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8098 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
8099 emit_insn (VECTOR_ELT_ORDER_BIG
8100 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
8101 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
8104 return;
8108 gcc_assert (REG_P (tmp_altivec));
8110 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
8111 an XOR, otherwise we need to subtract. The shift amount is so VSLO
8112 will shift the element into the upper position (adding 3 to convert a
8113 byte shift into a bit shift). */
8114 if (scalar_size == 8)
8116 if (!VECTOR_ELT_ORDER_BIG)
8118 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
8119 element2 = tmp_gpr;
8121 else
8122 element2 = element;
8124 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8125 bit. */
8126 emit_insn (gen_rtx_SET (tmp_gpr,
8127 gen_rtx_AND (DImode,
8128 gen_rtx_ASHIFT (DImode,
8129 element2,
8130 GEN_INT (6)),
8131 GEN_INT (64))));
8133 else
8135 if (!VECTOR_ELT_ORDER_BIG)
8137 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
8139 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8140 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8141 element2 = tmp_gpr;
8143 else
8144 element2 = element;
8146 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8149 /* Get the value into the lower byte of the Altivec register where VSLO
8150 expects it. */
8151 if (TARGET_P9_VECTOR)
8152 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8153 else if (can_create_pseudo_p ())
8154 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8155 else
8157 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8158 emit_move_insn (tmp_di, tmp_gpr);
8159 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8162 /* Do the VSLO to get the value into the final location. */
8163 switch (mode)
8165 case E_V2DFmode:
8166 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8167 return;
8169 case E_V2DImode:
8170 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8171 return;
8173 case E_V4SFmode:
8175 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8176 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8177 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8178 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8179 tmp_altivec));
8181 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8182 return;
8185 case E_V4SImode:
8186 case E_V8HImode:
8187 case E_V16QImode:
8189 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8190 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8191 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8192 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8193 tmp_altivec));
8194 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8195 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
8196 GEN_INT (64 - (8 * scalar_size))));
8197 return;
8200 default:
8201 gcc_unreachable ();
8204 return;
8206 else
8207 gcc_unreachable ();
8210 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
8211 two SImode values. */
8213 static void
8214 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
8216 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
8218 if (CONST_INT_P (si1) && CONST_INT_P (si2))
8220 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
8221 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
8223 emit_move_insn (dest, GEN_INT (const1 | const2));
8224 return;
8227 /* Put si1 into upper 32-bits of dest. */
8228 if (CONST_INT_P (si1))
8229 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
8230 else
8232 /* Generate RLDIC. */
8233 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
8234 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
8235 rtx mask_rtx = GEN_INT (mask_32bit << 32);
8236 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
8237 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
8238 emit_insn (gen_rtx_SET (dest, and_rtx));
8241 /* Put si2 into the temporary. */
8242 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
8243 if (CONST_INT_P (si2))
8244 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
8245 else
8246 emit_insn (gen_zero_extendsidi2 (tmp, si2));
8248 /* Combine the two parts. */
8249 emit_insn (gen_iordi3 (dest, dest, tmp));
8250 return;
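/* Worked example (sketch): SI1 = 1 and SI2 = 0xdeadbeef combine into the
   single DImode constant 0x00000001deadbeef.  With register inputs the
   same result is built from a shift-left-32-and-mask of SI1 into DEST, a
   zero extension of SI2 into TMP, and a final OR of the two halves.  */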
8253 /* Split a V4SI initialization. */
8255 void
8256 rs6000_split_v4si_init (rtx operands[])
8258 rtx dest = operands[0];
8260 /* Destination is a GPR, build up the two DImode parts in place. */
8261 if (REG_P (dest) || SUBREG_P (dest))
8263 int d_regno = regno_or_subregno (dest);
8264 rtx scalar1 = operands[1];
8265 rtx scalar2 = operands[2];
8266 rtx scalar3 = operands[3];
8267 rtx scalar4 = operands[4];
8268 rtx tmp1 = operands[5];
8269 rtx tmp2 = operands[6];
8271 /* Even though we only need one temporary (plus the destination, which
8272 has an early clobber constraint), try to use two temporaries, one for
8273 each double word created. That way the 2nd insn scheduling pass can
8274 rearrange things so the two parts are done in parallel. */
8275 if (BYTES_BIG_ENDIAN)
8277 rtx di_lo = gen_rtx_REG (DImode, d_regno);
8278 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
8279 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
8280 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
8282 else
8284 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
8285 rtx di_hi = gen_rtx_REG (DImode, d_regno);
8286 gcc_assert (!VECTOR_ELT_ORDER_BIG);
8287 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
8288 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
8290 return;
8293 else
8294 gcc_unreachable ();
8297 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
8299 bool
8300 invalid_e500_subreg (rtx op, machine_mode mode)
8302 if (TARGET_E500_DOUBLE)
8304 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
8305 subreg:TI and reg:TF. Decimal float modes are like integer
8306 modes (only low part of each register used) for this
8307 purpose. */
8308 if (GET_CODE (op) == SUBREG
8309 && (mode == SImode || mode == DImode || mode == TImode
8310 || mode == DDmode || mode == TDmode || mode == PTImode)
8311 && REG_P (SUBREG_REG (op))
8312 && (GET_MODE (SUBREG_REG (op)) == DFmode
8313 || GET_MODE (SUBREG_REG (op)) == TFmode
8314 || GET_MODE (SUBREG_REG (op)) == IFmode
8315 || GET_MODE (SUBREG_REG (op)) == KFmode))
8316 return true;
8318 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
8319 reg:TI. */
8320 if (GET_CODE (op) == SUBREG
8321 && (mode == DFmode || mode == TFmode || mode == IFmode
8322 || mode == KFmode)
8323 && REG_P (SUBREG_REG (op))
8324 && (GET_MODE (SUBREG_REG (op)) == DImode
8325 || GET_MODE (SUBREG_REG (op)) == TImode
8326 || GET_MODE (SUBREG_REG (op)) == PTImode
8327 || GET_MODE (SUBREG_REG (op)) == DDmode
8328 || GET_MODE (SUBREG_REG (op)) == TDmode))
8329 return true;
8332 if (TARGET_SPE
8333 && GET_CODE (op) == SUBREG
8334 && mode == SImode
8335 && REG_P (SUBREG_REG (op))
8336 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
8337 return true;
8339 return false;
8342 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
8343 selects whether the alignment is ABI-mandated, optional, or
8344 both ABI-mandated and optional. */
8346 unsigned int
8347 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8349 if (how != align_opt)
8351 if (TREE_CODE (type) == VECTOR_TYPE)
8353 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
8354 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
8356 if (align < 64)
8357 align = 64;
8359 else if (align < 128)
8360 align = 128;
8362 else if (TARGET_E500_DOUBLE
8363 && TREE_CODE (type) == REAL_TYPE
8364 && TYPE_MODE (type) == DFmode)
8366 if (align < 64)
8367 align = 64;
8371 if (how != align_abi)
8373 if (TREE_CODE (type) == ARRAY_TYPE
8374 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8376 if (align < BITS_PER_WORD)
8377 align = BITS_PER_WORD;
8381 return align;
8384 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8385 instructions simply ignore the low bits; SPE vector memory
8386 instructions trap on unaligned accesses; VSX memory instructions are
8387 aligned to 4 or 8 bytes. */
8389 static bool
8390 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8392 return (STRICT_ALIGNMENT
8393 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8394 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8395 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
8396 && (int) align < VECTOR_ALIGN (mode)))));
8399 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
8401 bool
8402 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
8404 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
8406 if (computed != 128)
8408 static bool warned;
8409 if (!warned && warn_psabi)
8411 warned = true;
8412 inform (input_location,
8413 "the layout of aggregates containing vectors with"
8414 " %d-byte alignment has changed in GCC 5",
8415 computed / BITS_PER_UNIT);
8418 /* In current GCC there is no special case. */
8419 return false;
8422 return false;
8425 /* AIX increases natural record alignment to doubleword if the first
8426 field is an FP double while the FP fields remain word aligned. */
8428 unsigned int
8429 rs6000_special_round_type_align (tree type, unsigned int computed,
8430 unsigned int specified)
8432 unsigned int align = MAX (computed, specified);
8433 tree field = TYPE_FIELDS (type);
8435 /* Skip all non field decls */
8436 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8437 field = DECL_CHAIN (field);
8439 if (field != NULL && field != type)
8441 type = TREE_TYPE (field);
8442 while (TREE_CODE (type) == ARRAY_TYPE)
8443 type = TREE_TYPE (type);
8445 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
8446 align = MAX (align, 64);
8449 return align;
8452 /* Darwin increases record alignment to the natural alignment of
8453 the first field. */
8455 unsigned int
8456 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8457 unsigned int specified)
8459 unsigned int align = MAX (computed, specified);
8461 if (TYPE_PACKED (type))
8462 return align;
8464 /* Find the first field, looking down into aggregates. */
8465 do {
8466 tree field = TYPE_FIELDS (type);
8467 /* Skip all non field decls */
8468 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8469 field = DECL_CHAIN (field);
8470 if (! field)
8471 break;
8472 /* A packed field does not contribute any extra alignment. */
8473 if (DECL_PACKED (field))
8474 return align;
8475 type = TREE_TYPE (field);
8476 while (TREE_CODE (type) == ARRAY_TYPE)
8477 type = TREE_TYPE (type);
8478 } while (AGGREGATE_TYPE_P (type));
8480 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8481 align = MAX (align, TYPE_ALIGN (type));
8483 return align;
8486 /* Return 1 for an operand in small memory on V.4/eabi. */
8488 int
8489 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8490 machine_mode mode ATTRIBUTE_UNUSED)
8492 #if TARGET_ELF
8493 rtx sym_ref;
8495 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8496 return 0;
8498 if (DEFAULT_ABI != ABI_V4)
8499 return 0;
8501 /* Vector and float memory instructions have a limited offset on the
8502 SPE, so using a vector or float variable directly as an operand is
8503 not useful. */
8504 if (TARGET_SPE
8505 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
8506 return 0;
8508 if (GET_CODE (op) == SYMBOL_REF)
8509 sym_ref = op;
8511 else if (GET_CODE (op) != CONST
8512 || GET_CODE (XEXP (op, 0)) != PLUS
8513 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8514 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8515 return 0;
8517 else
8519 rtx sum = XEXP (op, 0);
8520 HOST_WIDE_INT summand;
8522 /* We have to be careful here, because it is the referenced address
8523 that must be 32k from _SDA_BASE_, not just the symbol. */
8524 summand = INTVAL (XEXP (sum, 1));
8525 if (summand < 0 || summand > g_switch_value)
8526 return 0;
8528 sym_ref = XEXP (sum, 0);
8531 return SYMBOL_REF_SMALL_P (sym_ref);
8532 #else
8533 return 0;
8534 #endif
8537 /* Return true if either operand is a general purpose register. */
8539 bool
8540 gpr_or_gpr_p (rtx op0, rtx op1)
8542 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8543 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8546 /* Return true if this is a move direct operation between GPR registers and
8547 floating point/VSX registers. */
8549 bool
8550 direct_move_p (rtx op0, rtx op1)
8552 int regno0, regno1;
8554 if (!REG_P (op0) || !REG_P (op1))
8555 return false;
8557 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8558 return false;
8560 regno0 = REGNO (op0);
8561 regno1 = REGNO (op1);
8562 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8563 return false;
8565 if (INT_REGNO_P (regno0))
8566 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8568 else if (INT_REGNO_P (regno1))
8570 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8571 return true;
8573 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8574 return true;
8577 return false;
8580 /* Return true if the OFFSET is valid for the quad address instructions that
8581 use d-form (register + offset) addressing. */
8583 static inline bool
8584 quad_address_offset_p (HOST_WIDE_INT offset)
8586 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
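/* For example (illustration):

     quad_address_offset_p (0)       => true
     quad_address_offset_p (16)      => true
     quad_address_offset_p (-32768)  => true
     quad_address_offset_p (8)       => false  (not a multiple of 16)
     quad_address_offset_p (32768)   => false  (outside signed 16 bits) */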
8589 /* Return true if ADDR is an acceptable address for a quad memory
8590 operation of mode MODE (either LQ/STQ for general purpose registers, or
8591 LXV/STXV for vector registers under ISA 3.0). STRICT requests the
8592 strict form of the base register validity checks, as used once
8593 register allocation has started. */
8595 bool
8596 quad_address_p (rtx addr, machine_mode mode, bool strict)
8598 rtx op0, op1;
8600 if (GET_MODE_SIZE (mode) != 16)
8601 return false;
8603 if (legitimate_indirect_address_p (addr, strict))
8604 return true;
8606 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8607 return false;
8609 if (GET_CODE (addr) != PLUS)
8610 return false;
8612 op0 = XEXP (addr, 0);
8613 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8614 return false;
8616 op1 = XEXP (addr, 1);
8617 if (!CONST_INT_P (op1))
8618 return false;
8620 return quad_address_offset_p (INTVAL (op1));
8623 /* Return true if this is a load or store quad operation. This function does
8624 not handle the atomic quad memory instructions. */
8626 bool
8627 quad_load_store_p (rtx op0, rtx op1)
8629 bool ret;
8631 if (!TARGET_QUAD_MEMORY)
8632 ret = false;
8634 else if (REG_P (op0) && MEM_P (op1))
8635 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8636 && quad_memory_operand (op1, GET_MODE (op1))
8637 && !reg_overlap_mentioned_p (op0, op1));
8639 else if (MEM_P (op0) && REG_P (op1))
8640 ret = (quad_memory_operand (op0, GET_MODE (op0))
8641 && quad_int_reg_operand (op1, GET_MODE (op1)));
8643 else
8644 ret = false;
8646 if (TARGET_DEBUG_ADDR)
8648 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8649 ret ? "true" : "false");
8650 debug_rtx (gen_rtx_SET (op0, op1));
8653 return ret;
8656 /* Given an address, return a constant offset term if one exists. */
8658 static rtx
8659 address_offset (rtx op)
8661 if (GET_CODE (op) == PRE_INC
8662 || GET_CODE (op) == PRE_DEC)
8663 op = XEXP (op, 0);
8664 else if (GET_CODE (op) == PRE_MODIFY
8665 || GET_CODE (op) == LO_SUM)
8666 op = XEXP (op, 1);
8668 if (GET_CODE (op) == CONST)
8669 op = XEXP (op, 0);
8671 if (GET_CODE (op) == PLUS)
8672 op = XEXP (op, 1);
8674 if (CONST_INT_P (op))
8675 return op;
8677 return NULL_RTX;
8680 /* Return true if the MEM operand is a memory operand suitable for use
8681 with a (full width, possibly multiple) gpr load/store. On
8682 powerpc64 this means the offset must be divisible by 4.
8683 Implements 'Y' constraint.
8685 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8686 a constraint function we know the operand has satisfied a suitable
8687 memory predicate. Also accept some odd rtl generated by reload
8688 (see rs6000_legitimize_reload_address for various forms). It is
8689 important that reload rtl be accepted by appropriate constraints
8690 but not by the operand predicate.
8692 Offsetting a lo_sum should not be allowed, except where we know by
8693 alignment that a 32k boundary is not crossed, but see the ???
8694 comment in rs6000_legitimize_reload_address. Note that by
8695 "offsetting" here we mean a further offset to access parts of the
8696 MEM. It's fine to have a lo_sum where the inner address is offset
8697 from a sym, since the same sym+offset will appear in the high part
8698 of the address calculation. */
8700 bool
8701 mem_operand_gpr (rtx op, machine_mode mode)
8703 unsigned HOST_WIDE_INT offset;
8704 int extra;
8705 rtx addr = XEXP (op, 0);
8707 op = address_offset (addr);
8708 if (op == NULL_RTX)
8709 return true;
8711 offset = INTVAL (op);
8712 if (TARGET_POWERPC64 && (offset & 3) != 0)
8713 return false;
8715 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8716 if (extra < 0)
8717 extra = 0;
8719 if (GET_CODE (addr) == LO_SUM)
8720 /* For lo_sum addresses, we must allow any offset except one that
8721 causes a wrap, so test only the low 16 bits. */
8722 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8724 return offset + 0x8000 < 0x10000u - extra;
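/* Offset arithmetic sketch: for a DImode access on a 32-bit target,
   EXTRA is 4, so an offset of 32764 fails the final test
   (32764 + 0x8000 == 0x10000 - 4, which is not strictly smaller),
   because the second word at offset + 4 would overflow the signed
   16-bit displacement.  */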
8727 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8728 enforce an offset divisible by 4 even for 32-bit. */
8730 bool
8731 mem_operand_ds_form (rtx op, machine_mode mode)
8733 unsigned HOST_WIDE_INT offset;
8734 int extra;
8735 rtx addr = XEXP (op, 0);
8737 if (!offsettable_address_p (false, mode, addr))
8738 return false;
8740 op = address_offset (addr);
8741 if (op == NULL_RTX)
8742 return true;
8744 offset = INTVAL (op);
8745 if ((offset & 3) != 0)
8746 return false;
8748 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8749 if (extra < 0)
8750 extra = 0;
8752 if (GET_CODE (addr) == LO_SUM)
8753 /* For lo_sum addresses, we must allow any offset except one that
8754 causes a wrap, so test only the low 16 bits. */
8755 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8757 return offset + 0x8000 < 0x10000u - extra;
8760 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8762 static bool
8763 reg_offset_addressing_ok_p (machine_mode mode)
8765 switch (mode)
8767 case E_V16QImode:
8768 case E_V8HImode:
8769 case E_V4SFmode:
8770 case E_V4SImode:
8771 case E_V2DFmode:
8772 case E_V2DImode:
8773 case E_V1TImode:
8774 case E_TImode:
8775 case E_TFmode:
8776 case E_KFmode:
8777 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8778 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8779 a vector mode, if we want to use the VSX registers to move it around,
8780 we need to restrict ourselves to reg+reg addressing. Similarly for
8781 IEEE 128-bit floating point that is passed in a single vector
8782 register. */
8783 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8784 return mode_supports_vsx_dform_quad (mode);
8785 break;
8787 case E_V4HImode:
8788 case E_V2SImode:
8789 case E_V1DImode:
8790 case E_V2SFmode:
8791 /* Paired vector modes. Only reg+reg addressing is valid. */
8792 if (TARGET_PAIRED_FLOAT)
8793 return false;
8794 break;
8796 case E_SDmode:
8797 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8798 addressing for the LFIWZX and STFIWX instructions. */
8799 if (TARGET_NO_SDMODE_STACK)
8800 return false;
8801 break;
8803 default:
8804 break;
8807 return true;
8810 static bool
8811 virtual_stack_registers_memory_p (rtx op)
8813 int regnum;
8815 if (GET_CODE (op) == REG)
8816 regnum = REGNO (op);
8818 else if (GET_CODE (op) == PLUS
8819 && GET_CODE (XEXP (op, 0)) == REG
8820 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8821 regnum = REGNO (XEXP (op, 0));
8823 else
8824 return false;
8826 return (regnum >= FIRST_VIRTUAL_REGISTER
8827 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8830 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8831 is known to not straddle a 32k boundary. This function is used
8832 to determine whether -mcmodel=medium code can use TOC pointer
8833 relative addressing for OP. This means the alignment of the TOC
8834 pointer must also be taken into account, and unfortunately that is
8835 only 8 bytes. */
8837 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8838 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8839 #endif
8841 static bool
8842 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8843 machine_mode mode)
8845 tree decl;
8846 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8848 if (GET_CODE (op) != SYMBOL_REF)
8849 return false;
8851 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8852 SYMBOL_REF. */
8853 if (mode_supports_vsx_dform_quad (mode))
8854 return false;
8856 dsize = GET_MODE_SIZE (mode);
8857 decl = SYMBOL_REF_DECL (op);
8858 if (!decl)
8860 if (dsize == 0)
8861 return false;
8863 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8864 replacing memory addresses with an anchor plus offset. We
8865 could find the decl by rummaging around in the block->objects
8866 VEC for the given offset but that seems like too much work. */
8867 dalign = BITS_PER_UNIT;
8868 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8869 && SYMBOL_REF_ANCHOR_P (op)
8870 && SYMBOL_REF_BLOCK (op) != NULL)
8872 struct object_block *block = SYMBOL_REF_BLOCK (op);
8874 dalign = block->alignment;
8875 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8877 else if (CONSTANT_POOL_ADDRESS_P (op))
8879 /* It would be nice to have get_pool_align().. */
8880 machine_mode cmode = get_pool_mode (op);
8882 dalign = GET_MODE_ALIGNMENT (cmode);
8885 else if (DECL_P (decl))
8887 dalign = DECL_ALIGN (decl);
8889 if (dsize == 0)
8891 /* Allow BLKmode when the entire object is known to not
8892 cross a 32k boundary. */
8893 if (!DECL_SIZE_UNIT (decl))
8894 return false;
8896 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8897 return false;
8899 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8900 if (dsize > 32768)
8901 return false;
8903 dalign /= BITS_PER_UNIT;
8904 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8905 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8906 return dalign >= dsize;
8909 else
8910 gcc_unreachable ();
8912 /* Find how many bits of the alignment we know for this access. */
8913 dalign /= BITS_PER_UNIT;
8914 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8915 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8916 mask = dalign - 1;
8917 lsb = offset & -offset;
8918 mask &= lsb - 1;
8919 dalign = mask + 1;
8921 return dalign >= dsize;
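/* Alignment arithmetic sketch: with a decl alignment of 8 bytes and an
   OFFSET of 4, lsb = offset & -offset = 4 and mask = (8 - 1) & (4 - 1)
   = 3, so only 4 bytes of alignment remain known; an 8-byte access
   (dsize = 8) then fails the dalign >= dsize test, since it cannot be
   proved not to straddle a 32k boundary.  */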
8924 static bool
8925 constant_pool_expr_p (rtx op)
8927 rtx base, offset;
8929 split_const (op, &base, &offset);
8930 return (GET_CODE (base) == SYMBOL_REF
8931 && CONSTANT_POOL_ADDRESS_P (base)
8932 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8935 static const_rtx tocrel_base, tocrel_offset;
8937 /* Return true if OP is a toc pointer relative address (the output
8938 of create_TOC_reference). If STRICT, do not match non-split
8939 -mcmodel=large/medium toc pointer relative addresses. */
8941 bool
8942 toc_relative_expr_p (const_rtx op, bool strict)
8944 if (!TARGET_TOC)
8945 return false;
8947 if (TARGET_CMODEL != CMODEL_SMALL)
8949 /* When strict, ensure we have everything tidy. */
8950 if (strict
8951 && !(GET_CODE (op) == LO_SUM
8952 && REG_P (XEXP (op, 0))
8953 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8954 return false;
8956 /* When not strict, allow non-split TOC addresses and also allow
8957 (lo_sum (high ..)) TOC addresses created during reload. */
8958 if (GET_CODE (op) == LO_SUM)
8959 op = XEXP (op, 1);
8962 tocrel_base = op;
8963 tocrel_offset = const0_rtx;
8964 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8966 tocrel_base = XEXP (op, 0);
8967 tocrel_offset = XEXP (op, 1);
8970 return (GET_CODE (tocrel_base) == UNSPEC
8971 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8974 /* Return true if X is a constant pool address, and also for cmodel=medium
8975 if X is a toc-relative address known to be offsettable within MODE. */
8977 bool
8978 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8979 bool strict)
8981 return (toc_relative_expr_p (x, strict)
8982 && (TARGET_CMODEL != CMODEL_MEDIUM
8983 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8984 || mode == QImode
8985 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8986 INTVAL (tocrel_offset), mode)));
8989 static bool
8990 legitimate_small_data_p (machine_mode mode, rtx x)
8992 return (DEFAULT_ABI == ABI_V4
8993 && !flag_pic && !TARGET_TOC
8994 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8995 && small_data_operand (x, mode));
8998 /* SPE offset addressing is limited to 5-bits worth of double words. */
8999 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
9001 bool
9002 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
9003 bool strict, bool worst_case)
9005 unsigned HOST_WIDE_INT offset;
9006 unsigned int extra;
9008 if (GET_CODE (x) != PLUS)
9009 return false;
9010 if (!REG_P (XEXP (x, 0)))
9011 return false;
9012 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9013 return false;
9014 if (mode_supports_vsx_dform_quad (mode))
9015 return quad_address_p (x, mode, strict);
9016 if (!reg_offset_addressing_ok_p (mode))
9017 return virtual_stack_registers_memory_p (x);
9018 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
9019 return true;
9020 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
9021 return false;
9023 offset = INTVAL (XEXP (x, 1));
9024 extra = 0;
9025 switch (mode)
9027 case E_V4HImode:
9028 case E_V2SImode:
9029 case E_V1DImode:
9030 case E_V2SFmode:
9031 /* SPE vector modes. */
9032 return SPE_CONST_OFFSET_OK (offset);
9034 case E_DFmode:
9035 case E_DDmode:
9036 case E_DImode:
9037 /* On e500v2, we may have:
9039 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
9041 Which gets addressed with evldd instructions. */
9042 if (TARGET_E500_DOUBLE)
9043 return SPE_CONST_OFFSET_OK (offset);
9045 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
9046 addressing. */
9047 if (VECTOR_MEM_VSX_P (mode))
9048 return false;
9050 if (!worst_case)
9051 break;
9052 if (!TARGET_POWERPC64)
9053 extra = 4;
9054 else if (offset & 3)
9055 return false;
9056 break;
9058 case E_TFmode:
9059 case E_IFmode:
9060 case E_KFmode:
9061 case E_TDmode:
9062 case E_TImode:
9063 case E_PTImode:
9064 if (TARGET_E500_DOUBLE)
9065 return (SPE_CONST_OFFSET_OK (offset)
9066 && SPE_CONST_OFFSET_OK (offset + 8));
9068 extra = 8;
9069 if (!worst_case)
9070 break;
9071 if (!TARGET_POWERPC64)
9072 extra = 12;
9073 else if (offset & 3)
9074 return false;
9075 break;
9077 default:
9078 break;
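/* The bias below folds the signed-16-bit displacement check into one
   unsigned compare: after adding 0x8000, valid offsets lie in
   [0, 0x10000), less EXTRA bytes of headroom so the last word of a
   multi-word access still fits.  Illustrative example (not in the
   original source): with extra == 12, an offset of 0x7ff8 is rejected
   because 0x7ff8 + 8 already exceeds the 16-bit signed range.  */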
9081 offset += 0x8000;
9082 return offset < 0x10000 - extra;
9085 bool
9086 legitimate_indexed_address_p (rtx x, int strict)
9088 rtx op0, op1;
9090 if (GET_CODE (x) != PLUS)
9091 return false;
9093 op0 = XEXP (x, 0);
9094 op1 = XEXP (x, 1);
9096 /* Recognize the rtl generated by reload which we know will later be
9097 replaced with proper base and index regs. */
9098 if (!strict
9099 && reload_in_progress
9100 && (REG_P (op0) || GET_CODE (op0) == PLUS)
9101 && REG_P (op1))
9102 return true;
9104 return (REG_P (op0) && REG_P (op1)
9105 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9106 && INT_REG_OK_FOR_INDEX_P (op1, strict))
9107 || (INT_REG_OK_FOR_BASE_P (op1, strict)
9108 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9111 bool
9112 avoiding_indexed_address_p (machine_mode mode)
9114 /* Avoid indexed addressing for modes that have non-indexed
9115 load/store instruction forms. */
9116 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9119 bool
9120 legitimate_indirect_address_p (rtx x, int strict)
9122 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
9125 bool
9126 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9128 if (!TARGET_MACHO || !flag_pic
9129 || mode != SImode || GET_CODE (x) != MEM)
9130 return false;
9131 x = XEXP (x, 0);
9133 if (GET_CODE (x) != LO_SUM)
9134 return false;
9135 if (GET_CODE (XEXP (x, 0)) != REG)
9136 return false;
9137 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9138 return false;
9139 x = XEXP (x, 1);
9141 return CONSTANT_P (x);
9144 static bool
9145 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9147 if (GET_CODE (x) != LO_SUM)
9148 return false;
9149 if (GET_CODE (XEXP (x, 0)) != REG)
9150 return false;
9151 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9152 return false;
9153 /* Quad word addresses are restricted, and we can't use LO_SUM. */
9154 if (mode_supports_vsx_dform_quad (mode))
9155 return false;
9156 /* Restrict addressing for DI because of our SUBREG hackery. */
9157 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9158 return false;
9159 x = XEXP (x, 1);
9161 if (TARGET_ELF || TARGET_MACHO)
9163 bool large_toc_ok;
9165 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9166 return false;
9167 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, which usually calls
9168 push_reload from the reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9169 recognizes some LO_SUM addresses as valid although this
9170 function says the opposite. In most cases LRA can generate
9171 correct code for address reloads through different
9172 transformations; only some LO_SUM cases elude it. So we need to
9173 add code here, analogous to that in
9174 rs6000_legitimize_reload_address for LO_SUM, saying that some
9175 addresses are still valid. */
9175 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9176 && small_toc_ref (x, VOIDmode));
9177 if (TARGET_TOC && ! large_toc_ok)
9178 return false;
9179 if (GET_MODE_NUNITS (mode) != 1)
9180 return false;
9181 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9182 && !(/* ??? Assume floating point reg based on mode? */
9183 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
9184 && (mode == DFmode || mode == DDmode)))
9185 return false;
9187 return CONSTANT_P (x) || large_toc_ok;
9190 return false;
9194 /* Try machine-dependent ways of modifying an illegitimate address
9195 to be legitimate. If we find one, return the new, valid address.
9196 This is used from only one place: `memory_address' in explow.c.
9198 OLDX is the address as it was before break_out_memory_refs was
9199 called. In some cases it is useful to look at this to decide what
9200 needs to be done.
9202 It is always safe for this function to do nothing. It exists to
9203 recognize opportunities to optimize the output.
9205 On RS/6000, first check for the sum of a register with a constant
9206 integer that is out of range. If so, generate code to add the
9207 constant with the low-order 16 bits masked to the register and force
9208 this result into another register (this can be done with `cau',
9209 the original POWER mnemonic for addis).
9209 Then generate an address of REG+(CONST&0xffff), allowing for the
9210 possibility of bit 16 being a one.
9212 Then check for the sum of a register and something not constant, try to
9213 load the other things into a register and return the sum. */
9215 static rtx
9216 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9217 machine_mode mode)
9219 unsigned int extra;
9221 if (!reg_offset_addressing_ok_p (mode)
9222 || mode_supports_vsx_dform_quad (mode))
9224 if (virtual_stack_registers_memory_p (x))
9225 return x;
9227 /* In theory we should not be seeing addresses of the form reg+0,
9228 but just in case it is generated, optimize it away. */
9229 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9230 return force_reg (Pmode, XEXP (x, 0));
9232 /* For TImode with load/store quad, restrict addresses to just a single
9233 pointer, so it works with both GPRs and VSX registers. */
9234 /* Make sure both operands are registers. */
9235 else if (GET_CODE (x) == PLUS
9236 && (mode != TImode || !TARGET_VSX_TIMODE))
9237 return gen_rtx_PLUS (Pmode,
9238 force_reg (Pmode, XEXP (x, 0)),
9239 force_reg (Pmode, XEXP (x, 1)));
9240 else
9241 return force_reg (Pmode, x);
9243 if (GET_CODE (x) == SYMBOL_REF)
9245 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9246 if (model != 0)
9247 return rs6000_legitimize_tls_address (x, model);
9250 extra = 0;
9251 switch (mode)
9253 case E_TFmode:
9254 case E_TDmode:
9255 case E_TImode:
9256 case E_PTImode:
9257 case E_IFmode:
9258 case E_KFmode:
9259 /* As in legitimate_offset_address_p we do not assume
9260 worst-case. The mode here is just a hint as to the registers
9261 used. A TImode is usually in gprs, but may actually be in
9262 fprs. Leave worst-case scenario for reload to handle via
9263 insn constraints. PTImode is only GPRs. */
9264 extra = 8;
9265 break;
9266 default:
9267 break;
9270 if (GET_CODE (x) == PLUS
9271 && GET_CODE (XEXP (x, 0)) == REG
9272 && GET_CODE (XEXP (x, 1)) == CONST_INT
9273 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9274 >= 0x10000 - extra)
9275 && !(SPE_VECTOR_MODE (mode)
9276 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
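/* Illustrative example (editor's note): for (reg + 0x12345678) this
   adds high_int == 0x12340000 into a new register and returns
   (new_reg + 0x5678), so the mem insn keeps a 16-bit displacement.  */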
9278 HOST_WIDE_INT high_int, low_int;
9279 rtx sum;
9280 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9281 if (low_int >= 0x8000 - extra)
9282 low_int = 0;
9283 high_int = INTVAL (XEXP (x, 1)) - low_int;
9284 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9285 GEN_INT (high_int)), 0);
9286 return plus_constant (Pmode, sum, low_int);
9288 else if (GET_CODE (x) == PLUS
9289 && GET_CODE (XEXP (x, 0)) == REG
9290 && GET_CODE (XEXP (x, 1)) != CONST_INT
9291 && GET_MODE_NUNITS (mode) == 1
9292 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9293 || (/* ??? Assume floating point reg based on mode? */
9294 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9295 && (mode == DFmode || mode == DDmode)))
9296 && !avoiding_indexed_address_p (mode))
9298 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9299 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9301 else if (SPE_VECTOR_MODE (mode)
9302 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
9304 if (mode == DImode)
9305 return x;
9306 /* We accept [reg + reg] and [reg + OFFSET]. */
9308 if (GET_CODE (x) == PLUS)
9310 rtx op1 = XEXP (x, 0);
9311 rtx op2 = XEXP (x, 1);
9312 rtx y;
9314 op1 = force_reg (Pmode, op1);
9316 if (GET_CODE (op2) != REG
9317 && (GET_CODE (op2) != CONST_INT
9318 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
9319 || (GET_MODE_SIZE (mode) > 8
9320 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
9321 op2 = force_reg (Pmode, op2);
9323 /* We can't always do [reg + reg] for these, because [reg +
9324 reg + offset] is not a legitimate addressing mode. */
9325 y = gen_rtx_PLUS (Pmode, op1, op2);
9327 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
9328 return force_reg (Pmode, y);
9329 else
9330 return y;
9333 return force_reg (Pmode, x);
9335 else if ((TARGET_ELF
9336 #if TARGET_MACHO
9337 || !MACHO_DYNAMIC_NO_PIC_P
9338 #endif
9340 && TARGET_32BIT
9341 && TARGET_NO_TOC
9342 && ! flag_pic
9343 && GET_CODE (x) != CONST_INT
9344 && GET_CODE (x) != CONST_WIDE_INT
9345 && GET_CODE (x) != CONST_DOUBLE
9346 && CONSTANT_P (x)
9347 && GET_MODE_NUNITS (mode) == 1
9348 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9349 || (/* ??? Assume floating point reg based on mode? */
9350 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9351 && (mode == DFmode || mode == DDmode))))
9353 rtx reg = gen_reg_rtx (Pmode);
9354 if (TARGET_ELF)
9355 emit_insn (gen_elf_high (reg, x));
9356 else
9357 emit_insn (gen_macho_high (reg, x));
9358 return gen_rtx_LO_SUM (Pmode, reg, x);
9360 else if (TARGET_TOC
9361 && GET_CODE (x) == SYMBOL_REF
9362 && constant_pool_expr_p (x)
9363 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9364 return create_TOC_reference (x, NULL_RTX);
9365 else
9366 return x;
9369 /* Debug version of rs6000_legitimize_address. */
9370 static rtx
9371 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9373 rtx ret;
9374 rtx_insn *insns;
9376 start_sequence ();
9377 ret = rs6000_legitimize_address (x, oldx, mode);
9378 insns = get_insns ();
9379 end_sequence ();
9381 if (ret != x)
9383 fprintf (stderr,
9384 "\nrs6000_legitimize_address: mode %s, old code %s, "
9385 "new code %s, modified\n",
9386 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9387 GET_RTX_NAME (GET_CODE (ret)));
9389 fprintf (stderr, "Original address:\n");
9390 debug_rtx (x);
9392 fprintf (stderr, "oldx:\n");
9393 debug_rtx (oldx);
9395 fprintf (stderr, "New address:\n");
9396 debug_rtx (ret);
9398 if (insns)
9400 fprintf (stderr, "Insns added:\n");
9401 debug_rtx_list (insns, 20);
9404 else
9406 fprintf (stderr,
9407 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9408 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9410 debug_rtx (x);
9413 if (insns)
9414 emit_insn (insns);
9416 return ret;
9419 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9420 We need to emit DTP-relative relocations. */
9422 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9423 static void
9424 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9426 switch (size)
9428 case 4:
9429 fputs ("\t.long\t", file);
9430 break;
9431 case 8:
9432 fputs (DOUBLE_INT_ASM_OP, file);
9433 break;
9434 default:
9435 gcc_unreachable ();
9437 output_addr_const (file, x);
9438 if (TARGET_ELF)
9439 fputs ("@dtprel+0x8000", file);
9440 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
9442 switch (SYMBOL_REF_TLS_MODEL (x))
9444 case 0:
9445 break;
9446 case TLS_MODEL_LOCAL_EXEC:
9447 fputs ("@le", file);
9448 break;
9449 case TLS_MODEL_INITIAL_EXEC:
9450 fputs ("@ie", file);
9451 break;
9452 case TLS_MODEL_GLOBAL_DYNAMIC:
9453 case TLS_MODEL_LOCAL_DYNAMIC:
9454 fputs ("@m", file);
9455 break;
9456 default:
9457 gcc_unreachable ();
9462 /* Return true if X is a symbol that refers to real (rather than emulated)
9463 TLS. */
9465 static bool
9466 rs6000_real_tls_symbol_ref_p (rtx x)
9468 return (GET_CODE (x) == SYMBOL_REF
9469 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9472 /* In the name of slightly smaller debug output, and to cater to
9473 general assembler lossage, recognize various UNSPEC sequences
9474 and turn them back into a direct symbol reference. */
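/* For example (editor's illustration), an address such as
     (plus (unspec [(symbol_ref "x") ...] UNSPEC_TOCREL) (const_int 8))
   is turned back into
     (plus (symbol_ref "x") (const_int 8)).  */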
9476 static rtx
9477 rs6000_delegitimize_address (rtx orig_x)
9479 rtx x, y, offset;
9481 orig_x = delegitimize_mem_from_attrs (orig_x);
9482 x = orig_x;
9483 if (MEM_P (x))
9484 x = XEXP (x, 0);
9486 y = x;
9487 if (TARGET_CMODEL != CMODEL_SMALL
9488 && GET_CODE (y) == LO_SUM)
9489 y = XEXP (y, 1);
9491 offset = NULL_RTX;
9492 if (GET_CODE (y) == PLUS
9493 && GET_MODE (y) == Pmode
9494 && CONST_INT_P (XEXP (y, 1)))
9496 offset = XEXP (y, 1);
9497 y = XEXP (y, 0);
9500 if (GET_CODE (y) == UNSPEC
9501 && XINT (y, 1) == UNSPEC_TOCREL)
9503 y = XVECEXP (y, 0, 0);
9505 #ifdef HAVE_AS_TLS
9506 /* Do not associate thread-local symbols with the original
9507 constant pool symbol. */
9508 if (TARGET_XCOFF
9509 && GET_CODE (y) == SYMBOL_REF
9510 && CONSTANT_POOL_ADDRESS_P (y)
9511 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9512 return orig_x;
9513 #endif
9515 if (offset != NULL_RTX)
9516 y = gen_rtx_PLUS (Pmode, y, offset);
9517 if (!MEM_P (orig_x))
9518 return y;
9519 else
9520 return replace_equiv_address_nv (orig_x, y);
9523 if (TARGET_MACHO
9524 && GET_CODE (orig_x) == LO_SUM
9525 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9527 y = XEXP (XEXP (orig_x, 1), 0);
9528 if (GET_CODE (y) == UNSPEC
9529 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9530 return XVECEXP (y, 0, 0);
9533 return orig_x;
9536 /* Return true if X shouldn't be emitted into the debug info.
9537 The linker doesn't like .toc section references from
9538 .debug_* sections, so reject .toc section symbols. */
9540 static bool
9541 rs6000_const_not_ok_for_debug_p (rtx x)
9543 if (GET_CODE (x) == SYMBOL_REF
9544 && CONSTANT_POOL_ADDRESS_P (x))
9546 rtx c = get_pool_constant (x);
9547 machine_mode cmode = get_pool_mode (x);
9548 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9549 return true;
9552 return false;
9556 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9558 static bool
9559 rs6000_legitimate_combined_insn (rtx_insn *insn)
9561 int icode = INSN_CODE (insn);
9563 /* Reject creating doloop insns. Combine should not be allowed
9564 to create these for a number of reasons:
9565 1) In a nested loop, if combine creates one of these in an
9566 outer loop and the register allocator happens to allocate ctr
9567 to the outer loop insn, then the inner loop can't use ctr.
9568 Inner loops ought to be more highly optimized.
9569 2) Combine often wants to create one of these from what was
9570 originally a three insn sequence, first combining the three
9571 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9572 allocated ctr, the splitter takes us back to the three insn
9573 sequence. It's better to stop combine at the two insn
9574 sequence.
9575 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9576 insns, the register allocator sometimes uses floating point
9577 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9578 jump insn and output reloads are not implemented for jumps,
9579 the ctrsi/ctrdi splitters need to handle all possible cases.
9580 That's a pain, and it gets to be seriously difficult when a
9581 splitter that runs after reload needs memory to transfer from
9582 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9583 for the difficult case. It's better to not create problems
9584 in the first place. */
9585 if (icode != CODE_FOR_nothing
9586 && (icode == CODE_FOR_ctrsi_internal1
9587 || icode == CODE_FOR_ctrdi_internal1
9588 || icode == CODE_FOR_ctrsi_internal2
9589 || icode == CODE_FOR_ctrdi_internal2
9590 || icode == CODE_FOR_ctrsi_internal3
9591 || icode == CODE_FOR_ctrdi_internal3
9592 || icode == CODE_FOR_ctrsi_internal4
9593 || icode == CODE_FOR_ctrdi_internal4))
9594 return false;
9596 return true;
9599 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9601 static GTY(()) rtx rs6000_tls_symbol;
9602 static rtx
9603 rs6000_tls_get_addr (void)
9605 if (!rs6000_tls_symbol)
9606 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9608 return rs6000_tls_symbol;
9611 /* Construct the SYMBOL_REF for TLS GOT references. */
9613 static GTY(()) rtx rs6000_got_symbol;
9614 static rtx
9615 rs6000_got_sym (void)
9617 if (!rs6000_got_symbol)
9619 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9620 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9621 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9624 return rs6000_got_symbol;
9627 /* AIX Thread-Local Address support. */
9629 static rtx
9630 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9632 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9633 const char *name;
9634 char *tlsname;
9636 name = XSTR (addr, 0);
9637 /* Append the TLS CSECT qualifier, unless the symbol is already
9638 qualified or will be placed in the TLS private data section. */
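/* E.g. (editor's illustration) a public symbol "foo" is rewritten as
   "foo[TL]", while a bss-initialized one becomes "foo[UL]".  */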
9639 if (name[strlen (name) - 1] != ']'
9640 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9641 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9643 tlsname = XALLOCAVEC (char, strlen (name) + 4);
9644 strcpy (tlsname, name);
9645 strcat (tlsname,
9646 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9647 tlsaddr = copy_rtx (addr);
9648 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9650 else
9651 tlsaddr = addr;
9653 /* Place addr into TOC constant pool. */
9654 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9656 /* Output the TOC entry and create the MEM referencing the value. */
9657 if (constant_pool_expr_p (XEXP (sym, 0))
9658 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9660 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9661 mem = gen_const_mem (Pmode, tocref);
9662 set_mem_alias_set (mem, get_TOC_alias_set ());
9664 else
9665 return sym;
9667 /* Use global-dynamic for local-dynamic. */
9668 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9669 || model == TLS_MODEL_LOCAL_DYNAMIC)
9671 /* Create new TOC reference for @m symbol. */
9672 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9673 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9674 strcpy (tlsname, "*LCM");
9675 strcat (tlsname, name + 3);
9676 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9677 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9678 tocref = create_TOC_reference (modaddr, NULL_RTX);
9679 rtx modmem = gen_const_mem (Pmode, tocref);
9680 set_mem_alias_set (modmem, get_TOC_alias_set ());
9682 rtx modreg = gen_reg_rtx (Pmode);
9683 emit_insn (gen_rtx_SET (modreg, modmem));
9685 tmpreg = gen_reg_rtx (Pmode);
9686 emit_insn (gen_rtx_SET (tmpreg, mem));
9688 dest = gen_reg_rtx (Pmode);
9689 if (TARGET_32BIT)
9690 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9691 else
9692 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9693 return dest;
9695 /* Obtain the TLS pointer: a call on 32-bit, GPR 13 on 64-bit. */
9696 else if (TARGET_32BIT)
9698 tlsreg = gen_reg_rtx (SImode);
9699 emit_insn (gen_tls_get_tpointer (tlsreg));
9701 else
9702 tlsreg = gen_rtx_REG (DImode, 13);
9704 /* Load the TOC value into temporary register. */
9705 tmpreg = gen_reg_rtx (Pmode);
9706 emit_insn (gen_rtx_SET (tmpreg, mem));
9707 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9708 gen_rtx_MINUS (Pmode, addr, tlsreg));
9710 /* Add TOC symbol value to TLS pointer. */
9711 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9713 return dest;
9716 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9717 this (thread-local) address. */
9719 static rtx
9720 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9722 rtx dest, insn;
9724 if (TARGET_XCOFF)
9725 return rs6000_legitimize_tls_address_aix (addr, model);
9727 dest = gen_reg_rtx (Pmode);
9728 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9730 rtx tlsreg;
9732 if (TARGET_64BIT)
9734 tlsreg = gen_rtx_REG (Pmode, 13);
9735 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9737 else
9739 tlsreg = gen_rtx_REG (Pmode, 2);
9740 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9742 emit_insn (insn);
9744 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9746 rtx tlsreg, tmp;
9748 tmp = gen_reg_rtx (Pmode);
9749 if (TARGET_64BIT)
9751 tlsreg = gen_rtx_REG (Pmode, 13);
9752 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9754 else
9756 tlsreg = gen_rtx_REG (Pmode, 2);
9757 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9759 emit_insn (insn);
9760 if (TARGET_64BIT)
9761 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9762 else
9763 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9764 emit_insn (insn);
9766 else
9768 rtx r3, got, tga, tmp1, tmp2, call_insn;
9770 /* We currently use relocations like @got@tlsgd for tls, which
9771 means the linker will handle allocation of tls entries, placing
9772 them in the .got section. So use a pointer to the .got section,
9773 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9774 or to secondary GOT sections used by 32-bit -fPIC. */
9775 if (TARGET_64BIT)
9776 got = gen_rtx_REG (Pmode, 2);
9777 else
9779 if (flag_pic == 1)
9780 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9781 else
9783 rtx gsym = rs6000_got_sym ();
9784 got = gen_reg_rtx (Pmode);
9785 if (flag_pic == 0)
9786 rs6000_emit_move (got, gsym, Pmode);
9787 else
9789 rtx mem, lab;
9791 tmp1 = gen_reg_rtx (Pmode);
9792 tmp2 = gen_reg_rtx (Pmode);
9793 mem = gen_const_mem (Pmode, tmp1);
9794 lab = gen_label_rtx ();
9795 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9796 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9797 if (TARGET_LINK_STACK)
9798 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9799 emit_move_insn (tmp2, mem);
9800 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9801 set_unique_reg_note (last, REG_EQUAL, gsym);
9806 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9808 tga = rs6000_tls_get_addr ();
9809 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9810 const0_rtx, Pmode);
9812 r3 = gen_rtx_REG (Pmode, 3);
9813 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9815 if (TARGET_64BIT)
9816 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9817 else
9818 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9820 else if (DEFAULT_ABI == ABI_V4)
9821 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9822 else
9823 gcc_unreachable ();
9824 call_insn = last_call_insn ();
9825 PATTERN (call_insn) = insn;
9826 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9827 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9828 pic_offset_table_rtx);
9830 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9832 tga = rs6000_tls_get_addr ();
9833 tmp1 = gen_reg_rtx (Pmode);
9834 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9835 const0_rtx, Pmode);
9837 r3 = gen_rtx_REG (Pmode, 3);
9838 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9840 if (TARGET_64BIT)
9841 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9842 else
9843 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9845 else if (DEFAULT_ABI == ABI_V4)
9846 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9847 else
9848 gcc_unreachable ();
9849 call_insn = last_call_insn ();
9850 PATTERN (call_insn) = insn;
9851 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9852 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9853 pic_offset_table_rtx);
9855 if (rs6000_tls_size == 16)
9857 if (TARGET_64BIT)
9858 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9859 else
9860 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9862 else if (rs6000_tls_size == 32)
9864 tmp2 = gen_reg_rtx (Pmode);
9865 if (TARGET_64BIT)
9866 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9867 else
9868 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9869 emit_insn (insn);
9870 if (TARGET_64BIT)
9871 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9872 else
9873 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9875 else
9877 tmp2 = gen_reg_rtx (Pmode);
9878 if (TARGET_64BIT)
9879 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9880 else
9881 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9882 emit_insn (insn);
9883 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9885 emit_insn (insn);
9887 else
9889 /* IE, or 64-bit offset LE. */
9890 tmp2 = gen_reg_rtx (Pmode);
9891 if (TARGET_64BIT)
9892 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9893 else
9894 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9895 emit_insn (insn);
9896 if (TARGET_64BIT)
9897 insn = gen_tls_tls_64 (dest, tmp2, addr);
9898 else
9899 insn = gen_tls_tls_32 (dest, tmp2, addr);
9900 emit_insn (insn);
9904 return dest;
9907 /* Only create the global variable for the stack protect guard if we are using
9908 the global flavor of that guard. */
9909 static tree
9910 rs6000_init_stack_protect_guard (void)
9912 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9913 return default_stack_protect_guard ();
9915 return NULL_TREE;
9918 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9920 static bool
9921 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9923 if (GET_CODE (x) == HIGH
9924 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9925 return true;
9927 /* A TLS symbol in the TOC cannot contain a sum. */
9928 if (GET_CODE (x) == CONST
9929 && GET_CODE (XEXP (x, 0)) == PLUS
9930 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9931 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9932 return true;
9934 /* Do not place an ELF TLS symbol in the constant pool. */
9935 return TARGET_ELF && tls_referenced_p (x);
9938 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9939 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9940 can be addressed relative to the toc pointer. */
9942 static bool
9943 use_toc_relative_ref (rtx sym, machine_mode mode)
9945 return ((constant_pool_expr_p (sym)
9946 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9947 get_pool_mode (sym)))
9948 || (TARGET_CMODEL == CMODEL_MEDIUM
9949 && SYMBOL_REF_LOCAL_P (sym)
9950 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9953 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9954 replace the input X, or the original X if no replacement is called for.
9955 The output parameter *WIN is 1 if the calling macro should goto WIN,
9956 0 if it should not.
9958 For RS/6000, we wish to handle large displacements off a base
9959 register by splitting the addend across an addis and the mem insn.
9960 This cuts the number of extra insns needed from 3 to 1.
9962 On Darwin, we use this to generate code for floating point constants.
9963 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9964 The Darwin code is inside #if TARGET_MACHO because only then are the
9965 machopic_* functions defined. */
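/* Illustration of the displacement split (editor's note, not in the
   original source): an address like
     (plus (reg r3) (const_int 0x12345678))
   becomes
     (plus (plus (reg r3) (const_int 0x12340000)) (const_int 0x5678))
   and only the inner PLUS is reloaded into a base register (a single
   addis), leaving a 16-bit displacement in the mem insn.  */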
9966 static rtx
9967 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9968 int opnum, int type,
9969 int ind_levels ATTRIBUTE_UNUSED, int *win)
9971 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9972 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9974 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9975 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9976 if (reg_offset_p
9977 && opnum == 1
9978 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9979 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9980 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9981 && TARGET_P9_VECTOR)
9982 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9983 && TARGET_P9_VECTOR)))
9984 reg_offset_p = false;
9986 /* We must recognize output that we have already generated ourselves. */
9987 if (GET_CODE (x) == PLUS
9988 && GET_CODE (XEXP (x, 0)) == PLUS
9989 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9990 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9991 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9993 if (TARGET_DEBUG_ADDR)
9995 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9996 debug_rtx (x);
9998 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9999 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
10000 opnum, (enum reload_type) type);
10001 *win = 1;
10002 return x;
10005 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
10006 if (GET_CODE (x) == LO_SUM
10007 && GET_CODE (XEXP (x, 0)) == HIGH)
10009 if (TARGET_DEBUG_ADDR)
10011 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
10012 debug_rtx (x);
10014 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10015 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10016 opnum, (enum reload_type) type);
10017 *win = 1;
10018 return x;
10021 #if TARGET_MACHO
10022 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
10023 && GET_CODE (x) == LO_SUM
10024 && GET_CODE (XEXP (x, 0)) == PLUS
10025 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
10026 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
10027 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
10028 && machopic_operand_p (XEXP (x, 1)))
10030 /* Result of previous invocation of this function on Darwin
10031 floating point constant. */
10032 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10033 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10034 opnum, (enum reload_type) type);
10035 *win = 1;
10036 return x;
10038 #endif
10040 if (TARGET_CMODEL != CMODEL_SMALL
10041 && reg_offset_p
10042 && !quad_offset_p
10043 && small_toc_ref (x, VOIDmode))
10045 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
10046 x = gen_rtx_LO_SUM (Pmode, hi, x);
10047 if (TARGET_DEBUG_ADDR)
10049 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
10050 debug_rtx (x);
10052 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10053 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10054 opnum, (enum reload_type) type);
10055 *win = 1;
10056 return x;
10059 if (GET_CODE (x) == PLUS
10060 && REG_P (XEXP (x, 0))
10061 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
10062 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
10063 && CONST_INT_P (XEXP (x, 1))
10064 && reg_offset_p
10065 && !SPE_VECTOR_MODE (mode)
10066 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10067 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
10069 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
10070 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
10071 HOST_WIDE_INT high
10072 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
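/* Editor's note: the XOR/subtract pattern sign-extends the low
   16 bits, e.g. val == 0x1fffc yields low == -4 and high == 0x20000;
   high + low == val confirms there was no 32-bit overflow.  */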
10074 /* Check for 32-bit overflow or quad addresses with one of the
10075 four least significant bits set. */
10076 if (high + low != val
10077 || (quad_offset_p && (low & 0xf)))
10079 *win = 0;
10080 return x;
10083 /* Reload the high part into a base reg; leave the low part
10084 in the mem directly. */
10086 x = gen_rtx_PLUS (GET_MODE (x),
10087 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
10088 GEN_INT (high)),
10089 GEN_INT (low));
10091 if (TARGET_DEBUG_ADDR)
10093 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
10094 debug_rtx (x);
10096 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10097 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
10098 opnum, (enum reload_type) type);
10099 *win = 1;
10100 return x;
10103 if (GET_CODE (x) == SYMBOL_REF
10104 && reg_offset_p
10105 && !quad_offset_p
10106 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
10107 && !SPE_VECTOR_MODE (mode)
10108 #if TARGET_MACHO
10109 && DEFAULT_ABI == ABI_DARWIN
10110 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
10111 && machopic_symbol_defined_p (x)
10112 #else
10113 && DEFAULT_ABI == ABI_V4
10114 && !flag_pic
10115 #endif
10116 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
10117 The same goes for DImode without 64-bit gprs and DFmode and DDmode
10118 without fprs.
10119 ??? Assume floating point reg based on mode? This assumption is
10120 violated by e.g. the powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
10121 where reload ends up doing a DFmode load of a constant from
10122 mem using two gprs. Unfortunately, at this point reload
10123 hasn't yet selected regs so poking around in reload data
10124 won't help and even if we could figure out the regs reliably,
10125 we'd still want to allow this transformation when the mem is
10126 naturally aligned. Since we say the address is good here, we
10127 can't disable offsets from LO_SUMs in mem_operand_gpr.
10128 FIXME: Allow offset from lo_sum for other modes too, when
10129 mem is sufficiently aligned.
10131 Also disallow this if the type can go in VMX/Altivec registers, since
10132 those registers do not have d-form (reg+offset) address modes. */
10133 && !reg_addr[mode].scalar_in_vmx_p
10134 && mode != TFmode
10135 && mode != TDmode
10136 && mode != IFmode
10137 && mode != KFmode
10138 && (mode != TImode || !TARGET_VSX_TIMODE)
10139 && mode != PTImode
10140 && (mode != DImode || TARGET_POWERPC64)
10141 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
10142 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
10144 #if TARGET_MACHO
10145 if (flag_pic)
10147 rtx offset = machopic_gen_offset (x);
10148 x = gen_rtx_LO_SUM (GET_MODE (x),
10149 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10150 gen_rtx_HIGH (Pmode, offset)), offset);
10152 else
10153 #endif
10154 x = gen_rtx_LO_SUM (GET_MODE (x),
10155 gen_rtx_HIGH (Pmode, x), x);
10157 if (TARGET_DEBUG_ADDR)
10159 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
10160 debug_rtx (x);
10162 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10163 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10164 opnum, (enum reload_type) type);
10165 *win = 1;
10166 return x;
10169 /* Reload an offset address wrapped by an AND that represents the
10170 masking of the lower bits. Strip the outer AND and let reload
10171 convert the offset address into an indirect address. For VSX,
10172 force reload to create the address with an AND in a separate
10173 register, because we can't guarantee an altivec register will
10174 be used. */
10175 if (VECTOR_MEM_ALTIVEC_P (mode)
10176 && GET_CODE (x) == AND
10177 && GET_CODE (XEXP (x, 0)) == PLUS
10178 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
10179 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
10180 && GET_CODE (XEXP (x, 1)) == CONST_INT
10181 && INTVAL (XEXP (x, 1)) == -16)
10183 x = XEXP (x, 0);
10184 *win = 1;
10185 return x;
10188 if (TARGET_TOC
10189 && reg_offset_p
10190 && !quad_offset_p
10191 && GET_CODE (x) == SYMBOL_REF
10192 && use_toc_relative_ref (x, mode))
10194 x = create_TOC_reference (x, NULL_RTX);
10195 if (TARGET_CMODEL != CMODEL_SMALL)
10197 if (TARGET_DEBUG_ADDR)
10199 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
10200 debug_rtx (x);
10202 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10203 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10204 opnum, (enum reload_type) type);
10206 *win = 1;
10207 return x;
10209 *win = 0;
10210 return x;
10213 /* Debug version of rs6000_legitimize_reload_address. */
10214 static rtx
10215 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
10216 int opnum, int type,
10217 int ind_levels, int *win)
10219 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
10220 ind_levels, win);
10221 fprintf (stderr,
10222 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
10223 "type = %d, ind_levels = %d, win = %d, original addr:\n",
10224 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
10225 debug_rtx (x);
10227 if (x == ret)
10228 fprintf (stderr, "Same address returned\n");
10229 else if (!ret)
10230 fprintf (stderr, "NULL returned\n");
10231 else
10233 fprintf (stderr, "New address:\n");
10234 debug_rtx (ret);
10237 return ret;
10240 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
10241 that is a valid memory address for an instruction.
10242 The MODE argument is the machine mode for the MEM expression
10243 that wants to use this address.
10245 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
10246 refers to a constant pool entry of an address (or the sum of it
10247 plus a constant), a short (16-bit signed) constant plus a register,
10248 the sum of two registers, or a register indirect, possibly with an
10249 auto-increment. For DFmode, DDmode and DImode with a constant plus
10250 register, we must ensure that both words are addressable, or on
10251 PowerPC64 that the offset is word aligned.
10253 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
10254 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
10255 because adjacent memory cells are accessed by adding word-sized offsets
10256 during assembly output. */
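/* Editor's illustration of the four forms as RTL (one accepted shape
   each; the constant pool form may also appear as a TOC unspec):
     (mem:SI (reg:SI 9))                            register indirect
     (mem:SI (plus:SI (reg:SI 9) (const_int 16)))   reg + 16-bit offset
     (mem:SI (plus:SI (reg:SI 9) (reg:SI 10)))      indexed (reg + reg)
     (mem:SI (lo_sum:SI (reg:SI 9) (symbol_ref)))   constant pool ref  */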
10257 static bool
10258 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
10260 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
10261 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
10263 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
10264 if (VECTOR_MEM_ALTIVEC_P (mode)
10265 && GET_CODE (x) == AND
10266 && GET_CODE (XEXP (x, 1)) == CONST_INT
10267 && INTVAL (XEXP (x, 1)) == -16)
10268 x = XEXP (x, 0);
10270 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
10271 return 0;
10272 if (legitimate_indirect_address_p (x, reg_ok_strict))
10273 return 1;
10274 if (TARGET_UPDATE
10275 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
10276 && mode_supports_pre_incdec_p (mode)
10277 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
10278 return 1;
10279 /* Handle restricted vector d-form offsets in ISA 3.0. */
10280 if (quad_offset_p)
10282 if (quad_address_p (x, mode, reg_ok_strict))
10283 return 1;
10285 else if (virtual_stack_registers_memory_p (x))
10286 return 1;
10288 else if (reg_offset_p)
10290 if (legitimate_small_data_p (mode, x))
10291 return 1;
10292 if (legitimate_constant_pool_address_p (x, mode,
10293 reg_ok_strict || lra_in_progress))
10294 return 1;
10295 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
10296 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
10297 return 1;
10300 /* For TImode, if we have TImode in VSX registers, only allow register
10301 indirect addresses. This will allow the values to go in either GPRs
10302 or VSX registers without reloading. The vector types would tend to
10303 go into VSX registers, so we allow REG+REG, while TImode seems
10304 somewhat split, in that some uses are GPR based, and some VSX based. */
10305 /* FIXME: We could loosen this by changing the following to
10306 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
10307 but currently we cannot allow REG+REG addressing for TImode. See
10308 PR72827 for complete details on how this ends up hoodwinking DSE. */
10309 if (mode == TImode && TARGET_VSX_TIMODE)
10310 return 0;
10311 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
10312 if (! reg_ok_strict
10313 && reg_offset_p
10314 && GET_CODE (x) == PLUS
10315 && GET_CODE (XEXP (x, 0)) == REG
10316 && (XEXP (x, 0) == virtual_stack_vars_rtx
10317 || XEXP (x, 0) == arg_pointer_rtx)
10318 && GET_CODE (XEXP (x, 1)) == CONST_INT)
10319 return 1;
10320 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
10321 return 1;
10322 if (!FLOAT128_2REG_P (mode)
10323 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
10324 || TARGET_POWERPC64
10325 || (mode != DFmode && mode != DDmode)
10326 || (TARGET_E500_DOUBLE && mode != DDmode))
10327 && (TARGET_POWERPC64 || mode != DImode)
10328 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
10329 && mode != PTImode
10330 && !avoiding_indexed_address_p (mode)
10331 && legitimate_indexed_address_p (x, reg_ok_strict))
10332 return 1;
10333 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
10334 && mode_supports_pre_modify_p (mode)
10335 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
10336 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
10337 reg_ok_strict, false)
10338 || (!avoiding_indexed_address_p (mode)
10339 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
10340 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
10341 return 1;
10342 if (reg_offset_p && !quad_offset_p
10343 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
10344 return 1;
10345 return 0;
10348 /* Debug version of rs6000_legitimate_address_p. */
10349 static bool
10350 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
10351 bool reg_ok_strict)
10353 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
10354 fprintf (stderr,
10355 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10356 "strict = %d, reload = %s, code = %s\n",
10357 ret ? "true" : "false",
10358 GET_MODE_NAME (mode),
10359 reg_ok_strict,
10360 (reload_completed
10361 ? "after"
10362 : (reload_in_progress ? "progress" : "before")),
10363 GET_RTX_NAME (GET_CODE (x)));
10364 debug_rtx (x);
10366 return ret;
10369 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10371 static bool
10372 rs6000_mode_dependent_address_p (const_rtx addr,
10373 addr_space_t as ATTRIBUTE_UNUSED)
10375 return rs6000_mode_dependent_address_ptr (addr);
10378 /* Go to LABEL if ADDR (a legitimate address expression)
10379 has an effect that depends on the machine mode it is used for.
10381 On the RS/6000 this is true of all integral offsets (since AltiVec
10382 and VSX modes don't allow them) or is a pre-increment or decrement.
10384 ??? Except that due to conceptual problems in offsettable_address_p
10385 we can't really report the problems of integral offsets. So leave
10386 this assuming that the adjustable offset must be valid for the
10387 sub-words of a TFmode operand, which is what we had before. */
10389 static bool
10390 rs6000_mode_dependent_address (const_rtx addr)
10392 switch (GET_CODE (addr))
10394 case PLUS:
10395 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10396 is considered a legitimate address before reload, so there
10397 are no offset restrictions in that case. Note that this
10398 condition is safe in strict mode because any address involving
10399 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10400 been rejected as illegitimate. */
10401 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10402 && XEXP (addr, 0) != arg_pointer_rtx
10403 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
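/* The offset is mode-dependent if a wider access starting here could
   push a later word past the 16-bit displacement range; 8 (or 12
   without 64-bit insns) bytes of headroom cover the remaining words
   of the widest mode (editor's note).  */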
10405 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10406 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
10408 break;
10410 case LO_SUM:
10411 /* Anything in the constant pool is sufficiently aligned that
10412 all bytes have the same high part address. */
10413 return !legitimate_constant_pool_address_p (addr, QImode, false);
10415 /* Auto-increment cases are now treated generically in recog.c. */
10416 case PRE_MODIFY:
10417 return TARGET_UPDATE;
10419 /* AND is only allowed in Altivec loads. */
10420 case AND:
10421 return true;
10423 default:
10424 break;
10427 return false;
10430 /* Debug version of rs6000_mode_dependent_address. */
10431 static bool
10432 rs6000_debug_mode_dependent_address (const_rtx addr)
10434 bool ret = rs6000_mode_dependent_address (addr);
10436 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10437 ret ? "true" : "false");
10438 debug_rtx (addr);
10440 return ret;
10443 /* Implement FIND_BASE_TERM. */
10445 rtx
10446 rs6000_find_base_term (rtx op)
10448 rtx base;
10450 base = op;
10451 if (GET_CODE (base) == CONST)
10452 base = XEXP (base, 0);
10453 if (GET_CODE (base) == PLUS)
10454 base = XEXP (base, 0);
10455 if (GET_CODE (base) == UNSPEC)
10456 switch (XINT (base, 1))
10458 case UNSPEC_TOCREL:
10459 case UNSPEC_MACHOPIC_OFFSET:
10460 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10461 for aliasing purposes. */
10462 return XVECEXP (base, 0, 0);
10465 return op;
10468 /* More elaborate version of recog's offsettable_memref_p predicate
10469 that works around the ??? note of rs6000_mode_dependent_address.
10470 In particular it accepts
10472 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10474 in 32-bit mode, which the recog predicate rejects. */
10476 static bool
10477 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
10479 bool worst_case;
10481 if (!MEM_P (op))
10482 return false;
10484 /* First mimic offsettable_memref_p. */
10485 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
10486 return true;
10488 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10489 the latter predicate knows nothing about the mode of the memory
10490 reference and, therefore, assumes that it is the largest supported
10491 mode (TFmode). As a consequence, legitimate offsettable memory
10492 references are rejected. rs6000_legitimate_offset_address_p contains
10493 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10494 at least with a little bit of help here given that we know the
10495 actual registers used. */
10496 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10497 || GET_MODE_SIZE (reg_mode) == 4);
10498 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10499 true, worst_case);
10502 /* Determine the reassociation width to be used in reassociate_bb.
10503 This takes into account how many parallel operations we
10504 can actually do of a given type, and also the latency.
10506 int add/sub 6/cycle
10507 mul 2/cycle
10508 vect add/sub/mul 2/cycle
10509 fp add/sub/mul 2/cycle
10510 dfp 1/cycle
10513 static int
10514 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10515 machine_mode mode)
10517 switch (rs6000_cpu)
10519 case PROCESSOR_POWER8:
10520 case PROCESSOR_POWER9:
10521 if (DECIMAL_FLOAT_MODE_P (mode))
10522 return 1;
10523 if (VECTOR_MODE_P (mode))
10524 return 4;
10525 if (INTEGRAL_MODE_P (mode))
10526 return opc == MULT_EXPR ? 4 : 6;
10527 if (FLOAT_MODE_P (mode))
10528 return 4;
10529 break;
10530 default:
10531 break;
10533 return 1;
10536 /* Change register usage conditional on target flags. */
10537 static void
10538 rs6000_conditional_register_usage (void)
10540 int i;
10542 if (TARGET_DEBUG_TARGET)
10543 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10545 /* Set MQ register fixed (already call_used) so that it will not be
10546 allocated. */
10547 fixed_regs[64] = 1;
10549 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10550 if (TARGET_64BIT)
10551 fixed_regs[13] = call_used_regs[13]
10552 = call_really_used_regs[13] = 1;
10554 /* Conditionally disable FPRs. */
10555 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
10556 for (i = 32; i < 64; i++)
10557 fixed_regs[i] = call_used_regs[i]
10558 = call_really_used_regs[i] = 1;
10560 /* The TOC register is not killed across calls in a way that is
10561 visible to the compiler. */
10562 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10563 call_really_used_regs[2] = 0;
10565 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10566 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10568 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10569 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10570 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10571 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10573 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10574 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10575 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10576 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10578 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10579 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10580 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10582 if (TARGET_SPE)
10584 global_regs[SPEFSCR_REGNO] = 1;
10585 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
10586 registers in prologues and epilogues. We no longer use r14
10587 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
10588 pool for link-compatibility with older versions of GCC. Once
10589 "old" code has died out, we can return r14 to the allocation
10590 pool. */
10591 fixed_regs[14]
10592 = call_used_regs[14]
10593 = call_really_used_regs[14] = 1;
10596 if (!TARGET_ALTIVEC && !TARGET_VSX)
10598 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10599 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10600 call_really_used_regs[VRSAVE_REGNO] = 1;
10603 if (TARGET_ALTIVEC || TARGET_VSX)
10604 global_regs[VSCR_REGNO] = 1;
10606 if (TARGET_ALTIVEC_ABI)
10608 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10609 call_used_regs[i] = call_really_used_regs[i] = 1;
10611 /* AIX reserves VR20:31 in non-extended ABI mode. */
10612 if (TARGET_XCOFF)
10613 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10614 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10619 /* Output insns to set DEST equal to the constant SOURCE as a series of
10620 lis, ori and shl instructions and return TRUE. */
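/* E.g. (editor's illustration, hypothetical register rD) SImode
   0x12345678 becomes
     lis rD,0x1234      # rD = 0x12340000
     ori rD,rD,0x5678   # rD |= 0x5678
   via the two SETs in the SImode case below.  */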
10622 bool
10623 rs6000_emit_set_const (rtx dest, rtx source)
10625 machine_mode mode = GET_MODE (dest);
10626 rtx temp, set;
10627 rtx_insn *insn;
10628 HOST_WIDE_INT c;
10630 gcc_checking_assert (CONST_INT_P (source));
10631 c = INTVAL (source);
10632 switch (mode)
10634 case E_QImode:
10635 case E_HImode:
10636 emit_insn (gen_rtx_SET (dest, source));
10637 return true;
10639 case E_SImode:
10640 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10642 emit_insn (gen_rtx_SET (copy_rtx (temp),
10643 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10644 emit_insn (gen_rtx_SET (dest,
10645 gen_rtx_IOR (SImode, copy_rtx (temp),
10646 GEN_INT (c & 0xffff))));
10647 break;
10649 case E_DImode:
10650 if (!TARGET_POWERPC64)
10652 rtx hi, lo;
10654 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10655 DImode);
10656 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10657 DImode);
10658 emit_move_insn (hi, GEN_INT (c >> 32));
10659 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10660 emit_move_insn (lo, GEN_INT (c));
10662 else
10663 rs6000_emit_set_long_const (dest, c);
10664 break;
10666 default:
10667 gcc_unreachable ();
10670 insn = get_last_insn ();
10671 set = single_set (insn);
10672 if (! CONSTANT_P (SET_SRC (set)))
10673 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10675 return true;
10678 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10679 Output insns to set DEST equal to the constant C as a series of
10680 lis, ori and shl instructions. */
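/* Editor's illustration for C == 0x123456789abcdef0 (the general,
   all-halfwords-nonzero case at the end of the function; rT and rD
   are hypothetical temp and destination registers):
     lis  rT,0x1234
     ori  rT,rT,0x5678
     sldi rT,rT,32
     oris rT,rT,0x9abc
     ori  rD,rT,0xdef0  */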
10682 static void
10683 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10685 rtx temp;
10686 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10688 ud1 = c & 0xffff;
10689 c = c >> 16;
10690 ud2 = c & 0xffff;
10691 c = c >> 16;
10692 ud3 = c & 0xffff;
10693 c = c >> 16;
10694 ud4 = c & 0xffff;
10696 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10697 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10698 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10700 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10701 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10703 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10705 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10706 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10707 if (ud1 != 0)
10708 emit_move_insn (dest,
10709 gen_rtx_IOR (DImode, copy_rtx (temp),
10710 GEN_INT (ud1)));
10712 else if (ud3 == 0 && ud4 == 0)
10714 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10716 gcc_assert (ud2 & 0x8000);
10717 emit_move_insn (copy_rtx (temp),
10718 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10719 if (ud1 != 0)
10720 emit_move_insn (copy_rtx (temp),
10721 gen_rtx_IOR (DImode, copy_rtx (temp),
10722 GEN_INT (ud1)));
10723 emit_move_insn (dest,
10724 gen_rtx_ZERO_EXTEND (DImode,
10725 gen_lowpart (SImode,
10726 copy_rtx (temp))));
10728 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10729 || (ud4 == 0 && ! (ud3 & 0x8000)))
10731 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10733 emit_move_insn (copy_rtx (temp),
10734 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10735 if (ud2 != 0)
10736 emit_move_insn (copy_rtx (temp),
10737 gen_rtx_IOR (DImode, copy_rtx (temp),
10738 GEN_INT (ud2)));
10739 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10740 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10741 GEN_INT (16)));
10742 if (ud1 != 0)
10743 emit_move_insn (dest,
10744 gen_rtx_IOR (DImode, copy_rtx (temp),
10745 GEN_INT (ud1)));
10747 else
10749 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10751 emit_move_insn (copy_rtx (temp),
10752 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10753 if (ud3 != 0)
10754 emit_move_insn (copy_rtx (temp),
10755 gen_rtx_IOR (DImode, copy_rtx (temp),
10756 GEN_INT (ud3)));
10758 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10759 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10760 GEN_INT (32)));
10761 if (ud2 != 0)
10762 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10763 gen_rtx_IOR (DImode, copy_rtx (temp),
10764 GEN_INT (ud2 << 16)));
10765 if (ud1 != 0)
10766 emit_move_insn (dest,
10767 gen_rtx_IOR (DImode, copy_rtx (temp),
10768 GEN_INT (ud1)));
10772 /* Helper for the following. Get rid of [r+r] memory refs
10773 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10775 static void
10776 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10778 if (reload_in_progress)
10779 return;
10781 if (GET_CODE (operands[0]) == MEM
10782 && GET_CODE (XEXP (operands[0], 0)) != REG
10783 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10784 GET_MODE (operands[0]), false))
10785 operands[0]
10786 = replace_equiv_address (operands[0],
10787 copy_addr_to_reg (XEXP (operands[0], 0)));
10789 if (GET_CODE (operands[1]) == MEM
10790 && GET_CODE (XEXP (operands[1], 0)) != REG
10791 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10792 GET_MODE (operands[1]), false))
10793 operands[1]
10794 = replace_equiv_address (operands[1],
10795 copy_addr_to_reg (XEXP (operands[1], 0)));
10798 /* Generate a vector of constants to permute MODE for a little-endian
10799 storage operation by swapping the two halves of a vector. */
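/* E.g. for V4SImode this returns { 2, 3, 0, 1 }, selecting the high
   half's elements first, i.e. a doubleword swap (editor's note).  */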
10800 static rtvec
10801 rs6000_const_vec (machine_mode mode)
10803 int i, subparts;
10804 rtvec v;
10806 switch (mode)
10808 case E_V1TImode:
10809 subparts = 1;
10810 break;
10811 case E_V2DFmode:
10812 case E_V2DImode:
10813 subparts = 2;
10814 break;
10815 case E_V4SFmode:
10816 case E_V4SImode:
10817 subparts = 4;
10818 break;
10819 case E_V8HImode:
10820 subparts = 8;
10821 break;
10822 case E_V16QImode:
10823 subparts = 16;
10824 break;
10825 default:
10826 gcc_unreachable();
10829 v = rtvec_alloc (subparts);
10831 for (i = 0; i < subparts / 2; ++i)
10832 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10833 for (i = subparts / 2; i < subparts; ++i)
10834 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10836 return v;
10839 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10840 for a VSX load or store operation. */
10841 rtx
10842 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10844 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10845 128-bit integers if they are allowed in VSX registers. */
10846 if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
10847 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10848 else
10850 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10851 return gen_rtx_VEC_SELECT (mode, source, par);
10855 /* Emit a little-endian load from vector memory location SOURCE to VSX
10856 register DEST in mode MODE. The load is done with two permuting
10857 insns that represent an lxvd2x and xxpermdi. */
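/* Editor's illustration: for a V4SImode value { 0, 1, 2, 3 } in
   memory, the lxvd2x-style load produces { 2, 3, 0, 1 } (doublewords
   swapped), and the xxpermdi-style register permute swaps the
   doublewords back, yielding { 0, 1, 2, 3 } in DEST.  */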
10858 void
10859 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10861 rtx tmp, permute_mem, permute_reg;
10863 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10864 V1TImode). */
10865 if (mode == TImode || mode == V1TImode)
10867 mode = V2DImode;
10868 dest = gen_lowpart (V2DImode, dest);
10869 source = adjust_address (source, V2DImode, 0);
10872 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10873 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10874 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10875 emit_insn (gen_rtx_SET (tmp, permute_mem));
10876 emit_insn (gen_rtx_SET (dest, permute_reg));
10879 /* Emit a little-endian store to vector memory location DEST from VSX
10880 register SOURCE in mode MODE. The store is done with two permuting
10881 insns that represent an xxpermdi and an stxvd2x. */
10882 void
10883 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10885 rtx tmp, permute_src, permute_tmp;
10887 /* This should never be called during or after reload, because it does
10888 not re-permute the source register. It is intended only for use
10889 during expand. */
10890 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10892 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10893 V1TImode). */
10894 if (mode == TImode || mode == V1TImode)
10896 mode = V2DImode;
10897 dest = adjust_address (dest, V2DImode, 0);
10898 source = gen_lowpart (V2DImode, source);
10901 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10902 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10903 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10904 emit_insn (gen_rtx_SET (tmp, permute_src));
10905 emit_insn (gen_rtx_SET (dest, permute_tmp));
10908 /* Emit a sequence representing a little-endian VSX load or store,
10909 moving data from SOURCE to DEST in mode MODE. This is done
10910 separately from rs6000_emit_move to ensure it is called only
10911 during expand. LE VSX loads and stores introduced later are
10912 handled with a split. The expand-time RTL generation allows
10913 us to optimize away redundant pairs of register-permutes. */
10914 void
10915 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10917 gcc_assert (!BYTES_BIG_ENDIAN
10918 && VECTOR_MEM_VSX_P (mode)
10919 && !TARGET_P9_VECTOR
10920 && !gpr_or_gpr_p (dest, source)
10921 && (MEM_P (source) ^ MEM_P (dest)));
10923 if (MEM_P (source))
10925 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10926 rs6000_emit_le_vsx_load (dest, source, mode);
10928 else
10930 if (!REG_P (source))
10931 source = force_reg (mode, source);
10932 rs6000_emit_le_vsx_store (dest, source, mode);
10936 /* Return whether an SFmode or SImode move can be done without converting one
10937 mode to another. This arises when we have:
10939 (SUBREG:SF (REG:SI ...))
10940 (SUBREG:SI (REG:SF ...))
10942 and one of the values is in a floating point/vector register, where SFmode
10943 scalars are stored in DFmode format. */
10945 bool
10946 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10948 if (TARGET_ALLOW_SF_SUBREG)
10949 return true;
10951 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10952 return true;
10954 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10955 return true;
10957 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10958 if (SUBREG_P (dest))
10960 rtx dest_subreg = SUBREG_REG (dest);
10961 rtx src_subreg = SUBREG_REG (src);
10962 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10965 return false;
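/* Two examples of the tests above, assuming TARGET_ALLOW_SF_SUBREG is
   off: (set (reg:SI) (subreg:SI (reg:SF))) gets false, because the
   SFmode value may live in a VSX register in DFmode format and must be
   converted first, while (set (subreg:SI (reg:SF)) (subreg:SI (reg:SF)))
   gets true, since both sides refer to the same representation.  */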
10969 /* Helper function to change moves with:
10971 (SUBREG:SF (REG:SI)) and
10972 (SUBREG:SI (REG:SF))
10974 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10975 values are stored as DFmode values in the VSX registers. We need to convert
10976 the bits before we can use a direct move or operate on the bits in the
10977 vector register as an integer type.
10979 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10981 static bool
10982 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10984 if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
10985 && !lra_in_progress
10986 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10987 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10989 rtx inner_source = SUBREG_REG (source);
10990 machine_mode inner_mode = GET_MODE (inner_source);
10992 if (mode == SImode && inner_mode == SFmode)
10994 emit_insn (gen_movsi_from_sf (dest, inner_source));
10995 return true;
10998 if (mode == SFmode && inner_mode == SImode)
11000 emit_insn (gen_movsf_from_si (dest, inner_source));
11001 return true;
11005 return false;
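/* For instance, given TARGET_DIRECT_MOVE_64BIT, a move expanded as

       (set (reg:SI r) (subreg:SI (reg:SF f) 0))

   is caught here and emitted via the movsi_from_sf pattern instead, so
   the DFmode-format bits held in the VSX register are converted to
   SFmode format before being transferred, rather than reinterpreted
   as-is.  */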
11008 /* Emit a move from SOURCE to DEST in mode MODE. */
11009 void
11010 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
11012 rtx operands[2];
11013 operands[0] = dest;
11014 operands[1] = source;
11016 if (TARGET_DEBUG_ADDR)
11018 fprintf (stderr,
11019 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
11020 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
11021 GET_MODE_NAME (mode),
11022 reload_in_progress,
11023 reload_completed,
11024 can_create_pseudo_p ());
11025 debug_rtx (dest);
11026 fprintf (stderr, "source:\n");
11027 debug_rtx (source);
11030 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
11031 if (CONST_WIDE_INT_P (operands[1])
11032 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
11034 /* This should be fixed with the introduction of CONST_WIDE_INT. */
11035 gcc_unreachable ();
11038 /* See if we need to special case SImode/SFmode SUBREG moves. */
11039 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
11040 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
11041 return;
11043 /* Check if GCC is setting up a block move that will end up using FP
11044 registers as temporaries. We must make sure this is acceptable. */
11045 if (GET_CODE (operands[0]) == MEM
11046 && GET_CODE (operands[1]) == MEM
11047 && mode == DImode
11048 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
11049 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
11050 && ! (rs6000_slow_unaligned_access (SImode,
11051 (MEM_ALIGN (operands[0]) > 32
11052 ? 32 : MEM_ALIGN (operands[0])))
11053 || rs6000_slow_unaligned_access (SImode,
11054 (MEM_ALIGN (operands[1]) > 32
11055 ? 32 : MEM_ALIGN (operands[1]))))
11056 && ! MEM_VOLATILE_P (operands [0])
11057 && ! MEM_VOLATILE_P (operands [1]))
11059 emit_move_insn (adjust_address (operands[0], SImode, 0),
11060 adjust_address (operands[1], SImode, 0));
11061 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
11062 adjust_address (copy_rtx (operands[1]), SImode, 4));
11063 return;
11066 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
11067 && !gpc_reg_operand (operands[1], mode))
11068 operands[1] = force_reg (mode, operands[1]);
11070 /* Recognize the case where operand[1] is a reference to thread-local
11071 data and load its address to a register. */
11072 if (tls_referenced_p (operands[1]))
11074 enum tls_model model;
11075 rtx tmp = operands[1];
11076 rtx addend = NULL;
11078 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
11080 addend = XEXP (XEXP (tmp, 0), 1);
11081 tmp = XEXP (XEXP (tmp, 0), 0);
11084 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
11085 model = SYMBOL_REF_TLS_MODEL (tmp);
11086 gcc_assert (model != 0);
11088 tmp = rs6000_legitimize_tls_address (tmp, model);
11089 if (addend)
11091 tmp = gen_rtx_PLUS (mode, tmp, addend);
11092 tmp = force_operand (tmp, operands[0]);
11094 operands[1] = tmp;
11097 /* Handle the case where reload calls us with an invalid address. */
11098 if (reload_in_progress && mode == Pmode
11099 && (! general_operand (operands[1], mode)
11100 || ! nonimmediate_operand (operands[0], mode)))
11101 goto emit_set;
11103 /* 128-bit constant floating-point values on Darwin should really be loaded
11104 as two parts. However, this premature splitting is a problem when DFmode
11105 values can go into Altivec registers. */
11106 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
11107 && GET_CODE (operands[1]) == CONST_DOUBLE)
11109 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
11110 simplify_gen_subreg (DFmode, operands[1], mode, 0),
11111 DFmode);
11112 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
11113 GET_MODE_SIZE (DFmode)),
11114 simplify_gen_subreg (DFmode, operands[1], mode,
11115 GET_MODE_SIZE (DFmode)),
11116 DFmode);
11117 return;
11120 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
11121 cfun->machine->sdmode_stack_slot =
11122 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
11125 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11126 p1:SD) if p1 is not of floating point class and p0 is spilled as
11127 we can have no analogous movsd_store for this. */
11128 if (lra_in_progress && mode == DDmode
11129 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11130 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11131 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
11132 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
11134 enum reg_class cl;
11135 int regno = REGNO (SUBREG_REG (operands[1]));
11137 if (regno >= FIRST_PSEUDO_REGISTER)
11139 cl = reg_preferred_class (regno);
11140 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
11142 if (regno >= 0 && ! FP_REGNO_P (regno))
11144 mode = SDmode;
11145 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
11146 operands[1] = SUBREG_REG (operands[1]);
11149 if (lra_in_progress
11150 && mode == SDmode
11151 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11152 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11153 && (REG_P (operands[1])
11154 || (GET_CODE (operands[1]) == SUBREG
11155 && REG_P (SUBREG_REG (operands[1])))))
11157 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
11158 ? SUBREG_REG (operands[1]) : operands[1]);
11159 enum reg_class cl;
11161 if (regno >= FIRST_PSEUDO_REGISTER)
11163 cl = reg_preferred_class (regno);
11164 gcc_assert (cl != NO_REGS);
11165 regno = ira_class_hard_regs[cl][0];
11167 if (FP_REGNO_P (regno))
11169 if (GET_MODE (operands[0]) != DDmode)
11170 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
11171 emit_insn (gen_movsd_store (operands[0], operands[1]));
11173 else if (INT_REGNO_P (regno))
11174 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11175 else
11176 gcc_unreachable ();
11177 return;
11179 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11180 p:DD)) if p0 is not of floating point class and p1 is spilled as
11181 we can have no analogous movsd_load for this. */
11182 if (lra_in_progress && mode == DDmode
11183 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
11184 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
11185 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11186 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11188 enum reg_class cl;
11189 int regno = REGNO (SUBREG_REG (operands[0]));
11191 if (regno >= FIRST_PSEUDO_REGISTER)
11193 cl = reg_preferred_class (regno);
11194 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
11196 if (regno >= 0 && ! FP_REGNO_P (regno))
11198 mode = SDmode;
11199 operands[0] = SUBREG_REG (operands[0]);
11200 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
11203 if (lra_in_progress
11204 && mode == SDmode
11205 && (REG_P (operands[0])
11206 || (GET_CODE (operands[0]) == SUBREG
11207 && REG_P (SUBREG_REG (operands[0]))))
11208 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11209 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11211 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
11212 ? SUBREG_REG (operands[0]) : operands[0]);
11213 enum reg_class cl;
11215 if (regno >= FIRST_PSEUDO_REGISTER)
11217 cl = reg_preferred_class (regno);
11218 gcc_assert (cl != NO_REGS);
11219 regno = ira_class_hard_regs[cl][0];
11221 if (FP_REGNO_P (regno))
11223 if (GET_MODE (operands[1]) != DDmode)
11224 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
11225 emit_insn (gen_movsd_load (operands[0], operands[1]));
11227 else if (INT_REGNO_P (regno))
11228 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11229 else
11230 gcc_unreachable ();
11231 return;
11234 if (reload_in_progress
11235 && mode == SDmode
11236 && cfun->machine->sdmode_stack_slot != NULL_RTX
11237 && MEM_P (operands[0])
11238 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
11239 && REG_P (operands[1]))
11241 if (FP_REGNO_P (REGNO (operands[1])))
11243 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
11244 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11245 emit_insn (gen_movsd_store (mem, operands[1]));
11247 else if (INT_REGNO_P (REGNO (operands[1])))
11249 rtx mem = operands[0];
11250 if (BYTES_BIG_ENDIAN)
11251 mem = adjust_address_nv (mem, mode, 4);
11252 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11253 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
11255 else
11256 gcc_unreachable ();
11257 return;
11259 if (reload_in_progress
11260 && mode == SDmode
11261 && REG_P (operands[0])
11262 && MEM_P (operands[1])
11263 && cfun->machine->sdmode_stack_slot != NULL_RTX
11264 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
11266 if (FP_REGNO_P (REGNO (operands[0])))
11268 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
11269 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11270 emit_insn (gen_movsd_load (operands[0], mem));
11272 else if (INT_REGNO_P (REGNO (operands[0])))
11274 rtx mem = operands[1];
11275 if (BYTES_BIG_ENDIAN)
11276 mem = adjust_address_nv (mem, mode, 4);
11277 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11278 emit_insn (gen_movsd_hardfloat (operands[0], mem));
11280 else
11281 gcc_unreachable ();
11282 return;
11285 /* FIXME: In the long term, this switch statement should go away
11286 and be replaced by a sequence of tests based on things like
11287 mode == Pmode. */
11288 switch (mode)
11290 case E_HImode:
11291 case E_QImode:
11292 if (CONSTANT_P (operands[1])
11293 && GET_CODE (operands[1]) != CONST_INT)
11294 operands[1] = force_const_mem (mode, operands[1]);
11295 break;
11297 case E_TFmode:
11298 case E_TDmode:
11299 case E_IFmode:
11300 case E_KFmode:
11301 if (FLOAT128_2REG_P (mode))
11302 rs6000_eliminate_indexed_memrefs (operands);
11303 /* fall through */
11305 case E_DFmode:
11306 case E_DDmode:
11307 case E_SFmode:
11308 case E_SDmode:
11309 if (CONSTANT_P (operands[1])
11310 && ! easy_fp_constant (operands[1], mode))
11311 operands[1] = force_const_mem (mode, operands[1]);
11312 break;
11314 case E_V16QImode:
11315 case E_V8HImode:
11316 case E_V4SFmode:
11317 case E_V4SImode:
11318 case E_V4HImode:
11319 case E_V2SFmode:
11320 case E_V2SImode:
11321 case E_V1DImode:
11322 case E_V2DFmode:
11323 case E_V2DImode:
11324 case E_V1TImode:
11325 if (CONSTANT_P (operands[1])
11326 && !easy_vector_constant (operands[1], mode))
11327 operands[1] = force_const_mem (mode, operands[1]);
11328 break;
11330 case E_SImode:
11331 case E_DImode:
11332 /* Use the default pattern for the address of ELF small data. */
11333 if (TARGET_ELF
11334 && mode == Pmode
11335 && DEFAULT_ABI == ABI_V4
11336 && (GET_CODE (operands[1]) == SYMBOL_REF
11337 || GET_CODE (operands[1]) == CONST)
11338 && small_data_operand (operands[1], mode))
11340 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11341 return;
11344 if (DEFAULT_ABI == ABI_V4
11345 && mode == Pmode && mode == SImode
11346 && flag_pic == 1 && got_operand (operands[1], mode))
11348 emit_insn (gen_movsi_got (operands[0], operands[1]));
11349 return;
11352 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11353 && TARGET_NO_TOC
11354 && ! flag_pic
11355 && mode == Pmode
11356 && CONSTANT_P (operands[1])
11357 && GET_CODE (operands[1]) != HIGH
11358 && GET_CODE (operands[1]) != CONST_INT)
11360 rtx target = (!can_create_pseudo_p ()
11361 ? operands[0]
11362 : gen_reg_rtx (mode));
11364 /* If this is a function address on -mcall-aixdesc,
11365 convert it to the address of the descriptor. */
11366 if (DEFAULT_ABI == ABI_AIX
11367 && GET_CODE (operands[1]) == SYMBOL_REF
11368 && XSTR (operands[1], 0)[0] == '.')
11370 const char *name = XSTR (operands[1], 0);
11371 rtx new_ref;
11372 while (*name == '.')
11373 name++;
11374 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11375 CONSTANT_POOL_ADDRESS_P (new_ref)
11376 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11377 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11378 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11379 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11380 operands[1] = new_ref;
11383 if (DEFAULT_ABI == ABI_DARWIN)
11385 #if TARGET_MACHO
11386 if (MACHO_DYNAMIC_NO_PIC_P)
11388 /* Take care of any required data indirection. */
11389 operands[1] = rs6000_machopic_legitimize_pic_address (
11390 operands[1], mode, operands[0]);
11391 if (operands[0] != operands[1])
11392 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11393 return;
11395 #endif
11396 emit_insn (gen_macho_high (target, operands[1]));
11397 emit_insn (gen_macho_low (operands[0], target, operands[1]));
11398 return;
11401 emit_insn (gen_elf_high (target, operands[1]));
11402 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11403 return;
11406 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11407 and we have put it in the TOC, we just need to make a TOC-relative
11408 reference to it. */
11409 if (TARGET_TOC
11410 && GET_CODE (operands[1]) == SYMBOL_REF
11411 && use_toc_relative_ref (operands[1], mode))
11412 operands[1] = create_TOC_reference (operands[1], operands[0]);
11413 else if (mode == Pmode
11414 && CONSTANT_P (operands[1])
11415 && GET_CODE (operands[1]) != HIGH
11416 && ((GET_CODE (operands[1]) != CONST_INT
11417 && ! easy_fp_constant (operands[1], mode))
11418 || (GET_CODE (operands[1]) == CONST_INT
11419 && (num_insns_constant (operands[1], mode)
11420 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11421 || (GET_CODE (operands[0]) == REG
11422 && FP_REGNO_P (REGNO (operands[0]))))
11423 && !toc_relative_expr_p (operands[1], false)
11424 && (TARGET_CMODEL == CMODEL_SMALL
11425 || can_create_pseudo_p ()
11426 || (REG_P (operands[0])
11427 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11430 #if TARGET_MACHO
11431 /* Darwin uses a special PIC legitimizer. */
11432 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11434 operands[1] =
11435 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11436 operands[0]);
11437 if (operands[0] != operands[1])
11438 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11439 return;
11441 #endif
11443 /* If we are to limit the number of things we put in the TOC and
11444 this is a symbol plus a constant we can add in one insn,
11445 just put the symbol in the TOC and add the constant. Don't do
11446 this if reload is in progress. */
11447 if (GET_CODE (operands[1]) == CONST
11448 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
11449 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11450 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11451 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11452 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
11453 && ! side_effects_p (operands[0]))
11455 rtx sym =
11456 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11457 rtx other = XEXP (XEXP (operands[1], 0), 1);
11459 sym = force_reg (mode, sym);
11460 emit_insn (gen_add3_insn (operands[0], sym, other));
11461 return;
11464 operands[1] = force_const_mem (mode, operands[1]);
11466 if (TARGET_TOC
11467 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11468 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11470 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11471 operands[0]);
11472 operands[1] = gen_const_mem (mode, tocref);
11473 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11476 break;
11478 case E_TImode:
11479 if (!VECTOR_MEM_VSX_P (TImode))
11480 rs6000_eliminate_indexed_memrefs (operands);
11481 break;
11483 case E_PTImode:
11484 rs6000_eliminate_indexed_memrefs (operands);
11485 break;
11487 default:
11488 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11491 /* Above, we may have called force_const_mem which may have returned
11492 an invalid address. If we can, fix this up; otherwise, reload will
11493 have to deal with it. */
11494 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
11495 operands[1] = validize_mem (operands[1]);
11497 emit_set:
11498 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11501 /* Return true if a structure, union or array containing FIELD should be
11502 accessed using `BLKMODE'.
11504 For the SPE, simd types are V2SI, and gcc can be tempted to put the
11505 entire thing in a DI and use subregs to access the internals.
11506 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
11507 back-end. Because a single GPR can hold a V2SI, but not a DI, the
11508 best thing to do is set structs to BLKmode and avoid Severe Tire
11509 Damage.
11511 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
11512 fit into 1, whereas DI still needs two. */
11514 static bool
11515 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
11517 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
11518 || (TARGET_E500_DOUBLE && mode == DFmode));
11521 /* Nonzero if we can use a floating-point register to pass this arg. */
11522 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11523 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11524 && (CUM)->fregno <= FP_ARG_MAX_REG \
11525 && TARGET_HARD_FLOAT && TARGET_FPRS)
11527 /* Nonzero if we can use an AltiVec register to pass this arg. */
11528 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11529 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11530 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11531 && TARGET_ALTIVEC_ABI \
11532 && (NAMED))
11534 /* Walk down the type tree of TYPE counting consecutive base elements.
11535 If *MODEP is VOIDmode, then set it to the first valid floating point
11536 or vector type. If a non-floating point or vector type is found, or
11537 if a floating point or vector type that doesn't match a non-VOIDmode
11538 *MODEP is found, then return -1, otherwise return the count in the
11539 sub-tree. */
11541 static int
11542 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
11544 machine_mode mode;
11545 HOST_WIDE_INT size;
11547 switch (TREE_CODE (type))
11549 case REAL_TYPE:
11550 mode = TYPE_MODE (type);
11551 if (!SCALAR_FLOAT_MODE_P (mode))
11552 return -1;
11554 if (*modep == VOIDmode)
11555 *modep = mode;
11557 if (*modep == mode)
11558 return 1;
11560 break;
11562 case COMPLEX_TYPE:
11563 mode = TYPE_MODE (TREE_TYPE (type));
11564 if (!SCALAR_FLOAT_MODE_P (mode))
11565 return -1;
11567 if (*modep == VOIDmode)
11568 *modep = mode;
11570 if (*modep == mode)
11571 return 2;
11573 break;
11575 case VECTOR_TYPE:
11576 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
11577 return -1;
11579 /* Use V4SImode as representative of all 128-bit vector types. */
11580 size = int_size_in_bytes (type);
11581 switch (size)
11583 case 16:
11584 mode = V4SImode;
11585 break;
11586 default:
11587 return -1;
11590 if (*modep == VOIDmode)
11591 *modep = mode;
11593 /* Vector modes are considered to be opaque: two vectors are
11594 equivalent for the purposes of being homogeneous aggregates
11595 if they are the same size. */
11596 if (*modep == mode)
11597 return 1;
11599 break;
11601 case ARRAY_TYPE:
11603 int count;
11604 tree index = TYPE_DOMAIN (type);
11606 /* Can't handle incomplete types nor sizes that are not
11607 fixed. */
11608 if (!COMPLETE_TYPE_P (type)
11609 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11610 return -1;
11612 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11613 if (count == -1
11614 || !index
11615 || !TYPE_MAX_VALUE (index)
11616 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11617 || !TYPE_MIN_VALUE (index)
11618 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11619 || count < 0)
11620 return -1;
11622 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11623 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11625 /* There must be no padding. */
11626 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11627 return -1;
11629 return count;
11632 case RECORD_TYPE:
11634 int count = 0;
11635 int sub_count;
11636 tree field;
11638 /* Can't handle incomplete types nor sizes that are not
11639 fixed. */
11640 if (!COMPLETE_TYPE_P (type)
11641 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11642 return -1;
11644 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11646 if (TREE_CODE (field) != FIELD_DECL)
11647 continue;
11649 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11650 if (sub_count < 0)
11651 return -1;
11652 count += sub_count;
11655 /* There must be no padding. */
11656 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11657 return -1;
11659 return count;
11662 case UNION_TYPE:
11663 case QUAL_UNION_TYPE:
11665 /* These aren't very interesting except in a degenerate case. */
11666 int count = 0;
11667 int sub_count;
11668 tree field;
11670 /* Can't handle incomplete types nor sizes that are not
11671 fixed. */
11672 if (!COMPLETE_TYPE_P (type)
11673 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11674 return -1;
11676 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11678 if (TREE_CODE (field) != FIELD_DECL)
11679 continue;
11681 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11682 if (sub_count < 0)
11683 return -1;
11684 count = count > sub_count ? count : sub_count;
11687 /* There must be no padding. */
11688 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11689 return -1;
11691 return count;
11694 default:
11695 break;
11698 return -1;
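/* A few worked examples of the walk above:

       struct { double x, y; }        -> returns 2, *MODEP == DFmode
       double a[4]                    -> returns 4, *MODEP == DFmode
       _Complex double                -> returns 2, *MODEP == DFmode
       struct { double x; float y; }  -> returns -1 (mixed modes)
       struct { double x; int n; }    -> returns -1 (non-FP field)  */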
11701 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11702 float or vector aggregate that shall be passed in FP/vector registers
11703 according to the ELFv2 ABI, return the homogeneous element mode in
11704 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11706 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11708 static bool
11709 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11710 machine_mode *elt_mode,
11711 int *n_elts)
11713 /* Note that we do not accept complex types at the top level as
11714 homogeneous aggregates; these types are handled via the
11715 targetm.calls.split_complex_arg mechanism. Complex types
11716 can be elements of homogeneous aggregates, however. */
11717 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11719 machine_mode field_mode = VOIDmode;
11720 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11722 if (field_count > 0)
11724 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11725 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11727 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11728 up to AGGR_ARG_NUM_REG registers. */
11729 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11731 if (elt_mode)
11732 *elt_mode = field_mode;
11733 if (n_elts)
11734 *n_elts = field_count;
11735 return true;
11740 if (elt_mode)
11741 *elt_mode = mode;
11742 if (n_elts)
11743 *n_elts = 1;
11744 return false;
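/* For example, under the ELFv2 ABI a parameter of type
   struct { double x, y; } is discovered as a homogeneous aggregate with
   *ELT_MODE == DFmode and *N_ELTS == 2 (each DFmode element needs one
   FPR, so 2 * 1 <= AGGR_ARG_NUM_REG), while struct { double x; int n; }
   fails the candidate walk and falls back to *ELT_MODE == MODE,
   *N_ELTS == 1.  */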
11747 /* Return a nonzero value to say to return the function value in
11748 memory, just as large structures are always returned. TYPE will be
11749 the data type of the value, and FNTYPE will be the type of the
11750 function doing the returning, or @code{NULL} for libcalls.
11752 The AIX ABI for the RS/6000 specifies that all structures are
11753 returned in memory. The Darwin ABI does the same.
11755 For the Darwin 64 Bit ABI, a function result can be returned in
11756 registers or in memory, depending on the size of the return data
11757 type. If it is returned in registers, the value occupies the same
11758 registers as it would if it were the first and only function
11759 argument. Otherwise, the function places its result in memory at
11760 the location pointed to by GPR3.
11762 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11763 but a draft put them in memory, and GCC used to implement the draft
11764 instead of the final standard. Therefore, aix_struct_return
11765 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11766 compatibility can change DRAFT_V4_STRUCT_RET to override the
11767 default, and -m switches get the final word. See
11768 rs6000_option_override_internal for more details.
11770 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11771 long double support is enabled. These values are returned in memory.
11773 int_size_in_bytes returns -1 for variable size objects, which go in
11774 memory always. The cast to unsigned makes -1 > 8. */
11776 static bool
11777 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11779 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11780 if (TARGET_MACHO
11781 && rs6000_darwin64_abi
11782 && TREE_CODE (type) == RECORD_TYPE
11783 && int_size_in_bytes (type) > 0)
11785 CUMULATIVE_ARGS valcum;
11786 rtx valret;
11788 valcum.words = 0;
11789 valcum.fregno = FP_ARG_MIN_REG;
11790 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11791 /* Do a trial code generation as if this were going to be passed
11792 as an argument; if any part goes in memory, we return NULL. */
11793 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11794 if (valret)
11795 return false;
11796 /* Otherwise fall through to more conventional ABI rules. */
11799 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
11800 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11801 NULL, NULL))
11802 return false;
11804 /* The ELFv2 ABI returns aggregates up to 16B in registers */
11805 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11806 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11807 return false;
11809 if (AGGREGATE_TYPE_P (type)
11810 && (aix_struct_return
11811 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11812 return true;
11814 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11815 modes only exist for GCC vector types if -maltivec. */
11816 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11817 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11818 return false;
11820 /* Return synthetic vectors in memory. */
11821 if (TREE_CODE (type) == VECTOR_TYPE
11822 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11824 static bool warned_for_return_big_vectors = false;
11825 if (!warned_for_return_big_vectors)
11827 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11828 "non-standard ABI extension with no compatibility guarantee");
11829 warned_for_return_big_vectors = true;
11831 return true;
11834 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11835 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11836 return true;
11838 return false;
11841 /* Specify whether values returned in registers should be at the most
11842 significant end of a register. We want aggregates returned by
11843 value to match the way aggregates are passed to functions. */
11845 static bool
11846 rs6000_return_in_msb (const_tree valtype)
11848 return (DEFAULT_ABI == ABI_ELFv2
11849 && BYTES_BIG_ENDIAN
11850 && AGGREGATE_TYPE_P (valtype)
11851 && rs6000_function_arg_padding (TYPE_MODE (valtype),
11852 valtype) == PAD_UPWARD);
11855 #ifdef HAVE_AS_GNU_ATTRIBUTE
11856 /* Return TRUE if a call to function FNDECL may be one that
11857 potentially affects the function calling ABI of the object file. */
11859 static bool
11860 call_ABI_of_interest (tree fndecl)
11862 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11864 struct cgraph_node *c_node;
11866 /* Libcalls are always interesting. */
11867 if (fndecl == NULL_TREE)
11868 return true;
11870 /* Any call to an external function is interesting. */
11871 if (DECL_EXTERNAL (fndecl))
11872 return true;
11874 /* Interesting functions that we are emitting in this object file. */
11875 c_node = cgraph_node::get (fndecl);
11876 c_node = c_node->ultimate_alias_target ();
11877 return !c_node->only_called_directly_p ();
11879 return false;
11881 #endif
11883 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11884 for a call to a function whose data type is FNTYPE.
11885 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
11887 For incoming args we set the number of arguments in the prototype large
11888 so we never return a PARALLEL. */
11890 void
11891 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11892 rtx libname ATTRIBUTE_UNUSED, int incoming,
11893 int libcall, int n_named_args,
11894 tree fndecl ATTRIBUTE_UNUSED,
11895 machine_mode return_mode ATTRIBUTE_UNUSED)
11897 static CUMULATIVE_ARGS zero_cumulative;
11899 *cum = zero_cumulative;
11900 cum->words = 0;
11901 cum->fregno = FP_ARG_MIN_REG;
11902 cum->vregno = ALTIVEC_ARG_MIN_REG;
11903 cum->prototype = (fntype && prototype_p (fntype));
11904 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11905 ? CALL_LIBCALL : CALL_NORMAL);
11906 cum->sysv_gregno = GP_ARG_MIN_REG;
11907 cum->stdarg = stdarg_p (fntype);
11908 cum->libcall = libcall;
11910 cum->nargs_prototype = 0;
11911 if (incoming || cum->prototype)
11912 cum->nargs_prototype = n_named_args;
11914 /* Check for a longcall attribute. */
11915 if ((!fntype && rs6000_default_long_calls)
11916 || (fntype
11917 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11918 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11919 cum->call_cookie |= CALL_LONG;
11921 if (TARGET_DEBUG_ARG)
11923 fprintf (stderr, "\ninit_cumulative_args:");
11924 if (fntype)
11926 tree ret_type = TREE_TYPE (fntype);
11927 fprintf (stderr, " ret code = %s,",
11928 get_tree_code_name (TREE_CODE (ret_type)));
11931 if (cum->call_cookie & CALL_LONG)
11932 fprintf (stderr, " longcall,");
11934 fprintf (stderr, " proto = %d, nargs = %d\n",
11935 cum->prototype, cum->nargs_prototype);
11938 #ifdef HAVE_AS_GNU_ATTRIBUTE
11939 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11941 cum->escapes = call_ABI_of_interest (fndecl);
11942 if (cum->escapes)
11944 tree return_type;
11946 if (fntype)
11948 return_type = TREE_TYPE (fntype);
11949 return_mode = TYPE_MODE (return_type);
11951 else
11952 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11954 if (return_type != NULL)
11956 if (TREE_CODE (return_type) == RECORD_TYPE
11957 && TYPE_TRANSPARENT_AGGR (return_type))
11959 return_type = TREE_TYPE (first_field (return_type));
11960 return_mode = TYPE_MODE (return_type);
11962 if (AGGREGATE_TYPE_P (return_type)
11963 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11964 <= 8))
11965 rs6000_returns_struct = true;
11967 if (SCALAR_FLOAT_MODE_P (return_mode))
11969 rs6000_passes_float = true;
11970 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11971 && (FLOAT128_IBM_P (return_mode)
11972 || FLOAT128_IEEE_P (return_mode)
11973 || (return_type != NULL
11974 && (TYPE_MAIN_VARIANT (return_type)
11975 == long_double_type_node))))
11976 rs6000_passes_long_double = true;
11978 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11979 || SPE_VECTOR_MODE (return_mode))
11980 rs6000_passes_vector = true;
11983 #endif
11985 if (fntype
11986 && !TARGET_ALTIVEC
11987 && TARGET_ALTIVEC_ABI
11988 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11990 error ("cannot return value in vector register because"
11991 " altivec instructions are disabled, use -maltivec"
11992 " to enable them");
11996 /* The mode the ABI uses for a word. This is not the same as word_mode
11997 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11999 static scalar_int_mode
12000 rs6000_abi_word_mode (void)
12002 return TARGET_32BIT ? SImode : DImode;
12005 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
12006 static char *
12007 rs6000_offload_options (void)
12009 if (TARGET_64BIT)
12010 return xstrdup ("-foffload-abi=lp64");
12011 else
12012 return xstrdup ("-foffload-abi=ilp32");
12015 /* On rs6000, function arguments are promoted, as are function return
12016 values. */
12018 static machine_mode
12019 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
12020 machine_mode mode,
12021 int *punsignedp ATTRIBUTE_UNUSED,
12022 const_tree, int)
12024 PROMOTE_MODE (mode, *punsignedp, type);
12026 return mode;
12029 /* Return true if TYPE must be passed on the stack and not in registers. */
12031 static bool
12032 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
12034 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
12035 return must_pass_in_stack_var_size (mode, type);
12036 else
12037 return must_pass_in_stack_var_size_or_pad (mode, type);
12040 static inline bool
12041 is_complex_IBM_long_double (machine_mode mode)
12043 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
12046 /* Whether ABI_V4 passes MODE args to a function in floating point
12047 registers. */
12049 static bool
12050 abi_v4_pass_in_fpr (machine_mode mode)
12052 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
12053 return false;
12054 if (TARGET_SINGLE_FLOAT && mode == SFmode)
12055 return true;
12056 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
12057 return true;
12058 /* ABI_V4 passes complex IBM long double in 8 gprs.
12059 Stupid, but we can't change the ABI now. */
12060 if (is_complex_IBM_long_double (mode))
12061 return false;
12062 if (FLOAT128_2REG_P (mode))
12063 return true;
12064 if (DECIMAL_FLOAT_MODE_P (mode))
12065 return true;
12066 return false;
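/* So under ABI_V4 with hard float: SFmode (given TARGET_SINGLE_FLOAT)
   and DFmode (given TARGET_DOUBLE_FLOAT) scalars, IBM double-double
   (TFmode/IFmode) and the decimal float modes go in FPRs, while complex
   IBM long double is explicitly kept out and passed in GPRs as
   described above.  */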
12069 /* Implement TARGET_FUNCTION_ARG_PADDING
12071 For the AIX ABI structs are always stored left shifted in their
12072 argument slot. */
12074 static pad_direction
12075 rs6000_function_arg_padding (machine_mode mode, const_tree type)
12077 #ifndef AGGREGATE_PADDING_FIXED
12078 #define AGGREGATE_PADDING_FIXED 0
12079 #endif
12080 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
12081 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
12082 #endif
12084 if (!AGGREGATE_PADDING_FIXED)
12086 /* GCC used to pass structures of the same size as integer types as
12087 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
12088 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
12089 passed padded downward, except that -mstrict-align further
12090 muddied the water in that multi-component structures of 2 and 4
12091 bytes in size were passed padded upward.
12093 The following arranges for best compatibility with previous
12094 versions of gcc, but removes the -mstrict-align dependency. */
12095 if (BYTES_BIG_ENDIAN)
12097 HOST_WIDE_INT size = 0;
12099 if (mode == BLKmode)
12101 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
12102 size = int_size_in_bytes (type);
12104 else
12105 size = GET_MODE_SIZE (mode);
12107 if (size == 1 || size == 2 || size == 4)
12108 return PAD_DOWNWARD;
12110 return PAD_UPWARD;
12113 if (AGGREGATES_PAD_UPWARD_ALWAYS)
12115 if (type != 0 && AGGREGATE_TYPE_P (type))
12116 return PAD_UPWARD;
12119 /* Fall back to the default. */
12120 return default_function_arg_padding (mode, type);
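/* Concretely, on a big-endian target a struct of 1, 2 or 4 bytes is
   padded downward, matching the way an integer of the same size is
   passed, while e.g. a 3-byte struct is padded upward.  */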
12123 /* If defined, a C expression that gives the alignment boundary, in bits,
12124 of an argument with the specified mode and type. If it is not defined,
12125 PARM_BOUNDARY is used for all arguments.
12127 V.4 wants long longs and doubles to be double word aligned. Just
12128 testing the mode size is a boneheaded way to do this as it means
12129 that other types such as complex int are also double word aligned.
12130 However, we're stuck with this because changing the ABI might break
12131 existing library interfaces.
12133 Doubleword align SPE vectors.
12134 Quadword align Altivec/VSX vectors.
12135 Quadword align large synthetic vector types. */
12137 static unsigned int
12138 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
12140 machine_mode elt_mode;
12141 int n_elts;
12143 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12145 if (DEFAULT_ABI == ABI_V4
12146 && (GET_MODE_SIZE (mode) == 8
12147 || (TARGET_HARD_FLOAT
12148 && TARGET_FPRS
12149 && !is_complex_IBM_long_double (mode)
12150 && FLOAT128_2REG_P (mode))))
12151 return 64;
12152 else if (FLOAT128_VECTOR_P (mode))
12153 return 128;
12154 else if (SPE_VECTOR_MODE (mode)
12155 || (type && TREE_CODE (type) == VECTOR_TYPE
12156 && int_size_in_bytes (type) >= 8
12157 && int_size_in_bytes (type) < 16))
12158 return 64;
12159 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12160 || (type && TREE_CODE (type) == VECTOR_TYPE
12161 && int_size_in_bytes (type) >= 16))
12162 return 128;
12164 /* Aggregate types that need > 8 byte alignment are quadword-aligned
12165 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
12166 -mcompat-align-parm is used. */
12167 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
12168 || DEFAULT_ABI == ABI_ELFv2)
12169 && type && TYPE_ALIGN (type) > 64)
12171 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
12172 or homogeneous float/vector aggregates here. We already handled
12173 vector aggregates above, but still need to check for float here. */
12174 bool aggregate_p = (AGGREGATE_TYPE_P (type)
12175 && !SCALAR_FLOAT_MODE_P (elt_mode));
12177 /* We used to check for BLKmode instead of the above aggregate type
12178 check. Warn when this results in any difference to the ABI. */
12179 if (aggregate_p != (mode == BLKmode))
12181 static bool warned;
12182 if (!warned && warn_psabi)
12184 warned = true;
12185 inform (input_location,
12186 "the ABI of passing aggregates with %d-byte alignment"
12187 " has changed in GCC 5",
12188 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
12192 if (aggregate_p)
12193 return 128;
12196 /* Similar for the Darwin64 ABI. Note that for historical reasons we
12197 implement the "aggregate type" check as a BLKmode check here; this
12198 means certain aggregate types are in fact not aligned. */
12199 if (TARGET_MACHO && rs6000_darwin64_abi
12200 && mode == BLKmode
12201 && type && TYPE_ALIGN (type) > 64)
12202 return 128;
12204 return PARM_BOUNDARY;
12207 /* The offset in words to the start of the parameter save area. */
12209 static unsigned int
12210 rs6000_parm_offset (void)
12212 return (DEFAULT_ABI == ABI_V4 ? 2
12213 : DEFAULT_ABI == ABI_ELFv2 ? 4
12214 : 6);
12217 /* For a function parm of MODE and TYPE, return the starting word in
12218 the parameter area. NWORDS of the parameter area are already used. */
12220 static unsigned int
12221 rs6000_parm_start (machine_mode mode, const_tree type,
12222 unsigned int nwords)
12224 unsigned int align;
12226 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
12227 return nwords + (-(rs6000_parm_offset () + nwords) & align);
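/* Worked example on a 64-bit ELFv2 target (rs6000_parm_offset () == 4,
   PARM_BOUNDARY == 64): a quadword-aligned argument has
   ALIGN == 128 / 64 - 1 == 1, so with NWORDS == 3 already used it
   starts at 3 + (-(4 + 3) & 1) == 4, i.e. it is pushed up one word so
   that its absolute doubleword offset 4 + 4 == 8 is even.  */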
12230 /* Compute the size (in words) of a function argument. */
12232 static unsigned long
12233 rs6000_arg_size (machine_mode mode, const_tree type)
12235 unsigned long size;
12237 if (mode != BLKmode)
12238 size = GET_MODE_SIZE (mode);
12239 else
12240 size = int_size_in_bytes (type);
12242 if (TARGET_32BIT)
12243 return (size + 3) >> 2;
12244 else
12245 return (size + 7) >> 3;
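/* E.g. a 12-byte BLKmode struct occupies (12 + 3) >> 2 == 3 words when
   TARGET_32BIT and (12 + 7) >> 3 == 2 doublewords otherwise.  */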
12248 /* Use this to flush pending int fields. */
12250 static void
12251 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
12252 HOST_WIDE_INT bitpos, int final)
12254 unsigned int startbit, endbit;
12255 int intregs, intoffset;
12257 /* Handle the situations where a float is taking up the first half
12258 of the GPR, and the other half is empty (typically due to
12259 alignment restrictions). We can detect this by an 8-byte-aligned
12260 int field, or by seeing that this is the final flush for this
12261 argument. Count the word and continue on. */
12262 if (cum->floats_in_gpr == 1
12263 && (cum->intoffset % 64 == 0
12264 || (cum->intoffset == -1 && final)))
12266 cum->words++;
12267 cum->floats_in_gpr = 0;
12270 if (cum->intoffset == -1)
12271 return;
12273 intoffset = cum->intoffset;
12274 cum->intoffset = -1;
12275 cum->floats_in_gpr = 0;
12277 if (intoffset % BITS_PER_WORD != 0)
12279 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
12280 if (!int_mode_for_size (bits, 0).exists ())
12282 /* We couldn't find an appropriate mode, which happens,
12283 e.g., in packed structs when there are 3 bytes to load.
12284 Back intoffset back to the beginning of the word in this
12285 case. */
12286 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12290 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12291 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12292 intregs = (endbit - startbit) / BITS_PER_WORD;
12293 cum->words += intregs;
12294 /* words should be unsigned. */
12295 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
12297 int pad = (endbit/BITS_PER_WORD) - cum->words;
12298 cum->words += pad;
12302 /* The darwin64 ABI calls for us to recurse down through structs,
12303 looking for elements passed in registers. Unfortunately, we have
12304 to track int register count here also because of misalignments
12305 in powerpc alignment mode. */
12307 static void
12308 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
12309 const_tree type,
12310 HOST_WIDE_INT startbitpos)
12312 tree f;
12314 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12315 if (TREE_CODE (f) == FIELD_DECL)
12317 HOST_WIDE_INT bitpos = startbitpos;
12318 tree ftype = TREE_TYPE (f);
12319 machine_mode mode;
12320 if (ftype == error_mark_node)
12321 continue;
12322 mode = TYPE_MODE (ftype);
12324 if (DECL_SIZE (f) != 0
12325 && tree_fits_uhwi_p (bit_position (f)))
12326 bitpos += int_bit_position (f);
12328 /* ??? FIXME: else assume zero offset. */
12330 if (TREE_CODE (ftype) == RECORD_TYPE)
12331 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
12332 else if (USE_FP_FOR_ARG_P (cum, mode))
12334 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
12335 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12336 cum->fregno += n_fpregs;
12337 /* Single-precision floats present a special problem for
12338 us, because they are smaller than an 8-byte GPR, and so
12339 the structure-packing rules combined with the standard
12340 varargs behavior mean that we want to pack float/float
12341 and float/int combinations into a single register's
12342 space. This is complicated by the arg advance flushing,
12343 which works on arbitrarily large groups of int-type
12344 fields. */
12345 if (mode == SFmode)
12347 if (cum->floats_in_gpr == 1)
12349 /* Two floats in a word; count the word and reset
12350 the float count. */
12351 cum->words++;
12352 cum->floats_in_gpr = 0;
12354 else if (bitpos % 64 == 0)
12356 /* A float at the beginning of an 8-byte word;
12357 count it and put off adjusting cum->words until
12358 we see if an arg advance flush is going to do it
12359 for us. */
12360 cum->floats_in_gpr++;
12362 else
12364 /* The float is at the end of a word, preceded
12365 by integer fields, so the arg advance flush
12366 just above has already set cum->words and
12367 everything is taken care of. */
12370 else
12371 cum->words += n_fpregs;
12373 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12375 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12376 cum->vregno++;
12377 cum->words += 2;
12379 else if (cum->intoffset == -1)
12380 cum->intoffset = bitpos;
12384 /* Check for an item that needs to be considered specially under the darwin 64
12385 bit ABI. These are record types where the mode is BLK or the structure is
12386 8 bytes in size. */
12387 static int
12388 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
12390 return rs6000_darwin64_abi
12391 && ((mode == BLKmode
12392 && TREE_CODE (type) == RECORD_TYPE
12393 && int_size_in_bytes (type) > 0)
12394 || (type && TREE_CODE (type) == RECORD_TYPE
12395 && int_size_in_bytes (type) == 8)) ? 1 : 0;
12398 /* Update the data in CUM to advance over an argument
12399 of mode MODE and data type TYPE.
12400 (TYPE is null for libcalls where that information may not be available.)
12402 Note that for args passed by reference, function_arg will be called
12403 with MODE and TYPE set to that of the pointer to the arg, not the arg
12404 itself. */
12406 static void
12407 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
12408 const_tree type, bool named, int depth)
12410 machine_mode elt_mode;
12411 int n_elts;
12413 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12415 /* Only tick off an argument if we're not recursing. */
12416 if (depth == 0)
12417 cum->nargs_prototype--;
12419 #ifdef HAVE_AS_GNU_ATTRIBUTE
12420 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
12421 && cum->escapes)
12423 if (SCALAR_FLOAT_MODE_P (mode))
12425 rs6000_passes_float = true;
12426 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
12427 && (FLOAT128_IBM_P (mode)
12428 || FLOAT128_IEEE_P (mode)
12429 || (type != NULL
12430 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
12431 rs6000_passes_long_double = true;
12433 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
12434 || (SPE_VECTOR_MODE (mode)
12435 && !cum->stdarg
12436 && cum->sysv_gregno <= GP_ARG_MAX_REG))
12437 rs6000_passes_vector = true;
12439 #endif
12441 if (TARGET_ALTIVEC_ABI
12442 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12443 || (type && TREE_CODE (type) == VECTOR_TYPE
12444 && int_size_in_bytes (type) == 16)))
12446 bool stack = false;
12448 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12450 cum->vregno += n_elts;
12452 if (!TARGET_ALTIVEC)
12453 error ("cannot pass argument in vector register because"
12454 " altivec instructions are disabled, use -maltivec"
12455 " to enable them");
12457 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
12458 even if it is going to be passed in a vector register.
12459 Darwin does the same for variable-argument functions. */
12460 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12461 && TARGET_64BIT)
12462 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
12463 stack = true;
12465 else
12466 stack = true;
12468 if (stack)
12470 int align;
12472 /* Vector parameters must be 16-byte aligned. In 32-bit
12473 mode this means we need to take into account the offset
12474 to the parameter save area. In 64-bit mode, they just
12475 have to start on an even word, since the parameter save
12476 area is 16-byte aligned. */
12477 if (TARGET_32BIT)
12478 align = -(rs6000_parm_offset () + cum->words) & 3;
12479 else
12480 align = cum->words & 1;
12481 cum->words += align + rs6000_arg_size (mode, type);
12483 if (TARGET_DEBUG_ARG)
12485 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
12486 cum->words, align);
12487 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
12488 cum->nargs_prototype, cum->prototype,
12489 GET_MODE_NAME (mode));
12493 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
12494 && !cum->stdarg
12495 && cum->sysv_gregno <= GP_ARG_MAX_REG)
12496 cum->sysv_gregno++;
12498 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12500 int size = int_size_in_bytes (type);
12501 /* Variable sized types have size == -1 and are
12502 treated as if consisting entirely of ints.
12503 Pad to 16 byte boundary if needed. */
12504 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12505 && (cum->words % 2) != 0)
12506 cum->words++;
12507 /* For varargs, we can just go up by the size of the struct. */
12508 if (!named)
12509 cum->words += (size + 7) / 8;
12510 else
12512 /* It is tempting to say int register count just goes up by
12513 sizeof(type)/8, but this is wrong in a case such as
12514 { int; double; int; } [powerpc alignment]. We have to
12515 grovel through the fields for these too. */
12516 cum->intoffset = 0;
12517 cum->floats_in_gpr = 0;
12518 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
12519 rs6000_darwin64_record_arg_advance_flush (cum,
12520 size * BITS_PER_UNIT, 1);
12522 if (TARGET_DEBUG_ARG)
12524 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
12525 cum->words, TYPE_ALIGN (type), size);
12526 fprintf (stderr,
12527 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12528 cum->nargs_prototype, cum->prototype,
12529 GET_MODE_NAME (mode));
12532 else if (DEFAULT_ABI == ABI_V4)
12534 if (abi_v4_pass_in_fpr (mode))
12536 /* _Decimal128 must use an even/odd register pair. This assumes
12537 that the register number is odd when fregno is odd. */
12538 if (mode == TDmode && (cum->fregno % 2) == 1)
12539 cum->fregno++;
12541 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12542 <= FP_ARG_V4_MAX_REG)
12543 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
12544 else
12546 cum->fregno = FP_ARG_V4_MAX_REG + 1;
12547 if (mode == DFmode || FLOAT128_IBM_P (mode)
12548 || mode == DDmode || mode == TDmode)
12549 cum->words += cum->words & 1;
12550 cum->words += rs6000_arg_size (mode, type);
12553 else
12555 int n_words = rs6000_arg_size (mode, type);
12556 int gregno = cum->sysv_gregno;
12558 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12559 (r7,r8) or (r9,r10). As does any other 2 word item such
12560 as complex int due to a historical mistake. */
12561 if (n_words == 2)
12562 gregno += (1 - gregno) & 1;
12564 /* Multi-reg args are not split between registers and stack. */
12565 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12567 /* Long long and SPE vectors are aligned on the stack.
12568 So are other 2 word items such as complex int due to
12569 a historical mistake. */
12570 if (n_words == 2)
12571 cum->words += cum->words & 1;
12572 cum->words += n_words;
12575 /* Note: continuing to accumulate gregno past when we've started
12576 spilling to the stack indicates the fact that we've started
12577 spilling to the stack to expand_builtin_saveregs. */
12578 cum->sysv_gregno = gregno + n_words;
12581 if (TARGET_DEBUG_ARG)
12583 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12584 cum->words, cum->fregno);
12585 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
12586 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
12587 fprintf (stderr, "mode = %4s, named = %d\n",
12588 GET_MODE_NAME (mode), named);
12591 else
12593 int n_words = rs6000_arg_size (mode, type);
12594 int start_words = cum->words;
12595 int align_words = rs6000_parm_start (mode, type, start_words);
12597 cum->words = align_words + n_words;
12599 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
12601 /* _Decimal128 must be passed in an even/odd float register pair.
12602 This assumes that the register number is odd when fregno is
12603 odd. */
12604 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12605 cum->fregno++;
12606 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
12609 if (TARGET_DEBUG_ARG)
12611 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12612 cum->words, cum->fregno);
12613 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
12614 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
12615 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
12616 named, align_words - start_words, depth);
12621 static void
12622 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
12623 const_tree type, bool named)
12625 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12629 static rtx
12630 spe_build_register_parallel (machine_mode mode, int gregno)
12632 rtx r1, r3, r5, r7;
12634 switch (mode)
12636 case E_DFmode:
12637 r1 = gen_rtx_REG (DImode, gregno);
12638 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12639 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
12641 case E_DCmode:
12642 case E_TFmode:
12643 r1 = gen_rtx_REG (DImode, gregno);
12644 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12645 r3 = gen_rtx_REG (DImode, gregno + 2);
12646 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12647 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
12649 case E_TCmode:
12650 r1 = gen_rtx_REG (DImode, gregno);
12651 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12652 r3 = gen_rtx_REG (DImode, gregno + 2);
12653 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12654 r5 = gen_rtx_REG (DImode, gregno + 4);
12655 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
12656 r7 = gen_rtx_REG (DImode, gregno + 6);
12657 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
12658 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
12660 default:
12661 gcc_unreachable ();
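/* For example, for DCmode starting at GREGNO == 5 this builds

       (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                     (expr_list (reg:DI 7) (const_int 8))])

   i.e. the real part at byte offset 0 in r5/r6 and the imaginary part
   at byte offset 8 in r7/r8, matching the pair-of-GPRs convention
   described below.  */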
12665 /* Determine where to put a SIMD argument on the SPE. */
12666 static rtx
12667 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
12668 const_tree type)
12670 int gregno = cum->sysv_gregno;
12672 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
12673 are passed and returned in a pair of GPRs for ABI compatibility. */
12674 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
12675 || mode == DCmode || mode == TCmode))
12677 int n_words = rs6000_arg_size (mode, type);
12679 /* Doubles go in an odd/even register pair (r5/r6, etc). */
12680 if (mode == DFmode)
12681 gregno += (1 - gregno) & 1;
12683 /* Multi-reg args are not split between registers and stack. */
12684 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12685 return NULL_RTX;
12687 return spe_build_register_parallel (mode, gregno);
12689 if (cum->stdarg)
12691 int n_words = rs6000_arg_size (mode, type);
12693 /* SPE vectors are put in odd registers. */
12694 if (n_words == 2 && (gregno & 1) == 0)
12695 gregno += 1;
12697 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
12699 rtx r1, r2;
12700 machine_mode m = SImode;
12702 r1 = gen_rtx_REG (m, gregno);
12703 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
12704 r2 = gen_rtx_REG (m, gregno + 1);
12705 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
12706 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
12708 else
12709 return NULL_RTX;
12711 else
12713 if (gregno <= GP_ARG_MAX_REG)
12714 return gen_rtx_REG (mode, gregno);
12715 else
12716 return NULL_RTX;
12720 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12721 structure between cum->intoffset and bitpos to integer registers. */
12723 static void
12724 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12725 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12727 machine_mode mode;
12728 unsigned int regno;
12729 unsigned int startbit, endbit;
12730 int this_regno, intregs, intoffset;
12731 rtx reg;
12733 if (cum->intoffset == -1)
12734 return;
12736 intoffset = cum->intoffset;
12737 cum->intoffset = -1;
12739 /* If this is the trailing part of a word, try to only load that
12740 much into the register. Otherwise load the whole register. Note
12741 that in the latter case we may pick up unwanted bits. It's not a
12742 problem at the moment but we may wish to revisit this. */
12744 if (intoffset % BITS_PER_WORD != 0)
12746 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
12747 if (!int_mode_for_size (bits, 0).exists (&mode))
12749 /* We couldn't find an appropriate mode, which happens,
12750 e.g., in packed structs when there are 3 bytes to load.
12751 Move intoffset back to the beginning of the word in this
12752 case. */
12753 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12754 mode = word_mode;
12757 else
12758 mode = word_mode;
12760 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12761 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12762 intregs = (endbit - startbit) / BITS_PER_WORD;
12763 this_regno = cum->words + intoffset / BITS_PER_WORD;
12765 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12766 cum->use_stack = 1;
12768 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12769 if (intregs <= 0)
12770 return;
12772 intoffset /= BITS_PER_UNIT;
12775 regno = GP_ARG_MIN_REG + this_regno;
12776 reg = gen_rtx_REG (mode, regno);
12777 rvec[(*k)++] =
12778 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12780 this_regno += 1;
12781 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12782 mode = word_mode;
12783 intregs -= 1;
12785 while (intregs > 0);
12788 /* Recursive workhorse for the following. */
12790 static void
12791 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12792 HOST_WIDE_INT startbitpos, rtx rvec[],
12793 int *k)
12795 tree f;
12797 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12798 if (TREE_CODE (f) == FIELD_DECL)
12800 HOST_WIDE_INT bitpos = startbitpos;
12801 tree ftype = TREE_TYPE (f);
12802 machine_mode mode;
12803 if (ftype == error_mark_node)
12804 continue;
12805 mode = TYPE_MODE (ftype);
12807 if (DECL_SIZE (f) != 0
12808 && tree_fits_uhwi_p (bit_position (f)))
12809 bitpos += int_bit_position (f);
12811 /* ??? FIXME: else assume zero offset. */
12813 if (TREE_CODE (ftype) == RECORD_TYPE)
12814 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12815 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12817 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12818 #if 0
12819 switch (mode)
12821 case E_SCmode: mode = SFmode; break;
12822 case E_DCmode: mode = DFmode; break;
12823 case E_TCmode: mode = TFmode; break;
12824 default: break;
12826 #endif
12827 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12828 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12830 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12831 && (mode == TFmode || mode == TDmode));
12832 /* Long double or _Decimal128 split over regs and memory. */
12833 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12834 cum->use_stack = 1;
12836 rvec[(*k)++]
12837 = gen_rtx_EXPR_LIST (VOIDmode,
12838 gen_rtx_REG (mode, cum->fregno++),
12839 GEN_INT (bitpos / BITS_PER_UNIT));
12840 if (FLOAT128_2REG_P (mode))
12841 cum->fregno++;
12843 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12845 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12846 rvec[(*k)++]
12847 = gen_rtx_EXPR_LIST (VOIDmode,
12848 gen_rtx_REG (mode, cum->vregno++),
12849 GEN_INT (bitpos / BITS_PER_UNIT));
12851 else if (cum->intoffset == -1)
12852 cum->intoffset = bitpos;
12856 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12857 the register(s) to be used for each field and subfield of a struct
12858 being passed by value, along with the offset of where the
12859 register's value may be found in the block. FP fields go in FP
12860 registers, vector fields go in vector registers, and everything
12861 else goes in int registers, packed as in memory.
12863 This code is also used for function return values. RETVAL indicates
12864 whether this is the case.
12866 Much of this is taken from the SPARC V9 port, which has a similar
12867 calling convention. */
12869 static rtx
12870 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12871 bool named, bool retval)
12873 rtx rvec[FIRST_PSEUDO_REGISTER];
12874 int k = 1, kbase = 1;
12875 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12876 /* This is a copy; modifications are not visible to our caller. */
12877 CUMULATIVE_ARGS copy_cum = *orig_cum;
12878 CUMULATIVE_ARGS *cum = &copy_cum;
12880 /* Pad to 16 byte boundary if needed. */
12881 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12882 && (cum->words % 2) != 0)
12883 cum->words++;
12885 cum->intoffset = 0;
12886 cum->use_stack = 0;
12887 cum->named = named;
12889 /* Put entries into rvec[] for individual FP and vector fields, and
12890 for the chunks of memory that go in int regs. Note we start at
12891 element 1; 0 is reserved for an indication of using memory, and
12892 may or may not be filled in below. */
12893 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12894 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12896 /* If any part of the struct went on the stack put all of it there.
12897 This hack is because the generic code for
12898 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12899 parts of the struct are not at the beginning. */
12900 if (cum->use_stack)
12902 if (retval)
12903 return NULL_RTX; /* doesn't go in registers at all */
12904 kbase = 0;
12905 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12907 if (k > 1 || cum->use_stack)
12908 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12909 else
12910 return NULL_RTX;
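/* A worked sketch, assuming the cumulative counters are still at their
   initial values: for a by-value

     struct { double d; int i; };

   the recursion above sends the double straight to the first FPR, and
   the final flush packs the trailing int chunk into a GPR at its
   in-memory offset, giving roughly

     (parallel:BLK [(expr_list (reg:DF 33) (const_int 0))
                    (expr_list (reg:DI 4) (const_int 8))])  */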
12913 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12915 static rtx
12916 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12917 int align_words)
12919 int n_units;
12920 int i, k;
12921 rtx rvec[GP_ARG_NUM_REG + 1];
12923 if (align_words >= GP_ARG_NUM_REG)
12924 return NULL_RTX;
12926 n_units = rs6000_arg_size (mode, type);
12928 /* Optimize the simple case where the arg fits in one gpr, except in
12929 the case of BLKmode due to assign_parms assuming that registers are
12930 BITS_PER_WORD wide. */
12931 if (n_units == 0
12932 || (n_units == 1 && mode != BLKmode))
12933 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12935 k = 0;
12936 if (align_words + n_units > GP_ARG_NUM_REG)
12937 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12938 using a magic NULL_RTX component.
12939 This is not strictly correct. Only some of the arg belongs in
12940 memory, not all of it. However, the normal scheme using
12941 function_arg_partial_nregs can result in unusual subregs, eg.
12942 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12943 store the whole arg to memory is often more efficient than code
12944 to store pieces, and we know that space is available in the right
12945 place for the whole arg. */
12946 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12948 i = 0;
12951 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12952 rtx off = GEN_INT (i++ * 4);
12953 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12955 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12957 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
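/* For instance, a DImode argument arriving here with align_words == 7
   only half fits, and the result is

     (parallel:DI [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   where the leading NULL_RTX element tells the middle end that the
   remainder of the argument lives in memory.  */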
12960 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12961 but must also be copied into the parameter save area starting at
12962 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12963 to the GPRs and/or memory. Return the number of elements used. */
12965 static int
12966 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12967 int align_words, rtx *rvec)
12969 int k = 0;
12971 if (align_words < GP_ARG_NUM_REG)
12973 int n_words = rs6000_arg_size (mode, type);
12975 if (align_words + n_words > GP_ARG_NUM_REG
12976 || mode == BLKmode
12977 || (TARGET_32BIT && TARGET_POWERPC64))
12979 /* If this is partially on the stack, then we only
12980 include the portion actually in registers here. */
12981 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12982 int i = 0;
12984 if (align_words + n_words > GP_ARG_NUM_REG)
12986 /* Not all of the arg fits in gprs. Say that it goes in memory
12987 too, using a magic NULL_RTX component. Also see comment in
12988 rs6000_mixed_function_arg for why the normal
12989 function_arg_partial_nregs scheme doesn't work in this case. */
12990 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12995 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12996 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12997 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12999 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
13001 else
13003 /* The whole arg fits in gprs. */
13004 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
13005 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
13008 else
13010 /* It's entirely in memory. */
13011 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
13014 return k;
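/* E.g. a 3-doubleword BLKmode aggregate with align_words == 6 under a
   64-bit ABI does not fit entirely: RVEC receives the NULL_RTX memory
   marker, then (reg:DI 9) at offset 0 and (reg:DI 10) at offset 8, and
   the caller appends its FPR or VR elements after those.  */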
13017 /* RVEC is a vector of K components of an argument of mode MODE.
13018 Construct the final function_arg return value from it. */
13020 static rtx
13021 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
13023 gcc_assert (k >= 1);
13025 /* Avoid returning a PARALLEL in the trivial cases. */
13026 if (k == 1)
13028 if (XEXP (rvec[0], 0) == NULL_RTX)
13029 return NULL_RTX;
13031 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
13032 return XEXP (rvec[0], 0);
13035 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
13038 /* Determine where to put an argument to a function.
13039 Value is zero to push the argument on the stack,
13040 or a hard register in which to store the argument.
13042 MODE is the argument's machine mode.
13043 TYPE is the data type of the argument (as a tree).
13044 This is null for libcalls where that information may
13045 not be available.
13046 CUM is a variable of type CUMULATIVE_ARGS which gives info about
13047 the preceding args and about the function being called. It is
13048 not modified in this routine.
13049 NAMED is nonzero if this argument is a named parameter
13050 (otherwise it is an extra parameter matching an ellipsis).
13052 On RS/6000 the first eight words of non-FP are normally in registers
13053 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
13054 Under V.4, the first 8 FP args are in registers.
13056 If this is floating-point and no prototype is specified, we use
13057 both an FP and integer register (or possibly FP reg and stack). Library
13058 functions (when CALL_LIBCALL is set) always have the proper types for args,
13059 so we can pass the FP value just in one register. emit_library_function
13060 doesn't support PARALLEL anyway.
13062 Note that for args passed by reference, function_arg will be called
13063 with MODE and TYPE set to that of the pointer to the arg, not the arg
13064 itself. */
13066 static rtx
13067 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
13068 const_tree type, bool named)
13070 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13071 enum rs6000_abi abi = DEFAULT_ABI;
13072 machine_mode elt_mode;
13073 int n_elts;
13075 /* Return a marker to indicate whether CR1 needs to set or clear the
13076 bit that V.4 uses to say fp args were passed in registers.
13077 Assume that we don't need the marker for software floating point,
13078 or compiler generated library calls. */
13079 if (mode == VOIDmode)
13081 if (abi == ABI_V4
13082 && (cum->call_cookie & CALL_LIBCALL) == 0
13083 && (cum->stdarg
13084 || (cum->nargs_prototype < 0
13085 && (cum->prototype || TARGET_NO_PROTOTYPE))))
13087 /* For the SPE, we need to crxor CR6 always. */
13088 if (TARGET_SPE_ABI)
13089 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
13090 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
13091 return GEN_INT (cum->call_cookie
13092 | ((cum->fregno == FP_ARG_MIN_REG)
13093 ? CALL_V4_SET_FP_ARGS
13094 : CALL_V4_CLEAR_FP_ARGS));
13097 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
13100 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
13102 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
13104 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
13105 if (rslt != NULL_RTX)
13106 return rslt;
13107 /* Else fall through to usual handling. */
13110 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
13112 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
13113 rtx r, off;
13114 int i, k = 0;
13116 /* Do we also need to pass this argument in the parameter save area?
13117 Library support functions for IEEE 128-bit are assumed to not need the
13118 value passed both in GPRs and in vector registers. */
13119 if (TARGET_64BIT && !cum->prototype
13120 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
13122 int align_words = ROUND_UP (cum->words, 2);
13123 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
13126 /* Describe where this argument goes in the vector registers. */
13127 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
13129 r = gen_rtx_REG (elt_mode, cum->vregno + i);
13130 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
13131 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13134 return rs6000_finish_function_arg (mode, rvec, k);
13136 else if (TARGET_ALTIVEC_ABI
13137 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
13138 || (type && TREE_CODE (type) == VECTOR_TYPE
13139 && int_size_in_bytes (type) == 16)))
13141 if (named || abi == ABI_V4)
13142 return NULL_RTX;
13143 else
13145 /* Vector parameters to varargs functions under AIX or Darwin
13146 get passed in memory and possibly also in GPRs. */
13147 int align, align_words, n_words;
13148 machine_mode part_mode;
13150 /* Vector parameters must be 16-byte aligned. In 32-bit
13151 mode this means we need to take into account the offset
13152 to the parameter save area. In 64-bit mode, they just
13153 have to start on an even word, since the parameter save
13154 area is 16-byte aligned. */
13155 if (TARGET_32BIT)
13156 align = -(rs6000_parm_offset () + cum->words) & 3;
13157 else
13158 align = cum->words & 1;
13159 align_words = cum->words + align;
13161 /* Out of registers? Memory, then. */
13162 if (align_words >= GP_ARG_NUM_REG)
13163 return NULL_RTX;
13165 if (TARGET_32BIT && TARGET_POWERPC64)
13166 return rs6000_mixed_function_arg (mode, type, align_words);
13168 /* The vector value goes in GPRs. Only the part of the
13169 value in GPRs is reported here. */
13170 part_mode = mode;
13171 n_words = rs6000_arg_size (mode, type);
13172 if (align_words + n_words > GP_ARG_NUM_REG)
13173 /* Fortunately, there are only two possibilities, the value
13174 is either wholly in GPRs or half in GPRs and half not. */
13175 part_mode = DImode;
13177 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
13180 else if (TARGET_SPE_ABI && TARGET_SPE
13181 && (SPE_VECTOR_MODE (mode)
13182 || (TARGET_E500_DOUBLE && (mode == DFmode
13183 || mode == DCmode
13184 || mode == TFmode
13185 || mode == TCmode))))
13186 return rs6000_spe_function_arg (cum, mode, type);
13188 else if (abi == ABI_V4)
13190 if (abi_v4_pass_in_fpr (mode))
13192 /* _Decimal128 must use an even/odd register pair. This assumes
13193 that the register number is odd when fregno is odd. */
13194 if (mode == TDmode && (cum->fregno % 2) == 1)
13195 cum->fregno++;
13197 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
13198 <= FP_ARG_V4_MAX_REG)
13199 return gen_rtx_REG (mode, cum->fregno);
13200 else
13201 return NULL_RTX;
13203 else
13205 int n_words = rs6000_arg_size (mode, type);
13206 int gregno = cum->sysv_gregno;
13208 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
13209 (r7,r8) or (r9,r10), as is any other 2-word item such
13210 as complex int, due to a historical mistake. */
13211 if (n_words == 2)
13212 gregno += (1 - gregno) & 1;
13214 /* Multi-reg args are not split between registers and stack. */
13215 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
13216 return NULL_RTX;
13218 if (TARGET_32BIT && TARGET_POWERPC64)
13219 return rs6000_mixed_function_arg (mode, type,
13220 gregno - GP_ARG_MIN_REG);
13221 return gen_rtx_REG (mode, gregno);
13224 else
13226 int align_words = rs6000_parm_start (mode, type, cum->words);
13228 /* _Decimal128 must be passed in an even/odd float register pair.
13229 This assumes that the register number is odd when fregno is odd. */
13230 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
13231 cum->fregno++;
13233 if (USE_FP_FOR_ARG_P (cum, elt_mode))
13235 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
13236 rtx r, off;
13237 int i, k = 0;
13238 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13239 int fpr_words;
13241 /* Do we also need to pass this argument in the parameter
13242 save area? */
13243 if (type && (cum->nargs_prototype <= 0
13244 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13245 && TARGET_XL_COMPAT
13246 && align_words >= GP_ARG_NUM_REG)))
13247 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
13249 /* Describe where this argument goes in the fprs. */
13250 for (i = 0; i < n_elts
13251 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
13253 /* Check if the argument is split over registers and memory.
13254 This can only ever happen for long double or _Decimal128;
13255 complex types are handled via split_complex_arg. */
13256 machine_mode fmode = elt_mode;
13257 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
13259 gcc_assert (FLOAT128_2REG_P (fmode));
13260 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
13263 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
13264 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
13265 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13268 /* If there were not enough FPRs to hold the argument, the rest
13269 usually goes into memory. However, if the current position
13270 is still within the register parameter area, a portion may
13271 actually have to go into GPRs.
13273 Note that it may happen that the portion of the argument
13274 passed in the first "half" of the first GPR was already
13275 passed in the last FPR as well.
13277 For unnamed arguments, we already set up GPRs to cover the
13278 whole argument in rs6000_psave_function_arg, so there is
13279 nothing further to do at this point. */
13280 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
13281 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
13282 && cum->nargs_prototype > 0)
13284 static bool warned;
13286 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
13287 int n_words = rs6000_arg_size (mode, type);
13289 align_words += fpr_words;
13290 n_words -= fpr_words;
13294 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
13295 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
13296 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13298 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
13300 if (!warned && warn_psabi)
13302 warned = true;
13303 inform (input_location,
13304 "the ABI of passing homogeneous float aggregates"
13305 " has changed in GCC 5");
13309 return rs6000_finish_function_arg (mode, rvec, k);
13311 else if (align_words < GP_ARG_NUM_REG)
13313 if (TARGET_32BIT && TARGET_POWERPC64)
13314 return rs6000_mixed_function_arg (mode, type, align_words);
13316 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
13318 else
13319 return NULL_RTX;
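/* A sketch of the FPR path above for 64-bit AIX/ELFv2 when no
   prototype is in scope:

     extern double sink ();
     sink (1.5);

   The argument is described as

     (parallel:DF [(expr_list (reg:DF 3) (const_int 0))
                   (expr_list (reg:DF 33) (const_int 0))])

   i.e. the value is placed both in r3 (via rs6000_psave_function_arg)
   and in f1, so either a K&R-style or a prototyped callee finds it.  */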
13323 /* For an arg passed partly in registers and partly in memory, this is
13324 the number of bytes passed in registers. For args passed entirely in
13325 registers or entirely in memory, zero. When an arg is described by a
13326 PARALLEL, perhaps using more than one register type, this function
13327 returns the number of bytes used by the first element of the PARALLEL. */
13329 static int
13330 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
13331 tree type, bool named)
13333 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13334 bool passed_in_gprs = true;
13335 int ret = 0;
13336 int align_words;
13337 machine_mode elt_mode;
13338 int n_elts;
13340 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
13342 if (DEFAULT_ABI == ABI_V4)
13343 return 0;
13345 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
13347 /* If we are passing this arg in the fixed parameter save area (gprs or
13348 memory) as well as VRs, we do not use the partial bytes mechanism;
13349 instead, rs6000_function_arg will return a PARALLEL including a memory
13350 element as necessary. Library support functions for IEEE 128-bit are
13351 assumed to not need the value passed both in GPRs and in vector
13352 registers. */
13353 if (TARGET_64BIT && !cum->prototype
13354 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
13355 return 0;
13357 /* Otherwise, we pass in VRs only. Check for partial copies. */
13358 passed_in_gprs = false;
13359 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
13360 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
13363 /* In this complicated case we just disable the partial_nregs code. */
13364 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
13365 return 0;
13367 align_words = rs6000_parm_start (mode, type, cum->words);
13369 if (USE_FP_FOR_ARG_P (cum, elt_mode))
13371 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13373 /* If we are passing this arg in the fixed parameter save area
13374 (gprs or memory) as well as FPRs, we do not use the partial
13375 bytes mechanism; instead, rs6000_function_arg will return a
13376 PARALLEL including a memory element as necessary. */
13377 if (type
13378 && (cum->nargs_prototype <= 0
13379 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13380 && TARGET_XL_COMPAT
13381 && align_words >= GP_ARG_NUM_REG)))
13382 return 0;
13384 /* Otherwise, we pass in FPRs only. Check for partial copies. */
13385 passed_in_gprs = false;
13386 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
13388 /* Compute number of bytes / words passed in FPRs. If there
13389 is still space available in the register parameter area
13390 *after* that amount, a part of the argument will be passed
13391 in GPRs. In that case, the total amount passed in any
13392 registers is equal to the amount that would have been passed
13393 in GPRs if everything were passed there, so we fall back to
13394 the GPR code below to compute the appropriate value. */
13395 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
13396 * MIN (8, GET_MODE_SIZE (elt_mode)));
13397 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
13399 if (align_words + fpr_words < GP_ARG_NUM_REG)
13400 passed_in_gprs = true;
13401 else
13402 ret = fpr;
13406 if (passed_in_gprs
13407 && align_words < GP_ARG_NUM_REG
13408 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
13409 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
13411 if (ret != 0 && TARGET_DEBUG_ARG)
13412 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
13414 return ret;
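/* Example: under the 64-bit AIX ABI, a 24-byte aggregate reaching the
   GPR area at align_words == 6 occupies r9/r10 plus one stack
   doubleword, so this function returns (8 - 6) * 8 == 16.  */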
13417 /* A C expression that indicates when an argument must be passed by
13418 reference. If nonzero for an argument, a copy of that argument is
13419 made in memory and a pointer to the argument is passed instead of
13420 the argument itself. The pointer is passed in whatever way is
13421 appropriate for passing a pointer to that type.
13423 Under V.4, aggregates and long double are passed by reference.
13425 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
13426 reference unless the AltiVec vector extension ABI is in force.
13428 As an extension to all ABIs, variable sized types are passed by
13429 reference. */
13431 static bool
13432 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
13433 machine_mode mode, const_tree type,
13434 bool named ATTRIBUTE_UNUSED)
13436 if (!type)
13437 return 0;
13439 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
13440 && FLOAT128_IEEE_P (TYPE_MODE (type)))
13442 if (TARGET_DEBUG_ARG)
13443 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
13444 return 1;
13447 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
13449 if (TARGET_DEBUG_ARG)
13450 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
13451 return 1;
13454 if (int_size_in_bytes (type) < 0)
13456 if (TARGET_DEBUG_ARG)
13457 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
13458 return 1;
13461 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
13462 modes only exist for GCC vector types if -maltivec. */
13463 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13465 if (TARGET_DEBUG_ARG)
13466 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
13467 return 1;
13470 /* Pass synthetic vectors in memory. */
13471 if (TREE_CODE (type) == VECTOR_TYPE
13472 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
13474 static bool warned_for_pass_big_vectors = false;
13475 if (TARGET_DEBUG_ARG)
13476 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
13477 if (!warned_for_pass_big_vectors)
13479 warning (OPT_Wpsabi, "GCC vector passed by reference: "
13480 "non-standard ABI extension with no compatibility guarantee");
13481 warned_for_pass_big_vectors = true;
13483 return 1;
13486 return 0;
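/* A few instances of the rules above under the 32-bit SVR4 ABI:

     struct { int a, b; }                        passed by reference
     typedef int v8si __attribute__ ((vector_size (32)));
     v8si (synthetic vector, wider than native)  passed by reference
     double                                      passed by value     */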
13489 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
13490 already processed. Return true if the parameter must be passed
13491 (fully or partially) on the stack. */
13493 static bool
13494 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
13496 machine_mode mode;
13497 int unsignedp;
13498 rtx entry_parm;
13500 /* Catch errors. */
13501 if (type == NULL || type == error_mark_node)
13502 return true;
13504 /* Handle types with no storage requirement. */
13505 if (TYPE_MODE (type) == VOIDmode)
13506 return false;
13508 /* Handle complex types. Each scalar part is checked (and advanced past) in turn. */
13509 if (TREE_CODE (type) == COMPLEX_TYPE)
13510 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
13511 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
13513 /* Handle transparent aggregates. */
13514 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
13515 && TYPE_TRANSPARENT_AGGR (type))
13516 type = TREE_TYPE (first_field (type));
13518 /* See if this arg was passed by invisible reference. */
13519 if (pass_by_reference (get_cumulative_args (args_so_far),
13520 TYPE_MODE (type), type, true))
13521 type = build_pointer_type (type);
13523 /* Find mode as it is passed by the ABI. */
13524 unsignedp = TYPE_UNSIGNED (type);
13525 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
13527 /* If we must pass in stack, we need a stack. */
13528 if (rs6000_must_pass_in_stack (mode, type))
13529 return true;
13531 /* If there is no incoming register, we need a stack. */
13532 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
13533 if (entry_parm == NULL)
13534 return true;
13536 /* Likewise if we need to pass both in registers and on the stack. */
13537 if (GET_CODE (entry_parm) == PARALLEL
13538 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
13539 return true;
13541 /* Also true if we're partially in registers and partially not. */
13542 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
13543 return true;
13545 /* Update info on where next arg arrives in registers. */
13546 rs6000_function_arg_advance (args_so_far, mode, type, true);
13547 return false;
13550 /* Return true if FUN has no prototype, has a variable argument
13551 list, or passes any parameter in memory. */
13553 static bool
13554 rs6000_function_parms_need_stack (tree fun, bool incoming)
13556 tree fntype, result;
13557 CUMULATIVE_ARGS args_so_far_v;
13558 cumulative_args_t args_so_far;
13560 if (!fun)
13561 /* Must be a libcall, all of which only use reg parms. */
13562 return false;
13564 fntype = fun;
13565 if (!TYPE_P (fun))
13566 fntype = TREE_TYPE (fun);
13568 /* Varargs functions need the parameter save area. */
13569 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
13570 return true;
13572 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
13573 args_so_far = pack_cumulative_args (&args_so_far_v);
13575 /* When incoming, we will have been passed the function decl.
13576 It is necessary to use the decl to handle K&R style functions,
13577 where TYPE_ARG_TYPES may not be available. */
13578 if (incoming)
13580 gcc_assert (DECL_P (fun));
13581 result = DECL_RESULT (fun);
13583 else
13584 result = TREE_TYPE (fntype);
13586 if (result && aggregate_value_p (result, fntype))
13588 if (!TYPE_P (result))
13589 result = TREE_TYPE (result);
13590 result = build_pointer_type (result);
13591 rs6000_parm_needs_stack (args_so_far, result);
13594 if (incoming)
13596 tree parm;
13598 for (parm = DECL_ARGUMENTS (fun);
13599 parm && parm != void_list_node;
13600 parm = TREE_CHAIN (parm))
13601 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
13602 return true;
13604 else
13606 function_args_iterator args_iter;
13607 tree arg_type;
13609 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
13610 if (rs6000_parm_needs_stack (args_so_far, arg_type))
13611 return true;
13614 return false;
13617 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
13618 usually a constant depending on the ABI. However, in the ELFv2 ABI
13619 the register parameter area is optional when calling a function that
13620 has a prototype in scope, has no variable argument list, and passes
13621 all parameters in registers. */
13623 int
13624 rs6000_reg_parm_stack_space (tree fun, bool incoming)
13626 int reg_parm_stack_space;
13628 switch (DEFAULT_ABI)
13630 default:
13631 reg_parm_stack_space = 0;
13632 break;
13634 case ABI_AIX:
13635 case ABI_DARWIN:
13636 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13637 break;
13639 case ABI_ELFv2:
13640 /* ??? Recomputing this every time is a bit expensive. Is there
13641 a place to cache this information? */
13642 if (rs6000_function_parms_need_stack (fun, incoming))
13643 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13644 else
13645 reg_parm_stack_space = 0;
13646 break;
13649 return reg_parm_stack_space;
13652 static void
13653 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
13655 int i;
13656 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
13658 if (nregs == 0)
13659 return;
13661 for (i = 0; i < nregs; i++)
13663 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
13664 if (reload_completed)
13666 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
13667 tem = NULL_RTX;
13668 else
13669 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
13670 i * GET_MODE_SIZE (reg_mode));
13672 else
13673 tem = replace_equiv_address (tem, XEXP (tem, 0));
13675 gcc_assert (tem);
13677 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
13681 /* Perform any actions needed for a function that is receiving a
13682 variable number of arguments.
13684 CUM is as above.
13686 MODE and TYPE are the mode and type of the current parameter.
13688 PRETEND_SIZE is a variable that should be set to the amount of stack
13689 that must be pushed by the prolog to pretend that our caller pushed it.
13692 Normally, this macro will push all remaining incoming registers on the
13693 stack and set PRETEND_SIZE to the length of the registers pushed. */
13695 static void
13696 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
13697 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13698 int no_rtl)
13700 CUMULATIVE_ARGS next_cum;
13701 int reg_size = TARGET_32BIT ? 4 : 8;
13702 rtx save_area = NULL_RTX, mem;
13703 int first_reg_offset;
13704 alias_set_type set;
13706 /* Skip the last named argument. */
13707 next_cum = *get_cumulative_args (cum);
13708 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
13710 if (DEFAULT_ABI == ABI_V4)
13712 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
13714 if (! no_rtl)
13716 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
13717 HOST_WIDE_INT offset = 0;
13719 /* Try to optimize the size of the varargs save area.
13720 The ABI requires that ap.reg_save_area is doubleword
13721 aligned, but we don't need to allocate space for all
13722 the bytes, only for those to which we will actually save
13723 anything. */
13724 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
13725 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
13726 if (TARGET_HARD_FLOAT && TARGET_FPRS
13727 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13728 && cfun->va_list_fpr_size)
13730 if (gpr_reg_num)
13731 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13732 * UNITS_PER_FP_WORD;
13733 if (cfun->va_list_fpr_size
13734 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13735 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13736 else
13737 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13738 * UNITS_PER_FP_WORD;
13740 if (gpr_reg_num)
13742 offset = -((first_reg_offset * reg_size) & ~7);
13743 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13745 gpr_reg_num = cfun->va_list_gpr_size;
13746 if (reg_size == 4 && (first_reg_offset & 1))
13747 gpr_reg_num++;
13749 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13751 else if (fpr_size)
13752 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13753 * UNITS_PER_FP_WORD
13754 - (int) (GP_ARG_NUM_REG * reg_size);
13756 if (gpr_size + fpr_size)
13758 rtx reg_save_area
13759 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13760 gcc_assert (GET_CODE (reg_save_area) == MEM);
13761 reg_save_area = XEXP (reg_save_area, 0);
13762 if (GET_CODE (reg_save_area) == PLUS)
13764 gcc_assert (XEXP (reg_save_area, 0)
13765 == virtual_stack_vars_rtx);
13766 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13767 offset += INTVAL (XEXP (reg_save_area, 1));
13769 else
13770 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13773 cfun->machine->varargs_save_offset = offset;
13774 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13777 else
13779 first_reg_offset = next_cum.words;
13780 save_area = crtl->args.internal_arg_pointer;
13782 if (targetm.calls.must_pass_in_stack (mode, type))
13783 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13786 set = get_varargs_alias_set ();
13787 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13788 && cfun->va_list_gpr_size)
13790 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13792 if (va_list_gpr_counter_field)
13793 /* V4 va_list_gpr_size counts number of registers needed. */
13794 n_gpr = cfun->va_list_gpr_size;
13795 else
13796 /* char * va_list instead counts number of bytes needed. */
13797 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13799 if (nregs > n_gpr)
13800 nregs = n_gpr;
13802 mem = gen_rtx_MEM (BLKmode,
13803 plus_constant (Pmode, save_area,
13804 first_reg_offset * reg_size));
13805 MEM_NOTRAP_P (mem) = 1;
13806 set_mem_alias_set (mem, set);
13807 set_mem_align (mem, BITS_PER_WORD);
13809 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13810 nregs);
13813 /* Save FP registers if needed. */
13814 if (DEFAULT_ABI == ABI_V4
13815 && TARGET_HARD_FLOAT && TARGET_FPRS
13816 && ! no_rtl
13817 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13818 && cfun->va_list_fpr_size)
13820 int fregno = next_cum.fregno, nregs;
13821 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13822 rtx lab = gen_label_rtx ();
13823 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13824 * UNITS_PER_FP_WORD);
13826 emit_jump_insn
13827 (gen_rtx_SET (pc_rtx,
13828 gen_rtx_IF_THEN_ELSE (VOIDmode,
13829 gen_rtx_NE (VOIDmode, cr1,
13830 const0_rtx),
13831 gen_rtx_LABEL_REF (VOIDmode, lab),
13832 pc_rtx)));
13834 for (nregs = 0;
13835 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13836 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13838 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13839 ? DFmode : SFmode,
13840 plus_constant (Pmode, save_area, off));
13841 MEM_NOTRAP_P (mem) = 1;
13842 set_mem_alias_set (mem, set);
13843 set_mem_align (mem, GET_MODE_ALIGNMENT (
13844 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13845 ? DFmode : SFmode));
13846 emit_move_insn (mem, gen_rtx_REG (
13847 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13848 ? DFmode : SFmode, fregno));
13851 emit_label (lab);
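/* The resulting V.4 save area is laid out GPRs first, FPRs second, as
   the OFF computation above shows:

     save_area + 0                  r3 .. r10  (GP_ARG_NUM_REG words)
     save_area + 8 * reg_size       f1 .. f8   (8 bytes per register)

   with the FPR stores guarded by the CR1 test emitted above, so a
   caller that passed no FP arguments never forces them to memory.  */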
13855 /* Create the va_list data type. */
13857 static tree
13858 rs6000_build_builtin_va_list (void)
13860 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13862 /* For AIX, prefer 'char *' because that's what the system
13863 header files like. */
13864 if (DEFAULT_ABI != ABI_V4)
13865 return build_pointer_type (char_type_node);
13867 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13868 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13869 get_identifier ("__va_list_tag"), record);
13871 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13872 unsigned_char_type_node);
13873 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13874 unsigned_char_type_node);
13875 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13876 every user file. */
13877 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13878 get_identifier ("reserved"), short_unsigned_type_node);
13879 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13880 get_identifier ("overflow_arg_area"),
13881 ptr_type_node);
13882 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13883 get_identifier ("reg_save_area"),
13884 ptr_type_node);
13886 va_list_gpr_counter_field = f_gpr;
13887 va_list_fpr_counter_field = f_fpr;
13889 DECL_FIELD_CONTEXT (f_gpr) = record;
13890 DECL_FIELD_CONTEXT (f_fpr) = record;
13891 DECL_FIELD_CONTEXT (f_res) = record;
13892 DECL_FIELD_CONTEXT (f_ovf) = record;
13893 DECL_FIELD_CONTEXT (f_sav) = record;
13895 TYPE_STUB_DECL (record) = type_decl;
13896 TYPE_NAME (record) = type_decl;
13897 TYPE_FIELDS (record) = f_gpr;
13898 DECL_CHAIN (f_gpr) = f_fpr;
13899 DECL_CHAIN (f_fpr) = f_res;
13900 DECL_CHAIN (f_res) = f_ovf;
13901 DECL_CHAIN (f_ovf) = f_sav;
13903 layout_type (record);
13905 /* The correct type is an array type of one element. */
13906 return build_array_type (record, build_index_type (size_zero_node));
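/* Illustrative only -- the user-visible C equivalent of the record
   built above for the SVR4 ABI (the typedef name is made up; the field
   names match the get_identifier calls):  */
#if 0
typedef struct __va_list_tag
{
  unsigned char gpr;		/* count of GPR arguments consumed, 0..8 */
  unsigned char fpr;		/* count of FPR arguments consumed, 0..8 */
  unsigned short reserved;	/* padding, named to keep -Wpadded quiet */
  void *overflow_arg_area;	/* arguments that spilled to the stack */
  void *reg_save_area;		/* block holding saved r3..r10 / f1..f8 */
} sketch_va_list[1];
#endif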
13909 /* Implement va_start. */
13911 static void
13912 rs6000_va_start (tree valist, rtx nextarg)
13914 HOST_WIDE_INT words, n_gpr, n_fpr;
13915 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13916 tree gpr, fpr, ovf, sav, t;
13918 /* Only SVR4 needs something special. */
13919 if (DEFAULT_ABI != ABI_V4)
13921 std_expand_builtin_va_start (valist, nextarg);
13922 return;
13925 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13926 f_fpr = DECL_CHAIN (f_gpr);
13927 f_res = DECL_CHAIN (f_fpr);
13928 f_ovf = DECL_CHAIN (f_res);
13929 f_sav = DECL_CHAIN (f_ovf);
13931 valist = build_simple_mem_ref (valist);
13932 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13933 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13934 f_fpr, NULL_TREE);
13935 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13936 f_ovf, NULL_TREE);
13937 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13938 f_sav, NULL_TREE);
13940 /* Count number of gp and fp argument registers used. */
13941 words = crtl->args.info.words;
13942 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13943 GP_ARG_NUM_REG);
13944 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13945 FP_ARG_NUM_REG);
13947 if (TARGET_DEBUG_ARG)
13948 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13949 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13950 words, n_gpr, n_fpr);
13952 if (cfun->va_list_gpr_size)
13954 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13955 build_int_cst (NULL_TREE, n_gpr));
13956 TREE_SIDE_EFFECTS (t) = 1;
13957 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13960 if (cfun->va_list_fpr_size)
13962 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13963 build_int_cst (NULL_TREE, n_fpr));
13964 TREE_SIDE_EFFECTS (t) = 1;
13965 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13967 #ifdef HAVE_AS_GNU_ATTRIBUTE
13968 if (call_ABI_of_interest (cfun->decl))
13969 rs6000_passes_float = true;
13970 #endif
13973 /* Find the overflow area. */
13974 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13975 if (words != 0)
13976 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13977 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13978 TREE_SIDE_EFFECTS (t) = 1;
13979 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13981 /* If there were no va_arg invocations, don't set up the register
13982 save area. */
13983 if (!cfun->va_list_gpr_size
13984 && !cfun->va_list_fpr_size
13985 && n_gpr < GP_ARG_NUM_REG
13986 && n_fpr < FP_ARG_V4_MAX_REG)
13987 return;
13989 /* Find the register save area. */
13990 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13991 if (cfun->machine->varargs_save_offset)
13992 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13993 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13994 TREE_SIDE_EFFECTS (t) = 1;
13995 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
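/* So for

     void f (int a, ...) { va_list ap; va_start (ap, a); ... }

   on V.4 this leaves gpr == 1 (r3 holds A), fpr == 0, OVF pointing at
   the caller's overflow words, and SAV pointing at the register save
   area set up in setup_incoming_varargs.  */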
13998 /* Implement va_arg. */
14000 static tree
14001 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
14002 gimple_seq *post_p)
14004 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
14005 tree gpr, fpr, ovf, sav, reg, t, u;
14006 int size, rsize, n_reg, sav_ofs, sav_scale;
14007 tree lab_false, lab_over, addr;
14008 int align;
14009 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
14010 int regalign = 0;
14011 gimple *stmt;
14013 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
14015 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
14016 return build_va_arg_indirect_ref (t);
14019 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
14020 earlier version of gcc, with the property that it always applied alignment
14021 adjustments to the va-args (even for zero-sized types). The cheapest way
14022 to deal with this is to replicate the effect of the part of
14023 std_gimplify_va_arg_expr that carries out the align adjust, for the
14024 relevant case.
14025 We don't need to check for pass-by-reference because of the test above.
14026 We can return a simplified answer, since we know there's no offset to add. */
14028 if (((TARGET_MACHO
14029 && rs6000_darwin64_abi)
14030 || DEFAULT_ABI == ABI_ELFv2
14031 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
14032 && integer_zerop (TYPE_SIZE (type)))
14034 unsigned HOST_WIDE_INT align, boundary;
14035 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
14036 align = PARM_BOUNDARY / BITS_PER_UNIT;
14037 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
14038 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
14039 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
14040 boundary /= BITS_PER_UNIT;
14041 if (boundary > align)
14043 tree t;
14044 /* This updates arg ptr by the amount that would be necessary
14045 to align the zero-sized (but not zero-alignment) item. */
14046 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
14047 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
14048 gimplify_and_add (t, pre_p);
14050 t = fold_convert (sizetype, valist_tmp);
14051 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
14052 fold_convert (TREE_TYPE (valist),
14053 fold_build2 (BIT_AND_EXPR, sizetype, t,
14054 size_int (-boundary))));
14055 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
14056 gimplify_and_add (t, pre_p);
14058 /* Since it is zero-sized there's no increment for the item itself. */
14059 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
14060 return build_va_arg_indirect_ref (valist_tmp);
14063 if (DEFAULT_ABI != ABI_V4)
14065 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
14067 tree elem_type = TREE_TYPE (type);
14068 machine_mode elem_mode = TYPE_MODE (elem_type);
14069 int elem_size = GET_MODE_SIZE (elem_mode);
14071 if (elem_size < UNITS_PER_WORD)
14073 tree real_part, imag_part;
14074 gimple_seq post = NULL;
14076 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
14077 &post);
14078 /* Copy the value into a temporary, lest the formal temporary
14079 be reused out from under us. */
14080 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
14081 gimple_seq_add_seq (pre_p, post);
14083 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
14084 post_p);
14086 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
14090 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
14093 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
14094 f_fpr = DECL_CHAIN (f_gpr);
14095 f_res = DECL_CHAIN (f_fpr);
14096 f_ovf = DECL_CHAIN (f_res);
14097 f_sav = DECL_CHAIN (f_ovf);
14099 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
14100 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
14101 f_fpr, NULL_TREE);
14102 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
14103 f_ovf, NULL_TREE);
14104 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
14105 f_sav, NULL_TREE);
14107 size = int_size_in_bytes (type);
14108 rsize = (size + 3) / 4;
14109 int pad = 4 * rsize - size;
14110 align = 1;
14112 machine_mode mode = TYPE_MODE (type);
14113 if (abi_v4_pass_in_fpr (mode))
14115 /* FP args go in FP registers, if present. */
14116 reg = fpr;
14117 n_reg = (size + 7) / 8;
14118 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
14119 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
14120 if (mode != SFmode && mode != SDmode)
14121 align = 8;
14123 else
14125 /* Otherwise into GP registers. */
14126 reg = gpr;
14127 n_reg = rsize;
14128 sav_ofs = 0;
14129 sav_scale = 4;
14130 if (n_reg == 2)
14131 align = 8;
14134 /* Pull the value out of the saved registers.... */
14136 lab_over = NULL;
14137 addr = create_tmp_var (ptr_type_node, "addr");
14139 /* AltiVec vectors never go in registers when -mabi=altivec. */
14140 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
14141 align = 16;
14142 else
14144 lab_false = create_artificial_label (input_location);
14145 lab_over = create_artificial_label (input_location);
14147 /* Long long and SPE vectors are aligned in the registers.
14148 So is any other 2-gpr item, such as complex int, due to a
14149 historical mistake. */
14150 u = reg;
14151 if (n_reg == 2 && reg == gpr)
14153 regalign = 1;
14154 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14155 build_int_cst (TREE_TYPE (reg), n_reg - 1));
14156 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
14157 unshare_expr (reg), u);
14159 /* _Decimal128 is passed in even/odd fpr pairs; the stored
14160 reg number is 0 for f1, so we want to make it odd. */
14161 else if (reg == fpr && mode == TDmode)
14163 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14164 build_int_cst (TREE_TYPE (reg), 1));
14165 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
14168 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
14169 t = build2 (GE_EXPR, boolean_type_node, u, t);
14170 u = build1 (GOTO_EXPR, void_type_node, lab_false);
14171 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
14172 gimplify_and_add (t, pre_p);
14174 t = sav;
14175 if (sav_ofs)
14176 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
14178 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14179 build_int_cst (TREE_TYPE (reg), n_reg));
14180 u = fold_convert (sizetype, u);
14181 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
14182 t = fold_build_pointer_plus (t, u);
14184 /* _Decimal32 varargs are located in the second word of the 64-bit
14185 FP register for 32-bit binaries. */
14186 if (TARGET_32BIT
14187 && TARGET_HARD_FLOAT && TARGET_FPRS
14188 && mode == SDmode)
14189 t = fold_build_pointer_plus_hwi (t, size);
14191 /* Args are passed right-aligned. */
14192 if (BYTES_BIG_ENDIAN)
14193 t = fold_build_pointer_plus_hwi (t, pad);
14195 gimplify_assign (addr, t, pre_p);
14197 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
14199 stmt = gimple_build_label (lab_false);
14200 gimple_seq_add_stmt (pre_p, stmt);
14202 if ((n_reg == 2 && !regalign) || n_reg > 2)
14204 /* Ensure that we don't find any more args in regs.
14205 Alignment has been taken care of for the special cases. */
14206 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
14210 /* ... otherwise out of the overflow area. */
14212 /* Care for on-stack alignment if needed. */
14213 t = ovf;
14214 if (align != 1)
14216 t = fold_build_pointer_plus_hwi (t, align - 1);
14217 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
14218 build_int_cst (TREE_TYPE (t), -align));
14221 /* Args are passed right-aligned. */
14222 if (BYTES_BIG_ENDIAN)
14223 t = fold_build_pointer_plus_hwi (t, pad);
14225 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
14227 gimplify_assign (unshare_expr (addr), t, pre_p);
14229 t = fold_build_pointer_plus_hwi (t, size);
14230 gimplify_assign (unshare_expr (ovf), t, pre_p);
14232 if (lab_over)
14234 stmt = gimple_build_label (lab_over);
14235 gimple_seq_add_stmt (pre_p, stmt);
14238 if (STRICT_ALIGNMENT
14239 && (TYPE_ALIGN (type)
14240 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
14242 /* The value (of type complex double, for example) may not be
14243 aligned in memory in the saved registers, so copy via a
14244 temporary. (This is the same code as used for SPARC.) */
14245 tree tmp = create_tmp_var (type, "va_arg_tmp");
14246 tree dest_addr = build_fold_addr_expr (tmp);
14248 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
14249 3, dest_addr, addr, size_int (rsize * 4));
14251 gimplify_and_add (copy, pre_p);
14252 addr = dest_addr;
14255 addr = fold_convert (ptrtype, addr);
14256 return build_va_arg_indirect_ref (addr);
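/* Shape of the sequence gimplified above for a plain 'int' under V.4
   (size == 4, n_reg == 1, align == 1, sav_ofs == 0, sav_scale == 4):

     if (ap->gpr >= 8) goto lab_false;
     addr = ap->reg_save_area + ap->gpr++ * 4;
     goto lab_over;
   lab_false:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area = addr + 4;
   lab_over:
     ... *(int *) addr ...  */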
14259 /* Builtins. */
14261 static void
14262 def_builtin (const char *name, tree type, enum rs6000_builtins code)
14264 tree t;
14265 unsigned classify = rs6000_builtin_info[(int)code].attr;
14266 const char *attr_string = "";
14268 gcc_assert (name != NULL);
14269 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
14271 if (rs6000_builtin_decls[(int)code])
14272 fatal_error (input_location,
14273 "internal error: builtin function %s already processed", name);
14275 rs6000_builtin_decls[(int)code] = t =
14276 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
14278 /* Set any special attributes. */
14279 if ((classify & RS6000_BTC_CONST) != 0)
14281 /* const function, function only depends on the inputs. */
14282 TREE_READONLY (t) = 1;
14283 TREE_NOTHROW (t) = 1;
14284 attr_string = ", const";
14286 else if ((classify & RS6000_BTC_PURE) != 0)
14288 /* pure function, function can read global memory, but does not set any
14289 external state. */
14290 DECL_PURE_P (t) = 1;
14291 TREE_NOTHROW (t) = 1;
14292 attr_string = ", pure";
14294 else if ((classify & RS6000_BTC_FP) != 0)
14296 /* Function is a math function. If rounding mode is on, then treat the
14297 function as not reading global memory, but it can have arbitrary side
14298 effects. If it is off, then assume the function is a const function.
14299 This mimics the ATTR_MATHFN_FPROUNDING attribute in
14300 builtin-attribute.def that is used for the math functions. */
14301 TREE_NOTHROW (t) = 1;
14302 if (flag_rounding_math)
14304 DECL_PURE_P (t) = 1;
14305 DECL_IS_NOVOPS (t) = 1;
14306 attr_string = ", fp, pure";
14308 else
14310 TREE_READONLY (t) = 1;
14311 attr_string = ", fp, const";
14314 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
14315 gcc_unreachable ();
14317 if (TARGET_DEBUG_BUILTIN)
14318 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
14319 (int)code, name, attr_string);
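/* Typical use (illustrative; the bulk of the real calls are driven by
   the bdesc_* tables below, and the type tree shown is assumed to have
   been built by the builtin init code):

     def_builtin ("__builtin_altivec_vadduwm", v4si_ftype_v4si_v4si,
		  ALTIVEC_BUILTIN_VADDUWM);  */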
14322 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
14324 #undef RS6000_BUILTIN_0
14325 #undef RS6000_BUILTIN_1
14326 #undef RS6000_BUILTIN_2
14327 #undef RS6000_BUILTIN_3
14328 #undef RS6000_BUILTIN_A
14329 #undef RS6000_BUILTIN_D
14330 #undef RS6000_BUILTIN_E
14331 #undef RS6000_BUILTIN_H
14332 #undef RS6000_BUILTIN_P
14333 #undef RS6000_BUILTIN_Q
14334 #undef RS6000_BUILTIN_S
14335 #undef RS6000_BUILTIN_X
14337 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14338 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14339 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14340 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
14341 { MASK, ICODE, NAME, ENUM },
14343 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14344 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14345 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14346 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14347 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14348 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14349 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14350 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14352 static const struct builtin_description bdesc_3arg[] =
14354 #include "powerpcspe-builtin.def"
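/* The #undef/#define blocks above and below implement an X-macro
   scheme: powerpcspe-builtin.def is one long list of RS6000_BUILTIN_*
   invocations, and each bdesc_* table redefines the macros so exactly
   one builtin class expands to an initializer while the rest expand to
   nothing.  A minimal sketch of the same idiom, with made-up names:  */
#if 0
#define DEF_2ARG(NAME)			/* nothing in this table */
#define DEF_3ARG(NAME)			{ NAME },
static const struct { const char *name; } sketch_3arg[] =
{
  DEF_2ARG ("mul2")			/* filtered out */
  DEF_3ARG ("fma3")			/* contributes { "fma3" }, */
};
#undef DEF_2ARG
#undef DEF_3ARG
#endif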
14357 /* DST operations: void foo (void *, const int, const char). */
14359 #undef RS6000_BUILTIN_0
14360 #undef RS6000_BUILTIN_1
14361 #undef RS6000_BUILTIN_2
14362 #undef RS6000_BUILTIN_3
14363 #undef RS6000_BUILTIN_A
14364 #undef RS6000_BUILTIN_D
14365 #undef RS6000_BUILTIN_E
14366 #undef RS6000_BUILTIN_H
14367 #undef RS6000_BUILTIN_P
14368 #undef RS6000_BUILTIN_Q
14369 #undef RS6000_BUILTIN_S
14370 #undef RS6000_BUILTIN_X
14372 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14373 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14374 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14375 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14376 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14377 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
14378 { MASK, ICODE, NAME, ENUM },
14380 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14381 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14382 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14383 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14384 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14385 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14387 static const struct builtin_description bdesc_dst[] =
14389 #include "powerpcspe-builtin.def"
14392 /* Simple binary operations: VECc = foo (VECa, VECb). */
14394 #undef RS6000_BUILTIN_0
14395 #undef RS6000_BUILTIN_1
14396 #undef RS6000_BUILTIN_2
14397 #undef RS6000_BUILTIN_3
14398 #undef RS6000_BUILTIN_A
14399 #undef RS6000_BUILTIN_D
14400 #undef RS6000_BUILTIN_E
14401 #undef RS6000_BUILTIN_H
14402 #undef RS6000_BUILTIN_P
14403 #undef RS6000_BUILTIN_Q
14404 #undef RS6000_BUILTIN_S
14405 #undef RS6000_BUILTIN_X
14407 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14408 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14409 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
14410 { MASK, ICODE, NAME, ENUM },
14412 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14413 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14414 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14415 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14416 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14417 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14418 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14419 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14420 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14422 static const struct builtin_description bdesc_2arg[] =
14424 #include "powerpcspe-builtin.def"
14427 #undef RS6000_BUILTIN_0
14428 #undef RS6000_BUILTIN_1
14429 #undef RS6000_BUILTIN_2
14430 #undef RS6000_BUILTIN_3
14431 #undef RS6000_BUILTIN_A
14432 #undef RS6000_BUILTIN_D
14433 #undef RS6000_BUILTIN_E
14434 #undef RS6000_BUILTIN_H
14435 #undef RS6000_BUILTIN_P
14436 #undef RS6000_BUILTIN_Q
14437 #undef RS6000_BUILTIN_S
14438 #undef RS6000_BUILTIN_X
14440 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14441 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14442 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14443 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14444 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14445 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14446 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14447 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14448 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
14449 { MASK, ICODE, NAME, ENUM },
14451 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14452 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14453 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14455 /* AltiVec predicates. */
14457 static const struct builtin_description bdesc_altivec_preds[] =
14459 #include "powerpcspe-builtin.def"
14462 /* SPE predicates. */
14463 #undef RS6000_BUILTIN_0
14464 #undef RS6000_BUILTIN_1
14465 #undef RS6000_BUILTIN_2
14466 #undef RS6000_BUILTIN_3
14467 #undef RS6000_BUILTIN_A
14468 #undef RS6000_BUILTIN_D
14469 #undef RS6000_BUILTIN_E
14470 #undef RS6000_BUILTIN_H
14471 #undef RS6000_BUILTIN_P
14472 #undef RS6000_BUILTIN_Q
14473 #undef RS6000_BUILTIN_S
14474 #undef RS6000_BUILTIN_X
14476 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14477 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14478 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14479 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14480 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14481 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14482 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14483 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14484 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14485 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14486 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
14487 { MASK, ICODE, NAME, ENUM },
14489 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14491 static const struct builtin_description bdesc_spe_predicates[] =
14493 #include "powerpcspe-builtin.def"
14496 /* SPE evsel predicates. */
14497 #undef RS6000_BUILTIN_0
14498 #undef RS6000_BUILTIN_1
14499 #undef RS6000_BUILTIN_2
14500 #undef RS6000_BUILTIN_3
14501 #undef RS6000_BUILTIN_A
14502 #undef RS6000_BUILTIN_D
14503 #undef RS6000_BUILTIN_E
14504 #undef RS6000_BUILTIN_H
14505 #undef RS6000_BUILTIN_P
14506 #undef RS6000_BUILTIN_Q
14507 #undef RS6000_BUILTIN_S
14508 #undef RS6000_BUILTIN_X
14510 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14511 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14512 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14513 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14514 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14515 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14516 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
14517 { MASK, ICODE, NAME, ENUM },
14519 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14520 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14521 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14522 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14523 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14525 static const struct builtin_description bdesc_spe_evsel[] =
14527 #include "powerpcspe-builtin.def"
14530 /* PAIRED predicates. */
14531 #undef RS6000_BUILTIN_0
14532 #undef RS6000_BUILTIN_1
14533 #undef RS6000_BUILTIN_2
14534 #undef RS6000_BUILTIN_3
14535 #undef RS6000_BUILTIN_A
14536 #undef RS6000_BUILTIN_D
14537 #undef RS6000_BUILTIN_E
14538 #undef RS6000_BUILTIN_H
14539 #undef RS6000_BUILTIN_P
14540 #undef RS6000_BUILTIN_Q
14541 #undef RS6000_BUILTIN_S
14542 #undef RS6000_BUILTIN_X
14544 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14545 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14546 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14547 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14548 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14549 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14550 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14551 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14552 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14553 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14554 { MASK, ICODE, NAME, ENUM },
14556 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14557 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14559 static const struct builtin_description bdesc_paired_preds[] =
14561 #include "powerpcspe-builtin.def"
14564 /* ABS* operations. */
14566 #undef RS6000_BUILTIN_0
14567 #undef RS6000_BUILTIN_1
14568 #undef RS6000_BUILTIN_2
14569 #undef RS6000_BUILTIN_3
14570 #undef RS6000_BUILTIN_A
14571 #undef RS6000_BUILTIN_D
14572 #undef RS6000_BUILTIN_E
14573 #undef RS6000_BUILTIN_H
14574 #undef RS6000_BUILTIN_P
14575 #undef RS6000_BUILTIN_Q
14576 #undef RS6000_BUILTIN_S
14577 #undef RS6000_BUILTIN_X
14579 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14580 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14581 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14582 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14583 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14584 { MASK, ICODE, NAME, ENUM },
14586 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14587 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14588 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14589 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14590 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14591 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14592 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14594 static const struct builtin_description bdesc_abs[] =
14596 #include "powerpcspe-builtin.def"
14599 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14600 foo (VECa). */
14602 #undef RS6000_BUILTIN_0
14603 #undef RS6000_BUILTIN_1
14604 #undef RS6000_BUILTIN_2
14605 #undef RS6000_BUILTIN_3
14606 #undef RS6000_BUILTIN_A
14607 #undef RS6000_BUILTIN_D
14608 #undef RS6000_BUILTIN_E
14609 #undef RS6000_BUILTIN_H
14610 #undef RS6000_BUILTIN_P
14611 #undef RS6000_BUILTIN_Q
14612 #undef RS6000_BUILTIN_S
14613 #undef RS6000_BUILTIN_X
14615 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14616 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14617 { MASK, ICODE, NAME, ENUM },
14619 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14620 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14621 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14622 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14623 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14624 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14625 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14626 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14627 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14628 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14630 static const struct builtin_description bdesc_1arg[] =
14632 #include "powerpcspe-builtin.def"
14635 /* Simple no-argument operations: result = __builtin_darn_32 ().  */
14637 #undef RS6000_BUILTIN_0
14638 #undef RS6000_BUILTIN_1
14639 #undef RS6000_BUILTIN_2
14640 #undef RS6000_BUILTIN_3
14641 #undef RS6000_BUILTIN_A
14642 #undef RS6000_BUILTIN_D
14643 #undef RS6000_BUILTIN_E
14644 #undef RS6000_BUILTIN_H
14645 #undef RS6000_BUILTIN_P
14646 #undef RS6000_BUILTIN_Q
14647 #undef RS6000_BUILTIN_S
14648 #undef RS6000_BUILTIN_X
14650 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14651 { MASK, ICODE, NAME, ENUM },
14653 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14654 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14655 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14656 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14657 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14658 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14659 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14660 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14661 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14662 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14663 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14665 static const struct builtin_description bdesc_0arg[] =
14667 #include "powerpcspe-builtin.def"
14670 /* HTM builtins. */
14671 #undef RS6000_BUILTIN_0
14672 #undef RS6000_BUILTIN_1
14673 #undef RS6000_BUILTIN_2
14674 #undef RS6000_BUILTIN_3
14675 #undef RS6000_BUILTIN_A
14676 #undef RS6000_BUILTIN_D
14677 #undef RS6000_BUILTIN_E
14678 #undef RS6000_BUILTIN_H
14679 #undef RS6000_BUILTIN_P
14680 #undef RS6000_BUILTIN_Q
14681 #undef RS6000_BUILTIN_S
14682 #undef RS6000_BUILTIN_X
14684 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14685 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14686 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14687 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14688 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14689 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14690 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14691 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14692 { MASK, ICODE, NAME, ENUM },
14694 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14695 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14696 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14697 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14699 static const struct builtin_description bdesc_htm[] =
14701 #include "powerpcspe-builtin.def"
14704 #undef RS6000_BUILTIN_0
14705 #undef RS6000_BUILTIN_1
14706 #undef RS6000_BUILTIN_2
14707 #undef RS6000_BUILTIN_3
14708 #undef RS6000_BUILTIN_A
14709 #undef RS6000_BUILTIN_D
14710 #undef RS6000_BUILTIN_E
14711 #undef RS6000_BUILTIN_H
14712 #undef RS6000_BUILTIN_P
14713 #undef RS6000_BUILTIN_Q
14714 #undef RS6000_BUILTIN_S
14715 #undef RS6000_BUILTIN_X
14716 /* Return true if a builtin function is overloaded. */
14717 bool
14718 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
14720 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
14723 const char *
14724 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
14726 return rs6000_builtin_info[(int)fncode].name;
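/* Typical pairing of the two queries above (an illustration, not a use
   taken from this file):

     if (rs6000_overloaded_builtin_p (fcode))
       error ("unresolved overload for %s",
              rs6000_overloaded_builtin_name (fcode));  */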
14729 /* Expand an expression EXP that calls a builtin without arguments. */
14730 static rtx
14731 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
14733 rtx pat;
14734 machine_mode tmode = insn_data[icode].operand[0].mode;
14736 if (icode == CODE_FOR_nothing)
14737 /* Builtin not supported on this processor. */
14738 return 0;
14740 if (target == 0
14741 || GET_MODE (target) != tmode
14742 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14743 target = gen_reg_rtx (tmode);
14745 pat = GEN_FCN (icode) (target);
14746 if (! pat)
14747 return 0;
14748 emit_insn (pat);
14750 return target;
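/* The expanders below all follow the skeleton visible here in its
   simplest form: bail out on CODE_FOR_nothing, force the target and
   operands to satisfy the insn's predicates, build the pattern with
   GEN_FCN (icode), and emit it.  They differ mainly in operand count and
   in per-icode literal-range checks.  */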
14754 static rtx
14755 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
14757 rtx pat;
14758 tree arg0 = CALL_EXPR_ARG (exp, 0);
14759 tree arg1 = CALL_EXPR_ARG (exp, 1);
14760 rtx op0 = expand_normal (arg0);
14761 rtx op1 = expand_normal (arg1);
14762 machine_mode mode0 = insn_data[icode].operand[0].mode;
14763 machine_mode mode1 = insn_data[icode].operand[1].mode;
14765 if (icode == CODE_FOR_nothing)
14766 /* Builtin not supported on this processor. */
14767 return 0;
14769 /* If we got invalid arguments bail out before generating bad rtl. */
14770 if (arg0 == error_mark_node || arg1 == error_mark_node)
14771 return const0_rtx;
14773 if (GET_CODE (op0) != CONST_INT
14774 || INTVAL (op0) > 255
14775 || INTVAL (op0) < 0)
14777 error ("argument 1 must be an 8-bit field value");
14778 return const0_rtx;
14781 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14782 op0 = copy_to_mode_reg (mode0, op0);
14784 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14785 op1 = copy_to_mode_reg (mode1, op1);
14787 pat = GEN_FCN (icode) (op0, op1);
14788 if (! pat)
14789 return const0_rtx;
14790 emit_insn (pat);
14792 return NULL_RTX;
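/* Illustrative use, assuming the usual (const int, double) prototype of
   the builtin routed here:

     __builtin_mtfsf (0xff, d);

   Argument 1 must be a literal FPSCR field mask in 0..255; anything else
   is diagnosed above before any RTL is generated.  */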
14795 static rtx
14796 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14798 rtx pat;
14799 tree arg0 = CALL_EXPR_ARG (exp, 0);
14800 rtx op0 = expand_normal (arg0);
14801 machine_mode tmode = insn_data[icode].operand[0].mode;
14802 machine_mode mode0 = insn_data[icode].operand[1].mode;
14804 if (icode == CODE_FOR_nothing)
14805 /* Builtin not supported on this processor. */
14806 return 0;
14808 /* If we got invalid arguments bail out before generating bad rtl. */
14809 if (arg0 == error_mark_node)
14810 return const0_rtx;
14812 if (icode == CODE_FOR_altivec_vspltisb
14813 || icode == CODE_FOR_altivec_vspltish
14814 || icode == CODE_FOR_altivec_vspltisw
14815 || icode == CODE_FOR_spe_evsplatfi
14816 || icode == CODE_FOR_spe_evsplati)
14818 /* Only allow 5-bit *signed* literals. */
14819 if (GET_CODE (op0) != CONST_INT
14820 || INTVAL (op0) > 15
14821 || INTVAL (op0) < -16)
14823 error ("argument 1 must be a 5-bit signed literal");
14824 return CONST0_RTX (tmode);
14828 if (target == 0
14829 || GET_MODE (target) != tmode
14830 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14831 target = gen_reg_rtx (tmode);
14833 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14834 op0 = copy_to_mode_reg (mode0, op0);
14836 pat = GEN_FCN (icode) (target, op0);
14837 if (! pat)
14838 return 0;
14839 emit_insn (pat);
14841 return target;
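/* For example, vec_splat_s8 reaches this expander as
   CODE_FOR_altivec_vspltisb, whose immediate is a 5-bit signed field:

     vec_splat_s8 (-16)    accepted, within -16 .. 15
     vec_splat_s8 (20)     rejected with the error above  */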
14844 static rtx
14845 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14847 rtx pat, scratch1, scratch2;
14848 tree arg0 = CALL_EXPR_ARG (exp, 0);
14849 rtx op0 = expand_normal (arg0);
14850 machine_mode tmode = insn_data[icode].operand[0].mode;
14851 machine_mode mode0 = insn_data[icode].operand[1].mode;
14853 /* If we have invalid arguments, bail out before generating bad rtl. */
14854 if (arg0 == error_mark_node)
14855 return const0_rtx;
14857 if (target == 0
14858 || GET_MODE (target) != tmode
14859 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14860 target = gen_reg_rtx (tmode);
14862 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14863 op0 = copy_to_mode_reg (mode0, op0);
14865 scratch1 = gen_reg_rtx (mode0);
14866 scratch2 = gen_reg_rtx (mode0);
14868 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14869 if (! pat)
14870 return 0;
14871 emit_insn (pat);
14873 return target;
14876 static rtx
14877 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14879 rtx pat;
14880 tree arg0 = CALL_EXPR_ARG (exp, 0);
14881 tree arg1 = CALL_EXPR_ARG (exp, 1);
14882 rtx op0 = expand_normal (arg0);
14883 rtx op1 = expand_normal (arg1);
14884 machine_mode tmode = insn_data[icode].operand[0].mode;
14885 machine_mode mode0 = insn_data[icode].operand[1].mode;
14886 machine_mode mode1 = insn_data[icode].operand[2].mode;
14888 if (icode == CODE_FOR_nothing)
14889 /* Builtin not supported on this processor. */
14890 return 0;
14892 /* If we got invalid arguments bail out before generating bad rtl. */
14893 if (arg0 == error_mark_node || arg1 == error_mark_node)
14894 return const0_rtx;
14896 if (icode == CODE_FOR_altivec_vcfux
14897 || icode == CODE_FOR_altivec_vcfsx
14898 || icode == CODE_FOR_altivec_vctsxs
14899 || icode == CODE_FOR_altivec_vctuxs
14900 || icode == CODE_FOR_altivec_vspltb
14901 || icode == CODE_FOR_altivec_vsplth
14902 || icode == CODE_FOR_altivec_vspltw
14903 || icode == CODE_FOR_spe_evaddiw
14904 || icode == CODE_FOR_spe_evldd
14905 || icode == CODE_FOR_spe_evldh
14906 || icode == CODE_FOR_spe_evldw
14907 || icode == CODE_FOR_spe_evlhhesplat
14908 || icode == CODE_FOR_spe_evlhhossplat
14909 || icode == CODE_FOR_spe_evlhhousplat
14910 || icode == CODE_FOR_spe_evlwhe
14911 || icode == CODE_FOR_spe_evlwhos
14912 || icode == CODE_FOR_spe_evlwhou
14913 || icode == CODE_FOR_spe_evlwhsplat
14914 || icode == CODE_FOR_spe_evlwwsplat
14915 || icode == CODE_FOR_spe_evrlwi
14916 || icode == CODE_FOR_spe_evslwi
14917 || icode == CODE_FOR_spe_evsrwis
14918 || icode == CODE_FOR_spe_evsubifw
14919 || icode == CODE_FOR_spe_evsrwiu)
14921 /* Only allow 5-bit unsigned literals. */
14922 STRIP_NOPS (arg1);
14923 if (TREE_CODE (arg1) != INTEGER_CST
14924 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14926 error ("argument 2 must be a 5-bit unsigned literal");
14927 return CONST0_RTX (tmode);
14930 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14931 || icode == CODE_FOR_dfptstsfi_lt_dd
14932 || icode == CODE_FOR_dfptstsfi_gt_dd
14933 || icode == CODE_FOR_dfptstsfi_unordered_dd
14934 || icode == CODE_FOR_dfptstsfi_eq_td
14935 || icode == CODE_FOR_dfptstsfi_lt_td
14936 || icode == CODE_FOR_dfptstsfi_gt_td
14937 || icode == CODE_FOR_dfptstsfi_unordered_td)
14939 /* Only allow 6-bit unsigned literals. */
14940 STRIP_NOPS (arg0);
14941 if (TREE_CODE (arg0) != INTEGER_CST
14942 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14944 error ("argument 1 must be a 6-bit unsigned literal");
14945 return CONST0_RTX (tmode);
14948 else if (icode == CODE_FOR_xststdcdp
14949 || icode == CODE_FOR_xststdcsp
14950 || icode == CODE_FOR_xvtstdcdp
14951 || icode == CODE_FOR_xvtstdcsp)
14953 /* Only allow 7-bit unsigned literals. */
14954 STRIP_NOPS (arg1);
14955 if (TREE_CODE (arg1) != INTEGER_CST
14956 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14958 error ("argument 2 must be a 7-bit unsigned literal");
14959 return CONST0_RTX (tmode);
14963 if (target == 0
14964 || GET_MODE (target) != tmode
14965 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14966 target = gen_reg_rtx (tmode);
14968 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14969 op0 = copy_to_mode_reg (mode0, op0);
14970 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14971 op1 = copy_to_mode_reg (mode1, op1);
14973 pat = GEN_FCN (icode) (target, op0, op1);
14974 if (! pat)
14975 return 0;
14976 emit_insn (pat);
14978 return target;
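/* For example (illustrative), vec_ctf maps onto
   CODE_FOR_altivec_vcfsx/vcfux, whose scale operand is one of the 5-bit
   unsigned immediates listed above:

     vec_ctf (vi, 31)      accepted
     vec_ctf (vi, 32)      "argument 2 must be a 5-bit unsigned literal"  */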
14981 static rtx
14982 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14984 rtx pat, scratch;
14985 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14986 tree arg0 = CALL_EXPR_ARG (exp, 1);
14987 tree arg1 = CALL_EXPR_ARG (exp, 2);
14988 rtx op0 = expand_normal (arg0);
14989 rtx op1 = expand_normal (arg1);
14990 machine_mode tmode = SImode;
14991 machine_mode mode0 = insn_data[icode].operand[1].mode;
14992 machine_mode mode1 = insn_data[icode].operand[2].mode;
14993 int cr6_form_int;
14995 if (TREE_CODE (cr6_form) != INTEGER_CST)
14997 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14998 return const0_rtx;
15000 else
15001 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
15003 gcc_assert (mode0 == mode1);
15005 /* If we have invalid arguments, bail out before generating bad rtl. */
15006 if (arg0 == error_mark_node || arg1 == error_mark_node)
15007 return const0_rtx;
15009 if (target == 0
15010 || GET_MODE (target) != tmode
15011 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15012 target = gen_reg_rtx (tmode);
15014 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15015 op0 = copy_to_mode_reg (mode0, op0);
15016 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15017 op1 = copy_to_mode_reg (mode1, op1);
15019 /* Note that for many of the relevant operations (e.g. cmpne or
15020 cmpeq) with float or double operands, it makes more sense for the
15021 mode of the allocated scratch register to select a vector of
15022 integer. But the choice to copy the mode of operand 0 was made
15023 long ago and there are no plans to change it. */
15024 scratch = gen_reg_rtx (mode0);
15026 pat = GEN_FCN (icode) (scratch, op0, op1);
15027 if (! pat)
15028 return 0;
15029 emit_insn (pat);
15031 /* The vec_any* and vec_all* predicates use the same opcodes for two
15032 different operations, but the bits in CR6 will be different
15033 depending on what information we want. So we have to play tricks
15034 with CR6 to get the right bits out.
15036 If you think this is disgusting, look at the specs for the
15037 AltiVec predicates. */
15039 switch (cr6_form_int)
15041 case 0:
15042 emit_insn (gen_cr6_test_for_zero (target));
15043 break;
15044 case 1:
15045 emit_insn (gen_cr6_test_for_zero_reverse (target));
15046 break;
15047 case 2:
15048 emit_insn (gen_cr6_test_for_lt (target));
15049 break;
15050 case 3:
15051 emit_insn (gen_cr6_test_for_lt_reverse (target));
15052 break;
15053 default:
15054 error ("argument 1 of __builtin_altivec_predicate is out of range");
15055 break;
15058 return target;
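/* The four cr6_form values match altivec.h's __CR6_EQ (0),
   __CR6_EQ_REV (1), __CR6_LT (2) and __CR6_LT_REV (3); assuming those
   usual encodings, vec_all_eq tests CR6's LT bit while vec_any_eq tests
   the complement of its EQ bit, both derived from the same vcmpequ*.
   comparison.  */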
15061 static rtx
15062 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
15064 rtx pat, addr;
15065 tree arg0 = CALL_EXPR_ARG (exp, 0);
15066 tree arg1 = CALL_EXPR_ARG (exp, 1);
15067 machine_mode tmode = insn_data[icode].operand[0].mode;
15068 machine_mode mode0 = Pmode;
15069 machine_mode mode1 = Pmode;
15070 rtx op0 = expand_normal (arg0);
15071 rtx op1 = expand_normal (arg1);
15073 if (icode == CODE_FOR_nothing)
15074 /* Builtin not supported on this processor. */
15075 return 0;
15077 /* If we got invalid arguments bail out before generating bad rtl. */
15078 if (arg0 == error_mark_node || arg1 == error_mark_node)
15079 return const0_rtx;
15081 if (target == 0
15082 || GET_MODE (target) != tmode
15083 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15084 target = gen_reg_rtx (tmode);
15086 op1 = copy_to_mode_reg (mode1, op1);
15088 if (op0 == const0_rtx)
15090 addr = gen_rtx_MEM (tmode, op1);
15092 else
15094 op0 = copy_to_mode_reg (mode0, op0);
15095 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
15098 pat = GEN_FCN (icode) (target, addr);
15100 if (! pat)
15101 return 0;
15102 emit_insn (pat);
15104 return target;
15107 /* Return a constant vector for use as a little-endian permute control vector
15108 to reverse the order of elements of the given vector mode. */
15109 static rtx
15110 swap_selector_for_mode (machine_mode mode)
15112 /* These are little endian vectors, so their elements are reversed
15113 from what you would normally expect for a permute control vector. */
15114 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
15115 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
15116 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
15117 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
15118 unsigned int *swaparray, i;
15119 rtx perm[16];
15121 switch (mode)
15123 case E_V2DFmode:
15124 case E_V2DImode:
15125 swaparray = swap2;
15126 break;
15127 case E_V4SFmode:
15128 case E_V4SImode:
15129 swaparray = swap4;
15130 break;
15131 case E_V8HImode:
15132 swaparray = swap8;
15133 break;
15134 case E_V16QImode:
15135 swaparray = swap16;
15136 break;
15137 default:
15138 gcc_unreachable ();
15141 for (i = 0; i < 16; ++i)
15142 perm[i] = GEN_INT (swaparray[i]);
15144 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
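/* Worked example: for V4SImode the table yields the byte selector
   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}; used as a vperm control
   with both inputs equal, it reverses the order of the four 32-bit
   elements, which is the element reversal the -maltivec=be helpers below
   rely on.  */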
15147 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
15148 with -maltivec=be specified. Issue the load followed by an element-
15149 reversing permute. */
15150 void
15151 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15153 rtx tmp = gen_reg_rtx (mode);
15154 rtx load = gen_rtx_SET (tmp, op1);
15155 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15156 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
15157 rtx sel = swap_selector_for_mode (mode);
15158 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
15160 gcc_assert (REG_P (op0));
15161 emit_insn (par);
15162 emit_insn (gen_rtx_SET (op0, vperm));
15165 /* Generate code for a "stvxl" built-in for a little endian target with
15166 -maltivec=be specified. Issue the store preceded by an element-reversing
15167 permute. */
15168 void
15169 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15171 rtx tmp = gen_reg_rtx (mode);
15172 rtx store = gen_rtx_SET (op0, tmp);
15173 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15174 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
15175 rtx sel = swap_selector_for_mode (mode);
15176 rtx vperm;
15178 gcc_assert (REG_P (op1));
15179 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15180 emit_insn (gen_rtx_SET (tmp, vperm));
15181 emit_insn (par);
15184 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
15185 specified. Issue the store preceded by an element-reversing permute. */
15186 void
15187 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15189 machine_mode inner_mode = GET_MODE_INNER (mode);
15190 rtx tmp = gen_reg_rtx (mode);
15191 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
15192 rtx sel = swap_selector_for_mode (mode);
15193 rtx vperm;
15195 gcc_assert (REG_P (op1));
15196 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15197 emit_insn (gen_rtx_SET (tmp, vperm));
15198 emit_insn (gen_rtx_SET (op0, stvx));
15201 static rtx
15202 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
15204 rtx pat, addr;
15205 tree arg0 = CALL_EXPR_ARG (exp, 0);
15206 tree arg1 = CALL_EXPR_ARG (exp, 1);
15207 machine_mode tmode = insn_data[icode].operand[0].mode;
15208 machine_mode mode0 = Pmode;
15209 machine_mode mode1 = Pmode;
15210 rtx op0 = expand_normal (arg0);
15211 rtx op1 = expand_normal (arg1);
15213 if (icode == CODE_FOR_nothing)
15214 /* Builtin not supported on this processor. */
15215 return 0;
15217 /* If we got invalid arguments bail out before generating bad rtl. */
15218 if (arg0 == error_mark_node || arg1 == error_mark_node)
15219 return const0_rtx;
15221 if (target == 0
15222 || GET_MODE (target) != tmode
15223 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15224 target = gen_reg_rtx (tmode);
15226 op1 = copy_to_mode_reg (mode1, op1);
15228 /* For LVX, express the RTL accurately by ANDing the address with -16.
15229 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
15230 so the raw address is fine. */
15231 if (icode == CODE_FOR_altivec_lvx_v2df_2op
15232 || icode == CODE_FOR_altivec_lvx_v2di_2op
15233 || icode == CODE_FOR_altivec_lvx_v4sf_2op
15234 || icode == CODE_FOR_altivec_lvx_v4si_2op
15235 || icode == CODE_FOR_altivec_lvx_v8hi_2op
15236 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
15238 rtx rawaddr;
15239 if (op0 == const0_rtx)
15240 rawaddr = op1;
15241 else
15243 op0 = copy_to_mode_reg (mode0, op0);
15244 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
15246 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15247 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
15249 /* For -maltivec=be, emit the load and follow it up with a
15250 permute to swap the elements. */
15251 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15253 rtx temp = gen_reg_rtx (tmode);
15254 emit_insn (gen_rtx_SET (temp, addr));
15256 rtx sel = swap_selector_for_mode (tmode);
15257 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
15258 UNSPEC_VPERM);
15259 emit_insn (gen_rtx_SET (target, vperm));
15261 else
15262 emit_insn (gen_rtx_SET (target, addr));
15264 else
15266 if (op0 == const0_rtx)
15267 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
15268 else
15270 op0 = copy_to_mode_reg (mode0, op0);
15271 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
15272 gen_rtx_PLUS (Pmode, op1, op0));
15275 pat = GEN_FCN (icode) (target, addr);
15276 if (! pat)
15277 return 0;
15278 emit_insn (pat);
15281 return target;
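/* Note that the AND with -16 above matches what the hardware does
   anyway: lvx ignores the low four address bits.  Expressing the masking
   in the RTL lets the optimizers see the real access.  */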
15284 static rtx
15285 spe_expand_stv_builtin (enum insn_code icode, tree exp)
15287 tree arg0 = CALL_EXPR_ARG (exp, 0);
15288 tree arg1 = CALL_EXPR_ARG (exp, 1);
15289 tree arg2 = CALL_EXPR_ARG (exp, 2);
15290 rtx op0 = expand_normal (arg0);
15291 rtx op1 = expand_normal (arg1);
15292 rtx op2 = expand_normal (arg2);
15293 rtx pat;
15294 machine_mode mode0 = insn_data[icode].operand[0].mode;
15295 machine_mode mode1 = insn_data[icode].operand[1].mode;
15296 machine_mode mode2 = insn_data[icode].operand[2].mode;
15298 /* Invalid arguments. Bail before doing anything stoopid! */
15299 if (arg0 == error_mark_node
15300 || arg1 == error_mark_node
15301 || arg2 == error_mark_node)
15302 return const0_rtx;
15304 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
15305 op0 = copy_to_mode_reg (mode2, op0);
15306 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
15307 op1 = copy_to_mode_reg (mode0, op1);
15308 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
15309 op2 = copy_to_mode_reg (mode1, op2);
15311 pat = GEN_FCN (icode) (op1, op2, op0);
15312 if (pat)
15313 emit_insn (pat);
15314 return NULL_RTX;
15317 static rtx
15318 paired_expand_stv_builtin (enum insn_code icode, tree exp)
15320 tree arg0 = CALL_EXPR_ARG (exp, 0);
15321 tree arg1 = CALL_EXPR_ARG (exp, 1);
15322 tree arg2 = CALL_EXPR_ARG (exp, 2);
15323 rtx op0 = expand_normal (arg0);
15324 rtx op1 = expand_normal (arg1);
15325 rtx op2 = expand_normal (arg2);
15326 rtx pat, addr;
15327 machine_mode tmode = insn_data[icode].operand[0].mode;
15328 machine_mode mode1 = Pmode;
15329 machine_mode mode2 = Pmode;
15331 /* Invalid arguments. Bail before doing anything stoopid! */
15332 if (arg0 == error_mark_node
15333 || arg1 == error_mark_node
15334 || arg2 == error_mark_node)
15335 return const0_rtx;
15337 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
15338 op0 = copy_to_mode_reg (tmode, op0);
15340 op2 = copy_to_mode_reg (mode2, op2);
15342 if (op1 == const0_rtx)
15344 addr = gen_rtx_MEM (tmode, op2);
15346 else
15348 op1 = copy_to_mode_reg (mode1, op1);
15349 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
15352 pat = GEN_FCN (icode) (addr, op0);
15353 if (pat)
15354 emit_insn (pat);
15355 return NULL_RTX;
15358 static rtx
15359 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
15361 rtx pat;
15362 tree arg0 = CALL_EXPR_ARG (exp, 0);
15363 tree arg1 = CALL_EXPR_ARG (exp, 1);
15364 tree arg2 = CALL_EXPR_ARG (exp, 2);
15365 rtx op0 = expand_normal (arg0);
15366 rtx op1 = expand_normal (arg1);
15367 rtx op2 = expand_normal (arg2);
15368 machine_mode mode0 = insn_data[icode].operand[0].mode;
15369 machine_mode mode1 = insn_data[icode].operand[1].mode;
15370 machine_mode mode2 = insn_data[icode].operand[2].mode;
15372 if (icode == CODE_FOR_nothing)
15373 /* Builtin not supported on this processor. */
15374 return NULL_RTX;
15376 /* If we got invalid arguments bail out before generating bad rtl. */
15377 if (arg0 == error_mark_node
15378 || arg1 == error_mark_node
15379 || arg2 == error_mark_node)
15380 return NULL_RTX;
15382 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15383 op0 = copy_to_mode_reg (mode0, op0);
15384 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15385 op1 = copy_to_mode_reg (mode1, op1);
15386 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15387 op2 = copy_to_mode_reg (mode2, op2);
15389 pat = GEN_FCN (icode) (op0, op1, op2);
15390 if (pat)
15391 emit_insn (pat);
15393 return NULL_RTX;
15396 static rtx
15397 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
15399 tree arg0 = CALL_EXPR_ARG (exp, 0);
15400 tree arg1 = CALL_EXPR_ARG (exp, 1);
15401 tree arg2 = CALL_EXPR_ARG (exp, 2);
15402 rtx op0 = expand_normal (arg0);
15403 rtx op1 = expand_normal (arg1);
15404 rtx op2 = expand_normal (arg2);
15405 rtx pat, addr, rawaddr;
15406 machine_mode tmode = insn_data[icode].operand[0].mode;
15407 machine_mode smode = insn_data[icode].operand[1].mode;
15408 machine_mode mode1 = Pmode;
15409 machine_mode mode2 = Pmode;
15411 /* Invalid arguments. Bail before doing anything stoopid! */
15412 if (arg0 == error_mark_node
15413 || arg1 == error_mark_node
15414 || arg2 == error_mark_node)
15415 return const0_rtx;
15417 op2 = copy_to_mode_reg (mode2, op2);
15419 /* For STVX, express the RTL accurately by ANDing the address with -16.
15420 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
15421 so the raw address is fine. */
15422 if (icode == CODE_FOR_altivec_stvx_v2df_2op
15423 || icode == CODE_FOR_altivec_stvx_v2di_2op
15424 || icode == CODE_FOR_altivec_stvx_v4sf_2op
15425 || icode == CODE_FOR_altivec_stvx_v4si_2op
15426 || icode == CODE_FOR_altivec_stvx_v8hi_2op
15427 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
15429 if (op1 == const0_rtx)
15430 rawaddr = op2;
15431 else
15433 op1 = copy_to_mode_reg (mode1, op1);
15434 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
15437 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15438 addr = gen_rtx_MEM (tmode, addr);
15440 op0 = copy_to_mode_reg (tmode, op0);
15442 /* For -maltivec=be, emit a permute to swap the elements, followed
15443 by the store. */
15444 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15446 rtx temp = gen_reg_rtx (tmode);
15447 rtx sel = swap_selector_for_mode (tmode);
15448 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
15449 UNSPEC_VPERM);
15450 emit_insn (gen_rtx_SET (temp, vperm));
15451 emit_insn (gen_rtx_SET (addr, temp));
15453 else
15454 emit_insn (gen_rtx_SET (addr, op0));
15456 else
15458 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
15459 op0 = copy_to_mode_reg (smode, op0);
15461 if (op1 == const0_rtx)
15462 addr = gen_rtx_MEM (tmode, op2);
15463 else
15465 op1 = copy_to_mode_reg (mode1, op1);
15466 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
15469 pat = GEN_FCN (icode) (addr, op0);
15470 if (pat)
15471 emit_insn (pat);
15474 return NULL_RTX;
15477 /* Return the appropriate SPR number associated with the given builtin. */
15478 static inline HOST_WIDE_INT
15479 htm_spr_num (enum rs6000_builtins code)
15481 if (code == HTM_BUILTIN_GET_TFHAR
15482 || code == HTM_BUILTIN_SET_TFHAR)
15483 return TFHAR_SPR;
15484 else if (code == HTM_BUILTIN_GET_TFIAR
15485 || code == HTM_BUILTIN_SET_TFIAR)
15486 return TFIAR_SPR;
15487 else if (code == HTM_BUILTIN_GET_TEXASR
15488 || code == HTM_BUILTIN_SET_TEXASR)
15489 return TEXASR_SPR;
15490 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
15491 || code == HTM_BUILTIN_SET_TEXASRU);
15492 return TEXASRU_SPR;
15495 /* Return the appropriate SPR regno associated with the given builtin. */
15496 static inline HOST_WIDE_INT
15497 htm_spr_regno (enum rs6000_builtins code)
15499 if (code == HTM_BUILTIN_GET_TFHAR
15500 || code == HTM_BUILTIN_SET_TFHAR)
15501 return TFHAR_REGNO;
15502 else if (code == HTM_BUILTIN_GET_TFIAR
15503 || code == HTM_BUILTIN_SET_TFIAR)
15504 return TFIAR_REGNO;
15505 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
15506 || code == HTM_BUILTIN_SET_TEXASR
15507 || code == HTM_BUILTIN_GET_TEXASRU
15508 || code == HTM_BUILTIN_SET_TEXASRU);
15509 return TEXASR_REGNO;
15512 /* Return the correct ICODE value depending on whether we are
15513 setting or reading the HTM SPRs. */
15514 static inline enum insn_code
15515 rs6000_htm_spr_icode (bool nonvoid)
15517 if (nonvoid)
15518 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
15519 else
15520 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
15523 /* Expand the HTM builtin in EXP and store the result in TARGET.
15524 Store true in *EXPANDEDP if we found a builtin to expand. */
15525 static rtx
15526 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
15528 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15529 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
15530 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15531 const struct builtin_description *d;
15532 size_t i;
15534 *expandedp = true;
15536 if (!TARGET_POWERPC64
15537 && (fcode == HTM_BUILTIN_TABORTDC
15538 || fcode == HTM_BUILTIN_TABORTDCI))
15540 size_t uns_fcode = (size_t)fcode;
15541 const char *name = rs6000_builtin_info[uns_fcode].name;
15542 error ("builtin %s is only valid in 64-bit mode", name);
15543 return const0_rtx;
15546 /* Expand the HTM builtins. */
15547 d = bdesc_htm;
15548 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15549 if (d->code == fcode)
15551 rtx op[MAX_HTM_OPERANDS], pat;
15552 int nopnds = 0;
15553 tree arg;
15554 call_expr_arg_iterator iter;
15555 unsigned attr = rs6000_builtin_info[fcode].attr;
15556 enum insn_code icode = d->icode;
15557 const struct insn_operand_data *insn_op;
15558 bool uses_spr = (attr & RS6000_BTC_SPR);
15559 rtx cr = NULL_RTX;
15561 if (uses_spr)
15562 icode = rs6000_htm_spr_icode (nonvoid);
15563 insn_op = &insn_data[icode].operand[0];
15565 if (nonvoid)
15567 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
15568 if (!target
15569 || GET_MODE (target) != tmode
15570 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
15571 target = gen_reg_rtx (tmode);
15572 if (uses_spr)
15573 op[nopnds++] = target;
15576 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
15578 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
15579 return const0_rtx;
15581 insn_op = &insn_data[icode].operand[nopnds];
15583 op[nopnds] = expand_normal (arg);
15585 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
15587 if (!strcmp (insn_op->constraint, "n"))
15589 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
15590 if (!CONST_INT_P (op[nopnds]))
15591 error ("argument %d must be an unsigned literal", arg_num);
15592 else
15593 error ("argument %d is an unsigned literal that is "
15594 "out of range", arg_num);
15595 return const0_rtx;
15597 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
15600 nopnds++;
15603 /* Handle the builtins for extended mnemonics. These accept
15604 no arguments, but map to builtins that take arguments. */
15605 switch (fcode)
15607 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
15608 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
15609 op[nopnds++] = GEN_INT (1);
15610 if (flag_checking)
15611 attr |= RS6000_BTC_UNARY;
15612 break;
15613 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
15614 op[nopnds++] = GEN_INT (0);
15615 if (flag_checking)
15616 attr |= RS6000_BTC_UNARY;
15617 break;
15618 default:
15619 break;
15622 /* If this builtin accesses SPRs, then pass in the appropriate
15623 SPR number and SPR regno as the last two operands. */
15624 if (uses_spr)
15626 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
15627 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
15628 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
15630 /* If this builtin accesses a CR, then pass in a scratch
15631 CR as the last operand. */
15632 else if (attr & RS6000_BTC_CR)
15633 { cr = gen_reg_rtx (CCmode);
15634 op[nopnds++] = cr;
15637 if (flag_checking)
15639 int expected_nopnds = 0;
15640 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
15641 expected_nopnds = 1;
15642 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
15643 expected_nopnds = 2;
15644 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
15645 expected_nopnds = 3;
15646 if (!(attr & RS6000_BTC_VOID))
15647 expected_nopnds += 1;
15648 if (uses_spr)
15649 expected_nopnds += 2;
15651 gcc_assert (nopnds == expected_nopnds
15652 && nopnds <= MAX_HTM_OPERANDS);
15655 switch (nopnds)
15657 case 1:
15658 pat = GEN_FCN (icode) (op[0]);
15659 break;
15660 case 2:
15661 pat = GEN_FCN (icode) (op[0], op[1]);
15662 break;
15663 case 3:
15664 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15665 break;
15666 case 4:
15667 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15668 break;
15669 default:
15670 gcc_unreachable ();
15672 if (!pat)
15673 return NULL_RTX;
15674 emit_insn (pat);
15676 if (attr & RS6000_BTC_CR)
15678 if (fcode == HTM_BUILTIN_TBEGIN)
15680 /* Emit code to set TARGET to true or false depending on
15681 whether the tbegin. instruction succeeded or failed
15682 to start a transaction. We do this by placing the 1's
15683 complement of CR's EQ bit into TARGET. */
15684 rtx scratch = gen_reg_rtx (SImode);
15685 emit_insn (gen_rtx_SET (scratch,
15686 gen_rtx_EQ (SImode, cr,
15687 const0_rtx)));
15688 emit_insn (gen_rtx_SET (target,
15689 gen_rtx_XOR (SImode, scratch,
15690 GEN_INT (1))));
15692 else
15694 /* Emit code to copy the 4-bit condition register field
15695 CR into the least significant end of register TARGET. */
15696 rtx scratch1 = gen_reg_rtx (SImode);
15697 rtx scratch2 = gen_reg_rtx (SImode);
15698 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
15699 emit_insn (gen_movcc (subreg, cr));
15700 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
15701 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
15705 if (nonvoid)
15706 return target;
15707 return const0_rtx;
15710 *expandedp = false;
15711 return NULL_RTX;
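/* Illustrative use of the CR handling above (a sketch of the documented
   HTM idiom):

     if (__builtin_tbegin (0))
       {
         ... transactional code ...
         __builtin_tend (0);
       }

   The tbegin result is the complement of the EQ bit of the CR field set
   by tbegin., so it reads as nonzero when the transaction started.  */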
15714 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15716 static rtx
15717 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
15718 rtx target)
15720 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15721 if (fcode == RS6000_BUILTIN_CPU_INIT)
15722 return const0_rtx;
15724 if (target == 0 || GET_MODE (target) != SImode)
15725 target = gen_reg_rtx (SImode);
15727 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15728 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
15729 if (TREE_CODE (arg) != STRING_CST)
15731 error ("builtin %s only accepts a string argument",
15732 rs6000_builtin_info[(size_t) fcode].name);
15733 return const0_rtx;
15736 if (fcode == RS6000_BUILTIN_CPU_IS)
15738 const char *cpu = TREE_STRING_POINTER (arg);
15739 rtx cpuid = NULL_RTX;
15740 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
15741 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
15743 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15744 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
15745 break;
15747 if (cpuid == NULL_RTX)
15749 /* Invalid CPU argument. */
15750 error ("cpu %s is an invalid argument to builtin %s",
15751 cpu, rs6000_builtin_info[(size_t) fcode].name);
15752 return const0_rtx;
15755 rtx platform = gen_reg_rtx (SImode);
15756 rtx tcbmem = gen_const_mem (SImode,
15757 gen_rtx_PLUS (Pmode,
15758 gen_rtx_REG (Pmode, TLS_REGNUM),
15759 GEN_INT (TCB_PLATFORM_OFFSET)));
15760 emit_move_insn (platform, tcbmem);
15761 emit_insn (gen_eqsi3 (target, platform, cpuid));
15763 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
15765 const char *hwcap = TREE_STRING_POINTER (arg);
15766 rtx mask = NULL_RTX;
15767 int hwcap_offset;
15768 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
15769 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
15771 mask = GEN_INT (cpu_supports_info[i].mask);
15772 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
15773 break;
15775 if (mask == NULL_RTX)
15777 /* Invalid HWCAP argument. */
15778 error ("hwcap %s is an invalid argument to builtin %s",
15779 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15780 return const0_rtx;
15783 rtx tcb_hwcap = gen_reg_rtx (SImode);
15784 rtx tcbmem = gen_const_mem (SImode,
15785 gen_rtx_PLUS (Pmode,
15786 gen_rtx_REG (Pmode, TLS_REGNUM),
15787 GEN_INT (hwcap_offset)));
15788 emit_move_insn (tcb_hwcap, tcbmem);
15789 rtx scratch1 = gen_reg_rtx (SImode);
15790 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15791 rtx scratch2 = gen_reg_rtx (SImode);
15792 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15793 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15796 /* Record that we have expanded a CPU builtin, so that we can later
15797 emit a reference to the special symbol exported by LIBC to ensure we
15798 do not link against an old LIBC that doesn't support this feature. */
15799 cpu_builtin_p = true;
15801 #else
15802 /* For old LIBCs, always return FALSE. */
15803 emit_move_insn (target, GEN_INT (0));
15804 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15806 return target;
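/* Illustrative use ("power9" and "vsx" are sample arguments; a
   TCB-providing libc is assumed):

     __builtin_cpu_init ();                   expands to nothing here
     if (__builtin_cpu_is ("power9")) ...     TCB platform word == cpuid
     if (__builtin_cpu_supports ("vsx")) ...  (TCB hwcap word & mask) != 0

   matching the eqsi3 and AND/XOR sequences generated above.  */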
15809 static rtx
15810 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15812 rtx pat;
15813 tree arg0 = CALL_EXPR_ARG (exp, 0);
15814 tree arg1 = CALL_EXPR_ARG (exp, 1);
15815 tree arg2 = CALL_EXPR_ARG (exp, 2);
15816 rtx op0 = expand_normal (arg0);
15817 rtx op1 = expand_normal (arg1);
15818 rtx op2 = expand_normal (arg2);
15819 machine_mode tmode = insn_data[icode].operand[0].mode;
15820 machine_mode mode0 = insn_data[icode].operand[1].mode;
15821 machine_mode mode1 = insn_data[icode].operand[2].mode;
15822 machine_mode mode2 = insn_data[icode].operand[3].mode;
15824 if (icode == CODE_FOR_nothing)
15825 /* Builtin not supported on this processor. */
15826 return 0;
15828 /* If we got invalid arguments bail out before generating bad rtl. */
15829 if (arg0 == error_mark_node
15830 || arg1 == error_mark_node
15831 || arg2 == error_mark_node)
15832 return const0_rtx;
15834 /* Check and prepare argument depending on the instruction code.
15836 Note that a switch statement instead of the sequence of tests
15837 would be incorrect as many of the CODE_FOR values could be
15838 CODE_FOR_nothing and that would yield multiple alternatives
15839 with identical values. We'd never reach here at runtime in
15840 this case. */
15841 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15842 || icode == CODE_FOR_altivec_vsldoi_v2df
15843 || icode == CODE_FOR_altivec_vsldoi_v4si
15844 || icode == CODE_FOR_altivec_vsldoi_v8hi
15845 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15847 /* Only allow 4-bit unsigned literals. */
15848 STRIP_NOPS (arg2);
15849 if (TREE_CODE (arg2) != INTEGER_CST
15850 || TREE_INT_CST_LOW (arg2) & ~0xf)
15852 error ("argument 3 must be a 4-bit unsigned literal");
15853 return CONST0_RTX (tmode);
15856 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15857 || icode == CODE_FOR_vsx_xxpermdi_v2di
15858 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15859 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15860 || icode == CODE_FOR_vsx_xxpermdi_v1ti
15861 || icode == CODE_FOR_vsx_xxpermdi_v4sf
15862 || icode == CODE_FOR_vsx_xxpermdi_v4si
15863 || icode == CODE_FOR_vsx_xxpermdi_v8hi
15864 || icode == CODE_FOR_vsx_xxpermdi_v16qi
15865 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15866 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15867 || icode == CODE_FOR_vsx_xxsldwi_v4si
15868 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15869 || icode == CODE_FOR_vsx_xxsldwi_v2di
15870 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15872 /* Only allow 2-bit unsigned literals. */
15873 STRIP_NOPS (arg2);
15874 if (TREE_CODE (arg2) != INTEGER_CST
15875 || TREE_INT_CST_LOW (arg2) & ~0x3)
15877 error ("argument 3 must be a 2-bit unsigned literal");
15878 return CONST0_RTX (tmode);
15881 else if (icode == CODE_FOR_vsx_set_v2df
15882 || icode == CODE_FOR_vsx_set_v2di
15883 || icode == CODE_FOR_bcdadd
15884 || icode == CODE_FOR_bcdadd_lt
15885 || icode == CODE_FOR_bcdadd_eq
15886 || icode == CODE_FOR_bcdadd_gt
15887 || icode == CODE_FOR_bcdsub
15888 || icode == CODE_FOR_bcdsub_lt
15889 || icode == CODE_FOR_bcdsub_eq
15890 || icode == CODE_FOR_bcdsub_gt)
15892 /* Only allow 1-bit unsigned literals. */
15893 STRIP_NOPS (arg2);
15894 if (TREE_CODE (arg2) != INTEGER_CST
15895 || TREE_INT_CST_LOW (arg2) & ~0x1)
15897 error ("argument 3 must be a 1-bit unsigned literal");
15898 return CONST0_RTX (tmode);
15901 else if (icode == CODE_FOR_dfp_ddedpd_dd
15902 || icode == CODE_FOR_dfp_ddedpd_td)
15904 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15905 STRIP_NOPS (arg0);
15906 if (TREE_CODE (arg0) != INTEGER_CST
15907 || TREE_INT_CST_LOW (arg0) & ~0x3)
15909 error ("argument 1 must be 0 or 2");
15910 return CONST0_RTX (tmode);
15913 else if (icode == CODE_FOR_dfp_denbcd_dd
15914 || icode == CODE_FOR_dfp_denbcd_td)
15916 /* Only allow 1-bit unsigned literals. */
15917 STRIP_NOPS (arg0);
15918 if (TREE_CODE (arg0) != INTEGER_CST
15919 || TREE_INT_CST_LOW (arg0) & ~0x1)
15921 error ("argument 1 must be a 1-bit unsigned literal");
15922 return CONST0_RTX (tmode);
15925 else if (icode == CODE_FOR_dfp_dscli_dd
15926 || icode == CODE_FOR_dfp_dscli_td
15927 || icode == CODE_FOR_dfp_dscri_dd
15928 || icode == CODE_FOR_dfp_dscri_td)
15930 /* Only allow 6-bit unsigned literals. */
15931 STRIP_NOPS (arg1);
15932 if (TREE_CODE (arg1) != INTEGER_CST
15933 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15935 error ("argument 2 must be a 6-bit unsigned literal");
15936 return CONST0_RTX (tmode);
15939 else if (icode == CODE_FOR_crypto_vshasigmaw
15940 || icode == CODE_FOR_crypto_vshasigmad)
15942 /* Check whether the 2nd and 3rd arguments are integer constants and in
15943 range and prepare arguments. */
15944 STRIP_NOPS (arg1);
15945 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15947 error ("argument 2 must be 0 or 1");
15948 return CONST0_RTX (tmode);
15951 STRIP_NOPS (arg2);
15952 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15954 error ("argument 3 must be in the range 0..15");
15955 return CONST0_RTX (tmode);
15959 if (target == 0
15960 || GET_MODE (target) != tmode
15961 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15962 target = gen_reg_rtx (tmode);
15964 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15965 op0 = copy_to_mode_reg (mode0, op0);
15966 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15967 op1 = copy_to_mode_reg (mode1, op1);
15968 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15969 op2 = copy_to_mode_reg (mode2, op2);
15971 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15972 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15973 else
15974 pat = GEN_FCN (icode) (target, op0, op1, op2);
15975 if (! pat)
15976 return 0;
15977 emit_insn (pat);
15979 return target;
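/* For example, vec_sld reaches this expander as one of the
   CODE_FOR_altivec_vsldoi_* codes, whose shift count is a 4-bit field:

     vec_sld (a, b, 15)    accepted
     vec_sld (a, b, 16)    "argument 3 must be a 4-bit unsigned literal"

   with analogous range checks for the xxpermdi/xxsldwi, BCD, DFP and SHA
   icodes handled above.  */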
15982 /* Expand the lvx builtins. */
15983 static rtx
15984 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15986 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15987 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15988 tree arg0;
15989 machine_mode tmode, mode0;
15990 rtx pat, op0;
15991 enum insn_code icode;
15993 switch (fcode)
15995 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15996 icode = CODE_FOR_vector_altivec_load_v16qi;
15997 break;
15998 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15999 icode = CODE_FOR_vector_altivec_load_v8hi;
16000 break;
16001 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
16002 icode = CODE_FOR_vector_altivec_load_v4si;
16003 break;
16004 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
16005 icode = CODE_FOR_vector_altivec_load_v4sf;
16006 break;
16007 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
16008 icode = CODE_FOR_vector_altivec_load_v2df;
16009 break;
16010 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
16011 icode = CODE_FOR_vector_altivec_load_v2di;
16012 break;
16013 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
16014 icode = CODE_FOR_vector_altivec_load_v1ti;
16015 break;
16016 default:
16017 *expandedp = false;
16018 return NULL_RTX;
16021 *expandedp = true;
16023 arg0 = CALL_EXPR_ARG (exp, 0);
16024 op0 = expand_normal (arg0);
16025 tmode = insn_data[icode].operand[0].mode;
16026 mode0 = insn_data[icode].operand[1].mode;
16028 if (target == 0
16029 || GET_MODE (target) != tmode
16030 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16031 target = gen_reg_rtx (tmode);
16033 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16034 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16036 pat = GEN_FCN (icode) (target, op0);
16037 if (! pat)
16038 return 0;
16039 emit_insn (pat);
16040 return target;
16043 /* Expand the stvx builtins. */
16044 static rtx
16045 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
16046 bool *expandedp)
16048 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16049 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16050 tree arg0, arg1;
16051 machine_mode mode0, mode1;
16052 rtx pat, op0, op1;
16053 enum insn_code icode;
16055 switch (fcode)
16057 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
16058 icode = CODE_FOR_vector_altivec_store_v16qi;
16059 break;
16060 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
16061 icode = CODE_FOR_vector_altivec_store_v8hi;
16062 break;
16063 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
16064 icode = CODE_FOR_vector_altivec_store_v4si;
16065 break;
16066 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
16067 icode = CODE_FOR_vector_altivec_store_v4sf;
16068 break;
16069 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
16070 icode = CODE_FOR_vector_altivec_store_v2df;
16071 break;
16072 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
16073 icode = CODE_FOR_vector_altivec_store_v2di;
16074 break;
16075 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
16076 icode = CODE_FOR_vector_altivec_store_v1ti;
16077 break;
16078 default:
16079 *expandedp = false;
16080 return NULL_RTX;
16083 arg0 = CALL_EXPR_ARG (exp, 0);
16084 arg1 = CALL_EXPR_ARG (exp, 1);
16085 op0 = expand_normal (arg0);
16086 op1 = expand_normal (arg1);
16087 mode0 = insn_data[icode].operand[0].mode;
16088 mode1 = insn_data[icode].operand[1].mode;
16090 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16091 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16092 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16093 op1 = copy_to_mode_reg (mode1, op1);
16095 pat = GEN_FCN (icode) (op0, op1);
16096 if (pat)
16097 emit_insn (pat);
16099 *expandedp = true;
16100 return NULL_RTX;
16103 /* Expand the dst builtins. */
16104 static rtx
16105 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
16106 bool *expandedp)
16108 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16109 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16110 tree arg0, arg1, arg2;
16111 machine_mode mode0, mode1;
16112 rtx pat, op0, op1, op2;
16113 const struct builtin_description *d;
16114 size_t i;
16116 *expandedp = false;
16118 /* Handle DST variants. */
16119 d = bdesc_dst;
16120 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16121 if (d->code == fcode)
16123 arg0 = CALL_EXPR_ARG (exp, 0);
16124 arg1 = CALL_EXPR_ARG (exp, 1);
16125 arg2 = CALL_EXPR_ARG (exp, 2);
16126 op0 = expand_normal (arg0);
16127 op1 = expand_normal (arg1);
16128 op2 = expand_normal (arg2);
16129 mode0 = insn_data[d->icode].operand[0].mode;
16130 mode1 = insn_data[d->icode].operand[1].mode;
16132 /* Invalid arguments, bail out before generating bad rtl. */
16133 if (arg0 == error_mark_node
16134 || arg1 == error_mark_node
16135 || arg2 == error_mark_node)
16136 return const0_rtx;
16138 *expandedp = true;
16139 STRIP_NOPS (arg2);
16140 if (TREE_CODE (arg2) != INTEGER_CST
16141 || TREE_INT_CST_LOW (arg2) & ~0x3)
16143 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
16144 return const0_rtx;
16147 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16148 op0 = copy_to_mode_reg (Pmode, op0);
16149 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16150 op1 = copy_to_mode_reg (mode1, op1);
16152 pat = GEN_FCN (d->icode) (op0, op1, op2);
16153 if (pat != 0)
16154 emit_insn (pat);
16156 return NULL_RTX;
16159 return NULL_RTX;
16162 /* Expand vec_init builtin. */
16163 static rtx
16164 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
16166 machine_mode tmode = TYPE_MODE (type);
16167 machine_mode inner_mode = GET_MODE_INNER (tmode);
16168 int i, n_elt = GET_MODE_NUNITS (tmode);
16170 gcc_assert (VECTOR_MODE_P (tmode));
16171 gcc_assert (n_elt == call_expr_nargs (exp));
16173 if (!target || !register_operand (target, tmode))
16174 target = gen_reg_rtx (tmode);
16176 /* If we have a vector comprised of a single element, such as V1TImode, do
16177 the initialization directly. */
16178 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
16180 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
16181 emit_move_insn (target, gen_lowpart (tmode, x));
16183 else
16185 rtvec v = rtvec_alloc (n_elt);
16187 for (i = 0; i < n_elt; ++i)
16189 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
16190 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16193 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
16196 return target;
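/* Here n_elt is the vector element count, so for V4SImode this gathers
   four scalar rtxes into the rtvec and lets rs6000_expand_vector_init
   assemble the vector.  The single-element case (V1TImode) is just a
   mode change, which the gen_lowpart move expresses directly.  */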
16199 /* Return the integer constant in ARG. Constrain it to be in the range
16200 of the subparts of VEC_TYPE; issue an error if not. */
16202 static int
16203 get_element_number (tree vec_type, tree arg)
16205 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16207 if (!tree_fits_uhwi_p (arg)
16208 || (elt = tree_to_uhwi (arg), elt > max))
16210 error ("selector must be an integer constant in the range 0..%wi", max);
16211 return 0;
16214 return elt;
16217 /* Expand vec_set builtin. */
16218 static rtx
16219 altivec_expand_vec_set_builtin (tree exp)
16221 machine_mode tmode, mode1;
16222 tree arg0, arg1, arg2;
16223 int elt;
16224 rtx op0, op1;
16226 arg0 = CALL_EXPR_ARG (exp, 0);
16227 arg1 = CALL_EXPR_ARG (exp, 1);
16228 arg2 = CALL_EXPR_ARG (exp, 2);
16230 tmode = TYPE_MODE (TREE_TYPE (arg0));
16231 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16232 gcc_assert (VECTOR_MODE_P (tmode));
16234 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
16235 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
16236 elt = get_element_number (TREE_TYPE (arg0), arg2);
16238 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16239 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16241 op0 = force_reg (tmode, op0);
16242 op1 = force_reg (mode1, op1);
16244 rs6000_expand_vector_set (op0, op1, elt);
16246 return op0;
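/* Illustrative sketch, assuming the usual overload resolution: a use like

     vector int v;
     v = vec_insert (42, v, 1);

   arrives here as a VEC_SET-style call with (vector, scalar, selector)
   arguments; the selector is validated to 0..3 for V4SI and
   rs6000_expand_vector_set rewrites element 1 of the vector.  */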
16249 /* Expand vec_ext builtin. */
16250 static rtx
16251 altivec_expand_vec_ext_builtin (tree exp, rtx target)
16253 machine_mode tmode, mode0;
16254 tree arg0, arg1;
16255 rtx op0;
16256 rtx op1;
16258 arg0 = CALL_EXPR_ARG (exp, 0);
16259 arg1 = CALL_EXPR_ARG (exp, 1);
16261 op0 = expand_normal (arg0);
16262 op1 = expand_normal (arg1);
16264 /* Call get_element_number to validate arg1 if it is a constant. */
16265 if (TREE_CODE (arg1) == INTEGER_CST)
16266 (void) get_element_number (TREE_TYPE (arg0), arg1);
16268 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16269 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16270 gcc_assert (VECTOR_MODE_P (mode0));
16272 op0 = force_reg (mode0, op0);
16274 if (optimize || !target || !register_operand (target, tmode))
16275 target = gen_reg_rtx (tmode);
16277 rs6000_expand_vector_extract (target, op0, op1);
16279 return target;
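/* Illustrative sketch: vec_extract (v, 2) on a vector int resolves to a
   VEC_EXT-style call handled here.  Unlike the vec_set case, the selector
   may be non-constant; only INTEGER_CST selectors are range-checked, and
   rs6000_expand_vector_extract copes with either form.  */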
16282 /* Expand the builtin in EXP and store the result in TARGET. Store
16283 true in *EXPANDEDP if we found a builtin to expand. */
16284 static rtx
16285 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
16287 const struct builtin_description *d;
16288 size_t i;
16289 enum insn_code icode;
16290 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16291 tree arg0, arg1, arg2;
16292 rtx op0, pat;
16293 machine_mode tmode, mode0;
16294 enum rs6000_builtins fcode
16295 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16297 if (rs6000_overloaded_builtin_p (fcode))
16299 *expandedp = true;
16300 error ("unresolved overload for Altivec builtin %qF", fndecl);
16302 /* Given it is invalid, just generate a normal call. */
16303 return expand_call (exp, target, false);
16306 target = altivec_expand_ld_builtin (exp, target, expandedp);
16307 if (*expandedp)
16308 return target;
16310 target = altivec_expand_st_builtin (exp, target, expandedp);
16311 if (*expandedp)
16312 return target;
16314 target = altivec_expand_dst_builtin (exp, target, expandedp);
16315 if (*expandedp)
16316 return target;
16318 *expandedp = true;
16320 switch (fcode)
16322 case ALTIVEC_BUILTIN_STVX_V2DF:
16323 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
16324 case ALTIVEC_BUILTIN_STVX_V2DI:
16325 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
16326 case ALTIVEC_BUILTIN_STVX_V4SF:
16327 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
16328 case ALTIVEC_BUILTIN_STVX:
16329 case ALTIVEC_BUILTIN_STVX_V4SI:
16330 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
16331 case ALTIVEC_BUILTIN_STVX_V8HI:
16332 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
16333 case ALTIVEC_BUILTIN_STVX_V16QI:
16334 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
16335 case ALTIVEC_BUILTIN_STVEBX:
16336 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
16337 case ALTIVEC_BUILTIN_STVEHX:
16338 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
16339 case ALTIVEC_BUILTIN_STVEWX:
16340 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
16341 case ALTIVEC_BUILTIN_STVXL_V2DF:
16342 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
16343 case ALTIVEC_BUILTIN_STVXL_V2DI:
16344 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
16345 case ALTIVEC_BUILTIN_STVXL_V4SF:
16346 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
16347 case ALTIVEC_BUILTIN_STVXL:
16348 case ALTIVEC_BUILTIN_STVXL_V4SI:
16349 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
16350 case ALTIVEC_BUILTIN_STVXL_V8HI:
16351 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
16352 case ALTIVEC_BUILTIN_STVXL_V16QI:
16353 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
16355 case ALTIVEC_BUILTIN_STVLX:
16356 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
16357 case ALTIVEC_BUILTIN_STVLXL:
16358 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
16359 case ALTIVEC_BUILTIN_STVRX:
16360 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
16361 case ALTIVEC_BUILTIN_STVRXL:
16362 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
16364 case P9V_BUILTIN_STXVL:
16365 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
16367 case VSX_BUILTIN_STXVD2X_V1TI:
16368 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
16369 case VSX_BUILTIN_STXVD2X_V2DF:
16370 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
16371 case VSX_BUILTIN_STXVD2X_V2DI:
16372 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
16373 case VSX_BUILTIN_STXVW4X_V4SF:
16374 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
16375 case VSX_BUILTIN_STXVW4X_V4SI:
16376 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
16377 case VSX_BUILTIN_STXVW4X_V8HI:
16378 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
16379 case VSX_BUILTIN_STXVW4X_V16QI:
16380 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
16382 /* For the following on big-endian, it's ok to use any appropriate
16383 unaligned-supporting store, so use a generic expander. For
16384 little-endian, the exact element-reversing instruction must
16385 be used. */
16386 case VSX_BUILTIN_ST_ELEMREV_V2DF:
16388 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
16389 : CODE_FOR_vsx_st_elemrev_v2df);
16390 return altivec_expand_stv_builtin (code, exp);
16392 case VSX_BUILTIN_ST_ELEMREV_V2DI:
16394 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
16395 : CODE_FOR_vsx_st_elemrev_v2di);
16396 return altivec_expand_stv_builtin (code, exp);
16398 case VSX_BUILTIN_ST_ELEMREV_V4SF:
16400 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
16401 : CODE_FOR_vsx_st_elemrev_v4sf);
16402 return altivec_expand_stv_builtin (code, exp);
16404 case VSX_BUILTIN_ST_ELEMREV_V4SI:
16406 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
16407 : CODE_FOR_vsx_st_elemrev_v4si);
16408 return altivec_expand_stv_builtin (code, exp);
16410 case VSX_BUILTIN_ST_ELEMREV_V8HI:
16412 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
16413 : CODE_FOR_vsx_st_elemrev_v8hi);
16414 return altivec_expand_stv_builtin (code, exp);
16416 case VSX_BUILTIN_ST_ELEMREV_V16QI:
16418 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
16419 : CODE_FOR_vsx_st_elemrev_v16qi);
16420 return altivec_expand_stv_builtin (code, exp);
16423 case ALTIVEC_BUILTIN_MFVSCR:
16424 icode = CODE_FOR_altivec_mfvscr;
16425 tmode = insn_data[icode].operand[0].mode;
16427 if (target == 0
16428 || GET_MODE (target) != tmode
16429 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16430 target = gen_reg_rtx (tmode);
16432 pat = GEN_FCN (icode) (target);
16433 if (! pat)
16434 return 0;
16435 emit_insn (pat);
16436 return target;
16438 case ALTIVEC_BUILTIN_MTVSCR:
16439 icode = CODE_FOR_altivec_mtvscr;
16440 arg0 = CALL_EXPR_ARG (exp, 0);
16441 op0 = expand_normal (arg0);
16442 mode0 = insn_data[icode].operand[0].mode;
16444 /* If we got invalid arguments, bail out before generating bad rtl. */
16445 if (arg0 == error_mark_node)
16446 return const0_rtx;
16448 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16449 op0 = copy_to_mode_reg (mode0, op0);
16451 pat = GEN_FCN (icode) (op0);
16452 if (pat)
16453 emit_insn (pat);
16454 return NULL_RTX;
16456 case ALTIVEC_BUILTIN_DSSALL:
16457 emit_insn (gen_altivec_dssall ());
16458 return NULL_RTX;
16460 case ALTIVEC_BUILTIN_DSS:
16461 icode = CODE_FOR_altivec_dss;
16462 arg0 = CALL_EXPR_ARG (exp, 0);
16463 STRIP_NOPS (arg0);
16464 op0 = expand_normal (arg0);
16465 mode0 = insn_data[icode].operand[0].mode;
16467 /* If we got invalid arguments, bail out before generating bad rtl. */
16468 if (arg0 == error_mark_node)
16469 return const0_rtx;
16471 if (TREE_CODE (arg0) != INTEGER_CST
16472 || TREE_INT_CST_LOW (arg0) & ~0x3)
16474 error ("argument to dss must be a 2-bit unsigned literal");
16475 return const0_rtx;
16478 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16479 op0 = copy_to_mode_reg (mode0, op0);
16481 emit_insn (gen_altivec_dss (op0));
16482 return NULL_RTX;
16484 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
16485 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
16486 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
16487 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
16488 case VSX_BUILTIN_VEC_INIT_V2DF:
16489 case VSX_BUILTIN_VEC_INIT_V2DI:
16490 case VSX_BUILTIN_VEC_INIT_V1TI:
16491 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
16493 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
16494 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
16495 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
16496 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
16497 case VSX_BUILTIN_VEC_SET_V2DF:
16498 case VSX_BUILTIN_VEC_SET_V2DI:
16499 case VSX_BUILTIN_VEC_SET_V1TI:
16500 return altivec_expand_vec_set_builtin (exp);
16502 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
16503 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
16504 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
16505 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
16506 case VSX_BUILTIN_VEC_EXT_V2DF:
16507 case VSX_BUILTIN_VEC_EXT_V2DI:
16508 case VSX_BUILTIN_VEC_EXT_V1TI:
16509 return altivec_expand_vec_ext_builtin (exp, target);
16511 case P9V_BUILTIN_VEXTRACT4B:
16512 case P9V_BUILTIN_VEC_VEXTRACT4B:
16513 arg1 = CALL_EXPR_ARG (exp, 1);
16514 STRIP_NOPS (arg1);
16516 /* Generate a normal call if it is invalid. */
16517 if (arg1 == error_mark_node)
16518 return expand_call (exp, target, false);
16520 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
16522 error ("second argument to vec_vextract4b must be 0..12");
16523 return expand_call (exp, target, false);
16525 break;
16527 case P9V_BUILTIN_VINSERT4B:
16528 case P9V_BUILTIN_VINSERT4B_DI:
16529 case P9V_BUILTIN_VEC_VINSERT4B:
16530 arg2 = CALL_EXPR_ARG (exp, 2);
16531 STRIP_NOPS (arg2);
16533 /* Generate a normal call if it is invalid. */
16534 if (arg2 == error_mark_node)
16535 return expand_call (exp, target, false);
16537 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
16539 error ("third argument to vec_vinsert4b must be 0..12");
16540 return expand_call (exp, target, false);
16542 break;
16544 default:
16545 break;
16546 /* Fall through to the table-driven expanders below. */
16549 /* Expand abs* operations. */
16550 d = bdesc_abs;
16551 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16552 if (d->code == fcode)
16553 return altivec_expand_abs_builtin (d->icode, exp, target);
16555 /* Expand the AltiVec predicates. */
16556 d = bdesc_altivec_preds;
16557 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16558 if (d->code == fcode)
16559 return altivec_expand_predicate_builtin (d->icode, exp, target);
16561 /* LV* are funky: they were initialized differently from the other builtins, so expand them by hand here. */
16562 switch (fcode)
16564 case ALTIVEC_BUILTIN_LVSL:
16565 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
16566 exp, target, false);
16567 case ALTIVEC_BUILTIN_LVSR:
16568 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
16569 exp, target, false);
16570 case ALTIVEC_BUILTIN_LVEBX:
16571 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
16572 exp, target, false);
16573 case ALTIVEC_BUILTIN_LVEHX:
16574 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
16575 exp, target, false);
16576 case ALTIVEC_BUILTIN_LVEWX:
16577 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
16578 exp, target, false);
16579 case ALTIVEC_BUILTIN_LVXL_V2DF:
16580 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
16581 exp, target, false);
16582 case ALTIVEC_BUILTIN_LVXL_V2DI:
16583 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
16584 exp, target, false);
16585 case ALTIVEC_BUILTIN_LVXL_V4SF:
16586 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
16587 exp, target, false);
16588 case ALTIVEC_BUILTIN_LVXL:
16589 case ALTIVEC_BUILTIN_LVXL_V4SI:
16590 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
16591 exp, target, false);
16592 case ALTIVEC_BUILTIN_LVXL_V8HI:
16593 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
16594 exp, target, false);
16595 case ALTIVEC_BUILTIN_LVXL_V16QI:
16596 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
16597 exp, target, false);
16598 case ALTIVEC_BUILTIN_LVX_V2DF:
16599 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
16600 exp, target, false);
16601 case ALTIVEC_BUILTIN_LVX_V2DI:
16602 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
16603 exp, target, false);
16604 case ALTIVEC_BUILTIN_LVX_V4SF:
16605 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
16606 exp, target, false);
16607 case ALTIVEC_BUILTIN_LVX:
16608 case ALTIVEC_BUILTIN_LVX_V4SI:
16609 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
16610 exp, target, false);
16611 case ALTIVEC_BUILTIN_LVX_V8HI:
16612 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
16613 exp, target, false);
16614 case ALTIVEC_BUILTIN_LVX_V16QI:
16615 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
16616 exp, target, false);
16617 case ALTIVEC_BUILTIN_LVLX:
16618 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
16619 exp, target, true);
16620 case ALTIVEC_BUILTIN_LVLXL:
16621 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
16622 exp, target, true);
16623 case ALTIVEC_BUILTIN_LVRX:
16624 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
16625 exp, target, true);
16626 case ALTIVEC_BUILTIN_LVRXL:
16627 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
16628 exp, target, true);
16629 case VSX_BUILTIN_LXVD2X_V1TI:
16630 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
16631 exp, target, false);
16632 case VSX_BUILTIN_LXVD2X_V2DF:
16633 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
16634 exp, target, false);
16635 case VSX_BUILTIN_LXVD2X_V2DI:
16636 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
16637 exp, target, false);
16638 case VSX_BUILTIN_LXVW4X_V4SF:
16639 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
16640 exp, target, false);
16641 case VSX_BUILTIN_LXVW4X_V4SI:
16642 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
16643 exp, target, false);
16644 case VSX_BUILTIN_LXVW4X_V8HI:
16645 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
16646 exp, target, false);
16647 case VSX_BUILTIN_LXVW4X_V16QI:
16648 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
16649 exp, target, false);
16650 /* For the following on big-endian, it's ok to use any appropriate
16651 unaligned-supporting load, so use a generic expander. For
16652 little-endian, the exact element-reversing instruction must
16653 be used. */
16654 case VSX_BUILTIN_LD_ELEMREV_V2DF:
16656 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
16657 : CODE_FOR_vsx_ld_elemrev_v2df);
16658 return altivec_expand_lv_builtin (code, exp, target, false);
16660 case VSX_BUILTIN_LD_ELEMREV_V2DI:
16662 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
16663 : CODE_FOR_vsx_ld_elemrev_v2di);
16664 return altivec_expand_lv_builtin (code, exp, target, false);
16666 case VSX_BUILTIN_LD_ELEMREV_V4SF:
16668 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
16669 : CODE_FOR_vsx_ld_elemrev_v4sf);
16670 return altivec_expand_lv_builtin (code, exp, target, false);
16672 case VSX_BUILTIN_LD_ELEMREV_V4SI:
16674 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
16675 : CODE_FOR_vsx_ld_elemrev_v4si);
16676 return altivec_expand_lv_builtin (code, exp, target, false);
16678 case VSX_BUILTIN_LD_ELEMREV_V8HI:
16680 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
16681 : CODE_FOR_vsx_ld_elemrev_v8hi);
16682 return altivec_expand_lv_builtin (code, exp, target, false);
16684 case VSX_BUILTIN_LD_ELEMREV_V16QI:
16686 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
16687 : CODE_FOR_vsx_ld_elemrev_v16qi);
16688 return altivec_expand_lv_builtin (code, exp, target, false);
16690 break;
16691 default:
16692 break;
16693 /* Fall through: no builtin matched, so report it as not expanded. */
16696 *expandedp = false;
16697 return NULL_RTX;
16700 /* Expand the builtin in EXP and store the result in TARGET. Store
16701 true in *EXPANDEDP if we found a builtin to expand. */
16702 static rtx
16703 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
16705 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16706 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16707 const struct builtin_description *d;
16708 size_t i;
16710 *expandedp = true;
16712 switch (fcode)
16714 case PAIRED_BUILTIN_STX:
16715 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
16716 case PAIRED_BUILTIN_LX:
16717 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
16718 default:
16719 break;
16720 /* Fall through to the paired predicate expanders below. */
16723 /* Expand the paired predicates. */
16724 d = bdesc_paired_preds;
16725 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
16726 if (d->code == fcode)
16727 return paired_expand_predicate_builtin (d->icode, exp, target);
16729 *expandedp = false;
16730 return NULL_RTX;
16733 /* Binops that need to be initialized manually, but can be expanded
16734 automagically by rs6000_expand_binop_builtin. */
16735 static const struct builtin_description bdesc_2arg_spe[] =
16737 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
16738 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
16739 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
16740 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
16741 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
16742 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
16743 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
16744 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
16745 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
16746 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
16747 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
16748 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
16749 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
16750 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
16751 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
16752 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
16753 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
16754 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
16755 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
16756 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
16757 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
16758 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
16761 /* Expand the builtin in EXP and store the result in TARGET. Store
16762 true in *EXPANDEDP if we found a builtin to expand.
16764 This expands the SPE builtins that are not simple unary and binary
16765 operations. */
16766 static rtx
16767 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
16769 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16770 tree arg1, arg0;
16771 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16772 enum insn_code icode;
16773 machine_mode tmode, mode0;
16774 rtx pat, op0;
16775 const struct builtin_description *d;
16776 size_t i;
16778 *expandedp = true;
16780 /* Syntax check for a 5-bit unsigned immediate. */
16781 switch (fcode)
16783 case SPE_BUILTIN_EVSTDD:
16784 case SPE_BUILTIN_EVSTDH:
16785 case SPE_BUILTIN_EVSTDW:
16786 case SPE_BUILTIN_EVSTWHE:
16787 case SPE_BUILTIN_EVSTWHO:
16788 case SPE_BUILTIN_EVSTWWE:
16789 case SPE_BUILTIN_EVSTWWO:
16790 arg1 = CALL_EXPR_ARG (exp, 2);
16791 if (TREE_CODE (arg1) != INTEGER_CST
16792 || TREE_INT_CST_LOW (arg1) & ~0x1f)
16794 error ("argument 2 must be a 5-bit unsigned literal");
16795 return const0_rtx;
16797 break;
16798 default:
16799 break;
16802 /* The evsplat*i instructions are not quite generic. */
16803 switch (fcode)
16805 case SPE_BUILTIN_EVSPLATFI:
16806 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
16807 exp, target);
16808 case SPE_BUILTIN_EVSPLATI:
16809 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
16810 exp, target);
16811 default:
16812 break;
16815 d = bdesc_2arg_spe;
16816 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
16817 if (d->code == fcode)
16818 return rs6000_expand_binop_builtin (d->icode, exp, target);
16820 d = bdesc_spe_predicates;
16821 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
16822 if (d->code == fcode)
16823 return spe_expand_predicate_builtin (d->icode, exp, target);
16825 d = bdesc_spe_evsel;
16826 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
16827 if (d->code == fcode)
16828 return spe_expand_evsel_builtin (d->icode, exp, target);
16830 switch (fcode)
16832 case SPE_BUILTIN_EVSTDDX:
16833 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16834 case SPE_BUILTIN_EVSTDHX:
16835 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16836 case SPE_BUILTIN_EVSTDWX:
16837 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16838 case SPE_BUILTIN_EVSTWHEX:
16839 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16840 case SPE_BUILTIN_EVSTWHOX:
16841 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16842 case SPE_BUILTIN_EVSTWWEX:
16843 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16844 case SPE_BUILTIN_EVSTWWOX:
16845 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16846 case SPE_BUILTIN_EVSTDD:
16847 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16848 case SPE_BUILTIN_EVSTDH:
16849 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16850 case SPE_BUILTIN_EVSTDW:
16851 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16852 case SPE_BUILTIN_EVSTWHE:
16853 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16854 case SPE_BUILTIN_EVSTWHO:
16855 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16856 case SPE_BUILTIN_EVSTWWE:
16857 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16858 case SPE_BUILTIN_EVSTWWO:
16859 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16860 case SPE_BUILTIN_MFSPEFSCR:
16861 icode = CODE_FOR_spe_mfspefscr;
16862 tmode = insn_data[icode].operand[0].mode;
16864 if (target == 0
16865 || GET_MODE (target) != tmode
16866 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16867 target = gen_reg_rtx (tmode);
16869 pat = GEN_FCN (icode) (target);
16870 if (! pat)
16871 return 0;
16872 emit_insn (pat);
16873 return target;
16874 case SPE_BUILTIN_MTSPEFSCR:
16875 icode = CODE_FOR_spe_mtspefscr;
16876 arg0 = CALL_EXPR_ARG (exp, 0);
16877 op0 = expand_normal (arg0);
16878 mode0 = insn_data[icode].operand[0].mode;
16880 if (arg0 == error_mark_node)
16881 return const0_rtx;
16883 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16884 op0 = copy_to_mode_reg (mode0, op0);
16886 pat = GEN_FCN (icode) (op0);
16887 if (pat)
16888 emit_insn (pat);
16889 return NULL_RTX;
16890 default:
16891 break;
16894 *expandedp = false;
16895 return NULL_RTX;
16898 static rtx
16899 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16901 rtx pat, scratch, tmp;
16902 tree form = CALL_EXPR_ARG (exp, 0);
16903 tree arg0 = CALL_EXPR_ARG (exp, 1);
16904 tree arg1 = CALL_EXPR_ARG (exp, 2);
16905 rtx op0 = expand_normal (arg0);
16906 rtx op1 = expand_normal (arg1);
16907 machine_mode mode0 = insn_data[icode].operand[1].mode;
16908 machine_mode mode1 = insn_data[icode].operand[2].mode;
16909 int form_int;
16910 enum rtx_code code;
16912 if (TREE_CODE (form) != INTEGER_CST)
16914 error ("argument 1 of __builtin_paired_predicate must be a constant");
16915 return const0_rtx;
16917 else
16918 form_int = TREE_INT_CST_LOW (form);
16920 gcc_assert (mode0 == mode1);
16922 if (arg0 == error_mark_node || arg1 == error_mark_node)
16923 return const0_rtx;
16925 if (target == 0
16926 || GET_MODE (target) != SImode
16927 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16928 target = gen_reg_rtx (SImode);
16929 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16930 op0 = copy_to_mode_reg (mode0, op0);
16931 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16932 op1 = copy_to_mode_reg (mode1, op1);
16934 scratch = gen_reg_rtx (CCFPmode);
16936 pat = GEN_FCN (icode) (scratch, op0, op1);
16937 if (!pat)
16938 return const0_rtx;
16940 emit_insn (pat);
16942 switch (form_int)
16944 /* LT bit. */
16945 case 0:
16946 code = LT;
16947 break;
16948 /* GT bit. */
16949 case 1:
16950 code = GT;
16951 break;
16952 /* EQ bit. */
16953 case 2:
16954 code = EQ;
16955 break;
16956 /* UN bit. */
16957 case 3:
16958 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16959 return target;
16960 default:
16961 error ("argument 1 of __builtin_paired_predicate is out of range");
16962 return const0_rtx;
16965 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16966 emit_move_insn (target, tmp);
16967 return target;
16970 static rtx
16971 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16973 rtx pat, scratch, tmp;
16974 tree form = CALL_EXPR_ARG (exp, 0);
16975 tree arg0 = CALL_EXPR_ARG (exp, 1);
16976 tree arg1 = CALL_EXPR_ARG (exp, 2);
16977 rtx op0 = expand_normal (arg0);
16978 rtx op1 = expand_normal (arg1);
16979 machine_mode mode0 = insn_data[icode].operand[1].mode;
16980 machine_mode mode1 = insn_data[icode].operand[2].mode;
16981 int form_int;
16982 enum rtx_code code;
16984 if (TREE_CODE (form) != INTEGER_CST)
16986 error ("argument 1 of __builtin_spe_predicate must be a constant");
16987 return const0_rtx;
16989 else
16990 form_int = TREE_INT_CST_LOW (form);
16992 gcc_assert (mode0 == mode1);
16994 if (arg0 == error_mark_node || arg1 == error_mark_node)
16995 return const0_rtx;
16997 if (target == 0
16998 || GET_MODE (target) != SImode
16999 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
17000 target = gen_reg_rtx (SImode);
17002 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17003 op0 = copy_to_mode_reg (mode0, op0);
17004 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17005 op1 = copy_to_mode_reg (mode1, op1);
17007 scratch = gen_reg_rtx (CCmode);
17009 pat = GEN_FCN (icode) (scratch, op0, op1);
17010 if (! pat)
17011 return const0_rtx;
17012 emit_insn (pat);
17014 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
17015 _lower_. We use one compare, but look in different bits of the
17016 CR for each variant.
17018 There are 2 elements in each SPE simd type (upper/lower). The CR
17019 bits are set as follows:
17021 BIT 0 | BIT 1 | BIT 2 | BIT 3
17022 U | L | (U | L) | (U & L)
17024 So, for an "all" relationship, BIT 3 would be set.
17025 For an "any" relationship, BIT 2 would be set. Etc.
17027 Following traditional nomenclature, these bits map to:
17029 BIT 0 | BIT 1 | BIT 2 | BIT 3
17030 LT | GT | EQ | OV
17032 Later, we will generate rtl to look in the OV/EQ/LT/GT bits for the all/any/upper/lower variants respectively.
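/* Worked example (illustrative): if the upper comparison holds (U = 1)
   and the lower one fails (L = 0), the CR field contains
     BIT 0 = 1, BIT 1 = 0, BIT 2 = (U | L) = 1, BIT 3 = (U & L) = 0,
   so the "any" variant (EQ bit) is satisfied while the "all" variant
   (OV bit) is not.  */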
17035 switch (form_int)
17037 /* All variant. OV bit. */
17038 case 0:
17039 /* We need to get to the OV bit, which is the ORDERED bit. We
17040 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
17041 that's ugly and will make validate_condition_mode die.
17042 So let's just use another pattern. */
17043 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
17044 return target;
17045 /* Any variant. EQ bit. */
17046 case 1:
17047 code = EQ;
17048 break;
17049 /* Upper variant. LT bit. */
17050 case 2:
17051 code = LT;
17052 break;
17053 /* Lower variant. GT bit. */
17054 case 3:
17055 code = GT;
17056 break;
17057 default:
17058 error ("argument 1 of __builtin_spe_predicate is out of range");
17059 return const0_rtx;
17062 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
17063 emit_move_insn (target, tmp);
17065 return target;
17068 /* The evsel builtins look like this:
17070 e = __builtin_spe_evsel_OP (a, b, c, d);
17072 and work like this:
17074 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
17075 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
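/* Worked example (illustrative), with *OP* being greater-than and
   a = {5, 1}, b = {3, 3}, c = {10, 20}, d = {30, 40}:
     e[upper] = (5 > 3) ? 10 : 30 = 10
     e[lower] = (1 > 3) ? 20 : 40 = 40.  */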
17078 static rtx
17079 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
17081 rtx pat, scratch;
17082 tree arg0 = CALL_EXPR_ARG (exp, 0);
17083 tree arg1 = CALL_EXPR_ARG (exp, 1);
17084 tree arg2 = CALL_EXPR_ARG (exp, 2);
17085 tree arg3 = CALL_EXPR_ARG (exp, 3);
17086 rtx op0 = expand_normal (arg0);
17087 rtx op1 = expand_normal (arg1);
17088 rtx op2 = expand_normal (arg2);
17089 rtx op3 = expand_normal (arg3);
17090 machine_mode mode0 = insn_data[icode].operand[1].mode;
17091 machine_mode mode1 = insn_data[icode].operand[2].mode;
17093 gcc_assert (mode0 == mode1);
17095 if (arg0 == error_mark_node || arg1 == error_mark_node
17096 || arg2 == error_mark_node || arg3 == error_mark_node)
17097 return const0_rtx;
17099 if (target == 0
17100 || GET_MODE (target) != mode0
17101 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
17102 target = gen_reg_rtx (mode0);
17104 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17105 op0 = copy_to_mode_reg (mode0, op0);
17106 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17107 op1 = copy_to_mode_reg (mode0, op1);
17108 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
17109 op2 = copy_to_mode_reg (mode0, op2);
17110 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
17111 op3 = copy_to_mode_reg (mode0, op3);
17113 /* Generate the compare. */
17114 scratch = gen_reg_rtx (CCmode);
17115 pat = GEN_FCN (icode) (scratch, op0, op1);
17116 if (! pat)
17117 return const0_rtx;
17118 emit_insn (pat);
17120 if (mode0 == V2SImode)
17121 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
17122 else
17123 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
17125 return target;
17128 /* Raise an error message for a builtin function that is called without the
17129 appropriate target options being set. */
17131 static void
17132 rs6000_invalid_builtin (enum rs6000_builtins fncode)
17134 size_t uns_fncode = (size_t)fncode;
17135 const char *name = rs6000_builtin_info[uns_fncode].name;
17136 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
17138 gcc_assert (name != NULL);
17139 if ((fnmask & RS6000_BTM_CELL) != 0)
17140 error ("Builtin function %s is only valid for the cell processor", name);
17141 else if ((fnmask & RS6000_BTM_VSX) != 0)
17142 error ("Builtin function %s requires the -mvsx option", name);
17143 else if ((fnmask & RS6000_BTM_HTM) != 0)
17144 error ("Builtin function %s requires the -mhtm option", name);
17145 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
17146 error ("Builtin function %s requires the -maltivec option", name);
17147 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
17148 error ("Builtin function %s requires the -mpaired option", name);
17149 else if ((fnmask & RS6000_BTM_SPE) != 0)
17150 error ("Builtin function %s requires the -mspe option", name);
17151 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
17152 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
17153 error ("Builtin function %s requires the -mhard-dfp and"
17154 " -mpower8-vector options", name);
17155 else if ((fnmask & RS6000_BTM_DFP) != 0)
17156 error ("Builtin function %s requires the -mhard-dfp option", name);
17157 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
17158 error ("Builtin function %s requires the -mpower8-vector option", name);
17159 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
17160 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
17161 error ("Builtin function %s requires the -mcpu=power9 and"
17162 " -m64 options", name);
17163 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
17164 error ("Builtin function %s requires the -mcpu=power9 option", name);
17165 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
17166 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
17167 error ("Builtin function %s requires the -mcpu=power9 and"
17168 " -m64 options", name);
17169 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
17170 error ("Builtin function %s requires the -mcpu=power9 option", name);
17171 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
17172 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
17173 error ("Builtin function %s requires the -mhard-float and"
17174 " -mlong-double-128 options", name);
17175 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
17176 error ("Builtin function %s requires the -mhard-float option", name);
17177 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
17178 error ("Builtin function %s requires the -mfloat128 option", name);
17179 else
17180 error ("Builtin function %s is not supported with the current options",
17181 name);
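/* For instance (illustrative): calling an AltiVec builtin such as
   __builtin_altivec_vadduwm in a compilation without -maltivec reaches
   the RS6000_BTM_ALTIVEC arm above and reports

     Builtin function __builtin_altivec_vadduwm requires the -maltivec option

   after which the caller falls back to expanding a plain call.  */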
17184 /* Target hook for early folding of built-ins, shamelessly stolen
17185 from ia64.c. */
17187 static tree
17188 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
17189 tree *args, bool ignore ATTRIBUTE_UNUSED)
17191 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17193 enum rs6000_builtins fn_code
17194 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
17195 switch (fn_code)
17197 case RS6000_BUILTIN_NANQ:
17198 case RS6000_BUILTIN_NANSQ:
17200 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17201 const char *str = c_getstr (*args);
17202 int quiet = fn_code == RS6000_BUILTIN_NANQ;
17203 REAL_VALUE_TYPE real;
17205 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17206 return build_real (type, real);
17207 return NULL_TREE;
17209 case RS6000_BUILTIN_INFQ:
17210 case RS6000_BUILTIN_HUGE_VALQ:
17212 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17213 REAL_VALUE_TYPE inf;
17214 real_inf (&inf);
17215 return build_real (type, inf);
17217 default:
17218 break;
17221 #ifdef SUBTARGET_FOLD_BUILTIN
17222 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17223 #else
17224 return NULL_TREE;
17225 #endif
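/* Illustrative example of the folding above: __builtin_nanq ("") passes a
   constant string, so c_getstr succeeds, real_nan builds the default
   quiet NaN in the __float128 format, and the whole call folds to a
   REAL_CST; __builtin_infq likewise always folds to a +Inf constant.  */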
17228 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
17229 a constant, use rs6000_fold_builtin.) */
17231 bool
17232 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17234 gimple *stmt = gsi_stmt (*gsi);
17235 tree fndecl = gimple_call_fndecl (stmt);
17236 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
17237 enum rs6000_builtins fn_code
17238 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
17239 tree arg0, arg1, lhs;
17241 switch (fn_code)
17243 /* Flavors of vec_add. We deliberately don't expand
17244 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
17245 TImode, resulting in much poorer code generation. */
17246 case ALTIVEC_BUILTIN_VADDUBM:
17247 case ALTIVEC_BUILTIN_VADDUHM:
17248 case ALTIVEC_BUILTIN_VADDUWM:
17249 case P8V_BUILTIN_VADDUDM:
17250 case ALTIVEC_BUILTIN_VADDFP:
17251 case VSX_BUILTIN_XVADDDP:
17253 arg0 = gimple_call_arg (stmt, 0);
17254 arg1 = gimple_call_arg (stmt, 1);
17255 lhs = gimple_call_lhs (stmt);
17256 gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
17257 gimple_set_location (g, gimple_location (stmt));
17258 gsi_replace (gsi, g, true);
17259 return true;
17261 /* Flavors of vec_sub. We deliberately don't expand
17262 P8V_BUILTIN_VSUBUQM. */
17263 case ALTIVEC_BUILTIN_VSUBUBM:
17264 case ALTIVEC_BUILTIN_VSUBUHM:
17265 case ALTIVEC_BUILTIN_VSUBUWM:
17266 case P8V_BUILTIN_VSUBUDM:
17267 case ALTIVEC_BUILTIN_VSUBFP:
17268 case VSX_BUILTIN_XVSUBDP:
17270 arg0 = gimple_call_arg (stmt, 0);
17271 arg1 = gimple_call_arg (stmt, 1);
17272 lhs = gimple_call_lhs (stmt);
17273 gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
17274 gimple_set_location (g, gimple_location (stmt));
17275 gsi_replace (gsi, g, true);
17276 return true;
17278 case VSX_BUILTIN_XVMULSP:
17279 case VSX_BUILTIN_XVMULDP:
17281 arg0 = gimple_call_arg (stmt, 0);
17282 arg1 = gimple_call_arg (stmt, 1);
17283 lhs = gimple_call_lhs (stmt);
17284 gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
17285 gimple_set_location (g, gimple_location (stmt));
17286 gsi_replace (gsi, g, true);
17287 return true;
17289 /* Even element flavors of vec_mul (signed). */
17290 case ALTIVEC_BUILTIN_VMULESB:
17291 case ALTIVEC_BUILTIN_VMULESH:
17292 /* Even element flavors of vec_mul (unsigned). */
17293 case ALTIVEC_BUILTIN_VMULEUB:
17294 case ALTIVEC_BUILTIN_VMULEUH:
17296 arg0 = gimple_call_arg (stmt, 0);
17297 arg1 = gimple_call_arg (stmt, 1);
17298 lhs = gimple_call_lhs (stmt);
17299 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
17300 gimple_set_location (g, gimple_location (stmt));
17301 gsi_replace (gsi, g, true);
17302 return true;
17304 /* Odd element flavors of vec_mul (signed). */
17305 case ALTIVEC_BUILTIN_VMULOSB:
17306 case ALTIVEC_BUILTIN_VMULOSH:
17307 /* Odd element flavors of vec_mul (unsigned). */
17308 case ALTIVEC_BUILTIN_VMULOUB:
17309 case ALTIVEC_BUILTIN_VMULOUH:
17311 arg0 = gimple_call_arg (stmt, 0);
17312 arg1 = gimple_call_arg (stmt, 1);
17313 lhs = gimple_call_lhs (stmt);
17314 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
17315 gimple_set_location (g, gimple_location (stmt));
17316 gsi_replace (gsi, g, true);
17317 return true;
17319 /* Flavors of vec_div (Integer). */
17320 case VSX_BUILTIN_DIV_V2DI:
17321 case VSX_BUILTIN_UDIV_V2DI:
17323 arg0 = gimple_call_arg (stmt, 0);
17324 arg1 = gimple_call_arg (stmt, 1);
17325 lhs = gimple_call_lhs (stmt);
17326 gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
17327 gimple_set_location (g, gimple_location (stmt));
17328 gsi_replace (gsi, g, true);
17329 return true;
17331 /* Flavors of vec_div (Float). */
17332 case VSX_BUILTIN_XVDIVSP:
17333 case VSX_BUILTIN_XVDIVDP:
17335 arg0 = gimple_call_arg (stmt, 0);
17336 arg1 = gimple_call_arg (stmt, 1);
17337 lhs = gimple_call_lhs (stmt);
17338 gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
17339 gimple_set_location (g, gimple_location (stmt));
17340 gsi_replace (gsi, g, true);
17341 return true;
17343 /* Flavors of vec_and. */
17344 case ALTIVEC_BUILTIN_VAND:
17346 arg0 = gimple_call_arg (stmt, 0);
17347 arg1 = gimple_call_arg (stmt, 1);
17348 lhs = gimple_call_lhs (stmt);
17349 gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
17350 gimple_set_location (g, gimple_location (stmt));
17351 gsi_replace (gsi, g, true);
17352 return true;
17354 /* Flavors of vec_andc. */
17355 case ALTIVEC_BUILTIN_VANDC:
17357 arg0 = gimple_call_arg (stmt, 0);
17358 arg1 = gimple_call_arg (stmt, 1);
17359 lhs = gimple_call_lhs (stmt);
17360 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17361 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
17362 gimple_set_location (g, gimple_location (stmt));
17363 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17364 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
17365 gimple_set_location (g, gimple_location (stmt));
17366 gsi_replace (gsi, g, true);
17367 return true;
17369 /* Flavors of vec_nand. */
17370 case P8V_BUILTIN_VEC_NAND:
17371 case P8V_BUILTIN_NAND_V16QI:
17372 case P8V_BUILTIN_NAND_V8HI:
17373 case P8V_BUILTIN_NAND_V4SI:
17374 case P8V_BUILTIN_NAND_V4SF:
17375 case P8V_BUILTIN_NAND_V2DF:
17376 case P8V_BUILTIN_NAND_V2DI:
17378 arg0 = gimple_call_arg (stmt, 0);
17379 arg1 = gimple_call_arg (stmt, 1);
17380 lhs = gimple_call_lhs (stmt);
17381 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17382 gimple *g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
17383 gimple_set_location (g, gimple_location (stmt));
17384 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17385 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
17386 gimple_set_location (g, gimple_location (stmt));
17387 gsi_replace (gsi, g, true);
17388 return true;
17390 /* Flavors of vec_or. */
17391 case ALTIVEC_BUILTIN_VOR:
17393 arg0 = gimple_call_arg (stmt, 0);
17394 arg1 = gimple_call_arg (stmt, 1);
17395 lhs = gimple_call_lhs (stmt);
17396 gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
17397 gimple_set_location (g, gimple_location (stmt));
17398 gsi_replace (gsi, g, true);
17399 return true;
17401 /* Flavors of vec_orc. */
17402 case P8V_BUILTIN_ORC_V16QI:
17403 case P8V_BUILTIN_ORC_V8HI:
17404 case P8V_BUILTIN_ORC_V4SI:
17405 case P8V_BUILTIN_ORC_V4SF:
17406 case P8V_BUILTIN_ORC_V2DF:
17407 case P8V_BUILTIN_ORC_V2DI:
17409 arg0 = gimple_call_arg (stmt, 0);
17410 arg1 = gimple_call_arg (stmt, 1);
17411 lhs = gimple_call_lhs (stmt);
17412 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17413 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
17414 gimple_set_location (g, gimple_location (stmt));
17415 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17416 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
17417 gimple_set_location (g, gimple_location (stmt));
17418 gsi_replace (gsi, g, true);
17419 return true;
17421 /* Flavors of vec_xor. */
17422 case ALTIVEC_BUILTIN_VXOR:
17424 arg0 = gimple_call_arg (stmt, 0);
17425 arg1 = gimple_call_arg (stmt, 1);
17426 lhs = gimple_call_lhs (stmt);
17427 gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
17428 gimple_set_location (g, gimple_location (stmt));
17429 gsi_replace (gsi, g, true);
17430 return true;
17432 /* Flavors of vec_nor. */
17433 case ALTIVEC_BUILTIN_VNOR:
17435 arg0 = gimple_call_arg (stmt, 0);
17436 arg1 = gimple_call_arg (stmt, 1);
17437 lhs = gimple_call_lhs (stmt);
17438 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17439 gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
17440 gimple_set_location (g, gimple_location (stmt));
17441 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17442 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
17443 gimple_set_location (g, gimple_location (stmt));
17444 gsi_replace (gsi, g, true);
17445 return true;
17447 default:
17448 break;
17451 return false;
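/* Illustrative before/after for the GIMPLE folding above: a statement

     x = __builtin_altivec_vadduwm (a, b);

   is replaced in place by the equivalent assignment

     x = a + b;

   (a PLUS_EXPR on the V4SI operands), so the generic vector optimizers
   can see through the builtin.  */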
17454 /* Expand an expression EXP that calls a built-in function,
17455 with result going to TARGET if that's convenient
17456 (and in mode MODE if that's convenient).
17457 SUBTARGET may be used as the target for computing one of EXP's operands.
17458 IGNORE is nonzero if the value is to be ignored. */
17460 static rtx
17461 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17462 machine_mode mode ATTRIBUTE_UNUSED,
17463 int ignore ATTRIBUTE_UNUSED)
17465 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17466 enum rs6000_builtins fcode
17467 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
17468 size_t uns_fcode = (size_t)fcode;
17469 const struct builtin_description *d;
17470 size_t i;
17471 rtx ret;
17472 bool success;
17473 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
17474 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
17476 if (TARGET_DEBUG_BUILTIN)
17478 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
17479 const char *name1 = rs6000_builtin_info[uns_fcode].name;
17480 const char *name2 = ((icode != CODE_FOR_nothing)
17481 ? get_insn_name ((int)icode)
17482 : "nothing");
17483 const char *name3;
17485 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
17487 default: name3 = "unknown"; break;
17488 case RS6000_BTC_SPECIAL: name3 = "special"; break;
17489 case RS6000_BTC_UNARY: name3 = "unary"; break;
17490 case RS6000_BTC_BINARY: name3 = "binary"; break;
17491 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
17492 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
17493 case RS6000_BTC_ABS: name3 = "abs"; break;
17494 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
17495 case RS6000_BTC_DST: name3 = "dst"; break;
17499 fprintf (stderr,
17500 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
17501 (name1) ? name1 : "---", fcode,
17502 (name2) ? name2 : "---", (int)icode,
17503 name3,
17504 func_valid_p ? "" : ", not valid");
17507 if (!func_valid_p)
17509 rs6000_invalid_builtin (fcode);
17511 /* Given it is invalid, just generate a normal call. */
17512 return expand_call (exp, target, ignore);
17515 switch (fcode)
17517 case RS6000_BUILTIN_RECIP:
17518 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
17520 case RS6000_BUILTIN_RECIPF:
17521 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
17523 case RS6000_BUILTIN_RSQRTF:
17524 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
17526 case RS6000_BUILTIN_RSQRT:
17527 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
17529 case POWER7_BUILTIN_BPERMD:
17530 return rs6000_expand_binop_builtin (((TARGET_64BIT)
17531 ? CODE_FOR_bpermd_di
17532 : CODE_FOR_bpermd_si), exp, target);
17534 case RS6000_BUILTIN_GET_TB:
17535 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
17536 target);
17538 case RS6000_BUILTIN_MFTB:
17539 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
17540 ? CODE_FOR_rs6000_mftb_di
17541 : CODE_FOR_rs6000_mftb_si),
17542 target);
17544 case RS6000_BUILTIN_MFFS:
17545 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
17547 case RS6000_BUILTIN_MTFSF:
17548 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
17550 case RS6000_BUILTIN_CPU_INIT:
17551 case RS6000_BUILTIN_CPU_IS:
17552 case RS6000_BUILTIN_CPU_SUPPORTS:
17553 return cpu_expand_builtin (fcode, exp, target);
17555 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
17556 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
17558 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
17559 : (int) CODE_FOR_altivec_lvsl_direct);
17560 machine_mode tmode = insn_data[icode].operand[0].mode;
17561 machine_mode mode = insn_data[icode].operand[1].mode;
17562 tree arg;
17563 rtx op, addr, pat;
17565 gcc_assert (TARGET_ALTIVEC);
17567 arg = CALL_EXPR_ARG (exp, 0);
17568 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
17569 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
17570 addr = memory_address (mode, op);
17571 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
17572 op = addr;
17573 else
17575 /* For the load case we need to negate the address. */
17576 op = gen_reg_rtx (GET_MODE (addr));
17577 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
17579 op = gen_rtx_MEM (mode, op);
17581 if (target == 0
17582 || GET_MODE (target) != tmode
17583 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17584 target = gen_reg_rtx (tmode);
17586 pat = GEN_FCN (icode) (target, op);
17587 if (!pat)
17588 return 0;
17589 emit_insn (pat);
17591 return target;
17594 case ALTIVEC_BUILTIN_VCFUX:
17595 case ALTIVEC_BUILTIN_VCFSX:
17596 case ALTIVEC_BUILTIN_VCTUXS:
17597 case ALTIVEC_BUILTIN_VCTSXS:
17598 /* FIXME: There's got to be a nicer way to handle this case than
17599 constructing a new CALL_EXPR. */
17600 if (call_expr_nargs (exp) == 1)
17602 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
17603 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
17605 break;
17607 default:
17608 break;
17611 if (TARGET_ALTIVEC)
17613 ret = altivec_expand_builtin (exp, target, &success);
17615 if (success)
17616 return ret;
17618 if (TARGET_SPE)
17620 ret = spe_expand_builtin (exp, target, &success);
17622 if (success)
17623 return ret;
17625 if (TARGET_PAIRED_FLOAT)
17627 ret = paired_expand_builtin (exp, target, &success);
17629 if (success)
17630 return ret;
17632 if (TARGET_HTM)
17634 ret = htm_expand_builtin (exp, target, &success);
17636 if (success)
17637 return ret;
17640 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
17641 /* RS6000_BTC_SPECIAL represents no-operand operators. */
17642 gcc_assert (attr == RS6000_BTC_UNARY
17643 || attr == RS6000_BTC_BINARY
17644 || attr == RS6000_BTC_TERNARY
17645 || attr == RS6000_BTC_SPECIAL);
17647 /* Handle simple unary operations. */
17648 d = bdesc_1arg;
17649 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17650 if (d->code == fcode)
17651 return rs6000_expand_unop_builtin (d->icode, exp, target);
17653 /* Handle simple binary operations. */
17654 d = bdesc_2arg;
17655 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17656 if (d->code == fcode)
17657 return rs6000_expand_binop_builtin (d->icode, exp, target);
17659 /* Handle simple ternary operations. */
17660 d = bdesc_3arg;
17661 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17662 if (d->code == fcode)
17663 return rs6000_expand_ternop_builtin (d->icode, exp, target);
17665 /* Handle simple no-argument operations. */
17666 d = bdesc_0arg;
17667 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17668 if (d->code == fcode)
17669 return rs6000_expand_zeroop_builtin (d->icode, target);
17671 gcc_unreachable ();
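/* With -mdebug=builtin, the TARGET_DEBUG_BUILTIN block at the top of this
   function prints one line per expansion, of the shape (illustrative; the
   names and codes vary per builtin and build):

     rs6000_expand_builtin, __builtin_... (NNN), insn = <pattern> (MMM), type=binary

   which is handy when checking that a builtin maps to the expected insn
   pattern.  */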
17674 /* Create a builtin vector type with a name, taking care not to give
17675 the canonical type a name. */
17677 static tree
17678 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
17680 tree result = build_vector_type (elt_type, num_elts);
17682 /* Copy so we don't give the canonical type a name. */
17683 result = build_variant_type_copy (result);
17685 add_builtin_type (name, result);
17687 return result;
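/* Design note (a reading of the comment above, not author text): naming a
   variant copy rather than the canonical type keeps the canonical vector
   type anonymous and shared for type-equivalence purposes, while the
   AltiVec spelling such as "__vector signed int" gets its own named
   variant for diagnostics and debug info.  */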
17690 static void
17691 rs6000_init_builtins (void)
17693 tree tdecl;
17694 tree ftype;
17695 machine_mode mode;
17697 if (TARGET_DEBUG_BUILTIN)
17698 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
17699 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
17700 (TARGET_SPE) ? ", spe" : "",
17701 (TARGET_ALTIVEC) ? ", altivec" : "",
17702 (TARGET_VSX) ? ", vsx" : "");
17704 V2SI_type_node = build_vector_type (intSI_type_node, 2);
17705 V2SF_type_node = build_vector_type (float_type_node, 2);
17706 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
17707 : "__vector long long",
17708 intDI_type_node, 2);
17709 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
17710 V4HI_type_node = build_vector_type (intHI_type_node, 4);
17711 V4SI_type_node = rs6000_vector_type ("__vector signed int",
17712 intSI_type_node, 4);
17713 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
17714 V8HI_type_node = rs6000_vector_type ("__vector signed short",
17715 intHI_type_node, 8);
17716 V16QI_type_node = rs6000_vector_type ("__vector signed char",
17717 intQI_type_node, 16);
17719 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
17720 unsigned_intQI_type_node, 16);
17721 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
17722 unsigned_intHI_type_node, 8);
17723 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
17724 unsigned_intSI_type_node, 4);
17725 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17726 ? "__vector unsigned long"
17727 : "__vector unsigned long long",
17728 unsigned_intDI_type_node, 2);
17730 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
17731 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
17732 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
17733 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
17735 const_str_type_node
17736 = build_pointer_type (build_qualified_type (char_type_node,
17737 TYPE_QUAL_CONST));
17739 /* We use V1TI mode as a special container to hold __int128_t items that
17740 must live in VSX registers. */
17741 if (intTI_type_node)
17743 V1TI_type_node = rs6000_vector_type ("__vector __int128",
17744 intTI_type_node, 1);
17745 unsigned_V1TI_type_node
17746 = rs6000_vector_type ("__vector unsigned __int128",
17747 unsigned_intTI_type_node, 1);
17750 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
17751 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
17752 'vector unsigned short'. */
17754 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
17755 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17756 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
17757 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
17758 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17760 long_integer_type_internal_node = long_integer_type_node;
17761 long_unsigned_type_internal_node = long_unsigned_type_node;
17762 long_long_integer_type_internal_node = long_long_integer_type_node;
17763 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
17764 intQI_type_internal_node = intQI_type_node;
17765 uintQI_type_internal_node = unsigned_intQI_type_node;
17766 intHI_type_internal_node = intHI_type_node;
17767 uintHI_type_internal_node = unsigned_intHI_type_node;
17768 intSI_type_internal_node = intSI_type_node;
17769 uintSI_type_internal_node = unsigned_intSI_type_node;
17770 intDI_type_internal_node = intDI_type_node;
17771 uintDI_type_internal_node = unsigned_intDI_type_node;
17772 intTI_type_internal_node = intTI_type_node;
17773 uintTI_type_internal_node = unsigned_intTI_type_node;
17774 float_type_internal_node = float_type_node;
17775 double_type_internal_node = double_type_node;
17776 long_double_type_internal_node = long_double_type_node;
17777 dfloat64_type_internal_node = dfloat64_type_node;
17778 dfloat128_type_internal_node = dfloat128_type_node;
17779 void_type_internal_node = void_type_node;
17781 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
17782 IFmode is the IBM extended 128-bit format that is a pair of doubles.
17783 TFmode will be either IEEE 128-bit floating point or the IBM double-double
17784 format that uses a pair of doubles, depending on the switches and
17785 defaults.
17787 We do not enable the actual __float128 keyword unless the user explicitly
17788 asks for it, because the library support is not yet complete.
17790 If we don't support either 128-bit IBM double double or IEEE 128-bit
17791 floating point, we need to make sure the type is non-zero or else the
17792 self-test fails during bootstrap.
17794 We don't register a built-in type for __ibm128 if the type is the same as
17795 long double. Instead, rs6000_cpu_cpp_builtins #defines __ibm128 to
17796 long double. */
17797 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
17799 ibm128_float_type_node = make_node (REAL_TYPE);
17800 TYPE_PRECISION (ibm128_float_type_node) = 128;
17801 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
17802 layout_type (ibm128_float_type_node);
17804 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
17805 "__ibm128");
17807 else
17808 ibm128_float_type_node = long_double_type_node;
17810 if (TARGET_FLOAT128_KEYWORD)
17812 ieee128_float_type_node = float128_type_node;
17813 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17814 "__float128");
17817 else if (TARGET_FLOAT128_TYPE)
17819 ieee128_float_type_node = make_node (REAL_TYPE);
17820 TYPE_PRECISION (ieee128_float_type_node) = 128;
17821 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
17822 layout_type (ieee128_float_type_node);
17824 /* If we are not exporting the __float128/_Float128 keywords, we need a
17825 keyword to get the types created. Use __ieee128 as the dummy
17826 keyword. */
17827 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17828 "__ieee128");
17831 else
17832 ieee128_float_type_node = long_double_type_node;
17834 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17835 tree type node. */
17836 builtin_mode_to_type[QImode][0] = integer_type_node;
17837 builtin_mode_to_type[HImode][0] = integer_type_node;
17838 builtin_mode_to_type[SImode][0] = intSI_type_node;
17839 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
17840 builtin_mode_to_type[DImode][0] = intDI_type_node;
17841 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
17842 builtin_mode_to_type[TImode][0] = intTI_type_node;
17843 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
17844 builtin_mode_to_type[SFmode][0] = float_type_node;
17845 builtin_mode_to_type[DFmode][0] = double_type_node;
17846 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
17847 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
17848 builtin_mode_to_type[TFmode][0] = long_double_type_node;
17849 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
17850 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
17851 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
17852 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
17853 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
17854 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
17855 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
17856 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
17857 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
17858 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
17859 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
17860 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
17861 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
17862 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
17863 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
17864 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
17865 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
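/* Editor's note: a standalone sketch, with invented names, of the
   two-column lookup the table above enables: index by machine mode and
   signedness, falling back to the signed entry when no unsigned variant
   was registered -- the same fallback builtin_function_type uses below.  */
#if 0 /* illustrative only */
#include <stdio.h>

enum toy_mode { TOY_SI, TOY_DI, TOY_SF, TOY_MODE_MAX };

static const char *toy_mode_to_type[TOY_MODE_MAX][2] = {
  [TOY_SI] = { "int", "unsigned int" },
  [TOY_DI] = { "long long", "unsigned long long" },
  [TOY_SF] = { "float", NULL },		/* no unsigned float type.  */
};

static const char *
toy_lookup (enum toy_mode m, int uns_p)
{
  const char *t = toy_mode_to_type[m][uns_p];
  if (!t && uns_p)
    t = toy_mode_to_type[m][0];		/* fall back to signed.  */
  return t;
}

int
main (void)
{
  printf ("%s\n", toy_lookup (TOY_SF, 1));	/* prints "float".  */
  return 0;
}
#endif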
17867 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
17868 TYPE_NAME (bool_char_type_node) = tdecl;
17870 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
17871 TYPE_NAME (bool_short_type_node) = tdecl;
17873 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
17874 TYPE_NAME (bool_int_type_node) = tdecl;
17876 tdecl = add_builtin_type ("__pixel", pixel_type_node);
17877 TYPE_NAME (pixel_type_node) = tdecl;
17879 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
17880 bool_char_type_node, 16);
17881 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
17882 bool_short_type_node, 8);
17883 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
17884 bool_int_type_node, 4);
17885 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17886 ? "__vector __bool long"
17887 : "__vector __bool long long",
17888 bool_long_type_node, 2);
17889 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
17890 pixel_type_node, 8);
17892	  /* Paired and SPE builtins are only available if the compiler was built
17893	     with the appropriate options, so only create those builtins when the
17894	     corresponding option is enabled.  Create AltiVec and VSX builtins on
17895	     machines with at least the general-purpose extensions (970 and newer)
17896	     to allow use of the target attribute.  */
17897 if (TARGET_PAIRED_FLOAT)
17898 paired_init_builtins ();
17899 if (TARGET_SPE)
17900 spe_init_builtins ();
17901 if (TARGET_EXTRA_BUILTINS)
17902 altivec_init_builtins ();
17903 if (TARGET_HTM)
17904 htm_init_builtins ();
17906 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
17907 rs6000_common_init_builtins ();
17909 ftype = build_function_type_list (ieee128_float_type_node,
17910 const_str_type_node, NULL_TREE);
17911 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
17912 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
17914 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
17915 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
17916 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
17918 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
17919 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
17920 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
17922 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
17923 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
17924 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
17926 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
17927 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
17928 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
17930 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
17931 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
17932 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
17934 mode = (TARGET_64BIT) ? DImode : SImode;
17935 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
17936 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
17937 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
17939 ftype = build_function_type_list (unsigned_intDI_type_node,
17940 NULL_TREE);
17941 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
17943 if (TARGET_64BIT)
17944 ftype = build_function_type_list (unsigned_intDI_type_node,
17945 NULL_TREE);
17946 else
17947 ftype = build_function_type_list (unsigned_intSI_type_node,
17948 NULL_TREE);
17949 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
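/* Editor's note: a hedged sketch of the time-base builtins just defined.
   __builtin_ppc_get_timebase always returns the full 64-bit time base,
   while __builtin_ppc_mftb returns only the low 32 bits on 32-bit
   targets, matching the if/else above.  */
#if 0 /* illustrative only */
unsigned long long start = __builtin_ppc_get_timebase ();
/* ... code being timed ...  */
unsigned long long ticks = __builtin_ppc_get_timebase () - start;
#endif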
17951 ftype = build_function_type_list (double_type_node, NULL_TREE);
17952 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17954 ftype = build_function_type_list (void_type_node,
17955 intSI_type_node, double_type_node,
17956 NULL_TREE);
17957 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17959 ftype = build_function_type_list (void_type_node, NULL_TREE);
17960 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17962 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17963 NULL_TREE);
17964 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17965 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
17967 /* AIX libm provides clog as __clog. */
17968 if (TARGET_XCOFF &&
17969 (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17970 set_user_assembler_name (tdecl, "__clog");
17972 #ifdef SUBTARGET_INIT_BUILTINS
17973 SUBTARGET_INIT_BUILTINS;
17974 #endif
17977	/* Return the rs6000 builtin decl for CODE.  */
17979 static tree
17980 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17982 HOST_WIDE_INT fnmask;
17984 if (code >= RS6000_BUILTIN_COUNT)
17985 return error_mark_node;
17987 fnmask = rs6000_builtin_info[code].mask;
17988 if ((fnmask & rs6000_builtin_mask) != fnmask)
17990 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17991 return error_mark_node;
17994 return rs6000_builtin_decls[code];
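/* Editor's note: a standalone sketch, with invented names, of the subset
   test used above: a builtin is usable only when every target bit it
   requires is present in the current builtin mask.  */
#if 0 /* illustrative only */
#include <stdio.h>

#define TOY_BTM_ALTIVEC 0x1ULL
#define TOY_BTM_VSX     0x2ULL

int
main (void)
{
  unsigned long long current = TOY_BTM_ALTIVEC;		/* -maltivec only.  */
  unsigned long long fnmask = TOY_BTM_ALTIVEC | TOY_BTM_VSX; /* needs both.  */
  /* (fnmask & current) != fnmask <=> some required bit is missing.  */
  puts ((fnmask & current) != fnmask ? "unavailable" : "available");
  return 0;
}
#endif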
17997 static void
17998 spe_init_builtins (void)
18000 tree puint_type_node = build_pointer_type (unsigned_type_node);
18001 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
18002 const struct builtin_description *d;
18003 size_t i;
18004 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18006 tree v2si_ftype_4_v2si
18007 = build_function_type_list (opaque_V2SI_type_node,
18008 opaque_V2SI_type_node,
18009 opaque_V2SI_type_node,
18010 opaque_V2SI_type_node,
18011 opaque_V2SI_type_node,
18012 NULL_TREE);
18014 tree v2sf_ftype_4_v2sf
18015 = build_function_type_list (opaque_V2SF_type_node,
18016 opaque_V2SF_type_node,
18017 opaque_V2SF_type_node,
18018 opaque_V2SF_type_node,
18019 opaque_V2SF_type_node,
18020 NULL_TREE);
18022 tree int_ftype_int_v2si_v2si
18023 = build_function_type_list (integer_type_node,
18024 integer_type_node,
18025 opaque_V2SI_type_node,
18026 opaque_V2SI_type_node,
18027 NULL_TREE);
18029 tree int_ftype_int_v2sf_v2sf
18030 = build_function_type_list (integer_type_node,
18031 integer_type_node,
18032 opaque_V2SF_type_node,
18033 opaque_V2SF_type_node,
18034 NULL_TREE);
18036 tree void_ftype_v2si_puint_int
18037 = build_function_type_list (void_type_node,
18038 opaque_V2SI_type_node,
18039 puint_type_node,
18040 integer_type_node,
18041 NULL_TREE);
18043 tree void_ftype_v2si_puint_char
18044 = build_function_type_list (void_type_node,
18045 opaque_V2SI_type_node,
18046 puint_type_node,
18047 char_type_node,
18048 NULL_TREE);
18050 tree void_ftype_v2si_pv2si_int
18051 = build_function_type_list (void_type_node,
18052 opaque_V2SI_type_node,
18053 opaque_p_V2SI_type_node,
18054 integer_type_node,
18055 NULL_TREE);
18057 tree void_ftype_v2si_pv2si_char
18058 = build_function_type_list (void_type_node,
18059 opaque_V2SI_type_node,
18060 opaque_p_V2SI_type_node,
18061 char_type_node,
18062 NULL_TREE);
18064 tree void_ftype_int
18065 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
18067 tree int_ftype_void
18068 = build_function_type_list (integer_type_node, NULL_TREE);
18070 tree v2si_ftype_pv2si_int
18071 = build_function_type_list (opaque_V2SI_type_node,
18072 opaque_p_V2SI_type_node,
18073 integer_type_node,
18074 NULL_TREE);
18076 tree v2si_ftype_puint_int
18077 = build_function_type_list (opaque_V2SI_type_node,
18078 puint_type_node,
18079 integer_type_node,
18080 NULL_TREE);
18082 tree v2si_ftype_pushort_int
18083 = build_function_type_list (opaque_V2SI_type_node,
18084 pushort_type_node,
18085 integer_type_node,
18086 NULL_TREE);
18088 tree v2si_ftype_signed_char
18089 = build_function_type_list (opaque_V2SI_type_node,
18090 signed_char_type_node,
18091 NULL_TREE);
18093 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
18095 /* Initialize irregular SPE builtins. */
18097 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
18098 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
18099 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
18100 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
18101 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
18102 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
18103 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
18104 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
18105 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
18106 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
18107 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
18108 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
18109 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
18110 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
18111 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
18112 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
18113 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
18114 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
18116 /* Loads. */
18117 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
18118 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
18119 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
18120 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
18121 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
18122 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
18123 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
18124 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
18125 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
18126 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
18127 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
18128 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
18129 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
18130 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
18131 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
18132 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
18133 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
18134 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
18135 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
18136 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
18137 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
18138 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
18140 /* Predicates. */
18141 d = bdesc_spe_predicates;
18142 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
18144 tree type;
18145 HOST_WIDE_INT mask = d->mask;
18147 if ((mask & builtin_mask) != mask)
18149 if (TARGET_DEBUG_BUILTIN)
18150 fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
18151 d->name);
18152 continue;
18155 /* Cannot define builtin if the instruction is disabled. */
18156 gcc_assert (d->icode != CODE_FOR_nothing);
18157 switch (insn_data[d->icode].operand[1].mode)
18159 case E_V2SImode:
18160 type = int_ftype_int_v2si_v2si;
18161 break;
18162 case E_V2SFmode:
18163 type = int_ftype_int_v2sf_v2sf;
18164 break;
18165 default:
18166 gcc_unreachable ();
18169 def_builtin (d->name, type, d->code);
18172 /* Evsel predicates. */
18173 d = bdesc_spe_evsel;
18174 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
18176 tree type;
18177 HOST_WIDE_INT mask = d->mask;
18179 if ((mask & builtin_mask) != mask)
18181 if (TARGET_DEBUG_BUILTIN)
18182 fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
18183 d->name);
18184 continue;
18187 /* Cannot define builtin if the instruction is disabled. */
18188 gcc_assert (d->icode != CODE_FOR_nothing);
18189 switch (insn_data[d->icode].operand[1].mode)
18191 case E_V2SImode:
18192 type = v2si_ftype_4_v2si;
18193 break;
18194 case E_V2SFmode:
18195 type = v2sf_ftype_4_v2sf;
18196 break;
18197 default:
18198 gcc_unreachable ();
18201 def_builtin (d->name, type, d->code);
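/* Editor's note: the two loops above follow the table-driven shape used
   throughout this file: walk a descriptor array, skip entries whose
   required mask bits are not enabled, pick a type from the insn's
   operand modes, and register the builtin.  A condensed standalone
   sketch with invented names:  */
#if 0 /* illustrative only */
#include <stddef.h>

struct toy_desc { unsigned long long mask; const char *name; int code; };

static const struct toy_desc toy_table[] = {
  { 0x1ULL, "__builtin_toy_a", 1 },
  { 0x3ULL, "__builtin_toy_b", 2 },
};

static void
toy_init (unsigned long long enabled)
{
  for (size_t i = 0; i < sizeof toy_table / sizeof toy_table[0]; i++)
    {
      if ((toy_table[i].mask & enabled) != toy_table[i].mask)
	continue;			/* required option not enabled.  */
      toy_register (toy_table[i].name, toy_table[i].code); /* hypothetical  */
    }
}
#endif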
18205 static void
18206 paired_init_builtins (void)
18208 const struct builtin_description *d;
18209 size_t i;
18210 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18212 tree int_ftype_int_v2sf_v2sf
18213 = build_function_type_list (integer_type_node,
18214 integer_type_node,
18215 V2SF_type_node,
18216 V2SF_type_node,
18217 NULL_TREE);
18218 tree pcfloat_type_node =
18219 build_pointer_type (build_qualified_type
18220 (float_type_node, TYPE_QUAL_CONST));
18222 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
18223 long_integer_type_node,
18224 pcfloat_type_node,
18225 NULL_TREE);
18226 tree void_ftype_v2sf_long_pcfloat =
18227 build_function_type_list (void_type_node,
18228 V2SF_type_node,
18229 long_integer_type_node,
18230 pcfloat_type_node,
18231 NULL_TREE);
18234 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
18235 PAIRED_BUILTIN_LX);
18238 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
18239 PAIRED_BUILTIN_STX);
18241 /* Predicates. */
18242 d = bdesc_paired_preds;
18243 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
18245 tree type;
18246 HOST_WIDE_INT mask = d->mask;
18248 if ((mask & builtin_mask) != mask)
18250 if (TARGET_DEBUG_BUILTIN)
18251 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
18252 d->name);
18253 continue;
18256 /* Cannot define builtin if the instruction is disabled. */
18257 gcc_assert (d->icode != CODE_FOR_nothing);
18259 if (TARGET_DEBUG_BUILTIN)
18260 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
18261 (int)i, get_insn_name (d->icode), (int)d->icode,
18262 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
18264 switch (insn_data[d->icode].operand[1].mode)
18266 case E_V2SFmode:
18267 type = int_ftype_int_v2sf_v2sf;
18268 break;
18269 default:
18270 gcc_unreachable ();
18273 def_builtin (d->name, type, d->code);
18277 static void
18278 altivec_init_builtins (void)
18280 const struct builtin_description *d;
18281 size_t i;
18282 tree ftype;
18283 tree decl;
18284 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18286 tree pvoid_type_node = build_pointer_type (void_type_node);
18288 tree pcvoid_type_node
18289 = build_pointer_type (build_qualified_type (void_type_node,
18290 TYPE_QUAL_CONST));
18292 tree int_ftype_opaque
18293 = build_function_type_list (integer_type_node,
18294 opaque_V4SI_type_node, NULL_TREE);
18295 tree opaque_ftype_opaque
18296 = build_function_type_list (integer_type_node, NULL_TREE);
18297 tree opaque_ftype_opaque_int
18298 = build_function_type_list (opaque_V4SI_type_node,
18299 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
18300 tree opaque_ftype_opaque_opaque_int
18301 = build_function_type_list (opaque_V4SI_type_node,
18302 opaque_V4SI_type_node, opaque_V4SI_type_node,
18303 integer_type_node, NULL_TREE);
18304 tree opaque_ftype_opaque_opaque_opaque
18305 = build_function_type_list (opaque_V4SI_type_node,
18306 opaque_V4SI_type_node, opaque_V4SI_type_node,
18307 opaque_V4SI_type_node, NULL_TREE);
18308 tree opaque_ftype_opaque_opaque
18309 = build_function_type_list (opaque_V4SI_type_node,
18310 opaque_V4SI_type_node, opaque_V4SI_type_node,
18311 NULL_TREE);
18312 tree int_ftype_int_opaque_opaque
18313 = build_function_type_list (integer_type_node,
18314 integer_type_node, opaque_V4SI_type_node,
18315 opaque_V4SI_type_node, NULL_TREE);
18316 tree int_ftype_int_v4si_v4si
18317 = build_function_type_list (integer_type_node,
18318 integer_type_node, V4SI_type_node,
18319 V4SI_type_node, NULL_TREE);
18320 tree int_ftype_int_v2di_v2di
18321 = build_function_type_list (integer_type_node,
18322 integer_type_node, V2DI_type_node,
18323 V2DI_type_node, NULL_TREE);
18324 tree void_ftype_v4si
18325 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
18326 tree v8hi_ftype_void
18327 = build_function_type_list (V8HI_type_node, NULL_TREE);
18328 tree void_ftype_void
18329 = build_function_type_list (void_type_node, NULL_TREE);
18330 tree void_ftype_int
18331 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
18333 tree opaque_ftype_long_pcvoid
18334 = build_function_type_list (opaque_V4SI_type_node,
18335 long_integer_type_node, pcvoid_type_node,
18336 NULL_TREE);
18337 tree v16qi_ftype_long_pcvoid
18338 = build_function_type_list (V16QI_type_node,
18339 long_integer_type_node, pcvoid_type_node,
18340 NULL_TREE);
18341 tree v8hi_ftype_long_pcvoid
18342 = build_function_type_list (V8HI_type_node,
18343 long_integer_type_node, pcvoid_type_node,
18344 NULL_TREE);
18345 tree v4si_ftype_long_pcvoid
18346 = build_function_type_list (V4SI_type_node,
18347 long_integer_type_node, pcvoid_type_node,
18348 NULL_TREE);
18349 tree v4sf_ftype_long_pcvoid
18350 = build_function_type_list (V4SF_type_node,
18351 long_integer_type_node, pcvoid_type_node,
18352 NULL_TREE);
18353 tree v2df_ftype_long_pcvoid
18354 = build_function_type_list (V2DF_type_node,
18355 long_integer_type_node, pcvoid_type_node,
18356 NULL_TREE);
18357 tree v2di_ftype_long_pcvoid
18358 = build_function_type_list (V2DI_type_node,
18359 long_integer_type_node, pcvoid_type_node,
18360 NULL_TREE);
18362 tree void_ftype_opaque_long_pvoid
18363 = build_function_type_list (void_type_node,
18364 opaque_V4SI_type_node, long_integer_type_node,
18365 pvoid_type_node, NULL_TREE);
18366 tree void_ftype_v4si_long_pvoid
18367 = build_function_type_list (void_type_node,
18368 V4SI_type_node, long_integer_type_node,
18369 pvoid_type_node, NULL_TREE);
18370 tree void_ftype_v16qi_long_pvoid
18371 = build_function_type_list (void_type_node,
18372 V16QI_type_node, long_integer_type_node,
18373 pvoid_type_node, NULL_TREE);
18375 tree void_ftype_v16qi_pvoid_long
18376 = build_function_type_list (void_type_node,
18377 V16QI_type_node, pvoid_type_node,
18378 long_integer_type_node, NULL_TREE);
18380 tree void_ftype_v8hi_long_pvoid
18381 = build_function_type_list (void_type_node,
18382 V8HI_type_node, long_integer_type_node,
18383 pvoid_type_node, NULL_TREE);
18384 tree void_ftype_v4sf_long_pvoid
18385 = build_function_type_list (void_type_node,
18386 V4SF_type_node, long_integer_type_node,
18387 pvoid_type_node, NULL_TREE);
18388 tree void_ftype_v2df_long_pvoid
18389 = build_function_type_list (void_type_node,
18390 V2DF_type_node, long_integer_type_node,
18391 pvoid_type_node, NULL_TREE);
18392 tree void_ftype_v2di_long_pvoid
18393 = build_function_type_list (void_type_node,
18394 V2DI_type_node, long_integer_type_node,
18395 pvoid_type_node, NULL_TREE);
18396 tree int_ftype_int_v8hi_v8hi
18397 = build_function_type_list (integer_type_node,
18398 integer_type_node, V8HI_type_node,
18399 V8HI_type_node, NULL_TREE);
18400 tree int_ftype_int_v16qi_v16qi
18401 = build_function_type_list (integer_type_node,
18402 integer_type_node, V16QI_type_node,
18403 V16QI_type_node, NULL_TREE);
18404 tree int_ftype_int_v4sf_v4sf
18405 = build_function_type_list (integer_type_node,
18406 integer_type_node, V4SF_type_node,
18407 V4SF_type_node, NULL_TREE);
18408 tree int_ftype_int_v2df_v2df
18409 = build_function_type_list (integer_type_node,
18410 integer_type_node, V2DF_type_node,
18411 V2DF_type_node, NULL_TREE);
18412 tree v2di_ftype_v2di
18413 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18414 tree v4si_ftype_v4si
18415 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18416 tree v8hi_ftype_v8hi
18417 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18418 tree v16qi_ftype_v16qi
18419 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18420 tree v4sf_ftype_v4sf
18421 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18422 tree v2df_ftype_v2df
18423 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18424 tree void_ftype_pcvoid_int_int
18425 = build_function_type_list (void_type_node,
18426 pcvoid_type_node, integer_type_node,
18427 integer_type_node, NULL_TREE);
18429 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
18430 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
18431 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
18432 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
18433 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
18434 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
18435 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
18436 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
18437 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
18438 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
18439 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
18440 ALTIVEC_BUILTIN_LVXL_V2DF);
18441 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
18442 ALTIVEC_BUILTIN_LVXL_V2DI);
18443 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
18444 ALTIVEC_BUILTIN_LVXL_V4SF);
18445 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
18446 ALTIVEC_BUILTIN_LVXL_V4SI);
18447 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
18448 ALTIVEC_BUILTIN_LVXL_V8HI);
18449 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
18450 ALTIVEC_BUILTIN_LVXL_V16QI);
18451 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
18452 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
18453 ALTIVEC_BUILTIN_LVX_V2DF);
18454 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
18455 ALTIVEC_BUILTIN_LVX_V2DI);
18456 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
18457 ALTIVEC_BUILTIN_LVX_V4SF);
18458 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
18459 ALTIVEC_BUILTIN_LVX_V4SI);
18460 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
18461 ALTIVEC_BUILTIN_LVX_V8HI);
18462 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
18463 ALTIVEC_BUILTIN_LVX_V16QI);
18464 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
18465 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
18466 ALTIVEC_BUILTIN_STVX_V2DF);
18467 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
18468 ALTIVEC_BUILTIN_STVX_V2DI);
18469 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
18470 ALTIVEC_BUILTIN_STVX_V4SF);
18471 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
18472 ALTIVEC_BUILTIN_STVX_V4SI);
18473 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
18474 ALTIVEC_BUILTIN_STVX_V8HI);
18475 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
18476 ALTIVEC_BUILTIN_STVX_V16QI);
18477 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
18478 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
18479 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
18480 ALTIVEC_BUILTIN_STVXL_V2DF);
18481 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
18482 ALTIVEC_BUILTIN_STVXL_V2DI);
18483 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
18484 ALTIVEC_BUILTIN_STVXL_V4SF);
18485 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
18486 ALTIVEC_BUILTIN_STVXL_V4SI);
18487 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
18488 ALTIVEC_BUILTIN_STVXL_V8HI);
18489 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
18490 ALTIVEC_BUILTIN_STVXL_V16QI);
18491 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
18492 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
18493 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
18494 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
18495 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
18496 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
18497 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
18498 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
18499 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
18500 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
18501 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
18502 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
18503 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
18504 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
18505 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
18506 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
18508 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
18509 VSX_BUILTIN_LXVD2X_V2DF);
18510 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
18511 VSX_BUILTIN_LXVD2X_V2DI);
18512 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
18513 VSX_BUILTIN_LXVW4X_V4SF);
18514 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
18515 VSX_BUILTIN_LXVW4X_V4SI);
18516 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
18517 VSX_BUILTIN_LXVW4X_V8HI);
18518 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
18519 VSX_BUILTIN_LXVW4X_V16QI);
18520 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
18521 VSX_BUILTIN_STXVD2X_V2DF);
18522 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
18523 VSX_BUILTIN_STXVD2X_V2DI);
18524 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
18525 VSX_BUILTIN_STXVW4X_V4SF);
18526 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
18527 VSX_BUILTIN_STXVW4X_V4SI);
18528 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
18529 VSX_BUILTIN_STXVW4X_V8HI);
18530 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
18531 VSX_BUILTIN_STXVW4X_V16QI);
18533 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
18534 VSX_BUILTIN_LD_ELEMREV_V2DF);
18535 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
18536 VSX_BUILTIN_LD_ELEMREV_V2DI);
18537 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
18538 VSX_BUILTIN_LD_ELEMREV_V4SF);
18539 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
18540 VSX_BUILTIN_LD_ELEMREV_V4SI);
18541 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
18542 VSX_BUILTIN_ST_ELEMREV_V2DF);
18543 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
18544 VSX_BUILTIN_ST_ELEMREV_V2DI);
18545 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
18546 VSX_BUILTIN_ST_ELEMREV_V4SF);
18547 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
18548 VSX_BUILTIN_ST_ELEMREV_V4SI);
18550 if (TARGET_P9_VECTOR)
18552 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
18553 VSX_BUILTIN_LD_ELEMREV_V8HI);
18554 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
18555 VSX_BUILTIN_LD_ELEMREV_V16QI);
18556 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
18557 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
18558 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
18559 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
18561 else
18563 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI]
18564 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI];
18565 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI]
18566 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI];
18567 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI]
18568 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI];
18569 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI]
18570 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI];
18573 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
18574 VSX_BUILTIN_VEC_LD);
18575 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
18576 VSX_BUILTIN_VEC_ST);
18577 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
18578 VSX_BUILTIN_VEC_XL);
18579 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
18580 VSX_BUILTIN_VEC_XST);
18582 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
18583 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
18584 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
18586 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
18587 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
18588 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
18589 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
18590 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
18591 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
18592 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
18593 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
18594 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
18595 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
18596 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
18597 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
18599 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
18600 ALTIVEC_BUILTIN_VEC_ADDE);
18601 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
18602 ALTIVEC_BUILTIN_VEC_ADDEC);
18603 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
18604 ALTIVEC_BUILTIN_VEC_CMPNE);
18605 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
18606 ALTIVEC_BUILTIN_VEC_MUL);
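/* Editor's note: a hedged sketch of the user-visible vec_* overloads the
   definitions above back; requires -maltivec and <altivec.h>.  Each
   overload resolves to a type-specific ALTIVEC_BUILTIN_* entry.  */
#if 0 /* illustrative only */
#include <altivec.h>

vector signed int a = vec_splats (3);		/* {3,3,3,3}  */
vector signed int b = vec_splats (4);
vector signed int sum = vec_add (a, b);		/* {7,7,7,7}  */
vector signed int rot = vec_sld (sum, sum, 4);	/* rotate by one word  */
#endif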
18608 /* Cell builtins. */
18609 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
18610 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
18611 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
18612 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
18614 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
18615 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
18616 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
18617 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
18619 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
18620 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
18621 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
18622 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
18624 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
18625 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
18626 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
18627 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
18629 if (TARGET_P9_VECTOR)
18630 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
18631 P9V_BUILTIN_STXVL);
18633 /* Add the DST variants. */
18634 d = bdesc_dst;
18635 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
18637 HOST_WIDE_INT mask = d->mask;
18639 /* It is expected that these dst built-in functions may have
18640 d->icode equal to CODE_FOR_nothing. */
18641 if ((mask & builtin_mask) != mask)
18643 if (TARGET_DEBUG_BUILTIN)
18644 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
18645 d->name);
18646 continue;
18648 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
18651 /* Initialize the predicates. */
18652 d = bdesc_altivec_preds;
18653 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
18655 machine_mode mode1;
18656 tree type;
18657 HOST_WIDE_INT mask = d->mask;
18659 if ((mask & builtin_mask) != mask)
18661 if (TARGET_DEBUG_BUILTIN)
18662 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
18663 d->name);
18664 continue;
18667 if (rs6000_overloaded_builtin_p (d->code))
18668 mode1 = VOIDmode;
18669 else
18671 /* Cannot define builtin if the instruction is disabled. */
18672 gcc_assert (d->icode != CODE_FOR_nothing);
18673 mode1 = insn_data[d->icode].operand[1].mode;
18676 switch (mode1)
18678 case E_VOIDmode:
18679 type = int_ftype_int_opaque_opaque;
18680 break;
18681 case E_V2DImode:
18682 type = int_ftype_int_v2di_v2di;
18683 break;
18684 case E_V4SImode:
18685 type = int_ftype_int_v4si_v4si;
18686 break;
18687 case E_V8HImode:
18688 type = int_ftype_int_v8hi_v8hi;
18689 break;
18690 case E_V16QImode:
18691 type = int_ftype_int_v16qi_v16qi;
18692 break;
18693 case E_V4SFmode:
18694 type = int_ftype_int_v4sf_v4sf;
18695 break;
18696 case E_V2DFmode:
18697 type = int_ftype_int_v2df_v2df;
18698 break;
18699 default:
18700 gcc_unreachable ();
18703 def_builtin (d->name, type, d->code);
18706 /* Initialize the abs* operators. */
18707 d = bdesc_abs;
18708 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
18710 machine_mode mode0;
18711 tree type;
18712 HOST_WIDE_INT mask = d->mask;
18714 if ((mask & builtin_mask) != mask)
18716 if (TARGET_DEBUG_BUILTIN)
18717 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
18718 d->name);
18719 continue;
18722 /* Cannot define builtin if the instruction is disabled. */
18723 gcc_assert (d->icode != CODE_FOR_nothing);
18724 mode0 = insn_data[d->icode].operand[0].mode;
18726 switch (mode0)
18728 case E_V2DImode:
18729 type = v2di_ftype_v2di;
18730 break;
18731 case E_V4SImode:
18732 type = v4si_ftype_v4si;
18733 break;
18734 case E_V8HImode:
18735 type = v8hi_ftype_v8hi;
18736 break;
18737 case E_V16QImode:
18738 type = v16qi_ftype_v16qi;
18739 break;
18740 case E_V4SFmode:
18741 type = v4sf_ftype_v4sf;
18742 break;
18743 case E_V2DFmode:
18744 type = v2df_ftype_v2df;
18745 break;
18746 default:
18747 gcc_unreachable ();
18750 def_builtin (d->name, type, d->code);
18753 /* Initialize target builtin that implements
18754 targetm.vectorize.builtin_mask_for_load. */
18756 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
18757 v16qi_ftype_long_pcvoid,
18758 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
18759 BUILT_IN_MD, NULL, NULL_TREE);
18760 TREE_READONLY (decl) = 1;
18761 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
18762 altivec_builtin_mask_for_load = decl;
18764 /* Access to the vec_init patterns. */
18765 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
18766 integer_type_node, integer_type_node,
18767 integer_type_node, NULL_TREE);
18768 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
18770 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
18771 short_integer_type_node,
18772 short_integer_type_node,
18773 short_integer_type_node,
18774 short_integer_type_node,
18775 short_integer_type_node,
18776 short_integer_type_node,
18777 short_integer_type_node, NULL_TREE);
18778 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
18780 ftype = build_function_type_list (V16QI_type_node, char_type_node,
18781 char_type_node, char_type_node,
18782 char_type_node, char_type_node,
18783 char_type_node, char_type_node,
18784 char_type_node, char_type_node,
18785 char_type_node, char_type_node,
18786 char_type_node, char_type_node,
18787 char_type_node, char_type_node,
18788 char_type_node, NULL_TREE);
18789 def_builtin ("__builtin_vec_init_v16qi", ftype,
18790 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
18792 ftype = build_function_type_list (V4SF_type_node, float_type_node,
18793 float_type_node, float_type_node,
18794 float_type_node, NULL_TREE);
18795 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
18797 /* VSX builtins. */
18798 ftype = build_function_type_list (V2DF_type_node, double_type_node,
18799 double_type_node, NULL_TREE);
18800 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
18802 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
18803 intDI_type_node, NULL_TREE);
18804 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
18806 /* Access to the vec_set patterns. */
18807 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18808 intSI_type_node,
18809 integer_type_node, NULL_TREE);
18810 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
18812 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18813 intHI_type_node,
18814 integer_type_node, NULL_TREE);
18815 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
18817 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18818 intQI_type_node,
18819 integer_type_node, NULL_TREE);
18820 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
18822 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18823 float_type_node,
18824 integer_type_node, NULL_TREE);
18825 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
18827 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
18828 double_type_node,
18829 integer_type_node, NULL_TREE);
18830 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
18832 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18833 intDI_type_node,
18834 integer_type_node, NULL_TREE);
18835 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
18837 /* Access to the vec_extract patterns. */
18838 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18839 integer_type_node, NULL_TREE);
18840 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
18842 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18843 integer_type_node, NULL_TREE);
18844 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
18846 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18847 integer_type_node, NULL_TREE);
18848 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
18850 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18851 integer_type_node, NULL_TREE);
18852 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
18854 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18855 integer_type_node, NULL_TREE);
18856 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
18858 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
18859 integer_type_node, NULL_TREE);
18860 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
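/* Editor's note: a hedged sketch of how the vec_init/vec_set/vec_ext
   patterns above surface in user code: GCC lowers vector literals,
   vec_insert and vec_extract onto them.  Requires -maltivec.  */
#if 0 /* illustrative only */
#include <altivec.h>

vector int v = { 1, 2, 3, 4 };		/* vec_init_v4si  */
v = vec_insert (9, v, 2);		/* vec_set_v4si: {1,2,9,4}  */
int third = vec_extract (v, 2);		/* vec_ext_v4si: 9  */
#endif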
18863 if (V1TI_type_node)
18865 tree v1ti_ftype_long_pcvoid
18866 = build_function_type_list (V1TI_type_node,
18867 long_integer_type_node, pcvoid_type_node,
18868 NULL_TREE);
18869 tree void_ftype_v1ti_long_pvoid
18870 = build_function_type_list (void_type_node,
18871 V1TI_type_node, long_integer_type_node,
18872 pvoid_type_node, NULL_TREE);
18873 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
18874 VSX_BUILTIN_LXVD2X_V1TI);
18875 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
18876 VSX_BUILTIN_STXVD2X_V1TI);
18877 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
18878					NULL_TREE);
18879 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
18880 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
18881 intTI_type_node,
18882 integer_type_node, NULL_TREE);
18883 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
18884 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
18885 integer_type_node, NULL_TREE);
18886 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
18891 static void
18892 htm_init_builtins (void)
18894 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18895 const struct builtin_description *d;
18896 size_t i;
18898 d = bdesc_htm;
18899 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
18901 tree op[MAX_HTM_OPERANDS], type;
18902 HOST_WIDE_INT mask = d->mask;
18903 unsigned attr = rs6000_builtin_info[d->code].attr;
18904 bool void_func = (attr & RS6000_BTC_VOID);
18905 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
18906 int nopnds = 0;
18907 tree gpr_type_node;
18908 tree rettype;
18909 tree argtype;
18911 /* It is expected that these htm built-in functions may have
18912 d->icode equal to CODE_FOR_nothing. */
18914 if (TARGET_32BIT && TARGET_POWERPC64)
18915 gpr_type_node = long_long_unsigned_type_node;
18916 else
18917 gpr_type_node = long_unsigned_type_node;
18919 if (attr & RS6000_BTC_SPR)
18921 rettype = gpr_type_node;
18922 argtype = gpr_type_node;
18924 else if (d->code == HTM_BUILTIN_TABORTDC
18925 || d->code == HTM_BUILTIN_TABORTDCI)
18927 rettype = unsigned_type_node;
18928 argtype = gpr_type_node;
18930 else
18932 rettype = unsigned_type_node;
18933 argtype = unsigned_type_node;
18936 if ((mask & builtin_mask) != mask)
18938 if (TARGET_DEBUG_BUILTIN)
18939 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
18940 continue;
18943 if (d->name == 0)
18945 if (TARGET_DEBUG_BUILTIN)
18946 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
18947 (long unsigned) i);
18948 continue;
18951 op[nopnds++] = (void_func) ? void_type_node : rettype;
18953 if (attr_args == RS6000_BTC_UNARY)
18954 op[nopnds++] = argtype;
18955 else if (attr_args == RS6000_BTC_BINARY)
18957 op[nopnds++] = argtype;
18958 op[nopnds++] = argtype;
18960 else if (attr_args == RS6000_BTC_TERNARY)
18962 op[nopnds++] = argtype;
18963 op[nopnds++] = argtype;
18964 op[nopnds++] = argtype;
18967 switch (nopnds)
18969 case 1:
18970 type = build_function_type_list (op[0], NULL_TREE);
18971 break;
18972 case 2:
18973 type = build_function_type_list (op[0], op[1], NULL_TREE);
18974 break;
18975 case 3:
18976 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
18977 break;
18978 case 4:
18979 type = build_function_type_list (op[0], op[1], op[2], op[3],
18980 NULL_TREE);
18981 break;
18982 default:
18983 gcc_unreachable ();
18986 def_builtin (d->name, type, d->code);
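/* Editor's note: a hedged sketch of the transactional-memory builtins
   registered by the loop above; requires -mhtm.  This follows the usage
   shape documented in the GCC manual.  */
#if 0 /* illustrative only */
if (__builtin_tbegin (0))
  {
    /* Transaction started; do the speculative work.  */
    __builtin_tend (0);
  }
else
  {
    /* Transaction failed to start or aborted; take the fallback path.  */
  }
#endif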
18990 /* Hash function for builtin functions with up to 3 arguments and a return
18991 type. */
18992 hashval_t
18993 builtin_hasher::hash (builtin_hash_struct *bh)
18995 unsigned ret = 0;
18996 int i;
18998 for (i = 0; i < 4; i++)
19000 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
19001 ret = (ret * 2) + bh->uns_p[i];
19004 return ret;
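/* Editor's note: a standalone sketch of the mixing above.  Each of the
   four (mode, signedness) pairs is folded in radix-style -- multiply by
   the number of machine modes, add the mode, then shift in the
   signedness bit -- so distinct signatures rarely collide.  */
#if 0 /* illustrative only */
#include <stdio.h>

#define TOY_MAX_MODE 256u		/* stand-in for MAX_MACHINE_MODE.  */

static unsigned
toy_hash (const unsigned char mode[4], const unsigned char uns_p[4])
{
  unsigned ret = 0;
  for (int i = 0; i < 4; i++)
    {
      ret = ret * TOY_MAX_MODE + mode[i];
      ret = ret * 2 + uns_p[i];
    }
  return ret;
}

int
main (void)
{
  unsigned char m[4] = { 10, 10, 10, 0 }, u[4] = { 0, 1, 1, 0 };
  printf ("%u\n", toy_hash (m, u));
  return 0;
}
#endif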
19007 /* Compare builtin hash entries H1 and H2 for equivalence. */
19008 bool
19009 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
19011 return ((p1->mode[0] == p2->mode[0])
19012 && (p1->mode[1] == p2->mode[1])
19013 && (p1->mode[2] == p2->mode[2])
19014 && (p1->mode[3] == p2->mode[3])
19015 && (p1->uns_p[0] == p2->uns_p[0])
19016 && (p1->uns_p[1] == p2->uns_p[1])
19017 && (p1->uns_p[2] == p2->uns_p[2])
19018 && (p1->uns_p[3] == p2->uns_p[3]));
19021	/* Map types for builtin functions with an explicit return type and up to 3
19022	   arguments.  Functions with fewer than 3 arguments pass VOIDmode for the
19023	   unused argument slots.  */
19024 static tree
19025 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
19026 machine_mode mode_arg1, machine_mode mode_arg2,
19027 enum rs6000_builtins builtin, const char *name)
19029 struct builtin_hash_struct h;
19030 struct builtin_hash_struct *h2;
19031 int num_args = 3;
19032 int i;
19033 tree ret_type = NULL_TREE;
19034 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
19036 /* Create builtin_hash_table. */
19037 if (builtin_hash_table == NULL)
19038 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
19040 h.type = NULL_TREE;
19041 h.mode[0] = mode_ret;
19042 h.mode[1] = mode_arg0;
19043 h.mode[2] = mode_arg1;
19044 h.mode[3] = mode_arg2;
19045 h.uns_p[0] = 0;
19046 h.uns_p[1] = 0;
19047 h.uns_p[2] = 0;
19048 h.uns_p[3] = 0;
19050	  /* If the builtin produces unsigned results or takes unsigned arguments,
19051	     and its decl is handed to the vectorizer (as for widening multiplies
19052	     and permutes), make sure the arguments and return value have the
19053	     correct signedness.  */
19054 switch (builtin)
19056 /* unsigned 1 argument functions. */
19057 case CRYPTO_BUILTIN_VSBOX:
19058 case P8V_BUILTIN_VGBBD:
19059 case MISC_BUILTIN_CDTBCD:
19060 case MISC_BUILTIN_CBCDTD:
19061 h.uns_p[0] = 1;
19062 h.uns_p[1] = 1;
19063 break;
19065 /* unsigned 2 argument functions. */
19066 case ALTIVEC_BUILTIN_VMULEUB:
19067 case ALTIVEC_BUILTIN_VMULEUH:
19068 case ALTIVEC_BUILTIN_VMULOUB:
19069 case ALTIVEC_BUILTIN_VMULOUH:
19070 case CRYPTO_BUILTIN_VCIPHER:
19071 case CRYPTO_BUILTIN_VCIPHERLAST:
19072 case CRYPTO_BUILTIN_VNCIPHER:
19073 case CRYPTO_BUILTIN_VNCIPHERLAST:
19074 case CRYPTO_BUILTIN_VPMSUMB:
19075 case CRYPTO_BUILTIN_VPMSUMH:
19076 case CRYPTO_BUILTIN_VPMSUMW:
19077 case CRYPTO_BUILTIN_VPMSUMD:
19078 case CRYPTO_BUILTIN_VPMSUM:
19079 case MISC_BUILTIN_ADDG6S:
19080 case MISC_BUILTIN_DIVWEU:
19081 case MISC_BUILTIN_DIVWEUO:
19082 case MISC_BUILTIN_DIVDEU:
19083 case MISC_BUILTIN_DIVDEUO:
19084 case VSX_BUILTIN_UDIV_V2DI:
19085 h.uns_p[0] = 1;
19086 h.uns_p[1] = 1;
19087 h.uns_p[2] = 1;
19088 break;
19090 /* unsigned 3 argument functions. */
19091 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
19092 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
19093 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
19094 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
19095 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
19096 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
19097 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
19098 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
19099 case VSX_BUILTIN_VPERM_16QI_UNS:
19100 case VSX_BUILTIN_VPERM_8HI_UNS:
19101 case VSX_BUILTIN_VPERM_4SI_UNS:
19102 case VSX_BUILTIN_VPERM_2DI_UNS:
19103 case VSX_BUILTIN_XXSEL_16QI_UNS:
19104 case VSX_BUILTIN_XXSEL_8HI_UNS:
19105 case VSX_BUILTIN_XXSEL_4SI_UNS:
19106 case VSX_BUILTIN_XXSEL_2DI_UNS:
19107 case CRYPTO_BUILTIN_VPERMXOR:
19108 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
19109 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
19110 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
19111 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
19112 case CRYPTO_BUILTIN_VSHASIGMAW:
19113 case CRYPTO_BUILTIN_VSHASIGMAD:
19114 case CRYPTO_BUILTIN_VSHASIGMA:
19115 h.uns_p[0] = 1;
19116 h.uns_p[1] = 1;
19117 h.uns_p[2] = 1;
19118 h.uns_p[3] = 1;
19119 break;
19121 /* signed permute functions with unsigned char mask. */
19122 case ALTIVEC_BUILTIN_VPERM_16QI:
19123 case ALTIVEC_BUILTIN_VPERM_8HI:
19124 case ALTIVEC_BUILTIN_VPERM_4SI:
19125 case ALTIVEC_BUILTIN_VPERM_4SF:
19126 case ALTIVEC_BUILTIN_VPERM_2DI:
19127 case ALTIVEC_BUILTIN_VPERM_2DF:
19128 case VSX_BUILTIN_VPERM_16QI:
19129 case VSX_BUILTIN_VPERM_8HI:
19130 case VSX_BUILTIN_VPERM_4SI:
19131 case VSX_BUILTIN_VPERM_4SF:
19132 case VSX_BUILTIN_VPERM_2DI:
19133 case VSX_BUILTIN_VPERM_2DF:
19134 h.uns_p[3] = 1;
19135 break;
19137 /* unsigned args, signed return. */
19138 case VSX_BUILTIN_XVCVUXDSP:
19139 case VSX_BUILTIN_XVCVUXDDP_UNS:
19140 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
19141 h.uns_p[1] = 1;
19142 break;
19144 /* signed args, unsigned return. */
19145 case VSX_BUILTIN_XVCVDPUXDS_UNS:
19146 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
19147 case MISC_BUILTIN_UNPACK_TD:
19148 case MISC_BUILTIN_UNPACK_V1TI:
19149 h.uns_p[0] = 1;
19150 break;
19152 /* unsigned arguments for 128-bit pack instructions. */
19153 case MISC_BUILTIN_PACK_TD:
19154 case MISC_BUILTIN_PACK_V1TI:
19155 h.uns_p[1] = 1;
19156 h.uns_p[2] = 1;
19157 break;
19159 default:
19160 break;
19163 /* Figure out how many args are present. */
19164 while (num_args > 0 && h.mode[num_args] == VOIDmode)
19165 num_args--;
19167 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
19168 if (!ret_type && h.uns_p[0])
19169 ret_type = builtin_mode_to_type[h.mode[0]][0];
19171 if (!ret_type)
19172 fatal_error (input_location,
19173 "internal error: builtin function %s had an unexpected "
19174 "return type %s", name, GET_MODE_NAME (h.mode[0]));
19176 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
19177 arg_type[i] = NULL_TREE;
19179 for (i = 0; i < num_args; i++)
19181 int m = (int) h.mode[i+1];
19182 int uns_p = h.uns_p[i+1];
19184 arg_type[i] = builtin_mode_to_type[m][uns_p];
19185 if (!arg_type[i] && uns_p)
19186 arg_type[i] = builtin_mode_to_type[m][0];
19188 if (!arg_type[i])
19189 fatal_error (input_location,
19190 "internal error: builtin function %s, argument %d "
19191 "had unexpected argument type %s", name, i,
19192 GET_MODE_NAME (m));
19195 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
19196 if (*found == NULL)
19198 h2 = ggc_alloc<builtin_hash_struct> ();
19199 *h2 = h;
19200 *found = h2;
19202 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
19203 arg_type[2], NULL_TREE);
19206 return (*found)->type;
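/* Editor's note: builtin_function_type above is a hash-consing cache:
   build a key from the modes and signedness flags, probe the table with
   INSERT, and construct the FUNCTION_TYPE only on a miss, so identical
   signatures share one tree node.  A condensed sketch of that
   find-or-insert shape, with invented names:  */
#if 0 /* illustrative only */
static toy_value *
toy_find_or_build (toy_table *table, toy_key key)
{
  toy_value **slot = toy_find_slot (table, key);	/* hypothetical  */
  if (*slot == NULL)
    *slot = toy_build_value (key);			/* hypothetical  */
  return *slot;
}
#endif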
19209 static void
19210 rs6000_common_init_builtins (void)
19212 const struct builtin_description *d;
19213 size_t i;
19215 tree opaque_ftype_opaque = NULL_TREE;
19216 tree opaque_ftype_opaque_opaque = NULL_TREE;
19217 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
19218 tree v2si_ftype = NULL_TREE;
19219 tree v2si_ftype_qi = NULL_TREE;
19220 tree v2si_ftype_v2si_qi = NULL_TREE;
19221 tree v2si_ftype_int_qi = NULL_TREE;
19222 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
19224 if (!TARGET_PAIRED_FLOAT)
19226 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
19227 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
19230	  /* Paired and SPE builtins are only available if the compiler was built
19231	     with the appropriate options, so only create those builtins when the
19232	     corresponding option is enabled.  Create AltiVec and VSX builtins on
19233	     machines with at least the general-purpose extensions (970 and newer)
19234	     to allow use of the target attribute.  */
19236 if (TARGET_EXTRA_BUILTINS)
19237 builtin_mask |= RS6000_BTM_COMMON;
19239 /* Add the ternary operators. */
19240 d = bdesc_3arg;
19241 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
19243 tree type;
19244 HOST_WIDE_INT mask = d->mask;
19246 if ((mask & builtin_mask) != mask)
19248 if (TARGET_DEBUG_BUILTIN)
19249 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
19250 continue;
19253 if (rs6000_overloaded_builtin_p (d->code))
19255 if (! (type = opaque_ftype_opaque_opaque_opaque))
19256 type = opaque_ftype_opaque_opaque_opaque
19257 = build_function_type_list (opaque_V4SI_type_node,
19258 opaque_V4SI_type_node,
19259 opaque_V4SI_type_node,
19260 opaque_V4SI_type_node,
19261 NULL_TREE);
19263 else
19265 enum insn_code icode = d->icode;
19266 if (d->name == 0)
19268 if (TARGET_DEBUG_BUILTIN)
19269 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
19270 (long unsigned)i);
19272 continue;
19275 if (icode == CODE_FOR_nothing)
19277 if (TARGET_DEBUG_BUILTIN)
19278 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
19279 d->name);
19281 continue;
19284 type = builtin_function_type (insn_data[icode].operand[0].mode,
19285 insn_data[icode].operand[1].mode,
19286 insn_data[icode].operand[2].mode,
19287 insn_data[icode].operand[3].mode,
19288 d->code, d->name);
19291 def_builtin (d->name, type, d->code);
19294 /* Add the binary operators. */
19295 d = bdesc_2arg;
19296 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19298 machine_mode mode0, mode1, mode2;
19299 tree type;
19300 HOST_WIDE_INT mask = d->mask;
19302 if ((mask & builtin_mask) != mask)
19304 if (TARGET_DEBUG_BUILTIN)
19305 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
19306 continue;
19309 if (rs6000_overloaded_builtin_p (d->code))
19311 if (! (type = opaque_ftype_opaque_opaque))
19312 type = opaque_ftype_opaque_opaque
19313 = build_function_type_list (opaque_V4SI_type_node,
19314 opaque_V4SI_type_node,
19315 opaque_V4SI_type_node,
19316 NULL_TREE);
19318 else
19320 enum insn_code icode = d->icode;
19321 if (d->name == 0)
19323 if (TARGET_DEBUG_BUILTIN)
19324 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
19325 (long unsigned)i);
19327 continue;
19330 if (icode == CODE_FOR_nothing)
19332 if (TARGET_DEBUG_BUILTIN)
19333 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
19334 d->name);
19336 continue;
19339 mode0 = insn_data[icode].operand[0].mode;
19340 mode1 = insn_data[icode].operand[1].mode;
19341 mode2 = insn_data[icode].operand[2].mode;
19343 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
19345 if (! (type = v2si_ftype_v2si_qi))
19346 type = v2si_ftype_v2si_qi
19347 = build_function_type_list (opaque_V2SI_type_node,
19348 opaque_V2SI_type_node,
19349 char_type_node,
19350 NULL_TREE);
19353 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
19354 && mode2 == QImode)
19356 if (! (type = v2si_ftype_int_qi))
19357 type = v2si_ftype_int_qi
19358 = build_function_type_list (opaque_V2SI_type_node,
19359 integer_type_node,
19360 char_type_node,
19361 NULL_TREE);
19364 else
19365 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
19366 d->code, d->name);
19369 def_builtin (d->name, type, d->code);
19372 /* Add the simple unary operators. */
19373 d = bdesc_1arg;
19374 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19376 machine_mode mode0, mode1;
19377 tree type;
19378 HOST_WIDE_INT mask = d->mask;
19380 if ((mask & builtin_mask) != mask)
19382 if (TARGET_DEBUG_BUILTIN)
19383 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
19384 continue;
19387 if (rs6000_overloaded_builtin_p (d->code))
19389 if (! (type = opaque_ftype_opaque))
19390 type = opaque_ftype_opaque
19391 = build_function_type_list (opaque_V4SI_type_node,
19392 opaque_V4SI_type_node,
19393 NULL_TREE);
19395 else
19397 enum insn_code icode = d->icode;
19398 if (d->name == 0)
19400 if (TARGET_DEBUG_BUILTIN)
19401 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
19402 (long unsigned) i);
19404 continue;
19407 if (icode == CODE_FOR_nothing)
19409 if (TARGET_DEBUG_BUILTIN)
19410 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
19411 d->name);
19413 continue;
19416 mode0 = insn_data[icode].operand[0].mode;
19417 mode1 = insn_data[icode].operand[1].mode;
19419 if (mode0 == V2SImode && mode1 == QImode)
19421 if (! (type = v2si_ftype_qi))
19422 type = v2si_ftype_qi
19423 = build_function_type_list (opaque_V2SI_type_node,
19424 char_type_node,
19425 NULL_TREE);
19428 else
19429 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
19430 d->code, d->name);
19433 def_builtin (d->name, type, d->code);
19436 /* Add the simple no-argument operators. */
19437 d = bdesc_0arg;
19438 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
19440 machine_mode mode0;
19441 tree type;
19442 HOST_WIDE_INT mask = d->mask;
19444 if ((mask & builtin_mask) != mask)
19446 if (TARGET_DEBUG_BUILTIN)
19447 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
19448 continue;
19450 if (rs6000_overloaded_builtin_p (d->code))
19452 if (!opaque_ftype_opaque)
19453 opaque_ftype_opaque
19454 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
19455 type = opaque_ftype_opaque;
19457 else
19459 enum insn_code icode = d->icode;
19460 if (d->name == 0)
19462 if (TARGET_DEBUG_BUILTIN)
19463 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
19464 (long unsigned) i);
19465 continue;
19467 if (icode == CODE_FOR_nothing)
19469 if (TARGET_DEBUG_BUILTIN)
19470 fprintf (stderr,
19471 "rs6000_builtin, skip no-argument %s (no code)\n",
19472 d->name);
19473 continue;
19475 mode0 = insn_data[icode].operand[0].mode;
19476 if (mode0 == V2SImode)
19478 /* Code for SPE. */
19479 if (! (type = v2si_ftype))
19481 v2si_ftype
19482 = build_function_type_list (opaque_V2SI_type_node,
19483 NULL_TREE);
19484 type = v2si_ftype;
19487 else
19488 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
19489 d->code, d->name);
19491 def_builtin (d->name, type, d->code);
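/* Editor's note (illustrative, not from the original source): each
   bdesc_*arg entry pairs a feature mask, an insn_code, and a builtin
   name, and the loops above derive each builtin's function type from
   the named insn's operand modes.  As a hypothetical example, an entry
   such as
     { RS6000_BTM_ALTIVEC, CODE_FOR_addv4si3,
       "__builtin_altivec_vadduwm", ALTIVEC_BUILTIN_VADDUWM }
   in bdesc_2arg would register a builtin of type V4SI (V4SI, V4SI),
   but only when builtin_mask includes RS6000_BTM_ALTIVEC.  */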
19495 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
19496 static void
19497 init_float128_ibm (machine_mode mode)
19499 if (!TARGET_XL_COMPAT)
19501 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
19502 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
19503 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
19504 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
19506 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
19508 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
19509 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
19510 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
19511 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
19512 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
19513 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
19514 set_optab_libfunc (le_optab, mode, "__gcc_qle");
19516 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
19517 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
19518 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
19519 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
19520 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
19521 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
19522 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
19523 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
19526 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
19527 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
19529 else
19531 set_optab_libfunc (add_optab, mode, "_xlqadd");
19532 set_optab_libfunc (sub_optab, mode, "_xlqsub");
19533 set_optab_libfunc (smul_optab, mode, "_xlqmul");
19534 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
19537 /* Add various conversions for IFmode to use the traditional TFmode
19538 names. */
19539 if (mode == IFmode)
19541 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
19542 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
19543 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
19544 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
19545 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
19546 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
19548 if (TARGET_POWERPC64)
19550 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
19551 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
19552 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
19553 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
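/* Editor's note (illustrative): with the registrations above, IBM
   double-double arithmetic is lowered to libcalls rather than inline
   code; e.g. an addition becomes a call to __gcc_qadd (or _xlqadd
   under -mxl-compat), and a conversion from double becomes
   __gcc_dtoq.  */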
19558 /* Set up IEEE 128-bit floating point routines. Use different names if the
19559 arguments can be passed in a vector register. The historical PowerPC
19560 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
19561 continue to use that if we aren't using vector registers to pass IEEE
19562 128-bit floating point. */
19564 static void
19565 init_float128_ieee (machine_mode mode)
19567 if (FLOAT128_VECTOR_P (mode))
19569 set_optab_libfunc (add_optab, mode, "__addkf3");
19570 set_optab_libfunc (sub_optab, mode, "__subkf3");
19571 set_optab_libfunc (neg_optab, mode, "__negkf2");
19572 set_optab_libfunc (smul_optab, mode, "__mulkf3");
19573 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
19574 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
19575 set_optab_libfunc (abs_optab, mode, "__abskf2");
19577 set_optab_libfunc (eq_optab, mode, "__eqkf2");
19578 set_optab_libfunc (ne_optab, mode, "__nekf2");
19579 set_optab_libfunc (gt_optab, mode, "__gtkf2");
19580 set_optab_libfunc (ge_optab, mode, "__gekf2");
19581 set_optab_libfunc (lt_optab, mode, "__ltkf2");
19582 set_optab_libfunc (le_optab, mode, "__lekf2");
19583 set_optab_libfunc (unord_optab, mode, "__unordkf2");
19585 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
19586 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
19587 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
19588 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
19590 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
19591 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19592 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
19594 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
19595 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19596 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
19598 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
19599 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
19600 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
19601 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
19602 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
19603 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
19605 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
19606 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
19607 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
19608 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
19610 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
19611 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
19612 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
19613 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
19615 if (TARGET_POWERPC64)
19617 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
19618 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
19619 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
19620 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
19624 else
19626 set_optab_libfunc (add_optab, mode, "_q_add");
19627 set_optab_libfunc (sub_optab, mode, "_q_sub");
19628 set_optab_libfunc (neg_optab, mode, "_q_neg");
19629 set_optab_libfunc (smul_optab, mode, "_q_mul");
19630 set_optab_libfunc (sdiv_optab, mode, "_q_div");
19631 if (TARGET_PPC_GPOPT)
19632 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
19634 set_optab_libfunc (eq_optab, mode, "_q_feq");
19635 set_optab_libfunc (ne_optab, mode, "_q_fne");
19636 set_optab_libfunc (gt_optab, mode, "_q_fgt");
19637 set_optab_libfunc (ge_optab, mode, "_q_fge");
19638 set_optab_libfunc (lt_optab, mode, "_q_flt");
19639 set_optab_libfunc (le_optab, mode, "_q_fle");
19641 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
19642 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
19643 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
19644 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
19645 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
19646 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
19647 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
19648 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
19652 static void
19653 rs6000_init_libfuncs (void)
19655 /* __float128 support. */
19656 if (TARGET_FLOAT128_TYPE)
19658 init_float128_ibm (IFmode);
19659 init_float128_ieee (KFmode);
19662 /* AIX/Darwin/64-bit Linux quad floating point routines. */
19663 if (TARGET_LONG_DOUBLE_128)
19665 if (!TARGET_IEEEQUAD)
19666 init_float128_ibm (TFmode);
19668 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
19669 else
19670 init_float128_ieee (TFmode);
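/* Editor's note (illustrative): the net effect, when the __float128
   machinery is available, is that KFmode always gets the IEEE "kf"
   entry points, IFmode always gets the IBM double-double "__gcc_q*"
   ones, and TFmode follows whichever format long double actually has
   (TARGET_IEEEQUAD selects the IEEE set).  */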
19675 /* Expand a block clear operation, and return 1 if successful. Return 0
19676 if we should let the compiler generate normal code.
19678 operands[0] is the destination
19679 operands[1] is the length
19680 operands[3] is the alignment */
19682 int
19683 expand_block_clear (rtx operands[])
19685 rtx orig_dest = operands[0];
19686 rtx bytes_rtx = operands[1];
19687 rtx align_rtx = operands[3];
19688 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
19689 HOST_WIDE_INT align;
19690 HOST_WIDE_INT bytes;
19691 int offset;
19692 int clear_bytes;
19693 int clear_step;
19695 /* If this is not a fixed size clear, just call memset. */
19696 if (! constp)
19697 return 0;
19699 /* This must be a fixed size alignment */
19700 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19701 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19703 /* Anything to clear? */
19704 bytes = INTVAL (bytes_rtx);
19705 if (bytes <= 0)
19706 return 1;
19708 /* Use the builtin memset after a point, to avoid huge code bloat.
19709 When optimize_size, avoid any significant code bloat; calling
19710 memset is about 4 instructions, so allow for one instruction to
19711 load zero and three to do clearing. */
19712 if (TARGET_ALTIVEC && align >= 128)
19713 clear_step = 16;
19714 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
19715 clear_step = 8;
19716 else if (TARGET_SPE && align >= 64)
19717 clear_step = 8;
19718 else
19719 clear_step = 4;
19721 if (optimize_size && bytes > 3 * clear_step)
19722 return 0;
19723 if (! optimize_size && bytes > 8 * clear_step)
19724 return 0;
19726 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
19728 machine_mode mode = BLKmode;
19729 rtx dest;
19731 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
19733 clear_bytes = 16;
19734 mode = V4SImode;
19736 else if (bytes >= 8 && TARGET_SPE && align >= 64)
19738 clear_bytes = 8;
19739 mode = V2SImode;
19741 else if (bytes >= 8 && TARGET_POWERPC64
19742 && (align >= 64 || !STRICT_ALIGNMENT))
19744 clear_bytes = 8;
19745 mode = DImode;
19746 if (offset == 0 && align < 64)
19748 rtx addr;
19750 /* If the address form is reg+offset with offset not a
19751 multiple of four, reload into reg indirect form here
19752 rather than waiting for reload. This way we get one
19753 reload, not one per store. */
19754 addr = XEXP (orig_dest, 0);
19755 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19756 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19757 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19759 addr = copy_addr_to_reg (addr);
19760 orig_dest = replace_equiv_address (orig_dest, addr);
19764 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19765 { /* move 4 bytes */
19766 clear_bytes = 4;
19767 mode = SImode;
19769 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19770 { /* move 2 bytes */
19771 clear_bytes = 2;
19772 mode = HImode;
19774 else /* move 1 byte at a time */
19776 clear_bytes = 1;
19777 mode = QImode;
19780 dest = adjust_address (orig_dest, mode, offset);
19782 emit_move_insn (dest, CONST0_RTX (mode));
19785 return 1;
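/* Worked example (editor's illustration): clearing 22 bytes with
   16-byte alignment on an AltiVec target picks clear_step = 16, passes
   the size check (22 <= 8 * 16), and emits three stores: one 16-byte
   V4SImode store, one SImode store, and one HImode store.  */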
19788 /* Emit a potentially record-form instruction, setting DST from SRC.
19789 If DOT is 0, that is all; otherwise, set CCREG to the result of the
19790 signed comparison of DST with zero. If DOT is 1, the generated RTL
19791 doesn't care about the DST result; if DOT is 2, it does. If CCREG
19792 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
19793 a separate COMPARE. */
19795 static void
19796 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
19798 if (dot == 0)
19800 emit_move_insn (dst, src);
19801 return;
19804 if (cc_reg_not_cr0_operand (ccreg, CCmode))
19806 emit_move_insn (dst, src);
19807 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
19808 return;
19811 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
19812 if (dot == 1)
19814 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
19815 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
19817 else
19819 rtx set = gen_rtx_SET (dst, src);
19820 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
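/* Editor's illustration: for dot == 1 the PARALLEL emitted above is
   (parallel [(set CCREG (compare SRC 0)) (clobber DST)]), i.e. only
   the condition result matters, while dot == 2 keeps the destination
   live with (parallel [(set CCREG (compare SRC 0)) (set DST SRC)]),
   matching the record-form ("rlwinm." etc.) patterns in the machine
   description.  */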
19824 /* Figure out the correct instructions to generate to load data for
19825 block compare. MODE is used for the read from memory, and
19826 data is zero extended if REG is wider than MODE. If LE code
19827 is being generated, bswap loads are used.
19829 REG is the destination register to move the data into.
19830 MEM is the memory block being read.
19831 MODE is the mode of memory to use for the read. */
19832 static void
19833 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
19835 switch (GET_MODE (reg))
19837 case E_DImode:
19838 switch (mode)
19840 case E_QImode:
19841 emit_insn (gen_zero_extendqidi2 (reg, mem));
19842 break;
19843 case E_HImode:
19845 rtx src = mem;
19846 if (!BYTES_BIG_ENDIAN)
19848 src = gen_reg_rtx (HImode);
19849 emit_insn (gen_bswaphi2 (src, mem));
19851 emit_insn (gen_zero_extendhidi2 (reg, src));
19852 break;
19854 case E_SImode:
19856 rtx src = mem;
19857 if (!BYTES_BIG_ENDIAN)
19859 src = gen_reg_rtx (SImode);
19860 emit_insn (gen_bswapsi2 (src, mem));
19862 emit_insn (gen_zero_extendsidi2 (reg, src));
19864 break;
19865 case E_DImode:
19866 if (!BYTES_BIG_ENDIAN)
19867 emit_insn (gen_bswapdi2 (reg, mem));
19868 else
19869 emit_insn (gen_movdi (reg, mem));
19870 break;
19871 default:
19872 gcc_unreachable ();
19874 break;
19876 case E_SImode:
19877 switch (mode)
19879 case E_QImode:
19880 emit_insn (gen_zero_extendqisi2 (reg, mem));
19881 break;
19882 case E_HImode:
19884 rtx src = mem;
19885 if (!BYTES_BIG_ENDIAN)
19887 src = gen_reg_rtx (HImode);
19888 emit_insn (gen_bswaphi2 (src, mem));
19890 emit_insn (gen_zero_extendhisi2 (reg, src));
19891 break;
19893 case E_SImode:
19894 if (!BYTES_BIG_ENDIAN)
19895 emit_insn (gen_bswapsi2 (reg, mem));
19896 else
19897 emit_insn (gen_movsi (reg, mem));
19898 break;
19899 case E_DImode:
19900 /* DImode is larger than the destination reg so is not expected. */
19901 gcc_unreachable ();
19902 break;
19903 default:
19904 gcc_unreachable ();
19906 break;
19907 default:
19908 gcc_unreachable ();
19909 break;
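/* Editor's note (illustrative): on a little-endian target an SImode
   chunk destined for a DImode register is byte-swapped into a fresh
   SImode temporary via gen_bswapsi2 and then zero-extended, so the
   register ends up with the same image a big-endian load would give;
   the word-sized unsigned compares done by the callers therefore agree
   with memcmp's byte order.  */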
19913 /* Select the mode to be used for reading the next chunk of bytes
19914 in the compare.
19916 OFFSET is the current read offset from the beginning of the block.
19917 BYTES is the number of bytes remaining to be read.
19918 ALIGN is the minimum alignment, in bytes, of the memory blocks being compared.
19919 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
19920 the largest allowable mode. */
19921 static machine_mode
19922 select_block_compare_mode (unsigned HOST_WIDE_INT offset,
19923 unsigned HOST_WIDE_INT bytes,
19924 unsigned HOST_WIDE_INT align, bool word_mode_ok)
19926 /* First see if we can do a whole load unit
19927 as that will be more efficient than a larger load + shift. */
19929 /* If big, use biggest chunk.
19930 If exactly chunk size, use that size.
19931 If remainder can be done in one piece with shifting, do that.
19932 Do largest chunk possible without violating alignment rules. */
19934 /* The most we can read without potential page crossing. */
19935 unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
19937 if (word_mode_ok && bytes >= UNITS_PER_WORD)
19938 return word_mode;
19939 else if (bytes == GET_MODE_SIZE (SImode))
19940 return SImode;
19941 else if (bytes == GET_MODE_SIZE (HImode))
19942 return HImode;
19943 else if (bytes == GET_MODE_SIZE (QImode))
19944 return QImode;
19945 else if (bytes < GET_MODE_SIZE (SImode)
19946 && offset >= GET_MODE_SIZE (SImode) - bytes)
19947 /* This matches the case where we have SImode and 3 bytes
19948 and offset >= 1 and permits us to move back one and overlap
19949 with the previous read, thus avoiding having to shift
19950 unwanted bytes off of the input. */
19951 return SImode;
19952 else if (word_mode_ok && bytes < UNITS_PER_WORD
19953 && offset >= UNITS_PER_WORD - bytes)
19954 /* Similarly, if we can use DImode it will get matched here and
19955 can do an overlapping read that ends at the end of the block. */
19956 return word_mode;
19957 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
19958 /* It is safe to do all remaining in one load of largest size,
19959 possibly with a shift to get rid of unwanted bytes. */
19960 return word_mode;
19961 else if (maxread >= GET_MODE_SIZE (SImode))
19962 /* It is safe to do all remaining in one SImode load,
19963 possibly with a shift to get rid of unwanted bytes. */
19964 return SImode;
19965 else if (bytes > GET_MODE_SIZE (SImode))
19966 return SImode;
19967 else if (bytes > GET_MODE_SIZE (HImode))
19968 return HImode;
19970 /* Final fallback: do one byte at a time. */
19971 return QImode;
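/* Worked example (editor's illustration): with word_mode == DImode,
   offset == 5 and bytes == 3 remaining, the "offset >= size - bytes"
   case selects SImode; the caller (on targets with efficient unaligned
   loads) then backs the load up by one byte, so the 4-byte read
   overlaps an already-compared byte and ends exactly at the end of the
   block, avoiding a shift.  */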
19974 /* Compute the alignment of pointer+OFFSET where the original alignment
19975 of pointer was BASE_ALIGN. */
19976 static unsigned HOST_WIDE_INT
19977 compute_current_alignment (unsigned HOST_WIDE_INT base_align,
19978 unsigned HOST_WIDE_INT offset)
19980 if (offset == 0)
19981 return base_align;
19982 return min (base_align, offset & -offset);
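/* Editor's note (illustrative): offset & -offset isolates the lowest
   set bit of OFFSET, i.e. the largest power of two dividing it.  With
   base_align == 8 and offset == 12, 12 & -12 == 4, so pointer+12 can
   only be assumed 4-byte aligned and the result is min (8, 4) == 4.  */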
19985 /* Expand a block compare operation, and return true if successful.
19986 Return false if we should let the compiler generate normal code,
19987 probably a memcmp call.
19989 OPERANDS[0] is the target (result).
19990 OPERANDS[1] is the first source.
19991 OPERANDS[2] is the second source.
19992 OPERANDS[3] is the length.
19993 OPERANDS[4] is the alignment. */
19994 bool
19995 expand_block_compare (rtx operands[])
19997 rtx target = operands[0];
19998 rtx orig_src1 = operands[1];
19999 rtx orig_src2 = operands[2];
20000 rtx bytes_rtx = operands[3];
20001 rtx align_rtx = operands[4];
20002 HOST_WIDE_INT cmp_bytes = 0;
20003 rtx src1 = orig_src1;
20004 rtx src2 = orig_src2;
20006 /* This case is complicated to handle because the subtract
20007 with carry instructions do not generate the 64-bit
20008 carry and so we must emit code to calculate it ourselves.
20009 We choose not to implement this yet. */
20010 if (TARGET_32BIT && TARGET_POWERPC64)
20011 return false;
20013 /* If this is not a fixed size compare, just call memcmp. */
20014 if (!CONST_INT_P (bytes_rtx))
20015 return false;
20017 /* This must be a fixed size alignment. */
20018 if (!CONST_INT_P (align_rtx))
20019 return false;
20021 unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;
20023 /* rs6000_slow_unaligned_access -- don't do unaligned stuff. */
20024 if (rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
20025 || rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2)))
20026 return false;
20028 gcc_assert (GET_MODE (target) == SImode);
20030 /* Anything to compare? */
20031 unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
20032 if (bytes == 0)
20033 return true;
20035 /* The code generated for p7 and older is not faster than glibc
20036 memcmp if alignment is small and length is not short, so bail
20037 out to avoid those conditions. */
20038 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
20039 && ((base_align == 1 && bytes > 16)
20040 || (base_align == 2 && bytes > 32)))
20041 return false;
20043 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20044 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20045 /* P7/P8 code uses cond for subfc., but P9 uses
20046 it for cmpld, which needs CCUNSmode. */
20047 rtx cond;
20048 if (TARGET_P9_MISC)
20049 cond = gen_reg_rtx (CCUNSmode);
20050 else
20051 cond = gen_reg_rtx (CCmode);
20053 /* If we have an LE target without ldbrx and word_mode is DImode,
20054 then we must avoid using word_mode. */
20055 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20056 && word_mode == DImode);
20058 /* Strategy phase. How many ops will this take and should we expand it? */
20060 unsigned HOST_WIDE_INT offset = 0;
20061 machine_mode load_mode =
20062 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20063 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20065 /* We don't want to generate too much code. */
20066 unsigned HOST_WIDE_INT max_bytes =
20067 load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit;
20068 if (!IN_RANGE (bytes, 1, max_bytes))
20069 return false;
20071 bool generate_6432_conversion = false;
20072 rtx convert_label = NULL;
20073 rtx final_label = NULL;
20075 /* Example of generated code for an 18-byte compare with 1-byte alignment.
20076 Compiled with -fno-reorder-blocks for clarity.
20077 ldbrx 10,31,8
20078 ldbrx 9,7,8
20079 subfc. 9,9,10
20080 bne 0,.L6487
20081 addi 9,12,8
20082 addi 5,11,8
20083 ldbrx 10,0,9
20084 ldbrx 9,0,5
20085 subfc. 9,9,10
20086 bne 0,.L6487
20087 addi 9,12,16
20088 lhbrx 10,0,9
20089 addi 9,11,16
20090 lhbrx 9,0,9
20091 subf 9,9,10
20092 b .L6488
20093 .p2align 4,,15
20094 .L6487: #convert_label
20095 popcntd 9,9
20096 subfe 10,10,10
20097 or 9,9,10
20098 .L6488: #final_label
20099 extsw 10,9
20101 We start off with DImode for two blocks that jump to the DI->SI conversion
20102 if the difference is found there, then a final block of HImode that skips
20103 the DI->SI conversion. */
20105 while (bytes > 0)
20107 unsigned int align = compute_current_alignment (base_align, offset);
20108 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20109 load_mode = select_block_compare_mode (offset, bytes, align,
20110 word_mode_ok);
20111 else
20112 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
20113 load_mode_size = GET_MODE_SIZE (load_mode);
20114 if (bytes >= load_mode_size)
20115 cmp_bytes = load_mode_size;
20116 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20118 /* Move this load back so it doesn't go past the end.
20119 P8/P9 can do this efficiently. */
20120 unsigned int extra_bytes = load_mode_size - bytes;
20121 cmp_bytes = bytes;
20122 if (extra_bytes < offset)
20124 offset -= extra_bytes;
20125 cmp_bytes = load_mode_size;
20126 bytes = cmp_bytes;
20129 else
20130 /* P7 and earlier can't do the overlapping load trick fast,
20131 so this forces a non-overlapping load and a shift to get
20132 rid of the extra bytes. */
20133 cmp_bytes = bytes;
20135 src1 = adjust_address (orig_src1, load_mode, offset);
20136 src2 = adjust_address (orig_src2, load_mode, offset);
20138 if (!REG_P (XEXP (src1, 0)))
20140 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20141 src1 = replace_equiv_address (src1, src1_reg);
20143 set_mem_size (src1, cmp_bytes);
20145 if (!REG_P (XEXP (src2, 0)))
20147 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20148 src2 = replace_equiv_address (src2, src2_reg);
20150 set_mem_size (src2, cmp_bytes);
20152 do_load_for_compare (tmp_reg_src1, src1, load_mode);
20153 do_load_for_compare (tmp_reg_src2, src2, load_mode);
20155 if (cmp_bytes < load_mode_size)
20157 /* Shift unneeded bytes off. */
20158 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
20159 if (word_mode == DImode)
20161 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
20162 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
20164 else
20166 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20167 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20171 int remain = bytes - cmp_bytes;
20172 if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode))
20174 /* Target is larger than load size so we don't need to
20175 reduce result size. */
20177 /* We previously did a block that needed a 64->32 conversion but
20178 the current block does not, so a label is needed to jump
20179 to the end. */
20180 if (generate_6432_conversion && !final_label)
20181 final_label = gen_label_rtx ();
20183 if (remain > 0)
20185 /* This is not the last block, branch to the end if the result
20186 of this subtract is not zero. */
20187 if (!final_label)
20188 final_label = gen_label_rtx ();
20189 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20190 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20191 rtx cr = gen_reg_rtx (CCmode);
20192 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr);
20193 emit_insn (gen_movsi (target,
20194 gen_lowpart (SImode, tmp_reg_src2)));
20195 rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
20196 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20197 fin_ref, pc_rtx);
20198 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20199 JUMP_LABEL (j) = final_label;
20200 LABEL_NUSES (final_label) += 1;
20202 else
20204 if (word_mode == DImode)
20206 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
20207 tmp_reg_src2));
20208 emit_insn (gen_movsi (target,
20209 gen_lowpart (SImode, tmp_reg_src2)));
20211 else
20212 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
20214 if (final_label)
20216 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20217 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20218 JUMP_LABEL (j) = final_label;
20219 LABEL_NUSES (final_label) += 1;
20220 emit_barrier ();
20224 else
20226 /* Do we need a 64->32 conversion block? We need the 64->32
20227 conversion even if target size == load_mode size because
20228 the subtract generates one extra bit. */
20229 generate_6432_conversion = true;
20231 if (remain > 0)
20233 if (!convert_label)
20234 convert_label = gen_label_rtx ();
20236 /* Compare to zero and branch to convert_label if not zero. */
20237 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
20238 if (TARGET_P9_MISC)
20240 /* Generate a compare, and convert with a setb later. */
20241 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20242 tmp_reg_src2);
20243 emit_insn (gen_rtx_SET (cond, cmp));
20245 else
20246 /* Generate a subfc. and use the longer
20247 sequence for conversion. */
20248 if (TARGET_64BIT)
20249 emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20250 tmp_reg_src1, cond));
20251 else
20252 emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20253 tmp_reg_src1, cond));
20254 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20255 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20256 cvt_ref, pc_rtx);
20257 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20258 JUMP_LABEL (j) = convert_label;
20259 LABEL_NUSES (convert_label) += 1;
20261 else
20263 /* Just do the subtract/compare. Since this is the last block
20264 the convert code will be generated immediately following. */
20265 if (TARGET_P9_MISC)
20267 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20268 tmp_reg_src2);
20269 emit_insn (gen_rtx_SET (cond, cmp));
20271 else
20272 if (TARGET_64BIT)
20273 emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2,
20274 tmp_reg_src1));
20275 else
20276 emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2,
20277 tmp_reg_src1));
20281 offset += cmp_bytes;
20282 bytes -= cmp_bytes;
20285 if (generate_6432_conversion)
20287 if (convert_label)
20288 emit_label (convert_label);
20290 /* We need to produce DI result from sub, then convert to target SI
20291 while maintaining <0 / ==0 / >0 properties. This sequence works:
20292 subfc L,A,B
20293 subfe H,H,H
20294 popcntd L,L
20295 rldimi L,H,6,0
20297 This is an alternative Segher cooked up if somebody
20298 wants to expand this for something that doesn't have popcntd:
20299 subfc L,a,b
20300 subfe H,x,x
20301 addic t,L,-1
20302 subfe v,t,L
20303 or z,v,H
20305 And finally, P9 can just do this:
20306 cmpld A,B
20307 setb r */
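/* Editor's note on why the popcntd sequence works: after subfc/subfe,
   L is nonzero iff the two words differ, and H is -1 exactly when the
   unsigned subtract borrowed (first operand below the second).
   popcntd maps any nonzero L to a small positive value, and OR-ing in
   H forces the result negative precisely in the borrow case, which
   preserves the <0 / ==0 / >0 contract.  */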
20309 if (TARGET_P9_MISC)
20311 emit_insn (gen_setb_unsigned (target, cond));
20313 else
20315 if (TARGET_64BIT)
20317 rtx tmp_reg_ca = gen_reg_rtx (DImode);
20318 emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
20319 emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2));
20320 emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca));
20321 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
20323 else
20325 rtx tmp_reg_ca = gen_reg_rtx (SImode);
20326 emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
20327 emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2));
20328 emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca));
20333 if (final_label)
20334 emit_label (final_label);
20336 gcc_assert (bytes == 0);
20337 return true;
20340 /* Generate alignment check and branch code to set up for
20341 strncmp when we don't have DI alignment.
20342 STRNCMP_LABEL is the label to branch if there is a page crossing.
20343 SRC is the string pointer to be examined.
20344 BYTES is the max number of bytes to compare. */
20345 static void
20346 expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
20348 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
20349 rtx src_check = copy_addr_to_reg (XEXP (src, 0));
20350 if (GET_MODE (src_check) == SImode)
20351 emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
20352 else
20353 emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
20354 rtx cond = gen_reg_rtx (CCmode);
20355 emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
20356 GEN_INT (4096 - bytes)));
20358 rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);
20360 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20361 pc_rtx, lab_ref);
20362 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20363 JUMP_LABEL (j) = strncmp_label;
20364 LABEL_NUSES (strncmp_label) += 1;
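/* Worked example (editor's illustration): for a 16-byte inline
   compare, the code above masks the address to its low 12 bits and
   compares against 4096 - 16.  If the masked value exceeds that, a
   16-byte read could cross into the next 4 KiB page (possibly past the
   terminating NUL), so control branches to the library-call path.  */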
20367 /* Expand a string compare operation with length, and return
20368 true if successful. Return false if we should let the
20369 compiler generate normal code, probably a strncmp call.
20371 OPERANDS[0] is the target (result).
20372 OPERANDS[1] is the first source.
20373 OPERANDS[2] is the second source.
20374 If NO_LENGTH is zero, then:
20375 OPERANDS[3] is the length.
20376 OPERANDS[4] is the alignment in bytes.
20377 If NO_LENGTH is nonzero, then:
20378 OPERANDS[3] is the alignment in bytes. */
20379 bool
20380 expand_strn_compare (rtx operands[], int no_length)
20382 rtx target = operands[0];
20383 rtx orig_src1 = operands[1];
20384 rtx orig_src2 = operands[2];
20385 rtx bytes_rtx, align_rtx;
20386 if (no_length)
20388 bytes_rtx = NULL;
20389 align_rtx = operands[3];
20391 else
20393 bytes_rtx = operands[3];
20394 align_rtx = operands[4];
20396 unsigned HOST_WIDE_INT cmp_bytes = 0;
20397 rtx src1 = orig_src1;
20398 rtx src2 = orig_src2;
20400 /* If we have a length, it must be constant. This simplifies things
20401 a bit as we don't have to generate code to check if we've exceeded
20402 the length. Later this could be extended to handle non-constant lengths. */
20403 if (!no_length && !CONST_INT_P (bytes_rtx))
20404 return false;
20406 /* This must be a fixed size alignment. */
20407 if (!CONST_INT_P (align_rtx))
20408 return false;
20410 unsigned int base_align = UINTVAL (align_rtx);
20411 int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
20412 int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
20414 /* rs6000_slow_unaligned_access -- don't do unaligned stuff. */
20415 if (rs6000_slow_unaligned_access (word_mode, align1)
20416 || rs6000_slow_unaligned_access (word_mode, align2))
20417 return false;
20419 gcc_assert (GET_MODE (target) == SImode);
20421 /* If we have an LE target without ldbrx and word_mode is DImode,
20422 then we must avoid using word_mode. */
20423 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20424 && word_mode == DImode);
20426 unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
20428 unsigned HOST_WIDE_INT offset = 0;
20429 unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
20430 unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
20431 if (no_length)
20432 /* Use this as a stand-in to determine the mode to use. */
20433 bytes = rs6000_string_compare_inline_limit * word_mode_size;
20434 else
20435 bytes = UINTVAL (bytes_rtx);
20437 machine_mode load_mode =
20438 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20439 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20440 compare_length = rs6000_string_compare_inline_limit * load_mode_size;
20442 /* If we have equality at the end of the last compare and we have not
20443 found the end of the string, we need to call strcmp/strncmp to
20444 compare the remainder. */
20445 bool equality_compare_rest = false;
20447 if (no_length)
20449 bytes = compare_length;
20450 equality_compare_rest = true;
20452 else
20454 if (bytes <= compare_length)
20455 compare_length = bytes;
20456 else
20457 equality_compare_rest = true;
20460 rtx result_reg = gen_reg_rtx (word_mode);
20461 rtx final_move_label = gen_label_rtx ();
20462 rtx final_label = gen_label_rtx ();
20463 rtx begin_compare_label = NULL;
20465 if (base_align < 8)
20467 /* Generate code that checks distance to 4k boundary for this case. */
20468 begin_compare_label = gen_label_rtx ();
20469 rtx strncmp_label = gen_label_rtx ();
20470 rtx jmp;
20472 /* Strncmp for power8 in glibc does this:
20473 rldicl r8,r3,0,52
20474 cmpldi cr7,r8,4096-16
20475 bgt cr7,L(pagecross) */
20477 /* Make sure that the length we use for the alignment test and
20478 the subsequent code generation are in agreement so we do not
20479 go past the length we tested for a 4k boundary crossing. */
20480 unsigned HOST_WIDE_INT align_test = compare_length;
20481 if (align_test < 8)
20483 align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
20484 base_align = align_test;
20486 else
20488 align_test = ROUND_UP (align_test, 8);
20489 base_align = 8;
20492 if (align1 < 8)
20493 expand_strncmp_align_check (strncmp_label, src1, align_test);
20494 if (align2 < 8)
20495 expand_strncmp_align_check (strncmp_label, src2, align_test);
20497 /* Now generate the following sequence:
20498 - branch to begin_compare
20499 - strncmp_label
20500 - call to strncmp
20501 - branch to final_label
20502 - begin_compare_label */
20504 rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
20505 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
20506 JUMP_LABEL (jmp) = begin_compare_label;
20507 LABEL_NUSES (begin_compare_label) += 1;
20508 emit_barrier ();
20510 emit_label (strncmp_label);
20512 if (!REG_P (XEXP (src1, 0)))
20514 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20515 src1 = replace_equiv_address (src1, src1_reg);
20518 if (!REG_P (XEXP (src2, 0)))
20520 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20521 src2 = replace_equiv_address (src2, src2_reg);
20524 if (no_length)
20526 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20527 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20528 target, LCT_NORMAL, GET_MODE (target),
20529 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20530 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20532 else
20534 /* -m32 -mpowerpc64 results in word_mode being DImode even
20535 though otherwise it is 32-bit. The length arg to strncmp
20536 is a size_t which will be the same size as pointers. */
20537 rtx len_rtx;
20538 if (TARGET_64BIT)
20539 len_rtx = gen_reg_rtx (DImode);
20540 else
20541 len_rtx = gen_reg_rtx (SImode);
20543 emit_move_insn (len_rtx, bytes_rtx);
20545 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20546 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20547 target, LCT_NORMAL, GET_MODE (target),
20548 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20549 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20550 len_rtx, GET_MODE (len_rtx));
20553 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20554 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20555 JUMP_LABEL (jmp) = final_label;
20556 LABEL_NUSES (final_label) += 1;
20557 emit_barrier ();
20558 emit_label (begin_compare_label);
20561 rtx cleanup_label = NULL;
20562 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20563 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20565 /* Generate sequence of ld/ldbrx, cmpb to compare out
20566 to the length specified. */
20567 unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
20568 while (bytes_to_compare > 0)
20570 /* Compare sequence:
20571 check each 8B with: ld/ld cmpd bne
20572 If equal, use rldicr/cmpb to check for zero byte.
20573 cleanup code at end:
20574 cmpb get byte that differs
20575 cmpb look for zero byte
20576 orc combine
20577 cntlzd get bit of first zero/diff byte
20578 addi convert for rldcl use
20579 rldcl rldcl extract diff/zero byte
20580 subf subtract for final result
20582 The last compare can branch around the cleanup code if the
20583 result is zero because the strings are exactly equal. */
20584 unsigned int align = compute_current_alignment (base_align, offset);
20585 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20586 load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
20587 word_mode_ok);
20588 else
20589 load_mode = select_block_compare_mode (0, bytes_to_compare, align,
20590 word_mode_ok);
20591 load_mode_size = GET_MODE_SIZE (load_mode);
20592 if (bytes_to_compare >= load_mode_size)
20593 cmp_bytes = load_mode_size;
20594 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20596 /* Move this load back so it doesn't go past the end.
20597 P8/P9 can do this efficiently. */
20598 unsigned int extra_bytes = load_mode_size - bytes_to_compare;
20599 cmp_bytes = bytes_to_compare;
20600 if (extra_bytes < offset)
20602 offset -= extra_bytes;
20603 cmp_bytes = load_mode_size;
20604 bytes_to_compare = cmp_bytes;
20607 else
20608 /* P7 and earlier can't do the overlapping load trick fast,
20609 so this forces a non-overlapping load and a shift to get
20610 rid of the extra bytes. */
20611 cmp_bytes = bytes_to_compare;
20613 src1 = adjust_address (orig_src1, load_mode, offset);
20614 src2 = adjust_address (orig_src2, load_mode, offset);
20616 if (!REG_P (XEXP (src1, 0)))
20618 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20619 src1 = replace_equiv_address (src1, src1_reg);
20621 set_mem_size (src1, cmp_bytes);
20623 if (!REG_P (XEXP (src2, 0)))
20625 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20626 src2 = replace_equiv_address (src2, src2_reg);
20628 set_mem_size (src2, cmp_bytes);
20630 do_load_for_compare (tmp_reg_src1, src1, load_mode);
20631 do_load_for_compare (tmp_reg_src2, src2, load_mode);
20633 /* We must always left-align the data we read, and
20634 clear any bytes to the right that are beyond the string.
20635 Otherwise the cmpb sequence won't produce the correct
20636 results. The beginning of the compare will be done
20637 with word_mode, so it will not need any extra shift or
20638 clear-right operations. */
20640 if (load_mode_size < word_mode_size)
20642 /* Rotate left first. */
20643 rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
20644 if (word_mode == DImode)
20646 emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
20647 emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
20649 else
20651 emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20652 emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20656 if (cmp_bytes < word_mode_size)
20658 /* Now clear right. This plus the rotate can be
20659 turned into a rldicr instruction. */
20660 HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20661 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20662 if (word_mode == DImode)
20664 emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20665 emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20667 else
20669 emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20670 emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20674 /* Cases to handle. A and B are chunks of the two strings.
20675 1: Not end of comparison:
20676 A != B: branch to cleanup code to compute result.
20677 A == B: check for 0 byte, next block if not found.
20678 2: End of the inline comparison:
20679 A != B: branch to cleanup code to compute result.
20680 A == B: check for 0 byte, call strcmp/strncmp
20681 3: Compared requested N bytes:
20682 A == B: branch to result 0.
20683 A != B: cleanup code to compute result. */
20685 unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
20687 rtx dst_label;
20688 if (remain > 0 || equality_compare_rest)
20690 /* Branch to cleanup code, otherwise fall through to do
20691 more compares. */
20692 if (!cleanup_label)
20693 cleanup_label = gen_label_rtx ();
20694 dst_label = cleanup_label;
20696 else
20697 /* Branch to end and produce result of 0. */
20698 dst_label = final_move_label;
20700 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
20701 rtx cond = gen_reg_rtx (CCmode);
20703 /* Always produce the 0 result, it is needed if
20704 cmpb finds a 0 byte in this chunk. */
20705 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20706 rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
20708 rtx cmp_rtx;
20709 if (remain == 0 && !equality_compare_rest)
20710 cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
20711 else
20712 cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20714 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20715 lab_ref, pc_rtx);
20716 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20717 JUMP_LABEL (j) = dst_label;
20718 LABEL_NUSES (dst_label) += 1;
20720 if (remain > 0 || equality_compare_rest)
20722 /* Generate a cmpb to test for a 0 byte and branch
20723 to final result if found. */
20724 rtx cmpb_zero = gen_reg_rtx (word_mode);
20725 rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
20726 rtx condz = gen_reg_rtx (CCmode);
20727 rtx zero_reg = gen_reg_rtx (word_mode);
20728 if (word_mode == SImode)
20730 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20731 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20732 if (cmp_bytes < word_mode_size)
20734 /* Don't want to look at zero bytes past end. */
20735 HOST_WIDE_INT mb =
20736 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20737 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20738 emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
20741 else
20743 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20744 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20745 if (cmp_bytes < word_mode_size)
20747 /* Don't want to look at zero bytes past end. */
20748 HOST_WIDE_INT mb =
20749 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20750 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20751 emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
20755 emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
20756 rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
20757 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
20758 lab_ref_fin, pc_rtx);
20759 rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20760 JUMP_LABEL (j2) = final_move_label;
20761 LABEL_NUSES (final_move_label) += 1;
20765 offset += cmp_bytes;
20766 bytes_to_compare -= cmp_bytes;
20769 if (equality_compare_rest)
20771 /* Update pointers past what has been compared already. */
20772 src1 = adjust_address (orig_src1, load_mode, offset);
20773 src2 = adjust_address (orig_src2, load_mode, offset);
20775 if (!REG_P (XEXP (src1, 0)))
20777 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20778 src1 = replace_equiv_address (src1, src1_reg);
20780 set_mem_size (src1, cmp_bytes);
20782 if (!REG_P (XEXP (src2, 0)))
20784 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20785 src2 = replace_equiv_address (src2, src2_reg);
20787 set_mem_size (src2, cmp_bytes);
20789 /* Construct call to strcmp/strncmp to compare the rest of the string. */
20790 if (no_length)
20792 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20793 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20794 target, LCT_NORMAL, GET_MODE (target),
20795 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20796 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20798 else
20800 rtx len_rtx;
20801 if (TARGET_64BIT)
20802 len_rtx = gen_reg_rtx (DImode);
20803 else
20804 len_rtx = gen_reg_rtx (SImode);
20806 emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
20807 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20808 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20809 target, LCT_NORMAL, GET_MODE (target),
20810 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20811 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20812 len_rtx, GET_MODE (len_rtx));
20815 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20816 rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20817 JUMP_LABEL (jmp) = final_label;
20818 LABEL_NUSES (final_label) += 1;
20819 emit_barrier ();
20822 if (cleanup_label)
20823 emit_label (cleanup_label);
20825 /* Generate the final sequence that identifies the differing
20826 byte and generates the final result, taking into account
20827 zero bytes:
20829 cmpb cmpb_result1, src1, src2
20830 cmpb cmpb_result2, src1, zero
20831 orc cmpb_result1, cmpb_result1, cmpb_result2
20832 cntlzd get bit of first zero/diff byte
20833 addi convert for rldcl use
20834 rldcl rldcl extract diff/zero byte
20835 subf subtract for final result
20838 rtx cmpb_diff = gen_reg_rtx (word_mode);
20839 rtx cmpb_zero = gen_reg_rtx (word_mode);
20840 rtx rot_amt = gen_reg_rtx (word_mode);
20841 rtx zero_reg = gen_reg_rtx (word_mode);
20843 rtx rot1_1 = gen_reg_rtx (word_mode);
20844 rtx rot1_2 = gen_reg_rtx (word_mode);
20845 rtx rot2_1 = gen_reg_rtx (word_mode);
20846 rtx rot2_2 = gen_reg_rtx (word_mode);
20848 if (word_mode == SImode)
20850 emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20851 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20852 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20853 emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
20854 emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20855 emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
20856 emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
20857 emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
20858 gen_lowpart (SImode, rot_amt)));
20859 emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20860 emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
20861 gen_lowpart (SImode, rot_amt)));
20862 emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20863 emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
20865 else
20867 emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20868 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20869 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20870 emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
20871 emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20872 emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
20873 emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
20874 emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
20875 gen_lowpart (SImode, rot_amt)));
20876 emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20877 emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
20878 gen_lowpart (SImode, rot_amt)));
20879 emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20880 emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
20883 emit_label (final_move_label);
20884 emit_insn (gen_movsi (target,
20885 gen_lowpart (SImode, result_reg)));
20886 emit_label (final_label);
20887 return true;
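/* Worked example of the cleanup above (editor's illustration,
   DImode): if the first differing or zero byte is byte 2, counting
   from the most significant byte, then bytes 0 and 1 of the combined
   cmpb/orc result are zero, cntlzd returns 16, and adding 8 gives a
   rotate of 24 bits, which lands byte 2 in the low byte of each
   operand; the masked subtract then yields the byte difference whose
   sign is the strcmp result.  */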
20890 /* Expand a block move operation, and return 1 if successful. Return 0
20891 if we should let the compiler generate normal code.
20893 operands[0] is the destination
20894 operands[1] is the source
20895 operands[2] is the length
20896 operands[3] is the alignment */
20898 #define MAX_MOVE_REG 4
20900 int
20901 expand_block_move (rtx operands[])
20903 rtx orig_dest = operands[0];
20904 rtx orig_src = operands[1];
20905 rtx bytes_rtx = operands[2];
20906 rtx align_rtx = operands[3];
20907 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
20908 int align;
20909 int bytes;
20910 int offset;
20911 int move_bytes;
20912 rtx stores[MAX_MOVE_REG];
20913 int num_reg = 0;
20915 /* If this is not a fixed size move, just call memcpy */
20916 if (! constp)
20917 return 0;
20919 /* This must be a fixed size alignment */
20920 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
20921 align = INTVAL (align_rtx) * BITS_PER_UNIT;
20923 /* Anything to move? */
20924 bytes = INTVAL (bytes_rtx);
20925 if (bytes <= 0)
20926 return 1;
20928 if (bytes > rs6000_block_move_inline_limit)
20929 return 0;
20931 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
20933 union {
20934 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
20935 rtx (*mov) (rtx, rtx);
20936 } gen_func;
20937 machine_mode mode = BLKmode;
20938 rtx src, dest;
20940 /* Altivec first, since it will be faster than a string move
20941 when it applies, and usually not significantly larger. */
20942 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
20944 move_bytes = 16;
20945 mode = V4SImode;
20946 gen_func.mov = gen_movv4si;
20948 else if (TARGET_SPE && bytes >= 8 && align >= 64)
20950 move_bytes = 8;
20951 mode = V2SImode;
20952 gen_func.mov = gen_movv2si;
20954 else if (TARGET_STRING
20955 && bytes > 24 /* move up to 32 bytes at a time */
20956 && ! fixed_regs[5]
20957 && ! fixed_regs[6]
20958 && ! fixed_regs[7]
20959 && ! fixed_regs[8]
20960 && ! fixed_regs[9]
20961 && ! fixed_regs[10]
20962 && ! fixed_regs[11]
20963 && ! fixed_regs[12])
20965 move_bytes = (bytes > 32) ? 32 : bytes;
20966 gen_func.movmemsi = gen_movmemsi_8reg;
20968 else if (TARGET_STRING
20969 && bytes > 16 /* move up to 24 bytes at a time */
20970 && ! fixed_regs[5]
20971 && ! fixed_regs[6]
20972 && ! fixed_regs[7]
20973 && ! fixed_regs[8]
20974 && ! fixed_regs[9]
20975 && ! fixed_regs[10])
20977 move_bytes = (bytes > 24) ? 24 : bytes;
20978 gen_func.movmemsi = gen_movmemsi_6reg;
20980 else if (TARGET_STRING
20981 && bytes > 8 /* move up to 16 bytes at a time */
20982 && ! fixed_regs[5]
20983 && ! fixed_regs[6]
20984 && ! fixed_regs[7]
20985 && ! fixed_regs[8])
20987 move_bytes = (bytes > 16) ? 16 : bytes;
20988 gen_func.movmemsi = gen_movmemsi_4reg;
20990 else if (bytes >= 8 && TARGET_POWERPC64
20991 && (align >= 64 || !STRICT_ALIGNMENT))
20993 move_bytes = 8;
20994 mode = DImode;
20995 gen_func.mov = gen_movdi;
20996 if (offset == 0 && align < 64)
20998 rtx addr;
21000 /* If the address form is reg+offset with offset not a
21001 multiple of four, reload into reg indirect form here
21002 rather than waiting for reload. This way we get one
21003 reload, not one per load and/or store. */
21004 addr = XEXP (orig_dest, 0);
21005 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
21006 && GET_CODE (XEXP (addr, 1)) == CONST_INT
21007 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
21009 addr = copy_addr_to_reg (addr);
21010 orig_dest = replace_equiv_address (orig_dest, addr);
21012 addr = XEXP (orig_src, 0);
21013 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
21014 && GET_CODE (XEXP (addr, 1)) == CONST_INT
21015 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
21017 addr = copy_addr_to_reg (addr);
21018 orig_src = replace_equiv_address (orig_src, addr);
21022 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
21023 { /* move up to 8 bytes at a time */
21024 move_bytes = (bytes > 8) ? 8 : bytes;
21025 gen_func.movmemsi = gen_movmemsi_2reg;
21027 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
21028 { /* move 4 bytes */
21029 move_bytes = 4;
21030 mode = SImode;
21031 gen_func.mov = gen_movsi;
21033 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
21034 { /* move 2 bytes */
21035 move_bytes = 2;
21036 mode = HImode;
21037 gen_func.mov = gen_movhi;
21039 else if (TARGET_STRING && bytes > 1)
21040 { /* move up to 4 bytes at a time */
21041 move_bytes = (bytes > 4) ? 4 : bytes;
21042 gen_func.movmemsi = gen_movmemsi_1reg;
21044 else /* move 1 byte at a time */
21046 move_bytes = 1;
21047 mode = QImode;
21048 gen_func.mov = gen_movqi;
21051 src = adjust_address (orig_src, mode, offset);
21052 dest = adjust_address (orig_dest, mode, offset);
21054 if (mode != BLKmode)
21056 rtx tmp_reg = gen_reg_rtx (mode);
21058 emit_insn ((*gen_func.mov) (tmp_reg, src));
21059 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
21062 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
21064 int i;
21065 for (i = 0; i < num_reg; i++)
21066 emit_insn (stores[i]);
21067 num_reg = 0;
21070 if (mode == BLKmode)
21072 /* Move the address into scratch registers. The movmemsi
21073 patterns require zero offset. */
21074 if (!REG_P (XEXP (src, 0)))
21076 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
21077 src = replace_equiv_address (src, src_reg);
21079 set_mem_size (src, move_bytes);
21081 if (!REG_P (XEXP (dest, 0)))
21083 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
21084 dest = replace_equiv_address (dest, dest_reg);
21086 set_mem_size (dest, move_bytes);
21088 emit_insn ((*gen_func.movmemsi) (dest, src,
21089 GEN_INT (move_bytes & 31),
21090 align_rtx));
21094 return 1;
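/* Worked example (editor's illustration): a fixed 13-byte copy with
   4-byte alignment and none of the string/vector paths available moves
   4+4+4 bytes as SImode and the final byte as QImode, buffering up to
   MAX_MOVE_REG loads before their stores are emitted so the loads can
   issue back to back.  */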
21098 /* Return a string to perform a load_multiple operation.
21099 operands[0] is the vector.
21100 operands[1] is the source address.
21101 operands[2] is the first destination register. */
21103 const char *
21104 rs6000_output_load_multiple (rtx operands[3])
21106 /* We have to handle the case where the pseudo holding the address
21107 is assigned to one of the output registers. */
21108 int i, j;
21109 int words = XVECLEN (operands[0], 0);
21110 rtx xop[10];
21112 if (XVECLEN (operands[0], 0) == 1)
21113 return "lwz %2,0(%1)";
21115 for (i = 0; i < words; i++)
21116 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
21118 if (i == words-1)
21120 xop[0] = GEN_INT (4 * (words-1));
21121 xop[1] = operands[1];
21122 xop[2] = operands[2];
21123 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
21124 return "";
21126 else if (i == 0)
21128 xop[0] = GEN_INT (4 * (words-1));
21129 xop[1] = operands[1];
21130 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
21131 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
21132 return "";
21134 else
21136 for (j = 0; j < words; j++)
21137 if (j != i)
21139 xop[0] = GEN_INT (j * 4);
21140 xop[1] = operands[1];
21141 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
21142 output_asm_insn ("lwz %2,%0(%1)", xop);
21144 xop[0] = GEN_INT (i * 4);
21145 xop[1] = operands[1];
21146 output_asm_insn ("lwz %1,%0(%1)", xop);
21147 return "";
21151 return "lswi %2,%1,%N0";
21155 /* A validation routine: say whether CODE, a condition code, and MODE
21156 match. The other alternatives either don't make sense or should
21157 never be generated. */
21159 void
21160 validate_condition_mode (enum rtx_code code, machine_mode mode)
21162 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
21163 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
21164 && GET_MODE_CLASS (mode) == MODE_CC);
21166 /* These don't make sense. */
21167 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
21168 || mode != CCUNSmode);
21170 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
21171 || mode == CCUNSmode);
21173 gcc_assert (mode == CCFPmode
21174 || (code != ORDERED && code != UNORDERED
21175 && code != UNEQ && code != LTGT
21176 && code != UNGT && code != UNLT
21177 && code != UNGE && code != UNLE));
21179 /* These should never be generated except for
21180 flag_finite_math_only. */
21181 gcc_assert (mode != CCFPmode
21182 || flag_finite_math_only
21183 || (code != LE && code != GE
21184 && code != UNEQ && code != LTGT
21185 && code != UNGT && code != UNLT));
21187 /* These are invalid; the information is not there. */
21188 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
21192 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
21193 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
21194 not zero, store there the bit offset (counted from the right) where
21195 the single stretch of 1 bits begins; and similarly for B, the bit
21196 offset where it ends. */
21198 bool
21199 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
21201 unsigned HOST_WIDE_INT val = INTVAL (mask);
21202 unsigned HOST_WIDE_INT bit;
21203 int nb, ne;
21204 int n = GET_MODE_PRECISION (mode);
21206 if (mode != DImode && mode != SImode)
21207 return false;
21209 if (INTVAL (mask) >= 0)
21211 bit = val & -val;
21212 ne = exact_log2 (bit);
21213 nb = exact_log2 (val + bit);
21215 else if (val + 1 == 0)
21217 nb = n;
21218 ne = 0;
21220 else if (val & 1)
21222 val = ~val;
21223 bit = val & -val;
21224 nb = exact_log2 (bit);
21225 ne = exact_log2 (val + bit);
21227 else
21229 bit = val & -val;
21230 ne = exact_log2 (bit);
21231 if (val + bit == 0)
21232 nb = n;
21233 else
21234 nb = 0;
21237 nb--;
21239 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
21240 return false;
21242 if (b)
21243 *b = nb;
21244 if (e)
21245 *e = ne;
21247 return true;
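/* Worked example for the function above (a sketch, not from the source).
   For MASK = 0x0ff0 in DImode we take the INTVAL >= 0 branch:
     bit = val & -val = 0x0010, so ne = exact_log2 (bit) = 4;
     val + bit = 0x1000, so nb = exact_log2 (0x1000) - 1 = 11 (after nb--),
   i.e. a single stretch of ones in bits 4..11, and the function returns
   true.  For MASK = 0x0f0f, val + bit = 0x0f10 is not a power of two, so
   exact_log2 returns -1, nb ends up negative, and the mask is rejected:
   two separate stretches of ones never qualify.  */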
21250 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
21251 or rldicr instruction, to implement an AND with it in mode MODE. */
21253 bool
21254 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
21256 int nb, ne;
21258 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21259 return false;
21261 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
21262 does not wrap. */
21263 if (mode == DImode)
21264 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
21266 /* For SImode, rlwinm can do everything. */
21267 if (mode == SImode)
21268 return (nb < 32 && ne < 32);
21270 return false;
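/* A compilable sketch of the DImode condition above, with worked examples
   (nb/ne counted from the right, as set by rs6000_is_valid_mask):
     0x00000000000000ff: nb = 7,  ne = 0  -> rldicl (run reaches bit 0)
     0xffffffff00000000: nb = 63, ne = 32 -> rldicr (run reaches bit 63)
     0x000000000ffffff0: nb = 27, ne = 4  -> rlwinm (run inside low 32 bits)
     0x0000ffff00000000: nb = 47, ne = 32 -> none of the three, so false.  */
static bool
and_mask_ok_di_sketch (int nb, int ne)
{
  return (ne == 0			/* rldicl clears from the left */
	  || nb == 63			/* rldicr clears from the right */
	  || (nb < 32 && ne <= nb));	/* rlwinm, non-wrapping low mask */
}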
21273 /* Return the instruction template for an AND with mask in mode MODE, with
21274 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21276 const char *
21277 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
21279 int nb, ne;
21281 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
21282 gcc_unreachable ();
21284 if (mode == DImode && ne == 0)
21286 operands[3] = GEN_INT (63 - nb);
21287 if (dot)
21288 return "rldicl. %0,%1,0,%3";
21289 return "rldicl %0,%1,0,%3";
21292 if (mode == DImode && nb == 63)
21294 operands[3] = GEN_INT (63 - ne);
21295 if (dot)
21296 return "rldicr. %0,%1,0,%3";
21297 return "rldicr %0,%1,0,%3";
21300 if (nb < 32 && ne < 32)
21302 operands[3] = GEN_INT (31 - nb);
21303 operands[4] = GEN_INT (31 - ne);
21304 if (dot)
21305 return "rlwinm. %0,%1,0,%3,%4";
21306 return "rlwinm %0,%1,0,%3,%4";
21309 gcc_unreachable ();
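/* Worked example of the template selection above (a sketch).  For an SImode
   AND with 0x0000ff00, rs6000_is_valid_mask gives nb = 15 and ne = 8; GCC
   counts those from the right while rlwinm encodes MB/ME from the left,
   hence the 31 - x flips:
     operands[3] = 31 - 15 = 16, operands[4] = 31 - 8 = 23
   producing "rlwinm %0,%1,0,16,23", a rotate by zero that keeps only
   bits 8..15.  */
static void
rlwinm_mask_operands_sketch (int nb, int ne, int *mb, int *me)
{
  *mb = 31 - nb;	/* mask begin, counted from the left */
  *me = 31 - ne;	/* mask end, counted from the left */
}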
21312 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
21313 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
21314 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
21316 bool
21317 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
21319 int nb, ne;
21321 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21322 return false;
21324 int n = GET_MODE_PRECISION (mode);
21325 int sh = -1;
21327 if (CONST_INT_P (XEXP (shift, 1)))
21329 sh = INTVAL (XEXP (shift, 1));
21330 if (sh < 0 || sh >= n)
21331 return false;
21334 rtx_code code = GET_CODE (shift);
21336 /* Convert any shift by 0 to a rotate, to simplify the code below. */
21337 if (sh == 0)
21338 code = ROTATE;
21340 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21341 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21342 code = ASHIFT;
21343 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21345 code = LSHIFTRT;
21346 sh = n - sh;
21349 /* DImode rotates need rld*. */
21350 if (mode == DImode && code == ROTATE)
21351 return (nb == 63 || ne == 0 || ne == sh);
21353 /* SImode rotates need rlw*. */
21354 if (mode == SImode && code == ROTATE)
21355 return (nb < 32 && ne < 32 && sh < 32);
21357 /* Wrap-around masks are only okay for rotates. */
21358 if (ne > nb)
21359 return false;
21361 /* Variable shifts are only okay for rotates. */
21362 if (sh < 0)
21363 return false;
21365 /* Don't allow ASHIFT if the mask is wrong for that. */
21366 if (code == ASHIFT && ne < sh)
21367 return false;
21369 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
21370 if the mask is wrong for that. */
21371 if (nb < 32 && ne < 32 && sh < 32
21372 && !(code == LSHIFTRT && nb >= 32 - sh))
21373 return true;
21375 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
21376 if the mask is wrong for that. */
21377 if (code == LSHIFTRT)
21378 sh = 64 - sh;
21379 if (nb == 63 || ne == 0 || ne == sh)
21380 return !(code == LSHIFTRT && nb >= sh);
21382 return false;
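/* A compilable sketch of the rotate normalization used above (and again in
   rs6000_is_valid_insert_mask below).  A rotate left by SH whose mask keeps
   no wrapped-in low bits behaves as an ASHIFT; one whose mask keeps only
   wrapped-in bits behaves as an LSHIFTRT by N - SH.  nb/ne follow the
   rs6000_is_valid_mask convention, N is the mode precision.  */
enum rot_kind_sketch { ROT_ROTATE, ROT_ASHIFT, ROT_LSHIFTRT };

static enum rot_kind_sketch
classify_rotate_sketch (int sh, int nb, int ne, int n, int *out_sh)
{
  *out_sh = sh;
  if (nb >= ne && ne >= sh)
    return ROT_ASHIFT;		/* whole mask is above the wrap point */
  if (nb >= ne && nb < sh)
    {
      *out_sh = n - sh;		/* rotate left SH == shift right N - SH */
      return ROT_LSHIFTRT;	/* whole mask is below the wrap point */
    }
  return ROT_ROTATE;		/* genuinely needs a rotate */
}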
21385 /* Return the instruction template for a shift with mask in mode MODE, with
21386 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21388 const char *
21389 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
21391 int nb, ne;
21393 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21394 gcc_unreachable ();
21396 if (mode == DImode && ne == 0)
21398 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21399 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
21400 operands[3] = GEN_INT (63 - nb);
21401 if (dot)
21402 return "rld%I2cl. %0,%1,%2,%3";
21403 return "rld%I2cl %0,%1,%2,%3";
21406 if (mode == DImode && nb == 63)
21408 operands[3] = GEN_INT (63 - ne);
21409 if (dot)
21410 return "rld%I2cr. %0,%1,%2,%3";
21411 return "rld%I2cr %0,%1,%2,%3";
21414 if (mode == DImode
21415 && GET_CODE (operands[4]) != LSHIFTRT
21416 && CONST_INT_P (operands[2])
21417 && ne == INTVAL (operands[2]))
21419 operands[3] = GEN_INT (63 - nb);
21420 if (dot)
21421 return "rld%I2c. %0,%1,%2,%3";
21422 return "rld%I2c %0,%1,%2,%3";
21425 if (nb < 32 && ne < 32)
21427 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21428 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21429 operands[3] = GEN_INT (31 - nb);
21430 operands[4] = GEN_INT (31 - ne);
21431 /* This insn can also be a 64-bit rotate with mask that really makes
21432 it just a shift right (with mask); the %h below are to adjust for
21433 that situation (shift count is >= 32 in that case). */
21434 if (dot)
21435 return "rlw%I2nm. %0,%1,%h2,%3,%4";
21436 return "rlw%I2nm %0,%1,%h2,%3,%4";
21439 gcc_unreachable ();
21442 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
21443 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
21444 ASHIFT, or LSHIFTRT) in mode MODE. */
21446 bool
21447 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
21449 int nb, ne;
21451 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21452 return false;
21454 int n = GET_MODE_PRECISION (mode);
21456 int sh = INTVAL (XEXP (shift, 1));
21457 if (sh < 0 || sh >= n)
21458 return false;
21460 rtx_code code = GET_CODE (shift);
21462 /* Convert any shift by 0 to a rotate, to simplify the code below. */
21463 if (sh == 0)
21464 code = ROTATE;
21466 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21467 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21468 code = ASHIFT;
21469 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21471 code = LSHIFTRT;
21472 sh = n - sh;
21475 /* DImode rotates need rldimi. */
21476 if (mode == DImode && code == ROTATE)
21477 return (ne == sh);
21479 /* SImode rotates need rlwimi. */
21480 if (mode == SImode && code == ROTATE)
21481 return (nb < 32 && ne < 32 && sh < 32);
21483 /* Wrap-around masks are only okay for rotates. */
21484 if (ne > nb)
21485 return false;
21487 /* Don't allow ASHIFT if the mask is wrong for that. */
21488 if (code == ASHIFT && ne < sh)
21489 return false;
21491 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
21492 if the mask is wrong for that. */
21493 if (nb < 32 && ne < 32 && sh < 32
21494 && !(code == LSHIFTRT && nb >= 32 - sh))
21495 return true;
21497 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
21498 if the mask is wrong for that. */
21499 if (code == LSHIFTRT)
21500 sh = 64 - sh;
21501 if (ne == sh)
21502 return !(code == LSHIFTRT && nb >= sh);
21504 return false;
21507 /* Return the instruction template for an insert with mask in mode MODE, with
21508 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21510 const char *
21511 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
21513 int nb, ne;
21515 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21516 gcc_unreachable ();
21518 /* Prefer rldimi because rlwimi is cracked. */
21519 if (TARGET_POWERPC64
21520 && (!dot || mode == DImode)
21521 && GET_CODE (operands[4]) != LSHIFTRT
21522 && ne == INTVAL (operands[2]))
21524 operands[3] = GEN_INT (63 - nb);
21525 if (dot)
21526 return "rldimi. %0,%1,%2,%3";
21527 return "rldimi %0,%1,%2,%3";
21530 if (nb < 32 && ne < 32)
21532 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21533 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21534 operands[3] = GEN_INT (31 - nb);
21535 operands[4] = GEN_INT (31 - ne);
21536 if (dot)
21537 return "rlwimi. %0,%1,%2,%3,%4";
21538 return "rlwimi %0,%1,%2,%3,%4";
21541 gcc_unreachable ();
21544 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
21545 using two machine instructions. */
21547 bool
21548 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
21550 /* There are two kinds of AND we can handle with two insns:
21551 1) those we can do with two rl* insn;
21552 2) ori[s];xori[s].
21554 We do not handle that last case yet. */
21556 /* If there is just one stretch of ones, we can do it. */
21557 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
21558 return true;
21560 /* Otherwise, fill in the lowest "hole"; if we can do the result with
21561 one insn, we can do the whole thing with two. */
21562 unsigned HOST_WIDE_INT val = INTVAL (c);
21563 unsigned HOST_WIDE_INT bit1 = val & -val;
21564 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21565 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21566 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21567 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
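/* A compilable sketch of the hole-filling trick above, with a worked
   example.  For val = 0xe7 (0b11100111, hole at bits 3..4):
     bit1 = 0x01 (lowest set bit)	bit2 = 0x08 (lowest bit of the hole)
     val1 = 0xe0 (low run peeled off)	bit3 = 0x20 (bit just above the hole)
   and val + bit3 - bit2 = 0xff: the lowest hole filled in, leaving one
   stretch of ones that a single rl* insn can implement.  */
static unsigned HOST_WIDE_INT
fill_lowest_hole_sketch (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;
  return val + bit3 - bit2;
}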
21570 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
21571 If EXPAND is true, split rotate-and-mask instructions we generate to
21572 their constituent parts as well (this is used during expand); if DOT
21573 is 1, make the last insn a record-form instruction clobbering the
21574 destination GPR and setting the CC reg (from operands[3]); if 2, set
21575 that GPR as well as the CC reg. */
21577 void
21578 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
21580 gcc_assert (!(expand && dot));
21582 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
21584 /* If it is one stretch of ones, it is DImode; shift left, mask, then
21585 shift right. This generates better code than doing the masks without
21586 shifts, or shifting first right and then left. */
21587 int nb, ne;
21588 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
21590 gcc_assert (mode == DImode);
21592 int shift = 63 - nb;
21593 if (expand)
21595 rtx tmp1 = gen_reg_rtx (DImode);
21596 rtx tmp2 = gen_reg_rtx (DImode);
21597 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
21598 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
21599 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
21601 else
21603 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
21604 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
21605 emit_move_insn (operands[0], tmp);
21606 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
21607 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21609 return;
21612 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
21613 that does the rest. */
21614 unsigned HOST_WIDE_INT bit1 = val & -val;
21615 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21616 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21617 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21619 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
21620 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
21622 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
21624 /* Two "no-rotate"-and-mask instructions, for SImode. */
21625 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
21627 gcc_assert (mode == SImode);
21629 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21630 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
21631 emit_move_insn (reg, tmp);
21632 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21633 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21634 return;
21637 gcc_assert (mode == DImode);
21639 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
21640 insns; we have to do the first in SImode, because it wraps. */
21641 if (mask2 <= 0xffffffff
21642 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
21644 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21645 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
21646 GEN_INT (mask1));
21647 rtx reg_low = gen_lowpart (SImode, reg);
21648 emit_move_insn (reg_low, tmp);
21649 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21650 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21651 return;
21654 /* Two rld* insns: rotate, clear the hole in the middle (which now is
21655 at the top end), rotate back and clear the other hole. */
21656 int right = exact_log2 (bit3);
21657 int left = 64 - right;
21659 /* Rotate the mask too. */
21660 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
21662 if (expand)
21664 rtx tmp1 = gen_reg_rtx (DImode);
21665 rtx tmp2 = gen_reg_rtx (DImode);
21666 rtx tmp3 = gen_reg_rtx (DImode);
21667 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
21668 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
21669 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
21670 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
21672 else
21674 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
21675 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
21676 emit_move_insn (operands[0], tmp);
21677 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
21678 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
21679 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
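/* Worked example for the final two-rld* path above (a sketch).  Take
   DImode val = 0x0000ff00000000ff, whose lowest hole spans bits 8..39:
     bit2 = 0x100, bit3 = 1 << 40
     mask2 = val + bit3 - bit2 = 0x0000ffffffffffff (hole filled)
     mask1 = -bit3 + bit2 - 1  = 0xffffff00000000ff (everything but the hole)
   with val == mask1 & mask2.  right = exact_log2 (bit3) = 40, left = 24,
   and the rotated mask1 becomes 0x00000000ffffffff, so the sequence is:
   rotate left 24 (the hole is now at the top), AND 0x00000000ffffffff,
   rotate left 40 (back into place), AND mask2.  */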
21683 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
21684 for lfq and stfq insns iff the registers are hard registers. */
21686 int
21687 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
21689 /* We might have been passed a SUBREG. */
21690 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
21691 return 0;
21693 /* We might have been passed non floating point registers. */
21694 if (!FP_REGNO_P (REGNO (reg1))
21695 || !FP_REGNO_P (REGNO (reg2)))
21696 return 0;
21698 return (REGNO (reg1) == REGNO (reg2) - 1);
21701 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
21702 addr1 and addr2 must be in consecutive memory locations
21703 (addr2 == addr1 + 8). */
21705 int
21706 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
21708 rtx addr1, addr2;
21709 unsigned int reg1, reg2;
21710 int offset1, offset2;
21712 /* The mems cannot be volatile. */
21713 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
21714 return 0;
21716 addr1 = XEXP (mem1, 0);
21717 addr2 = XEXP (mem2, 0);
21719 /* Extract an offset (if used) from the first addr. */
21720 if (GET_CODE (addr1) == PLUS)
21722 /* If not a REG, return zero. */
21723 if (GET_CODE (XEXP (addr1, 0)) != REG)
21724 return 0;
21725 else
21727 reg1 = REGNO (XEXP (addr1, 0));
21728 /* The offset must be constant! */
21729 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
21730 return 0;
21731 offset1 = INTVAL (XEXP (addr1, 1));
21734 else if (GET_CODE (addr1) != REG)
21735 return 0;
21736 else
21738 reg1 = REGNO (addr1);
21739 /* This was a simple (mem (reg)) expression. Offset is 0. */
21740 offset1 = 0;
21743 /* And now for the second addr. */
21744 if (GET_CODE (addr2) == PLUS)
21746 /* If not a REG, return zero. */
21747 if (GET_CODE (XEXP (addr2, 0)) != REG)
21748 return 0;
21749 else
21751 reg2 = REGNO (XEXP (addr2, 0));
21752 /* The offset must be constant. */
21753 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
21754 return 0;
21755 offset2 = INTVAL (XEXP (addr2, 1));
21758 else if (GET_CODE (addr2) != REG)
21759 return 0;
21760 else
21762 reg2 = REGNO (addr2);
21763 /* This was a simple (mem (reg)) expression. Offset is 0. */
21764 offset2 = 0;
21767 /* Both of these must have the same base register. */
21768 if (reg1 != reg2)
21769 return 0;
21771 /* The offset for the second addr must be 8 more than the first addr. */
21772 if (offset2 != offset1 + 8)
21773 return 0;
21775 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
21776 instructions. */
21777 return 1;
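/* Example of a pair the predicate above accepts (a sketch):
     mem1 = (mem:DF (plus:SI (reg:SI 9) (const_int 8)))
     mem2 = (mem:DF (plus:SI (reg:SI 9) (const_int 16)))
   Same base register, offset2 == offset1 + 8, and neither MEM volatile, so
   the peephole may fuse the two adjacent doubles into one lfq/stfq.  */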
21781 rtx
21782 rs6000_secondary_memory_needed_rtx (machine_mode mode)
21784 static bool eliminated = false;
21785 rtx ret;
21787 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
21788 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21789 else
21791 rtx mem = cfun->machine->sdmode_stack_slot;
21792 gcc_assert (mem != NULL_RTX);
21794 if (!eliminated)
21796 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
21797 cfun->machine->sdmode_stack_slot = mem;
21798 eliminated = true;
21800 ret = mem;
21803 if (TARGET_DEBUG_ADDR)
21805 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
21806 GET_MODE_NAME (mode));
21807 if (!ret)
21808 fprintf (stderr, "\tNULL_RTX\n");
21809 else
21810 debug_rtx (ret);
21813 return ret;
21816 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
21817 need to use DDmode, in all other cases we can use the same mode. */
21818 static machine_mode
21819 rs6000_secondary_memory_needed_mode (machine_mode mode)
21821 if (lra_in_progress && mode == SDmode)
21822 return DDmode;
21823 return mode;
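/* A minimal usage sketch for the hook above (hypothetical, not called
   anywhere).  SDmode is only 4 bytes wide, but without LFIWZX/STFIWX the
   FP loads and stores that reach the slot move 8 bytes, so under LRA the
   secondary-memory slot is allocated in DDmode instead.  */
static rtx
sdmode_slot_sketch (void)
{
  machine_mode slot_mode = rs6000_secondary_memory_needed_mode (SDmode);
  return assign_stack_local (slot_mode, GET_MODE_SIZE (slot_mode), 0);
}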
21826 static tree
21827 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
21829 /* Don't walk into types. */
21830 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
21832 *walk_subtrees = 0;
21833 return NULL_TREE;
21836 switch (TREE_CODE (*tp))
21838 case VAR_DECL:
21839 case PARM_DECL:
21840 case FIELD_DECL:
21841 case RESULT_DECL:
21842 case SSA_NAME:
21843 case REAL_CST:
21844 case MEM_REF:
21845 case VIEW_CONVERT_EXPR:
21846 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
21847 return *tp;
21848 break;
21849 default:
21850 break;
21853 return NULL_TREE;
21856 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
21857 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
21858 only work on the traditional altivec registers, note if an altivec register
21859 was chosen. */
21861 static enum rs6000_reg_type
21862 register_to_reg_type (rtx reg, bool *is_altivec)
21864 HOST_WIDE_INT regno;
21865 enum reg_class rclass;
21867 if (GET_CODE (reg) == SUBREG)
21868 reg = SUBREG_REG (reg);
21870 if (!REG_P (reg))
21871 return NO_REG_TYPE;
21873 regno = REGNO (reg);
21874 if (regno >= FIRST_PSEUDO_REGISTER)
21876 if (!lra_in_progress && !reload_in_progress && !reload_completed)
21877 return PSEUDO_REG_TYPE;
21879 regno = true_regnum (reg);
21880 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
21881 return PSEUDO_REG_TYPE;
21884 gcc_assert (regno >= 0);
21886 if (is_altivec && ALTIVEC_REGNO_P (regno))
21887 *is_altivec = true;
21889 rclass = rs6000_regno_regclass[regno];
21890 return reg_class_to_reg_type[(int)rclass];
21893 /* Helper function to return the cost of adding a TOC entry address. */
21895 static inline int
21896 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
21898 int ret;
21900 if (TARGET_CMODEL != CMODEL_SMALL)
21901 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
21903 else
21904 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
21906 return ret;
21909 /* Helper function for rs6000_secondary_reload to determine whether the memory
21910 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
21911 needs reloading. Return negative if the memory is not handled by the memory
21912 helper functions (so a different reload method should be tried), 0 if no
21913 additional instructions are needed, and positive to give the extra cost
21914 for the memory. */
21916 static int
21917 rs6000_secondary_reload_memory (rtx addr,
21918 enum reg_class rclass,
21919 machine_mode mode)
21921 int extra_cost = 0;
21922 rtx reg, and_arg, plus_arg0, plus_arg1;
21923 addr_mask_type addr_mask;
21924 const char *type = NULL;
21925 const char *fail_msg = NULL;
21927 if (GPR_REG_CLASS_P (rclass))
21928 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21930 else if (rclass == FLOAT_REGS)
21931 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21933 else if (rclass == ALTIVEC_REGS)
21934 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21936 /* For the combined VSX_REGS, turn off Altivec AND -16. */
21937 else if (rclass == VSX_REGS)
21938 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
21939 & ~RELOAD_REG_AND_M16);
21941 /* If the register allocator hasn't made up its mind yet on the register
21942 class to use, settle on defaults. */
21943 else if (rclass == NO_REGS)
21945 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
21946 & ~RELOAD_REG_AND_M16);
21948 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
21949 addr_mask &= ~(RELOAD_REG_INDEXED
21950 | RELOAD_REG_PRE_INCDEC
21951 | RELOAD_REG_PRE_MODIFY);
21954 else
21955 addr_mask = 0;
21957 /* If the register isn't valid in this register class, just return now. */
21958 if ((addr_mask & RELOAD_REG_VALID) == 0)
21960 if (TARGET_DEBUG_ADDR)
21962 fprintf (stderr,
21963 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21964 "not valid in class\n",
21965 GET_MODE_NAME (mode), reg_class_names[rclass]);
21966 debug_rtx (addr);
21969 return -1;
21972 switch (GET_CODE (addr))
21974 /* Does the register class support auto update forms for this mode? We
21975 don't need a scratch register, since the powerpc only supports
21976 PRE_INC, PRE_DEC, and PRE_MODIFY. */
21977 case PRE_INC:
21978 case PRE_DEC:
21979 reg = XEXP (addr, 0);
21980 if (!base_reg_operand (reg, GET_MODE (reg)))
21982 fail_msg = "no base register #1";
21983 extra_cost = -1;
21986 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21988 extra_cost = 1;
21989 type = "update";
21991 break;
21993 case PRE_MODIFY:
21994 reg = XEXP (addr, 0);
21995 plus_arg1 = XEXP (addr, 1);
21996 if (!base_reg_operand (reg, GET_MODE (reg))
21997 || GET_CODE (plus_arg1) != PLUS
21998 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
22000 fail_msg = "bad PRE_MODIFY";
22001 extra_cost = -1;
22004 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22006 extra_cost = 1;
22007 type = "update";
22009 break;
22011 /* Do we need to simulate AND -16 to clear the bottom address bits used
22012 in VMX load/stores? Only allow the AND for vector sizes. */
22013 case AND:
22014 and_arg = XEXP (addr, 0);
22015 if (GET_MODE_SIZE (mode) != 16
22016 || GET_CODE (XEXP (addr, 1)) != CONST_INT
22017 || INTVAL (XEXP (addr, 1)) != -16)
22019 fail_msg = "bad Altivec AND #1";
22020 extra_cost = -1;
22023 if (rclass != ALTIVEC_REGS)
22025 if (legitimate_indirect_address_p (and_arg, false))
22026 extra_cost = 1;
22028 else if (legitimate_indexed_address_p (and_arg, false))
22029 extra_cost = 2;
22031 else
22033 fail_msg = "bad Altivec AND #2";
22034 extra_cost = -1;
22037 type = "and";
22039 break;
22041 /* If this is an indirect address, make sure it is a base register. */
22042 case REG:
22043 case SUBREG:
22044 if (!legitimate_indirect_address_p (addr, false))
22046 extra_cost = 1;
22047 type = "move";
22049 break;
22051 /* If this is an indexed address, make sure the register class can handle
22052 indexed addresses for this mode. */
22053 case PLUS:
22054 plus_arg0 = XEXP (addr, 0);
22055 plus_arg1 = XEXP (addr, 1);
22057 /* (plus (plus (reg) (constant)) (constant)) is generated during
22058 push_reload processing, so handle it now. */
22059 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
22061 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22063 extra_cost = 1;
22064 type = "offset";
22068 /* (plus (plus (reg) (constant)) (reg)) is also generated during
22069 push_reload processing, so handle it now. */
22070 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
22072 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22074 extra_cost = 1;
22075 type = "indexed #2";
22079 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
22081 fail_msg = "no base register #2";
22082 extra_cost = -1;
22085 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
22087 if ((addr_mask & RELOAD_REG_INDEXED) == 0
22088 || !legitimate_indexed_address_p (addr, false))
22090 extra_cost = 1;
22091 type = "indexed";
22095 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
22096 && CONST_INT_P (plus_arg1))
22098 if (!quad_address_offset_p (INTVAL (plus_arg1)))
22100 extra_cost = 1;
22101 type = "vector d-form offset";
22105 /* Make sure the register class can handle offset addresses. */
22106 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22108 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22110 extra_cost = 1;
22111 type = "offset #2";
22115 else
22117 fail_msg = "bad PLUS";
22118 extra_cost = -1;
22121 break;
22123 case LO_SUM:
22124 /* Quad offsets are restricted and can't handle normal addresses. */
22125 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22127 extra_cost = -1;
22128 type = "vector d-form lo_sum";
22131 else if (!legitimate_lo_sum_address_p (mode, addr, false))
22133 fail_msg = "bad LO_SUM";
22134 extra_cost = -1;
22137 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22139 extra_cost = 1;
22140 type = "lo_sum";
22142 break;
22144 /* Static addresses need to create a TOC entry. */
22145 case CONST:
22146 case SYMBOL_REF:
22147 case LABEL_REF:
22148 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22150 extra_cost = -1;
22151 type = "vector d-form lo_sum #2";
22154 else
22156 type = "address";
22157 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
22159 break;
22161 /* TOC references look like offsetable memory. */
22162 case UNSPEC:
22163 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
22165 fail_msg = "bad UNSPEC";
22166 extra_cost = -1;
22169 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22171 extra_cost = -1;
22172 type = "vector d-form lo_sum #3";
22175 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22177 extra_cost = 1;
22178 type = "toc reference";
22180 break;
22182 default:
22184 fail_msg = "bad address";
22185 extra_cost = -1;
22189 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
22191 if (extra_cost < 0)
22192 fprintf (stderr,
22193 "rs6000_secondary_reload_memory error: mode = %s, "
22194 "class = %s, addr_mask = '%s', %s\n",
22195 GET_MODE_NAME (mode),
22196 reg_class_names[rclass],
22197 rs6000_debug_addr_mask (addr_mask, false),
22198 (fail_msg != NULL) ? fail_msg : "<bad address>");
22200 else
22201 fprintf (stderr,
22202 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
22203 "addr_mask = '%s', extra cost = %d, %s\n",
22204 GET_MODE_NAME (mode),
22205 reg_class_names[rclass],
22206 rs6000_debug_addr_mask (addr_mask, false),
22207 extra_cost,
22208 (type) ? type : "<none>");
22210 debug_rtx (addr);
22213 return extra_cost;
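/* Sketch of how the cost contract above is consumed (this mirrors the
   caller in rs6000_secondary_reload below): a negative return means
   "punt to another reload method", zero means the address needs no help,
   and a positive value is handed back to reload together with the helper
   pattern's insn code.  */
static void
record_memory_reload_cost_sketch (int extra_cost, enum insn_code icode,
				  secondary_reload_info *sri)
{
  if (extra_cost > 0)
    {
      sri->extra_cost = extra_cost;	/* extra insns the reload will need */
      sri->icode = icode;		/* helper pattern that emits them */
    }
}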
22216 /* Helper function for rs6000_secondary_reload to return true if a move to a
22217 different register class is really a simple move. */
22219 static bool
22220 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
22221 enum rs6000_reg_type from_type,
22222 machine_mode mode)
22224 int size = GET_MODE_SIZE (mode);
22226 /* Add support for various direct moves available. In this function, we only
22227 look at cases where we don't need any extra registers, and one or more
22228 simple move insns are issued. Originally small integers are not allowed
22229 in FPR/VSX registers. Single precision binary floating point is not a
22230 simple move because we need to convert to the single precision memory layout.
22231 The 4-byte SDmode can be moved. TDmode values are disallowed since they
22232 need special direct move handling, which we do not support yet. */
22233 if (TARGET_DIRECT_MOVE
22234 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22235 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
22237 if (TARGET_POWERPC64)
22239 /* ISA 2.07: MTVSRD or MFVSRD. */
22240 if (size == 8)
22241 return true;
22243 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
22244 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
22245 return true;
22248 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22249 if (TARGET_VSX_SMALL_INTEGER)
22251 if (mode == SImode)
22252 return true;
22254 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
22255 return true;
22258 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22259 if (mode == SDmode)
22260 return true;
22263 /* Power6+: MFTGPR or MFFGPR. */
22264 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
22265 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
22266 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22267 return true;
22269 /* Move to/from SPR. */
22270 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
22271 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
22272 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22273 return true;
22275 return false;
22278 /* Direct move helper function for rs6000_secondary_reload, handle all of the
22279 special direct moves that involve allocating an extra register. Return
22280 true if such a move is handled, storing the helper function's insn code
22281 (or CODE_FOR_nothing) and its extra cost in SRI. */
22283 static bool
22284 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
22285 enum rs6000_reg_type from_type,
22286 machine_mode mode,
22287 secondary_reload_info *sri,
22288 bool altivec_p)
22290 bool ret = false;
22291 enum insn_code icode = CODE_FOR_nothing;
22292 int cost = 0;
22293 int size = GET_MODE_SIZE (mode);
22295 if (TARGET_POWERPC64 && size == 16)
22297 /* Handle moving 128-bit values from GPRs to VSX registers on
22298 ISA 2.07 (power8, power9) when running in 64-bit mode using
22299 XXPERMDI to glue the two 64-bit values back together. */
22300 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22302 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
22303 icode = reg_addr[mode].reload_vsx_gpr;
22306 /* Handle moving 128-bit values from VSX registers to GPRs on
22307 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
22308 bottom 64-bit value. */
22309 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22311 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
22312 icode = reg_addr[mode].reload_gpr_vsx;
22316 else if (TARGET_POWERPC64 && mode == SFmode)
22318 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22320 cost = 3; /* xscvdpspn, mfvsrd, and. */
22321 icode = reg_addr[mode].reload_gpr_vsx;
22324 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22326 cost = 2; /* mtvsrz, xscvspdpn. */
22327 icode = reg_addr[mode].reload_vsx_gpr;
22331 else if (!TARGET_POWERPC64 && size == 8)
22333 /* Handle moving 64-bit values from GPRs to floating point registers on
22334 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
22335 32-bit values back together. Altivec register classes must be handled
22336 specially since a different instruction is used, and the secondary
22337 reload support requires a single instruction class in the scratch
22338 register constraint. However, right now TFmode is not allowed in
22339 Altivec registers, so the pattern will never match. */
22340 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
22342 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
22343 icode = reg_addr[mode].reload_fpr_gpr;
22347 if (icode != CODE_FOR_nothing)
22349 ret = true;
22350 if (sri)
22352 sri->icode = icode;
22353 sri->extra_cost = cost;
22357 return ret;
22360 /* Return whether a move between two register classes can be done either
22361 directly (simple move) or via a pattern that uses a single extra temporary
22362 (using ISA 2.07's direct move in this case). */
22364 static bool
22365 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
22366 enum rs6000_reg_type from_type,
22367 machine_mode mode,
22368 secondary_reload_info *sri,
22369 bool altivec_p)
22371 /* Fall back to load/store reloads if either type is not a register. */
22372 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
22373 return false;
22375 /* If we haven't allocated registers yet, assume the move can be done for the
22376 standard register types. */
22377 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
22378 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
22379 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
22380 return true;
22382 /* A move within the same set of registers is a simple move for non-specialized
22383 registers. */
22384 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
22385 return true;
22387 /* Check whether a simple move can be done directly. */
22388 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
22390 if (sri)
22392 sri->icode = CODE_FOR_nothing;
22393 sri->extra_cost = 0;
22395 return true;
22398 /* Now check if we can do it in a few steps. */
22399 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
22400 altivec_p);
22403 /* Inform reload about cases where moving X with a mode MODE to a register in
22404 RCLASS requires an extra scratch or immediate register. Return the class
22405 needed for the immediate register.
22407 For VSX and Altivec, we may need a register to convert sp+offset into
22408 reg+sp.
22410 For misaligned 64-bit gpr loads and stores we need a register to
22411 convert an offset address to indirect. */
22413 static reg_class_t
22414 rs6000_secondary_reload (bool in_p,
22415 rtx x,
22416 reg_class_t rclass_i,
22417 machine_mode mode,
22418 secondary_reload_info *sri)
22420 enum reg_class rclass = (enum reg_class) rclass_i;
22421 reg_class_t ret = ALL_REGS;
22422 enum insn_code icode;
22423 bool default_p = false;
22424 bool done_p = false;
22426 /* Allow subreg of memory before/during reload. */
22427 bool memory_p = (MEM_P (x)
22428 || (!reload_completed && GET_CODE (x) == SUBREG
22429 && MEM_P (SUBREG_REG (x))));
22431 sri->icode = CODE_FOR_nothing;
22432 sri->t_icode = CODE_FOR_nothing;
22433 sri->extra_cost = 0;
22434 icode = ((in_p)
22435 ? reg_addr[mode].reload_load
22436 : reg_addr[mode].reload_store);
22438 if (REG_P (x) || register_operand (x, mode))
22440 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
22441 bool altivec_p = (rclass == ALTIVEC_REGS);
22442 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
22444 if (!in_p)
22445 std::swap (to_type, from_type);
22447 /* Can we do a direct move of some sort? */
22448 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
22449 altivec_p))
22451 icode = (enum insn_code)sri->icode;
22452 default_p = false;
22453 done_p = true;
22454 ret = NO_REGS;
22458 /* Make sure 0.0 is not reloaded or forced into memory. */
22459 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
22461 ret = NO_REGS;
22462 default_p = false;
22463 done_p = true;
22466 /* If this is a scalar floating point value and we want to load it into the
22467 traditional Altivec registers, do it by moving through a traditional floating
22468 point register, unless we have D-form addressing. Also make sure that
22469 non-zero constants use a FPR. */
22470 if (!done_p && reg_addr[mode].scalar_in_vmx_p
22471 && !mode_supports_vmx_dform (mode)
22472 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
22473 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
22475 ret = FLOAT_REGS;
22476 default_p = false;
22477 done_p = true;
22480 /* Handle reload of load/stores if we have reload helper functions. */
22481 if (!done_p && icode != CODE_FOR_nothing && memory_p)
22483 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
22484 mode);
22486 if (extra_cost >= 0)
22488 done_p = true;
22489 ret = NO_REGS;
22490 if (extra_cost > 0)
22492 sri->extra_cost = extra_cost;
22493 sri->icode = icode;
22498 /* Handle unaligned loads and stores of integer registers. */
22499 if (!done_p && TARGET_POWERPC64
22500 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22501 && memory_p
22502 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
22504 rtx addr = XEXP (x, 0);
22505 rtx off = address_offset (addr);
22507 if (off != NULL_RTX)
22509 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22510 unsigned HOST_WIDE_INT offset = INTVAL (off);
22512 /* We need a secondary reload when our legitimate_address_p
22513 says the address is good (as otherwise the entire address
22514 will be reloaded), and the offset is not a multiple of
22515 four or we have an address wrap. Address wrap will only
22516 occur for LO_SUMs since legitimate_offset_address_p
22517 rejects addresses for 16-byte mems that will wrap. */
22518 if (GET_CODE (addr) == LO_SUM
22519 ? (1 /* legitimate_address_p allows any offset for lo_sum */
22520 && ((offset & 3) != 0
22521 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
22522 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
22523 && (offset & 3) != 0))
22525 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
22526 if (in_p)
22527 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
22528 : CODE_FOR_reload_di_load);
22529 else
22530 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
22531 : CODE_FOR_reload_di_store);
22532 sri->extra_cost = 2;
22533 ret = NO_REGS;
22534 done_p = true;
22536 else
22537 default_p = true;
22539 else
22540 default_p = true;
22543 if (!done_p && !TARGET_POWERPC64
22544 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22545 && memory_p
22546 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
22548 rtx addr = XEXP (x, 0);
22549 rtx off = address_offset (addr);
22551 if (off != NULL_RTX)
22553 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22554 unsigned HOST_WIDE_INT offset = INTVAL (off);
22556 /* We need a secondary reload when our legitimate_address_p
22557 says the address is good (as otherwise the entire address
22558 will be reloaded), and we have a wrap.
22560 legitimate_lo_sum_address_p allows LO_SUM addresses to
22561 have any offset so test for wrap in the low 16 bits.
22563 legitimate_offset_address_p checks for the range
22564 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
22565 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
22566 [0x7ff4,0x7fff] respectively, so test for the
22567 intersection of these ranges, [0x7ffc,0x7fff] and
22568 [0x7ff4,0x7ff7] respectively.
22570 Note that the address we see here may have been
22571 manipulated by legitimize_reload_address. */
22572 if (GET_CODE (addr) == LO_SUM
22573 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
22574 : offset - (0x8000 - extra) < UNITS_PER_WORD)
22576 if (in_p)
22577 sri->icode = CODE_FOR_reload_si_load;
22578 else
22579 sri->icode = CODE_FOR_reload_si_store;
22580 sri->extra_cost = 2;
22581 ret = NO_REGS;
22582 done_p = true;
22584 else
22585 default_p = true;
22587 else
22588 default_p = true;
22591 if (!done_p)
22592 default_p = true;
22594 if (default_p)
22595 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
22597 gcc_assert (ret != ALL_REGS);
22599 if (TARGET_DEBUG_ADDR)
22601 fprintf (stderr,
22602 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
22603 "mode = %s",
22604 reg_class_names[ret],
22605 in_p ? "true" : "false",
22606 reg_class_names[rclass],
22607 GET_MODE_NAME (mode));
22609 if (reload_completed)
22610 fputs (", after reload", stderr);
22612 if (!done_p)
22613 fputs (", done_p not set", stderr);
22615 if (default_p)
22616 fputs (", default secondary reload", stderr);
22618 if (sri->icode != CODE_FOR_nothing)
22619 fprintf (stderr, ", reload func = %s, extra cost = %d",
22620 insn_data[sri->icode].name, sri->extra_cost);
22622 else if (sri->extra_cost > 0)
22623 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
22625 fputs ("\n", stderr);
22626 debug_rtx (x);
22629 return ret;
22632 /* Better tracing for rs6000_secondary_reload_inner. */
22634 static void
22635 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
22636 bool store_p)
22638 rtx set, clobber;
22640 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
22642 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
22643 store_p ? "store" : "load");
22645 if (store_p)
22646 set = gen_rtx_SET (mem, reg);
22647 else
22648 set = gen_rtx_SET (reg, mem);
22650 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
22651 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
22654 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
22655 ATTRIBUTE_NORETURN;
22657 static void
22658 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
22659 bool store_p)
22661 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
22662 gcc_unreachable ();
22665 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
22666 reload helper functions. These were identified in
22667 rs6000_secondary_reload_memory, and if reload decided to use the secondary
22668 reload, it calls the insns:
22669 reload_<RELOAD:mode>_<P:mptrsize>_store
22670 reload_<RELOAD:mode>_<P:mptrsize>_load
22672 which in turn calls this function, to do whatever is necessary to create
22673 valid addresses. */
22675 void
22676 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
22678 int regno = true_regnum (reg);
22679 machine_mode mode = GET_MODE (reg);
22680 addr_mask_type addr_mask;
22681 rtx addr;
22682 rtx new_addr;
22683 rtx op_reg, op0, op1;
22684 rtx and_op;
22685 rtx cc_clobber;
22686 rtvec rv;
22688 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
22689 || !base_reg_operand (scratch, GET_MODE (scratch)))
22690 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22692 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
22693 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
22695 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
22696 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
22698 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
22699 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
22701 else
22702 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22704 /* Make sure the mode is valid in this register class. */
22705 if ((addr_mask & RELOAD_REG_VALID) == 0)
22706 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22708 if (TARGET_DEBUG_ADDR)
22709 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
22711 new_addr = addr = XEXP (mem, 0);
22712 switch (GET_CODE (addr))
22714 /* Does the register class support auto update forms for this mode? If
22715 not, do the update now. We don't need a scratch register, since the
22716 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
22717 case PRE_INC:
22718 case PRE_DEC:
22719 op_reg = XEXP (addr, 0);
22720 if (!base_reg_operand (op_reg, Pmode))
22721 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22723 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
22725 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
22726 new_addr = op_reg;
22728 break;
22730 case PRE_MODIFY:
22731 op0 = XEXP (addr, 0);
22732 op1 = XEXP (addr, 1);
22733 if (!base_reg_operand (op0, Pmode)
22734 || GET_CODE (op1) != PLUS
22735 || !rtx_equal_p (op0, XEXP (op1, 0)))
22736 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22738 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22740 emit_insn (gen_rtx_SET (op0, op1));
22741 new_addr = reg;
22743 break;
22745 /* Do we need to simulate AND -16 to clear the bottom address bits used
22746 in VMX load/stores? */
22747 case AND:
22748 op0 = XEXP (addr, 0);
22749 op1 = XEXP (addr, 1);
22750 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
22752 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
22753 op_reg = op0;
22755 else if (GET_CODE (op1) == PLUS)
22757 emit_insn (gen_rtx_SET (scratch, op1));
22758 op_reg = scratch;
22761 else
22762 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22764 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
22765 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
22766 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
22767 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
22768 new_addr = scratch;
22770 break;
22772 /* If this is an indirect address, make sure it is a base register. */
22773 case REG:
22774 case SUBREG:
22775 if (!base_reg_operand (addr, GET_MODE (addr)))
22777 emit_insn (gen_rtx_SET (scratch, addr));
22778 new_addr = scratch;
22780 break;
22782 /* If this is an indexed address, make sure the register class can handle
22783 indexed addresses for this mode. */
22784 case PLUS:
22785 op0 = XEXP (addr, 0);
22786 op1 = XEXP (addr, 1);
22787 if (!base_reg_operand (op0, Pmode))
22788 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22790 else if (int_reg_operand (op1, Pmode))
22792 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22794 emit_insn (gen_rtx_SET (scratch, addr));
22795 new_addr = scratch;
22799 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
22801 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
22802 || !quad_address_p (addr, mode, false))
22804 emit_insn (gen_rtx_SET (scratch, addr));
22805 new_addr = scratch;
22809 /* Make sure the register class can handle offset addresses. */
22810 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22812 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22814 emit_insn (gen_rtx_SET (scratch, addr));
22815 new_addr = scratch;
22819 else
22820 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22822 break;
22824 case LO_SUM:
22825 op0 = XEXP (addr, 0);
22826 op1 = XEXP (addr, 1);
22827 if (!base_reg_operand (op0, Pmode))
22828 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22830 else if (int_reg_operand (op1, Pmode))
22832 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22834 emit_insn (gen_rtx_SET (scratch, addr));
22835 new_addr = scratch;
22839 /* Quad offsets are restricted and can't handle normal addresses. */
22840 else if (mode_supports_vsx_dform_quad (mode))
22842 emit_insn (gen_rtx_SET (scratch, addr));
22843 new_addr = scratch;
22846 /* Make sure the register class can handle offset addresses. */
22847 else if (legitimate_lo_sum_address_p (mode, addr, false))
22849 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22851 emit_insn (gen_rtx_SET (scratch, addr));
22852 new_addr = scratch;
22856 else
22857 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22859 break;
22861 case SYMBOL_REF:
22862 case CONST:
22863 case LABEL_REF:
22864 rs6000_emit_move (scratch, addr, Pmode);
22865 new_addr = scratch;
22866 break;
22868 default:
22869 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22872 /* Adjust the address if it changed. */
22873 if (addr != new_addr)
22875 mem = replace_equiv_address_nv (mem, new_addr);
22876 if (TARGET_DEBUG_ADDR)
22877 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
22880 /* Now create the move. */
22881 if (store_p)
22882 emit_insn (gen_rtx_SET (mem, reg));
22883 else
22884 emit_insn (gen_rtx_SET (reg, mem));
22886 return;
22889 /* Convert reloads involving 64-bit gprs and misaligned offset
22890 addressing, or multiple 32-bit gprs and offsets that are too large,
22891 to use indirect addressing. */
22893 void
22894 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
22896 int regno = true_regnum (reg);
22897 enum reg_class rclass;
22898 rtx addr;
22899 rtx scratch_or_premodify = scratch;
22901 if (TARGET_DEBUG_ADDR)
22903 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
22904 store_p ? "store" : "load");
22905 fprintf (stderr, "reg:\n");
22906 debug_rtx (reg);
22907 fprintf (stderr, "mem:\n");
22908 debug_rtx (mem);
22909 fprintf (stderr, "scratch:\n");
22910 debug_rtx (scratch);
22913 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
22914 gcc_assert (GET_CODE (mem) == MEM);
22915 rclass = REGNO_REG_CLASS (regno);
22916 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
22917 addr = XEXP (mem, 0);
22919 if (GET_CODE (addr) == PRE_MODIFY)
22921 gcc_assert (REG_P (XEXP (addr, 0))
22922 && GET_CODE (XEXP (addr, 1)) == PLUS
22923 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
22924 scratch_or_premodify = XEXP (addr, 0);
22925 if (!HARD_REGISTER_P (scratch_or_premodify))
22926 /* If we have a pseudo here then reload will have arranged
22927 to have it replaced, but only in the original insn.
22928 Use the replacement here too. */
22929 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
22931 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
22932 expressions from the original insn, without unsharing them.
22933 Any RTL that points into the original insn will of course
22934 have register replacements applied. That is why we don't
22935 need to look for replacements under the PLUS. */
22936 addr = XEXP (addr, 1);
22938 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
22940 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
22942 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
22944 /* Now create the move. */
22945 if (store_p)
22946 emit_insn (gen_rtx_SET (mem, reg));
22947 else
22948 emit_insn (gen_rtx_SET (reg, mem));
22950 return;
22953 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
22954 this function has any SDmode references. If we are on a power7 or later, we
22955 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
22956 can load/store the value. */
22958 static void
22959 rs6000_alloc_sdmode_stack_slot (void)
22961 tree t;
22962 basic_block bb;
22963 gimple_stmt_iterator gsi;
22965 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
22966 /* We use a different approach for dealing with the secondary
22967 memory in LRA. */
22968 if (ira_use_lra_p)
22969 return;
22971 if (TARGET_NO_SDMODE_STACK)
22972 return;
22974 FOR_EACH_BB_FN (bb, cfun)
22975 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
22977 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
22978 if (ret)
22980 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22981 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22982 SDmode, 0);
22983 return;
22987 /* Check for any SDmode parameters of the function. */
22988 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
22990 if (TREE_TYPE (t) == error_mark_node)
22991 continue;
22993 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
22994 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
22996 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22997 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22998 SDmode, 0);
22999 return;
23004 static void
23005 rs6000_instantiate_decls (void)
23007 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
23008 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
23011 /* Given an rtx X being reloaded into a reg required to be
23012 in class CLASS, return the class of reg to actually use.
23013 In general this is just CLASS; but on some machines
23014 in some cases it is preferable to use a more restrictive class.
23016 On the RS/6000, we have to return NO_REGS when we want to reload a
23017 floating-point CONST_DOUBLE to force it to be copied to memory.
23019 We also don't want to reload integer values into floating-point
23020 registers if we can at all help it. In fact, this can
23021 cause reload to die, if it tries to generate a reload of CTR
23022 into a FP register and discovers it doesn't have the memory location
23023 required.
23025 ??? Would it be a good idea to have reload do the converse, that is
23026 try to reload floating modes into FP registers if possible?
23029 static enum reg_class
23030 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
23032 machine_mode mode = GET_MODE (x);
23033 bool is_constant = CONSTANT_P (x);
23035 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
23036 reload class for it. */
23037 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
23038 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
23039 return NO_REGS;
23041 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
23042 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
23043 return NO_REGS;
23045 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
23046 the reloading of address expressions using PLUS into floating point
23047 registers. */
23048 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
23050 if (is_constant)
23052 /* Zero is always allowed in all VSX registers. */
23053 if (x == CONST0_RTX (mode))
23054 return rclass;
23056 /* If this is a vector constant that can be formed with a few Altivec
23057 instructions, we want altivec registers. */
23058 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
23059 return ALTIVEC_REGS;
23061 /* If this is an integer constant that can easily be loaded into
23062 vector registers, allow it. */
23063 if (CONST_INT_P (x))
23065 HOST_WIDE_INT value = INTVAL (x);
23067 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
23068 2.06 can generate it in the Altivec registers with
23069 VSPLTI<x>. */
23070 if (value == -1)
23072 if (TARGET_P8_VECTOR)
23073 return rclass;
23074 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
23075 return ALTIVEC_REGS;
23076 else
23077 return NO_REGS;
23080 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
23081 a sign extend in the Altivec registers. */
23082 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
23083 && TARGET_VSX_SMALL_INTEGER
23084 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
23085 return ALTIVEC_REGS;
23088 /* Force constant to memory. */
23089 return NO_REGS;
23092 /* D-form addressing can easily reload the value. */
23093 if (mode_supports_vmx_dform (mode)
23094 || mode_supports_vsx_dform_quad (mode))
23095 return rclass;
23097 /* If this is a scalar floating point value and we don't have D-form
23098 addressing, prefer the traditional floating point registers so that we
23099 can use D-form (register+offset) addressing. */
23100 if (rclass == VSX_REGS
23101 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
23102 return FLOAT_REGS;
23104 /* Prefer the Altivec registers if Altivec is handling the vector
23105 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
23106 loads. */
23107 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
23108 || mode == V1TImode)
23109 return ALTIVEC_REGS;
23111 return rclass;
23114 if (is_constant || GET_CODE (x) == PLUS)
23116 if (reg_class_subset_p (GENERAL_REGS, rclass))
23117 return GENERAL_REGS;
23118 if (reg_class_subset_p (BASE_REGS, rclass))
23119 return BASE_REGS;
23120 return NO_REGS;
23123 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
23124 return GENERAL_REGS;
23126 return rclass;
23129 /* Debug version of rs6000_preferred_reload_class. */
23130 static enum reg_class
23131 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
23133 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
23135 fprintf (stderr,
23136 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
23137 "mode = %s, x:\n",
23138 reg_class_names[ret], reg_class_names[rclass],
23139 GET_MODE_NAME (GET_MODE (x)));
23140 debug_rtx (x);
23142 return ret;
23145 /* If we are copying between FP or AltiVec registers and anything else, we need
23146 a memory location. The exception is when we are targeting ppc64 and the
23147 instructions to move between fpr and gpr are available. Also, under VSX, you
23148 can copy vector registers from the FP register set to the Altivec register
23149 set and vice versa. */
23151 static bool
23152 rs6000_secondary_memory_needed (enum reg_class from_class,
23153 enum reg_class to_class,
23154 machine_mode mode)
23156 enum rs6000_reg_type from_type, to_type;
23157 bool altivec_p = ((from_class == ALTIVEC_REGS)
23158 || (to_class == ALTIVEC_REGS));
23160 /* If a simple/direct move is available, we don't need secondary memory. */
23161 from_type = reg_class_to_reg_type[(int)from_class];
23162 to_type = reg_class_to_reg_type[(int)to_class];
23164 if (rs6000_secondary_reload_move (to_type, from_type, mode,
23165 (secondary_reload_info *)0, altivec_p))
23166 return false;
23168 /* If we have a floating point or vector register class, we need to use
23169 memory to transfer the data. */
23170 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
23171 return true;
23173 return false;
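/* A minimal sketch of the rule above with the register types reduced to
   three; the real code classifies through reg_class_to_reg_type and also
   covers SPE and the special registers.  */
#include <stdbool.h>

enum reg_type { GPR_TYPE, FPR_TYPE, VMX_TYPE };

static bool
needs_secondary_memory (enum reg_type from, enum reg_type to,
                        bool direct_move_ok)
{
  if (direct_move_ok)           /* e.g. mfvsrd/mtvsrd under 64-bit VSX.  */
    return false;

  /* Any copy touching a floating point or vector register otherwise has
     to bounce through memory.  */
  return from != GPR_TYPE || to != GPR_TYPE;
}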
23176 /* Debug version of rs6000_secondary_memory_needed. */
23177 static bool
23178 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
23179 enum reg_class to_class,
23180 machine_mode mode)
23182 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
23184 fprintf (stderr,
23185 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
23186 "to_class = %s, mode = %s\n",
23187 ret ? "true" : "false",
23188 reg_class_names[from_class],
23189 reg_class_names[to_class],
23190 GET_MODE_NAME (mode));
23192 return ret;
23195 /* Return the register class of a scratch register needed to copy IN into
23196 or out of a register in RCLASS in MODE. If it can be done directly,
23197 NO_REGS is returned. */
23199 static enum reg_class
23200 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
23201 rtx in)
23203 int regno;
23205 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
23206 #if TARGET_MACHO
23207 && MACHOPIC_INDIRECT
23208 #endif
23211 /* We cannot copy a symbolic operand directly into anything
23212 other than BASE_REGS for TARGET_ELF. So indicate that a
23213 register from BASE_REGS is needed as an intermediate
23214 register.
23216 On Darwin, pic addresses require a load from memory, which
23217 needs a base register. */
23218 if (rclass != BASE_REGS
23219 && (GET_CODE (in) == SYMBOL_REF
23220 || GET_CODE (in) == HIGH
23221 || GET_CODE (in) == LABEL_REF
23222 || GET_CODE (in) == CONST))
23223 return BASE_REGS;
23226 if (GET_CODE (in) == REG)
23228 regno = REGNO (in);
23229 if (regno >= FIRST_PSEUDO_REGISTER)
23231 regno = true_regnum (in);
23232 if (regno >= FIRST_PSEUDO_REGISTER)
23233 regno = -1;
23236 else if (GET_CODE (in) == SUBREG)
23238 regno = true_regnum (in);
23239 if (regno >= FIRST_PSEUDO_REGISTER)
23240 regno = -1;
23242 else
23243 regno = -1;
23245 /* If we have VSX register moves, prefer moving scalar values between
23246 Altivec registers and GPR by going via an FPR (and then via memory)
23247 instead of reloading the secondary memory address for Altivec moves. */
23248 if (TARGET_VSX
23249 && GET_MODE_SIZE (mode) < 16
23250 && !mode_supports_vmx_dform (mode)
23251 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
23252 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
23253 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
23254 && (regno >= 0 && INT_REGNO_P (regno)))))
23255 return FLOAT_REGS;
23257 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
23258 into anything. */
23259 if (rclass == GENERAL_REGS || rclass == BASE_REGS
23260 || (regno >= 0 && INT_REGNO_P (regno)))
23261 return NO_REGS;
23263 /* Constants, memory, and VSX registers can go into VSX registers (both the
23264 traditional floating point and the altivec registers). */
23265 if (rclass == VSX_REGS
23266 && (regno == -1 || VSX_REGNO_P (regno)))
23267 return NO_REGS;
23269 /* Constants, memory, and FP registers can go into FP registers. */
23270 if ((regno == -1 || FP_REGNO_P (regno))
23271 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
23272 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
23274 /* Memory and AltiVec registers can go into AltiVec registers. */
23275 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
23276 && rclass == ALTIVEC_REGS)
23277 return NO_REGS;
23279 /* We can copy among the CR registers. */
23280 if ((rclass == CR_REGS || rclass == CR0_REGS)
23281 && regno >= 0 && CR_REGNO_P (regno))
23282 return NO_REGS;
23284 /* Otherwise, we need GENERAL_REGS. */
23285 return GENERAL_REGS;
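/* A minimal sketch of the symbolic-operand case above: moving a
   SYMBOL_REF, HIGH, LABEL_REF or CONST into anything but BASE_REGS needs
   a BASE_REGS intermediate.  The enum is a stand-in for GET_CODE.  */
#include <stdbool.h>

enum op_code { OP_SYMBOL_REF, OP_HIGH, OP_LABEL_REF, OP_CONST, OP_OTHER };

static bool
needs_base_reg_scratch (enum op_code code, bool rclass_is_base_regs)
{
  if (rclass_is_base_regs)
    return false;               /* BASE_REGS can take the symbol directly.  */

  return code == OP_SYMBOL_REF || code == OP_HIGH
         || code == OP_LABEL_REF || code == OP_CONST;
}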
23288 /* Debug version of rs6000_secondary_reload_class. */
23289 static enum reg_class
23290 rs6000_debug_secondary_reload_class (enum reg_class rclass,
23291 machine_mode mode, rtx in)
23293 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
23294 fprintf (stderr,
23295 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
23296 "mode = %s, input rtx:\n",
23297 reg_class_names[ret], reg_class_names[rclass],
23298 GET_MODE_NAME (mode));
23299 debug_rtx (in);
23301 return ret;
23304 /* Return nonzero if a mode change from FROM to TO is invalid for CLASS. */
23306 static bool
23307 rs6000_cannot_change_mode_class (machine_mode from,
23308 machine_mode to,
23309 enum reg_class rclass)
23311 unsigned from_size = GET_MODE_SIZE (from);
23312 unsigned to_size = GET_MODE_SIZE (to);
23314 if (from_size != to_size)
23316 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
23318 if (reg_classes_intersect_p (xclass, rclass))
23320 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
23321 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
23322 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
23323 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
23325 /* Don't allow 64-bit types to overlap with 128-bit types that take a
23326 single register under VSX because the scalar part of the register
23327 is in the upper 64-bits, and not the lower 64-bits. Types like
23328 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
23329 IEEE floating point can't overlap, and neither can small
23330 values. */
23332 if (to_float128_vector_p && from_float128_vector_p)
23333 return false;
23335 else if (to_float128_vector_p || from_float128_vector_p)
23336 return true;
23338 /* TDmode in floating-mode registers must always go into a register
23339 pair with the most significant word in the even-numbered register
23340 to match ISA requirements. In little-endian mode, this does not
23341 match subreg numbering, so we cannot allow subregs. */
23342 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
23343 return true;
23345 if (from_size < 8 || to_size < 8)
23346 return true;
23348 if (from_size == 8 && (8 * to_nregs) != to_size)
23349 return true;
23351 if (to_size == 8 && (8 * from_nregs) != from_size)
23352 return true;
23354 return false;
23356 else
23357 return false;
23360 if (TARGET_E500_DOUBLE
23361 && ((((to) == DFmode) + ((from) == DFmode)) == 1
23362 || (((to) == TFmode) + ((from) == TFmode)) == 1
23363 || (((to) == IFmode) + ((from) == IFmode)) == 1
23364 || (((to) == KFmode) + ((from) == KFmode)) == 1
23365 || (((to) == DDmode) + ((from) == DDmode)) == 1
23366 || (((to) == TDmode) + ((from) == TDmode)) == 1
23367 || (((to) == DImode) + ((from) == DImode)) == 1))
23368 return true;
23370 /* Since the VSX register set includes traditional floating point registers
23371 and altivec registers, just check for the size being different instead of
23372 trying to check whether the modes are vector modes. Otherwise it won't
23373 allow, say, DF and DI to change classes. For types like TFmode and TDmode
23374 that take 2 64-bit registers, rather than a single 128-bit register, don't
23375 allow subregs of those types to other 128-bit types. */
23376 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
23378 unsigned num_regs = (from_size + 15) / 16;
23379 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
23380 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
23381 return true;
23383 return (from_size != 8 && from_size != 16);
23386 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
23387 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
23388 return true;
23390 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
23391 && reg_classes_intersect_p (GENERAL_REGS, rclass))
23392 return true;
23394 return false;
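/* A standalone sketch of the VSX arm above.  Sizes are in bytes as with
   GET_MODE_SIZE, and the NREGS arguments mirror hard_regno_nregs; a
   nonzero result means the subreg is rejected.  */
static int
vsx_cannot_change_mode (unsigned from_size, unsigned from_nregs,
                        unsigned to_nregs)
{
  unsigned num_regs = (from_size + 15) / 16;

  if (to_nregs > num_regs || from_nregs > num_regs)
    return 1;                   /* one side needs more registers.  */

  return from_size != 8 && from_size != 16;
}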
23397 /* Debug version of rs6000_cannot_change_mode_class. */
23398 static bool
23399 rs6000_debug_cannot_change_mode_class (machine_mode from,
23400 machine_mode to,
23401 enum reg_class rclass)
23403 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
23405 fprintf (stderr,
23406 "rs6000_cannot_change_mode_class, return %s, from = %s, "
23407 "to = %s, rclass = %s\n",
23408 ret ? "true" : "false",
23409 GET_MODE_NAME (from), GET_MODE_NAME (to),
23410 reg_class_names[rclass]);
23412 return ret;
23415 /* Return a string to do a move operation of 128 bits of data. */
23417 const char *
23418 rs6000_output_move_128bit (rtx operands[])
23420 rtx dest = operands[0];
23421 rtx src = operands[1];
23422 machine_mode mode = GET_MODE (dest);
23423 int dest_regno;
23424 int src_regno;
23425 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
23426 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
23428 if (REG_P (dest))
23430 dest_regno = REGNO (dest);
23431 dest_gpr_p = INT_REGNO_P (dest_regno);
23432 dest_fp_p = FP_REGNO_P (dest_regno);
23433 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
23434 dest_vsx_p = dest_fp_p | dest_vmx_p;
23436 else
23438 dest_regno = -1;
23439 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
23442 if (REG_P (src))
23444 src_regno = REGNO (src);
23445 src_gpr_p = INT_REGNO_P (src_regno);
23446 src_fp_p = FP_REGNO_P (src_regno);
23447 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
23448 src_vsx_p = src_fp_p | src_vmx_p;
23450 else
23452 src_regno = -1;
23453 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
23456 /* Register moves. */
23457 if (dest_regno >= 0 && src_regno >= 0)
23459 if (dest_gpr_p)
23461 if (src_gpr_p)
23462 return "#";
23464 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
23465 return (WORDS_BIG_ENDIAN
23466 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
23467 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
23469 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
23470 return "#";
23473 else if (TARGET_VSX && dest_vsx_p)
23475 if (src_vsx_p)
23476 return "xxlor %x0,%x1,%x1";
23478 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
23479 return (WORDS_BIG_ENDIAN
23480 ? "mtvsrdd %x0,%1,%L1"
23481 : "mtvsrdd %x0,%L1,%1");
23483 else if (TARGET_DIRECT_MOVE && src_gpr_p)
23484 return "#";
23487 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
23488 return "vor %0,%1,%1";
23490 else if (dest_fp_p && src_fp_p)
23491 return "#";
23494 /* Loads. */
23495 else if (dest_regno >= 0 && MEM_P (src))
23497 if (dest_gpr_p)
23499 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23500 return "lq %0,%1";
23501 else
23502 return "#";
23505 else if (TARGET_ALTIVEC && dest_vmx_p
23506 && altivec_indexed_or_indirect_operand (src, mode))
23507 return "lvx %0,%y1";
23509 else if (TARGET_VSX && dest_vsx_p)
23511 if (mode_supports_vsx_dform_quad (mode)
23512 && quad_address_p (XEXP (src, 0), mode, true))
23513 return "lxv %x0,%1";
23515 else if (TARGET_P9_VECTOR)
23516 return "lxvx %x0,%y1";
23518 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23519 return "lxvw4x %x0,%y1";
23521 else
23522 return "lxvd2x %x0,%y1";
23525 else if (TARGET_ALTIVEC && dest_vmx_p)
23526 return "lvx %0,%y1";
23528 else if (dest_fp_p)
23529 return "#";
23532 /* Stores. */
23533 else if (src_regno >= 0 && MEM_P (dest))
23535 if (src_gpr_p)
23537 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23538 return "stq %1,%0";
23539 else
23540 return "#";
23543 else if (TARGET_ALTIVEC && src_vmx_p
23544 && altivec_indexed_or_indirect_operand (dest, mode))
23545 return "stvx %1,%y0";
23547 else if (TARGET_VSX && src_vsx_p)
23549 if (mode_supports_vsx_dform_quad (mode)
23550 && quad_address_p (XEXP (dest, 0), mode, true))
23551 return "stxv %x1,%0";
23553 else if (TARGET_P9_VECTOR)
23554 return "stxvx %x1,%y0";
23556 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23557 return "stxvw4x %x1,%y0";
23559 else
23560 return "stxvd2x %x1,%y0";
23563 else if (TARGET_ALTIVEC && src_vmx_p)
23564 return "stvx %1,%y0";
23566 else if (src_fp_p)
23567 return "#";
23570 /* Constants. */
23571 else if (dest_regno >= 0
23572 && (GET_CODE (src) == CONST_INT
23573 || GET_CODE (src) == CONST_WIDE_INT
23574 || GET_CODE (src) == CONST_DOUBLE
23575 || GET_CODE (src) == CONST_VECTOR))
23577 if (dest_gpr_p)
23578 return "#";
23580 else if ((dest_vmx_p && TARGET_ALTIVEC)
23581 || (dest_vsx_p && TARGET_VSX))
23582 return output_vec_const_move (operands);
23585 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
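/* A minimal sketch of the register-to-register arm above, reduced to a
   table.  The booleans stand in for INT_REGNO_P and the VSX register
   tests, and direct_move_128 for TARGET_DIRECT_MOVE_128; the strings are
   the same templates the function returns.  */
#include <stdbool.h>
#include <stddef.h>

static const char *
move128_reg_reg (bool dest_gpr, bool dest_vsx, bool src_gpr, bool src_vsx,
                 bool direct_move_128, bool big_endian)
{
  if (dest_gpr && src_gpr)
    return "#";                 /* split into two doubleword moves.  */

  if (dest_gpr && src_vsx && direct_move_128)
    return big_endian ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
                      : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1";

  if (dest_vsx && src_vsx)
    return "xxlor %x0,%x1,%x1"; /* full 128-bit VSX copy.  */

  if (dest_vsx && src_gpr && direct_move_128)
    return big_endian ? "mtvsrdd %x0,%1,%L1" : "mtvsrdd %x0,%L1,%1";

  return NULL;                  /* remaining cases elided in this sketch.  */
}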
23588 /* Validate a 128-bit move. */
23589 bool
23590 rs6000_move_128bit_ok_p (rtx operands[])
23592 machine_mode mode = GET_MODE (operands[0]);
23593 return (gpc_reg_operand (operands[0], mode)
23594 || gpc_reg_operand (operands[1], mode));
23597 /* Return true if a 128-bit move needs to be split. */
23598 bool
23599 rs6000_split_128bit_ok_p (rtx operands[])
23601 if (!reload_completed)
23602 return false;
23604 if (!gpr_or_gpr_p (operands[0], operands[1]))
23605 return false;
23607 if (quad_load_store_p (operands[0], operands[1]))
23608 return false;
23610 return true;
23614 /* Given a comparison operation, return the bit number in CCR to test. We
23615 know this is a valid comparison.
23617 SCC_P is 1 if this is for an scc. That means that %D will have been
23618 used instead of %C, so the bits will be in different places.
23620 Return -1 if OP isn't a valid comparison for some reason. */
23622 int
23623 ccr_bit (rtx op, int scc_p)
23625 enum rtx_code code = GET_CODE (op);
23626 machine_mode cc_mode;
23627 int cc_regnum;
23628 int base_bit;
23629 rtx reg;
23631 if (!COMPARISON_P (op))
23632 return -1;
23634 reg = XEXP (op, 0);
23636 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
23638 cc_mode = GET_MODE (reg);
23639 cc_regnum = REGNO (reg);
23640 base_bit = 4 * (cc_regnum - CR0_REGNO);
23642 validate_condition_mode (code, cc_mode);
23644 /* When generating a sCOND operation, only positive conditions are
23645 allowed. */
23646 gcc_assert (!scc_p
23647 || code == EQ || code == GT || code == LT || code == UNORDERED
23648 || code == GTU || code == LTU);
23650 switch (code)
23652 case NE:
23653 return scc_p ? base_bit + 3 : base_bit + 2;
23654 case EQ:
23655 return base_bit + 2;
23656 case GT: case GTU: case UNLE:
23657 return base_bit + 1;
23658 case LT: case LTU: case UNGE:
23659 return base_bit;
23660 case ORDERED: case UNORDERED:
23661 return base_bit + 3;
23663 case GE: case GEU:
23664 /* If scc, we will have done a cror to put the bit in the
23665 unordered position. So test that bit. For integer, this is ! LT
23666 unless this is an scc insn. */
23667 return scc_p ? base_bit + 3 : base_bit;
23669 case LE: case LEU:
23670 return scc_p ? base_bit + 3 : base_bit + 1;
23672 default:
23673 gcc_unreachable ();
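/* The bit arithmetic above as a standalone program.  CR0's hard register
   number is assumed to be 68 here, matching CR0_REGNO on this port, so
   cr2 (regno 70) starts at bit 8.  */
#include <stdio.h>

#define CR0_REGNO 68            /* assumed first CR hard register.  */

int main (void)
{
  int cc_regnum = 70;                           /* cr2 */
  int base_bit = 4 * (cc_regnum - CR0_REGNO);   /* 8 */

  printf ("cr2: LT=%d GT=%d EQ=%d SO=%d\n",
          base_bit, base_bit + 1, base_bit + 2, base_bit + 3);
  /* Prints: cr2: LT=8 GT=9 EQ=10 SO=11 */
  return 0;
}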
23677 /* Return the GOT register. */
23679 rtx
23680 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
23682 /* The second flow pass currently (June 1999) can't update
23683 regs_ever_live without disturbing other parts of the compiler, so
23684 update it here to make the prolog/epilogue code happy. */
23685 if (!can_create_pseudo_p ()
23686 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
23687 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
23689 crtl->uses_pic_offset_table = 1;
23691 return pic_offset_table_rtx;
23694 static rs6000_stack_t stack_info;
23696 /* Function to init struct machine_function.
23697 This will be called, via a pointer variable,
23698 from push_function_context. */
23700 static struct machine_function *
23701 rs6000_init_machine_status (void)
23703 stack_info.reload_completed = 0;
23704 return ggc_cleared_alloc<machine_function> ();
23707 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
23709 /* Write out a function code label. */
23711 void
23712 rs6000_output_function_entry (FILE *file, const char *fname)
23714 if (fname[0] != '.')
23716 switch (DEFAULT_ABI)
23718 default:
23719 gcc_unreachable ();
23721 case ABI_AIX:
23722 if (DOT_SYMBOLS)
23723 putc ('.', file);
23724 else
23725 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
23726 break;
23728 case ABI_ELFv2:
23729 case ABI_V4:
23730 case ABI_DARWIN:
23731 break;
23735 RS6000_OUTPUT_BASENAME (file, fname);
23738 /* Print an operand. Recognize special options, documented below. */
23740 #if TARGET_ELF
23741 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
23742 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
23743 #else
23744 #define SMALL_DATA_RELOC "sda21"
23745 #define SMALL_DATA_REG 0
23746 #endif
23748 void
23749 print_operand (FILE *file, rtx x, int code)
23751 int i;
23752 unsigned HOST_WIDE_INT uval;
23754 switch (code)
23756 /* %a is output_address. */
23758 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
23759 output_operand. */
23761 case 'D':
23762 /* Like 'J' but get to the GT bit only. */
23763 gcc_assert (REG_P (x));
23765 /* Bit 1 is GT bit. */
23766 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
23768 /* Add one for shift count in rlinm for scc. */
23769 fprintf (file, "%d", i + 1);
23770 return;
23772 case 'e':
23773 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
23774 if (! INT_P (x))
23776 output_operand_lossage ("invalid %%e value");
23777 return;
23780 uval = INTVAL (x);
23781 if ((uval & 0xffff) == 0 && uval != 0)
23782 putc ('s', file);
23783 return;
23785 case 'E':
23786 /* X is a CR register. Print the number of the EQ bit of the CR. */
23787 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23788 output_operand_lossage ("invalid %%E value");
23789 else
23790 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
23791 return;
23793 case 'f':
23794 /* X is a CR register. Print the shift count needed to move it
23795 to the high-order four bits. */
23796 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23797 output_operand_lossage ("invalid %%f value");
23798 else
23799 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
23800 return;
23802 case 'F':
23803 /* Similar, but print the count for the rotate in the opposite
23804 direction. */
23805 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23806 output_operand_lossage ("invalid %%F value");
23807 else
23808 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
23809 return;
23811 case 'G':
23812 /* X is a constant integer. If it is negative, print "m",
23813 otherwise print "z". This is to make an aze or ame insn. */
23814 if (GET_CODE (x) != CONST_INT)
23815 output_operand_lossage ("invalid %%G value");
23816 else if (INTVAL (x) >= 0)
23817 putc ('z', file);
23818 else
23819 putc ('m', file);
23820 return;
23822 case 'h':
23823 /* If constant, output low-order five bits. Otherwise, write
23824 normally. */
23825 if (INT_P (x))
23826 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
23827 else
23828 print_operand (file, x, 0);
23829 return;
23831 case 'H':
23832 /* If constant, output low-order six bits. Otherwise, write
23833 normally. */
23834 if (INT_P (x))
23835 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
23836 else
23837 print_operand (file, x, 0);
23838 return;
23840 case 'I':
23841 /* Print `i' if this is a constant, else nothing. */
23842 if (INT_P (x))
23843 putc ('i', file);
23844 return;
23846 case 'j':
23847 /* Write the bit number in CCR for jump. */
23848 i = ccr_bit (x, 0);
23849 if (i == -1)
23850 output_operand_lossage ("invalid %%j code");
23851 else
23852 fprintf (file, "%d", i);
23853 return;
23855 case 'J':
23856 /* Similar, but add one for shift count in rlinm for scc and pass
23857 scc flag to `ccr_bit'. */
23858 i = ccr_bit (x, 1);
23859 if (i == -1)
23860 output_operand_lossage ("invalid %%J code");
23861 else
23862 /* If we want bit 31, write a shift count of zero, not 32. */
23863 fprintf (file, "%d", i == 31 ? 0 : i + 1);
23864 return;
23866 case 'k':
23867 /* X must be a constant. Write the 1's complement of the
23868 constant. */
23869 if (! INT_P (x))
23870 output_operand_lossage ("invalid %%k value");
23871 else
23872 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
23873 return;
23875 case 'K':
23876 /* X must be a symbolic constant on ELF. Write an
23877 expression suitable for an 'addi' that adds in the low 16
23878 bits of the MEM. */
23879 if (GET_CODE (x) == CONST)
23881 if (GET_CODE (XEXP (x, 0)) != PLUS
23882 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
23883 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
23884 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
23885 output_operand_lossage ("invalid %%K value");
23887 print_operand_address (file, x);
23888 fputs ("@l", file);
23889 return;
23891 /* %l is output_asm_label. */
23893 case 'L':
23894 /* Write second word of DImode or DFmode reference. Works on register
23895 or non-indexed memory only. */
23896 if (REG_P (x))
23897 fputs (reg_names[REGNO (x) + 1], file);
23898 else if (MEM_P (x))
23900 machine_mode mode = GET_MODE (x);
23901 /* Handle possible auto-increment. Since it is pre-increment and
23902 we have already done it, we can just use an offset of word. */
23903 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23904 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23905 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23906 UNITS_PER_WORD));
23907 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23908 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23909 UNITS_PER_WORD));
23910 else
23911 output_address (mode, XEXP (adjust_address_nv (x, SImode,
23912 UNITS_PER_WORD),
23913 0));
23915 if (small_data_operand (x, GET_MODE (x)))
23916 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23917 reg_names[SMALL_DATA_REG]);
23919 return;
23921 case 'N':
23922 /* Write the number of elements in the vector times 4. */
23923 if (GET_CODE (x) != PARALLEL)
23924 output_operand_lossage ("invalid %%N value");
23925 else
23926 fprintf (file, "%d", XVECLEN (x, 0) * 4);
23927 return;
23929 case 'O':
23930 /* Similar, but subtract 1 first. */
23931 if (GET_CODE (x) != PARALLEL)
23932 output_operand_lossage ("invalid %%O value");
23933 else
23934 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
23935 return;
23937 case 'p':
23938 /* X is a CONST_INT that is a power of two. Output the logarithm. */
23939 if (! INT_P (x)
23940 || INTVAL (x) < 0
23941 || (i = exact_log2 (INTVAL (x))) < 0)
23942 output_operand_lossage ("invalid %%p value");
23943 else
23944 fprintf (file, "%d", i);
23945 return;
23947 case 'P':
23948 /* The operand must be an indirect memory reference. The result
23949 is the register name. */
23950 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
23951 || REGNO (XEXP (x, 0)) >= 32)
23952 output_operand_lossage ("invalid %%P value");
23953 else
23954 fputs (reg_names[REGNO (XEXP (x, 0))], file);
23955 return;
23957 case 'q':
23958 /* This outputs the logical code corresponding to a boolean
23959 expression. The expression may have one or both operands
23960 negated (if one, only the first one). For condition register
23961 logical operations, it will also treat the negated
23962 CR codes as NOTs, but not handle NOTs of them. */
23964 const char *const *t = 0;
23965 const char *s;
23966 enum rtx_code code = GET_CODE (x);
23967 static const char * const tbl[3][3] = {
23968 { "and", "andc", "nor" },
23969 { "or", "orc", "nand" },
23970 { "xor", "eqv", "xor" } };
23972 if (code == AND)
23973 t = tbl[0];
23974 else if (code == IOR)
23975 t = tbl[1];
23976 else if (code == XOR)
23977 t = tbl[2];
23978 else
23979 output_operand_lossage ("invalid %%q value");
23981 if (GET_CODE (XEXP (x, 0)) != NOT)
23982 s = t[0];
23983 else
23985 if (GET_CODE (XEXP (x, 1)) == NOT)
23986 s = t[2];
23987 else
23988 s = t[1];
23991 fputs (s, file);
23993 return;
23995 case 'Q':
23996 if (! TARGET_MFCRF)
23997 return;
23998 fputc (',', file);
23999 /* FALLTHRU */
24001 case 'R':
24002 /* X is a CR register. Print the mask for `mtcrf'. */
24003 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
24004 output_operand_lossage ("invalid %%R value");
24005 else
24006 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
24007 return;
24009 case 's':
24010 /* Low 5 bits of 32 - value */
24011 if (! INT_P (x))
24012 output_operand_lossage ("invalid %%s value");
24013 else
24014 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
24015 return;
24017 case 't':
24018 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
24019 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
24021 /* Bit 3 is OV bit. */
24022 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
24024 /* If we want bit 31, write a shift count of zero, not 32. */
24025 fprintf (file, "%d", i == 31 ? 0 : i + 1);
24026 return;
24028 case 'T':
24029 /* Print the symbolic name of a branch target register. */
24030 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
24031 && REGNO (x) != CTR_REGNO))
24032 output_operand_lossage ("invalid %%T value");
24033 else if (REGNO (x) == LR_REGNO)
24034 fputs ("lr", file);
24035 else
24036 fputs ("ctr", file);
24037 return;
24039 case 'u':
24040 /* High-order or low-order 16 bits of constant, whichever is non-zero,
24041 for use in unsigned operand. */
24042 if (! INT_P (x))
24044 output_operand_lossage ("invalid %%u value");
24045 return;
24048 uval = INTVAL (x);
24049 if ((uval & 0xffff) == 0)
24050 uval >>= 16;
24052 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
24053 return;
24055 case 'v':
24056 /* High-order 16 bits of constant for use in signed operand. */
24057 if (! INT_P (x))
24058 output_operand_lossage ("invalid %%v value");
24059 else
24060 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
24061 (INTVAL (x) >> 16) & 0xffff);
24062 return;
24064 case 'U':
24065 /* Print `u' if this has an auto-increment or auto-decrement. */
24066 if (MEM_P (x)
24067 && (GET_CODE (XEXP (x, 0)) == PRE_INC
24068 || GET_CODE (XEXP (x, 0)) == PRE_DEC
24069 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
24070 putc ('u', file);
24071 return;
24073 case 'V':
24074 /* Print the trap code for this operand. */
24075 switch (GET_CODE (x))
24077 case EQ:
24078 fputs ("eq", file); /* 4 */
24079 break;
24080 case NE:
24081 fputs ("ne", file); /* 24 */
24082 break;
24083 case LT:
24084 fputs ("lt", file); /* 16 */
24085 break;
24086 case LE:
24087 fputs ("le", file); /* 20 */
24088 break;
24089 case GT:
24090 fputs ("gt", file); /* 8 */
24091 break;
24092 case GE:
24093 fputs ("ge", file); /* 12 */
24094 break;
24095 case LTU:
24096 fputs ("llt", file); /* 2 */
24097 break;
24098 case LEU:
24099 fputs ("lle", file); /* 6 */
24100 break;
24101 case GTU:
24102 fputs ("lgt", file); /* 1 */
24103 break;
24104 case GEU:
24105 fputs ("lge", file); /* 5 */
24106 break;
24107 default:
24108 gcc_unreachable ();
24110 break;
24112 case 'w':
24113 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
24114 normally. */
24115 if (INT_P (x))
24116 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
24117 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
24118 else
24119 print_operand (file, x, 0);
24120 return;
24122 case 'x':
24123 /* X is a FPR or Altivec register used in a VSX context. */
24124 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
24125 output_operand_lossage ("invalid %%x value");
24126 else
24128 int reg = REGNO (x);
24129 int vsx_reg = (FP_REGNO_P (reg)
24130 ? reg - 32
24131 : reg - FIRST_ALTIVEC_REGNO + 32);
24133 #ifdef TARGET_REGNAMES
24134 if (TARGET_REGNAMES)
24135 fprintf (file, "%%vs%d", vsx_reg);
24136 else
24137 #endif
24138 fprintf (file, "%d", vsx_reg);
24140 return;
24142 case 'X':
24143 if (MEM_P (x)
24144 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
24145 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
24146 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
24147 putc ('x', file);
24148 return;
24150 case 'Y':
24151 /* Like 'L', for third word of TImode/PTImode */
24152 if (REG_P (x))
24153 fputs (reg_names[REGNO (x) + 2], file);
24154 else if (MEM_P (x))
24156 machine_mode mode = GET_MODE (x);
24157 if (GET_CODE (XEXP (x, 0)) == PRE_INC
24158 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24159 output_address (mode, plus_constant (Pmode,
24160 XEXP (XEXP (x, 0), 0), 8));
24161 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24162 output_address (mode, plus_constant (Pmode,
24163 XEXP (XEXP (x, 0), 0), 8));
24164 else
24165 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
24166 if (small_data_operand (x, GET_MODE (x)))
24167 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24168 reg_names[SMALL_DATA_REG]);
24170 return;
24172 case 'z':
24173 /* X is a SYMBOL_REF. Write out the name preceded by a
24174 period and without any trailing data in brackets. Used for function
24175 names. If we are configured for System V (or the embedded ABI) on
24176 the PowerPC, do not emit the period, since those systems do not use
24177 TOCs and the like. */
24178 gcc_assert (GET_CODE (x) == SYMBOL_REF);
24180 /* For macho, check to see if we need a stub. */
24181 if (TARGET_MACHO)
24183 const char *name = XSTR (x, 0);
24184 #if TARGET_MACHO
24185 if (darwin_emit_branch_islands
24186 && MACHOPIC_INDIRECT
24187 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
24188 name = machopic_indirection_name (x, /*stub_p=*/true);
24189 #endif
24190 assemble_name (file, name);
24192 else if (!DOT_SYMBOLS)
24193 assemble_name (file, XSTR (x, 0));
24194 else
24195 rs6000_output_function_entry (file, XSTR (x, 0));
24196 return;
24198 case 'Z':
24199 /* Like 'L', for last word of TImode/PTImode. */
24200 if (REG_P (x))
24201 fputs (reg_names[REGNO (x) + 3], file);
24202 else if (MEM_P (x))
24204 machine_mode mode = GET_MODE (x);
24205 if (GET_CODE (XEXP (x, 0)) == PRE_INC
24206 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24207 output_address (mode, plus_constant (Pmode,
24208 XEXP (XEXP (x, 0), 0), 12));
24209 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24210 output_address (mode, plus_constant (Pmode,
24211 XEXP (XEXP (x, 0), 0), 12));
24212 else
24213 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
24214 if (small_data_operand (x, GET_MODE (x)))
24215 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24216 reg_names[SMALL_DATA_REG]);
24218 return;
24220 /* Print AltiVec or SPE memory operand. */
24221 case 'y':
24223 rtx tmp;
24225 gcc_assert (MEM_P (x));
24227 tmp = XEXP (x, 0);
24229 /* Ugly hack because %y is overloaded. */
24230 if ((TARGET_SPE || TARGET_E500_DOUBLE)
24231 && (GET_MODE_SIZE (GET_MODE (x)) == 8
24232 || FLOAT128_2REG_P (GET_MODE (x))
24233 || GET_MODE (x) == TImode
24234 || GET_MODE (x) == PTImode))
24236 /* Handle [reg]. */
24237 if (REG_P (tmp))
24239 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
24240 break;
24242 /* Handle [reg+UIMM]. */
24243 else if (GET_CODE (tmp) == PLUS &&
24244 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
24246 int x;
24248 gcc_assert (REG_P (XEXP (tmp, 0)));
24250 x = INTVAL (XEXP (tmp, 1));
24251 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
24252 break;
24255 /* Fall through. Must be [reg+reg]. */
24257 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
24258 && GET_CODE (tmp) == AND
24259 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
24260 && INTVAL (XEXP (tmp, 1)) == -16)
24261 tmp = XEXP (tmp, 0);
24262 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
24263 && GET_CODE (tmp) == PRE_MODIFY)
24264 tmp = XEXP (tmp, 1);
24265 if (REG_P (tmp))
24266 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
24267 else
24269 if (GET_CODE (tmp) != PLUS
24270 || !REG_P (XEXP (tmp, 0))
24271 || !REG_P (XEXP (tmp, 1)))
24273 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
24274 break;
24277 if (REGNO (XEXP (tmp, 0)) == 0)
24278 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
24279 reg_names[ REGNO (XEXP (tmp, 0)) ]);
24280 else
24281 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
24282 reg_names[ REGNO (XEXP (tmp, 1)) ]);
24284 break;
24287 case 0:
24288 if (REG_P (x))
24289 fprintf (file, "%s", reg_names[REGNO (x)]);
24290 else if (MEM_P (x))
24292 /* We need to handle PRE_INC and PRE_DEC here, since we need to
24293 know the width from the mode. */
24294 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
24295 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
24296 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24297 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
24298 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
24299 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24300 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24301 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
24302 else
24303 output_address (GET_MODE (x), XEXP (x, 0));
24305 else
24307 if (toc_relative_expr_p (x, false))
24308 /* This hack along with a corresponding hack in
24309 rs6000_output_addr_const_extra arranges to output addends
24310 where the assembler expects to find them. e.g.
24311 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
24312 without this hack would be output as "x@toc+4". We
24313 want "x+4@toc". */
24314 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24315 else
24316 output_addr_const (file, x);
24318 return;
24320 case '&':
24321 if (const char *name = get_some_local_dynamic_name ())
24322 assemble_name (file, name);
24323 else
24324 output_operand_lossage ("'%%&' used without any "
24325 "local dynamic TLS references");
24326 return;
24328 default:
24329 output_operand_lossage ("invalid %%xn code");
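/* The '%u' case above as a standalone program: print whichever 16-bit
   half of the constant is non-zero, in hex as HOST_WIDE_INT_PRINT_HEX
   would.  */
#include <stdio.h>

static unsigned long
u_modifier (unsigned long uval)
{
  if ((uval & 0xffff) == 0)
    uval >>= 16;
  return uval & 0xffff;
}

int main (void)
{
  printf ("0x%lx\n", u_modifier (0x12340000ul));  /* 0x1234: high half */
  printf ("0x%lx\n", u_modifier (0x00005678ul));  /* 0x5678: low half */
  return 0;
}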
24333 /* Print the address of an operand. */
24335 void
24336 print_operand_address (FILE *file, rtx x)
24338 if (REG_P (x))
24339 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
24340 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
24341 || GET_CODE (x) == LABEL_REF)
24343 output_addr_const (file, x);
24344 if (small_data_operand (x, GET_MODE (x)))
24345 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24346 reg_names[SMALL_DATA_REG]);
24347 else
24348 gcc_assert (!TARGET_TOC);
24350 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24351 && REG_P (XEXP (x, 1)))
24353 if (REGNO (XEXP (x, 0)) == 0)
24354 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
24355 reg_names[ REGNO (XEXP (x, 0)) ]);
24356 else
24357 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
24358 reg_names[ REGNO (XEXP (x, 1)) ]);
24360 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24361 && GET_CODE (XEXP (x, 1)) == CONST_INT)
24362 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
24363 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
24364 #if TARGET_MACHO
24365 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24366 && CONSTANT_P (XEXP (x, 1)))
24368 fprintf (file, "lo16(");
24369 output_addr_const (file, XEXP (x, 1));
24370 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24372 #endif
24373 #if TARGET_ELF
24374 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24375 && CONSTANT_P (XEXP (x, 1)))
24377 output_addr_const (file, XEXP (x, 1));
24378 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24380 #endif
24381 else if (toc_relative_expr_p (x, false))
24383 /* This hack along with a corresponding hack in
24384 rs6000_output_addr_const_extra arranges to output addends
24385 where the assembler expects to find them. e.g.
24386 (lo_sum (reg 9)
24387 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
24388 without this hack would be output as "x@toc+8@l(9)". We
24389 want "x+8@toc@l(9)". */
24390 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24391 if (GET_CODE (x) == LO_SUM)
24392 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
24393 else
24394 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
24396 else
24397 gcc_unreachable ();
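/* The indexed-address ordering rule above, standalone.  When the first
   register is r0 the operands are swapped, because r0 in the base slot
   of an X-form address reads as the literal value 0; plain register
   numbers stand in for reg_names here.  */
#include <stdio.h>

static void
print_indexed (int ra, int rb)
{
  if (ra == 0)
    printf ("%d,%d\n", rb, ra); /* keep r0 out of the base slot.  */
  else
    printf ("%d,%d\n", ra, rb);
}

int main (void)
{
  print_indexed (0, 9);         /* prints 9,0 */
  print_indexed (3, 9);         /* prints 3,9 */
  return 0;
}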
24400 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
24402 static bool
24403 rs6000_output_addr_const_extra (FILE *file, rtx x)
24405 if (GET_CODE (x) == UNSPEC)
24406 switch (XINT (x, 1))
24408 case UNSPEC_TOCREL:
24409 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
24410 && REG_P (XVECEXP (x, 0, 1))
24411 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
24412 output_addr_const (file, XVECEXP (x, 0, 0));
24413 if (x == tocrel_base && tocrel_offset != const0_rtx)
24415 if (INTVAL (tocrel_offset) >= 0)
24416 fprintf (file, "+");
24417 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
24419 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
24421 putc ('-', file);
24422 assemble_name (file, toc_label_name);
24423 need_toc_init = 1;
24425 else if (TARGET_ELF)
24426 fputs ("@toc", file);
24427 return true;
24429 #if TARGET_MACHO
24430 case UNSPEC_MACHOPIC_OFFSET:
24431 output_addr_const (file, XVECEXP (x, 0, 0));
24432 putc ('-', file);
24433 machopic_output_function_base_name (file);
24434 return true;
24435 #endif
24437 return false;
24440 /* Target hook for assembling integer objects. The PowerPC version has
24441 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
24442 is defined. It also needs to handle DI-mode objects on 64-bit
24443 targets. */
24445 static bool
24446 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
24448 #ifdef RELOCATABLE_NEEDS_FIXUP
24449 /* Special handling for SI values. */
24450 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
24452 static int recurse = 0;
24454 /* For -mrelocatable, we mark all addresses that need to be fixed up in
24455 the .fixup section. Since the TOC section is already relocated, we
24456 don't need to mark it here. We used to skip the text section, but it
24457 should never be valid for relocated addresses to be placed in the text
24458 section. */
24459 if (DEFAULT_ABI == ABI_V4
24460 && (TARGET_RELOCATABLE || flag_pic > 1)
24461 && in_section != toc_section
24462 && !recurse
24463 && !CONST_SCALAR_INT_P (x)
24464 && CONSTANT_P (x))
24466 char buf[256];
24468 recurse = 1;
24469 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
24470 fixuplabelno++;
24471 ASM_OUTPUT_LABEL (asm_out_file, buf);
24472 fprintf (asm_out_file, "\t.long\t(");
24473 output_addr_const (asm_out_file, x);
24474 fprintf (asm_out_file, ")@fixup\n");
24475 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
24476 ASM_OUTPUT_ALIGN (asm_out_file, 2);
24477 fprintf (asm_out_file, "\t.long\t");
24478 assemble_name (asm_out_file, buf);
24479 fprintf (asm_out_file, "\n\t.previous\n");
24480 recurse = 0;
24481 return true;
24483 /* Remove initial .'s to turn a -mcall-aixdesc function
24484 address into the address of the descriptor, not the function
24485 itself. */
24486 else if (GET_CODE (x) == SYMBOL_REF
24487 && XSTR (x, 0)[0] == '.'
24488 && DEFAULT_ABI == ABI_AIX)
24490 const char *name = XSTR (x, 0);
24491 while (*name == '.')
24492 name++;
24494 fprintf (asm_out_file, "\t.long\t%s\n", name);
24495 return true;
24498 #endif /* RELOCATABLE_NEEDS_FIXUP */
24499 return default_assemble_integer (x, size, aligned_p);
24502 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
24503 /* Emit an assembler directive to set symbol visibility for DECL to
24504 VISIBILITY_TYPE. */
24506 static void
24507 rs6000_assemble_visibility (tree decl, int vis)
24509 if (TARGET_XCOFF)
24510 return;
24512 /* Functions need to have their entry point symbol visibility set as
24513 well as their descriptor symbol visibility. */
24514 if (DEFAULT_ABI == ABI_AIX
24515 && DOT_SYMBOLS
24516 && TREE_CODE (decl) == FUNCTION_DECL)
24518 static const char * const visibility_types[] = {
24519 NULL, "protected", "hidden", "internal"
24522 const char *name, *type;
24524 name = ((* targetm.strip_name_encoding)
24525 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
24526 type = visibility_types[vis];
24528 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
24529 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
24531 else
24532 default_assemble_visibility (decl, vis);
24534 #endif
24536 enum rtx_code
24537 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
24539 /* Reversal of FP compares needs care -- an ordered compare
24540 becomes an unordered compare and vice versa. */
24541 if (mode == CCFPmode
24542 && (!flag_finite_math_only
24543 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
24544 || code == UNEQ || code == LTGT))
24545 return reverse_condition_maybe_unordered (code);
24546 else
24547 return reverse_condition (code);
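/* A two-entry sketch of why FP reversal differs: reversing GE must give
   UNLT (true on NaN), not LT, or an unordered input would satisfy
   neither the condition nor its reverse.  The full mapping lives in
   reverse_condition_maybe_unordered.  */
enum fp_cc { CC_GE, CC_GT, CC_LT, CC_UNLT, CC_UNLE };

static enum fp_cc
reverse_fp_condition (enum fp_cc code)
{
  switch (code)
    {
    case CC_GE: return CC_UNLT; /* !(a >= b) == a < b or unordered.  */
    case CC_GT: return CC_UNLE;
    default:    return code;    /* other codes elided in this sketch.  */
    }
}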
24550 /* Generate a compare for CODE. Return a brand-new rtx that
24551 represents the result of the compare. */
24553 static rtx
24554 rs6000_generate_compare (rtx cmp, machine_mode mode)
24556 machine_mode comp_mode;
24557 rtx compare_result;
24558 enum rtx_code code = GET_CODE (cmp);
24559 rtx op0 = XEXP (cmp, 0);
24560 rtx op1 = XEXP (cmp, 1);
24562 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24563 comp_mode = CCmode;
24564 else if (FLOAT_MODE_P (mode))
24565 comp_mode = CCFPmode;
24566 else if (code == GTU || code == LTU
24567 || code == GEU || code == LEU)
24568 comp_mode = CCUNSmode;
24569 else if ((code == EQ || code == NE)
24570 && unsigned_reg_p (op0)
24571 && (unsigned_reg_p (op1)
24572 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
24573 /* These are unsigned values; perhaps there will be a later
24574 ordering compare that can be shared with this one. */
24575 comp_mode = CCUNSmode;
24576 else
24577 comp_mode = CCmode;
24579 /* If we have an unsigned compare, make sure we don't have a signed value as
24580 an immediate. */
24581 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
24582 && INTVAL (op1) < 0)
24584 op0 = copy_rtx_if_shared (op0);
24585 op1 = force_reg (GET_MODE (op0), op1);
24586 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
24589 /* First, the compare. */
24590 compare_result = gen_reg_rtx (comp_mode);
24592 /* E500 FP compare instructions on the GPRs. Yuck! */
24593 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
24594 && FLOAT_MODE_P (mode))
24596 rtx cmp, or_result, compare_result2;
24597 machine_mode op_mode = GET_MODE (op0);
24598 bool reverse_p;
24600 if (op_mode == VOIDmode)
24601 op_mode = GET_MODE (op1);
24603 /* First reverse the condition codes that aren't directly supported. */
24604 switch (code)
24606 case NE:
24607 case UNLT:
24608 case UNLE:
24609 case UNGT:
24610 case UNGE:
24611 code = reverse_condition_maybe_unordered (code);
24612 reverse_p = true;
24613 break;
24615 case EQ:
24616 case LT:
24617 case LE:
24618 case GT:
24619 case GE:
24620 reverse_p = false;
24621 break;
24623 default:
24624 gcc_unreachable ();
24627 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
24628 This explains the following mess. */
24630 switch (code)
24632 case EQ:
24633 switch (op_mode)
24635 case E_SFmode:
24636 cmp = (flag_finite_math_only && !flag_trapping_math)
24637 ? gen_tstsfeq_gpr (compare_result, op0, op1)
24638 : gen_cmpsfeq_gpr (compare_result, op0, op1);
24639 break;
24641 case E_DFmode:
24642 cmp = (flag_finite_math_only && !flag_trapping_math)
24643 ? gen_tstdfeq_gpr (compare_result, op0, op1)
24644 : gen_cmpdfeq_gpr (compare_result, op0, op1);
24645 break;
24647 case E_TFmode:
24648 case E_IFmode:
24649 case E_KFmode:
24650 cmp = (flag_finite_math_only && !flag_trapping_math)
24651 ? gen_tsttfeq_gpr (compare_result, op0, op1)
24652 : gen_cmptfeq_gpr (compare_result, op0, op1);
24653 break;
24655 default:
24656 gcc_unreachable ();
24658 break;
24660 case GT:
24661 case GE:
24662 switch (op_mode)
24664 case E_SFmode:
24665 cmp = (flag_finite_math_only && !flag_trapping_math)
24666 ? gen_tstsfgt_gpr (compare_result, op0, op1)
24667 : gen_cmpsfgt_gpr (compare_result, op0, op1);
24668 break;
24670 case E_DFmode:
24671 cmp = (flag_finite_math_only && !flag_trapping_math)
24672 ? gen_tstdfgt_gpr (compare_result, op0, op1)
24673 : gen_cmpdfgt_gpr (compare_result, op0, op1);
24674 break;
24676 case E_TFmode:
24677 case E_IFmode:
24678 case E_KFmode:
24679 cmp = (flag_finite_math_only && !flag_trapping_math)
24680 ? gen_tsttfgt_gpr (compare_result, op0, op1)
24681 : gen_cmptfgt_gpr (compare_result, op0, op1);
24682 break;
24684 default:
24685 gcc_unreachable ();
24687 break;
24689 case LT:
24690 case LE:
24691 switch (op_mode)
24693 case E_SFmode:
24694 cmp = (flag_finite_math_only && !flag_trapping_math)
24695 ? gen_tstsflt_gpr (compare_result, op0, op1)
24696 : gen_cmpsflt_gpr (compare_result, op0, op1);
24697 break;
24699 case E_DFmode:
24700 cmp = (flag_finite_math_only && !flag_trapping_math)
24701 ? gen_tstdflt_gpr (compare_result, op0, op1)
24702 : gen_cmpdflt_gpr (compare_result, op0, op1);
24703 break;
24705 case E_TFmode:
24706 case E_IFmode:
24707 case E_KFmode:
24708 cmp = (flag_finite_math_only && !flag_trapping_math)
24709 ? gen_tsttflt_gpr (compare_result, op0, op1)
24710 : gen_cmptflt_gpr (compare_result, op0, op1);
24711 break;
24713 default:
24714 gcc_unreachable ();
24716 break;
24718 default:
24719 gcc_unreachable ();
24722 /* Synthesize LE and GE from LT/GT || EQ. */
24723 if (code == LE || code == GE)
24725 emit_insn (cmp);
24727 compare_result2 = gen_reg_rtx (CCFPmode);
24729 /* Do the EQ. */
24730 switch (op_mode)
24732 case E_SFmode:
24733 cmp = (flag_finite_math_only && !flag_trapping_math)
24734 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
24735 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
24736 break;
24738 case E_DFmode:
24739 cmp = (flag_finite_math_only && !flag_trapping_math)
24740 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
24741 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
24742 break;
24744 case E_TFmode:
24745 case E_IFmode:
24746 case E_KFmode:
24747 cmp = (flag_finite_math_only && !flag_trapping_math)
24748 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
24749 : gen_cmptfeq_gpr (compare_result2, op0, op1);
24750 break;
24752 default:
24753 gcc_unreachable ();
24756 emit_insn (cmp);
24758 /* OR them together. */
24759 or_result = gen_reg_rtx (CCFPmode);
24760 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
24761 compare_result2);
24762 compare_result = or_result;
24765 code = reverse_p ? NE : EQ;
24767 emit_insn (cmp);
24770 /* IEEE 128-bit floating point in VSX registers when we do not have
24771 hardware support. */
24772 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24774 rtx libfunc = NULL_RTX;
24775 bool check_nan = false;
24776 rtx dest;
24778 switch (code)
24780 case EQ:
24781 case NE:
24782 libfunc = optab_libfunc (eq_optab, mode);
24783 break;
24785 case GT:
24786 case GE:
24787 libfunc = optab_libfunc (ge_optab, mode);
24788 break;
24790 case LT:
24791 case LE:
24792 libfunc = optab_libfunc (le_optab, mode);
24793 break;
24795 case UNORDERED:
24796 case ORDERED:
24797 libfunc = optab_libfunc (unord_optab, mode);
24798 code = (code == UNORDERED) ? NE : EQ;
24799 break;
24801 case UNGE:
24802 case UNGT:
24803 check_nan = true;
24804 libfunc = optab_libfunc (ge_optab, mode);
24805 code = (code == UNGE) ? GE : GT;
24806 break;
24808 case UNLE:
24809 case UNLT:
24810 check_nan = true;
24811 libfunc = optab_libfunc (le_optab, mode);
24812 code = (code == UNLE) ? LE : LT;
24813 break;
24815 case UNEQ:
24816 case LTGT:
24817 check_nan = true;
24818 libfunc = optab_libfunc (eq_optab, mode);
24819 code = (code == UNEQ) ? EQ : NE;
24820 break;
24822 default:
24823 gcc_unreachable ();
24826 gcc_assert (libfunc);
24828 if (!check_nan)
24829 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24830 SImode, op0, mode, op1, mode);
24832 /* The library signals an exception for signaling NaNs, so we need to
24833 handle isgreater, etc. by first checking isordered. */
24834 else
24836 rtx ne_rtx, normal_dest, unord_dest;
24837 rtx unord_func = optab_libfunc (unord_optab, mode);
24838 rtx join_label = gen_label_rtx ();
24839 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
24840 rtx unord_cmp = gen_reg_rtx (comp_mode);
24843 /* Test for either value being a NaN. */
24844 gcc_assert (unord_func);
24845 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
24846 SImode, op0, mode, op1, mode);
24848 /* Set value (1) if either value is a NaN, and jump to the join
24849 label. */
24850 dest = gen_reg_rtx (SImode);
24851 emit_move_insn (dest, const1_rtx);
24852 emit_insn (gen_rtx_SET (unord_cmp,
24853 gen_rtx_COMPARE (comp_mode, unord_dest,
24854 const0_rtx)));
24856 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
24857 emit_jump_insn (gen_rtx_SET (pc_rtx,
24858 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
24859 join_ref,
24860 pc_rtx)));
24862 /* Do the normal comparison, knowing that the values are not
24863 NaNs. */
24864 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24865 SImode, op0, mode, op1, mode);
24867 emit_insn (gen_cstoresi4 (dest,
24868 gen_rtx_fmt_ee (code, SImode, normal_dest,
24869 const0_rtx),
24870 normal_dest, const0_rtx));
24872 /* Join NaN and non-NaN paths. Compare dest against 0. */
24873 emit_label (join_label);
24874 code = NE;
24877 emit_insn (gen_rtx_SET (compare_result,
24878 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
24881 else
24883 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
24884 CLOBBERs to match cmptf_internal2 pattern. */
24885 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
24886 && FLOAT128_IBM_P (GET_MODE (op0))
24887 && TARGET_HARD_FLOAT && TARGET_FPRS)
24888 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24889 gen_rtvec (10,
24890 gen_rtx_SET (compare_result,
24891 gen_rtx_COMPARE (comp_mode, op0, op1)),
24892 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24893 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24894 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24895 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24896 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24897 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24898 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24899 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24900 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
24901 else if (GET_CODE (op1) == UNSPEC
24902 && XINT (op1, 1) == UNSPEC_SP_TEST)
24904 rtx op1b = XVECEXP (op1, 0, 0);
24905 comp_mode = CCEQmode;
24906 compare_result = gen_reg_rtx (CCEQmode);
24907 if (TARGET_64BIT)
24908 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
24909 else
24910 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
24912 else
24913 emit_insn (gen_rtx_SET (compare_result,
24914 gen_rtx_COMPARE (comp_mode, op0, op1)));
24917 /* Some kinds of FP comparisons need an OR operation;
24918 under flag_finite_math_only we don't bother. */
24919 if (FLOAT_MODE_P (mode)
24920 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
24921 && !flag_finite_math_only
24922 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
24923 && (code == LE || code == GE
24924 || code == UNEQ || code == LTGT
24925 || code == UNGT || code == UNLT))
24927 enum rtx_code or1, or2;
24928 rtx or1_rtx, or2_rtx, compare2_rtx;
24929 rtx or_result = gen_reg_rtx (CCEQmode);
24931 switch (code)
24933 case LE: or1 = LT; or2 = EQ; break;
24934 case GE: or1 = GT; or2 = EQ; break;
24935 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
24936 case LTGT: or1 = LT; or2 = GT; break;
24937 case UNGT: or1 = UNORDERED; or2 = GT; break;
24938 case UNLT: or1 = UNORDERED; or2 = LT; break;
24939 default: gcc_unreachable ();
24941 validate_condition_mode (or1, comp_mode);
24942 validate_condition_mode (or2, comp_mode);
24943 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
24944 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
24945 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
24946 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
24947 const_true_rtx);
24948 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
24950 compare_result = or_result;
24951 code = EQ;
24954 validate_condition_mode (code, GET_MODE (compare_result));
24956 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
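/* A minimal sketch of the comparison-mode choice at the top of the
   function.  The enum stands in for the CC modes, and ieee128_soft for
   "FLOAT128_VECTOR_P (mode) without TARGET_FLOAT128_HW".  */
#include <stdbool.h>

enum cc_kind { CC_PLAIN, CC_FP, CC_UNSIGNED };

static enum cc_kind
choose_comp_mode (bool ieee128_soft, bool float_mode, bool unsigned_code)
{
  if (ieee128_soft)
    return CC_PLAIN;    /* compared via libcall, result tested as integer.  */
  if (float_mode)
    return CC_FP;
  if (unsigned_code)    /* GTU, LTU, GEU or LEU.  */
    return CC_UNSIGNED;
  return CC_PLAIN;
}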
24960 /* Return the diagnostic message string if the binary operation OP is
24961 not permitted on TYPE1 and TYPE2, NULL otherwise. */
24963 static const char*
24964 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
24965 const_tree type1,
24966 const_tree type2)
24968 machine_mode mode1 = TYPE_MODE (type1);
24969 machine_mode mode2 = TYPE_MODE (type2);
24971 /* For complex modes, use the inner type. */
24972 if (COMPLEX_MODE_P (mode1))
24973 mode1 = GET_MODE_INNER (mode1);
24975 if (COMPLEX_MODE_P (mode2))
24976 mode2 = GET_MODE_INNER (mode2);
24978 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
24979 double to intermix unless -mfloat128-convert. */
24980 if (mode1 == mode2)
24981 return NULL;
24983 if (!TARGET_FLOAT128_CVT)
24985 if ((mode1 == KFmode && mode2 == IFmode)
24986 || (mode1 == IFmode && mode2 == KFmode))
24987 return N_("__float128 and __ibm128 cannot be used in the same "
24988 "expression");
24990 if (TARGET_IEEEQUAD
24991 && ((mode1 == IFmode && mode2 == TFmode)
24992 || (mode1 == TFmode && mode2 == IFmode)))
24993 return N_("__ibm128 and long double cannot be used in the same "
24994 "expression");
24996 if (!TARGET_IEEEQUAD
24997 && ((mode1 == KFmode && mode2 == TFmode)
24998 || (mode1 == TFmode && mode2 == KFmode)))
24999 return N_("__float128 and long double cannot be used in the same "
25000 "expression");
25003 return NULL;
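/* The mixing rules above as a standalone truth table; the strings are
   the same diagnostics the hook returns.  Modes are reduced to three
   tags, and the flags stand in for TARGET_IEEEQUAD and
   TARGET_FLOAT128_CVT.  */
#include <stddef.h>

enum fp128_kind { FP_KF, FP_IF, FP_TF };  /* __float128, __ibm128, long double */

static const char *
invalid_fp128_mix (enum fp128_kind m1, enum fp128_kind m2,
                   int ieeequad, int float128_cvt)
{
  if (m1 == m2 || float128_cvt)
    return NULL;
  if ((m1 == FP_KF) + (m2 == FP_KF) == 1 && (m1 == FP_IF || m2 == FP_IF))
    return "__float128 and __ibm128 cannot be used in the same expression";
  if (ieeequad && (m1 == FP_IF || m2 == FP_IF))
    return "__ibm128 and long double cannot be used in the same expression";
  if (!ieeequad && (m1 == FP_KF || m2 == FP_KF))
    return "__float128 and long double cannot be used in the same expression";
  return NULL;
}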
25007 /* Expand floating point conversion to/from __float128 and __ibm128. */
25009 void
25010 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
25012 machine_mode dest_mode = GET_MODE (dest);
25013 machine_mode src_mode = GET_MODE (src);
25014 convert_optab cvt = unknown_optab;
25015 bool do_move = false;
25016 rtx libfunc = NULL_RTX;
25017 rtx dest2;
25018 typedef rtx (*rtx_2func_t) (rtx, rtx);
25019 rtx_2func_t hw_convert = (rtx_2func_t)0;
25020 size_t kf_or_tf;
25022 struct hw_conv_t {
25023 rtx_2func_t from_df;
25024 rtx_2func_t from_sf;
25025 rtx_2func_t from_si_sign;
25026 rtx_2func_t from_si_uns;
25027 rtx_2func_t from_di_sign;
25028 rtx_2func_t from_di_uns;
25029 rtx_2func_t to_df;
25030 rtx_2func_t to_sf;
25031 rtx_2func_t to_si_sign;
25032 rtx_2func_t to_si_uns;
25033 rtx_2func_t to_di_sign;
25034 rtx_2func_t to_di_uns;
25035 } hw_conversions[2] = {
25036 /* conversions to/from KFmode */
25038 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
25039 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
25040 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
25041 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
25042 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
25043 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
25044 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
25045 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
25046 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
25047 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
25048 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
25049 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
25052 /* conversions to/from TFmode */
25054 gen_extenddftf2_hw, /* TFmode <- DFmode. */
25055 gen_extendsftf2_hw, /* TFmode <- SFmode. */
25056 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
25057 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
25058 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
25059 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
25060 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
25061 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
25062 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
25063 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
25064 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
25065 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
25069 if (dest_mode == src_mode)
25070 gcc_unreachable ();
25072 /* Eliminate memory operations. */
25073 if (MEM_P (src))
25074 src = force_reg (src_mode, src);
25076 if (MEM_P (dest))
25078 rtx tmp = gen_reg_rtx (dest_mode);
25079 rs6000_expand_float128_convert (tmp, src, unsigned_p);
25080 rs6000_emit_move (dest, tmp, dest_mode);
25081 return;
25084 /* Convert to IEEE 128-bit floating point. */
25085 if (FLOAT128_IEEE_P (dest_mode))
25087 if (dest_mode == KFmode)
25088 kf_or_tf = 0;
25089 else if (dest_mode == TFmode)
25090 kf_or_tf = 1;
25091 else
25092 gcc_unreachable ();
25094 switch (src_mode)
25096 case E_DFmode:
25097 cvt = sext_optab;
25098 hw_convert = hw_conversions[kf_or_tf].from_df;
25099 break;
25101 case E_SFmode:
25102 cvt = sext_optab;
25103 hw_convert = hw_conversions[kf_or_tf].from_sf;
25104 break;
25106 case E_KFmode:
25107 case E_IFmode:
25108 case E_TFmode:
25109 if (FLOAT128_IBM_P (src_mode))
25110 cvt = sext_optab;
25111 else
25112 do_move = true;
25113 break;
25115 case E_SImode:
25116 if (unsigned_p)
25118 cvt = ufloat_optab;
25119 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
25121 else
25123 cvt = sfloat_optab;
25124 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
25126 break;
25128 case E_DImode:
25129 if (unsigned_p)
25131 cvt = ufloat_optab;
25132 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
25134 else
25136 cvt = sfloat_optab;
25137 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
25139 break;
25141 default:
25142 gcc_unreachable ();
25146 /* Convert from IEEE 128-bit floating point. */
25147 else if (FLOAT128_IEEE_P (src_mode))
25149 if (src_mode == KFmode)
25150 kf_or_tf = 0;
25151 else if (src_mode == TFmode)
25152 kf_or_tf = 1;
25153 else
25154 gcc_unreachable ();
25156 switch (dest_mode)
25158 case E_DFmode:
25159 cvt = trunc_optab;
25160 hw_convert = hw_conversions[kf_or_tf].to_df;
25161 break;
25163 case E_SFmode:
25164 cvt = trunc_optab;
25165 hw_convert = hw_conversions[kf_or_tf].to_sf;
25166 break;
25168 case E_KFmode:
25169 case E_IFmode:
25170 case E_TFmode:
25171 if (FLOAT128_IBM_P (dest_mode))
25172 cvt = trunc_optab;
25173 else
25174 do_move = true;
25175 break;
25177 case E_SImode:
25178 if (unsigned_p)
25180 cvt = ufix_optab;
25181 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
25183 else
25185 cvt = sfix_optab;
25186 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
25188 break;
25190 case E_DImode:
25191 if (unsigned_p)
25193 cvt = ufix_optab;
25194 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
25196 else
25198 cvt = sfix_optab;
25199 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
25201 break;
25203 default:
25204 gcc_unreachable ();
25208 /* Both IBM format. */
25209 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
25210 do_move = true;
25212 else
25213 gcc_unreachable ();
25215 /* Handle conversion between TFmode/KFmode. */
25216 if (do_move)
25217 emit_move_insn (dest, gen_lowpart (dest_mode, src));
25219 /* Handle conversion if we have hardware support. */
25220 else if (TARGET_FLOAT128_HW && hw_convert)
25221 emit_insn ((hw_convert) (dest, src));
25223 /* Call an external function to do the conversion. */
25224 else if (cvt != unknown_optab)
25226 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
25227 gcc_assert (libfunc != NULL_RTX);
25229 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
25230 src, src_mode);
25232 gcc_assert (dest2 != NULL_RTX);
25233 if (!rtx_equal_p (dest, dest2))
25234 emit_move_insn (dest, dest2);
25237 else
25238 gcc_unreachable ();
25240 return;
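/* Editorial sketch (added commentary, not from the original source): an
   unsigned DImode -> KFmode conversion takes one of three paths above.
   With TARGET_FLOAT128_HW it emits gen_floatuns_kfdi2_hw (dest, src);
   without hardware support it calls the library function registered for
   ufloat_optab on (KFmode, DImode); and a KFmode <-> TFmode conversion
   where both modes are IEEE degenerates to a gen_lowpart register
   move.  */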
25244 /* Emit the RTL for an sISEL pattern. */
25246 void
25247 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
25249 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
25252 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
25253 can be used as that dest register. Return the dest register. */
25256 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
25258 if (op2 == const0_rtx)
25259 return op1;
25261 if (GET_CODE (scratch) == SCRATCH)
25262 scratch = gen_reg_rtx (mode);
25264 if (logical_operand (op2, mode))
25265 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
25266 else
25267 emit_insn (gen_rtx_SET (scratch,
25268 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
25270 return scratch;
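/* Illustrative example (added commentary): for "a == b" with B a
   logical_operand, the code above emits scratch = a ^ b, which is zero
   exactly when a == b; for other constants it instead emits
   scratch = a + (-b), with the same zero-iff-equal property.  */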
25273 void
25274 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
25276 rtx condition_rtx;
25277 machine_mode op_mode;
25278 enum rtx_code cond_code;
25279 rtx result = operands[0];
25281 condition_rtx = rs6000_generate_compare (operands[1], mode);
25282 cond_code = GET_CODE (condition_rtx);
25284 if (FLOAT_MODE_P (mode)
25285 && !TARGET_FPRS && TARGET_HARD_FLOAT)
25287 rtx t;
25289 PUT_MODE (condition_rtx, SImode);
25290 t = XEXP (condition_rtx, 0);
25292 gcc_assert (cond_code == NE || cond_code == EQ);
25294 if (cond_code == NE)
25295 emit_insn (gen_e500_flip_gt_bit (t, t));
25297 emit_insn (gen_move_from_CR_gt_bit (result, t));
25298 return;
25301 if (cond_code == NE
25302 || cond_code == GE || cond_code == LE
25303 || cond_code == GEU || cond_code == LEU
25304 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
25306 rtx not_result = gen_reg_rtx (CCEQmode);
25307 rtx not_op, rev_cond_rtx;
25308 machine_mode cc_mode;
25310 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
25312 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
25313 SImode, XEXP (condition_rtx, 0), const0_rtx);
25314 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
25315 emit_insn (gen_rtx_SET (not_result, not_op));
25316 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
25319 op_mode = GET_MODE (XEXP (operands[1], 0));
25320 if (op_mode == VOIDmode)
25321 op_mode = GET_MODE (XEXP (operands[1], 1));
25323 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
25325 PUT_MODE (condition_rtx, DImode);
25326 convert_move (result, condition_rtx, 0);
25328 else
25330 PUT_MODE (condition_rtx, SImode);
25331 emit_insn (gen_rtx_SET (result, condition_rtx));
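/* Illustrative (added commentary): for an SImode "r = (a < b)" the
   generated compare leaves LT in a CR field; LT is not among the codes
   needing the CCEQmode inversion above, so the LT rtx itself, put into
   SImode (DImode on 64-bit), is stored straight into R.  A GE compare
   instead first builds the reversed test against zero in CCEQmode.  */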
25335 /* Emit a conditional branch; OPERANDS[0] is the comparison to test and OPERANDS[3] the target label. */
25337 void
25338 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
25340 rtx condition_rtx, loc_ref;
25342 condition_rtx = rs6000_generate_compare (operands[0], mode);
25343 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
25344 emit_jump_insn (gen_rtx_SET (pc_rtx,
25345 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
25346 loc_ref, pc_rtx)));
25349 /* Return the string to output a conditional branch to LABEL, which is
25350 the operand template of the label, or NULL if the branch is really a
25351 conditional return.
25353 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
25354 condition code register and its mode specifies what kind of
25355 comparison we made.
25357 REVERSED is nonzero if we should reverse the sense of the comparison.
25359 INSN is the insn. */
25361 char *
25362 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
25364 static char string[64];
25365 enum rtx_code code = GET_CODE (op);
25366 rtx cc_reg = XEXP (op, 0);
25367 machine_mode mode = GET_MODE (cc_reg);
25368 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
25369 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
25370 int really_reversed = reversed ^ need_longbranch;
25371 char *s = string;
25372 const char *ccode;
25373 const char *pred;
25374 rtx note;
25376 validate_condition_mode (code, mode);
25378 /* Work out which way this really branches. We could use
25379 reverse_condition_maybe_unordered here always but this
25380 makes the resulting assembler clearer. */
25381 if (really_reversed)
25383 /* Reversal of FP compares needs care -- an ordered compare
25384 becomes an unordered compare and vice versa. */
25385 if (mode == CCFPmode)
25386 code = reverse_condition_maybe_unordered (code);
25387 else
25388 code = reverse_condition (code);
25391 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
25393 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
25394 to the GT bit. */
25395 switch (code)
25397 case EQ:
25398 /* Opposite of GT. */
25399 code = GT;
25400 break;
25402 case NE:
25403 code = UNLE;
25404 break;
25406 default:
25407 gcc_unreachable ();
25411 switch (code)
25413 /* Not all of these are actually distinct opcodes, but
25414 we distinguish them for clarity of the resulting assembler. */
25415 case NE: case LTGT:
25416 ccode = "ne"; break;
25417 case EQ: case UNEQ:
25418 ccode = "eq"; break;
25419 case GE: case GEU:
25420 ccode = "ge"; break;
25421 case GT: case GTU: case UNGT:
25422 ccode = "gt"; break;
25423 case LE: case LEU:
25424 ccode = "le"; break;
25425 case LT: case LTU: case UNLT:
25426 ccode = "lt"; break;
25427 case UNORDERED: ccode = "un"; break;
25428 case ORDERED: ccode = "nu"; break;
25429 case UNGE: ccode = "nl"; break;
25430 case UNLE: ccode = "ng"; break;
25431 default:
25432 gcc_unreachable ();
25435 /* Maybe we have a guess as to how likely the branch is. */
25436 pred = "";
25437 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
25438 if (note != NULL_RTX)
25440 /* PROB is the difference from 50%. */
25441 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
25442 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
25444 /* Only hint for highly probable/improbable branches on newer cpus when
25445 we have real profile data, as static prediction overrides processor
25446 dynamic prediction. For older cpus we may as well always hint, but
25447 assume not taken for branches that are very close to 50% as a
25448 mispredicted taken branch is more expensive than a
25449 mispredicted not-taken branch. */
25450 if (rs6000_always_hint
25451 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
25452 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
25453 && br_prob_note_reliable_p (note)))
25455 if (abs (prob) > REG_BR_PROB_BASE / 20
25456 && ((prob > 0) ^ need_longbranch))
25457 pred = "+";
25458 else
25459 pred = "-";
25463 if (label == NULL)
25464 s += sprintf (s, "b%slr%s ", ccode, pred);
25465 else
25466 s += sprintf (s, "b%s%s ", ccode, pred);
25468 /* We need to escape any '%' characters in the reg_names string.
25469 Assume they'd only be the first character.... */
25470 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
25471 *s++ = '%';
25472 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
25474 if (label != NULL)
25476 /* If the branch distance was too far, we may have to use an
25477 unconditional branch to go the distance. */
25478 if (need_longbranch)
25479 s += sprintf (s, ",$+8\n\tb %s", label);
25480 else
25481 s += sprintf (s, ",%s", label);
25484 return string;
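/* Example outputs (added, illustrative; the printed CR name comes from
   reg_names, assumed here to be "0" for cr0): an EQ on cr0 with a "+"
   prediction hint and an in-range target yields "beq+ 0,label", while
   an out-of-range target (need_longbranch) reverses the condition and
   produces "bne 0,$+8\n\tb label".  */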
25487 /* Return the string to flip the GT bit on a CR. */
25488 char *
25489 output_e500_flip_gt_bit (rtx dst, rtx src)
25491 static char string[64];
25492 int a, b;
25494 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
25495 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
25497 /* GT bit. */
25498 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
25499 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
25501 sprintf (string, "crnot %d,%d", a, b);
25502 return string;
25505 /* Return insn for VSX or Altivec comparisons. */
25507 static rtx
25508 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
25510 rtx mask;
25511 machine_mode mode = GET_MODE (op0);
25513 switch (code)
25515 default:
25516 break;
25518 case GE:
25519 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
25520 return NULL_RTX;
25521 /* FALLTHRU */
25523 case EQ:
25524 case GT:
25525 case GTU:
25526 case ORDERED:
25527 case UNORDERED:
25528 case UNEQ:
25529 case LTGT:
25530 mask = gen_reg_rtx (mode);
25531 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
25532 return mask;
25535 return NULL_RTX;
25538 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
25539 DMODE is expected destination mode. This is a recursive function. */
25541 static rtx
25542 rs6000_emit_vector_compare (enum rtx_code rcode,
25543 rtx op0, rtx op1,
25544 machine_mode dmode)
25546 rtx mask;
25547 bool swap_operands = false;
25548 bool try_again = false;
25550 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
25551 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
25553 /* See if the comparison works as is. */
25554 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25555 if (mask)
25556 return mask;
25558 switch (rcode)
25560 case LT:
25561 rcode = GT;
25562 swap_operands = true;
25563 try_again = true;
25564 break;
25565 case LTU:
25566 rcode = GTU;
25567 swap_operands = true;
25568 try_again = true;
25569 break;
25570 case NE:
25571 case UNLE:
25572 case UNLT:
25573 case UNGE:
25574 case UNGT:
25575 /* Invert condition and try again.
25576 e.g., A != B becomes ~(A==B). */
25578 enum rtx_code rev_code;
25579 enum insn_code nor_code;
25580 rtx mask2;
25582 rev_code = reverse_condition_maybe_unordered (rcode);
25583 if (rev_code == UNKNOWN)
25584 return NULL_RTX;
25586 nor_code = optab_handler (one_cmpl_optab, dmode);
25587 if (nor_code == CODE_FOR_nothing)
25588 return NULL_RTX;
25590 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
25591 if (!mask2)
25592 return NULL_RTX;
25594 mask = gen_reg_rtx (dmode);
25595 emit_insn (GEN_FCN (nor_code) (mask, mask2));
25596 return mask;
25598 break;
25599 case GE:
25600 case GEU:
25601 case LE:
25602 case LEU:
25603 /* Try GT/GTU/LT/LTU OR EQ */
25605 rtx c_rtx, eq_rtx;
25606 enum insn_code ior_code;
25607 enum rtx_code new_code;
25609 switch (rcode)
25611 case GE:
25612 new_code = GT;
25613 break;
25615 case GEU:
25616 new_code = GTU;
25617 break;
25619 case LE:
25620 new_code = LT;
25621 break;
25623 case LEU:
25624 new_code = LTU;
25625 break;
25627 default:
25628 gcc_unreachable ();
25631 ior_code = optab_handler (ior_optab, dmode);
25632 if (ior_code == CODE_FOR_nothing)
25633 return NULL_RTX;
25635 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
25636 if (!c_rtx)
25637 return NULL_RTX;
25639 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
25640 if (!eq_rtx)
25641 return NULL_RTX;
25643 mask = gen_reg_rtx (dmode);
25644 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
25645 return mask;
25647 break;
25648 default:
25649 return NULL_RTX;
25652 if (try_again)
25654 if (swap_operands)
25655 std::swap (op0, op1);
25657 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25658 if (mask)
25659 return mask;
25662 /* You only get two chances. */
25663 return NULL_RTX;
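/* Worked example (added commentary): V4SI "a <= b" has no direct
   pattern, so the LE arm above rewrites it as (a LT b) OR (a EQ b),
   recursing once per sub-compare and combining the two masks via
   ior_optab; "a != b" instead computes the (a EQ b) mask and
   complements it with one_cmpl_optab.  */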
25666 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
25667 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
25668 operands for the relation operation COND. */
25671 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
25672 rtx cond, rtx cc_op0, rtx cc_op1)
25674 machine_mode dest_mode = GET_MODE (dest);
25675 machine_mode mask_mode = GET_MODE (cc_op0);
25676 enum rtx_code rcode = GET_CODE (cond);
25677 machine_mode cc_mode = CCmode;
25678 rtx mask;
25679 rtx cond2;
25680 bool invert_move = false;
25682 if (VECTOR_UNIT_NONE_P (dest_mode))
25683 return 0;
25685 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
25686 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
25688 switch (rcode)
25690 /* Swap operands if we can, and fall back to doing the operation as
25691 specified, and doing a NOR to invert the test. */
25692 case NE:
25693 case UNLE:
25694 case UNLT:
25695 case UNGE:
25696 case UNGT:
25697 /* Invert condition and try again.
25698 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
25699 invert_move = true;
25700 rcode = reverse_condition_maybe_unordered (rcode);
25701 if (rcode == UNKNOWN)
25702 return 0;
25703 break;
25705 case GE:
25706 case LE:
25707 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
25709 /* Invert condition to avoid compound test. */
25710 invert_move = true;
25711 rcode = reverse_condition (rcode);
25713 break;
25715 case GTU:
25716 case GEU:
25717 case LTU:
25718 case LEU:
25719 /* Mark unsigned tests with CCUNSmode. */
25720 cc_mode = CCUNSmode;
25722 /* Invert condition to avoid compound test if necessary. */
25723 if (rcode == GEU || rcode == LEU)
25725 invert_move = true;
25726 rcode = reverse_condition (rcode);
25728 break;
25730 default:
25731 break;
25734 /* Get the vector mask for the given relational operations. */
25735 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
25737 if (!mask)
25738 return 0;
25740 if (invert_move)
25741 std::swap (op_true, op_false);
25743 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
25744 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
25745 && (GET_CODE (op_true) == CONST_VECTOR
25746 || GET_CODE (op_false) == CONST_VECTOR))
25748 rtx constant_0 = CONST0_RTX (dest_mode);
25749 rtx constant_m1 = CONSTM1_RTX (dest_mode);
25751 if (op_true == constant_m1 && op_false == constant_0)
25753 emit_move_insn (dest, mask);
25754 return 1;
25757 else if (op_true == constant_0 && op_false == constant_m1)
25759 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
25760 return 1;
25763 /* If we can't use the vector comparison directly, perhaps we can use
25764 the mask for the true or false fields, instead of loading up a
25765 constant. */
25766 if (op_true == constant_m1)
25767 op_true = mask;
25769 if (op_false == constant_0)
25770 op_false = mask;
25773 if (!REG_P (op_true) && !SUBREG_P (op_true))
25774 op_true = force_reg (dest_mode, op_true);
25776 if (!REG_P (op_false) && !SUBREG_P (op_false))
25777 op_false = force_reg (dest_mode, op_false);
25779 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
25780 CONST0_RTX (dest_mode));
25781 emit_insn (gen_rtx_SET (dest,
25782 gen_rtx_IF_THEN_ELSE (dest_mode,
25783 cond2,
25784 op_true,
25785 op_false)));
25786 return 1;
25789 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
25790 for SF/DF scalars.  Move TRUE_COND to DEST if OP applied to the operands
25791 of the last comparison is nonzero/true, FALSE_COND if it is zero/false.  Return 0 if the
25792 hardware has no such operation. */
25794 static int
25795 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25797 enum rtx_code code = GET_CODE (op);
25798 rtx op0 = XEXP (op, 0);
25799 rtx op1 = XEXP (op, 1);
25800 machine_mode compare_mode = GET_MODE (op0);
25801 machine_mode result_mode = GET_MODE (dest);
25802 bool max_p = false;
25804 if (result_mode != compare_mode)
25805 return 0;
25807 if (code == GE || code == GT)
25808 max_p = true;
25809 else if (code == LE || code == LT)
25810 max_p = false;
25811 else
25812 return 0;
25814 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
25817 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
25818 max_p = !max_p;
25820 else
25821 return 0;
25823 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
25824 return 1;
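/* Sketch (added commentary): for DFmode "a >= b ? a : b" this reduces
   to rs6000_emit_minmax with SMAX, which on power9 is expected to match
   the smaxdf3-style pattern and emit xsmaxcdp; swapping the two arms
   ("a >= b ? b : a") flips max_p and selects xsmincdp instead.  */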
25827 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
25828 XXSEL instructions for SF/DF scalars.  Move TRUE_COND to DEST if OP applied
25829 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
25830 zero/false. Return 0 if the hardware has no such operation. */
25832 static int
25833 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25835 enum rtx_code code = GET_CODE (op);
25836 rtx op0 = XEXP (op, 0);
25837 rtx op1 = XEXP (op, 1);
25838 machine_mode result_mode = GET_MODE (dest);
25839 rtx compare_rtx;
25840 rtx cmove_rtx;
25841 rtx clobber_rtx;
25843 if (!can_create_pseudo_p ())
25844 return 0;
25846 switch (code)
25848 case EQ:
25849 case GE:
25850 case GT:
25851 break;
25853 case NE:
25854 case LT:
25855 case LE:
25856 code = swap_condition (code);
25857 std::swap (op0, op1);
25858 break;
25860 default:
25861 return 0;
25864 /* Generate: [(parallel [(set (dest)
25865 (if_then_else (op (cmp1) (cmp2))
25866 (true)
25867 (false)))
25868 (clobber (scratch))])]. */
25870 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
25871 cmove_rtx = gen_rtx_SET (dest,
25872 gen_rtx_IF_THEN_ELSE (result_mode,
25873 compare_rtx,
25874 true_cond,
25875 false_cond));
25877 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
25878 emit_insn (gen_rtx_PARALLEL (VOIDmode,
25879 gen_rtvec (2, cmove_rtx, clobber_rtx)));
25881 return 1;
25884 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
25885 operands of the last comparison is nonzero/true, FALSE_COND if it
25886 is zero/false. Return 0 if the hardware has no such operation. */
25889 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25891 enum rtx_code code = GET_CODE (op);
25892 rtx op0 = XEXP (op, 0);
25893 rtx op1 = XEXP (op, 1);
25894 machine_mode compare_mode = GET_MODE (op0);
25895 machine_mode result_mode = GET_MODE (dest);
25896 rtx temp;
25897 bool is_against_zero;
25899 /* These modes should always match. */
25900 if (GET_MODE (op1) != compare_mode
25901 /* In the isel case however, we can use a compare immediate, so
25902 op1 may be a small constant. */
25903 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
25904 return 0;
25905 if (GET_MODE (true_cond) != result_mode)
25906 return 0;
25907 if (GET_MODE (false_cond) != result_mode)
25908 return 0;
25910 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
25911 if (TARGET_P9_MINMAX
25912 && (compare_mode == SFmode || compare_mode == DFmode)
25913 && (result_mode == SFmode || result_mode == DFmode))
25915 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
25916 return 1;
25918 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
25919 return 1;
25922 /* Don't allow using floating point comparisons for integer results for
25923 now. */
25924 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
25925 return 0;
25927 /* First, work out if the hardware can do this at all, or
25928 if it's too slow.... */
25929 if (!FLOAT_MODE_P (compare_mode))
25931 if (TARGET_ISEL)
25932 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
25933 return 0;
25935 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
25936 && SCALAR_FLOAT_MODE_P (compare_mode))
25937 return 0;
25939 is_against_zero = op1 == CONST0_RTX (compare_mode);
25941 /* A floating-point subtract might overflow, underflow, or produce
25942 an inexact result, thus changing the floating-point flags, so it
25943 can't be generated if we care about that. It's safe if one side
25944 of the construct is zero, since then no subtract will be
25945 generated. */
25946 if (SCALAR_FLOAT_MODE_P (compare_mode)
25947 && flag_trapping_math && ! is_against_zero)
25948 return 0;
25950 /* Eliminate half of the comparisons by switching operands, this
25951 makes the remaining code simpler. */
25952 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
25953 || code == LTGT || code == LT || code == UNLE)
25955 code = reverse_condition_maybe_unordered (code);
25956 temp = true_cond;
25957 true_cond = false_cond;
25958 false_cond = temp;
25961 /* UNEQ and LTGT take four instructions for a comparison with zero;
25962 it'll probably be faster to use a branch here too. */
25963 if (code == UNEQ && HONOR_NANS (compare_mode))
25964 return 0;
25966 /* We're going to try to implement comparisons by performing
25967 a subtract, then comparing against zero. Unfortunately,
25968 Inf - Inf is NaN, which is not zero, and so if we don't
25969 know that the operand is finite and the comparison
25970 would treat EQ differently from UNORDERED, we can't do it.
25971 if (HONOR_INFINITIES (compare_mode)
25972 && code != GT && code != UNGE
25973 && (GET_CODE (op1) != CONST_DOUBLE
25974 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
25975 /* Constructs of the form (a OP b ? a : b) are safe. */
25976 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
25977 || (! rtx_equal_p (op0, true_cond)
25978 && ! rtx_equal_p (op1, true_cond))))
25979 return 0;
25981 /* At this point we know we can use fsel. */
25983 /* Reduce the comparison to a comparison against zero. */
25984 if (! is_against_zero)
25986 temp = gen_reg_rtx (compare_mode);
25987 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
25988 op0 = temp;
25989 op1 = CONST0_RTX (compare_mode);
25992 /* If we don't care about NaNs we can reduce some of the comparisons
25993 down to faster ones. */
25994 if (! HONOR_NANS (compare_mode))
25995 switch (code)
25997 case GT:
25998 code = LE;
25999 temp = true_cond;
26000 true_cond = false_cond;
26001 false_cond = temp;
26002 break;
26003 case UNGE:
26004 code = GE;
26005 break;
26006 case UNEQ:
26007 code = EQ;
26008 break;
26009 default:
26010 break;
26013 /* Now, reduce everything down to a GE. */
26014 switch (code)
26016 case GE:
26017 break;
26019 case LE:
26020 temp = gen_reg_rtx (compare_mode);
26021 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26022 op0 = temp;
26023 break;
26025 case ORDERED:
26026 temp = gen_reg_rtx (compare_mode);
26027 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
26028 op0 = temp;
26029 break;
26031 case EQ:
26032 temp = gen_reg_rtx (compare_mode);
26033 emit_insn (gen_rtx_SET (temp,
26034 gen_rtx_NEG (compare_mode,
26035 gen_rtx_ABS (compare_mode, op0))));
26036 op0 = temp;
26037 break;
26039 case UNGE:
26040 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
26041 temp = gen_reg_rtx (result_mode);
26042 emit_insn (gen_rtx_SET (temp,
26043 gen_rtx_IF_THEN_ELSE (result_mode,
26044 gen_rtx_GE (VOIDmode,
26045 op0, op1),
26046 true_cond, false_cond)));
26047 false_cond = true_cond;
26048 true_cond = temp;
26050 temp = gen_reg_rtx (compare_mode);
26051 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26052 op0 = temp;
26053 break;
26055 case GT:
26056 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
26057 temp = gen_reg_rtx (result_mode);
26058 emit_insn (gen_rtx_SET (temp,
26059 gen_rtx_IF_THEN_ELSE (result_mode,
26060 gen_rtx_GE (VOIDmode,
26061 op0, op1),
26062 true_cond, false_cond)));
26063 true_cond = false_cond;
26064 false_cond = temp;
26066 temp = gen_reg_rtx (compare_mode);
26067 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26068 op0 = temp;
26069 break;
26071 default:
26072 gcc_unreachable ();
26075 emit_insn (gen_rtx_SET (dest,
26076 gen_rtx_IF_THEN_ELSE (result_mode,
26077 gen_rtx_GE (VOIDmode,
26078 op0, op1),
26079 true_cond, false_cond)));
26080 return 1;
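/* Sketch (added commentary): with trapping math and NaN concerns out of
   the way (e.g. under -ffast-math), "a >= b ? x : y" in DFmode reduces
   above to t = a - b followed by an if_then_else on (t GE 0), which the
   machine description's fsel patterns can match as "fsel dest,t,x,y".  */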
26083 /* Same as above, but for ints (isel). */
26085 static int
26086 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
26088 rtx condition_rtx, cr;
26089 machine_mode mode = GET_MODE (dest);
26090 enum rtx_code cond_code;
26091 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
26092 bool signedp;
26094 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
26095 return 0;
26097 /* We still have to do the compare, because isel doesn't do a
26098 compare, it just looks at the CRx bits set by a previous compare
26099 instruction. */
26100 condition_rtx = rs6000_generate_compare (op, mode);
26101 cond_code = GET_CODE (condition_rtx);
26102 cr = XEXP (condition_rtx, 0);
26103 signedp = GET_MODE (cr) == CCmode;
26105 isel_func = (mode == SImode
26106 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
26107 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
26109 switch (cond_code)
26111 case LT: case GT: case LTU: case GTU: case EQ:
26112 /* isel handles these directly. */
26113 break;
26115 default:
26116 /* We need to swap the sense of the comparison. */
26118 std::swap (false_cond, true_cond);
26119 PUT_CODE (condition_rtx, reverse_condition (cond_code));
26121 break;
26124 false_cond = force_reg (mode, false_cond);
26125 if (true_cond != const0_rtx)
26126 true_cond = force_reg (mode, true_cond);
26128 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
26130 return 1;
26133 const char *
26134 output_isel (rtx *operands)
26136 enum rtx_code code;
26138 code = GET_CODE (operands[1]);
26140 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
26142 gcc_assert (GET_CODE (operands[2]) == REG
26143 && GET_CODE (operands[3]) == REG);
26144 PUT_CODE (operands[1], reverse_condition (code));
26145 return "isel %0,%3,%2,%j1";
26148 return "isel %0,%2,%3,%j1";
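/* Illustrative (added commentary): a GE condition has no direct isel
   encoding, so it is reversed to LT and the two source operands are
   swapped, making "isel %0,%3,%2,%j1" select operands[3] when the
   reversed condition's CR bit (printed via the %j output modifier) is
   set.  */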
26151 void
26152 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
26154 machine_mode mode = GET_MODE (op0);
26155 enum rtx_code c;
26156 rtx target;
26158 /* VSX/altivec have direct min/max insns. */
26159 if ((code == SMAX || code == SMIN)
26160 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
26161 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
26163 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
26164 return;
26167 if (code == SMAX || code == SMIN)
26168 c = GE;
26169 else
26170 c = GEU;
26172 if (code == SMAX || code == UMAX)
26173 target = emit_conditional_move (dest, c, op0, op1, mode,
26174 op0, op1, mode, 0);
26175 else
26176 target = emit_conditional_move (dest, c, op0, op1, mode,
26177 op1, op0, mode, 0);
26178 gcc_assert (target);
26179 if (target != dest)
26180 emit_move_insn (dest, target);
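/* Illustrative (added commentary): lacking a direct min/max pattern,
   "smin (a, b)" falls through to emit_conditional_move (dest, GE, a, b,
   ..., b, a, ...), i.e. "a >= b ? b : a", which rs6000_emit_cmove above
   can in turn implement with fsel.  */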
26183 /* Split a signbit operation on 64-bit machines with direct move. Also allow
26184 for the value to come from memory or if it is already loaded into a GPR. */
26186 void
26187 rs6000_split_signbit (rtx dest, rtx src)
26189 machine_mode d_mode = GET_MODE (dest);
26190 machine_mode s_mode = GET_MODE (src);
26191 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
26192 rtx shift_reg = dest_di;
26194 gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
26196 if (MEM_P (src))
26198 rtx mem = (WORDS_BIG_ENDIAN
26199 ? adjust_address (src, DImode, 0)
26200 : adjust_address (src, DImode, 8));
26201 emit_insn (gen_rtx_SET (dest_di, mem));
26204 else
26206 unsigned int r = reg_or_subregno (src);
26208 if (INT_REGNO_P (r))
26209 shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
26211 else
26213 /* Generate the special mfvsrd instruction to get it in a GPR. */
26214 gcc_assert (VSX_REGNO_P (r));
26215 if (s_mode == KFmode)
26216 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
26217 else
26218 emit_insn (gen_signbittf2_dm2 (dest_di, src));
26222 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
26223 return;
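/* Illustrative (added commentary): for a KFmode source living in a VSX
   register, the gen_signbitkf2_dm2 pattern above moves the sign-bearing
   doubleword into DEST_DI, and the final gen_lshrdi3 by 63 ("srdi")
   leaves the sign bit alone in bit 0.  */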
26226 /* A subroutine of the atomic operation splitters. Jump to LABEL if
26227 COND is true. Mark the jump as unlikely to be taken. */
26229 static void
26230 emit_unlikely_jump (rtx cond, rtx label)
26232 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
26233 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
26234 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
26237 /* A subroutine of the atomic operation splitters. Emit a load-locked
26238 instruction in MODE.  For QI/HImode, possibly use a pattern that includes
26239 the zero_extend operation. */
26241 static void
26242 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
26244 rtx (*fn) (rtx, rtx) = NULL;
26246 switch (mode)
26248 case E_QImode:
26249 fn = gen_load_lockedqi;
26250 break;
26251 case E_HImode:
26252 fn = gen_load_lockedhi;
26253 break;
26254 case E_SImode:
26255 if (GET_MODE (mem) == QImode)
26256 fn = gen_load_lockedqi_si;
26257 else if (GET_MODE (mem) == HImode)
26258 fn = gen_load_lockedhi_si;
26259 else
26260 fn = gen_load_lockedsi;
26261 break;
26262 case E_DImode:
26263 fn = gen_load_lockeddi;
26264 break;
26265 case E_TImode:
26266 fn = gen_load_lockedti;
26267 break;
26268 default:
26269 gcc_unreachable ();
26271 emit_insn (fn (reg, mem));
26274 /* A subroutine of the atomic operation splitters. Emit a store-conditional
26275 instruction in MODE. */
26277 static void
26278 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
26280 rtx (*fn) (rtx, rtx, rtx) = NULL;
26282 switch (mode)
26284 case E_QImode:
26285 fn = gen_store_conditionalqi;
26286 break;
26287 case E_HImode:
26288 fn = gen_store_conditionalhi;
26289 break;
26290 case E_SImode:
26291 fn = gen_store_conditionalsi;
26292 break;
26293 case E_DImode:
26294 fn = gen_store_conditionaldi;
26295 break;
26296 case E_TImode:
26297 fn = gen_store_conditionalti;
26298 break;
26299 default:
26300 gcc_unreachable ();
26303 /* Emit sync before stwcx. to address PPC405 Erratum. */
26304 if (PPC405_ERRATUM77)
26305 emit_insn (gen_hwsync ());
26307 emit_insn (fn (res, mem, val));
26310 /* Expand barriers before and after a load_locked/store_cond sequence. */
26312 static rtx
26313 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
26315 rtx addr = XEXP (mem, 0);
26316 int strict_p = (reload_in_progress || reload_completed);
26318 if (!legitimate_indirect_address_p (addr, strict_p)
26319 && !legitimate_indexed_address_p (addr, strict_p))
26321 addr = force_reg (Pmode, addr);
26322 mem = replace_equiv_address_nv (mem, addr);
26325 switch (model)
26327 case MEMMODEL_RELAXED:
26328 case MEMMODEL_CONSUME:
26329 case MEMMODEL_ACQUIRE:
26330 break;
26331 case MEMMODEL_RELEASE:
26332 case MEMMODEL_ACQ_REL:
26333 emit_insn (gen_lwsync ());
26334 break;
26335 case MEMMODEL_SEQ_CST:
26336 emit_insn (gen_hwsync ());
26337 break;
26338 default:
26339 gcc_unreachable ();
26341 return mem;
26344 static void
26345 rs6000_post_atomic_barrier (enum memmodel model)
26347 switch (model)
26349 case MEMMODEL_RELAXED:
26350 case MEMMODEL_CONSUME:
26351 case MEMMODEL_RELEASE:
26352 break;
26353 case MEMMODEL_ACQUIRE:
26354 case MEMMODEL_ACQ_REL:
26355 case MEMMODEL_SEQ_CST:
26356 emit_insn (gen_isync ());
26357 break;
26358 default:
26359 gcc_unreachable ();
26363 /* A subroutine of the various atomic expanders. For sub-word operations,
26364 we must adjust things to operate on SImode. Given the original MEM,
26365 return a new aligned memory. Also build and return the quantities by
26366 which to shift and mask. */
26368 static rtx
26369 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
26371 rtx addr, align, shift, mask, mem;
26372 HOST_WIDE_INT shift_mask;
26373 machine_mode mode = GET_MODE (orig_mem);
26375 /* For smaller modes, we have to implement this via SImode. */
26376 shift_mask = (mode == QImode ? 0x18 : 0x10);
26378 addr = XEXP (orig_mem, 0);
26379 addr = force_reg (GET_MODE (addr), addr);
26381 /* Aligned memory containing subword. Generate a new memory. We
26382 do not want any of the existing MEM_ATTR data, as we're now
26383 accessing memory outside the original object. */
26384 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
26385 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26386 mem = gen_rtx_MEM (SImode, align);
26387 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
26388 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
26389 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
26391 /* Shift amount for subword relative to aligned word. */
26392 shift = gen_reg_rtx (SImode);
26393 addr = gen_lowpart (SImode, addr);
26394 rtx tmp = gen_reg_rtx (SImode);
26395 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
26396 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
26397 if (BYTES_BIG_ENDIAN)
26398 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
26399 shift, 1, OPTAB_LIB_WIDEN);
26400 *pshift = shift;
26402 /* Mask for insertion. */
26403 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
26404 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
26405 *pmask = mask;
26407 return mem;
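/* Worked example (added commentary, little-endian QImode): for
   addr == 0x1003, align == (addr & -4) == 0x1000,
   shift == ((addr << 3) & 0x18) == 24 and mask == 0xff << 24, placing
   the byte in the top byte of the aligned SImode word; big-endian XORs
   the shift with 0x18, giving 0 for the same address.  */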
26410 /* A subroutine of the various atomic expanders. For sub-word operands,
26411 combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo. */
26413 static rtx
26414 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
26416 rtx x;
26418 x = gen_reg_rtx (SImode);
26419 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
26420 gen_rtx_NOT (SImode, mask),
26421 oldval)));
26423 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
26425 return x;
26428 /* A subroutine of the various atomic expanders. For sub-word operands,
26429 extract WIDE to NARROW via SHIFT. */
26431 static void
26432 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
26434 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
26435 wide, 1, OPTAB_LIB_WIDEN);
26436 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
26439 /* Expand an atomic compare and swap operation. */
26441 void
26442 rs6000_expand_atomic_compare_and_swap (rtx operands[])
26444 rtx boolval, retval, mem, oldval, newval, cond;
26445 rtx label1, label2, x, mask, shift;
26446 machine_mode mode, orig_mode;
26447 enum memmodel mod_s, mod_f;
26448 bool is_weak;
26450 boolval = operands[0];
26451 retval = operands[1];
26452 mem = operands[2];
26453 oldval = operands[3];
26454 newval = operands[4];
26455 is_weak = (INTVAL (operands[5]) != 0);
26456 mod_s = memmodel_base (INTVAL (operands[6]));
26457 mod_f = memmodel_base (INTVAL (operands[7]));
26458 orig_mode = mode = GET_MODE (mem);
26460 mask = shift = NULL_RTX;
26461 if (mode == QImode || mode == HImode)
26463 /* Before power8, we didn't have access to lbarx/lharx, so generate a
26464 lwarx and shift/mask operations. With power8, we need to do the
26465 comparison in SImode, but the store is still done in QI/HImode. */
26466 oldval = convert_modes (SImode, mode, oldval, 1);
26468 if (!TARGET_SYNC_HI_QI)
26470 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26472 /* Shift and mask OLDVAL into position within the word. */
26473 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
26474 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26476 /* Shift and mask NEWVAL into position within the word. */
26477 newval = convert_modes (SImode, mode, newval, 1);
26478 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
26479 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26482 /* Prepare to adjust the return value. */
26483 retval = gen_reg_rtx (SImode);
26484 mode = SImode;
26486 else if (reg_overlap_mentioned_p (retval, oldval))
26487 oldval = copy_to_reg (oldval);
26489 if (mode != TImode && !reg_or_short_operand (oldval, mode))
26490 oldval = copy_to_mode_reg (mode, oldval);
26492 if (reg_overlap_mentioned_p (retval, newval))
26493 newval = copy_to_reg (newval);
26495 mem = rs6000_pre_atomic_barrier (mem, mod_s);
26497 label1 = NULL_RTX;
26498 if (!is_weak)
26500 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26501 emit_label (XEXP (label1, 0));
26503 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26505 emit_load_locked (mode, retval, mem);
26507 x = retval;
26508 if (mask)
26509 x = expand_simple_binop (SImode, AND, retval, mask,
26510 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26512 cond = gen_reg_rtx (CCmode);
26513 /* If we have TImode, synthesize a comparison. */
26514 if (mode != TImode)
26515 x = gen_rtx_COMPARE (CCmode, x, oldval);
26516 else
26518 rtx xor1_result = gen_reg_rtx (DImode);
26519 rtx xor2_result = gen_reg_rtx (DImode);
26520 rtx or_result = gen_reg_rtx (DImode);
26521 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
26522 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
26523 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
26524 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
26526 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
26527 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
26528 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
26529 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
26532 emit_insn (gen_rtx_SET (cond, x));
26534 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26535 emit_unlikely_jump (x, label2);
26537 x = newval;
26538 if (mask)
26539 x = rs6000_mask_atomic_subword (retval, newval, mask);
26541 emit_store_conditional (orig_mode, cond, mem, x);
26543 if (!is_weak)
26545 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26546 emit_unlikely_jump (x, label1);
26549 if (!is_mm_relaxed (mod_f))
26550 emit_label (XEXP (label2, 0));
26552 rs6000_post_atomic_barrier (mod_s);
26554 if (is_mm_relaxed (mod_f))
26555 emit_label (XEXP (label2, 0));
26557 if (shift)
26558 rs6000_finish_atomic_subword (operands[1], retval, shift);
26559 else if (mode != GET_MODE (operands[1]))
26560 convert_move (operands[1], retval, 1);
26562 /* In all cases, CR0 contains EQ on success, and NE on failure. */
26563 x = gen_rtx_EQ (SImode, cond, const0_rtx);
26564 emit_insn (gen_rtx_SET (boolval, x));
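/* Skeleton of the emitted sequence (added, illustrative) for a strong
   SImode seq_cst compare-and-swap:

       hwsync
    1: lwarx  retval,0,mem
       cmpw   cr0,retval,oldval
       bne-   cr0,2f
       stwcx. newval,0,mem
       bne-   cr0,1b
    2: isync

   CR0 then holds EQ on success and NE on failure, which the final
   gen_rtx_EQ above materializes into BOOLVAL.  */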
26567 /* Expand an atomic exchange operation. */
26569 void
26570 rs6000_expand_atomic_exchange (rtx operands[])
26572 rtx retval, mem, val, cond;
26573 machine_mode mode;
26574 enum memmodel model;
26575 rtx label, x, mask, shift;
26577 retval = operands[0];
26578 mem = operands[1];
26579 val = operands[2];
26580 model = memmodel_base (INTVAL (operands[3]));
26581 mode = GET_MODE (mem);
26583 mask = shift = NULL_RTX;
26584 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
26586 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26588 /* Shift and mask VAL into position within the word. */
26589 val = convert_modes (SImode, mode, val, 1);
26590 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26591 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26593 /* Prepare to adjust the return value. */
26594 retval = gen_reg_rtx (SImode);
26595 mode = SImode;
26598 mem = rs6000_pre_atomic_barrier (mem, model);
26600 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26601 emit_label (XEXP (label, 0));
26603 emit_load_locked (mode, retval, mem);
26605 x = val;
26606 if (mask)
26607 x = rs6000_mask_atomic_subword (retval, val, mask);
26609 cond = gen_reg_rtx (CCmode);
26610 emit_store_conditional (mode, cond, mem, x);
26612 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26613 emit_unlikely_jump (x, label);
26615 rs6000_post_atomic_barrier (model);
26617 if (shift)
26618 rs6000_finish_atomic_subword (operands[0], retval, shift);
26621 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
26622 to perform. MEM is the memory on which to operate. VAL is the second
26623 operand of the binary operator. BEFORE and AFTER are optional locations to
26624 return the value of MEM either before or after the operation. MODEL_RTX
26625 is a CONST_INT containing the memory model to use. */
26627 void
26628 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
26629 rtx orig_before, rtx orig_after, rtx model_rtx)
26631 enum memmodel model = memmodel_base (INTVAL (model_rtx));
26632 machine_mode mode = GET_MODE (mem);
26633 machine_mode store_mode = mode;
26634 rtx label, x, cond, mask, shift;
26635 rtx before = orig_before, after = orig_after;
26637 mask = shift = NULL_RTX;
26638 /* On power8, we want to use SImode for the operation. On previous systems,
26639 use the operation in a subword and shift/mask to get the proper byte or
26640 halfword. */
26641 if (mode == QImode || mode == HImode)
26643 if (TARGET_SYNC_HI_QI)
26645 val = convert_modes (SImode, mode, val, 1);
26647 /* Prepare to adjust the return value. */
26648 before = gen_reg_rtx (SImode);
26649 if (after)
26650 after = gen_reg_rtx (SImode);
26651 mode = SImode;
26653 else
26655 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26657 /* Shift and mask VAL into position within the word. */
26658 val = convert_modes (SImode, mode, val, 1);
26659 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26660 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26662 switch (code)
26664 case IOR:
26665 case XOR:
26666 /* We've already zero-extended VAL. That is sufficient to
26667 make certain that it does not affect other bits. */
26668 mask = NULL;
26669 break;
26671 case AND:
26672 /* If we make certain that all of the other bits in VAL are
26673 set, that will be sufficient to not affect other bits. */
26674 x = gen_rtx_NOT (SImode, mask);
26675 x = gen_rtx_IOR (SImode, x, val);
26676 emit_insn (gen_rtx_SET (val, x));
26677 mask = NULL;
26678 break;
26680 case NOT:
26681 case PLUS:
26682 case MINUS:
26683 /* These will all affect bits outside the field and need
26684 adjustment via MASK within the loop. */
26685 break;
26687 default:
26688 gcc_unreachable ();
26691 /* Prepare to adjust the return value. */
26692 before = gen_reg_rtx (SImode);
26693 if (after)
26694 after = gen_reg_rtx (SImode);
26695 store_mode = mode = SImode;
26699 mem = rs6000_pre_atomic_barrier (mem, model);
26701 label = gen_label_rtx ();
26702 emit_label (label);
26703 label = gen_rtx_LABEL_REF (VOIDmode, label);
26705 if (before == NULL_RTX)
26706 before = gen_reg_rtx (mode);
26708 emit_load_locked (mode, before, mem);
26710 if (code == NOT)
26712 x = expand_simple_binop (mode, AND, before, val,
26713 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26714 after = expand_simple_unop (mode, NOT, x, after, 1);
26716 else
26718 after = expand_simple_binop (mode, code, before, val,
26719 after, 1, OPTAB_LIB_WIDEN);
26722 x = after;
26723 if (mask)
26725 x = expand_simple_binop (SImode, AND, after, mask,
26726 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26727 x = rs6000_mask_atomic_subword (before, x, mask);
26729 else if (store_mode != mode)
26730 x = convert_modes (store_mode, mode, x, 1);
26732 cond = gen_reg_rtx (CCmode);
26733 emit_store_conditional (store_mode, cond, mem, x);
26735 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26736 emit_unlikely_jump (x, label);
26738 rs6000_post_atomic_barrier (model);
26740 if (shift)
26742 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
26743 then do the calculations in a SImode register. */
26744 if (orig_before)
26745 rs6000_finish_atomic_subword (orig_before, before, shift);
26746 if (orig_after)
26747 rs6000_finish_atomic_subword (orig_after, after, shift);
26749 else if (store_mode != mode)
26751 /* QImode/HImode on machines with lbarx/lharx where we do the native
26752 operation and then do the calculations in a SImode register. */
26753 if (orig_before)
26754 convert_move (orig_before, before, 1);
26755 if (orig_after)
26756 convert_move (orig_after, after, 1);
26758 else if (orig_after && after != orig_after)
26759 emit_move_insn (orig_after, after);
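/* Illustrative (added commentary): an SImode seq_cst fetch-and-add
   follows the same shape -- hwsync; 1: lwarx before,0,mem;
   add after,before,val; stwcx. after,0,mem; bne- 1b; isync -- with
   BEFORE and AFTER then copied to the caller's locations as needed.  */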
26762 /* Emit instructions to move SRC to DST. Called by splitters for
26763 multi-register moves. It will emit at most one instruction for
26764 each register that is accessed; that is, it won't emit li/lis pairs
26765 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26766 register. */
26768 void
26769 rs6000_split_multireg_move (rtx dst, rtx src)
26771 /* The register number of the first register being moved. */
26772 int reg;
26773 /* The mode that is to be moved. */
26774 machine_mode mode;
26775 /* The mode that the move is being done in, and its size. */
26776 machine_mode reg_mode;
26777 int reg_mode_size;
26778 /* The number of registers that will be moved. */
26779 int nregs;
26781 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26782 mode = GET_MODE (dst);
26783 nregs = hard_regno_nregs (reg, mode);
26784 if (FP_REGNO_P (reg))
26785 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26786 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
26787 else if (ALTIVEC_REGNO_P (reg))
26788 reg_mode = V16QImode;
26789 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
26790 reg_mode = DFmode;
26791 else
26792 reg_mode = word_mode;
26793 reg_mode_size = GET_MODE_SIZE (reg_mode);
26795 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26797 /* TDmode residing in FP registers is special, since the ISA requires that
26798 the lower-numbered word of a register pair is always the most significant
26799 word, even in little-endian mode. This does not match the usual subreg
26800 semantics, so we cannot use simplify_gen_subreg in those cases. Access
26801 the appropriate constituent registers "by hand" in little-endian mode.
26803 Note we do not need to check for destructive overlap here since TDmode
26804 can only reside in even/odd register pairs. */
26805 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26807 rtx p_src, p_dst;
26808 int i;
26810 for (i = 0; i < nregs; i++)
26812 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
26813 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
26814 else
26815 p_src = simplify_gen_subreg (reg_mode, src, mode,
26816 i * reg_mode_size);
26818 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26819 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26820 else
26821 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26822 i * reg_mode_size);
26824 emit_insn (gen_rtx_SET (p_dst, p_src));
26827 return;
26830 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
26832 /* Move register range backwards, if we might have destructive
26833 overlap. */
26834 int i;
26835 for (i = nregs - 1; i >= 0; i--)
26836 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26837 i * reg_mode_size),
26838 simplify_gen_subreg (reg_mode, src, mode,
26839 i * reg_mode_size)));
26841 else
26843 int i;
26844 int j = -1;
26845 bool used_update = false;
26846 rtx restore_basereg = NULL_RTX;
26848 if (MEM_P (src) && INT_REGNO_P (reg))
26850 rtx breg;
26852 if (GET_CODE (XEXP (src, 0)) == PRE_INC
26853 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
26855 rtx delta_rtx;
26856 breg = XEXP (XEXP (src, 0), 0);
26857 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
26858 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
26859 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
26860 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26861 src = replace_equiv_address (src, breg);
26863 else if (! rs6000_offsettable_memref_p (src, reg_mode))
26865 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
26867 rtx basereg = XEXP (XEXP (src, 0), 0);
26868 if (TARGET_UPDATE)
26870 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
26871 emit_insn (gen_rtx_SET (ndst,
26872 gen_rtx_MEM (reg_mode,
26873 XEXP (src, 0))));
26874 used_update = true;
26876 else
26877 emit_insn (gen_rtx_SET (basereg,
26878 XEXP (XEXP (src, 0), 1)));
26879 src = replace_equiv_address (src, basereg);
26881 else
26883 rtx basereg = gen_rtx_REG (Pmode, reg);
26884 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
26885 src = replace_equiv_address (src, basereg);
26889 breg = XEXP (src, 0);
26890 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
26891 breg = XEXP (breg, 0);
26893 /* If the base register we are using to address memory is
26894 also a destination reg, then change that register last. */
26895 if (REG_P (breg)
26896 && REGNO (breg) >= REGNO (dst)
26897 && REGNO (breg) < REGNO (dst) + nregs)
26898 j = REGNO (breg) - REGNO (dst);
26900 else if (MEM_P (dst) && INT_REGNO_P (reg))
26902 rtx breg;
26904 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
26905 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
26907 rtx delta_rtx;
26908 breg = XEXP (XEXP (dst, 0), 0);
26909 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
26910 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
26911 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
26913 /* We have to update the breg before doing the store.
26914 Use store with update, if available. */
26916 if (TARGET_UPDATE)
26918 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26919 emit_insn (TARGET_32BIT
26920 ? (TARGET_POWERPC64
26921 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
26922 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
26923 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
26924 used_update = true;
26926 else
26927 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26928 dst = replace_equiv_address (dst, breg);
26930 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
26931 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
26933 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
26935 rtx basereg = XEXP (XEXP (dst, 0), 0);
26936 if (TARGET_UPDATE)
26938 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26939 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
26940 XEXP (dst, 0)),
26941 nsrc));
26942 used_update = true;
26944 else
26945 emit_insn (gen_rtx_SET (basereg,
26946 XEXP (XEXP (dst, 0), 1)));
26947 dst = replace_equiv_address (dst, basereg);
26949 else
26951 rtx basereg = XEXP (XEXP (dst, 0), 0);
26952 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
26953 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
26954 && REG_P (basereg)
26955 && REG_P (offsetreg)
26956 && REGNO (basereg) != REGNO (offsetreg));
26957 if (REGNO (basereg) == 0)
26959 rtx tmp = offsetreg;
26960 offsetreg = basereg;
26961 basereg = tmp;
26963 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
26964 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
26965 dst = replace_equiv_address (dst, basereg);
26968 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
26969 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
26972 for (i = 0; i < nregs; i++)
26974 /* Calculate index to next subword. */
26975 ++j;
26976 if (j == nregs)
26977 j = 0;
26979 /* If compiler already emitted move of first word by
26980 store with update, no need to do anything. */
26981 if (j == 0 && used_update)
26982 continue;
26984 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26985 j * reg_mode_size),
26986 simplify_gen_subreg (reg_mode, src, mode,
26987 j * reg_mode_size)));
26989 if (restore_basereg != NULL_RTX)
26990 emit_insn (restore_basereg);
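/* Example (added commentary): splitting a TImode move from r10,r11 into
   r11,r12 would clobber r11 before it is read if done forwards, so the
   backward loop above emits "mr r12,r11" before "mr r11,r10";
   non-overlapping cases use the forward loop, skipping word 0 when a
   load/store-with-update has already moved it.  */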
26995 /* This page contains routines that are used to determine what the
26996 function prologue and epilogue code will do and write them out. */
26998 static inline bool
26999 save_reg_p (int r)
27001 return !call_used_regs[r] && df_regs_ever_live_p (r);
27004 /* Determine whether the gp REG is really used. */
27006 static bool
27007 rs6000_reg_live_or_pic_offset_p (int reg)
27009 /* We need to mark the PIC offset register live under the same conditions
27010 as those under which it is set up; otherwise it won't be saved before we clobber it. */
27012 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
27014 if (TARGET_TOC && TARGET_MINIMAL_TOC
27015 && (crtl->calls_eh_return
27016 || df_regs_ever_live_p (reg)
27017 || !constant_pool_empty_p ()))
27018 return true;
27020 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
27021 && flag_pic)
27022 return true;
27025 /* If the function calls eh_return, treat as used all the registers that
27026 would otherwise be checked for liveness. */
27028 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
27029 && !call_used_regs[reg]);
27032 /* Return the first fixed-point register that is required to be
27033 saved. 32 if none. */
27036 first_reg_to_save (void)
27038 int first_reg;
27040 /* Find lowest numbered live register. */
27041 for (first_reg = 13; first_reg <= 31; first_reg++)
27042 if (save_reg_p (first_reg))
27043 break;
27045 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
27046 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
27047 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
27048 || (TARGET_TOC && TARGET_MINIMAL_TOC))
27049 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
27050 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
27052 #if TARGET_MACHO
27053 if (flag_pic
27054 && crtl->uses_pic_offset_table
27055 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
27056 return RS6000_PIC_OFFSET_TABLE_REGNUM;
27057 #endif
27059 return first_reg;
27062 /* Similar, for FP regs. */
27065 first_fp_reg_to_save (void)
27067 int first_reg;
27069 /* Find lowest numbered live register. */
27070 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
27071 if (save_reg_p (first_reg))
27072 break;
27074 return first_reg;
27077 /* Similar, for AltiVec regs. */
27079 static int
27080 first_altivec_reg_to_save (void)
27082 int i;
27084 /* Stack frame remains as is unless we are in the AltiVec ABI. */
27085 if (! TARGET_ALTIVEC_ABI)
27086 return LAST_ALTIVEC_REGNO + 1;
27088 /* On Darwin, the unwind routines are compiled without
27089 TARGET_ALTIVEC, and use save_world to save/restore the
27090 altivec registers when necessary. */
27091 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
27092 && ! TARGET_ALTIVEC)
27093 return FIRST_ALTIVEC_REGNO + 20;
27095 /* Find lowest numbered live register. */
27096 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
27097 if (save_reg_p (i))
27098 break;
27100 return i;
27103 /* Return a 32-bit mask of the AltiVec registers we need to set in
27104 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
27105 the 32-bit word is 0. */
27107 static unsigned int
27108 compute_vrsave_mask (void)
27110 unsigned int i, mask = 0;
27112 /* On Darwin, the unwind routines are compiled without
27113 TARGET_ALTIVEC, and use save_world to save/restore the
27114 call-saved altivec registers when necessary. */
27115 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
27116 && ! TARGET_ALTIVEC)
27117 mask |= 0xFFF;
27119 /* First, find out if we use _any_ altivec registers. */
27120 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
27121 if (df_regs_ever_live_p (i))
27122 mask |= ALTIVEC_REG_BIT (i);
27124 if (mask == 0)
27125 return mask;
27127 /* Next, remove the argument registers from the set. These must
27128 be in the VRSAVE mask set by the caller, so we don't need to add
27129 them in again. More importantly, the mask we compute here is
27130 used to generate CLOBBERs in the set_vrsave insn, and we do not
27131 wish the argument registers to die. */
27132 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
27133 mask &= ~ALTIVEC_REG_BIT (i);
27135 /* Similarly, remove the return value from the set. */
27137 bool yes = false;
27138 diddle_return_value (is_altivec_return_reg, &yes);
27139 if (yes)
27140 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
27143 return mask;
27146 /* For a very restricted set of circumstances, we can cut down the
27147 size of prologues/epilogues by calling our own save/restore-the-world
27148 routines. */
27150 static void
27151 compute_save_world_info (rs6000_stack_t *info)
27153 info->world_save_p = 1;
27154 info->world_save_p
27155 = (WORLD_SAVE_P (info)
27156 && DEFAULT_ABI == ABI_DARWIN
27157 && !cfun->has_nonlocal_label
27158 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
27159 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
27160 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
27161 && info->cr_save_p);
27163 /* This will not work in conjunction with sibcalls. Make sure there
27164 are none. (This check is expensive, but seldom executed.) */
27165 if (WORLD_SAVE_P (info))
27167 rtx_insn *insn;
27168 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
27169 if (CALL_P (insn) && SIBLING_CALL_P (insn))
27171 info->world_save_p = 0;
27172 break;
27176 if (WORLD_SAVE_P (info))
27178 /* Even if we're not touching VRsave, make sure there's room on the
27179 stack for it, if it looks like we're calling SAVE_WORLD, which
27180 will attempt to save it. */
27181 info->vrsave_size = 4;
27183 /* If we are going to save the world, we need to save the link register too. */
27184 info->lr_save_p = 1;
27186 /* "Save" the VRsave register too if we're saving the world. */
27187 if (info->vrsave_mask == 0)
27188 info->vrsave_mask = compute_vrsave_mask ();
27190 /* Because the Darwin register save/restore routines only handle
27191 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
27192 check. */
27193 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
27194 && (info->first_altivec_reg_save
27195 >= FIRST_SAVED_ALTIVEC_REGNO));
27198 return;
27202 static void
27203 is_altivec_return_reg (rtx reg, void *xyes)
27205 bool *yes = (bool *) xyes;
27206 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
27207 *yes = true;
27211 /* Return whether REG is a global user reg or has been specified by
27212 -ffixed-REG. We should not restore these, and so cannot use
27213 lmw or out-of-line restore functions if there are any. We also
27214 can't save them (well, emit frame notes for them), because frame
27215 unwinding during exception handling will restore saved registers. */
27217 static bool
27218 fixed_reg_p (int reg)
27220 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
27221 backend sets it, overriding anything the user might have given. */
27222 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
27223 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
27224 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
27225 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
27226 return false;
27228 return fixed_regs[reg];
27231 /* Determine the strategy for savings/restoring registers. */
27233 enum {
27234 SAVE_MULTIPLE = 0x1,
27235 SAVE_INLINE_GPRS = 0x2,
27236 SAVE_INLINE_FPRS = 0x4,
27237 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
27238 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
27239 SAVE_INLINE_VRS = 0x20,
27240 REST_MULTIPLE = 0x100,
27241 REST_INLINE_GPRS = 0x200,
27242 REST_INLINE_FPRS = 0x400,
27243 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
27244 REST_INLINE_VRS = 0x1000
27247 static int
27248 rs6000_savres_strategy (rs6000_stack_t *info,
27249 bool using_static_chain_p)
27251 int strategy = 0;
27253 /* Select between in-line and out-of-line save and restore of regs.
27254 First, all the obvious cases where we don't use out-of-line. */
27255 if (crtl->calls_eh_return
27256 || cfun->machine->ra_need_lr)
27257 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
27258 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
27259 | SAVE_INLINE_VRS | REST_INLINE_VRS);
27261 if (info->first_gp_reg_save == 32)
27262 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27264 if (info->first_fp_reg_save == 64
27265 /* The out-of-line FP routines use double-precision stores;
27266 we can't use those routines if we don't have such stores. */
27267 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
27268 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27270 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
27271 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27273 /* Define cutoff for using out-of-line functions to save registers. */
27274 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
27276 if (!optimize_size)
27278 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27279 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27280 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27282 else
27284 /* Prefer out-of-line restore if it will exit. */
27285 if (info->first_fp_reg_save > 61)
27286 strategy |= SAVE_INLINE_FPRS;
27287 if (info->first_gp_reg_save > 29)
27289 if (info->first_fp_reg_save == 64)
27290 strategy |= SAVE_INLINE_GPRS;
27291 else
27292 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27294 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
27295 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27298 else if (DEFAULT_ABI == ABI_DARWIN)
27300 if (info->first_fp_reg_save > 60)
27301 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27302 if (info->first_gp_reg_save > 29)
27303 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27304 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27306 else
27308 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27309 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
27310 || info->first_fp_reg_save > 61)
27311 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27312 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27313 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27316 /* Don't bother to try to save things out-of-line if r11 is occupied
27317 by the static chain. It would require too much fiddling and the
27318 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
27319 pointer on Darwin, and AIX uses r1 or r12. */
27320 if (using_static_chain_p
27321 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
27322 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
27323 | SAVE_INLINE_GPRS
27324 | SAVE_INLINE_VRS);
27326 /* Saving CR interferes with the exit routines used on the SPE, so
27327 just punt here. */
27328 if (TARGET_SPE_ABI
27329 && info->spe_64bit_regs_used
27330 && info->cr_save_p)
27331 strategy |= REST_INLINE_GPRS;
27333 /* We can only use the out-of-line routines to restore fprs if we've
27334 saved all the registers from first_fp_reg_save in the prologue.
27335 Otherwise, we risk loading garbage. Of course, if we have saved
27336 out-of-line then we know we haven't skipped any fprs. */
27337 if ((strategy & SAVE_INLINE_FPRS)
27338 && !(strategy & REST_INLINE_FPRS))
27340 int i;
27342 for (i = info->first_fp_reg_save; i < 64; i++)
27343 if (fixed_regs[i] || !save_reg_p (i))
27345 strategy |= REST_INLINE_FPRS;
27346 break;
27350 /* Similarly, for altivec regs. */
27351 if ((strategy & SAVE_INLINE_VRS)
27352 && !(strategy & REST_INLINE_VRS))
27354 int i;
27356 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
27357 if (fixed_regs[i] || !save_reg_p (i))
27359 strategy |= REST_INLINE_VRS;
27360 break;
27364 /* info->lr_save_p isn't yet set if the only reason lr needs to be
27365 saved is an out-of-line save or restore. Set up the value for
27366 the next test (excluding out-of-line gprs). */
27367 bool lr_save_p = (info->lr_save_p
27368 || !(strategy & SAVE_INLINE_FPRS)
27369 || !(strategy & SAVE_INLINE_VRS)
27370 || !(strategy & REST_INLINE_FPRS)
27371 || !(strategy & REST_INLINE_VRS));
27373 if (TARGET_MULTIPLE
27374 && !TARGET_POWERPC64
27375 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
27376 && info->first_gp_reg_save < 31
27377 && !(flag_shrink_wrap
27378 && flag_shrink_wrap_separate
27379 && optimize_function_for_speed_p (cfun)))
27381 /* Prefer store multiple for saves over out-of-line routines,
27382 since the store-multiple instruction will always be smaller. */
27383 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
27385 /* The situation is more complicated with load multiple. We'd
27386 prefer to use the out-of-line routines for restores, since the
27387 "exit" out-of-line routines can handle the restore of LR and the
27388 frame teardown. However it doesn't make sense to use the
27389 out-of-line routine if that is the only reason we'd need to save
27390 LR, and we can't use the "exit" out-of-line gpr restore if we
27391 have saved some fprs; in those cases it is advantageous to use
27392 load multiple when available. */
27393 if (info->first_fp_reg_save != 64 || !lr_save_p)
27394 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
27397 /* Using the "exit" out-of-line routine does not improve code size
27398 if using it would require lr to be saved and if only saving one
27399 or two gprs. */
27400 else if (!lr_save_p && info->first_gp_reg_save > 29)
27401 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27403 /* We can only use load multiple or the out-of-line routines to
27404 restore gprs if we've saved all the registers from
27405 first_gp_reg_save. Otherwise, we risk loading garbage.
27406 Of course, if we have saved out-of-line or used stmw then we know
27407 we haven't skipped any gprs. */
27408 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
27409 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
27411 int i;
27413 for (i = info->first_gp_reg_save; i < 32; i++)
27414 if (fixed_reg_p (i) || !save_reg_p (i))
27416 strategy |= REST_INLINE_GPRS;
27417 strategy &= ~REST_MULTIPLE;
27418 break;
27422 if (TARGET_ELF && TARGET_64BIT)
27424 if (!(strategy & SAVE_INLINE_FPRS))
27425 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27426 else if (!(strategy & SAVE_INLINE_GPRS)
27427 && info->first_fp_reg_save == 64)
27428 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
27430 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
27431 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
27433 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
27434 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27436 return strategy;
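/* A sketch (not the actual caller) of how the prologue and epilogue
   emitters are expected to consume the bitmask computed above:  */
#if 0
  int strategy = info->savres_strategy;
  if ((strategy & SAVE_MULTIPLE))
    ;				/* emit a single stmw */
  else if (!(strategy & SAVE_INLINE_GPRS))
    ;				/* call an out-of-line _savegpr routine */
  else
    ;				/* emit one store per GPR */
#endif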
27439 /* Calculate the stack information for the current function. This is
27440 complicated by having two separate calling sequences, the AIX calling
27441 sequence and the V.4 calling sequence.
27443 AIX (and Darwin/Mac OS X) stack frames look like:
27444 32-bit 64-bit
27445 SP----> +---------------------------------------+
27446 | back chain to caller | 0 0
27447 +---------------------------------------+
27448 | saved CR | 4 8 (8-11)
27449 +---------------------------------------+
27450 | saved LR | 8 16
27451 +---------------------------------------+
27452 | reserved for compilers | 12 24
27453 +---------------------------------------+
27454 | reserved for binders | 16 32
27455 +---------------------------------------+
27456 | saved TOC pointer | 20 40
27457 +---------------------------------------+
27458 | Parameter save area (+padding*) (P) | 24 48
27459 +---------------------------------------+
27460 | Alloca space (A) | 24+P etc.
27461 +---------------------------------------+
27462 | Local variable space (L) | 24+P+A
27463 +---------------------------------------+
27464 | Float/int conversion temporary (X) | 24+P+A+L
27465 +---------------------------------------+
27466 | Save area for AltiVec registers (W) | 24+P+A+L+X
27467 +---------------------------------------+
27468 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
27469 +---------------------------------------+
27470 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
27471 +---------------------------------------+
27472 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
27473 +---------------------------------------+
27474 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
27475 +---------------------------------------+
27476 old SP->| back chain to caller's caller |
27477 +---------------------------------------+
27479 * If the alloca area is present, the parameter save area is
27480 padded so that the former starts 16-byte aligned.
27482 The required alignment for AIX configurations is two words (i.e., 8
27483 or 16 bytes).
27485 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
27487 SP----> +---------------------------------------+
27488 | Back chain to caller | 0
27489 +---------------------------------------+
27490 | Save area for CR | 8
27491 +---------------------------------------+
27492 | Saved LR | 16
27493 +---------------------------------------+
27494 | Saved TOC pointer | 24
27495 +---------------------------------------+
27496 | Parameter save area (+padding*) (P) | 32
27497 +---------------------------------------+
27498 | Alloca space (A) | 32+P
27499 +---------------------------------------+
27500 | Local variable space (L) | 32+P+A
27501 +---------------------------------------+
27502 | Save area for AltiVec registers (W) | 32+P+A+L
27503 +---------------------------------------+
27504 | AltiVec alignment padding (Y) | 32+P+A+L+W
27505 +---------------------------------------+
27506 | Save area for GP registers (G) | 32+P+A+L+W+Y
27507 +---------------------------------------+
27508 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
27509 +---------------------------------------+
27510 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
27511 +---------------------------------------+
27513 * If the alloca area is present, the parameter save area is
27514 padded so that the former starts 16-byte aligned.
27516 V.4 stack frames look like:
27518 SP----> +---------------------------------------+
27519 | back chain to caller | 0
27520 +---------------------------------------+
27521 | caller's saved LR | 4
27522 +---------------------------------------+
27523 | Parameter save area (+padding*) (P) | 8
27524 +---------------------------------------+
27525 | Alloca space (A) | 8+P
27526 +---------------------------------------+
27527 | Varargs save area (V) | 8+P+A
27528 +---------------------------------------+
27529 | Local variable space (L) | 8+P+A+V
27530 +---------------------------------------+
27531 | Float/int conversion temporary (X) | 8+P+A+V+L
27532 +---------------------------------------+
27533 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
27534 +---------------------------------------+
27535 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
27536 +---------------------------------------+
27537 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
27538 +---------------------------------------+
27539 | SPE: area for 64-bit GP registers |
27540 +---------------------------------------+
27541 | SPE alignment padding |
27542 +---------------------------------------+
27543 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
27544 +---------------------------------------+
27545 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
27546 +---------------------------------------+
27547 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
27548 +---------------------------------------+
27549 old SP->| back chain to caller's caller |
27550 +---------------------------------------+
27552 * If the alloca area is present and the required alignment is
27553 16 bytes, the parameter save area is padded so that the
27554 alloca area starts 16-byte aligned.
27556 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
27557 given. (But note below and in sysv4.h that we require only 8 and
27558 may round up the size of our stack frame anyway. The historical
27559 reason is early versions of powerpc-linux which didn't properly
27560 align the stack at program startup. A happy side-effect is that
27561 -mno-eabi libraries can be used with -meabi programs.)
27563 The EABI configuration defaults to the V.4 layout. However,
27564 the stack alignment requirements may differ. If -mno-eabi is not
27565 given, the required stack alignment is 8 bytes; if -mno-eabi is
27566 given, the required alignment is 16 bytes. (But see V.4 comment
27567 above.) */
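/* Worked example of the V.4 layout above (arithmetic only): with a
   16-byte parameter save area (P = 16), no alloca or varargs areas
   (A = V = 0), 40 bytes of locals (L = 40), and no conversion
   temporary, vector or SPE state (X = W = Y = Z = 0), the saved CR
   word lands at 8+P+A+V+L+X+W+Y+Z = 64 from the new SP, and the GP
   save area starts at 68 (C = 4 on 32-bit).  */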
27569 #ifndef ABI_STACK_BOUNDARY
27570 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
27571 #endif
27573 static rs6000_stack_t *
27574 rs6000_stack_info (void)
27576 /* We should never be called for thunks; we are not set up for that. */
27577 gcc_assert (!cfun->is_thunk);
27579 rs6000_stack_t *info = &stack_info;
27580 int reg_size = TARGET_32BIT ? 4 : 8;
27581 int ehrd_size;
27582 int ehcr_size;
27583 int save_align;
27584 int first_gp;
27585 HOST_WIDE_INT non_fixed_size;
27586 bool using_static_chain_p;
27588 if (reload_completed && info->reload_completed)
27589 return info;
27591 memset (info, 0, sizeof (*info));
27592 info->reload_completed = reload_completed;
27594 if (TARGET_SPE)
27596 /* Cache value so we don't rescan instruction chain over and over. */
27597 if (cfun->machine->spe_insn_chain_scanned_p == 0)
27598 cfun->machine->spe_insn_chain_scanned_p
27599 = spe_func_has_64bit_regs_p () + 1;
27600 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
27603 /* Select which calling sequence. */
27604 info->abi = DEFAULT_ABI;
27606 /* Calculate which registers need to be saved & save area size. */
27607 info->first_gp_reg_save = first_reg_to_save ();
27608 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
27609 even if it currently looks like we won't. Reload may need it to
27610 get at a constant; if so, it will have already created a constant
27611 pool entry for it. */
27612 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
27613 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27614 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27615 && crtl->uses_const_pool
27616 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
27617 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
27618 else
27619 first_gp = info->first_gp_reg_save;
27621 info->gp_size = reg_size * (32 - first_gp);
27623 /* For the SPE, we have an additional upper 32-bits on each GPR.
27624 Ideally we should save the entire 64-bits only when the upper
27625 half is used in SIMD instructions. Since we only record
27626 registers live (not the size they are used in), this proves
27627 difficult because we'd have to traverse the instruction chain at
27628 the right time, taking reload into account. This is a real pain,
27629 so we opt to save the GPRs in 64-bits if even one register
27630 gets used in 64-bits. Otherwise, all the registers in the frame
27631 get saved in 32-bits.
27633 Note that when we save all GPRs (except the SP) in 64-bits, the
27634 traditional GP save area will be empty. */
27635 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27636 info->gp_size = 0;
27638 info->first_fp_reg_save = first_fp_reg_to_save ();
27639 info->fp_size = 8 * (64 - info->first_fp_reg_save);
27641 info->first_altivec_reg_save = first_altivec_reg_to_save ();
27642 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
27643 - info->first_altivec_reg_save);
27645 /* Does this function call anything? */
27646 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
27648 /* Determine if we need to save the condition code registers. */
27649 if (save_reg_p (CR2_REGNO)
27650 || save_reg_p (CR3_REGNO)
27651 || save_reg_p (CR4_REGNO))
27653 info->cr_save_p = 1;
27654 if (DEFAULT_ABI == ABI_V4)
27655 info->cr_size = reg_size;
27658 /* If the current function calls __builtin_eh_return, then we need
27659 to allocate stack space for registers that will hold data for
27660 the exception handler. */
27661 if (crtl->calls_eh_return)
27663 unsigned int i;
27664 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
27665 continue;
27667 /* SPE saves EH registers in 64-bits. */
27668 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
27669 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
27671 else
27672 ehrd_size = 0;
27674 /* In the ELFv2 ABI, we also need to allocate space for separate
27675 CR field save areas if the function calls __builtin_eh_return. */
27676 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27678 /* This hard-codes that we have three call-saved CR fields. */
27679 ehcr_size = 3 * reg_size;
27680 /* We do *not* use the regular CR save mechanism. */
27681 info->cr_save_p = 0;
27683 else
27684 ehcr_size = 0;
27686 /* Determine various sizes. */
27687 info->reg_size = reg_size;
27688 info->fixed_size = RS6000_SAVE_AREA;
27689 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
27690 if (cfun->calls_alloca)
27691 info->parm_size =
27692 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
27693 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
27694 else
27695 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
27696 TARGET_ALTIVEC ? 16 : 8);
27697 if (FRAME_GROWS_DOWNWARD)
27698 info->vars_size
27699 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
27700 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
27701 - (info->fixed_size + info->vars_size + info->parm_size);
27703 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27704 info->spe_gp_size = 8 * (32 - first_gp);
27706 if (TARGET_ALTIVEC_ABI)
27707 info->vrsave_mask = compute_vrsave_mask ();
27709 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
27710 info->vrsave_size = 4;
27712 compute_save_world_info (info);
27714 /* Calculate the offsets. */
27715 switch (DEFAULT_ABI)
27717 case ABI_NONE:
27718 default:
27719 gcc_unreachable ();
27721 case ABI_AIX:
27722 case ABI_ELFv2:
27723 case ABI_DARWIN:
27724 info->fp_save_offset = -info->fp_size;
27725 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27727 if (TARGET_ALTIVEC_ABI)
27729 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
27731 /* Align stack so vector save area is on a quadword boundary.
27732 The padding goes above the vectors. */
27733 if (info->altivec_size != 0)
27734 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
27736 info->altivec_save_offset = info->vrsave_save_offset
27737 - info->altivec_padding_size
27738 - info->altivec_size;
27739 gcc_assert (info->altivec_size == 0
27740 || info->altivec_save_offset % 16 == 0);
27742 /* Adjust for AltiVec case. */
27743 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
27745 else
27746 info->ehrd_offset = info->gp_save_offset - ehrd_size;
27748 info->ehcr_offset = info->ehrd_offset - ehcr_size;
27749 info->cr_save_offset = reg_size; /* first word when 64-bit. */
27750 info->lr_save_offset = 2*reg_size;
27751 break;
27753 case ABI_V4:
27754 info->fp_save_offset = -info->fp_size;
27755 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27756 info->cr_save_offset = info->gp_save_offset - info->cr_size;
27758 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27760 /* Align stack so SPE GPR save area is aligned on a
27761 double-word boundary. */
27762 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
27763 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
27764 else
27765 info->spe_padding_size = 0;
27767 info->spe_gp_save_offset = info->cr_save_offset
27768 - info->spe_padding_size
27769 - info->spe_gp_size;
27771 /* Adjust for SPE case. */
27772 info->ehrd_offset = info->spe_gp_save_offset;
27774 else if (TARGET_ALTIVEC_ABI)
27776 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
27778 /* Align stack so vector save area is on a quadword boundary. */
27779 if (info->altivec_size != 0)
27780 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
27782 info->altivec_save_offset = info->vrsave_save_offset
27783 - info->altivec_padding_size
27784 - info->altivec_size;
27786 /* Adjust for AltiVec case. */
27787 info->ehrd_offset = info->altivec_save_offset;
27789 else
27790 info->ehrd_offset = info->cr_save_offset;
27792 info->ehrd_offset -= ehrd_size;
27793 info->lr_save_offset = reg_size;
27796 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
27797 info->save_size = RS6000_ALIGN (info->fp_size
27798 + info->gp_size
27799 + info->altivec_size
27800 + info->altivec_padding_size
27801 + info->spe_gp_size
27802 + info->spe_padding_size
27803 + ehrd_size
27804 + ehcr_size
27805 + info->cr_size
27806 + info->vrsave_size,
27807 save_align);
27809 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
27811 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
27812 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
27814 /* Determine if we need to save the link register. */
27815 if (info->calls_p
27816 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27817 && crtl->profile
27818 && !TARGET_PROFILE_KERNEL)
27819 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
27820 #ifdef TARGET_RELOCATABLE
27821 || (DEFAULT_ABI == ABI_V4
27822 && (TARGET_RELOCATABLE || flag_pic > 1)
27823 && !constant_pool_empty_p ())
27824 #endif
27825 || rs6000_ra_ever_killed ())
27826 info->lr_save_p = 1;
27828 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27829 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27830 && call_used_regs[STATIC_CHAIN_REGNUM]);
27831 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
27833 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
27834 || !(info->savres_strategy & SAVE_INLINE_FPRS)
27835 || !(info->savres_strategy & SAVE_INLINE_VRS)
27836 || !(info->savres_strategy & REST_INLINE_GPRS)
27837 || !(info->savres_strategy & REST_INLINE_FPRS)
27838 || !(info->savres_strategy & REST_INLINE_VRS))
27839 info->lr_save_p = 1;
27841 if (info->lr_save_p)
27842 df_set_regs_ever_live (LR_REGNO, true);
27844 /* Determine if we need to allocate any stack frame:
27846 For AIX we need to push the stack if a frame pointer is needed
27847 (because the stack might be dynamically adjusted), if we are
27848 debugging, if we make calls, or if the sum of fp_save, gp_save,
27849 and local variables are more than the space needed to save all
27850 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
27851 + 18*8 = 288 (GPR13 reserved).
27853 For V.4 we don't have the stack cushion that AIX uses, but assume
27854 that the debugger can handle stackless frames. */
27856 if (info->calls_p)
27857 info->push_p = 1;
27859 else if (DEFAULT_ABI == ABI_V4)
27860 info->push_p = non_fixed_size != 0;
27862 else if (frame_pointer_needed)
27863 info->push_p = 1;
27865 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
27866 info->push_p = 1;
27868 else
27869 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
27871 return info;
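/* Typical usage sketch (illustrative only): the prologue emitter
   reads the record cached above rather than recomputing it.  */
#if 0
  rs6000_stack_t *info = rs6000_stack_info ();
  if (info->push_p)
    rs6000_emit_allocate_stack (info->total_size, NULL_RTX, 0);
#endif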
27874 /* Return true if the current function uses any GPRs in 64-bit SIMD
27875 mode. */
27877 static bool
27878 spe_func_has_64bit_regs_p (void)
27880 rtx_insn *insns, *insn;
27882 /* Functions that save and restore all the call-saved registers will
27883 need to save/restore the registers in 64-bits. */
27884 if (crtl->calls_eh_return
27885 || cfun->calls_setjmp
27886 || crtl->has_nonlocal_goto)
27887 return true;
27889 insns = get_insns ();
27891 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
27893 if (INSN_P (insn))
27895 rtx i;
27897 /* FIXME: This should be implemented with attributes...
27899 (set_attr "spe64" "true")....then,
27900 if (get_spe64(insn)) return true;
27902 It's the only reliable way to do the stuff below. */
27904 i = PATTERN (insn);
27905 if (GET_CODE (i) == SET)
27907 machine_mode mode = GET_MODE (SET_SRC (i));
27909 if (SPE_VECTOR_MODE (mode))
27910 return true;
27911 if (TARGET_E500_DOUBLE
27912 && (mode == DFmode || FLOAT128_2REG_P (mode)))
27913 return true;
27918 return false;
27921 static void
27922 debug_stack_info (rs6000_stack_t *info)
27924 const char *abi_string;
27926 if (! info)
27927 info = rs6000_stack_info ();
27929 fprintf (stderr, "\nStack information for function %s:\n",
27930 ((current_function_decl && DECL_NAME (current_function_decl))
27931 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
27932 : "<unknown>"));
27934 switch (info->abi)
27936 default: abi_string = "Unknown"; break;
27937 case ABI_NONE: abi_string = "NONE"; break;
27938 case ABI_AIX: abi_string = "AIX"; break;
27939 case ABI_ELFv2: abi_string = "ELFv2"; break;
27940 case ABI_DARWIN: abi_string = "Darwin"; break;
27941 case ABI_V4: abi_string = "V.4"; break;
27944 fprintf (stderr, "\tABI = %5s\n", abi_string);
27946 if (TARGET_ALTIVEC_ABI)
27947 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
27949 if (TARGET_SPE_ABI)
27950 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
27952 if (info->first_gp_reg_save != 32)
27953 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
27955 if (info->first_fp_reg_save != 64)
27956 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
27958 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
27959 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
27960 info->first_altivec_reg_save);
27962 if (info->lr_save_p)
27963 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
27965 if (info->cr_save_p)
27966 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
27968 if (info->vrsave_mask)
27969 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
27971 if (info->push_p)
27972 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
27974 if (info->calls_p)
27975 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
27977 if (info->gp_size)
27978 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
27980 if (info->fp_size)
27981 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
27983 if (info->altivec_size)
27984 fprintf (stderr, "\taltivec_save_offset = %5d\n",
27985 info->altivec_save_offset);
27987 if (info->spe_gp_size)
27988 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
27989 info->spe_gp_save_offset);
27991 if (info->vrsave_size)
27992 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
27993 info->vrsave_save_offset);
27995 if (info->lr_save_p)
27996 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
27998 if (info->cr_save_p)
27999 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
28001 if (info->varargs_save_offset)
28002 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
28004 if (info->total_size)
28005 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
28006 info->total_size);
28008 if (info->vars_size)
28009 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
28010 info->vars_size);
28012 if (info->parm_size)
28013 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
28015 if (info->fixed_size)
28016 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
28018 if (info->gp_size)
28019 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
28021 if (info->spe_gp_size)
28022 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
28024 if (info->fp_size)
28025 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
28027 if (info->altivec_size)
28028 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
28030 if (info->vrsave_size)
28031 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
28033 if (info->altivec_padding_size)
28034 fprintf (stderr, "\taltivec_padding_size= %5d\n",
28035 info->altivec_padding_size);
28037 if (info->spe_padding_size)
28038 fprintf (stderr, "\tspe_padding_size = %5d\n",
28039 info->spe_padding_size);
28041 if (info->cr_size)
28042 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
28044 if (info->save_size)
28045 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
28047 if (info->reg_size != 4)
28048 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
28050 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
28052 fprintf (stderr, "\n");
28055 rtx
28056 rs6000_return_addr (int count, rtx frame)
28058 /* Currently we don't optimize very well between prologue and body
28059 code, and for PIC code the code can actually be quite bad, so
28060 don't try to be too clever here. */
28061 if (count != 0
28062 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
28064 cfun->machine->ra_needs_full_frame = 1;
28066 return
28067 gen_rtx_MEM
28068 (Pmode,
28069 memory_address
28070 (Pmode,
28071 plus_constant (Pmode,
28072 copy_to_reg
28073 (gen_rtx_MEM (Pmode,
28074 memory_address (Pmode, frame))),
28075 RETURN_ADDRESS_OFFSET)));
28078 cfun->machine->ra_need_lr = 1;
28079 return get_hard_reg_initial_val (Pmode, LR_REGNO);
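/* Illustrative: __builtin_return_address (0) takes the
   get_hard_reg_initial_val path just above, while any nonzero COUNT
   (or PIC on V.4/Darwin) walks the back chain through the MEM built
   earlier and forces a full frame.  */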
28082 /* Say whether a function is a candidate for sibcall handling or not. */
28084 static bool
28085 rs6000_function_ok_for_sibcall (tree decl, tree exp)
28087 tree fntype;
28089 if (decl)
28090 fntype = TREE_TYPE (decl);
28091 else
28092 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
28094 /* We can't do it if the called function has more vector parameters
28095 than the current function; there's nowhere to put the VRsave code. */
28096 if (TARGET_ALTIVEC_ABI
28097 && TARGET_ALTIVEC_VRSAVE
28098 && !(decl && decl == current_function_decl))
28100 function_args_iterator args_iter;
28101 tree type;
28102 int nvreg = 0;
28104 /* Functions with vector parameters are required to have a
28105 prototype, so the argument type info must be available
28106 here. */
28107 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
28108 if (TREE_CODE (type) == VECTOR_TYPE
28109 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28110 nvreg++;
28112 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
28113 if (TREE_CODE (type) == VECTOR_TYPE
28114 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28115 nvreg--;
28117 if (nvreg > 0)
28118 return false;
28121 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
28122 functions, because the callee may have a different TOC pointer from
28123 the caller and there's no way to ensure we restore the TOC when
28124 we return. With the secure-plt SYSV ABI we can't make non-local
28125 calls when -fpic/PIC because the plt call stubs use r30. */
28126 if (DEFAULT_ABI == ABI_DARWIN
28127 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28128 && decl
28129 && !DECL_EXTERNAL (decl)
28130 && !DECL_WEAK (decl)
28131 && (*targetm.binds_local_p) (decl))
28132 || (DEFAULT_ABI == ABI_V4
28133 && (!TARGET_SECURE_PLT
28134 || !flag_pic
28135 || (decl
28136 && (*targetm.binds_local_p) (decl)))))
28138 tree attr_list = TYPE_ATTRIBUTES (fntype);
28140 if (!lookup_attribute ("longcall", attr_list)
28141 || lookup_attribute ("shortcall", attr_list))
28142 return true;
28145 return false;
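/* Illustrative example: under the V.4 secure PLT with -fPIC, a call
   to an extern function is never sibcalled (its PLT stub needs r30),
   while a call to a static function in the same translation unit
   passes the binds_local_p test above, provided the callee is not
   marked __attribute__ ((longcall)).  */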
28148 static int
28149 rs6000_ra_ever_killed (void)
28151 rtx_insn *top;
28152 rtx reg;
28153 rtx_insn *insn;
28155 if (cfun->is_thunk)
28156 return 0;
28158 if (cfun->machine->lr_save_state)
28159 return cfun->machine->lr_save_state - 1;
28161 /* regs_ever_live has LR marked as used if any sibcalls are present,
28162 but this should not force saving and restoring in the
28163 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
28164 clobbers LR, so that is inappropriate. */
28166 /* Also, the prologue can generate a store into LR that
28167 doesn't really count, like this:
28169 move LR->R0
28170 bcl to set PIC register
28171 move LR->R31
28172 move R0->LR
28174 When we're called from the epilogue, we need to avoid counting
28175 this as a store. */
28177 push_topmost_sequence ();
28178 top = get_insns ();
28179 pop_topmost_sequence ();
28180 reg = gen_rtx_REG (Pmode, LR_REGNO);
28182 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
28184 if (INSN_P (insn))
28186 if (CALL_P (insn))
28188 if (!SIBLING_CALL_P (insn))
28189 return 1;
28191 else if (find_regno_note (insn, REG_INC, LR_REGNO))
28192 return 1;
28193 else if (set_of (reg, insn) != NULL_RTX
28194 && !prologue_epilogue_contains (insn))
28195 return 1;
28198 return 0;
28201 /* Emit instructions needed to load the TOC register.
28202 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
28203 a constant pool; or for SVR4 -fpic. */
28205 void
28206 rs6000_emit_load_toc_table (int fromprolog)
28208 rtx dest;
28209 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
28211 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
28213 char buf[30];
28214 rtx lab, tmp1, tmp2, got;
28216 lab = gen_label_rtx ();
28217 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
28218 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28219 if (flag_pic == 2)
28221 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28222 need_toc_init = 1;
28224 else
28225 got = rs6000_got_sym ();
28226 tmp1 = tmp2 = dest;
28227 if (!fromprolog)
28229 tmp1 = gen_reg_rtx (Pmode);
28230 tmp2 = gen_reg_rtx (Pmode);
28232 emit_insn (gen_load_toc_v4_PIC_1 (lab));
28233 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
28234 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
28235 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
28237 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
28239 emit_insn (gen_load_toc_v4_pic_si ());
28240 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28242 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
28244 char buf[30];
28245 rtx temp0 = (fromprolog
28246 ? gen_rtx_REG (Pmode, 0)
28247 : gen_reg_rtx (Pmode));
28249 if (fromprolog)
28251 rtx symF, symL;
28253 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28254 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28256 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
28257 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28259 emit_insn (gen_load_toc_v4_PIC_1 (symF));
28260 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28261 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
28263 else
28265 rtx tocsym, lab;
28267 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28268 need_toc_init = 1;
28269 lab = gen_label_rtx ();
28270 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
28271 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28272 if (TARGET_LINK_STACK)
28273 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
28274 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
28276 emit_insn (gen_addsi3 (dest, temp0, dest));
28278 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
28280 /* This is for AIX code running in non-PIC ELF32. */
28281 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28283 need_toc_init = 1;
28284 emit_insn (gen_elf_high (dest, realsym));
28285 emit_insn (gen_elf_low (dest, dest, realsym));
28287 else
28289 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28291 if (TARGET_32BIT)
28292 emit_insn (gen_load_toc_aix_si (dest));
28293 else
28294 emit_insn (gen_load_toc_aix_di (dest));
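/* For illustration only: on 32-bit SVR4 with flag_pic == 1 the two
   insns emitted above correspond to asm roughly like

	bl _GLOBAL_OFFSET_TABLE_@local-4
	mflr 30

   (a sketch of the load_toc_v4_pic_si path, not verbatim output).  */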
28298 /* Emit instructions to restore the link register after determining where
28299 its value has been stored. */
28301 void
28302 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
28304 rs6000_stack_t *info = rs6000_stack_info ();
28305 rtx operands[2];
28307 operands[0] = source;
28308 operands[1] = scratch;
28310 if (info->lr_save_p)
28312 rtx frame_rtx = stack_pointer_rtx;
28313 HOST_WIDE_INT sp_offset = 0;
28314 rtx tmp;
28316 if (frame_pointer_needed
28317 || cfun->calls_alloca
28318 || info->total_size > 32767)
28320 tmp = gen_frame_mem (Pmode, frame_rtx);
28321 emit_move_insn (operands[1], tmp);
28322 frame_rtx = operands[1];
28324 else if (info->push_p)
28325 sp_offset = info->total_size;
28327 tmp = plus_constant (Pmode, frame_rtx,
28328 info->lr_save_offset + sp_offset);
28329 tmp = gen_frame_mem (Pmode, tmp);
28330 emit_move_insn (tmp, operands[0]);
28332 else
28333 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
28335 /* Freeze lr_save_p. We've just emitted rtl that depends on the
28336 state of lr_save_p so any change from here on would be a bug. In
28337 particular, stop rs6000_ra_ever_killed from considering the SET
28338 of lr we may have added just above. */
28339 cfun->machine->lr_save_state = info->lr_save_p + 1;
28342 static GTY(()) alias_set_type set = -1;
28344 alias_set_type
28345 get_TOC_alias_set (void)
28347 if (set == -1)
28348 set = new_alias_set ();
28349 return set;
28352 /* This returns nonzero if the current function uses the TOC. This is
28353 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
28354 is generated by the ABI_V4 load_toc_* patterns. */
28355 #if TARGET_ELF
28356 static int
28357 uses_TOC (void)
28359 rtx_insn *insn;
28361 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
28362 if (INSN_P (insn))
28364 rtx pat = PATTERN (insn);
28365 int i;
28367 if (GET_CODE (pat) == PARALLEL)
28368 for (i = 0; i < XVECLEN (pat, 0); i++)
28370 rtx sub = XVECEXP (pat, 0, i);
28371 if (GET_CODE (sub) == USE)
28373 sub = XEXP (sub, 0);
28374 if (GET_CODE (sub) == UNSPEC
28375 && XINT (sub, 1) == UNSPEC_TOC)
28376 return 1;
28380 return 0;
28382 #endif
28384 rtx
28385 create_TOC_reference (rtx symbol, rtx largetoc_reg)
28387 rtx tocrel, tocreg, hi;
28389 if (TARGET_DEBUG_ADDR)
28391 if (GET_CODE (symbol) == SYMBOL_REF)
28392 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
28393 XSTR (symbol, 0));
28394 else
28396 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
28397 GET_RTX_NAME (GET_CODE (symbol)));
28398 debug_rtx (symbol);
28402 if (!can_create_pseudo_p ())
28403 df_set_regs_ever_live (TOC_REGISTER, true);
28405 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
28406 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
28407 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
28408 return tocrel;
28410 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
28411 if (largetoc_reg != NULL)
28413 emit_move_insn (largetoc_reg, hi);
28414 hi = largetoc_reg;
28416 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
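/* Illustrative only: for CMODEL_SMALL the UNSPEC_TOCREL built above
   typically becomes a single TOC-relative access such as

	ld 9,sym@toc(2)

   while the HIGH/LO_SUM pair used for the larger code models
   corresponds to something like

	addis 9,2,sym@toc@ha
	ld 9,sym@toc@l(9)

   (the exact mnemonics depend on mode and context).  */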
28419 /* Issue assembly directives that create a reference to the given DWARF
28420 FRAME_TABLE_LABEL from the current function section. */
28421 void
28422 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
28424 fprintf (asm_out_file, "\t.ref %s\n",
28425 (* targetm.strip_name_encoding) (frame_table_label));
28428 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
28429 and the change to the stack pointer. */
28431 static void
28432 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
28434 rtvec p;
28435 int i;
28436 rtx regs[3];
28438 i = 0;
28439 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28440 if (hard_frame_needed)
28441 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
28442 if (!(REGNO (fp) == STACK_POINTER_REGNUM
28443 || (hard_frame_needed
28444 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
28445 regs[i++] = fp;
28447 p = rtvec_alloc (i);
28448 while (--i >= 0)
28450 rtx mem = gen_frame_mem (BLKmode, regs[i]);
28451 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
28454 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
28457 /* Emit the correct code for allocating stack space, as insns.
28458 If COPY_REG, leave a copy of the old stack pointer (plus COPY_OFF) in it.
28459 The generated code may use hard register 0 as a temporary. */
28461 static rtx_insn *
28462 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
28464 rtx_insn *insn;
28465 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28466 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
28467 rtx todec = gen_int_mode (-size, Pmode);
28468 rtx par, set, mem;
28470 if (INTVAL (todec) != -size)
28472 warning (0, "stack frame too large");
28473 emit_insn (gen_trap ());
28474 return 0;
28477 if (crtl->limit_stack)
28479 if (REG_P (stack_limit_rtx)
28480 && REGNO (stack_limit_rtx) > 1
28481 && REGNO (stack_limit_rtx) <= 31)
28483 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
28484 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28485 const0_rtx));
28487 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
28488 && TARGET_32BIT
28489 && DEFAULT_ABI == ABI_V4
28490 && !flag_pic)
28492 rtx toload = gen_rtx_CONST (VOIDmode,
28493 gen_rtx_PLUS (Pmode,
28494 stack_limit_rtx,
28495 GEN_INT (size)));
28497 emit_insn (gen_elf_high (tmp_reg, toload));
28498 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
28499 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28500 const0_rtx));
28502 else
28503 warning (0, "stack limit expression is not supported");
28506 if (copy_reg)
28508 if (copy_off != 0)
28509 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
28510 else
28511 emit_move_insn (copy_reg, stack_reg);
28514 if (size > 32767)
28516 /* Need a note here so that try_split doesn't get confused. */
28517 if (get_last_insn () == NULL_RTX)
28518 emit_note (NOTE_INSN_DELETED);
28519 insn = emit_move_insn (tmp_reg, todec);
28520 try_split (PATTERN (insn), insn, 0);
28521 todec = tmp_reg;
28524 insn = emit_insn (TARGET_32BIT
28525 ? gen_movsi_update_stack (stack_reg, stack_reg,
28526 todec, stack_reg)
28527 : gen_movdi_di_update_stack (stack_reg, stack_reg,
28528 todec, stack_reg));
28529 /* Since we didn't use gen_frame_mem to generate the MEM, grab
28530 it now and set the alias set/attributes. The above gen_*_update
28531 calls will generate a PARALLEL with the MEM set being the first
28532 operation. */
28533 par = PATTERN (insn);
28534 gcc_assert (GET_CODE (par) == PARALLEL);
28535 set = XVECEXP (par, 0, 0);
28536 gcc_assert (GET_CODE (set) == SET);
28537 mem = SET_DEST (set);
28538 gcc_assert (MEM_P (mem));
28539 MEM_NOTRAP_P (mem) = 1;
28540 set_mem_alias_set (mem, get_frame_alias_set ());
28542 RTX_FRAME_RELATED_P (insn) = 1;
28543 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28544 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
28545 GEN_INT (-size))));
28546 return insn;
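/* Rough picture of the insns emitted above, 32-bit case:

	size <= 32767:	stwu 1,-size(1)

	size > 32767:	lis 0,hi(-size)
			ori 0,0,lo(-size)
			stwux 1,1,0

   This is a sketch only; the actual splitting of the large constant
   is whatever try_split produces for the move into r0.  */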
28549 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
28551 #if PROBE_INTERVAL > 32768
28552 #error Cannot use indexed addressing mode for stack probing
28553 #endif
28555 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
28556 inclusive. These are offsets from the current stack pointer. */
28558 static void
28559 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
28561 /* See if we have a constant small number of probes to generate. If so,
28562 that's the easy case. */
28563 if (first + size <= 32768)
28565 HOST_WIDE_INT i;
28567 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
28568 it exceeds SIZE. If only one probe is needed, this will not
28569 generate any code. Then probe at FIRST + SIZE. */
28570 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
28571 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28572 -(first + i)));
28574 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28575 -(first + size)));
28578 /* Otherwise, do the same as above, but in a loop. Note that we must be
28579 extra careful with variables wrapping around because we might be at
28580 the very top (or the very bottom) of the address space and we have
28581 to be able to handle this case properly; in particular, we use an
28582 equality test for the loop condition. */
28583 else
28585 HOST_WIDE_INT rounded_size;
28586 rtx r12 = gen_rtx_REG (Pmode, 12);
28587 rtx r0 = gen_rtx_REG (Pmode, 0);
28589 /* Sanity check for the addressing mode we're going to use. */
28590 gcc_assert (first <= 32768);
28592 /* Step 1: round SIZE to the previous multiple of the interval. */
28594 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
28597 /* Step 2: compute initial and final value of the loop counter. */
28599 /* TEST_ADDR = SP + FIRST. */
28600 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
28601 -first)));
28603 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
28604 if (rounded_size > 32768)
28606 emit_move_insn (r0, GEN_INT (-rounded_size));
28607 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
28609 else
28610 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
28611 -rounded_size)));
28614 /* Step 3: the loop
28616 do
28617 {
28618 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
28619 probe at TEST_ADDR
28620 }
28621 while (TEST_ADDR != LAST_ADDR)
28623 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
28624 until it is equal to ROUNDED_SIZE. */
28626 if (TARGET_64BIT)
28627 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
28628 else
28629 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
28632 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
28633 that SIZE is equal to ROUNDED_SIZE. */
28635 if (size != rounded_size)
28636 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
28640 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
28641 absolute addresses. */
28643 const char *
28644 output_probe_stack_range (rtx reg1, rtx reg2)
28646 static int labelno = 0;
28647 char loop_lab[32];
28648 rtx xops[2];
28650 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
28652 /* Loop. */
28653 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
28655 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
28656 xops[0] = reg1;
28657 xops[1] = GEN_INT (-PROBE_INTERVAL);
28658 output_asm_insn ("addi %0,%0,%1", xops);
28660 /* Probe at TEST_ADDR. */
28661 xops[1] = gen_rtx_REG (Pmode, 0);
28662 output_asm_insn ("stw %1,0(%0)", xops);
28664 /* Test if TEST_ADDR == LAST_ADDR. */
28665 xops[1] = reg2;
28666 if (TARGET_64BIT)
28667 output_asm_insn ("cmpd 0,%0,%1", xops);
28668 else
28669 output_asm_insn ("cmpw 0,%0,%1", xops);
28671 /* Branch. */
28672 fputs ("\tbne 0,", asm_out_file);
28673 assemble_name_raw (asm_out_file, loop_lab);
28674 fputc ('\n', asm_out_file);
28676 return "";
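/* With PROBE_INTERVAL == 4096 and REG1 == r12, REG2 == r0 (as set up
   by rs6000_emit_probe_stack_range), the 32-bit output reads:

   .LPSRL0:
	addi 12,12,-4096
	stw 0,0(12)
	cmpw 0,12,0
	bne 0,.LPSRL0

   (label syntax per the target's ASM_GENERATE_INTERNAL_LABEL).  */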
28679 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
28680 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
28681 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
28682 deduce these equivalences by itself so it wasn't necessary to hold
28683 its hand so much. Don't be tempted to always supply d2_f_d_e with
28684 the actual cfa register, i.e. r31 when we are using a hard frame
28685 pointer. That fails when saving regs off r1, and sched moves the
28686 r31 setup past the reg saves. */
28688 static rtx_insn *
28689 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
28690 rtx reg2, rtx repl2)
28692 rtx repl;
28694 if (REGNO (reg) == STACK_POINTER_REGNUM)
28696 gcc_checking_assert (val == 0);
28697 repl = NULL_RTX;
28699 else
28700 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28701 GEN_INT (val));
28703 rtx pat = PATTERN (insn);
28704 if (!repl && !reg2)
28706 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
28707 if (GET_CODE (pat) == PARALLEL)
28708 for (int i = 0; i < XVECLEN (pat, 0); i++)
28709 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28711 rtx set = XVECEXP (pat, 0, i);
28713 /* If this PARALLEL has been emitted for out-of-line
28714 register save functions, or store multiple, then omit
28715 eh_frame info for any user-defined global regs. If
28716 eh_frame info is supplied, frame unwinding will
28717 restore a user reg. */
28718 if (!REG_P (SET_SRC (set))
28719 || !fixed_reg_p (REGNO (SET_SRC (set))))
28720 RTX_FRAME_RELATED_P (set) = 1;
28722 RTX_FRAME_RELATED_P (insn) = 1;
28723 return insn;
28726 /* We expect that 'pat' is either a SET or a PARALLEL containing
28727 SETs (and possibly other stuff). In a PARALLEL, all the SETs
28728 are important so they all have to be marked RTX_FRAME_RELATED_P.
28729 Call simplify_replace_rtx on the SETs rather than the whole insn
28730 so as to leave the other stuff alone (for example USE of r12). */
28732 set_used_flags (pat);
28733 if (GET_CODE (pat) == SET)
28735 if (repl)
28736 pat = simplify_replace_rtx (pat, reg, repl);
28737 if (reg2)
28738 pat = simplify_replace_rtx (pat, reg2, repl2);
28740 else if (GET_CODE (pat) == PARALLEL)
28742 pat = shallow_copy_rtx (pat);
28743 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
28745 for (int i = 0; i < XVECLEN (pat, 0); i++)
28746 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28748 rtx set = XVECEXP (pat, 0, i);
28750 if (repl)
28751 set = simplify_replace_rtx (set, reg, repl);
28752 if (reg2)
28753 set = simplify_replace_rtx (set, reg2, repl2);
28754 XVECEXP (pat, 0, i) = set;
28756 /* Omit eh_frame info for any user-defined global regs. */
28757 if (!REG_P (SET_SRC (set))
28758 || !fixed_reg_p (REGNO (SET_SRC (set))))
28759 RTX_FRAME_RELATED_P (set) = 1;
28762 else
28763 gcc_unreachable ();
28765 RTX_FRAME_RELATED_P (insn) = 1;
28766 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
28768 return insn;
28771 /* Returns an insn that has a vrsave set operation with the
28772 appropriate CLOBBERs. */
28774 static rtx
28775 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
28777 int nclobs, i;
28778 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
28779 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28781 clobs[0]
28782 = gen_rtx_SET (vrsave,
28783 gen_rtx_UNSPEC_VOLATILE (SImode,
28784 gen_rtvec (2, reg, vrsave),
28785 UNSPECV_SET_VRSAVE));
28787 nclobs = 1;
28789 /* We need to clobber the registers in the mask so the scheduler
28790 does not move sets to VRSAVE before sets of AltiVec registers.
28792 However, if the function receives nonlocal gotos, reload will set
28793 all call saved registers live. We will end up with:
28795 (set (reg 999) (mem))
28796 (parallel [ (set (reg vrsave) (unspec blah))
28797 (clobber (reg 999))])
28799 The clobber will cause the store into reg 999 to be dead, and
28800 flow will attempt to delete an epilogue insn. In this case, we
28801 need an unspec use/set of the register. */
28803 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
28804 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28806 if (!epiloguep || call_used_regs [i])
28807 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
28808 gen_rtx_REG (V4SImode, i));
28809 else
28811 rtx reg = gen_rtx_REG (V4SImode, i);
28813 clobs[nclobs++]
28814 = gen_rtx_SET (reg,
28815 gen_rtx_UNSPEC (V4SImode,
28816 gen_rtvec (1, reg), 27));
28820 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
28822 for (i = 0; i < nclobs; ++i)
28823 XVECEXP (insn, 0, i) = clobs[i];
28825 return insn;
28828 static rtx
28829 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
28831 rtx addr, mem;
28833 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
28834 mem = gen_frame_mem (GET_MODE (reg), addr);
28835 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
28838 static rtx
28839 gen_frame_load (rtx reg, rtx frame_reg, int offset)
28841 return gen_frame_set (reg, frame_reg, offset, false);
28844 static rtx
28845 gen_frame_store (rtx reg, rtx frame_reg, int offset)
28847 return gen_frame_set (reg, frame_reg, offset, true);
28850 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
28851 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
28853 static rtx_insn *
28854 emit_frame_save (rtx frame_reg, machine_mode mode,
28855 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
28857 rtx reg;
28859 /* Some cases that need register indexed addressing. */
28860 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
28861 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
28862 || (TARGET_E500_DOUBLE && mode == DFmode)
28863 || (TARGET_SPE_ABI
28864 && SPE_VECTOR_MODE (mode)
28865 && !SPE_CONST_OFFSET_OK (offset))));
28867 reg = gen_rtx_REG (mode, regno);
28868 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
28869 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
28870 NULL_RTX, NULL_RTX);
28873 /* Emit an offset memory reference suitable for a frame store, while
28874 converting to a valid addressing mode. */
28876 static rtx
28877 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
28879 rtx int_rtx, offset_rtx;
28881 int_rtx = GEN_INT (offset);
28883 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
28884 || (TARGET_E500_DOUBLE && mode == DFmode))
28886 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
28887 emit_move_insn (offset_rtx, int_rtx);
28889 else
28890 offset_rtx = int_rtx;
28892 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
28895 #ifndef TARGET_FIX_AND_CONTINUE
28896 #define TARGET_FIX_AND_CONTINUE 0
28897 #endif
28899 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
28900 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
28901 #define LAST_SAVRES_REGISTER 31
28902 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
28904 enum {
28905 SAVRES_LR = 0x1,
28906 SAVRES_SAVE = 0x2,
28907 SAVRES_REG = 0x0c,
28908 SAVRES_GPR = 0,
28909 SAVRES_FPR = 4,
28910 SAVRES_VR = 8
28913 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
28915 /* Temporary holding space for an out-of-line register save/restore
28916 routine name. */
28917 static char savres_routine_name[30];
28919 /* Return the name for an out-of-line register save/restore routine.
28920 SEL selects the register class and the save/restore/LR variant. */
28922 static char *
28923 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
28925 const char *prefix = "";
28926 const char *suffix = "";
28928 /* Different targets are supposed to define
28929 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
28930 routine name could be defined with:
28932 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
28934 This is a nice idea in theory, but in reality, things are
28935 complicated in several ways:
28937 - ELF targets have save/restore routines for GPRs.
28939 - SPE targets use different prefixes for 32/64-bit registers, and
28940 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
28942 - PPC64 ELF targets have routines for save/restore of GPRs that
28943 differ in what they do with the link register, so having a set
28944 prefix doesn't work. (We only use one of the save routines at
28945 the moment, though.)
28947 - PPC32 ELF targets have "exit" versions of the restore routines
28948 that restore the link register and can save some extra space.
28949 These require an extra suffix. (There are also "tail" versions
28950 of the restore routines and "GOT" versions of the save routines,
28951 but we don't generate those at present. Same problems apply,
28952 though.)
28954 We deal with all this by synthesizing our own prefix/suffix and
28955 using that for the simple sprintf call shown above. */
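   /* For example (illustrative only, derived from the cases below): a
      V.4 save of GPRs starting at r29 yields "_savegpr_29", the
      LR-restoring "exit" variant of the restore yields "_restgpr_29_x",
      and the same save on ELFv2 yields "_savegpr0_29" or "_savegpr1_29"
      depending on whether LR is handled.  */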
28956 if (TARGET_SPE)
28958 /* No floating point saves on the SPE. */
28959 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
28961 if ((sel & SAVRES_SAVE))
28962 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
28963 else
28964 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
28966 if ((sel & SAVRES_LR))
28967 suffix = "_x";
28969 else if (DEFAULT_ABI == ABI_V4)
28971 if (TARGET_64BIT)
28972 goto aix_names;
28974 if ((sel & SAVRES_REG) == SAVRES_GPR)
28975 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
28976 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28977 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
28978 else if ((sel & SAVRES_REG) == SAVRES_VR)
28979 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28980 else
28981 abort ();
28983 if ((sel & SAVRES_LR))
28984 suffix = "_x";
28986 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28988 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
28989 /* No out-of-line save/restore routines for GPRs on AIX. */
28990 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
28991 #endif
28993 aix_names:
28994 if ((sel & SAVRES_REG) == SAVRES_GPR)
28995 prefix = ((sel & SAVRES_SAVE)
28996 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
28997 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
28998 else if ((sel & SAVRES_REG) == SAVRES_FPR)
29000 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29001 if ((sel & SAVRES_LR))
29002 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
29003 else
29004 #endif
29006 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
29007 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
29010 else if ((sel & SAVRES_REG) == SAVRES_VR)
29011 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
29012 else
29013 abort ();
29016 if (DEFAULT_ABI == ABI_DARWIN)
29018 /* The Darwin approach is (slightly) different, in order to be
29019 compatible with code generated by the system toolchain. There is a
29020 single symbol for the start of save sequence, and the code here
29021 embeds an offset into that code on the basis of the first register
29022 to be saved. */
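   /* Illustrative: for a GPR save starting at r29 without LR, the
      sprintf below produces "*saveGPR+64 ; save r29-r31", i.e. the
      common "saveGPR" entry point plus a byte offset of (29 - 13) * 4.  */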
29023 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
29024 if ((sel & SAVRES_REG) == SAVRES_GPR)
29025 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
29026 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
29027 (regno - 13) * 4, prefix, regno);
29028 else if ((sel & SAVRES_REG) == SAVRES_FPR)
29029 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
29030 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
29031 else if ((sel & SAVRES_REG) == SAVRES_VR)
29032 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
29033 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
29034 else
29035 abort ();
29037 else
29038 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
29040 return savres_routine_name;
29043 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
29044 SEL encodes the register class (GPR/FPR/VR), save vs. restore,
and whether LR is handled. */
29046 static rtx
29047 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
29049 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
29050 ? info->first_gp_reg_save
29051 : (sel & SAVRES_REG) == SAVRES_FPR
29052 ? info->first_fp_reg_save - 32
29053 : (sel & SAVRES_REG) == SAVRES_VR
29054 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
29055 : -1);
29056 rtx sym;
29057 int select = sel;
29059 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
29060 versions of the gpr routines. */
29061 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
29062 && info->spe_64bit_regs_used)
29063 select ^= SAVRES_FPR ^ SAVRES_GPR;
29065 /* Don't generate bogus routine names. */
29066 gcc_assert (FIRST_SAVRES_REGISTER <= regno
29067 && regno <= LAST_SAVRES_REGISTER
29068 && select >= 0 && select <= 12);
29070 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
29072 if (sym == NULL)
29074 char *name;
29076 name = rs6000_savres_routine_name (info, regno, sel);
29078 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
29079 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
29080 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
29083 return sym;
29086 /* Emit a sequence of insns, including a stack tie if needed, for
29087 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
29088 reset the stack pointer, but move the base of the frame into
29089 reg UPDT_REGNO for use by out-of-line register restore routines. */
29091 static rtx
29092 rs6000_emit_stack_reset (rs6000_stack_t *info,
29093 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
29094 unsigned updt_regno)
29096 /* If there is nothing to do, don't do anything. */
29097 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
29098 return NULL_RTX;
29100 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
29102 /* This blockage is needed so that sched doesn't decide to move
29103 the sp change before the register restores. */
29104 if (DEFAULT_ABI == ABI_V4
29105 || (TARGET_SPE_ABI
29106 && info->spe_64bit_regs_used != 0
29107 && info->first_gp_reg_save != 32))
29108 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
29109 GEN_INT (frame_off)));
29111 /* If we are restoring registers out-of-line, we will be using the
29112 "exit" variants of the restore routines, which will reset the
29113 stack for us. But we do need to point updt_reg into the
29114 right place for those routines. */
29115 if (frame_off != 0)
29116 return emit_insn (gen_add3_insn (updt_reg_rtx,
29117 frame_reg_rtx, GEN_INT (frame_off)));
29118 else
29119 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
29121 return NULL_RTX;
29124 /* Return the register number used as a pointer by out-of-line
29125 save/restore functions. */
29127 static inline unsigned
29128 ptr_regno_for_savres (int sel)
29130 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29131 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
29132 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
29135 /* Construct a parallel rtx describing the effect of a call to an
29136 out-of-line register save/restore routine, and emit the insn
29137 or jump_insn as appropriate. */
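/* Illustrative shape of the PARALLEL built below, for a V.4 GPR
   restore that also returns (a sketch, not literal output):

     (parallel [(return)
                (clobber (reg:P LR_REGNO))
                (use (symbol_ref "_restgpr_29_x"))
                (use (reg:P 11))
                (set (reg 29) (mem (plus (reg 11) (const_int ...))))
                ...])  */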
29139 static rtx_insn *
29140 rs6000_emit_savres_rtx (rs6000_stack_t *info,
29141 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
29142 machine_mode reg_mode, int sel)
29144 int i;
29145 int offset, start_reg, end_reg, n_regs, use_reg;
29146 int reg_size = GET_MODE_SIZE (reg_mode);
29147 rtx sym;
29148 rtvec p;
29149 rtx par;
29150 rtx_insn *insn;
29152 offset = 0;
29153 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29154 ? info->first_gp_reg_save
29155 : (sel & SAVRES_REG) == SAVRES_FPR
29156 ? info->first_fp_reg_save
29157 : (sel & SAVRES_REG) == SAVRES_VR
29158 ? info->first_altivec_reg_save
29159 : -1);
29160 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29161 ? 32
29162 : (sel & SAVRES_REG) == SAVRES_FPR
29163 ? 64
29164 : (sel & SAVRES_REG) == SAVRES_VR
29165 ? LAST_ALTIVEC_REGNO + 1
29166 : -1);
29167 n_regs = end_reg - start_reg;
29168 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
29169 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
29170 + n_regs);
29172 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29173 RTVEC_ELT (p, offset++) = ret_rtx;
29175 RTVEC_ELT (p, offset++)
29176 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
29178 sym = rs6000_savres_routine_sym (info, sel);
29179 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
29181 use_reg = ptr_regno_for_savres (sel);
29182 if ((sel & SAVRES_REG) == SAVRES_VR)
29184 /* Vector regs are saved/restored using [reg+reg] addressing. */
29185 RTVEC_ELT (p, offset++)
29186 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29187 RTVEC_ELT (p, offset++)
29188 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
29190 else
29191 RTVEC_ELT (p, offset++)
29192 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29194 for (i = 0; i < end_reg - start_reg; i++)
29195 RTVEC_ELT (p, i + offset)
29196 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
29197 frame_reg_rtx, save_area_offset + reg_size * i,
29198 (sel & SAVRES_SAVE) != 0);
29200 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29201 RTVEC_ELT (p, i + offset)
29202 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
29204 par = gen_rtx_PARALLEL (VOIDmode, p);
29206 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29208 insn = emit_jump_insn (par);
29209 JUMP_LABEL (insn) = ret_rtx;
29211 else
29212 insn = emit_insn (par);
29213 return insn;
29216 /* Emit code to store CR fields that need to be saved into REG. */
29218 static void
29219 rs6000_emit_move_from_cr (rtx reg)
29221 /* Only the ELFv2 ABI allows storing only selected fields. */
29222 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
29224 int i, cr_reg[8], count = 0;
29226 /* Collect CR fields that must be saved. */
29227 for (i = 0; i < 8; i++)
29228 if (save_reg_p (CR0_REGNO + i))
29229 cr_reg[count++] = i;
29231 /* If it's just a single one, use mfcrf. */
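      /* Illustrative: saving only CR2 gives cr_reg[0] == 2, so the
	 field mask below is 1 << (7 - 2) == 0x20, matching the FXM
	 operand of mfcrf/mfocrf.  */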
29232 if (count == 1)
29234 rtvec p = rtvec_alloc (1);
29235 rtvec r = rtvec_alloc (2);
29236 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
29237 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
29238 RTVEC_ELT (p, 0)
29239 = gen_rtx_SET (reg,
29240 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
29242 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29243 return;
29246 /* ??? It might be better to handle count == 2 / 3 cases here
29247 as well, using logical operations to combine the values. */
29250 emit_insn (gen_movesi_from_cr (reg));
29253 /* Return whether the split-stack arg pointer (r12) is used. */
29255 static bool
29256 split_stack_arg_pointer_used_p (void)
29258 /* If the pseudo holding the arg pointer is no longer a pseudo,
29259 then the arg pointer is used. */
29260 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
29261 && (!REG_P (cfun->machine->split_stack_arg_pointer)
29262 || (REGNO (cfun->machine->split_stack_arg_pointer)
29263 < FIRST_PSEUDO_REGISTER)))
29264 return true;
29266 /* Unfortunately we also need to do some code scanning, since
29267 r12 may have been substituted for the pseudo. */
29268 rtx_insn *insn;
29269 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
29270 FOR_BB_INSNS (bb, insn)
29271 if (NONDEBUG_INSN_P (insn))
29273 /* A call destroys r12. */
29274 if (CALL_P (insn))
29275 return false;
29277 df_ref use;
29278 FOR_EACH_INSN_USE (use, insn)
29280 rtx x = DF_REF_REG (use);
29281 if (REG_P (x) && REGNO (x) == 12)
29282 return true;
29284 df_ref def;
29285 FOR_EACH_INSN_DEF (def, insn)
29287 rtx x = DF_REF_REG (def);
29288 if (REG_P (x) && REGNO (x) == 12)
29289 return false;
29292 return bitmap_bit_p (DF_LR_OUT (bb), 12);
29295 /* Return whether we need to emit an ELFv2 global entry point prologue. */
29297 static bool
29298 rs6000_global_entry_point_needed_p (void)
29300 /* Only needed for the ELFv2 ABI. */
29301 if (DEFAULT_ABI != ABI_ELFv2)
29302 return false;
29304 /* With -msingle-pic-base, we assume the whole program shares the same
29305 TOC, so no global entry point prologues are needed anywhere. */
29306 if (TARGET_SINGLE_PIC_BASE)
29307 return false;
29309 /* Ensure we have a global entry point for thunks. ??? We could
29310 avoid that if the target routine doesn't need a global entry point,
29311 but we do not know whether this is the case at this point. */
29312 if (cfun->is_thunk)
29313 return true;
29315 /* For regular functions, rs6000_emit_prologue sets this flag if the
29316 routine ever uses the TOC pointer. */
29317 return cfun->machine->r2_setup_needed;
29320 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
29321 static sbitmap
29322 rs6000_get_separate_components (void)
29324 rs6000_stack_t *info = rs6000_stack_info ();
29326 if (WORLD_SAVE_P (info))
29327 return NULL;
29329 if (TARGET_SPE_ABI)
29330 return NULL;
29332 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
29333 && !(info->savres_strategy & REST_MULTIPLE));
29335 /* Component 0 is the save/restore of LR (done via GPR0).
29336 Components 13..31 are the save/restore of GPR13..GPR31.
29337 Components 46..63 are the save/restore of FPR14..FPR31. */
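   (Note: components use hard register numbers, so FPR14..FPR31 are
   hard regs 46..63, i.e. 32 + 14 through 32 + 31.)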
29339 cfun->machine->n_components = 64;
29341 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29342 bitmap_clear (components);
29344 int reg_size = TARGET_32BIT ? 4 : 8;
29345 int fp_reg_size = 8;
29347 /* The GPRs we need saved to the frame. */
29348 if ((info->savres_strategy & SAVE_INLINE_GPRS)
29349 && (info->savres_strategy & REST_INLINE_GPRS))
29351 int offset = info->gp_save_offset;
29352 if (info->push_p)
29353 offset += info->total_size;
29355 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29357 if (IN_RANGE (offset, -0x8000, 0x7fff)
29358 && rs6000_reg_live_or_pic_offset_p (regno))
29359 bitmap_set_bit (components, regno);
29361 offset += reg_size;
29365 /* Don't mess with the hard frame pointer. */
29366 if (frame_pointer_needed)
29367 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
29369 /* Don't mess with the fixed TOC register. */
29370 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
29371 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
29372 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
29373 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
29375 /* The FPRs we need saved to the frame. */
29376 if ((info->savres_strategy & SAVE_INLINE_FPRS)
29377 && (info->savres_strategy & REST_INLINE_FPRS))
29379 int offset = info->fp_save_offset;
29380 if (info->push_p)
29381 offset += info->total_size;
29383 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29385 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
29386 bitmap_set_bit (components, regno);
29388 offset += fp_reg_size;
29392 /* Optimize LR save and restore if we can. This is component 0. Any
29393 out-of-line register save/restore routines need LR. */
29394 if (info->lr_save_p
29395 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
29396 && (info->savres_strategy & SAVE_INLINE_GPRS)
29397 && (info->savres_strategy & REST_INLINE_GPRS)
29398 && (info->savres_strategy & SAVE_INLINE_FPRS)
29399 && (info->savres_strategy & REST_INLINE_FPRS)
29400 && (info->savres_strategy & SAVE_INLINE_VRS)
29401 && (info->savres_strategy & REST_INLINE_VRS))
29403 int offset = info->lr_save_offset;
29404 if (info->push_p)
29405 offset += info->total_size;
29406 if (IN_RANGE (offset, -0x8000, 0x7fff))
29407 bitmap_set_bit (components, 0);
29410 return components;
29413 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
29414 static sbitmap
29415 rs6000_components_for_bb (basic_block bb)
29417 rs6000_stack_t *info = rs6000_stack_info ();
29419 bitmap in = DF_LIVE_IN (bb);
29420 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
29421 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
29423 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29424 bitmap_clear (components);
29426 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
29428 /* GPRs. */
29429 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29430 if (bitmap_bit_p (in, regno)
29431 || bitmap_bit_p (gen, regno)
29432 || bitmap_bit_p (kill, regno))
29433 bitmap_set_bit (components, regno);
29435 /* FPRs. */
29436 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29437 if (bitmap_bit_p (in, regno)
29438 || bitmap_bit_p (gen, regno)
29439 || bitmap_bit_p (kill, regno))
29440 bitmap_set_bit (components, regno);
29442 /* The link register. */
29443 if (bitmap_bit_p (in, LR_REGNO)
29444 || bitmap_bit_p (gen, LR_REGNO)
29445 || bitmap_bit_p (kill, LR_REGNO))
29446 bitmap_set_bit (components, 0);
29448 return components;
29451 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
29452 static void
29453 rs6000_disqualify_components (sbitmap components, edge e,
29454 sbitmap edge_components, bool /*is_prologue*/)
29456 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
29457 live where we want to place that code. */
29458 if (bitmap_bit_p (edge_components, 0)
29459 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
29461 if (dump_file)
29462 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
29463 "on entry to bb %d\n", e->dest->index);
29464 bitmap_clear_bit (components, 0);
29468 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
29469 static void
29470 rs6000_emit_prologue_components (sbitmap components)
29472 rs6000_stack_t *info = rs6000_stack_info ();
29473 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29474 ? HARD_FRAME_POINTER_REGNUM
29475 : STACK_POINTER_REGNUM);
29477 machine_mode reg_mode = Pmode;
29478 int reg_size = TARGET_32BIT ? 4 : 8;
29479 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29480 ? DFmode : SFmode;
29481 int fp_reg_size = 8;
29483 /* Prologue for LR. */
29484 if (bitmap_bit_p (components, 0))
29486 rtx reg = gen_rtx_REG (reg_mode, 0);
29487 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
29488 RTX_FRAME_RELATED_P (insn) = 1;
29489 add_reg_note (insn, REG_CFA_REGISTER, NULL);
29491 int offset = info->lr_save_offset;
29492 if (info->push_p)
29493 offset += info->total_size;
29495 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29496 RTX_FRAME_RELATED_P (insn) = 1;
29497 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
29498 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
29499 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
29502 /* Prologue for the GPRs. */
29503 int offset = info->gp_save_offset;
29504 if (info->push_p)
29505 offset += info->total_size;
29507 for (int i = info->first_gp_reg_save; i < 32; i++)
29509 if (bitmap_bit_p (components, i))
29511 rtx reg = gen_rtx_REG (reg_mode, i);
29512 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29513 RTX_FRAME_RELATED_P (insn) = 1;
29514 rtx set = copy_rtx (single_set (insn));
29515 add_reg_note (insn, REG_CFA_OFFSET, set);
29518 offset += reg_size;
29521 /* Prologue for the FPRs. */
29522 offset = info->fp_save_offset;
29523 if (info->push_p)
29524 offset += info->total_size;
29526 for (int i = info->first_fp_reg_save; i < 64; i++)
29528 if (bitmap_bit_p (components, i))
29530 rtx reg = gen_rtx_REG (fp_reg_mode, i);
29531 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29532 RTX_FRAME_RELATED_P (insn) = 1;
29533 rtx set = copy_rtx (single_set (insn));
29534 add_reg_note (insn, REG_CFA_OFFSET, set);
29537 offset += fp_reg_size;
29541 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29542 static void
29543 rs6000_emit_epilogue_components (sbitmap components)
29545 rs6000_stack_t *info = rs6000_stack_info ();
29546 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29547 ? HARD_FRAME_POINTER_REGNUM
29548 : STACK_POINTER_REGNUM);
29550 machine_mode reg_mode = Pmode;
29551 int reg_size = TARGET_32BIT ? 4 : 8;
29553 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29554 ? DFmode : SFmode;
29555 int fp_reg_size = 8;
29557 /* Epilogue for the FPRs. */
29558 int offset = info->fp_save_offset;
29559 if (info->push_p)
29560 offset += info->total_size;
29562 for (int i = info->first_fp_reg_save; i < 64; i++)
29564 if (bitmap_bit_p (components, i))
29566 rtx reg = gen_rtx_REG (fp_reg_mode, i);
29567 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29568 RTX_FRAME_RELATED_P (insn) = 1;
29569 add_reg_note (insn, REG_CFA_RESTORE, reg);
29572 offset += fp_reg_size;
29575 /* Epilogue for the GPRs. */
29576 offset = info->gp_save_offset;
29577 if (info->push_p)
29578 offset += info->total_size;
29580 for (int i = info->first_gp_reg_save; i < 32; i++)
29582 if (bitmap_bit_p (components, i))
29584 rtx reg = gen_rtx_REG (reg_mode, i);
29585 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29586 RTX_FRAME_RELATED_P (insn) = 1;
29587 add_reg_note (insn, REG_CFA_RESTORE, reg);
29590 offset += reg_size;
29593 /* Epilogue for LR. */
29594 if (bitmap_bit_p (components, 0))
29596 int offset = info->lr_save_offset;
29597 if (info->push_p)
29598 offset += info->total_size;
29600 rtx reg = gen_rtx_REG (reg_mode, 0);
29601 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29603 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29604 insn = emit_move_insn (lr, reg);
29605 RTX_FRAME_RELATED_P (insn) = 1;
29606 add_reg_note (insn, REG_CFA_RESTORE, lr);
29610 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29611 static void
29612 rs6000_set_handled_components (sbitmap components)
29614 rs6000_stack_t *info = rs6000_stack_info ();
29616 for (int i = info->first_gp_reg_save; i < 32; i++)
29617 if (bitmap_bit_p (components, i))
29618 cfun->machine->gpr_is_wrapped_separately[i] = true;
29620 for (int i = info->first_fp_reg_save; i < 64; i++)
29621 if (bitmap_bit_p (components, i))
29622 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
29624 if (bitmap_bit_p (components, 0))
29625 cfun->machine->lr_is_wrapped_separately = true;
29628 /* Emit function prologue as insns. */
29630 void
29631 rs6000_emit_prologue (void)
29633 rs6000_stack_t *info = rs6000_stack_info ();
29634 machine_mode reg_mode = Pmode;
29635 int reg_size = TARGET_32BIT ? 4 : 8;
29636 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29637 ? DFmode : SFmode;
29638 int fp_reg_size = 8;
29639 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29640 rtx frame_reg_rtx = sp_reg_rtx;
29641 unsigned int cr_save_regno;
29642 rtx cr_save_rtx = NULL_RTX;
29643 rtx_insn *insn;
29644 int strategy;
29645 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
29646 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
29647 && call_used_regs[STATIC_CHAIN_REGNUM]);
29648 int using_split_stack = (flag_split_stack
29649 && (lookup_attribute ("no_split_stack",
29650 DECL_ATTRIBUTES (cfun->decl))
29651 == NULL));
29653 /* Offset to top of frame for frame_reg and sp respectively. */
29654 HOST_WIDE_INT frame_off = 0;
29655 HOST_WIDE_INT sp_off = 0;
29656 /* sp_adjust is the stack adjusting instruction, tracked so that the
29657 insn setting up the split-stack arg pointer can be emitted just
29658 prior to it, when r12 is not used here for other purposes. */
29659 rtx_insn *sp_adjust = 0;
29661 #if CHECKING_P
29662 /* Track and check usage of r0, r11, r12. */
29663 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
29664 #define START_USE(R) do \
29666 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29667 reg_inuse |= 1 << (R); \
29668 } while (0)
29669 #define END_USE(R) do \
29671 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
29672 reg_inuse &= ~(1 << (R)); \
29673 } while (0)
29674 #define NOT_INUSE(R) do \
29676 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29677 } while (0)
29678 #else
29679 #define START_USE(R) do {} while (0)
29680 #define END_USE(R) do {} while (0)
29681 #define NOT_INUSE(R) do {} while (0)
29682 #endif
29684 if (DEFAULT_ABI == ABI_ELFv2
29685 && !TARGET_SINGLE_PIC_BASE)
29687 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
29689 /* With -mminimal-toc we may generate an extra use of r2 below. */
29690 if (TARGET_TOC && TARGET_MINIMAL_TOC
29691 && !constant_pool_empty_p ())
29692 cfun->machine->r2_setup_needed = true;
29696 if (flag_stack_usage_info)
29697 current_function_static_stack_size = info->total_size;
29699 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
29701 HOST_WIDE_INT size = info->total_size;
29703 if (crtl->is_leaf && !cfun->calls_alloca)
29705 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
29706 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
29707 size - STACK_CHECK_PROTECT);
29709 else if (size > 0)
29710 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
29713 if (TARGET_FIX_AND_CONTINUE)
29715 /* gdb on darwin arranges to forward a function from the old
29716 address by modifying the first 5 instructions of the function
29717 to branch to the overriding function. This is necessary to
29718 permit function pointers that point to the old function to
29719 actually forward to the new function. */
29720 emit_insn (gen_nop ());
29721 emit_insn (gen_nop ());
29722 emit_insn (gen_nop ());
29723 emit_insn (gen_nop ());
29724 emit_insn (gen_nop ());
29727 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29729 reg_mode = V2SImode;
29730 reg_size = 8;
29733 /* Handle world saves specially here. */
29734 if (WORLD_SAVE_P (info))
29736 int i, j, sz;
29737 rtx treg;
29738 rtvec p;
29739 rtx reg0;
29741 /* save_world expects lr in r0. */
29742 reg0 = gen_rtx_REG (Pmode, 0);
29743 if (info->lr_save_p)
29745 insn = emit_move_insn (reg0,
29746 gen_rtx_REG (Pmode, LR_REGNO));
29747 RTX_FRAME_RELATED_P (insn) = 1;
29750 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
29751 assumptions about the offsets of various bits of the stack
29752 frame. */
29753 gcc_assert (info->gp_save_offset == -220
29754 && info->fp_save_offset == -144
29755 && info->lr_save_offset == 8
29756 && info->cr_save_offset == 4
29757 && info->push_p
29758 && info->lr_save_p
29759 && (!crtl->calls_eh_return
29760 || info->ehrd_offset == -432)
29761 && info->vrsave_save_offset == -224
29762 && info->altivec_save_offset == -416);
29764 treg = gen_rtx_REG (SImode, 11);
29765 emit_move_insn (treg, GEN_INT (-info->total_size));
29767 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
29768 in R11. It also clobbers R12, so beware! */
29770 /* Preserve CR2 for save_world prologues. */
29771 sz = 5;
29772 sz += 32 - info->first_gp_reg_save;
29773 sz += 64 - info->first_fp_reg_save;
29774 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
29775 p = rtvec_alloc (sz);
29776 j = 0;
29777 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
29778 gen_rtx_REG (SImode,
29779 LR_REGNO));
29780 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
29781 gen_rtx_SYMBOL_REF (Pmode,
29782 "*save_world"));
29783 /* We do floats first so that the instruction pattern matches
29784 properly. */
29785 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29786 RTVEC_ELT (p, j++)
29787 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29788 ? DFmode : SFmode,
29789 info->first_fp_reg_save + i),
29790 frame_reg_rtx,
29791 info->fp_save_offset + frame_off + 8 * i);
29792 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29793 RTVEC_ELT (p, j++)
29794 = gen_frame_store (gen_rtx_REG (V4SImode,
29795 info->first_altivec_reg_save + i),
29796 frame_reg_rtx,
29797 info->altivec_save_offset + frame_off + 16 * i);
29798 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29799 RTVEC_ELT (p, j++)
29800 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29801 frame_reg_rtx,
29802 info->gp_save_offset + frame_off + reg_size * i);
29804 /* CR register traditionally saved as CR2. */
29805 RTVEC_ELT (p, j++)
29806 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
29807 frame_reg_rtx, info->cr_save_offset + frame_off);
29808 /* Explain the use of R0. */
29809 if (info->lr_save_p)
29810 RTVEC_ELT (p, j++)
29811 = gen_frame_store (reg0,
29812 frame_reg_rtx, info->lr_save_offset + frame_off);
29813 /* Explain what happens to the stack pointer. */
29815 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
29816 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
29819 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29820 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29821 treg, GEN_INT (-info->total_size));
29822 sp_off = frame_off = info->total_size;
29825 strategy = info->savres_strategy;
29827 /* For V.4, update the stack before we do any saving and set the back pointer. */
29828 if (! WORLD_SAVE_P (info)
29829 && info->push_p
29830 && (DEFAULT_ABI == ABI_V4
29831 || crtl->calls_eh_return))
29833 bool need_r11 = (TARGET_SPE
29834 ? (!(strategy & SAVE_INLINE_GPRS)
29835 && info->spe_64bit_regs_used == 0)
29836 : (!(strategy & SAVE_INLINE_FPRS)
29837 || !(strategy & SAVE_INLINE_GPRS)
29838 || !(strategy & SAVE_INLINE_VRS)));
29839 int ptr_regno = -1;
29840 rtx ptr_reg = NULL_RTX;
29841 int ptr_off = 0;
29843 if (info->total_size < 32767)
29844 frame_off = info->total_size;
29845 else if (need_r11)
29846 ptr_regno = 11;
29847 else if (info->cr_save_p
29848 || info->lr_save_p
29849 || info->first_fp_reg_save < 64
29850 || info->first_gp_reg_save < 32
29851 || info->altivec_size != 0
29852 || info->vrsave_size != 0
29853 || crtl->calls_eh_return)
29854 ptr_regno = 12;
29855 else
29857 /* The prologue won't be saving any regs so there is no need
29858 to set up a frame register to access any frame save area.
29859 We also won't be using frame_off anywhere below, but set
29860 the correct value anyway to protect against future
29861 changes to this function. */
29862 frame_off = info->total_size;
29864 if (ptr_regno != -1)
29866 /* Set up the frame offset to that needed by the first
29867 out-of-line save function. */
29868 START_USE (ptr_regno);
29869 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29870 frame_reg_rtx = ptr_reg;
29871 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
29872 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
29873 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
29874 ptr_off = info->gp_save_offset + info->gp_size;
29875 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
29876 ptr_off = info->altivec_save_offset + info->altivec_size;
29877 frame_off = -ptr_off;
29879 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
29880 ptr_reg, ptr_off);
29881 if (REGNO (frame_reg_rtx) == 12)
29882 sp_adjust = 0;
29883 sp_off = info->total_size;
29884 if (frame_reg_rtx != sp_reg_rtx)
29885 rs6000_emit_stack_tie (frame_reg_rtx, false);
29888 /* If we use the link register, get it into r0. */
29889 if (!WORLD_SAVE_P (info) && info->lr_save_p
29890 && !cfun->machine->lr_is_wrapped_separately)
29892 rtx addr, reg, mem;
29894 reg = gen_rtx_REG (Pmode, 0);
29895 START_USE (0);
29896 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
29897 RTX_FRAME_RELATED_P (insn) = 1;
29899 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
29900 | SAVE_NOINLINE_FPRS_SAVES_LR)))
29902 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
29903 GEN_INT (info->lr_save_offset + frame_off));
29904 mem = gen_rtx_MEM (Pmode, addr);
29905 /* This store must not use rs6000_sr_alias_set, because of
29906 __builtin_return_address. */
29908 insn = emit_move_insn (mem, reg);
29909 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29910 NULL_RTX, NULL_RTX);
29911 END_USE (0);
29915 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
29916 r12 will be needed by out-of-line gpr restore. */
29917 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29918 && !(strategy & (SAVE_INLINE_GPRS
29919 | SAVE_NOINLINE_GPRS_SAVES_LR))
29920 ? 11 : 12);
29921 if (!WORLD_SAVE_P (info)
29922 && info->cr_save_p
29923 && REGNO (frame_reg_rtx) != cr_save_regno
29924 && !(using_static_chain_p && cr_save_regno == 11)
29925 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
29927 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
29928 START_USE (cr_save_regno);
29929 rs6000_emit_move_from_cr (cr_save_rtx);
29932 /* Do any required saving of FPRs. If the strategy says to save them
29933 inline, do it ourselves; otherwise, call an out-of-line routine. */
29934 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
29936 int offset = info->fp_save_offset + frame_off;
29937 for (int i = info->first_fp_reg_save; i < 64; i++)
29939 if (save_reg_p (i)
29940 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
29941 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
29942 sp_off - frame_off);
29944 offset += fp_reg_size;
29947 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
29949 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
29950 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
29951 unsigned ptr_regno = ptr_regno_for_savres (sel);
29952 rtx ptr_reg = frame_reg_rtx;
29954 if (REGNO (frame_reg_rtx) == ptr_regno)
29955 gcc_checking_assert (frame_off == 0);
29956 else
29958 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29959 NOT_INUSE (ptr_regno);
29960 emit_insn (gen_add3_insn (ptr_reg,
29961 frame_reg_rtx, GEN_INT (frame_off)));
29963 insn = rs6000_emit_savres_rtx (info, ptr_reg,
29964 info->fp_save_offset,
29965 info->lr_save_offset,
29966 DFmode, sel);
29967 rs6000_frame_related (insn, ptr_reg, sp_off,
29968 NULL_RTX, NULL_RTX);
29969 if (lr)
29970 END_USE (0);
29973 /* Save GPRs. This is done as a PARALLEL if we are using
29974 the store-multiple instructions. */
29975 if (!WORLD_SAVE_P (info)
29976 && TARGET_SPE_ABI
29977 && info->spe_64bit_regs_used != 0
29978 && info->first_gp_reg_save != 32)
29980 int i;
29981 rtx spe_save_area_ptr;
29982 HOST_WIDE_INT save_off;
29983 int ool_adjust = 0;
29985 /* Determine whether we can address all of the registers that need
29986 to be saved with an offset from frame_reg_rtx that fits in
29987 the small const field for SPE memory instructions. */
29988 int spe_regs_addressable
29989 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29990 + reg_size * (32 - info->first_gp_reg_save - 1))
29991 && (strategy & SAVE_INLINE_GPRS));
29993 if (spe_regs_addressable)
29995 spe_save_area_ptr = frame_reg_rtx;
29996 save_off = frame_off;
29998 else
30000 /* Make r11 point to the start of the SPE save area. We need
30001 to be careful here if r11 is holding the static chain. If
30002 it is, then temporarily save it in r0. */
30003 HOST_WIDE_INT offset;
30005 if (!(strategy & SAVE_INLINE_GPRS))
30006 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
30007 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
30008 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
30009 save_off = frame_off - offset;
30011 if (using_static_chain_p)
30013 rtx r0 = gen_rtx_REG (Pmode, 0);
30015 START_USE (0);
30016 gcc_assert (info->first_gp_reg_save > 11);
30018 emit_move_insn (r0, spe_save_area_ptr);
30020 else if (REGNO (frame_reg_rtx) != 11)
30021 START_USE (11);
30023 emit_insn (gen_addsi3 (spe_save_area_ptr,
30024 frame_reg_rtx, GEN_INT (offset)));
30025 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
30026 frame_off = -info->spe_gp_save_offset + ool_adjust;
30029 if ((strategy & SAVE_INLINE_GPRS))
30031 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30032 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
30033 emit_frame_save (spe_save_area_ptr, reg_mode,
30034 info->first_gp_reg_save + i,
30035 (info->spe_gp_save_offset + save_off
30036 + reg_size * i),
30037 sp_off - save_off);
30039 else
30041 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
30042 info->spe_gp_save_offset + save_off,
30043 0, reg_mode,
30044 SAVRES_SAVE | SAVRES_GPR);
30046 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
30047 NULL_RTX, NULL_RTX);
30050 /* Move the static chain pointer back. */
30051 if (!spe_regs_addressable)
30053 if (using_static_chain_p)
30055 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
30056 END_USE (0);
30058 else if (REGNO (frame_reg_rtx) != 11)
30059 END_USE (11);
30062 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
30064 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
30065 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
30066 unsigned ptr_regno = ptr_regno_for_savres (sel);
30067 rtx ptr_reg = frame_reg_rtx;
30068 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
30069 int end_save = info->gp_save_offset + info->gp_size;
30070 int ptr_off;
30072 if (ptr_regno == 12)
30073 sp_adjust = 0;
30074 if (!ptr_set_up)
30075 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30077 /* Need to adjust r11 (r12) if we saved any FPRs. */
30078 if (end_save + frame_off != 0)
30080 rtx offset = GEN_INT (end_save + frame_off);
30082 if (ptr_set_up)
30083 frame_off = -end_save;
30084 else
30085 NOT_INUSE (ptr_regno);
30086 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30088 else if (!ptr_set_up)
30090 NOT_INUSE (ptr_regno);
30091 emit_move_insn (ptr_reg, frame_reg_rtx);
30093 ptr_off = -end_save;
30094 insn = rs6000_emit_savres_rtx (info, ptr_reg,
30095 info->gp_save_offset + ptr_off,
30096 info->lr_save_offset + ptr_off,
30097 reg_mode, sel);
30098 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
30099 NULL_RTX, NULL_RTX);
30100 if (lr)
30101 END_USE (0);
30103 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
30105 rtvec p;
30106 int i;
30107 p = rtvec_alloc (32 - info->first_gp_reg_save);
30108 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30109 RTVEC_ELT (p, i)
30110 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
30111 frame_reg_rtx,
30112 info->gp_save_offset + frame_off + reg_size * i);
30113 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30114 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30115 NULL_RTX, NULL_RTX);
30117 else if (!WORLD_SAVE_P (info))
30119 int offset = info->gp_save_offset + frame_off;
30120 for (int i = info->first_gp_reg_save; i < 32; i++)
30122 if (rs6000_reg_live_or_pic_offset_p (i)
30123 && !cfun->machine->gpr_is_wrapped_separately[i])
30124 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
30125 sp_off - frame_off);
30127 offset += reg_size;
30131 if (crtl->calls_eh_return)
30133 unsigned int i;
30134 rtvec p;
30136 for (i = 0; ; ++i)
30138 unsigned int regno = EH_RETURN_DATA_REGNO (i);
30139 if (regno == INVALID_REGNUM)
30140 break;
30143 p = rtvec_alloc (i);
30145 for (i = 0; ; ++i)
30147 unsigned int regno = EH_RETURN_DATA_REGNO (i);
30148 if (regno == INVALID_REGNUM)
30149 break;
30151 rtx set
30152 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
30153 sp_reg_rtx,
30154 info->ehrd_offset + sp_off + reg_size * (int) i);
30155 RTVEC_ELT (p, i) = set;
30156 RTX_FRAME_RELATED_P (set) = 1;
30159 insn = emit_insn (gen_blockage ());
30160 RTX_FRAME_RELATED_P (insn) = 1;
30161 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
30164 /* In the AIX ABI we need to make sure r2 is really saved. */
30165 if (TARGET_AIX && crtl->calls_eh_return)
30167 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
30168 rtx join_insn, note;
30169 rtx_insn *save_insn;
30170 long toc_restore_insn;
30172 tmp_reg = gen_rtx_REG (Pmode, 11);
30173 tmp_reg_si = gen_rtx_REG (SImode, 11);
30174 if (using_static_chain_p)
30176 START_USE (0);
30177 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
30179 else
30180 START_USE (11);
30181 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
30182 /* Peek at instruction to which this function returns. If it's
30183 restoring r2, then we know we've already saved r2. We can't
30184 unconditionally save r2 because the value we have will already
30185 be updated if we arrived at this function via a plt call or
30186 toc adjusting stub. */
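      /* Illustrative decode of the constants below: on 64-bit the
	 expected insn image is 0xE8410000 + RS6000_TOC_SAVE_SLOT,
	 i.e. "ld 2,<slot>(1)"; on 32-bit it is 0x80410000 + the slot,
	 i.e. "lwz 2,<slot>(1)".  */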
30187 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
30188 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
30189 + RS6000_TOC_SAVE_SLOT);
30190 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
30191 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
30192 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
30193 validate_condition_mode (EQ, CCUNSmode);
30194 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
30195 emit_insn (gen_rtx_SET (compare_result,
30196 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
30197 toc_save_done = gen_label_rtx ();
30198 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30199 gen_rtx_EQ (VOIDmode, compare_result,
30200 const0_rtx),
30201 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
30202 pc_rtx);
30203 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30204 JUMP_LABEL (jump) = toc_save_done;
30205 LABEL_NUSES (toc_save_done) += 1;
30207 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
30208 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
30209 sp_off - frame_off);
30211 emit_label (toc_save_done);
30213 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
30214 have a CFG that has different saves along different paths.
30215 Move the note to a dummy blockage insn, which describes that
30216 R2 is unconditionally saved after the label. */
30217 /* ??? An alternate representation might be a special insn pattern
30218 containing both the branch and the store. That might give the
30219 code that minimizes the number of DW_CFA_advance opcodes more
30220 freedom in placing the annotations. */
30221 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
30222 if (note)
30223 remove_note (save_insn, note);
30224 else
30225 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
30226 copy_rtx (PATTERN (save_insn)), NULL_RTX);
30227 RTX_FRAME_RELATED_P (save_insn) = 0;
30229 join_insn = emit_insn (gen_blockage ());
30230 REG_NOTES (join_insn) = note;
30231 RTX_FRAME_RELATED_P (join_insn) = 1;
30233 if (using_static_chain_p)
30235 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
30236 END_USE (0);
30238 else
30239 END_USE (11);
30242 /* Save CR if we use any that must be preserved. */
30243 if (!WORLD_SAVE_P (info) && info->cr_save_p)
30245 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
30246 GEN_INT (info->cr_save_offset + frame_off));
30247 rtx mem = gen_frame_mem (SImode, addr);
30249 /* If we didn't copy cr before, do so now using r0. */
30250 if (cr_save_rtx == NULL_RTX)
30252 START_USE (0);
30253 cr_save_rtx = gen_rtx_REG (SImode, 0);
30254 rs6000_emit_move_from_cr (cr_save_rtx);
30257 /* Saving CR requires a two-instruction sequence: one instruction
30258 to move the CR to a general-purpose register, and a second
30259 instruction that stores the GPR to memory.
30261 We do not emit any DWARF CFI records for the first of these,
30262 because we cannot properly represent the fact that CR is saved in
30263 a register. One reason is that we cannot express that multiple
30264 CR fields are saved; another reason is that on 64-bit, the size
30265 of the CR register in DWARF (4 bytes) differs from the size of
30266 a general-purpose register.
30268 This means if any intervening instruction were to clobber one of
30269 the call-saved CR fields, we'd have incorrect CFI. To prevent
30270 this from happening, we mark the store to memory as a use of
30271 those CR fields, which prevents any such instruction from being
30272 scheduled in between the two instructions. */
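   /* Roughly (an illustrative asm sketch, assuming r12 is the scratch
      register):

	mfcr  12              # CR -> GPR, no CFI emitted for this
	stw   12,<cr_off>(1)  # GPR -> stack slot, carries the CFI note

      with the USEs below pinning the two instructions together.  */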
30273 rtx crsave_v[9];
30274 int n_crsave = 0;
30275 int i;
30277 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
30278 for (i = 0; i < 8; i++)
30279 if (save_reg_p (CR0_REGNO + i))
30280 crsave_v[n_crsave++]
30281 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30283 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
30284 gen_rtvec_v (n_crsave, crsave_v)));
30285 END_USE (REGNO (cr_save_rtx));
30287 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
30288 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
30289 so we need to construct a frame expression manually. */
30290 RTX_FRAME_RELATED_P (insn) = 1;
30292 /* Update address to be stack-pointer relative, like
30293 rs6000_frame_related would do. */
30294 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
30295 GEN_INT (info->cr_save_offset + sp_off));
30296 mem = gen_frame_mem (SImode, addr);
30298 if (DEFAULT_ABI == ABI_ELFv2)
30300 /* In the ELFv2 ABI we generate separate CFI records for each
30301 CR field that was actually saved. They all point to the
30302 same 32-bit stack slot. */
30303 rtx crframe[8];
30304 int n_crframe = 0;
30306 for (i = 0; i < 8; i++)
30307 if (save_reg_p (CR0_REGNO + i))
30309 crframe[n_crframe]
30310 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
30312 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
30313 n_crframe++;
30316 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30317 gen_rtx_PARALLEL (VOIDmode,
30318 gen_rtvec_v (n_crframe, crframe)));
30320 else
30322 /* In other ABIs, by convention, we use a single CR regnum to
30323 represent the fact that all call-saved CR fields are saved.
30324 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
30325 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
30326 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
30330 /* In the ELFv2 ABI we need to save all call-saved CR fields into
30331 *separate* slots if the routine calls __builtin_eh_return, so
30332 that they can be independently restored by the unwinder. */
30333 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
30335 int i, cr_off = info->ehcr_offset;
30336 rtx crsave;
30338 /* ??? We might get better performance by using multiple mfocrf
30339 instructions. */
30340 crsave = gen_rtx_REG (SImode, 0);
30341 emit_insn (gen_movesi_from_cr (crsave));
30343 for (i = 0; i < 8; i++)
30344 if (!call_used_regs[CR0_REGNO + i])
30346 rtvec p = rtvec_alloc (2);
30347 RTVEC_ELT (p, 0)
30348 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
30349 RTVEC_ELT (p, 1)
30350 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30352 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30354 RTX_FRAME_RELATED_P (insn) = 1;
30355 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30356 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
30357 sp_reg_rtx, cr_off + sp_off));
30359 cr_off += reg_size;
30363 /* Update stack and set back pointer unless this is V.4,
30364 for which it was done previously. */
30365 if (!WORLD_SAVE_P (info) && info->push_p
30366 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
30368 rtx ptr_reg = NULL;
30369 int ptr_off = 0;
30371 /* If saving altivec regs we need to be able to address all save
30372 locations using a 16-bit offset. */
30373 if ((strategy & SAVE_INLINE_VRS) == 0
30374 || (info->altivec_size != 0
30375 && (info->altivec_save_offset + info->altivec_size - 16
30376 + info->total_size - frame_off) > 32767)
30377 || (info->vrsave_size != 0
30378 && (info->vrsave_save_offset
30379 + info->total_size - frame_off) > 32767))
30381 int sel = SAVRES_SAVE | SAVRES_VR;
30382 unsigned ptr_regno = ptr_regno_for_savres (sel);
30384 if (using_static_chain_p
30385 && ptr_regno == STATIC_CHAIN_REGNUM)
30386 ptr_regno = 12;
30387 if (REGNO (frame_reg_rtx) != ptr_regno)
30388 START_USE (ptr_regno);
30389 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30390 frame_reg_rtx = ptr_reg;
30391 ptr_off = info->altivec_save_offset + info->altivec_size;
30392 frame_off = -ptr_off;
30394 else if (REGNO (frame_reg_rtx) == 1)
30395 frame_off = info->total_size;
30396 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
30397 ptr_reg, ptr_off);
30398 if (REGNO (frame_reg_rtx) == 12)
30399 sp_adjust = 0;
30400 sp_off = info->total_size;
30401 if (frame_reg_rtx != sp_reg_rtx)
30402 rs6000_emit_stack_tie (frame_reg_rtx, false);
30405 /* Set frame pointer, if needed. */
30406 if (frame_pointer_needed)
30408 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
30409 sp_reg_rtx);
30410 RTX_FRAME_RELATED_P (insn) = 1;
30413 /* Save AltiVec registers if needed. Save here because the red zone does
30414 not always include AltiVec registers. */
30415 if (!WORLD_SAVE_P (info)
30416 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
30418 int end_save = info->altivec_save_offset + info->altivec_size;
30419 int ptr_off;
30420 /* Oddly, the vector save/restore functions point r0 at the end
30421 of the save area, then use r11 or r12 to load offsets for
30422 [reg+reg] addressing. */
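      /* Illustrative sketch of what such a routine effectively does
	 (assuming r11 as the offset register and r0 pointing at the
	 end of the save area, as set up below):

	    _savevr_20:  li   11,-192     # offset of v20 from the end
			 stvx 20,11,0     # [r11 + r0] addressing
			 ...  */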
30423 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
30424 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
30425 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
30427 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
30428 NOT_INUSE (0);
30429 if (scratch_regno == 12)
30430 sp_adjust = 0;
30431 if (end_save + frame_off != 0)
30433 rtx offset = GEN_INT (end_save + frame_off);
30435 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30437 else
30438 emit_move_insn (ptr_reg, frame_reg_rtx);
30440 ptr_off = -end_save;
30441 insn = rs6000_emit_savres_rtx (info, scratch_reg,
30442 info->altivec_save_offset + ptr_off,
30443 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
30444 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
30445 NULL_RTX, NULL_RTX);
30446 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
30448 /* The oddity mentioned above clobbered our frame reg. */
30449 emit_move_insn (frame_reg_rtx, ptr_reg);
30450 frame_off = ptr_off;
30453 else if (!WORLD_SAVE_P (info)
30454 && info->altivec_size != 0)
30456 int i;
30458 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30459 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
30461 rtx areg, savereg, mem;
30462 HOST_WIDE_INT offset;
30464 offset = (info->altivec_save_offset + frame_off
30465 + 16 * (i - info->first_altivec_reg_save));
30467 savereg = gen_rtx_REG (V4SImode, i);
30469 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
30471 mem = gen_frame_mem (V4SImode,
30472 gen_rtx_PLUS (Pmode, frame_reg_rtx,
30473 GEN_INT (offset)));
30474 insn = emit_insn (gen_rtx_SET (mem, savereg));
30475 areg = NULL_RTX;
30477 else
30479 NOT_INUSE (0);
30480 areg = gen_rtx_REG (Pmode, 0);
30481 emit_move_insn (areg, GEN_INT (offset));
30483 /* AltiVec addressing mode is [reg+reg]. */
30484 mem = gen_frame_mem (V4SImode,
30485 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
30487 /* Rather than emitting a generic move, force use of the stvx
30488 instruction, which we always want on ISA 2.07 (power8) systems.
30489 In particular we don't want xxpermdi/stxvd2x for little
30490 endian. */
30491 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
30494 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30495 areg, GEN_INT (offset));
30499 /* VRSAVE is a bit vector representing which AltiVec registers
30500 are used. The OS uses this to determine which vector
30501 registers to save on a context switch. We need to save
30502 VRSAVE on the stack frame, add whatever AltiVec registers we
30503 used in this function, and do the corresponding magic in the
30504 epilogue. */
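  /* Conceptually (illustrative; the OR may need two instructions for a
     full 32-bit mask):

	mfvrsave rN            # read the current VRSAVE
	stw      rN,<off>(1)   # save the caller's value
	oris/ori rN,rN,mask    # OR in the AltiVec regs we use
	mtvrsave rN            # update VRSAVE  */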
30506 if (!WORLD_SAVE_P (info)
30507 && info->vrsave_size != 0)
30509 rtx reg, vrsave;
30510 int offset;
30511 int save_regno;
30513 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
30514 be using r12 as frame_reg_rtx and r11 as the static chain
30515 pointer for nested functions. */
30516 save_regno = 12;
30517 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30518 && !using_static_chain_p)
30519 save_regno = 11;
30520 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
30522 save_regno = 11;
30523 if (using_static_chain_p)
30524 save_regno = 0;
30527 NOT_INUSE (save_regno);
30528 reg = gen_rtx_REG (SImode, save_regno);
30529 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
30530 if (TARGET_MACHO)
30531 emit_insn (gen_get_vrsave_internal (reg));
30532 else
30533 emit_insn (gen_rtx_SET (reg, vrsave));
30535 /* Save VRSAVE. */
30536 offset = info->vrsave_save_offset + frame_off;
30537 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
30539 /* Include the registers in the mask. */
30540 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
30542 insn = emit_insn (generate_set_vrsave (reg, info, 0));
30545 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
30546 if (!TARGET_SINGLE_PIC_BASE
30547 && ((TARGET_TOC && TARGET_MINIMAL_TOC
30548 && !constant_pool_empty_p ())
30549 || (DEFAULT_ABI == ABI_V4
30550 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
30551 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
30553 /* If emit_load_toc_table will use the link register, we need to save
30554 it. We use R12 for this purpose because emit_load_toc_table
30555 can use register 0. This allows us to use a plain 'blr' to return
30556 from the procedure more often. */
30557 int save_LR_around_toc_setup = (TARGET_ELF
30558 && DEFAULT_ABI == ABI_V4
30559 && flag_pic
30560 && ! info->lr_save_p
30561 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
30562 if (save_LR_around_toc_setup)
30564 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30565 rtx tmp = gen_rtx_REG (Pmode, 12);
30567 sp_adjust = 0;
30568 insn = emit_move_insn (tmp, lr);
30569 RTX_FRAME_RELATED_P (insn) = 1;
30571 rs6000_emit_load_toc_table (TRUE);
30573 insn = emit_move_insn (lr, tmp);
30574 add_reg_note (insn, REG_CFA_RESTORE, lr);
30575 RTX_FRAME_RELATED_P (insn) = 1;
30577 else
30578 rs6000_emit_load_toc_table (TRUE);
30581 #if TARGET_MACHO
30582 if (!TARGET_SINGLE_PIC_BASE
30583 && DEFAULT_ABI == ABI_DARWIN
30584 && flag_pic && crtl->uses_pic_offset_table)
30586 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30587 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
30589 /* Save and restore LR locally around this call (in R0). */
30590 if (!info->lr_save_p)
30591 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
30593 emit_insn (gen_load_macho_picbase (src));
30595 emit_move_insn (gen_rtx_REG (Pmode,
30596 RS6000_PIC_OFFSET_TABLE_REGNUM),
30597 lr);
30599 if (!info->lr_save_p)
30600 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
30602 #endif
30604 /* If we need to, save the TOC register after doing the stack setup.
30605 Do not emit eh frame info for this save. The unwinder wants info,
30606 conceptually attached to instructions in this function, about
30607 register values in the caller of this function. This R2 may have
30608 already been changed from the value in the caller.
30609 We don't attempt to write accurate DWARF EH frame info for R2
30610 because code emitted by gcc for a (non-pointer) function call
30611 doesn't save and restore R2. Instead, R2 is managed out-of-line
30612 by a linker generated plt call stub when the function resides in
30613 a shared library. This behavior is costly to describe in DWARF,
30614 both in terms of the size of DWARF info and the time taken in the
30615 unwinder to interpret it. R2 changes, apart from the
30616 calls_eh_return case earlier in this function, are handled by
30617 linux-unwind.h frob_update_context. */
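  /* Illustrative: on 64-bit this emits a plain "std 2,<slot>(1)" with
     the slot given by RS6000_TOC_SAVE_SLOT, deliberately left without
     RTX_FRAME_RELATED_P so that no CFI is generated for it.  */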
30618 if (rs6000_save_toc_in_prologue_p ())
30620 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
30621 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
30624 if (using_split_stack && split_stack_arg_pointer_used_p ())
30626 /* Set up the arg pointer (r12) for -fsplit-stack code. If
30627 __morestack was called, it left the arg pointer to the old
30628 stack in r29. Otherwise, the arg pointer is the top of the
30629 current frame. */
30630 cfun->machine->split_stack_argp_used = true;
30631 if (sp_adjust)
30633 rtx r12 = gen_rtx_REG (Pmode, 12);
30634 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
30635 emit_insn_before (set_r12, sp_adjust);
30637 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
30639 rtx r12 = gen_rtx_REG (Pmode, 12);
30640 if (frame_off == 0)
30641 emit_move_insn (r12, frame_reg_rtx);
30642 else
30643 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
30645 if (info->push_p)
30647 rtx r12 = gen_rtx_REG (Pmode, 12);
30648 rtx r29 = gen_rtx_REG (Pmode, 29);
30649 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30650 rtx not_more = gen_label_rtx ();
30651 rtx jump;
30653 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30654 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
30655 gen_rtx_LABEL_REF (VOIDmode, not_more),
30656 pc_rtx);
30657 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30658 JUMP_LABEL (jump) = not_more;
30659 LABEL_NUSES (not_more) += 1;
30660 emit_move_insn (r12, r29);
30661 emit_label (not_more);
30666 /* Output .extern statements for the save/restore routines we use. */
30668 static void
30669 rs6000_output_savres_externs (FILE *file)
30671 rs6000_stack_t *info = rs6000_stack_info ();
30673 if (TARGET_DEBUG_STACK)
30674 debug_stack_info (info);
30676 /* Write .extern for any function we will call to save and restore
30677 fp values. */
30678 if (info->first_fp_reg_save < 64
30679 && !TARGET_MACHO
30680 && !TARGET_ELF)
30682 char *name;
30683 int regno = info->first_fp_reg_save - 32;
30685 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
30687 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
30688 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
30689 name = rs6000_savres_routine_name (info, regno, sel);
30690 fprintf (file, "\t.extern %s\n", name);
30692 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
30694 bool lr = (info->savres_strategy
30695 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30696 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30697 name = rs6000_savres_routine_name (info, regno, sel);
30698 fprintf (file, "\t.extern %s\n", name);
30703 /* Write function prologue. */
30705 static void
30706 rs6000_output_function_prologue (FILE *file)
30708 if (!cfun->is_thunk)
30709 rs6000_output_savres_externs (file);
30711 /* ELFv2 ABI r2 setup code and local entry point. This must follow
30712 immediately after the global entry point label. */
30713 if (rs6000_global_entry_point_needed_p ())
30715 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
30717 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
30719 if (TARGET_CMODEL != CMODEL_LARGE)
30721 /* In the small and medium code models, we assume the TOC is less
30722 than 2 GB away from the text section, so it can be computed via the
30723 following two-instruction sequence. */
30724 char buf[256];
30726 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30727 fprintf (file, "0:\taddis 2,12,.TOC.-");
30728 assemble_name (file, buf);
30729 fprintf (file, "@ha\n");
30730 fprintf (file, "\taddi 2,2,.TOC.-");
30731 assemble_name (file, buf);
30732 fprintf (file, "@l\n");
30734 else
30736 /* In the large code model, we allow arbitrary offsets between the
30737 TOC and the text section, so we have to load the offset from
30738 memory. The data field is emitted directly before the global
30739 entry point in rs6000_elf_declare_function_name. */
30740 char buf[256];
30742 #ifdef HAVE_AS_ENTRY_MARKERS
30743 /* If supported by the linker, emit a marker relocation. If the
30744 total code size of the final executable or shared library
30745 happens to fit into 2 GB after all, the linker will replace
30746 this code sequence with the sequence for the small or medium
30747 code model. */
30748 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
30749 #endif
30750 fprintf (file, "\tld 2,");
30751 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
30752 assemble_name (file, buf);
30753 fprintf (file, "-");
30754 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30755 assemble_name (file, buf);
30756 fprintf (file, "(12)\n");
30757 fprintf (file, "\tadd 2,2,12\n");
30760 fputs ("\t.localentry\t", file);
30761 assemble_name (file, name);
30762 fputs (",.-", file);
30763 assemble_name (file, name);
30764 fputs ("\n", file);
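/* For illustration only, the small/medium code model output above is
expected to look like this for a function `foo' (names made up):

.LCF0:
0:	addis 2,12,.TOC.-.LCF0@ha
	addi 2,2,.TOC.-.LCF0@l
	.localentry	foo,.-foo

relying on the ELFv2 convention that r12 holds the global entry
address when the function is entered there. */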
30767 /* Output -mprofile-kernel code. This needs to be done here instead of
30768 in output_function_profile since it must go after the ELFv2 ABI
30769 local entry point. */
30770 if (TARGET_PROFILE_KERNEL && crtl->profile)
30772 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
30773 gcc_assert (!TARGET_32BIT);
30775 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
30777 /* In the ELFv2 ABI we have no compiler stack word. It must be
30778 the responsibility of _mcount to preserve the static chain
30779 register if required. */
30780 if (DEFAULT_ABI != ABI_ELFv2
30781 && cfun->static_chain_decl != NULL)
30783 asm_fprintf (file, "\tstd %s,24(%s)\n",
30784 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30785 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30786 asm_fprintf (file, "\tld %s,24(%s)\n",
30787 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30789 else
30790 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30793 rs6000_pic_labelno++;
30796 /* -mprofile-kernel code calls mcount before the function prologue,
30797 so a profiled leaf function should stay a leaf function. */
30798 static bool
30799 rs6000_keep_leaf_when_profiled ()
30801 return TARGET_PROFILE_KERNEL;
30804 /* Non-zero if vmx regs are restored before the frame pop, zero if
30805 we restore after the pop when possible. */
30806 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
30808 /* Restoring cr is a two step process: loading a reg from the frame
30809 save, then moving the reg to cr. For ABI_V4 we must let the
30810 unwinder know that the stack location is no longer valid at or
30811 before the stack deallocation, but we can't emit a cfa_restore for
30812 cr at the stack deallocation like we do for other registers.
30813 The trouble is that it is possible for the move to cr to be
30814 scheduled after the stack deallocation. So say exactly where cr
30815 is located on each of the two insns. */
30817 static rtx
30818 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
30820 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
30821 rtx reg = gen_rtx_REG (SImode, regno);
30822 rtx_insn *insn = emit_move_insn (reg, mem);
30824 if (!exit_func && DEFAULT_ABI == ABI_V4)
30826 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30827 rtx set = gen_rtx_SET (reg, cr);
30829 add_reg_note (insn, REG_CFA_REGISTER, set);
30830 RTX_FRAME_RELATED_P (insn) = 1;
30832 return reg;
30835 /* Reload CR from REG. */
30837 static void
30838 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
30840 int count = 0;
30841 int i;
30843 if (using_mfcr_multiple)
30845 for (i = 0; i < 8; i++)
30846 if (save_reg_p (CR0_REGNO + i))
30847 count++;
30848 gcc_assert (count);
30851 if (using_mfcr_multiple && count > 1)
30853 rtx_insn *insn;
30854 rtvec p;
30855 int ndx;
30857 p = rtvec_alloc (count);
30859 ndx = 0;
30860 for (i = 0; i < 8; i++)
30861 if (save_reg_p (CR0_REGNO + i))
30863 rtvec r = rtvec_alloc (2);
30864 RTVEC_ELT (r, 0) = reg;
30865 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
30866 RTVEC_ELT (p, ndx) =
30867 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
30868 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
30869 ndx++;
30871 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30872 gcc_assert (ndx == count);
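/* Illustration: if save_reg_p is true only for CR2 and CR3, the
PARALLEL above should assemble to a single `mtcrf 48,rN', the FXM
mask being (1 << 5) | (1 << 4) == 48 for those two fields. */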
30874 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30875 CR field separately. */
30876 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30878 for (i = 0; i < 8; i++)
30879 if (save_reg_p (CR0_REGNO + i))
30880 add_reg_note (insn, REG_CFA_RESTORE,
30881 gen_rtx_REG (SImode, CR0_REGNO + i));
30883 RTX_FRAME_RELATED_P (insn) = 1;
30886 else
30887 for (i = 0; i < 8; i++)
30888 if (save_reg_p (CR0_REGNO + i))
30890 rtx insn = emit_insn (gen_movsi_to_cr_one
30891 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
30893 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30894 CR field separately, attached to the insn that in fact
30895 restores this particular CR field. */
30896 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30898 add_reg_note (insn, REG_CFA_RESTORE,
30899 gen_rtx_REG (SImode, CR0_REGNO + i));
30901 RTX_FRAME_RELATED_P (insn) = 1;
30905 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
30906 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
30907 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
30909 rtx_insn *insn = get_last_insn ();
30910 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30912 add_reg_note (insn, REG_CFA_RESTORE, cr);
30913 RTX_FRAME_RELATED_P (insn) = 1;
30917 /* Like cr, the move to lr instruction can be scheduled after the
30918 stack deallocation, but unlike cr, its stack frame save is still
30919 valid. So we only need to emit the cfa_restore on the correct
30920 instruction. */
30922 static void
30923 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
30925 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
30926 rtx reg = gen_rtx_REG (Pmode, regno);
30928 emit_move_insn (reg, mem);
30931 static void
30932 restore_saved_lr (int regno, bool exit_func)
30934 rtx reg = gen_rtx_REG (Pmode, regno);
30935 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30936 rtx_insn *insn = emit_move_insn (lr, reg);
30938 if (!exit_func && flag_shrink_wrap)
30940 add_reg_note (insn, REG_CFA_RESTORE, lr);
30941 RTX_FRAME_RELATED_P (insn) = 1;
30945 static rtx
30946 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
30948 if (DEFAULT_ABI == ABI_ELFv2)
30950 int i;
30951 for (i = 0; i < 8; i++)
30952 if (save_reg_p (CR0_REGNO + i))
30954 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
30955 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
30956 cfa_restores);
30959 else if (info->cr_save_p)
30960 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30961 gen_rtx_REG (SImode, CR2_REGNO),
30962 cfa_restores);
30964 if (info->lr_save_p)
30965 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30966 gen_rtx_REG (Pmode, LR_REGNO),
30967 cfa_restores);
30968 return cfa_restores;
30971 /* Return true if OFFSET from stack pointer can be clobbered by signals.
30972 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
30973 below the stack pointer not clobbered by signals. */
30975 static inline bool
30976 offset_below_red_zone_p (HOST_WIDE_INT offset)
30978 return offset < (DEFAULT_ABI == ABI_V4
30979 ? 0
30980 : TARGET_32BIT ? -220 : -288);
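/* Worked example: under the 64-bit AIX-style ABIs an offset of -320
lies below the 288-byte cushion, so callers must restore such a
slot before the stack pointer is popped; at -200 it would be safe
to restore afterwards. */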
30983 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
30985 static void
30986 emit_cfa_restores (rtx cfa_restores)
30988 rtx_insn *insn = get_last_insn ();
30989 rtx *loc = &REG_NOTES (insn);
30991 while (*loc)
30992 loc = &XEXP (*loc, 1);
30993 *loc = cfa_restores;
30994 RTX_FRAME_RELATED_P (insn) = 1;
30997 /* Emit function epilogue as insns. */
30999 void
31000 rs6000_emit_epilogue (int sibcall)
31002 rs6000_stack_t *info;
31003 int restoring_GPRs_inline;
31004 int restoring_FPRs_inline;
31005 int using_load_multiple;
31006 int using_mtcr_multiple;
31007 int use_backchain_to_restore_sp;
31008 int restore_lr;
31009 int strategy;
31010 HOST_WIDE_INT frame_off = 0;
31011 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
31012 rtx frame_reg_rtx = sp_reg_rtx;
31013 rtx cfa_restores = NULL_RTX;
31014 rtx insn;
31015 rtx cr_save_reg = NULL_RTX;
31016 machine_mode reg_mode = Pmode;
31017 int reg_size = TARGET_32BIT ? 4 : 8;
31018 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
31019 ? DFmode : SFmode;
31020 int fp_reg_size = 8;
31021 int i;
31022 bool exit_func;
31023 unsigned ptr_regno;
31025 info = rs6000_stack_info ();
31027 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
31029 reg_mode = V2SImode;
31030 reg_size = 8;
31033 strategy = info->savres_strategy;
31034 using_load_multiple = strategy & REST_MULTIPLE;
31035 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
31036 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
31037 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
31038 || rs6000_cpu == PROCESSOR_PPC603
31039 || rs6000_cpu == PROCESSOR_PPC750
31040 || optimize_size);
31041 /* Restore via the backchain when we have a large frame, since this
31042 is more efficient than an addis, addi pair. The second condition
31043 here will not trigger at the moment; we don't actually need a
31044 frame pointer for alloca, but the generic parts of the compiler
31045 give us one anyway. */
31046 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
31047 ? info->lr_save_offset
31048 : 0) > 32767
31049 || (cfun->calls_alloca
31050 && !frame_pointer_needed));
31051 restore_lr = (info->lr_save_p
31052 && (restoring_FPRs_inline
31053 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
31054 && (restoring_GPRs_inline
31055 || info->first_fp_reg_save < 64)
31056 && !cfun->machine->lr_is_wrapped_separately);
31059 if (WORLD_SAVE_P (info))
31061 int i, j;
31062 char rname[30];
31063 const char *alloc_rname;
31064 rtvec p;
31066 /* eh_rest_world_r10 will return to the location saved in the LR
31067 stack slot (which is not likely to be our caller).
31068 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
31069 rest_world is similar, except any R10 parameter is ignored.
31070 The exception-handling stuff that was here in 2.95 is no
31071 longer necessary. */
31073 p = rtvec_alloc (9
31074 + 32 - info->first_gp_reg_save
31075 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
31076 + 63 + 1 - info->first_fp_reg_save);
31078 strcpy (rname, ((crtl->calls_eh_return) ?
31079 "*eh_rest_world_r10" : "*rest_world"));
31080 alloc_rname = ggc_strdup (rname);
31082 j = 0;
31083 RTVEC_ELT (p, j++) = ret_rtx;
31084 RTVEC_ELT (p, j++)
31085 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
31086 /* The instruction pattern requires a clobber here;
31087 it is shared with the restVEC helper. */
31088 RTVEC_ELT (p, j++)
31089 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
31092 /* CR register traditionally saved as CR2. */
31093 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
31094 RTVEC_ELT (p, j++)
31095 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
31096 if (flag_shrink_wrap)
31098 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
31099 gen_rtx_REG (Pmode, LR_REGNO),
31100 cfa_restores);
31101 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31105 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31107 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31108 RTVEC_ELT (p, j++)
31109 = gen_frame_load (reg,
31110 frame_reg_rtx, info->gp_save_offset + reg_size * i);
31111 if (flag_shrink_wrap)
31112 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31114 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
31116 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
31117 RTVEC_ELT (p, j++)
31118 = gen_frame_load (reg,
31119 frame_reg_rtx, info->altivec_save_offset + 16 * i);
31120 if (flag_shrink_wrap)
31121 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31123 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
31125 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
31126 ? DFmode : SFmode),
31127 info->first_fp_reg_save + i);
31128 RTVEC_ELT (p, j++)
31129 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
31130 if (flag_shrink_wrap)
31131 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31133 RTVEC_ELT (p, j++)
31134 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
31135 RTVEC_ELT (p, j++)
31136 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
31137 RTVEC_ELT (p, j++)
31138 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
31139 RTVEC_ELT (p, j++)
31140 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
31141 RTVEC_ELT (p, j++)
31142 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
31143 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31145 if (flag_shrink_wrap)
31147 REG_NOTES (insn) = cfa_restores;
31148 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31149 RTX_FRAME_RELATED_P (insn) = 1;
31151 return;
31154 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
31155 if (info->push_p)
31156 frame_off = info->total_size;
31158 /* Restore AltiVec registers if we must do so before adjusting the
31159 stack. */
31160 if (info->altivec_size != 0
31161 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31162 || (DEFAULT_ABI != ABI_V4
31163 && offset_below_red_zone_p (info->altivec_save_offset))))
31165 int i;
31166 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31168 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
31169 if (use_backchain_to_restore_sp)
31171 int frame_regno = 11;
31173 if ((strategy & REST_INLINE_VRS) == 0)
31175 /* Of r11 and r12, select the one not clobbered by an
31176 out-of-line restore function for the frame register. */
31177 frame_regno = 11 + 12 - scratch_regno;
31179 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
31180 emit_move_insn (frame_reg_rtx,
31181 gen_rtx_MEM (Pmode, sp_reg_rtx));
31182 frame_off = 0;
31184 else if (frame_pointer_needed)
31185 frame_reg_rtx = hard_frame_pointer_rtx;
31187 if ((strategy & REST_INLINE_VRS) == 0)
31189 int end_save = info->altivec_save_offset + info->altivec_size;
31190 int ptr_off;
31191 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31192 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31194 if (end_save + frame_off != 0)
31196 rtx offset = GEN_INT (end_save + frame_off);
31198 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31200 else
31201 emit_move_insn (ptr_reg, frame_reg_rtx);
31203 ptr_off = -end_save;
31204 insn = rs6000_emit_savres_rtx (info, scratch_reg,
31205 info->altivec_save_offset + ptr_off,
31206 0, V4SImode, SAVRES_VR);
31208 else
31210 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31211 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31213 rtx addr, areg, mem, insn;
31214 rtx reg = gen_rtx_REG (V4SImode, i);
31215 HOST_WIDE_INT offset
31216 = (info->altivec_save_offset + frame_off
31217 + 16 * (i - info->first_altivec_reg_save));
31219 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31221 mem = gen_frame_mem (V4SImode,
31222 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31223 GEN_INT (offset)));
31224 insn = gen_rtx_SET (reg, mem);
31226 else
31228 areg = gen_rtx_REG (Pmode, 0);
31229 emit_move_insn (areg, GEN_INT (offset));
31231 /* AltiVec addressing mode is [reg+reg]. */
31232 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31233 mem = gen_frame_mem (V4SImode, addr);
31235 /* Rather than emitting a generic move, force use of the
31236 lvx instruction, which we always want. In particular we
31237 don't want lxvd2x/xxpermdi for little endian. */
31238 insn = gen_altivec_lvx_v4si_internal (reg, mem);
31241 (void) emit_insn (insn);
31245 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31246 if (((strategy & REST_INLINE_VRS) == 0
31247 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31248 && (flag_shrink_wrap
31249 || (offset_below_red_zone_p
31250 (info->altivec_save_offset
31251 + 16 * (i - info->first_altivec_reg_save)))))
31253 rtx reg = gen_rtx_REG (V4SImode, i);
31254 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31258 /* Restore VRSAVE if we must do so before adjusting the stack. */
31259 if (info->vrsave_size != 0
31260 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31261 || (DEFAULT_ABI != ABI_V4
31262 && offset_below_red_zone_p (info->vrsave_save_offset))))
31264 rtx reg;
31266 if (frame_reg_rtx == sp_reg_rtx)
31268 if (use_backchain_to_restore_sp)
31270 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31271 emit_move_insn (frame_reg_rtx,
31272 gen_rtx_MEM (Pmode, sp_reg_rtx));
31273 frame_off = 0;
31275 else if (frame_pointer_needed)
31276 frame_reg_rtx = hard_frame_pointer_rtx;
31279 reg = gen_rtx_REG (SImode, 12);
31280 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31281 info->vrsave_save_offset + frame_off));
31283 emit_insn (generate_set_vrsave (reg, info, 1));
31286 insn = NULL_RTX;
31287 /* If we have a large stack frame, restore the old stack pointer
31288 using the backchain. */
31289 if (use_backchain_to_restore_sp)
31291 if (frame_reg_rtx == sp_reg_rtx)
31293 /* Under V.4, don't reset the stack pointer until after we're done
31294 loading the saved registers. */
31295 if (DEFAULT_ABI == ABI_V4)
31296 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31298 insn = emit_move_insn (frame_reg_rtx,
31299 gen_rtx_MEM (Pmode, sp_reg_rtx));
31300 frame_off = 0;
31302 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31303 && DEFAULT_ABI == ABI_V4)
31304 /* frame_reg_rtx has been set up by the altivec restore. */
31305 ;
31306 else
31308 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
31309 frame_reg_rtx = sp_reg_rtx;
31312 /* If we have a frame pointer, we can restore the old stack pointer
31313 from it. */
31314 else if (frame_pointer_needed)
31316 frame_reg_rtx = sp_reg_rtx;
31317 if (DEFAULT_ABI == ABI_V4)
31318 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31319 /* Prevent reordering memory accesses against stack pointer restore. */
31320 else if (cfun->calls_alloca
31321 || offset_below_red_zone_p (-info->total_size))
31322 rs6000_emit_stack_tie (frame_reg_rtx, true);
31324 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
31325 GEN_INT (info->total_size)));
31326 frame_off = 0;
31328 else if (info->push_p
31329 && DEFAULT_ABI != ABI_V4
31330 && !crtl->calls_eh_return)
31332 /* Prevent reordering memory accesses against stack pointer restore. */
31333 if (cfun->calls_alloca
31334 || offset_below_red_zone_p (-info->total_size))
31335 rs6000_emit_stack_tie (frame_reg_rtx, false);
31336 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
31337 GEN_INT (info->total_size)));
31338 frame_off = 0;
31340 if (insn && frame_reg_rtx == sp_reg_rtx)
31342 if (cfa_restores)
31344 REG_NOTES (insn) = cfa_restores;
31345 cfa_restores = NULL_RTX;
31347 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31348 RTX_FRAME_RELATED_P (insn) = 1;
31351 /* Restore AltiVec registers if we have not done so already. */
31352 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31353 && info->altivec_size != 0
31354 && (DEFAULT_ABI == ABI_V4
31355 || !offset_below_red_zone_p (info->altivec_save_offset)))
31357 int i;
31359 if ((strategy & REST_INLINE_VRS) == 0)
31361 int end_save = info->altivec_save_offset + info->altivec_size;
31362 int ptr_off;
31363 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31364 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31365 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31367 if (end_save + frame_off != 0)
31369 rtx offset = GEN_INT (end_save + frame_off);
31371 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31373 else
31374 emit_move_insn (ptr_reg, frame_reg_rtx);
31376 ptr_off = -end_save;
31377 insn = rs6000_emit_savres_rtx (info, scratch_reg,
31378 info->altivec_save_offset + ptr_off,
31379 0, V4SImode, SAVRES_VR);
31380 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
31382 /* Frame reg was clobbered by out-of-line save. Restore it
31383 from ptr_reg, and if we are calling out-of-line gpr or
31384 fpr restore set up the correct pointer and offset. */
31385 unsigned newptr_regno = 1;
31386 if (!restoring_GPRs_inline)
31388 bool lr = info->gp_save_offset + info->gp_size == 0;
31389 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31390 newptr_regno = ptr_regno_for_savres (sel);
31391 end_save = info->gp_save_offset + info->gp_size;
31393 else if (!restoring_FPRs_inline)
31395 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
31396 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31397 newptr_regno = ptr_regno_for_savres (sel);
31398 end_save = info->fp_save_offset + info->fp_size;
31401 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
31402 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
31404 if (end_save + ptr_off != 0)
31406 rtx offset = GEN_INT (end_save + ptr_off);
31408 frame_off = -end_save;
31409 if (TARGET_32BIT)
31410 emit_insn (gen_addsi3_carry (frame_reg_rtx,
31411 ptr_reg, offset));
31412 else
31413 emit_insn (gen_adddi3_carry (frame_reg_rtx,
31414 ptr_reg, offset));
31416 else
31418 frame_off = ptr_off;
31419 emit_move_insn (frame_reg_rtx, ptr_reg);
31423 else
31425 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31426 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31428 rtx addr, areg, mem, insn;
31429 rtx reg = gen_rtx_REG (V4SImode, i);
31430 HOST_WIDE_INT offset
31431 = (info->altivec_save_offset + frame_off
31432 + 16 * (i - info->first_altivec_reg_save));
31434 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31436 mem = gen_frame_mem (V4SImode,
31437 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31438 GEN_INT (offset)));
31439 insn = gen_rtx_SET (reg, mem);
31441 else
31443 areg = gen_rtx_REG (Pmode, 0);
31444 emit_move_insn (areg, GEN_INT (offset));
31446 /* AltiVec addressing mode is [reg+reg]. */
31447 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31448 mem = gen_frame_mem (V4SImode, addr);
31450 /* Rather than emitting a generic move, force use of the
31451 lvx instruction, which we always want. In particular we
31452 don't want lxvd2x/xxpermdi for little endian. */
31453 insn = gen_altivec_lvx_v4si_internal (reg, mem);
31456 (void) emit_insn (insn);
31460 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31461 if (((strategy & REST_INLINE_VRS) == 0
31462 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31463 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
31465 rtx reg = gen_rtx_REG (V4SImode, i);
31466 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31470 /* Restore VRSAVE if we have not done so already. */
31471 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31472 && info->vrsave_size != 0
31473 && (DEFAULT_ABI == ABI_V4
31474 || !offset_below_red_zone_p (info->vrsave_save_offset)))
31476 rtx reg;
31478 reg = gen_rtx_REG (SImode, 12);
31479 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31480 info->vrsave_save_offset + frame_off));
31482 emit_insn (generate_set_vrsave (reg, info, 1));
31485 /* If we exit by an out-of-line restore function on ABI_V4 then that
31486 function will deallocate the stack, so we don't need to worry
31487 about the unwinder restoring cr from an invalid stack frame
31488 location. */
31489 exit_func = (!restoring_FPRs_inline
31490 || (!restoring_GPRs_inline
31491 && info->first_fp_reg_save == 64));
31493 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
31494 *separate* slots if the routine calls __builtin_eh_return, so
31495 that they can be independently restored by the unwinder. */
31496 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
31498 int i, cr_off = info->ehcr_offset;
31500 for (i = 0; i < 8; i++)
31501 if (!call_used_regs[CR0_REGNO + i])
31503 rtx reg = gen_rtx_REG (SImode, 0);
31504 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31505 cr_off + frame_off));
31507 insn = emit_insn (gen_movsi_to_cr_one
31508 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
31510 if (!exit_func && flag_shrink_wrap)
31512 add_reg_note (insn, REG_CFA_RESTORE,
31513 gen_rtx_REG (SImode, CR0_REGNO + i));
31515 RTX_FRAME_RELATED_P (insn) = 1;
31518 cr_off += reg_size;
31522 /* Get the old lr if we saved it. If we are restoring registers
31523 out-of-line, then the out-of-line routines can do this for us. */
31524 if (restore_lr && restoring_GPRs_inline)
31525 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31527 /* Get the old cr if we saved it. */
31528 if (info->cr_save_p)
31530 unsigned cr_save_regno = 12;
31532 if (!restoring_GPRs_inline)
31534 /* Ensure we don't use the register used by the out-of-line
31535 gpr register restore below. */
31536 bool lr = info->gp_save_offset + info->gp_size == 0;
31537 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31538 int gpr_ptr_regno = ptr_regno_for_savres (sel);
31540 if (gpr_ptr_regno == 12)
31541 cr_save_regno = 11;
31542 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
31544 else if (REGNO (frame_reg_rtx) == 12)
31545 cr_save_regno = 11;
31547 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
31548 info->cr_save_offset + frame_off,
31549 exit_func);
31552 /* Set LR here to try to overlap restores below. */
31553 if (restore_lr && restoring_GPRs_inline)
31554 restore_saved_lr (0, exit_func);
31556 /* Load exception handler data registers, if needed. */
31557 if (crtl->calls_eh_return)
31559 unsigned int i, regno;
31561 if (TARGET_AIX)
31563 rtx reg = gen_rtx_REG (reg_mode, 2);
31564 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31565 frame_off + RS6000_TOC_SAVE_SLOT));
31568 for (i = 0; ; ++i)
31570 rtx mem;
31572 regno = EH_RETURN_DATA_REGNO (i);
31573 if (regno == INVALID_REGNUM)
31574 break;
31576 /* Note: possible use of r0 here to address SPE regs. */
31577 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
31578 info->ehrd_offset + frame_off
31579 + reg_size * (int) i);
31581 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
31585 /* Restore GPRs. This is done as a PARALLEL if we are using
31586 the load-multiple instructions. */
31587 if (TARGET_SPE_ABI
31588 && info->spe_64bit_regs_used
31589 && info->first_gp_reg_save != 32)
31591 /* Determine whether we can address all of the registers that need
31592 to be saved with an offset from frame_reg_rtx that fits in
31593 the small const field for SPE memory instructions. */
31594 int spe_regs_addressable
31595 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
31596 + reg_size * (32 - info->first_gp_reg_save - 1))
31597 && restoring_GPRs_inline);
31599 if (!spe_regs_addressable)
31601 int ool_adjust = 0;
31602 rtx old_frame_reg_rtx = frame_reg_rtx;
31603 /* Make r11 point to the start of the SPE save area. We worried about
31604 not clobbering it when we were saving registers in the prologue.
31605 There's no need to worry here because the static chain is passed
31606 anew to every function. */
31608 if (!restoring_GPRs_inline)
31609 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
31610 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31611 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
31612 GEN_INT (info->spe_gp_save_offset
31613 + frame_off
31614 - ool_adjust)));
31615 /* Keep the invariant that frame_reg_rtx + frame_off points
31616 at the top of the stack frame. */
31617 frame_off = -info->spe_gp_save_offset + ool_adjust;
31620 if (restoring_GPRs_inline)
31622 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
31624 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31625 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
31627 rtx offset, addr, mem, reg;
31629 /* We're doing all this to ensure that the immediate offset
31630 fits into the immediate field of 'evldd'. */
31631 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
31633 offset = GEN_INT (spe_offset + reg_size * i);
31634 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
31635 mem = gen_rtx_MEM (V2SImode, addr);
31636 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31638 emit_move_insn (reg, mem);
31641 else
31642 rs6000_emit_savres_rtx (info, frame_reg_rtx,
31643 info->spe_gp_save_offset + frame_off,
31644 info->lr_save_offset + frame_off,
31645 reg_mode,
31646 SAVRES_GPR | SAVRES_LR);
31648 else if (!restoring_GPRs_inline)
31650 /* We are jumping to an out-of-line function. */
31651 rtx ptr_reg;
31652 int end_save = info->gp_save_offset + info->gp_size;
31653 bool can_use_exit = end_save == 0;
31654 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
31655 int ptr_off;
31657 /* Emit stack reset code if we need it. */
31658 ptr_regno = ptr_regno_for_savres (sel);
31659 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
31660 if (can_use_exit)
31661 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31662 else if (end_save + frame_off != 0)
31663 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
31664 GEN_INT (end_save + frame_off)));
31665 else if (REGNO (frame_reg_rtx) != ptr_regno)
31666 emit_move_insn (ptr_reg, frame_reg_rtx);
31667 if (REGNO (frame_reg_rtx) == ptr_regno)
31668 frame_off = -end_save;
31670 if (can_use_exit && info->cr_save_p)
31671 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
31673 ptr_off = -end_save;
31674 rs6000_emit_savres_rtx (info, ptr_reg,
31675 info->gp_save_offset + ptr_off,
31676 info->lr_save_offset + ptr_off,
31677 reg_mode, sel);
31679 else if (using_load_multiple)
31681 rtvec p;
31682 p = rtvec_alloc (32 - info->first_gp_reg_save);
31683 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31684 RTVEC_ELT (p, i)
31685 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
31686 frame_reg_rtx,
31687 info->gp_save_offset + frame_off + reg_size * i);
31688 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
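/* On 32-bit targets this PARALLEL is intended to match the
load-multiple pattern, e.g. a single `lmw 30,8(11)' reloading
r30 and r31 (register and offset illustrative). */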
31690 else
31692 int offset = info->gp_save_offset + frame_off;
31693 for (i = info->first_gp_reg_save; i < 32; i++)
31695 if (rs6000_reg_live_or_pic_offset_p (i)
31696 && !cfun->machine->gpr_is_wrapped_separately[i])
31698 rtx reg = gen_rtx_REG (reg_mode, i);
31699 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31702 offset += reg_size;
31706 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31708 /* If the frame pointer was used then we can't delay emitting
31709 a REG_CFA_DEF_CFA note. This must happen on the insn that
31710 restores the frame pointer, r31. We may have already emitted
31711 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
31712 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
31713 be harmless if emitted. */
31714 if (frame_pointer_needed)
31716 insn = get_last_insn ();
31717 add_reg_note (insn, REG_CFA_DEF_CFA,
31718 plus_constant (Pmode, frame_reg_rtx, frame_off));
31719 RTX_FRAME_RELATED_P (insn) = 1;
31722 /* Set up cfa_restores. We always need these when
31723 shrink-wrapping. If not shrink-wrapping then we only need
31724 the cfa_restore when the stack location is no longer valid.
31725 The cfa_restores must be emitted on or before the insn that
31726 invalidates the stack, and of course must not be emitted
31727 before the insn that actually does the restore. The latter
31728 is why it is a bad idea to emit the cfa_restores as a group
31729 on the last instruction here that actually does a restore:
31730 That insn may be reordered with respect to others doing
31731 restores. */
31732 if (flag_shrink_wrap
31733 && !restoring_GPRs_inline
31734 && info->first_fp_reg_save == 64)
31735 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31737 for (i = info->first_gp_reg_save; i < 32; i++)
31738 if (!restoring_GPRs_inline
31739 || using_load_multiple
31740 || rs6000_reg_live_or_pic_offset_p (i))
31742 if (cfun->machine->gpr_is_wrapped_separately[i])
31743 continue;
31745 rtx reg = gen_rtx_REG (reg_mode, i);
31746 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31750 if (!restoring_GPRs_inline
31751 && info->first_fp_reg_save == 64)
31753 /* We are jumping to an out-of-line function. */
31754 if (cfa_restores)
31755 emit_cfa_restores (cfa_restores);
31756 return;
31759 if (restore_lr && !restoring_GPRs_inline)
31761 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31762 restore_saved_lr (0, exit_func);
31765 /* Restore fpr's if we need to do it without calling a function. */
31766 if (restoring_FPRs_inline)
31768 int offset = info->fp_save_offset + frame_off;
31769 for (i = info->first_fp_reg_save; i < 64; i++)
31771 if (save_reg_p (i)
31772 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
31774 rtx reg = gen_rtx_REG (fp_reg_mode, i);
31775 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31776 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31777 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
31778 cfa_restores);
31781 offset += fp_reg_size;
31785 /* If we saved cr, restore it here. Just those that were used. */
31786 if (info->cr_save_p)
31787 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
31789 /* If this is V.4, unwind the stack pointer after all of the loads
31790 have been done, or set up r11 if we are restoring fp out of line. */
31791 ptr_regno = 1;
31792 if (!restoring_FPRs_inline)
31794 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31795 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31796 ptr_regno = ptr_regno_for_savres (sel);
31799 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31800 if (REGNO (frame_reg_rtx) == ptr_regno)
31801 frame_off = 0;
31803 if (insn && restoring_FPRs_inline)
31805 if (cfa_restores)
31807 REG_NOTES (insn) = cfa_restores;
31808 cfa_restores = NULL_RTX;
31810 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31811 RTX_FRAME_RELATED_P (insn) = 1;
31814 if (crtl->calls_eh_return)
31816 rtx sa = EH_RETURN_STACKADJ_RTX;
31817 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
31820 if (!sibcall && restoring_FPRs_inline)
31822 if (cfa_restores)
31824 /* We can't hang the cfa_restores off a simple return,
31825 since the shrink-wrap code sometimes uses an existing
31826 return. This means there might be a path from
31827 pre-prologue code to this return, and dwarf2cfi code
31828 wants the eh_frame unwinder state to be the same on
31829 all paths to any point. So we need to emit the
31830 cfa_restores before the return. For -m64 we really
31831 don't need epilogue cfa_restores at all, except for
31832 this irritating dwarf2cfi-with-shrink-wrap
31833 requirement; the stack red zone means eh_frame info
31834 from the prologue telling the unwinder to restore
31835 from the stack is perfectly good right to the end of
31836 the function. */
31837 emit_insn (gen_blockage ());
31838 emit_cfa_restores (cfa_restores);
31839 cfa_restores = NULL_RTX;
31842 emit_jump_insn (targetm.gen_simple_return ());
31845 if (!sibcall && !restoring_FPRs_inline)
31847 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31848 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
31849 int elt = 0;
31850 RTVEC_ELT (p, elt++) = ret_rtx;
31851 if (lr)
31852 RTVEC_ELT (p, elt++)
31853 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
31855 /* We have to restore more than two FP registers, so branch to the
31856 restore function. It will return to our caller. */
31857 int i;
31858 int reg;
31859 rtx sym;
31861 if (flag_shrink_wrap)
31862 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31864 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
31865 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
31866 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
31867 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
31869 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
31871 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
31873 RTVEC_ELT (p, elt++)
31874 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
31875 if (flag_shrink_wrap)
31876 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31879 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31882 if (cfa_restores)
31884 if (sibcall)
31885 /* Ensure the cfa_restores are hung off an insn that won't
31886 be reordered above other restores. */
31887 emit_insn (gen_blockage ());
31889 emit_cfa_restores (cfa_restores);
31893 /* Write function epilogue. */
31895 static void
31896 rs6000_output_function_epilogue (FILE *file)
31898 #if TARGET_MACHO
31899 macho_branch_islands ();
31902 rtx_insn *insn = get_last_insn ();
31903 rtx_insn *deleted_debug_label = NULL;
31905 /* Mach-O doesn't support labels at the end of objects, so if
31906 it looks like we might want one, take special action.
31908 First, collect any sequence of deleted debug labels. */
31909 while (insn
31910 && NOTE_P (insn)
31911 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
31913 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
31914 notes only, instead set their CODE_LABEL_NUMBER to -1,
31915 otherwise there would be code generation differences
31916 in between -g and -g0. */
31917 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31918 deleted_debug_label = insn;
31919 insn = PREV_INSN (insn);
31922 /* Second, if we have:
31923 label:
31924 barrier
31925 then this needs to be detected, so skip past the barrier. */
31927 if (insn && BARRIER_P (insn))
31928 insn = PREV_INSN (insn);
31930 /* Up to now we've only seen notes or barriers. */
31931 if (insn)
31933 if (LABEL_P (insn)
31934 || (NOTE_P (insn)
31935 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
31936 /* Trailing label: <barrier>. */
31937 fputs ("\tnop\n", file);
31938 else
31940 /* Lastly, see if we have a completely empty function body. */
31941 while (insn && ! INSN_P (insn))
31942 insn = PREV_INSN (insn);
31943 /* If we don't find any insns, we've got an empty function body;
31944 i.e. completely empty, without a return or branch. This is
31945 taken as the case where a function body has been removed
31946 because it contains an inline __builtin_unreachable(). GCC
31947 states that reaching __builtin_unreachable() means UB so we're
31948 not obliged to do anything special; however, we want
31949 non-zero-sized function bodies. To meet this, and help the
31950 user out, let's trap the case. */
31951 if (insn == NULL)
31952 fputs ("\ttrap\n", file);
31955 else if (deleted_debug_label)
31956 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
31957 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31958 CODE_LABEL_NUMBER (insn) = -1;
31960 #endif
31962 /* Output a traceback table here. See /usr/include/sys/debug.h for info
31963 on its format.
31965 We don't output a traceback table if -finhibit-size-directive was
31966 used. The documentation for -finhibit-size-directive reads
31967 ``don't output a @code{.size} assembler directive, or anything
31968 else that would cause trouble if the function is split in the
31969 middle, and the two halves are placed at locations far apart in
31970 memory.'' The traceback table has this property, since it
31971 includes the offset from the start of the function to the
31972 traceback table itself.
31974 System V.4 PowerPC (and the embedded ABI derived from it) uses a
31975 different traceback table. */
31976 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31977 && ! flag_inhibit_size_directive
31978 && rs6000_traceback != traceback_none && !cfun->is_thunk)
31980 const char *fname = NULL;
31981 const char *language_string = lang_hooks.name;
31982 int fixed_parms = 0, float_parms = 0, parm_info = 0;
31983 int i;
31984 int optional_tbtab;
31985 rs6000_stack_t *info = rs6000_stack_info ();
31987 if (rs6000_traceback == traceback_full)
31988 optional_tbtab = 1;
31989 else if (rs6000_traceback == traceback_part)
31990 optional_tbtab = 0;
31991 else
31992 optional_tbtab = !optimize_size && !TARGET_ELF;
31994 if (optional_tbtab)
31996 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
31997 while (*fname == '.') /* V.4 encodes . in the name */
31998 fname++;
32000 /* Need label immediately before tbtab, so we can compute
32001 its offset from the function start. */
32002 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
32003 ASM_OUTPUT_LABEL (file, fname);
32006 /* The .tbtab pseudo-op can only be used for the first eight
32007 expressions, since it can't handle the possibly variable
32008 length fields that follow. However, if you omit the optional
32009 fields, the assembler outputs zeros for all optional fields
32010 anyway, giving each variable-length field its minimum length
32011 (as defined in sys/debug.h). Thus we cannot use the .tbtab
32012 pseudo-op at all. */
32014 /* An all-zero word flags the start of the tbtab, for debuggers
32015 that have to find it by searching forward from the entry
32016 point or from the current pc. */
32017 fputs ("\t.long 0\n", file);
32019 /* Tbtab format type. Use format type 0. */
32020 fputs ("\t.byte 0,", file);
32022 /* Language type. Unfortunately, there does not seem to be any
32023 official way to discover the language being compiled, so we
32024 use language_string.
32025 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
32026 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
32027 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
32028 either, so for now use 0. */
32029 if (lang_GNU_C ()
32030 || ! strcmp (language_string, "GNU GIMPLE")
32031 || ! strcmp (language_string, "GNU Go")
32032 || ! strcmp (language_string, "libgccjit"))
32033 i = 0;
32034 else if (! strcmp (language_string, "GNU F77")
32035 || lang_GNU_Fortran ())
32036 i = 1;
32037 else if (! strcmp (language_string, "GNU Pascal"))
32038 i = 2;
32039 else if (! strcmp (language_string, "GNU Ada"))
32040 i = 3;
32041 else if (lang_GNU_CXX ()
32042 || ! strcmp (language_string, "GNU Objective-C++"))
32043 i = 9;
32044 else if (! strcmp (language_string, "GNU Java"))
32045 i = 13;
32046 else if (! strcmp (language_string, "GNU Objective-C"))
32047 i = 14;
32048 else
32049 gcc_unreachable ();
32050 fprintf (file, "%d,", i);
32052 /* 8 single bit fields: global linkage (not set for C extern linkage,
32053 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
32054 from start of procedure stored in tbtab, internal function, function
32055 has controlled storage, function has no toc, function uses fp,
32056 function logs/aborts fp operations. */
32057 /* Assume that fp operations are used if any fp reg must be saved. */
32058 fprintf (file, "%d,",
32059 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
32061 /* 6 bitfields: function is interrupt handler, name present in
32062 proc table, function calls alloca, on condition directives
32063 (controls stack walks, 3 bits), saves condition reg, saves
32064 link reg. */
32065 /* The `function calls alloca' bit seems to be set whenever reg 31 is
32066 set up as a frame pointer, even when there is no alloca call. */
32067 fprintf (file, "%d,",
32068 ((optional_tbtab << 6)
32069 | ((optional_tbtab & frame_pointer_needed) << 5)
32070 | (info->cr_save_p << 1)
32071 | (info->lr_save_p)));
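/* E.g. a full-traceback function that saves CR and LR but needs no
frame pointer emits (1 << 6) | (1 << 1) | 1 == 67 here (values
purely illustrative). */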
32073 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
32074 (6 bits). */
32075 fprintf (file, "%d,",
32076 (info->push_p << 7) | (64 - info->first_fp_reg_save));
32078 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
32079 fprintf (file, "%d,", (32 - first_reg_to_save ()));
32081 if (optional_tbtab)
32083 /* Compute the parameter info from the function decl argument
32084 list. */
32085 tree decl;
32086 int next_parm_info_bit = 31;
32088 for (decl = DECL_ARGUMENTS (current_function_decl);
32089 decl; decl = DECL_CHAIN (decl))
32091 rtx parameter = DECL_INCOMING_RTL (decl);
32092 machine_mode mode = GET_MODE (parameter);
32094 if (GET_CODE (parameter) == REG)
32096 if (SCALAR_FLOAT_MODE_P (mode))
32098 int bits;
32100 float_parms++;
32102 switch (mode)
32104 case E_SFmode:
32105 case E_SDmode:
32106 bits = 0x2;
32107 break;
32109 case E_DFmode:
32110 case E_DDmode:
32111 case E_TFmode:
32112 case E_TDmode:
32113 case E_IFmode:
32114 case E_KFmode:
32115 bits = 0x3;
32116 break;
32118 default:
32119 gcc_unreachable ();
32122 /* If only one bit will fit, don't or in this entry. */
32123 if (next_parm_info_bit > 0)
32124 parm_info |= (bits << (next_parm_info_bit - 1));
32125 next_parm_info_bit -= 2;
32127 else
32129 fixed_parms += ((GET_MODE_SIZE (mode)
32130 + (UNITS_PER_WORD - 1))
32131 / UNITS_PER_WORD);
32132 next_parm_info_bit -= 1;
32138 /* Number of fixed point parameters. */
32139 /* This is actually the number of words of fixed point parameters; thus
32140 an 8 byte struct counts as 2; and thus the maximum value is 8. */
32141 fprintf (file, "%d,", fixed_parms);
32143 /* 2 bitfields: number of floating point parameters (7 bits), parameters
32144 all on stack. */
32145 /* This is actually the number of fp registers that hold parameters;
32146 and thus the maximum value is 13. */
32147 /* Set parameters on stack bit if parameters are not in their original
32148 registers, regardless of whether they are on the stack? Xlc
32149 seems to set the bit when not optimizing. */
32150 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
32152 if (optional_tbtab)
32154 /* Optional fields follow. Some are variable length. */
32156 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
32157 float, 11 double float. */
32158 /* There is an entry for each parameter in a register, in the order
32159 that they occur in the parameter list. Any intervening arguments
32160 on the stack are ignored. If the list overflows a long (max
32161 possible length 34 bits) then completely leave off all elements
32162 that don't fit. */
32163 /* Only emit this long if there was at least one parameter. */
32164 if (fixed_parms || float_parms)
32165 fprintf (file, "\t.long %d\n", parm_info);
32167 /* Offset from start of code to tb table. */
32168 fputs ("\t.long ", file);
32169 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
32170 RS6000_OUTPUT_BASENAME (file, fname);
32171 putc ('-', file);
32172 rs6000_output_function_entry (file, fname);
32173 putc ('\n', file);
32175 /* Interrupt handler mask. */
32176 /* Omit this long, since we never set the interrupt handler bit
32177 above. */
32179 /* Number of CTL (controlled storage) anchors. */
32180 /* Omit this long, since the has_ctl bit is never set above. */
32182 /* Displacement into stack of each CTL anchor. */
32183 /* Omit this list of longs, because there are no CTL anchors. */
32185 /* Length of function name. */
32186 if (*fname == '*')
32187 ++fname;
32188 fprintf (file, "\t.short %d\n", (int) strlen (fname));
32190 /* Function name. */
32191 assemble_string (fname, strlen (fname));
32193 /* Register for alloca automatic storage; this is always reg 31.
32194 Only emit this if the alloca bit was set above. */
32195 if (frame_pointer_needed)
32196 fputs ("\t.byte 31\n", file);
32198 fputs ("\t.align 2\n", file);
32202 /* Arrange to define .LCTOC1 label, if not already done. */
32203 if (need_toc_init)
32205 need_toc_init = 0;
32206 if (!toc_initialized)
32208 switch_to_section (toc_section);
32209 switch_to_section (current_function_section ());
32214 /* -fsplit-stack support. */
32216 /* A SYMBOL_REF for __morestack. */
32217 static GTY(()) rtx morestack_ref;
32219 static rtx
32220 gen_add3_const (rtx rt, rtx ra, long c)
32222 if (TARGET_64BIT)
32223 return gen_adddi3 (rt, ra, GEN_INT (c));
32224 else
32225 return gen_addsi3 (rt, ra, GEN_INT (c));
32228 /* Emit -fsplit-stack prologue, which goes before the regular function
32229 prologue (at local entry point in the case of ELFv2). */
32231 void
32232 rs6000_expand_split_stack_prologue (void)
32234 rs6000_stack_t *info = rs6000_stack_info ();
32235 unsigned HOST_WIDE_INT allocate;
32236 long alloc_hi, alloc_lo;
32237 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
32238 rtx_insn *insn;
32240 gcc_assert (flag_split_stack && reload_completed);
32242 if (!info->push_p)
32243 return;
32245 if (global_regs[29])
32247 error ("-fsplit-stack uses register r29");
32248 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
32249 "conflicts with %qD", global_regs_decl[29]);
32252 allocate = info->total_size;
32253 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
32255 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
32256 return;
32258 if (morestack_ref == NULL_RTX)
32260 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
32261 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
32262 | SYMBOL_FLAG_FUNCTION);
32265 r0 = gen_rtx_REG (Pmode, 0);
32266 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32267 r12 = gen_rtx_REG (Pmode, 12);
32268 emit_insn (gen_load_split_stack_limit (r0));
32269 /* Always emit two insns here to calculate the requested stack,
32270 so that the linker can edit them when adjusting size for calling
32271 non-split-stack code. */
32272 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
32273 alloc_lo = -allocate - alloc_hi;
32274 if (alloc_hi != 0)
32276 emit_insn (gen_add3_const (r12, r1, alloc_hi));
32277 if (alloc_lo != 0)
32278 emit_insn (gen_add3_const (r12, r12, alloc_lo));
32279 else
32280 emit_insn (gen_nop ());
32282 else
32284 emit_insn (gen_add3_const (r12, r1, alloc_lo));
32285 emit_insn (gen_nop ());
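/* Worked example (illustrative numbers): for allocate == 0x12340,
alloc_hi == -0x10000 and alloc_lo == -0x2340, so the pair above is

	addis 12,1,-1
	addi 12,12,-9024

and the linker always finds exactly two insns to rewrite. */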
32288 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
32289 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
32290 ok_label = gen_label_rtx ();
32291 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32292 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
32293 gen_rtx_LABEL_REF (VOIDmode, ok_label),
32294 pc_rtx);
32295 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32296 JUMP_LABEL (insn) = ok_label;
32297 /* Mark the jump as very likely to be taken. */
32298 add_reg_br_prob_note (insn, profile_probability::very_likely ());
32300 lr = gen_rtx_REG (Pmode, LR_REGNO);
32301 insn = emit_move_insn (r0, lr);
32302 RTX_FRAME_RELATED_P (insn) = 1;
32303 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
32304 RTX_FRAME_RELATED_P (insn) = 1;
32306 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
32307 const0_rtx, const0_rtx));
32308 call_fusage = NULL_RTX;
32309 use_reg (&call_fusage, r12);
32310 /* Say the call uses r0, even though it doesn't, to stop regrename
32311 from twiddling with the insns saving lr, trashing args for cfun.
32312 The insns restoring lr are similarly protected by making
32313 split_stack_return use r0. */
32314 use_reg (&call_fusage, r0);
32315 add_function_usage_to (insn, call_fusage);
32316 /* Indicate that this function can't jump to non-local gotos. */
32317 make_reg_eh_region_note_nothrow_nononlocal (insn);
32318 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
32319 insn = emit_move_insn (lr, r0);
32320 add_reg_note (insn, REG_CFA_RESTORE, lr);
32321 RTX_FRAME_RELATED_P (insn) = 1;
32322 emit_insn (gen_split_stack_return ());
32324 emit_label (ok_label);
32325 LABEL_NUSES (ok_label) = 1;
32328 /* Return the internal arg pointer used for function incoming
32329 arguments. When -fsplit-stack, the arg pointer is r12 so we need
32330 to copy it to a pseudo in order for it to be preserved over calls
32331 and suchlike. We'd really like to use a pseudo here for the
32332 internal arg pointer but data-flow analysis is not prepared to
32333 accept pseudos as live at the beginning of a function. */
32335 static rtx
32336 rs6000_internal_arg_pointer (void)
32338 if (flag_split_stack
32339 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
32340 == NULL))
32343 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
32345 rtx pat;
32347 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
32348 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
32350 /* Put the pseudo initialization right after the note at the
32351 beginning of the function. */
32352 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
32353 gen_rtx_REG (Pmode, 12));
32354 push_topmost_sequence ();
32355 emit_insn_after (pat, get_insns ());
32356 pop_topmost_sequence ();
32358 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
32359 FIRST_PARM_OFFSET (current_function_decl));
32361 return virtual_incoming_args_rtx;
32364 /* We may have to tell the dataflow pass that the split stack prologue
32365 is initializing a register. */
32367 static void
32368 rs6000_live_on_entry (bitmap regs)
32370 if (flag_split_stack)
32371 bitmap_set_bit (regs, 12);
32374 /* Emit -fsplit-stack dynamic stack allocation space check. */
32376 void
32377 rs6000_split_stack_space_check (rtx size, rtx label)
32379 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32380 rtx limit = gen_reg_rtx (Pmode);
32381 rtx requested = gen_reg_rtx (Pmode);
32382 rtx cmp = gen_reg_rtx (CCUNSmode);
32383 rtx jump;
32385 emit_insn (gen_load_split_stack_limit (limit));
32386 if (CONST_INT_P (size))
32387 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
32388 else
32390 size = force_reg (Pmode, size);
32391 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
32393 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
32394 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32395 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
32396 gen_rtx_LABEL_REF (VOIDmode, label),
32397 pc_rtx);
32398 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32399 JUMP_LABEL (jump) = label;
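/* A sketch of the emitted sequence for a non-constant SIZE (register
names and CR field illustrative):

	<load the split-stack limit into rL>
	subf rR,rSIZE,1		# requested = sp - size
	cmpld 7,rR,rL
	bge 7,<label>		# enough space: continue at LABEL

leaving the fall-through path for the caller to fill with a call
into the runtime to grow the stack. */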
32402 /* A C compound statement that outputs the assembler code for a thunk
32403 function, used to implement C++ virtual function calls with
32404 multiple inheritance. The thunk acts as a wrapper around a virtual
32405 function, adjusting the implicit object parameter before handing
32406 control off to the real function.
32408 First, emit code to add the integer DELTA to the location that
32409 contains the incoming first argument. Assume that this argument
32410 contains a pointer, and is the one used to pass the `this' pointer
32411 in C++. This is the incoming argument *before* the function
32412 prologue, e.g. `%o0' on a sparc. The addition must preserve the
32413 values of all other incoming arguments.
32415 After the addition, emit code to jump to FUNCTION, which is a
32416 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
32417 not touch the return address. Hence returning from FUNCTION will
32418 return to whoever called the current `thunk'.
32420 The effect must be as if FUNCTION had been called directly with the
32421 adjusted first argument. This macro is responsible for emitting
32422 all of the code for a thunk function; output_function_prologue()
32423 and output_function_epilogue() are not invoked.
32425 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
32426 been extracted from it.) It might possibly be useful on some
32427 targets, but probably not.
32429 If you do not define this macro, the target-independent code in the
32430 C++ frontend will generate a less efficient heavyweight thunk that
32431 calls FUNCTION instead of jumping to it. The generic approach does
32432 not support varargs. */
32434 static void
32435 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
32436 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
32437 tree function)
32439 rtx this_rtx, funexp;
32440 rtx_insn *insn;
32442 reload_completed = 1;
32443 epilogue_completed = 1;
32445 /* Mark the end of the (empty) prologue. */
32446 emit_note (NOTE_INSN_PROLOGUE_END);
32448 /* Find the "this" pointer. If the function returns a structure,
32449 the structure return pointer is in r3. */
32450 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
32451 this_rtx = gen_rtx_REG (Pmode, 4);
32452 else
32453 this_rtx = gen_rtx_REG (Pmode, 3);
32455 /* Apply the constant offset, if required. */
32456 if (delta)
32457 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
32459 /* Apply the offset from the vtable, if required. */
32460 if (vcall_offset)
32462 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
32463 rtx tmp = gen_rtx_REG (Pmode, 12);
32465 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
32466 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
32468 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
32469 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
32471 else
32473 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
32475 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
32477 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
32480 /* Generate a tail call to the target function. */
32481 if (!TREE_USED (function))
32483 assemble_external (function);
32484 TREE_USED (function) = 1;
32486 funexp = XEXP (DECL_RTL (function), 0);
32487 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
32489 #if TARGET_MACHO
32490 if (MACHOPIC_INDIRECT)
32491 funexp = machopic_indirect_call_target (funexp);
32492 #endif
32494 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
32495 generate sibcall RTL explicitly. */
32496 insn = emit_call_insn (
32497 gen_rtx_PARALLEL (VOIDmode,
32498 gen_rtvec (3,
32499 gen_rtx_CALL (VOIDmode,
32500 funexp, const0_rtx),
32501 gen_rtx_USE (VOIDmode, const0_rtx),
32502 simple_return_rtx)));
32503 SIBLING_CALL_P (insn) = 1;
32504 emit_barrier ();
32506 /* Run just enough of rest_of_compilation to get the insns emitted.
32507 There's not really enough bulk here to make other passes such as
32508 instruction scheduling worthwhile. Note that use_thunk calls
32509 assemble_start_function and assemble_end_function. */
32510 insn = get_insns ();
32511 shorten_branches (insn);
32512 final_start_function (insn, file, 1);
32513 final (insn, file, 1);
32514 final_end_function ();
32516 reload_completed = 0;
32517 epilogue_completed = 0;
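/* Illustrative sketch (not part of the source): the thunk emitted above
   behaves like the following pseudo-C, where DELTA and VCALL_OFFSET are
   the compile-time constants passed in:

	this = this + DELTA;
	if (VCALL_OFFSET != 0)
	  this += *(ptrdiff_t *) ((char *) *(void **) this + VCALL_OFFSET);
	goto FUNCTION;

   The final transfer is a sibling call, so the return address (LR) is
   left untouched.  */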
32520 /* A quick summary of the various types of 'constant-pool tables'
32521 under PowerPC:
32523 Target       Flags           Name             One table per
32524 AIX          (none)          AIX TOC          object file
32525 AIX          -mfull-toc      AIX TOC          object file
32526 AIX          -mminimal-toc   AIX minimal TOC  translation unit
32527 SVR4/EABI    (none)          SVR4 SDATA       object file
32528 SVR4/EABI    -fpic           SVR4 pic         object file
32529 SVR4/EABI    -fPIC           SVR4 PIC         translation unit
32530 SVR4/EABI    -mrelocatable   EABI TOC         function
32531 SVR4/EABI    -maix           AIX TOC          object file
32532 SVR4/EABI    -maix -mminimal-toc
32533                              AIX minimal TOC  translation unit
32535 Name             Reg.   Set by    entries  contains:
32536                         made by             addrs?   fp?      sum?
32538 AIX TOC            2    crt0      as        Y        option   option
32539 AIX minimal TOC   30    prolog    gcc       Y        Y        option
32540 SVR4 SDATA        13    crt0      gcc       N        Y        N
32541 SVR4 pic          30    prolog    ld        Y        not yet  N
32542 SVR4 PIC          30    prolog    gcc       Y        option   option
32543 EABI TOC          30    prolog    gcc       Y        option   option
32547 /* Hash functions for the hash table. */
32549 static unsigned
32550 rs6000_hash_constant (rtx k)
32552 enum rtx_code code = GET_CODE (k);
32553 machine_mode mode = GET_MODE (k);
32554 unsigned result = (code << 3) ^ mode;
32555 const char *format;
32556 int flen, fidx;
32558 format = GET_RTX_FORMAT (code);
32559 flen = strlen (format);
32560 fidx = 0;
32562 switch (code)
32564 case LABEL_REF:
32565 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
32567 case CONST_WIDE_INT:
32569 int i;
32570 flen = CONST_WIDE_INT_NUNITS (k);
32571 for (i = 0; i < flen; i++)
32572 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
32573 return result;
32576 case CONST_DOUBLE:
32577 if (mode != VOIDmode)
32578 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
32579 flen = 2;
32580 break;
32582 case CODE_LABEL:
32583 fidx = 3;
32584 break;
32586 default:
32587 break;
32590 for (; fidx < flen; fidx++)
32591 switch (format[fidx])
32593 case 's':
32595 unsigned i, len;
32596 const char *str = XSTR (k, fidx);
32597 len = strlen (str);
32598 result = result * 613 + len;
32599 for (i = 0; i < len; i++)
32600 result = result * 613 + (unsigned) str[i];
32601 break;
32603 case 'u':
32604 case 'e':
32605 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
32606 break;
32607 case 'i':
32608 case 'n':
32609 result = result * 613 + (unsigned) XINT (k, fidx);
32610 break;
32611 case 'w':
32612 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
32613 result = result * 613 + (unsigned) XWINT (k, fidx);
32614 else
32616 size_t i;
32617 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
32618 result = result * 613 + (unsigned) (XWINT (k, fidx)
32619 >> CHAR_BIT * i);
32621 break;
32622 case '0':
32623 break;
32624 default:
32625 gcc_unreachable ();
32628 return result;
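/* Illustrative sketch (not part of the source): the 's' case above
   hashes a string with the same multiply-by-613 mixing; a standalone
   version of just that step looks like this.  */

static unsigned
rs6000_hash_string_example (const char *str)
{
  unsigned result = 0;
  unsigned len = (unsigned) strlen (str);
  unsigned i;

  /* Mix in the length first, then each character, exactly as the
     's' case of rs6000_hash_constant does.  */
  result = result * 613 + len;
  for (i = 0; i < len; i++)
    result = result * 613 + (unsigned) str[i];
  return result;
}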
32631 hashval_t
32632 toc_hasher::hash (toc_hash_struct *thc)
32634 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
32637 /* Compare H1 and H2 for equivalence. */
32639 bool
32640 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
32642 rtx r1 = h1->key;
32643 rtx r2 = h2->key;
32645 if (h1->key_mode != h2->key_mode)
32646 return 0;
32648 return rtx_equal_p (r1, r2);
32651 /* These are the names given by the C++ front-end to vtables, and
32652 vtable-like objects. Ideally, this logic should not be here;
32653 instead, there should be some programmatic way of inquiring as
32654 to whether or not an object is a vtable. */
32656 #define VTABLE_NAME_P(NAME) \
32657 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
32658 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
32659 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
32660 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
32661 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
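/* Illustrative usage (not part of the source): note the macro body
   tests a variable literally named `name', not its NAME argument, so a
   caller must have one in scope.  "_ZTV3Foo" is the Itanium-ABI
   mangling of Foo's vtable, so the predicate accepts it; an ordinary
   symbol such as "printf" is rejected.  */

static int
vtable_name_p_example (void)
{
  const char *name = "_ZTV3Foo";	/* hypothetical mangled name */
  return VTABLE_NAME_P (name);		/* nonzero: vtable-like */
}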
32663 #ifdef NO_DOLLAR_IN_LABEL
32664 /* Return a GGC-allocated character string translating dollar signs in
32665 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
32667 const char *
32668 rs6000_xcoff_strip_dollar (const char *name)
32670 char *strip, *p;
32671 const char *q;
32672 size_t len;
32674 q = (const char *) strchr (name, '$');
32676 if (q == 0 || q == name)
32677 return name;
32679 len = strlen (name);
32680 strip = XALLOCAVEC (char, len + 1);
32681 strcpy (strip, name);
32682 p = strip + (q - name);
32683 while (p)
32685 *p = '_';
32686 p = strchr (p + 1, '$');
32689 return ggc_alloc_string (strip, len);
32691 #endif
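/* Illustrative sketch (not part of the source): the same rewrite using
   only libiberty and the C library, e.g. "foo$bar$baz" becomes
   "foo_bar_baz".  As in the real routine, a name whose only '$' is the
   leading character (or that has no '$' at all) is returned unchanged.
   The caller frees the result.  */

static char *
strip_dollar_example (const char *name)
{
  char *copy = xstrdup (name);
  char *p = strchr (copy, '$');

  if (p == NULL || p == copy)
    return copy;			/* nothing to strip */
  do
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }
  while (p != NULL);
  return copy;
}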
32693 void
32694 rs6000_output_symbol_ref (FILE *file, rtx x)
32696 const char *name = XSTR (x, 0);
32698 /* Currently C++ toc references to vtables can be emitted before it
32699 is decided whether the vtable is public or private. If this is
32700 the case, then the linker will eventually complain that there is
32701 a reference to an unknown section. Thus, for vtables only,
32702 we emit the TOC reference to reference the identifier and not the
32703 symbol. */
32704 if (VTABLE_NAME_P (name))
32706 RS6000_OUTPUT_BASENAME (file, name);
32708 else
32709 assemble_name (file, name);
32712 /* Output a TOC entry. We derive the entry name from what is being
32713 written. */
32715 void
32716 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
32718 char buf[256];
32719 const char *name = buf;
32720 rtx base = x;
32721 HOST_WIDE_INT offset = 0;
32723 gcc_assert (!TARGET_NO_TOC);
32725 /* When the linker won't eliminate them, don't output duplicate
32726 TOC entries (this happens on AIX if there is any kind of TOC,
32727 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
32728 CODE_LABELs. */
32729 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
32731 struct toc_hash_struct *h;
32733 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
32734 time because GGC is not initialized at that point. */
32735 if (toc_hash_table == NULL)
32736 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
32738 h = ggc_alloc<toc_hash_struct> ();
32739 h->key = x;
32740 h->key_mode = mode;
32741 h->labelno = labelno;
32743 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
32744 if (*found == NULL)
32745 *found = h;
32746 else /* This is indeed a duplicate.
32747 Set this label equal to that label. */
32749 fputs ("\t.set ", file);
32750 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32751 fprintf (file, "%d,", labelno);
32752 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32753 fprintf (file, "%d\n", ((*found)->labelno));
32755 #ifdef HAVE_AS_TLS
32756 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
32757 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
32758 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
32760 fputs ("\t.set ", file);
32761 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32762 fprintf (file, "%d,", labelno);
32763 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32764 fprintf (file, "%d\n", ((*found)->labelno));
32766 #endif
32767 return;
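/* For example (sketch, XCOFF-style internal labels): if the constant
   behind LC..12 was already assigned LC..5, the duplicate path above
   emits just

	.set LC..12,LC..5

   so both labels resolve to the one existing TOC entry.  */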
32771 /* If we're going to put a double constant in the TOC, make sure it's
32772 aligned properly when strict alignment is on. */
32773 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
32774 && STRICT_ALIGNMENT
32775 && GET_MODE_BITSIZE (mode) >= 64
32776 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
32777 ASM_OUTPUT_ALIGN (file, 3);
32780 (*targetm.asm_out.internal_label) (file, "LC", labelno);
32782 /* Handle FP constants specially. Note that if we have a minimal
32783 TOC, things we put here aren't actually in the TOC, so we can allow
32784 FP constants. */
32785 if (GET_CODE (x) == CONST_DOUBLE &&
32786 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
32787 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
32789 long k[4];
32791 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32792 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
32793 else
32794 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32796 if (TARGET_64BIT)
32798 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32799 fputs (DOUBLE_INT_ASM_OP, file);
32800 else
32801 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32802 k[0] & 0xffffffff, k[1] & 0xffffffff,
32803 k[2] & 0xffffffff, k[3] & 0xffffffff);
32804 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
32805 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32806 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
32807 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
32808 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
32809 return;
32811 else
32813 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32814 fputs ("\t.long ", file);
32815 else
32816 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32817 k[0] & 0xffffffff, k[1] & 0xffffffff,
32818 k[2] & 0xffffffff, k[3] & 0xffffffff);
32819 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
32820 k[0] & 0xffffffff, k[1] & 0xffffffff,
32821 k[2] & 0xffffffff, k[3] & 0xffffffff);
32822 return;
32825 else if (GET_CODE (x) == CONST_DOUBLE &&
32826 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
32828 long k[2];
32830 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32831 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
32832 else
32833 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32835 if (TARGET_64BIT)
32837 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32838 fputs (DOUBLE_INT_ASM_OP, file);
32839 else
32840 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32841 k[0] & 0xffffffff, k[1] & 0xffffffff);
32842 fprintf (file, "0x%lx%08lx\n",
32843 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32844 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
32845 return;
32847 else
32849 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32850 fputs ("\t.long ", file);
32851 else
32852 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32853 k[0] & 0xffffffff, k[1] & 0xffffffff);
32854 fprintf (file, "0x%lx,0x%lx\n",
32855 k[0] & 0xffffffff, k[1] & 0xffffffff);
32856 return;
32859 else if (GET_CODE (x) == CONST_DOUBLE &&
32860 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
32862 long l;
32864 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32865 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
32866 else
32867 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
32869 if (TARGET_64BIT)
32871 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32872 fputs (DOUBLE_INT_ASM_OP, file);
32873 else
32874 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32875 if (WORDS_BIG_ENDIAN)
32876 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
32877 else
32878 fprintf (file, "0x%lx\n", l & 0xffffffff);
32879 return;
32881 else
32883 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32884 fputs ("\t.long ", file);
32885 else
32886 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32887 fprintf (file, "0x%lx\n", l & 0xffffffff);
32888 return;
32891 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
32893 unsigned HOST_WIDE_INT low;
32894 HOST_WIDE_INT high;
32896 low = INTVAL (x) & 0xffffffff;
32897 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
32899 /* TOC entries are always Pmode-sized, so when big-endian,
32900 smaller integer constants in the TOC need to be padded.
32901 (This is still a win over putting the constants in
32902 a separate constant pool, because then we'd have
32903 to have both a TOC entry _and_ the actual constant.)
32905 For a 32-bit target, CONST_INT values are loaded and shifted
32906 entirely within `low' and can be stored in one TOC entry. */
32908 /* It would be easy to make this work, but it doesn't now. */
32909 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
32911 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
32913 low |= high << 32;
32914 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
32915 high = (HOST_WIDE_INT) low >> 32;
32916 low &= 0xffffffff;
32919 if (TARGET_64BIT)
32921 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32922 fputs (DOUBLE_INT_ASM_OP, file);
32923 else
32924 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32925 (long) high & 0xffffffff, (long) low & 0xffffffff);
32926 fprintf (file, "0x%lx%08lx\n",
32927 (long) high & 0xffffffff, (long) low & 0xffffffff);
32928 return;
32930 else
32932 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
32934 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32935 fputs ("\t.long ", file);
32936 else
32937 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32938 (long) high & 0xffffffff, (long) low & 0xffffffff);
32939 fprintf (file, "0x%lx,0x%lx\n",
32940 (long) high & 0xffffffff, (long) low & 0xffffffff);
32942 else
32944 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32945 fputs ("\t.long ", file);
32946 else
32947 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
32948 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
32950 return;
32954 if (GET_CODE (x) == CONST)
32956 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
32957 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
32959 base = XEXP (XEXP (x, 0), 0);
32960 offset = INTVAL (XEXP (XEXP (x, 0), 1));
32963 switch (GET_CODE (base))
32965 case SYMBOL_REF:
32966 name = XSTR (base, 0);
32967 break;
32969 case LABEL_REF:
32970 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
32971 CODE_LABEL_NUMBER (XEXP (base, 0)));
32972 break;
32974 case CODE_LABEL:
32975 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
32976 break;
32978 default:
32979 gcc_unreachable ();
32982 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32983 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
32984 else
32986 fputs ("\t.tc ", file);
32987 RS6000_OUTPUT_BASENAME (file, name);
32989 if (offset < 0)
32990 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
32991 else if (offset)
32992 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
32994 /* Mark large TOC symbols on AIX with [TE] so they are mapped
32995 after other TOC symbols, reducing overflow of small TOC access
32996 to [TC] symbols. */
32997 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
32998 ? "[TE]," : "[TC],", file);
33001 /* Currently C++ toc references to vtables can be emitted before it
33002 is decided whether the vtable is public or private. If this is
33003 the case, then the linker will eventually complain that there is
33004 a TOC reference to an unknown section. Thus, for vtables only,
33005 we emit the TOC reference to reference the symbol and not the
33006 section. */
33007 if (VTABLE_NAME_P (name))
33009 RS6000_OUTPUT_BASENAME (file, name);
33010 if (offset < 0)
33011 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
33012 else if (offset > 0)
33013 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
33015 else
33016 output_addr_const (file, x);
33018 #if HAVE_AS_TLS
33019 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
33021 switch (SYMBOL_REF_TLS_MODEL (base))
33023 case 0:
33024 break;
33025 case TLS_MODEL_LOCAL_EXEC:
33026 fputs ("@le", file);
33027 break;
33028 case TLS_MODEL_INITIAL_EXEC:
33029 fputs ("@ie", file);
33030 break;
33031 /* Use global-dynamic for local-dynamic. */
33032 case TLS_MODEL_GLOBAL_DYNAMIC:
33033 case TLS_MODEL_LOCAL_DYNAMIC:
33034 putc ('\n', file);
33035 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
33036 fputs ("\t.tc .", file);
33037 RS6000_OUTPUT_BASENAME (file, name);
33038 fputs ("[TC],", file);
33039 output_addr_const (file, x);
33040 fputs ("@m", file);
33041 break;
33042 default:
33043 gcc_unreachable ();
33046 #endif
33048 putc ('\n', file);
33051 /* Output an assembler pseudo-op to write an ASCII string of N characters
33052 starting at P to FILE.
33054 On the RS/6000, we have to do this using the .byte operation and
33055 write out special characters outside the quoted string.
33056 Also, the assembler is broken; very long strings are truncated,
33057 so we must artificially break them up early. */
33059 void
33060 output_ascii (FILE *file, const char *p, int n)
33062 char c;
33063 int i, count_string;
33064 const char *for_string = "\t.byte \"";
33065 const char *for_decimal = "\t.byte ";
33066 const char *to_close = NULL;
33068 count_string = 0;
33069 for (i = 0; i < n; i++)
33071 c = *p++;
33072 if (c >= ' ' && c < 0177)
33074 if (for_string)
33075 fputs (for_string, file);
33076 putc (c, file);
33078 /* Write two quotes to get one. */
33079 if (c == '"')
33081 putc (c, file);
33082 ++count_string;
33085 for_string = NULL;
33086 for_decimal = "\"\n\t.byte ";
33087 to_close = "\"\n";
33088 ++count_string;
33090 if (count_string >= 512)
33092 fputs (to_close, file);
33094 for_string = "\t.byte \"";
33095 for_decimal = "\t.byte ";
33096 to_close = NULL;
33097 count_string = 0;
33100 else
33102 if (for_decimal)
33103 fputs (for_decimal, file);
33104 fprintf (file, "%d", c);
33106 for_string = "\n\t.byte \"";
33107 for_decimal = ", ";
33108 to_close = "\n";
33109 count_string = 0;
33113 /* Now close the string if we have written one. Then end the line. */
33114 if (to_close)
33115 fputs (to_close, file);
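/* For example (sketch): called on the 3-byte string "OK\n", the loop
   above emits

	.byte "OK"
	.byte 10

   printable runs are grouped inside one quoted .byte directive, while
   other bytes appear as decimal values on their own directive.  */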
33118 /* Generate a unique section name for FILENAME for a section type
33119 represented by SECTION_DESC. Output goes into BUF.
33121 SECTION_DESC can be any string, as long as it is different for each
33122 possible section type.
33124 We name the section in the same manner as xlc. The name begins with an
33125 underscore followed by the filename (after stripping any leading directory
33126 names), with the last period and everything after it replaced by
33127 the string SECTION_DESC. If FILENAME does not contain a period,
33128 SECTION_DESC is appended to the end of the name. */
33130 void
33131 rs6000_gen_section_name (char **buf, const char *filename,
33132 const char *section_desc)
33134 const char *q, *after_last_slash, *last_period = 0;
33135 char *p;
33136 int len;
33138 after_last_slash = filename;
33139 for (q = filename; *q; q++)
33141 if (*q == '/')
33142 after_last_slash = q + 1;
33143 else if (*q == '.')
33144 last_period = q;
33147 len = strlen (after_last_slash) + strlen (section_desc) + 2;
33148 *buf = (char *) xmalloc (len);
33150 p = *buf;
33151 *p++ = '_';
33153 for (q = after_last_slash; *q; q++)
33155 if (q == last_period)
33157 strcpy (p, section_desc);
33158 p += strlen (section_desc);
33159 break;
33162 else if (ISALNUM (*q))
33163 *p++ = *q;
33166 if (last_period == 0)
33167 strcpy (p, section_desc);
33168 else
33169 *p = '\0';
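/* For example (sketch): FILENAME "../src/foo.c" with SECTION_DESC
   "bss_" yields "_foobss_": the directory part is dropped, the name is
   kept up to its last period, SECTION_DESC replaces the period and
   everything after it, and any non-alphanumeric characters in the name
   are skipped.  */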
33172 /* Emit profile function. */
33174 void
33175 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
33177 /* Non-standard profiling for kernels, which just saves LR then calls
33178 _mcount without worrying about arg saves. The idea is to change
33179 the function prologue as little as possible as it isn't easy to
33180 account for arg save/restore code added just for _mcount. */
33181 if (TARGET_PROFILE_KERNEL)
33182 return;
33184 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33186 #ifndef NO_PROFILE_COUNTERS
33187 # define NO_PROFILE_COUNTERS 0
33188 #endif
33189 if (NO_PROFILE_COUNTERS)
33190 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33191 LCT_NORMAL, VOIDmode);
33192 else
33194 char buf[30];
33195 const char *label_name;
33196 rtx fun;
33198 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33199 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
33200 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
33202 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33203 LCT_NORMAL, VOIDmode, fun, Pmode);
33206 else if (DEFAULT_ABI == ABI_DARWIN)
33208 const char *mcount_name = RS6000_MCOUNT;
33209 int caller_addr_regno = LR_REGNO;
33211 /* Be conservative and always set this, at least for now. */
33212 crtl->uses_pic_offset_table = 1;
33214 #if TARGET_MACHO
33215 /* For PIC code, set up a stub and collect the caller's address
33216 from r0, which is where the prologue puts it. */
33217 if (MACHOPIC_INDIRECT
33218 && crtl->uses_pic_offset_table)
33219 caller_addr_regno = 0;
33220 #endif
33221 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
33222 LCT_NORMAL, VOIDmode,
33223 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
33227 /* Write function profiler code. */
33229 void
33230 output_function_profiler (FILE *file, int labelno)
33232 char buf[100];
33234 switch (DEFAULT_ABI)
33236 default:
33237 gcc_unreachable ();
33239 case ABI_V4:
33240 if (!TARGET_32BIT)
33242 warning (0, "no profiling of 64-bit code for this ABI");
33243 return;
33245 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33246 fprintf (file, "\tmflr %s\n", reg_names[0]);
33247 if (NO_PROFILE_COUNTERS)
33249 asm_fprintf (file, "\tstw %s,4(%s)\n",
33250 reg_names[0], reg_names[1]);
33252 else if (TARGET_SECURE_PLT && flag_pic)
33254 if (TARGET_LINK_STACK)
33256 char name[32];
33257 get_ppc476_thunk_name (name);
33258 asm_fprintf (file, "\tbl %s\n", name);
33260 else
33261 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
33262 asm_fprintf (file, "\tstw %s,4(%s)\n",
33263 reg_names[0], reg_names[1]);
33264 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33265 asm_fprintf (file, "\taddis %s,%s,",
33266 reg_names[12], reg_names[12]);
33267 assemble_name (file, buf);
33268 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
33269 assemble_name (file, buf);
33270 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
33272 else if (flag_pic == 1)
33274 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
33275 asm_fprintf (file, "\tstw %s,4(%s)\n",
33276 reg_names[0], reg_names[1]);
33277 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33278 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
33279 assemble_name (file, buf);
33280 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
33282 else if (flag_pic > 1)
33284 asm_fprintf (file, "\tstw %s,4(%s)\n",
33285 reg_names[0], reg_names[1]);
33286 /* Now, we need to get the address of the label. */
33287 if (TARGET_LINK_STACK)
33289 char name[32];
33290 get_ppc476_thunk_name (name);
33291 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
33292 assemble_name (file, buf);
33293 fputs ("-.\n1:", file);
33294 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33295 asm_fprintf (file, "\taddi %s,%s,4\n",
33296 reg_names[11], reg_names[11]);
33298 else
33300 fputs ("\tbcl 20,31,1f\n\t.long ", file);
33301 assemble_name (file, buf);
33302 fputs ("-.\n1:", file);
33303 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33305 asm_fprintf (file, "\tlwz %s,0(%s)\n",
33306 reg_names[0], reg_names[11]);
33307 asm_fprintf (file, "\tadd %s,%s,%s\n",
33308 reg_names[0], reg_names[0], reg_names[11]);
33310 else
33312 asm_fprintf (file, "\tlis %s,", reg_names[12]);
33313 assemble_name (file, buf);
33314 fputs ("@ha\n", file);
33315 asm_fprintf (file, "\tstw %s,4(%s)\n",
33316 reg_names[0], reg_names[1]);
33317 asm_fprintf (file, "\tla %s,", reg_names[0]);
33318 assemble_name (file, buf);
33319 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
33322 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
33323 fprintf (file, "\tbl %s%s\n",
33324 RS6000_MCOUNT, flag_pic ? "@plt" : "");
33325 break;
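/* For example (an unverified sketch: 32-bit SVR4, non-PIC, with profile
   counters enabled): the code above emits roughly

	mflr 0
	lis 12,.LP1@ha
	stw 0,4(1)
	la 0,.LP1@l(12)
	bl _mcount

   i.e. save LR in the caller's frame word and pass the address of the
   per-function counter label in r0.  */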
33327 case ABI_AIX:
33328 case ABI_ELFv2:
33329 case ABI_DARWIN:
33330 /* Don't do anything, done in output_profile_hook (). */
33331 break;
33337 /* The following variable holds the last issued insn. */
33339 static rtx_insn *last_scheduled_insn;
33341 /* The following variable helps to balance issuing of load and
33342 store instructions. */
33344 static int load_store_pendulum;
33346 /* The following variable helps pair divide insns during scheduling. */
33347 static int divide_cnt;
33348 /* The following variable helps pair and alternate vector and vector load
33349 insns during scheduling. */
33350 static int vec_pairing;
33353 /* Power4 load update and store update instructions are cracked into a
33354 load or store and an integer insn which are executed in the same cycle.
33355 Branches have their own dispatch slot which does not count against the
33356 GCC issue rate, but it changes the program flow so there are no other
33357 instructions to issue in this cycle. */
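/* For example (sketch): with an issue rate of 5 on a dispatch-group
   CPU, the code below leaves 4 slots after an ordinary insn, 3 after a
   cracked insn, and 0 after a microcoded insn, which closes the
   group.  */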
33359 static int
33360 rs6000_variable_issue_1 (rtx_insn *insn, int more)
33362 last_scheduled_insn = insn;
33363 if (GET_CODE (PATTERN (insn)) == USE
33364 || GET_CODE (PATTERN (insn)) == CLOBBER)
33366 cached_can_issue_more = more;
33367 return cached_can_issue_more;
33370 if (insn_terminates_group_p (insn, current_group))
33372 cached_can_issue_more = 0;
33373 return cached_can_issue_more;
33376 /* If the insn has no reservation but we reach here, don't charge an issue slot. */
33377 if (recog_memoized (insn) < 0)
33378 return more;
33380 if (rs6000_sched_groups)
33382 if (is_microcoded_insn (insn))
33383 cached_can_issue_more = 0;
33384 else if (is_cracked_insn (insn))
33385 cached_can_issue_more = more > 2 ? more - 2 : 0;
33386 else
33387 cached_can_issue_more = more - 1;
33389 return cached_can_issue_more;
33392 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
33393 return 0;
33395 cached_can_issue_more = more - 1;
33396 return cached_can_issue_more;
33399 static int
33400 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
33402 int r = rs6000_variable_issue_1 (insn, more);
33403 if (verbose)
33404 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
33405 return r;
33408 /* Adjust the cost of a scheduling dependency. Return the new cost of
33409 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
33411 static int
33412 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
33413 unsigned int)
33415 enum attr_type attr_type;
33417 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
33418 return cost;
33420 switch (dep_type)
33422 case REG_DEP_TRUE:
33424 /* Data dependency; DEP_INSN writes a register that INSN reads
33425 some cycles later. */
33427 /* Separate a load from a narrower, dependent store. */
33428 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
33429 && GET_CODE (PATTERN (insn)) == SET
33430 && GET_CODE (PATTERN (dep_insn)) == SET
33431 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
33432 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
33433 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
33434 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
33435 return cost + 14;
33437 attr_type = get_attr_type (insn);
33439 switch (attr_type)
33441 case TYPE_JMPREG:
33442 /* Tell the first scheduling pass about the latency between
33443 a mtctr and bctr (and mtlr and br/blr). The first
33444 scheduling pass will not know about this latency since
33445 the mtctr instruction, which has the latency associated
33446 with it, will be generated by reload.
33447 return 4;
33448 case TYPE_BRANCH:
33449 /* Leave some extra cycles between a compare and its
33450 dependent branch, to inhibit expensive mispredicts. */
33451 if ((rs6000_cpu_attr == CPU_PPC603
33452 || rs6000_cpu_attr == CPU_PPC604
33453 || rs6000_cpu_attr == CPU_PPC604E
33454 || rs6000_cpu_attr == CPU_PPC620
33455 || rs6000_cpu_attr == CPU_PPC630
33456 || rs6000_cpu_attr == CPU_PPC750
33457 || rs6000_cpu_attr == CPU_PPC7400
33458 || rs6000_cpu_attr == CPU_PPC7450
33459 || rs6000_cpu_attr == CPU_PPCE5500
33460 || rs6000_cpu_attr == CPU_PPCE6500
33461 || rs6000_cpu_attr == CPU_POWER4
33462 || rs6000_cpu_attr == CPU_POWER5
33463 || rs6000_cpu_attr == CPU_POWER7
33464 || rs6000_cpu_attr == CPU_POWER8
33465 || rs6000_cpu_attr == CPU_POWER9
33466 || rs6000_cpu_attr == CPU_CELL)
33467 && recog_memoized (dep_insn)
33468 && (INSN_CODE (dep_insn) >= 0))
33470 switch (get_attr_type (dep_insn))
33472 case TYPE_CMP:
33473 case TYPE_FPCOMPARE:
33474 case TYPE_CR_LOGICAL:
33475 case TYPE_DELAYED_CR:
33476 return cost + 2;
33477 case TYPE_EXTS:
33478 case TYPE_MUL:
33479 if (get_attr_dot (dep_insn) == DOT_YES)
33480 return cost + 2;
33481 else
33482 break;
33483 case TYPE_SHIFT:
33484 if (get_attr_dot (dep_insn) == DOT_YES
33485 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
33486 return cost + 2;
33487 else
33488 break;
33489 default:
33490 break;
33492 break;
33494 case TYPE_STORE:
33495 case TYPE_FPSTORE:
33496 if ((rs6000_cpu == PROCESSOR_POWER6)
33497 && recog_memoized (dep_insn)
33498 && (INSN_CODE (dep_insn) >= 0))
33501 if (GET_CODE (PATTERN (insn)) != SET)
33502 /* If this happens, we have to extend this to schedule
33503 optimally. Return default for now. */
33504 return cost;
33506 /* Adjust the cost for the case where the value written
33507 by a fixed point operation is used as the address
33508 gen value on a store. */
33509 switch (get_attr_type (dep_insn))
33511 case TYPE_LOAD:
33512 case TYPE_CNTLZ:
33514 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33515 return get_attr_sign_extend (dep_insn)
33516 == SIGN_EXTEND_YES ? 6 : 4;
33517 break;
33519 case TYPE_SHIFT:
33521 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33522 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33523 6 : 3;
33524 break;
33526 case TYPE_INTEGER:
33527 case TYPE_ADD:
33528 case TYPE_LOGICAL:
33529 case TYPE_EXTS:
33530 case TYPE_INSERT:
33532 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33533 return 3;
33534 break;
33536 case TYPE_STORE:
33537 case TYPE_FPLOAD:
33538 case TYPE_FPSTORE:
33540 if (get_attr_update (dep_insn) == UPDATE_YES
33541 && ! rs6000_store_data_bypass_p (dep_insn, insn))
33542 return 3;
33543 break;
33545 case TYPE_MUL:
33547 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33548 return 17;
33549 break;
33551 case TYPE_DIV:
33553 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33554 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33555 break;
33557 default:
33558 break;
33561 break;
33563 case TYPE_LOAD:
33564 if ((rs6000_cpu == PROCESSOR_POWER6)
33565 && recog_memoized (dep_insn)
33566 && (INSN_CODE (dep_insn) >= 0))
33569 /* Adjust the cost for the case where the value written
33570 by a fixed point instruction is used within the address
33571 gen portion of a subsequent load(u)(x). */
33572 switch (get_attr_type (dep_insn))
33574 case TYPE_LOAD:
33575 case TYPE_CNTLZ:
33577 if (set_to_load_agen (dep_insn, insn))
33578 return get_attr_sign_extend (dep_insn)
33579 == SIGN_EXTEND_YES ? 6 : 4;
33580 break;
33582 case TYPE_SHIFT:
33584 if (set_to_load_agen (dep_insn, insn))
33585 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33586 6 : 3;
33587 break;
33589 case TYPE_INTEGER:
33590 case TYPE_ADD:
33591 case TYPE_LOGICAL:
33592 case TYPE_EXTS:
33593 case TYPE_INSERT:
33595 if (set_to_load_agen (dep_insn, insn))
33596 return 3;
33597 break;
33599 case TYPE_STORE:
33600 case TYPE_FPLOAD:
33601 case TYPE_FPSTORE:
33603 if (get_attr_update (dep_insn) == UPDATE_YES
33604 && set_to_load_agen (dep_insn, insn))
33605 return 3;
33606 break;
33608 case TYPE_MUL:
33610 if (set_to_load_agen (dep_insn, insn))
33611 return 17;
33612 break;
33614 case TYPE_DIV:
33616 if (set_to_load_agen (dep_insn, insn))
33617 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33618 break;
33620 default:
33621 break;
33624 break;
33626 case TYPE_FPLOAD:
33627 if ((rs6000_cpu == PROCESSOR_POWER6)
33628 && get_attr_update (insn) == UPDATE_NO
33629 && recog_memoized (dep_insn)
33630 && (INSN_CODE (dep_insn) >= 0)
33631 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
33632 return 2;
33634 default:
33635 break;
33638 /* Fall out to return default cost. */
33640 break;
33642 case REG_DEP_OUTPUT:
33643 /* Output dependency; DEP_INSN writes a register that INSN writes some
33644 cycles later. */
33645 if ((rs6000_cpu == PROCESSOR_POWER6)
33646 && recog_memoized (dep_insn)
33647 && (INSN_CODE (dep_insn) >= 0))
33649 attr_type = get_attr_type (insn);
33651 switch (attr_type)
33653 case TYPE_FP:
33654 case TYPE_FPSIMPLE:
33655 if (get_attr_type (dep_insn) == TYPE_FP
33656 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
33657 return 1;
33658 break;
33659 case TYPE_FPLOAD:
33660 if (get_attr_update (insn) == UPDATE_NO
33661 && get_attr_type (dep_insn) == TYPE_MFFGPR)
33662 return 2;
33663 break;
33664 default:
33665 break;
33668 /* Fall through, no cost for output dependency. */
33669 /* FALLTHRU */
33671 case REG_DEP_ANTI:
33672 /* Anti dependency; DEP_INSN reads a register that INSN writes some
33673 cycles later. */
33674 return 0;
33676 default:
33677 gcc_unreachable ();
33680 return cost;
33683 /* Debug version of rs6000_adjust_cost. */
33685 static int
33686 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
33687 int cost, unsigned int dw)
33689 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
33691 if (ret != cost)
33693 const char *dep;
33695 switch (dep_type)
33697 default: dep = "unknown dependency"; break;
33698 case REG_DEP_TRUE: dep = "data dependency"; break;
33699 case REG_DEP_OUTPUT: dep = "output dependency"; break;
33700 case REG_DEP_ANTI: dep = "anti dependency"; break;
33703 fprintf (stderr,
33704 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
33705 "%s, insn:\n", ret, cost, dep);
33707 debug_rtx (insn);
33710 return ret;
33713 /* Return true if INSN is microcoded.
33714 Return false otherwise. */
33716 static bool
33717 is_microcoded_insn (rtx_insn *insn)
33719 if (!insn || !NONDEBUG_INSN_P (insn)
33720 || GET_CODE (PATTERN (insn)) == USE
33721 || GET_CODE (PATTERN (insn)) == CLOBBER)
33722 return false;
33724 if (rs6000_cpu_attr == CPU_CELL)
33725 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
33727 if (rs6000_sched_groups
33728 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33730 enum attr_type type = get_attr_type (insn);
33731 if ((type == TYPE_LOAD
33732 && get_attr_update (insn) == UPDATE_YES
33733 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
33734 || ((type == TYPE_LOAD || type == TYPE_STORE)
33735 && get_attr_update (insn) == UPDATE_YES
33736 && get_attr_indexed (insn) == INDEXED_YES)
33737 || type == TYPE_MFCR)
33738 return true;
33741 return false;
33744 /* The function returns true if INSN is cracked into 2 instructions
33745 by the processor (and therefore occupies 2 issue slots). */
33747 static bool
33748 is_cracked_insn (rtx_insn *insn)
33750 if (!insn || !NONDEBUG_INSN_P (insn)
33751 || GET_CODE (PATTERN (insn)) == USE
33752 || GET_CODE (PATTERN (insn)) == CLOBBER)
33753 return false;
33755 if (rs6000_sched_groups
33756 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33758 enum attr_type type = get_attr_type (insn);
33759 if ((type == TYPE_LOAD
33760 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33761 && get_attr_update (insn) == UPDATE_NO)
33762 || (type == TYPE_LOAD
33763 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
33764 && get_attr_update (insn) == UPDATE_YES
33765 && get_attr_indexed (insn) == INDEXED_NO)
33766 || (type == TYPE_STORE
33767 && get_attr_update (insn) == UPDATE_YES
33768 && get_attr_indexed (insn) == INDEXED_NO)
33769 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
33770 && get_attr_update (insn) == UPDATE_YES)
33771 || type == TYPE_DELAYED_CR
33772 || (type == TYPE_EXTS
33773 && get_attr_dot (insn) == DOT_YES)
33774 || (type == TYPE_SHIFT
33775 && get_attr_dot (insn) == DOT_YES
33776 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
33777 || (type == TYPE_MUL
33778 && get_attr_dot (insn) == DOT_YES)
33779 || type == TYPE_DIV
33780 || (type == TYPE_INSERT
33781 && get_attr_size (insn) == SIZE_32))
33782 return true;
33785 return false;
33788 /* The function returns true if INSN can be issued only from
33789 the branch slot. */
33791 static bool
33792 is_branch_slot_insn (rtx_insn *insn)
33794 if (!insn || !NONDEBUG_INSN_P (insn)
33795 || GET_CODE (PATTERN (insn)) == USE
33796 || GET_CODE (PATTERN (insn)) == CLOBBER)
33797 return false;
33799 if (rs6000_sched_groups)
33801 enum attr_type type = get_attr_type (insn);
33802 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
33803 return true;
33804 return false;
33807 return false;
33810 /* Return true if OUT_INSN sets a value that is
33811 used in the address generation computation of IN_INSN. */
33812 static bool
33813 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
33815 rtx out_set, in_set;
33817 /* For performance reasons, only handle the simple case where
33818 both insns are a single_set. */
33819 out_set = single_set (out_insn);
33820 if (out_set)
33822 in_set = single_set (in_insn);
33823 if (in_set)
33824 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
33827 return false;
33830 /* Try to determine base/offset/size parts of the given MEM.
33831 Return true if successful, false if the values couldn't all
33832 be determined.
33834 This function only looks for REG or REG+CONST address forms.
33835 REG+REG address form will return false. */
33837 static bool
33838 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
33839 HOST_WIDE_INT *size)
33841 rtx addr_rtx;
33842 if (MEM_SIZE_KNOWN_P (mem))
33843 *size = MEM_SIZE (mem);
33844 else
33845 return false;
33847 addr_rtx = (XEXP (mem, 0));
33848 if (GET_CODE (addr_rtx) == PRE_MODIFY)
33849 addr_rtx = XEXP (addr_rtx, 1);
33851 *offset = 0;
33852 while (GET_CODE (addr_rtx) == PLUS
33853 && CONST_INT_P (XEXP (addr_rtx, 1)))
33855 *offset += INTVAL (XEXP (addr_rtx, 1));
33856 addr_rtx = XEXP (addr_rtx, 0);
33858 if (!REG_P (addr_rtx))
33859 return false;
33861 *base = addr_rtx;
33862 return true;
33865 /* Return true if the target storage location of
33866 MEM1 is adjacent to the target storage location of MEM2. */
33869 static bool
33870 adjacent_mem_locations (rtx mem1, rtx mem2)
33872 rtx reg1, reg2;
33873 HOST_WIDE_INT off1, size1, off2, size2;
33875 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33876 && get_memref_parts (mem2, &reg2, &off2, &size2))
33877 return ((REGNO (reg1) == REGNO (reg2))
33878 && ((off1 + size1 == off2)
33879 || (off2 + size2 == off1)));
33881 return false;
33884 /* This function returns true if it can be determined that the two MEM
33885 locations overlap by at least 1 byte based on base reg/offset/size. */
33887 static bool
33888 mem_locations_overlap (rtx mem1, rtx mem2)
33890 rtx reg1, reg2;
33891 HOST_WIDE_INT off1, size1, off2, size2;
33893 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33894 && get_memref_parts (mem2, &reg2, &off2, &size2))
33895 return ((REGNO (reg1) == REGNO (reg2))
33896 && (((off1 <= off2) && (off1 + size1 > off2))
33897 || ((off2 <= off1) && (off2 + size2 > off1))));
33899 return false;
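/* Illustrative sketch (not part of the source): the interval tests used
   by the two predicates above, on plain integers.  An access covers
   [off, off + size); two accesses are adjacent when one ends exactly
   where the other begins, and they overlap when each starts before the
   other ends.  */

static bool
intervals_adjacent_example (HOST_WIDE_INT off1, HOST_WIDE_INT size1,
			    HOST_WIDE_INT off2, HOST_WIDE_INT size2)
{
  return off1 + size1 == off2 || off2 + size2 == off1;
}

static bool
intervals_overlap_example (HOST_WIDE_INT off1, HOST_WIDE_INT size1,
			   HOST_WIDE_INT off2, HOST_WIDE_INT size2)
{
  return (off1 <= off2 && off1 + size1 > off2)
	 || (off2 <= off1 && off2 + size2 > off1);
}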
33902 /* A C statement (sans semicolon) to update the integer scheduling
33903 priority INSN_PRIORITY (INSN). Increase the priority to execute the
33904 INSN earlier, reduce the priority to execute INSN later. Do not
33905 define this macro if you do not need to adjust the scheduling
33906 priorities of insns. */
33908 static int
33909 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
33911 rtx load_mem, str_mem;
33912 /* On machines (like the 750) which have asymmetric integer units,
33913 where one integer unit can do multiplies and divides and the other
33914 can't, reduce the priority of multiply/divide so it is scheduled
33915 before other integer operations. */
33917 #if 0
33918 if (! INSN_P (insn))
33919 return priority;
33921 if (GET_CODE (PATTERN (insn)) == USE)
33922 return priority;
33924 switch (rs6000_cpu_attr) {
33925 case CPU_PPC750:
33926 switch (get_attr_type (insn))
33928 default:
33929 break;
33931 case TYPE_MUL:
33932 case TYPE_DIV:
33933 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
33934 priority, priority);
33935 if (priority >= 0 && priority < 0x01000000)
33936 priority >>= 3;
33937 break;
33940 #endif
33942 if (insn_must_be_first_in_group (insn)
33943 && reload_completed
33944 && current_sched_info->sched_max_insns_priority
33945 && rs6000_sched_restricted_insns_priority)
33948 /* Prioritize insns that can be dispatched only in the first
33949 dispatch slot. */
33950 if (rs6000_sched_restricted_insns_priority == 1)
33951 /* Attach highest priority to insn. This means that in
33952 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
33953 precede 'priority' (critical path) considerations. */
33954 return current_sched_info->sched_max_insns_priority;
33955 else if (rs6000_sched_restricted_insns_priority == 2)
33956 /* Increase priority of insn by a minimal amount. This means that in
33957 haifa-sched.c:ready_sort(), only 'priority' (critical path)
33958 considerations precede dispatch-slot restriction considerations. */
33959 return (priority + 1);
33962 if (rs6000_cpu == PROCESSOR_POWER6
33963 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
33964 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
33965 /* Attach highest priority to insn if the scheduler has just issued two
33966 stores and this instruction is a load, or two loads and this instruction
33967 is a store. Power6 wants loads and stores scheduled alternately
33968 when possible */
33969 return current_sched_info->sched_max_insns_priority;
33971 return priority;
33974 /* Return true if the instruction is nonpipelined on the Cell. */
33975 static bool
33976 is_nonpipeline_insn (rtx_insn *insn)
33978 enum attr_type type;
33979 if (!insn || !NONDEBUG_INSN_P (insn)
33980 || GET_CODE (PATTERN (insn)) == USE
33981 || GET_CODE (PATTERN (insn)) == CLOBBER)
33982 return false;
33984 type = get_attr_type (insn);
33985 if (type == TYPE_MUL
33986 || type == TYPE_DIV
33987 || type == TYPE_SDIV
33988 || type == TYPE_DDIV
33989 || type == TYPE_SSQRT
33990 || type == TYPE_DSQRT
33991 || type == TYPE_MFCR
33992 || type == TYPE_MFCRF
33993 || type == TYPE_MFJMPR)
33995 return true;
33997 return false;
34001 /* Return how many instructions the machine can issue per cycle. */
34003 static int
34004 rs6000_issue_rate (void)
34006 /* Unless scheduling for register pressure, use an issue rate of 1 for
34007 the first scheduling pass to decrease degradation. */
34008 if (!reload_completed && !flag_sched_pressure)
34009 return 1;
34011 switch (rs6000_cpu_attr) {
34012 case CPU_RS64A:
34013 case CPU_PPC601: /* ? */
34014 case CPU_PPC7450:
34015 return 3;
34016 case CPU_PPC440:
34017 case CPU_PPC603:
34018 case CPU_PPC750:
34019 case CPU_PPC7400:
34020 case CPU_PPC8540:
34021 case CPU_PPC8548:
34022 case CPU_CELL:
34023 case CPU_PPCE300C2:
34024 case CPU_PPCE300C3:
34025 case CPU_PPCE500MC:
34026 case CPU_PPCE500MC64:
34027 case CPU_PPCE5500:
34028 case CPU_PPCE6500:
34029 case CPU_TITAN:
34030 return 2;
34031 case CPU_PPC476:
34032 case CPU_PPC604:
34033 case CPU_PPC604E:
34034 case CPU_PPC620:
34035 case CPU_PPC630:
34036 return 4;
34037 case CPU_POWER4:
34038 case CPU_POWER5:
34039 case CPU_POWER6:
34040 case CPU_POWER7:
34041 return 5;
34042 case CPU_POWER8:
34043 return 7;
34044 case CPU_POWER9:
34045 return 6;
34046 default:
34047 return 1;
34051 /* Return how many instructions to look ahead for better insn
34052 scheduling. */
34054 static int
34055 rs6000_use_sched_lookahead (void)
34057 switch (rs6000_cpu_attr)
34059 case CPU_PPC8540:
34060 case CPU_PPC8548:
34061 return 4;
34063 case CPU_CELL:
34064 return (reload_completed ? 8 : 0);
34066 default:
34067 return 0;
34071 /* We are choosing insn from the ready queue. Return zero if INSN can be
34072 chosen. */
34073 static int
34074 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
34076 if (ready_index == 0)
34077 return 0;
34079 if (rs6000_cpu_attr != CPU_CELL)
34080 return 0;
34082 gcc_assert (insn != NULL_RTX && INSN_P (insn));
34084 if (!reload_completed
34085 || is_nonpipeline_insn (insn)
34086 || is_microcoded_insn (insn))
34087 return 1;
34089 return 0;
34092 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
34093 and return true. */
34095 static bool
34096 find_mem_ref (rtx pat, rtx *mem_ref)
34098 const char * fmt;
34099 int i, j;
34101 /* stack_tie does not produce any real memory traffic. */
34102 if (tie_operand (pat, VOIDmode))
34103 return false;
34105 if (GET_CODE (pat) == MEM)
34107 *mem_ref = pat;
34108 return true;
34111 /* Recursively process the pattern. */
34112 fmt = GET_RTX_FORMAT (GET_CODE (pat));
34114 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
34116 if (fmt[i] == 'e')
34118 if (find_mem_ref (XEXP (pat, i), mem_ref))
34119 return true;
34121 else if (fmt[i] == 'E')
34122 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
34124 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
34125 return true;
34129 return false;
34132 /* Determine if PAT is a PATTERN of a load insn. */
34134 static bool
34135 is_load_insn1 (rtx pat, rtx *load_mem)
34137 if (!pat || pat == NULL_RTX)
34138 return false;
34140 if (GET_CODE (pat) == SET)
34141 return find_mem_ref (SET_SRC (pat), load_mem);
34143 if (GET_CODE (pat) == PARALLEL)
34145 int i;
34147 for (i = 0; i < XVECLEN (pat, 0); i++)
34148 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
34149 return true;
34152 return false;
34155 /* Determine if INSN loads from memory. */
34157 static bool
34158 is_load_insn (rtx insn, rtx *load_mem)
34160 if (!insn || !INSN_P (insn))
34161 return false;
34163 if (CALL_P (insn))
34164 return false;
34166 return is_load_insn1 (PATTERN (insn), load_mem);
34169 /* Determine if PAT is a PATTERN of a store insn. */
34171 static bool
34172 is_store_insn1 (rtx pat, rtx *str_mem)
34174 if (!pat || pat == NULL_RTX)
34175 return false;
34177 if (GET_CODE (pat) == SET)
34178 return find_mem_ref (SET_DEST (pat), str_mem);
34180 if (GET_CODE (pat) == PARALLEL)
34182 int i;
34184 for (i = 0; i < XVECLEN (pat, 0); i++)
34185 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
34186 return true;
34189 return false;
34192 /* Determine if INSN stores to memory. */
34194 static bool
34195 is_store_insn (rtx insn, rtx *str_mem)
34197 if (!insn || !INSN_P (insn))
34198 return false;
34200 return is_store_insn1 (PATTERN (insn), str_mem);
34203 /* Return whether TYPE is a Power9 pairable vector instruction type. */
34205 static bool
34206 is_power9_pairable_vec_type (enum attr_type type)
34208 switch (type)
34210 case TYPE_VECSIMPLE:
34211 case TYPE_VECCOMPLEX:
34212 case TYPE_VECDIV:
34213 case TYPE_VECCMP:
34214 case TYPE_VECPERM:
34215 case TYPE_VECFLOAT:
34216 case TYPE_VECFDIV:
34217 case TYPE_VECDOUBLE:
34218 return true;
34219 default:
34220 break;
34222 return false;
34225 /* Returns whether the dependence between INSN and NEXT is considered
34226 costly by the given target. */
34228 static bool
34229 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
34231 rtx insn;
34232 rtx next;
34233 rtx load_mem, str_mem;
34235 /* If the flag is not enabled - no dependence is considered costly;
34236 allow all dependent insns in the same group.
34237 This is the most aggressive option. */
34238 if (rs6000_sched_costly_dep == no_dep_costly)
34239 return false;
34241 /* If the flag is set to 1 - a dependence is always considered costly;
34242 do not allow dependent instructions in the same group.
34243 This is the most conservative option. */
34244 if (rs6000_sched_costly_dep == all_deps_costly)
34245 return true;
34247 insn = DEP_PRO (dep);
34248 next = DEP_CON (dep);
34250 if (rs6000_sched_costly_dep == store_to_load_dep_costly
34251 && is_load_insn (next, &load_mem)
34252 && is_store_insn (insn, &str_mem))
34253 /* Prevent load after store in the same group. */
34254 return true;
34256 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
34257 && is_load_insn (next, &load_mem)
34258 && is_store_insn (insn, &str_mem)
34259 && DEP_TYPE (dep) == REG_DEP_TRUE
34260 && mem_locations_overlap (str_mem, load_mem))
34261 /* Prevent load after store in the same group if it is a true
34262 dependence. */
34263 return true;
34265 /* The flag is set to X; dependences with latency >= X are considered costly,
34266 and will not be scheduled in the same group. */
34267 if (rs6000_sched_costly_dep <= max_dep_latency
34268 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
34269 return true;
34271 return false;
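/* Worked example (sketch): with -msched-costly-dep=3, the final test
   above makes a dependence of cost 4 at distance 0 costly (4 - 0 >= 3),
   while the same dependence with two groups already between the insns
   is not (4 - 2 < 3).  */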
34274 /* Return the next insn after INSN that is found before TAIL is reached,
34275 skipping any "non-active" insns - insns that will not actually occupy
34276 an issue slot. Return NULL_RTX if such an insn is not found. */
34278 static rtx_insn *
34279 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
34281 if (insn == NULL_RTX || insn == tail)
34282 return NULL;
34284 while (1)
34286 insn = NEXT_INSN (insn);
34287 if (insn == NULL_RTX || insn == tail)
34288 return NULL;
34290 if (CALL_P (insn)
34291 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
34292 || (NONJUMP_INSN_P (insn)
34293 && GET_CODE (PATTERN (insn)) != USE
34294 && GET_CODE (PATTERN (insn)) != CLOBBER
34295 && INSN_CODE (insn) != CODE_FOR_stack_tie))
34296 break;
34298 return insn;
34301 /* Do Power9 specific sched_reorder2 reordering of ready list. */
34303 static int
34304 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
34306 int pos;
34307 int i;
34308 rtx_insn *tmp;
34309 enum attr_type type, type2;
34311 type = get_attr_type (last_scheduled_insn);
34313 /* Try to issue fixed point divides back-to-back in pairs so they will be
34314 routed to separate execution units and execute in parallel. */
34315 if (type == TYPE_DIV && divide_cnt == 0)
34317 /* First divide has been scheduled. */
34318 divide_cnt = 1;
34320 /* Scan the ready list looking for another divide; if found, move it
34321 to the end of the list so it is chosen next. */
34322 pos = lastpos;
34323 while (pos >= 0)
34325 if (recog_memoized (ready[pos]) >= 0
34326 && get_attr_type (ready[pos]) == TYPE_DIV)
34328 tmp = ready[pos];
34329 for (i = pos; i < lastpos; i++)
34330 ready[i] = ready[i + 1];
34331 ready[lastpos] = tmp;
34332 break;
34334 pos--;
34337 else
34339 /* Last insn was the 2nd divide or not a divide, reset the counter. */
34340 divide_cnt = 0;
34342 /* The best dispatch throughput for vector and vector load insns can be
34343 achieved by interleaving a vector and vector load such that they'll
34344 dispatch to the same superslice. If this pairing cannot be achieved
34345 then it is best to pair vector insns together and vector load insns
34346 together.
34348 To aid in this pairing, vec_pairing maintains the current state with
34349 the following values:
34351 0 : Initial state, no vecload/vector pairing has been started.
34353 1 : A vecload or vector insn has been issued and a candidate for
34354 pairing has been found and moved to the end of the ready
34355 list. */
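/* Worked example (sketch): after a vecload issues with vec_pairing
   still 0, a pairable vector insn found anywhere in the ready list is
   rotated to ready[lastpos], the slot the scheduler consults next, and
   vec_pairing becomes 1; the next call then falls through to the reset
   at the bottom of this function.  */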
34356 if (type == TYPE_VECLOAD)
34358 /* Issued a vecload. */
34359 if (vec_pairing == 0)
34361 int vecload_pos = -1;
34362 /* We issued a single vecload, look for a vector insn to pair it
34363 with. If one isn't found, try to pair another vecload. */
34364 pos = lastpos;
34365 while (pos >= 0)
34367 if (recog_memoized (ready[pos]) >= 0)
34369 type2 = get_attr_type (ready[pos]);
34370 if (is_power9_pairable_vec_type (type2))
34372 /* Found a vector insn to pair with, move it to the
34373 end of the ready list so it is scheduled next. */
34374 tmp = ready[pos];
34375 for (i = pos; i < lastpos; i++)
34376 ready[i] = ready[i + 1];
34377 ready[lastpos] = tmp;
34378 vec_pairing = 1;
34379 return cached_can_issue_more;
34381 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
34382 /* Remember position of first vecload seen. */
34383 vecload_pos = pos;
34385 pos--;
34387 if (vecload_pos >= 0)
34389 /* Didn't find a vector to pair with but did find a vecload,
34390 move it to the end of the ready list. */
34391 tmp = ready[vecload_pos];
34392 for (i = vecload_pos; i < lastpos; i++)
34393 ready[i] = ready[i + 1];
34394 ready[lastpos] = tmp;
34395 vec_pairing = 1;
34396 return cached_can_issue_more;
34400 else if (is_power9_pairable_vec_type (type))
34402 /* Issued a vector operation. */
34403 if (vec_pairing == 0)
34405 int vec_pos = -1;
34406 /* We issued a single vector insn, look for a vecload to pair it
34407 with. If one isn't found, try to pair another vector. */
34408 pos = lastpos;
34409 while (pos >= 0)
34411 if (recog_memoized (ready[pos]) >= 0)
34413 type2 = get_attr_type (ready[pos]);
34414 if (type2 == TYPE_VECLOAD)
34416 /* Found a vecload insn to pair with, move it to the
34417 end of the ready list so it is scheduled next. */
34418 tmp = ready[pos];
34419 for (i = pos; i < lastpos; i++)
34420 ready[i] = ready[i + 1];
34421 ready[lastpos] = tmp;
34422 vec_pairing = 1;
34423 return cached_can_issue_more;
34425 else if (is_power9_pairable_vec_type (type2)
34426 && vec_pos == -1)
34427 /* Remember position of first vector insn seen. */
34428 vec_pos = pos;
34430 pos--;
34432 if (vec_pos >= 0)
34434 /* Didn't find a vecload to pair with but did find a vector
34435 insn, move it to the end of the ready list. */
34436 tmp = ready[vec_pos];
34437 for (i = vec_pos; i < lastpos; i++)
34438 ready[i] = ready[i + 1];
34439 ready[lastpos] = tmp;
34440 vec_pairing = 1;
34441 return cached_can_issue_more;
34446 /* We've either finished a vec/vecload pair, couldn't find an insn to
34447 continue the current pair, or the last insn had nothing to do
34448 with pairing. In any case, reset the state. */
34449 vec_pairing = 0;
34452 return cached_can_issue_more;
34455 /* We are about to begin issuing insns for this clock cycle. */
34457 static int
34458 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
34459 rtx_insn **ready ATTRIBUTE_UNUSED,
34460 int *pn_ready ATTRIBUTE_UNUSED,
34461 int clock_var ATTRIBUTE_UNUSED)
34463 int n_ready = *pn_ready;
34465 if (sched_verbose)
34466 fprintf (dump, "// rs6000_sched_reorder :\n");
34468 /* Reorder the ready list, if the next insn to be issued
34469 is a nonpipelined insn. */
34470 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
34472 if (is_nonpipeline_insn (ready[n_ready - 1])
34473 && (recog_memoized (ready[n_ready - 2]) > 0))
34474 /* Simply swap first two insns. */
34475 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
34478 if (rs6000_cpu == PROCESSOR_POWER6)
34479 load_store_pendulum = 0;
34481 return rs6000_issue_rate ();
34484 /* Like rs6000_sched_reorder, but called after issuing each insn. */
34486 static int
34487 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
34488 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
34490 if (sched_verbose)
34491 fprintf (dump, "// rs6000_sched_reorder2 :\n");
34493 /* For Power6, we need to handle some special cases to try and keep the
34494 store queue from overflowing and triggering expensive flushes.
34496 This code monitors how load and store instructions are being issued
34497 and skews the ready list one way or the other to increase the likelihood
34498 that a desired instruction is issued at the proper time.
34500 A couple of things are done. First, we maintain a "load_store_pendulum"
34501 to track the current state of load/store issue.
34503 - If the pendulum is at zero, then no loads or stores have been
34504 issued in the current cycle so we do nothing.
34506 - If the pendulum is 1, then a single load has been issued in this
34507 cycle and we attempt to locate another load in the ready list to
34508 issue with it.
34510 - If the pendulum is -2, then two stores have already been
34511 issued in this cycle, so we increase the priority of the first load
34512 in the ready list to increase its likelihood of being chosen first
34513 in the next cycle.
34515 - If the pendulum is -1, then a single store has been issued in this
34516 cycle and we attempt to locate another store in the ready list to
34517 issue with it, preferring a store to an adjacent memory location to
34518 facilitate store pairing in the store queue.
34520 - If the pendulum is 2, then two loads have already been
34521 issued in this cycle, so we increase the priority of the first store
34522 in the ready list to increase its likelihood of being chosen first
34523 in the next cycle.
34525 - If the pendulum < -2 or > 2, then do nothing.
34527 Note: This code covers the most common scenarios. There exist
34528 non-load/store instructions which make use of the LSU and which
34529 would need to be accounted for to strictly model the behavior
34530 of the machine. Those instructions are currently left unaccounted
34531 for, to help minimize the compile-time overhead of this code.
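/* Added commentary (not in the upstream file): a minimal trace of the
   pendulum bookkeeping described above.  The issue sequence is
   hypothetical; is_load_insn/is_store_insn do the classification in
   the real hook.  */
#if 0
int pendulum = 0;   /* balanced: nothing issued yet this cycle */
pendulum++;         /* a load issues: swing right */
/* pendulum == 1: the hook scans the ready list for a second load to
   issue alongside it.  */
pendulum++;         /* a second load issues: swing right again */
/* pendulum == 2: two loads this cycle, so the first store in the
   ready list gets a priority boost for the next cycle.  */
#endif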
34533 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
34535 int pos;
34536 int i;
34537 rtx_insn *tmp;
34538 rtx load_mem, str_mem;
34540 if (is_store_insn (last_scheduled_insn, &str_mem))
34541 /* Issuing a store, swing the load_store_pendulum to the left */
34542 load_store_pendulum--;
34543 else if (is_load_insn (last_scheduled_insn, &load_mem))
34544 /* Issuing a load, swing the load_store_pendulum to the right */
34545 load_store_pendulum++;
34546 else
34547 return cached_can_issue_more;
34549 /* If the pendulum is balanced, or there is only one instruction on
34550 the ready list, then all is well, so return. */
34551 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
34552 return cached_can_issue_more;
34554 if (load_store_pendulum == 1)
34556 /* A load has been issued in this cycle. Scan the ready list
34557 for another load to issue with it */
34558 pos = *pn_ready-1;
34560 while (pos >= 0)
34562 if (is_load_insn (ready[pos], &load_mem))
34564 /* Found a load. Move it to the head of the ready list,
34565 and adjust its priority so that it is more likely to
34566 stay there */
34567 tmp = ready[pos];
34568 for (i=pos; i<*pn_ready-1; i++)
34569 ready[i] = ready[i + 1];
34570 ready[*pn_ready-1] = tmp;
34572 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34573 INSN_PRIORITY (tmp)++;
34574 break;
34576 pos--;
34579 else if (load_store_pendulum == -2)
34581 /* Two stores have been issued in this cycle. Increase the
34582 priority of the first load in the ready list to favor it for
34583 issuing in the next cycle. */
34584 pos = *pn_ready-1;
34586 while (pos >= 0)
34588 if (is_load_insn (ready[pos], &load_mem)
34589 && !sel_sched_p ()
34590 && INSN_PRIORITY_KNOWN (ready[pos]))
34592 INSN_PRIORITY (ready[pos])++;
34594 /* Adjust the pendulum to account for the fact that a load
34595 was found and increased in priority. This is to prevent
34596 increasing the priority of multiple loads */
34597 load_store_pendulum--;
34599 break;
34601 pos--;
34604 else if (load_store_pendulum == -1)
34606 /* A store has been issued in this cycle. Scan the ready list for
34607 another store to issue with it, preferring a store to an adjacent
34608 memory location */
34609 int first_store_pos = -1;
34611 pos = *pn_ready-1;
34613 while (pos >= 0)
34615 if (is_store_insn (ready[pos], &str_mem))
34617 rtx str_mem2;
34618 /* Maintain the index of the first store found on the
34619 list */
34620 if (first_store_pos == -1)
34621 first_store_pos = pos;
34623 if (is_store_insn (last_scheduled_insn, &str_mem2)
34624 && adjacent_mem_locations (str_mem, str_mem2))
34626 /* Found an adjacent store. Move it to the head of the
34627 ready list, and adjust its priority so that it is
34628 more likely to stay there */
34629 tmp = ready[pos];
34630 for (i=pos; i<*pn_ready-1; i++)
34631 ready[i] = ready[i + 1];
34632 ready[*pn_ready-1] = tmp;
34634 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34635 INSN_PRIORITY (tmp)++;
34637 first_store_pos = -1;
34639 break;
34642 pos--;
34645 if (first_store_pos >= 0)
34647 /* An adjacent store wasn't found, but a non-adjacent store was,
34648 so move the non-adjacent store to the front of the ready
34649 list, and adjust its priority so that it is more likely to
34650 stay there. */
34651 tmp = ready[first_store_pos];
34652 for (i=first_store_pos; i<*pn_ready-1; i++)
34653 ready[i] = ready[i + 1];
34654 ready[*pn_ready-1] = tmp;
34655 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34656 INSN_PRIORITY (tmp)++;
34659 else if (load_store_pendulum == 2)
34661 /* Two loads have been issued in this cycle. Increase the priority
34662 of the first store in the ready list to favor it for issuing in
34663 the next cycle. */
34664 pos = *pn_ready-1;
34666 while (pos >= 0)
34668 if (is_store_insn (ready[pos], &str_mem)
34669 && !sel_sched_p ()
34670 && INSN_PRIORITY_KNOWN (ready[pos]))
34672 INSN_PRIORITY (ready[pos])++;
34674 /* Adjust the pendulum to account for the fact that a store
34675 was found and increased in priority. This is to prevent
34676 increasing the priority of multiple stores */
34677 load_store_pendulum++;
34679 break;
34681 pos--;
34686 /* Do Power9 dependent reordering if necessary. */
34687 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
34688 && recog_memoized (last_scheduled_insn) >= 0)
34689 return power9_sched_reorder2 (ready, *pn_ready - 1);
34691 return cached_can_issue_more;
34694 /* Return whether the presence of INSN causes a dispatch group termination
34695 of group WHICH_GROUP.
34697 If WHICH_GROUP == current_group, this function will return true if INSN
34698 causes the termination of the current group (i.e., the dispatch group to
34699 which INSN belongs). This means that INSN will be the last insn in the
34700 group it belongs to.
34702 If WHICH_GROUP == previous_group, this function will return true if INSN
34703 causes the termination of the previous group (i.e., the dispatch group that
34704 precedes the group to which INSN belongs). This means that INSN will be
34705 the first insn in the group it belongs to. */
34707 static bool
34708 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
34710 bool first, last;
34712 if (! insn)
34713 return false;
34715 first = insn_must_be_first_in_group (insn);
34716 last = insn_must_be_last_in_group (insn);
34718 if (first && last)
34719 return true;
34721 if (which_group == current_group)
34722 return last;
34723 else if (which_group == previous_group)
34724 return first;
34726 return false;
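/* Added commentary (not in the upstream file): the result computed
   above, tabulated over the two predicates defined below.

     must_be_first  must_be_last  current_group  previous_group
          0              0            false          false
          0              1            true           false
          1              0            false          true
          1              1            true           true            */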
34730 static bool
34731 insn_must_be_first_in_group (rtx_insn *insn)
34733 enum attr_type type;
34735 if (!insn
34736 || NOTE_P (insn)
34737 || DEBUG_INSN_P (insn)
34738 || GET_CODE (PATTERN (insn)) == USE
34739 || GET_CODE (PATTERN (insn)) == CLOBBER)
34740 return false;
34742 switch (rs6000_cpu)
34744 case PROCESSOR_POWER5:
34745 if (is_cracked_insn (insn))
34746 return true;
34747 /* FALLTHRU */
34748 case PROCESSOR_POWER4:
34749 if (is_microcoded_insn (insn))
34750 return true;
34752 if (!rs6000_sched_groups)
34753 return false;
34755 type = get_attr_type (insn);
34757 switch (type)
34759 case TYPE_MFCR:
34760 case TYPE_MFCRF:
34761 case TYPE_MTCR:
34762 case TYPE_DELAYED_CR:
34763 case TYPE_CR_LOGICAL:
34764 case TYPE_MTJMPR:
34765 case TYPE_MFJMPR:
34766 case TYPE_DIV:
34767 case TYPE_LOAD_L:
34768 case TYPE_STORE_C:
34769 case TYPE_ISYNC:
34770 case TYPE_SYNC:
34771 return true;
34772 default:
34773 break;
34775 break;
34776 case PROCESSOR_POWER6:
34777 type = get_attr_type (insn);
34779 switch (type)
34781 case TYPE_EXTS:
34782 case TYPE_CNTLZ:
34783 case TYPE_TRAP:
34784 case TYPE_MUL:
34785 case TYPE_INSERT:
34786 case TYPE_FPCOMPARE:
34787 case TYPE_MFCR:
34788 case TYPE_MTCR:
34789 case TYPE_MFJMPR:
34790 case TYPE_MTJMPR:
34791 case TYPE_ISYNC:
34792 case TYPE_SYNC:
34793 case TYPE_LOAD_L:
34794 case TYPE_STORE_C:
34795 return true;
34796 case TYPE_SHIFT:
34797 if (get_attr_dot (insn) == DOT_NO
34798 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34799 return true;
34800 else
34801 break;
34802 case TYPE_DIV:
34803 if (get_attr_size (insn) == SIZE_32)
34804 return true;
34805 else
34806 break;
34807 case TYPE_LOAD:
34808 case TYPE_STORE:
34809 case TYPE_FPLOAD:
34810 case TYPE_FPSTORE:
34811 if (get_attr_update (insn) == UPDATE_YES)
34812 return true;
34813 else
34814 break;
34815 default:
34816 break;
34818 break;
34819 case PROCESSOR_POWER7:
34820 type = get_attr_type (insn);
34822 switch (type)
34824 case TYPE_CR_LOGICAL:
34825 case TYPE_MFCR:
34826 case TYPE_MFCRF:
34827 case TYPE_MTCR:
34828 case TYPE_DIV:
34829 case TYPE_ISYNC:
34830 case TYPE_LOAD_L:
34831 case TYPE_STORE_C:
34832 case TYPE_MFJMPR:
34833 case TYPE_MTJMPR:
34834 return true;
34835 case TYPE_MUL:
34836 case TYPE_SHIFT:
34837 case TYPE_EXTS:
34838 if (get_attr_dot (insn) == DOT_YES)
34839 return true;
34840 else
34841 break;
34842 case TYPE_LOAD:
34843 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34844 || get_attr_update (insn) == UPDATE_YES)
34845 return true;
34846 else
34847 break;
34848 case TYPE_STORE:
34849 case TYPE_FPLOAD:
34850 case TYPE_FPSTORE:
34851 if (get_attr_update (insn) == UPDATE_YES)
34852 return true;
34853 else
34854 break;
34855 default:
34856 break;
34858 break;
34859 case PROCESSOR_POWER8:
34860 type = get_attr_type (insn);
34862 switch (type)
34864 case TYPE_CR_LOGICAL:
34865 case TYPE_DELAYED_CR:
34866 case TYPE_MFCR:
34867 case TYPE_MFCRF:
34868 case TYPE_MTCR:
34869 case TYPE_SYNC:
34870 case TYPE_ISYNC:
34871 case TYPE_LOAD_L:
34872 case TYPE_STORE_C:
34873 case TYPE_VECSTORE:
34874 case TYPE_MFJMPR:
34875 case TYPE_MTJMPR:
34876 return true;
34877 case TYPE_SHIFT:
34878 case TYPE_EXTS:
34879 case TYPE_MUL:
34880 if (get_attr_dot (insn) == DOT_YES)
34881 return true;
34882 else
34883 break;
34884 case TYPE_LOAD:
34885 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34886 || get_attr_update (insn) == UPDATE_YES)
34887 return true;
34888 else
34889 break;
34890 case TYPE_STORE:
34891 if (get_attr_update (insn) == UPDATE_YES
34892 && get_attr_indexed (insn) == INDEXED_YES)
34893 return true;
34894 else
34895 break;
34896 default:
34897 break;
34899 break;
34900 default:
34901 break;
34904 return false;
34907 static bool
34908 insn_must_be_last_in_group (rtx_insn *insn)
34910 enum attr_type type;
34912 if (!insn
34913 || NOTE_P (insn)
34914 || DEBUG_INSN_P (insn)
34915 || GET_CODE (PATTERN (insn)) == USE
34916 || GET_CODE (PATTERN (insn)) == CLOBBER)
34917 return false;
34919 switch (rs6000_cpu) {
34920 case PROCESSOR_POWER4:
34921 case PROCESSOR_POWER5:
34922 if (is_microcoded_insn (insn))
34923 return true;
34925 if (is_branch_slot_insn (insn))
34926 return true;
34928 break;
34929 case PROCESSOR_POWER6:
34930 type = get_attr_type (insn);
34932 switch (type)
34934 case TYPE_EXTS:
34935 case TYPE_CNTLZ:
34936 case TYPE_TRAP:
34937 case TYPE_MUL:
34938 case TYPE_FPCOMPARE:
34939 case TYPE_MFCR:
34940 case TYPE_MTCR:
34941 case TYPE_MFJMPR:
34942 case TYPE_MTJMPR:
34943 case TYPE_ISYNC:
34944 case TYPE_SYNC:
34945 case TYPE_LOAD_L:
34946 case TYPE_STORE_C:
34947 return true;
34948 case TYPE_SHIFT:
34949 if (get_attr_dot (insn) == DOT_NO
34950 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34951 return true;
34952 else
34953 break;
34954 case TYPE_DIV:
34955 if (get_attr_size (insn) == SIZE_32)
34956 return true;
34957 else
34958 break;
34959 default:
34960 break;
34962 break;
34963 case PROCESSOR_POWER7:
34964 type = get_attr_type (insn);
34966 switch (type)
34968 case TYPE_ISYNC:
34969 case TYPE_SYNC:
34970 case TYPE_LOAD_L:
34971 case TYPE_STORE_C:
34972 return true;
34973 case TYPE_LOAD:
34974 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34975 && get_attr_update (insn) == UPDATE_YES)
34976 return true;
34977 else
34978 break;
34979 case TYPE_STORE:
34980 if (get_attr_update (insn) == UPDATE_YES
34981 && get_attr_indexed (insn) == INDEXED_YES)
34982 return true;
34983 else
34984 break;
34985 default:
34986 break;
34988 break;
34989 case PROCESSOR_POWER8:
34990 type = get_attr_type (insn);
34992 switch (type)
34994 case TYPE_MFCR:
34995 case TYPE_MTCR:
34996 case TYPE_ISYNC:
34997 case TYPE_SYNC:
34998 case TYPE_LOAD_L:
34999 case TYPE_STORE_C:
35000 return true;
35001 case TYPE_LOAD:
35002 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
35003 && get_attr_update (insn) == UPDATE_YES)
35004 return true;
35005 else
35006 break;
35007 case TYPE_STORE:
35008 if (get_attr_update (insn) == UPDATE_YES
35009 && get_attr_indexed (insn) == INDEXED_YES)
35010 return true;
35011 else
35012 break;
35013 default:
35014 break;
35016 break;
35017 default:
35018 break;
35021 return false;
35024 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
35025 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
35027 static bool
35028 is_costly_group (rtx *group_insns, rtx next_insn)
35030 int i;
35031 int issue_rate = rs6000_issue_rate ();
35033 for (i = 0; i < issue_rate; i++)
35035 sd_iterator_def sd_it;
35036 dep_t dep;
35037 rtx insn = group_insns[i];
35039 if (!insn)
35040 continue;
35042 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
35044 rtx next = DEP_CON (dep);
35046 if (next == next_insn
35047 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
35048 return true;
35052 return false;
35055 /* Utility of the function redefine_groups.
35056 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
35057 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
35058 to keep it "far" (in a separate group) from GROUP_INSNS, following
35059 one of the following schemes, depending on the value of the flag
35060 -minsert-sched-nops=X:
35061 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
35062 in order to force NEXT_INSN into a separate group.
35063 (2) X < sched_finish_regroup_exact: insert exactly X nops.
35064 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
35065 insertion (has a group just ended, how many vacant issue slots remain in the
35066 last group, and how many dispatch groups were encountered so far). */
35068 static int
35069 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
35070 rtx_insn *next_insn, bool *group_end, int can_issue_more,
35071 int *group_count)
35073 rtx nop;
35074 bool force;
35075 int issue_rate = rs6000_issue_rate ();
35076 bool end = *group_end;
35077 int i;
35079 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
35080 return can_issue_more;
35082 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
35083 return can_issue_more;
35085 force = is_costly_group (group_insns, next_insn);
35086 if (!force)
35087 return can_issue_more;
35089 if (sched_verbose > 6)
35090 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
35091 *group_count ,can_issue_more);
35093 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
35095 if (*group_end)
35096 can_issue_more = 0;
35098 /* Since only a branch can be issued in the last issue_slot, it is
35099 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
35100 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
35101 in this case the last nop will start a new group and the branch
35102 will be forced to the new group. */
35103 if (can_issue_more && !is_branch_slot_insn (next_insn))
35104 can_issue_more--;
35106 /* Do we have a special group ending nop? */
35107 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
35108 || rs6000_cpu_attr == CPU_POWER8)
35110 nop = gen_group_ending_nop ();
35111 emit_insn_before (nop, next_insn);
35112 can_issue_more = 0;
35114 else
35115 while (can_issue_more > 0)
35117 nop = gen_nop ();
35118 emit_insn_before (nop, next_insn);
35119 can_issue_more--;
35122 *group_end = true;
35123 return 0;
35126 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
35128 int n_nops = rs6000_sched_insert_nops;
35130 /* Nops can't be issued from the branch slot, so the effective
35131 issue_rate for nops is 'issue_rate - 1'. */
35132 if (can_issue_more == 0)
35133 can_issue_more = issue_rate;
35134 can_issue_more--;
35135 if (can_issue_more == 0)
35137 can_issue_more = issue_rate - 1;
35138 (*group_count)++;
35139 end = true;
35140 for (i = 0; i < issue_rate; i++)
35142 group_insns[i] = 0;
35146 while (n_nops > 0)
35148 nop = gen_nop ();
35149 emit_insn_before (nop, next_insn);
35150 if (can_issue_more == issue_rate - 1) /* new group begins */
35151 end = false;
35152 can_issue_more--;
35153 if (can_issue_more == 0)
35155 can_issue_more = issue_rate - 1;
35156 (*group_count)++;
35157 end = true;
35158 for (i = 0; i < issue_rate; i++)
35160 group_insns[i] = 0;
35163 n_nops--;
35166 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
35167 can_issue_more++;
35169 /* Is next_insn going to start a new group? */
35170 *group_end
35171 = (end
35172 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35173 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35174 || (can_issue_more < issue_rate &&
35175 insn_terminates_group_p (next_insn, previous_group)));
35176 if (*group_end && end)
35177 (*group_count)--;
35179 if (sched_verbose > 6)
35180 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
35181 *group_count, can_issue_more);
35182 return can_issue_more;
35185 return can_issue_more;
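/* Added commentary (not in the upstream file): a standalone sketch of
   the scheme-(2) arithmetic above.  Nops cannot occupy the branch
   slot, so each dispatch group absorbs at most issue_rate - 1 of
   them; the numbers here are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int issue_rate = 5;                 /* e.g. a 5-slot dispatch group */
  int n_nops = 7;                     /* as if -minsert-sched-nops=7 */
  int can_issue_more = issue_rate - 1;
  int groups_closed = 0;

  while (n_nops-- > 0)
    if (--can_issue_more == 0)
      {
        can_issue_more = issue_rate - 1;
        groups_closed++;              /* a nop just filled a group */
      }

  printf ("groups closed by nops: %d\n", groups_closed);   /* prints 1 */
  return 0;
}
#endif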
35188 /* This function tries to synch the dispatch groups that the compiler "sees"
35189 with the dispatch groups that the processor dispatcher is expected to
35190 form in practice. It tries to achieve this synchronization by forcing the
35191 estimated processor grouping on the compiler (as opposed to the function
35192 'pad_groups', which tries to force the scheduler's grouping on the processor).
35194 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
35195 examines the (estimated) dispatch groups that will be formed by the processor
35196 dispatcher. It marks these group boundaries to reflect the estimated
35197 processor grouping, overriding the grouping that the scheduler had marked.
35198 Depending on the value of the flag '-minsert-sched-nops' this function can
35199 force certain insns into separate groups or force a certain distance between
35200 them by inserting nops, for example, if there exists a "costly dependence"
35201 between the insns.
35203 The function estimates the group boundaries that the processor will form as
35204 follows: It keeps track of how many vacant issue slots are available after
35205 each insn. A subsequent insn will start a new group if one of the following
35206 4 cases applies:
35207 - no more vacant issue slots remain in the current dispatch group.
35208 - only the last issue slot, which is the branch slot, is vacant, but the next
35209 insn is not a branch.
35210 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
35211 which means that a cracked insn (which occupies two issue slots) can't be
35212 issued in this group.
35213 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
35214 start a new group. */
35216 static int
35217 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
35218 rtx_insn *tail)
35220 rtx_insn *insn, *next_insn;
35221 int issue_rate;
35222 int can_issue_more;
35223 int slot, i;
35224 bool group_end;
35225 int group_count = 0;
35226 rtx *group_insns;
35228 /* Initialize. */
35229 issue_rate = rs6000_issue_rate ();
35230 group_insns = XALLOCAVEC (rtx, issue_rate);
35231 for (i = 0; i < issue_rate; i++)
35233 group_insns[i] = 0;
35235 can_issue_more = issue_rate;
35236 slot = 0;
35237 insn = get_next_active_insn (prev_head_insn, tail);
35238 group_end = false;
35240 while (insn != NULL_RTX)
35242 slot = (issue_rate - can_issue_more);
35243 group_insns[slot] = insn;
35244 can_issue_more =
35245 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
35246 if (insn_terminates_group_p (insn, current_group))
35247 can_issue_more = 0;
35249 next_insn = get_next_active_insn (insn, tail);
35250 if (next_insn == NULL_RTX)
35251 return group_count + 1;
35253 /* Is next_insn going to start a new group? */
35254 group_end
35255 = (can_issue_more == 0
35256 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35257 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35258 || (can_issue_more < issue_rate &&
35259 insn_terminates_group_p (next_insn, previous_group)));
35261 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
35262 next_insn, &group_end, can_issue_more,
35263 &group_count);
35265 if (group_end)
35267 group_count++;
35268 can_issue_more = 0;
35269 for (i = 0; i < issue_rate; i++)
35271 group_insns[i] = 0;
35275 if (GET_MODE (next_insn) == TImode && can_issue_more)
35276 PUT_MODE (next_insn, VOIDmode);
35277 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
35278 PUT_MODE (next_insn, TImode);
35280 insn = next_insn;
35281 if (can_issue_more == 0)
35282 can_issue_more = issue_rate;
35283 } /* while */
35285 return group_count;
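/* Added commentary (not in the upstream file): the four boundary
   tests used above, restated as one standalone predicate.  The
   boolean parameters are hypothetical stand-ins for
   is_branch_slot_insn, is_cracked_insn and insn_terminates_group_p.  */
#if 0
#include <stdbool.h>

static bool
starts_new_group (int can_issue_more, int issue_rate,
                  bool next_is_branch, bool next_is_cracked,
                  bool next_must_be_first)
{
  return (can_issue_more == 0                          /* no slots left */
          || (can_issue_more == 1 && !next_is_branch)  /* only the branch slot */
          || (can_issue_more <= 2 && next_is_cracked)  /* cracked needs 2 slots */
          || (can_issue_more < issue_rate              /* must start a group */
              && next_must_be_first));
}
#endif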
35288 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
35289 dispatch group boundaries that the scheduler had marked. Pad with nops
35290 any dispatch groups which have vacant issue slots, in order to force the
35291 scheduler's grouping on the processor dispatcher. The function
35292 returns the number of dispatch groups found. */
35294 static int
35295 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
35296 rtx_insn *tail)
35298 rtx_insn *insn, *next_insn;
35299 rtx nop;
35300 int issue_rate;
35301 int can_issue_more;
35302 int group_end;
35303 int group_count = 0;
35305 /* Initialize issue_rate. */
35306 issue_rate = rs6000_issue_rate ();
35307 can_issue_more = issue_rate;
35309 insn = get_next_active_insn (prev_head_insn, tail);
35310 next_insn = get_next_active_insn (insn, tail);
35312 while (insn != NULL_RTX)
35314 can_issue_more =
35315 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
35317 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
35319 if (next_insn == NULL_RTX)
35320 break;
35322 if (group_end)
35324 /* If the scheduler had marked group termination at this location
35325 (between insn and next_insn), and neither insn nor next_insn will
35326 force group termination, pad the group with nops to force group
35327 termination. */
35328 if (can_issue_more
35329 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
35330 && !insn_terminates_group_p (insn, current_group)
35331 && !insn_terminates_group_p (next_insn, previous_group))
35333 if (!is_branch_slot_insn (next_insn))
35334 can_issue_more--;
35336 while (can_issue_more)
35338 nop = gen_nop ();
35339 emit_insn_before (nop, next_insn);
35340 can_issue_more--;
35344 can_issue_more = issue_rate;
35345 group_count++;
35348 insn = next_insn;
35349 next_insn = get_next_active_insn (insn, tail);
35352 return group_count;
35355 /* We're beginning a new block. Initialize data structures as necessary. */
35357 static void
35358 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
35359 int sched_verbose ATTRIBUTE_UNUSED,
35360 int max_ready ATTRIBUTE_UNUSED)
35362 last_scheduled_insn = NULL;
35363 load_store_pendulum = 0;
35364 divide_cnt = 0;
35365 vec_pairing = 0;
35368 /* The following function is called at the end of scheduling BB.
35369 After reload, it inserts nops to enforce insn group bundling. */
35371 static void
35372 rs6000_sched_finish (FILE *dump, int sched_verbose)
35374 int n_groups;
35376 if (sched_verbose)
35377 fprintf (dump, "=== Finishing schedule.\n");
35379 if (reload_completed && rs6000_sched_groups)
35381 /* Do not run sched_finish hook when selective scheduling is enabled. */
35382 if (sel_sched_p ())
35383 return;
35385 if (rs6000_sched_insert_nops == sched_finish_none)
35386 return;
35388 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
35389 n_groups = pad_groups (dump, sched_verbose,
35390 current_sched_info->prev_head,
35391 current_sched_info->next_tail);
35392 else
35393 n_groups = redefine_groups (dump, sched_verbose,
35394 current_sched_info->prev_head,
35395 current_sched_info->next_tail);
35397 if (sched_verbose >= 6)
35399 fprintf (dump, "ngroups = %d\n", n_groups);
35400 print_rtl (dump, current_sched_info->prev_head);
35401 fprintf (dump, "Done finish_sched\n");
35406 struct rs6000_sched_context
35408 short cached_can_issue_more;
35409 rtx_insn *last_scheduled_insn;
35410 int load_store_pendulum;
35411 int divide_cnt;
35412 int vec_pairing;
35415 typedef struct rs6000_sched_context rs6000_sched_context_def;
35416 typedef rs6000_sched_context_def *rs6000_sched_context_t;
35418 /* Allocate storage for a new scheduling context. */
35419 static void *
35420 rs6000_alloc_sched_context (void)
35422 return xmalloc (sizeof (rs6000_sched_context_def));
35425 /* If CLEAN_P is true, initialize _SC with clean data;
35426 otherwise, initialize it from the global context. */
35427 static void
35428 rs6000_init_sched_context (void *_sc, bool clean_p)
35430 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
35432 if (clean_p)
35434 sc->cached_can_issue_more = 0;
35435 sc->last_scheduled_insn = NULL;
35436 sc->load_store_pendulum = 0;
35437 sc->divide_cnt = 0;
35438 sc->vec_pairing = 0;
35440 else
35442 sc->cached_can_issue_more = cached_can_issue_more;
35443 sc->last_scheduled_insn = last_scheduled_insn;
35444 sc->load_store_pendulum = load_store_pendulum;
35445 sc->divide_cnt = divide_cnt;
35446 sc->vec_pairing = vec_pairing;
35450 /* Sets the global scheduling context to the one pointed to by _SC. */
35451 static void
35452 rs6000_set_sched_context (void *_sc)
35454 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
35456 gcc_assert (sc != NULL);
35458 cached_can_issue_more = sc->cached_can_issue_more;
35459 last_scheduled_insn = sc->last_scheduled_insn;
35460 load_store_pendulum = sc->load_store_pendulum;
35461 divide_cnt = sc->divide_cnt;
35462 vec_pairing = sc->vec_pairing;
35465 /* Free _SC. */
35466 static void
35467 rs6000_free_sched_context (void *_sc)
35469 gcc_assert (_sc != NULL);
35471 free (_sc);
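/* Added commentary (not in the upstream file): the protocol the
   selective scheduler follows with the four hooks above, sketched as
   a hypothetical call sequence.  */
#if 0
static void
sched_context_protocol_sketch (void)
{
  void *ctx = rs6000_alloc_sched_context ();
  rs6000_init_sched_context (ctx, true);   /* start from a clean state */
  /* ... scheduling runs; the global variables mutate ... */
  rs6000_init_sched_context (ctx, false);  /* snapshot the globals */
  /* ... later, to resume scheduling from the snapshot: */
  rs6000_set_sched_context (ctx);
  rs6000_free_sched_context (ctx);
}
#endif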
35474 static bool
35475 rs6000_sched_can_speculate_insn (rtx_insn *insn)
35477 switch (get_attr_type (insn))
35479 case TYPE_DIV:
35480 case TYPE_SDIV:
35481 case TYPE_DDIV:
35482 case TYPE_VECDIV:
35483 case TYPE_SSQRT:
35484 case TYPE_DSQRT:
35485 return false;
35487 default:
35488 return true;
35492 /* Length in units of the trampoline for entering a nested function. */
35495 rs6000_trampoline_size (void)
35497 int ret = 0;
35499 switch (DEFAULT_ABI)
35501 default:
35502 gcc_unreachable ();
35504 case ABI_AIX:
35505 ret = (TARGET_32BIT) ? 12 : 24;
35506 break;
35508 case ABI_ELFv2:
35509 gcc_assert (!TARGET_32BIT);
35510 ret = 32;
35511 break;
35513 case ABI_DARWIN:
35514 case ABI_V4:
35515 ret = (TARGET_32BIT) ? 40 : 48;
35516 break;
35519 return ret;
35522 /* Emit RTL insns to initialize the variable parts of a trampoline.
35523 FNADDR is an RTX for the address of the function's pure code.
35524 CXT is an RTX for the static chain value for the function. */
35526 static void
35527 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
35529 int regsize = (TARGET_32BIT) ? 4 : 8;
35530 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
35531 rtx ctx_reg = force_reg (Pmode, cxt);
35532 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
35534 switch (DEFAULT_ABI)
35536 default:
35537 gcc_unreachable ();
35539 /* Under AIX, just build the 3-word function descriptor. */
35540 case ABI_AIX:
35542 rtx fnmem, fn_reg, toc_reg;
35544 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
35545 error ("You cannot take the address of a nested function if you use "
35546 "the -mno-pointers-to-nested-functions option.");
35548 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
35549 fn_reg = gen_reg_rtx (Pmode);
35550 toc_reg = gen_reg_rtx (Pmode);
35552 /* Macro to shorten the code expansions below. */
35553 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
35555 m_tramp = replace_equiv_address (m_tramp, addr);
35557 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
35558 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
35559 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
35560 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
35561 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
35563 # undef MEM_PLUS
35565 break;
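/* Added commentary (not in the upstream file): layout of the AIX
   descriptor built above, shown for 64-bit (regsize == 8):
     offset  0: function code address  (fn_reg)
     offset  8: TOC pointer            (toc_reg)
     offset 16: static chain           (ctx_reg)                     */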
35567 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
35568 case ABI_ELFv2:
35569 case ABI_DARWIN:
35570 case ABI_V4:
35571 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
35572 LCT_NORMAL, VOIDmode,
35573 addr, Pmode,
35574 GEN_INT (rs6000_trampoline_size ()), SImode,
35575 fnaddr, Pmode,
35576 ctx_reg, Pmode);
35577 break;
35582 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
35583 identifier as an argument, so the front end shouldn't look it up. */
35585 static bool
35586 rs6000_attribute_takes_identifier_p (const_tree attr_id)
35588 return is_attribute_p ("altivec", attr_id);
35591 /* Handle the "altivec" attribute. The attribute may have
35592 arguments as follows:
35594 __attribute__((altivec(vector__)))
35595 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
35596 __attribute__((altivec(bool__))) (always followed by 'unsigned')
35598 and may appear more than once (e.g., 'vector bool char') in a
35599 given declaration. */
35601 static tree
35602 rs6000_handle_altivec_attribute (tree *node,
35603 tree name ATTRIBUTE_UNUSED,
35604 tree args,
35605 int flags ATTRIBUTE_UNUSED,
35606 bool *no_add_attrs)
35608 tree type = *node, result = NULL_TREE;
35609 machine_mode mode;
35610 int unsigned_p;
35611 char altivec_type
35612 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
35613 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
35614 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
35615 : '?');
35617 while (POINTER_TYPE_P (type)
35618 || TREE_CODE (type) == FUNCTION_TYPE
35619 || TREE_CODE (type) == METHOD_TYPE
35620 || TREE_CODE (type) == ARRAY_TYPE)
35621 type = TREE_TYPE (type);
35623 mode = TYPE_MODE (type);
35625 /* Check for invalid AltiVec type qualifiers. */
35626 if (type == long_double_type_node)
35627 error ("use of %<long double%> in AltiVec types is invalid");
35628 else if (type == boolean_type_node)
35629 error ("use of boolean types in AltiVec types is invalid");
35630 else if (TREE_CODE (type) == COMPLEX_TYPE)
35631 error ("use of %<complex%> in AltiVec types is invalid");
35632 else if (DECIMAL_FLOAT_MODE_P (mode))
35633 error ("use of decimal floating point types in AltiVec types is invalid");
35634 else if (!TARGET_VSX)
35636 if (type == long_unsigned_type_node || type == long_integer_type_node)
35638 if (TARGET_64BIT)
35639 error ("use of %<long%> in AltiVec types is invalid for "
35640 "64-bit code without -mvsx");
35641 else if (rs6000_warn_altivec_long)
35642 warning (0, "use of %<long%> in AltiVec types is deprecated; "
35643 "use %<int%>");
35645 else if (type == long_long_unsigned_type_node
35646 || type == long_long_integer_type_node)
35647 error ("use of %<long long%> in AltiVec types is invalid without "
35648 "-mvsx");
35649 else if (type == double_type_node)
35650 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
35653 switch (altivec_type)
35655 case 'v':
35656 unsigned_p = TYPE_UNSIGNED (type);
35657 switch (mode)
35659 case E_TImode:
35660 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
35661 break;
35662 case E_DImode:
35663 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
35664 break;
35665 case E_SImode:
35666 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
35667 break;
35668 case E_HImode:
35669 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
35670 break;
35671 case E_QImode:
35672 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
35673 break;
35674 case E_SFmode: result = V4SF_type_node; break;
35675 case E_DFmode: result = V2DF_type_node; break;
35676 /* If the user says 'vector int bool', we may be handed the 'bool'
35677 attribute _before_ the 'vector' attribute, and so select the
35678 proper type in the 'b' case below. */
35679 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
35680 case E_V2DImode: case E_V2DFmode:
35681 result = type;
35682 default: break;
35684 break;
35685 case 'b':
35686 switch (mode)
35688 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
35689 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
35690 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
35691 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
35692 default: break;
35694 break;
35695 case 'p':
35696 switch (mode)
35698 case E_V8HImode: result = pixel_V8HI_type_node;
35699 default: break;
35701 default: break;
35704 /* Propagate qualifiers attached to the element type
35705 onto the vector type. */
35706 if (result && result != type && TYPE_QUALS (type))
35707 result = build_qualified_type (result, TYPE_QUALS (type));
35709 *no_add_attrs = true; /* No need to hang on to the attribute. */
35711 if (result)
35712 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
35714 return NULL_TREE;
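/* Added commentary (not in the upstream file): the three attribute
   spellings the handler above accepts, roughly as the front end
   lowers the AltiVec keywords; the typedef names are hypothetical.  */
#if 0
typedef int v_int __attribute__ ((altivec (vector__)));             /* vector int */
typedef unsigned short v_pixel __attribute__ ((altivec (pixel__))); /* vector pixel */
typedef unsigned int v_bool __attribute__ ((altivec (bool__)));     /* vector bool int */
#endif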
35717 /* AltiVec defines four built-in scalar types that serve as vector
35718 elements; we must teach the compiler how to mangle them. */
35720 static const char *
35721 rs6000_mangle_type (const_tree type)
35723 type = TYPE_MAIN_VARIANT (type);
35725 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
35726 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
35727 return NULL;
35729 if (type == bool_char_type_node) return "U6__boolc";
35730 if (type == bool_short_type_node) return "U6__bools";
35731 if (type == pixel_type_node) return "u7__pixel";
35732 if (type == bool_int_type_node) return "U6__booli";
35733 if (type == bool_long_type_node) return "U6__booll";
35735 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
35736 "g" for IBM extended double, no matter whether it is long double (using
35737 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
35738 if (TARGET_FLOAT128_TYPE)
35740 if (type == ieee128_float_type_node)
35741 return "U10__float128";
35743 if (type == ibm128_float_type_node)
35744 return "g";
35746 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
35747 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
35750 /* Mangle IBM extended float long double as `g' (__float128) on
35751 powerpc*-linux where long-double-64 previously was the default. */
35752 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
35753 && TARGET_ELF
35754 && TARGET_LONG_DOUBLE_128
35755 && !TARGET_IEEEQUAD)
35756 return "g";
35758 /* For all other types, use normal C++ mangling. */
35759 return NULL;
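/* Added commentary (not in the upstream file): a worked example of
   the mangling above.  A hypothetical C++ declaration
       void f (int, __pixel);
   contributes "u7__pixel" for the second parameter, giving the
   symbol _Z1fiu7__pixel; a bool-int parameter would contribute
   "U6__booli" instead.  */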
35762 /* Handle a "longcall" or "shortcall" attribute; arguments as in
35763 struct attribute_spec.handler. */
35765 static tree
35766 rs6000_handle_longcall_attribute (tree *node, tree name,
35767 tree args ATTRIBUTE_UNUSED,
35768 int flags ATTRIBUTE_UNUSED,
35769 bool *no_add_attrs)
35771 if (TREE_CODE (*node) != FUNCTION_TYPE
35772 && TREE_CODE (*node) != FIELD_DECL
35773 && TREE_CODE (*node) != TYPE_DECL)
35775 warning (OPT_Wattributes, "%qE attribute only applies to functions",
35776 name);
35777 *no_add_attrs = true;
35780 return NULL_TREE;
35783 /* Set longcall attributes on all functions declared when
35784 rs6000_default_long_calls is true. */
35785 static void
35786 rs6000_set_default_type_attributes (tree type)
35788 if (rs6000_default_long_calls
35789 && (TREE_CODE (type) == FUNCTION_TYPE
35790 || TREE_CODE (type) == METHOD_TYPE))
35791 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
35792 NULL_TREE,
35793 TYPE_ATTRIBUTES (type));
35795 #if TARGET_MACHO
35796 darwin_set_default_type_attributes (type);
35797 #endif
35800 /* Return a reference suitable for calling a function with the
35801 longcall attribute. */
35804 rs6000_longcall_ref (rtx call_ref)
35806 const char *call_name;
35807 tree node;
35809 if (GET_CODE (call_ref) != SYMBOL_REF)
35810 return call_ref;
35812 /* System V adds '.' to the internal name, so skip them. */
35813 call_name = XSTR (call_ref, 0);
35814 if (*call_name == '.')
35816 while (*call_name == '.')
35817 call_name++;
35819 node = get_identifier (call_name);
35820 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
35823 return force_reg (Pmode, call_ref);
35826 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
35827 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
35828 #endif
35830 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35831 struct attribute_spec.handler. */
35832 static tree
35833 rs6000_handle_struct_attribute (tree *node, tree name,
35834 tree args ATTRIBUTE_UNUSED,
35835 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
35837 tree *type = NULL;
35838 if (DECL_P (*node))
35840 if (TREE_CODE (*node) == TYPE_DECL)
35841 type = &TREE_TYPE (*node);
35843 else
35844 type = node;
35846 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
35847 || TREE_CODE (*type) == UNION_TYPE)))
35849 warning (OPT_Wattributes, "%qE attribute ignored", name);
35850 *no_add_attrs = true;
35853 else if ((is_attribute_p ("ms_struct", name)
35854 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
35855 || ((is_attribute_p ("gcc_struct", name)
35856 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
35858 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
35859 name);
35860 *no_add_attrs = true;
35863 return NULL_TREE;
35866 static bool
35867 rs6000_ms_bitfield_layout_p (const_tree record_type)
35869 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
35870 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
35871 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
35874 #ifdef USING_ELFOS_H
35876 /* A get_unnamed_section callback, used for switching to toc_section. */
35878 static void
35879 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
35881 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35882 && TARGET_MINIMAL_TOC)
35884 if (!toc_initialized)
35886 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35887 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35888 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
35889 fprintf (asm_out_file, "\t.tc ");
35890 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
35891 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35892 fprintf (asm_out_file, "\n");
35894 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35895 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35896 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35897 fprintf (asm_out_file, " = .+32768\n");
35898 toc_initialized = 1;
35900 else
35901 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35903 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35905 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35906 if (!toc_initialized)
35908 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35909 toc_initialized = 1;
35912 else
35914 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35915 if (!toc_initialized)
35917 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35918 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35919 fprintf (asm_out_file, " = .+32768\n");
35920 toc_initialized = 1;
35925 /* Implement TARGET_ASM_INIT_SECTIONS. */
35927 static void
35928 rs6000_elf_asm_init_sections (void)
35930 toc_section
35931 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
35933 sdata2_section
35934 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
35935 SDATA2_SECTION_ASM_OP);
35938 /* Implement TARGET_SELECT_RTX_SECTION. */
35940 static section *
35941 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
35942 unsigned HOST_WIDE_INT align)
35944 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
35945 return toc_section;
35946 else
35947 return default_elf_select_rtx_section (mode, x, align);
35950 /* For a SYMBOL_REF, set generic flags and then perform some
35951 target-specific processing.
35953 When the AIX ABI is requested on a non-AIX system, replace the
35954 function name with the real name (with a leading .) rather than the
35955 function descriptor name. This saves a lot of overriding code that
35956 would otherwise be needed to read the prefixes. */
35958 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
35959 static void
35960 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
35962 default_encode_section_info (decl, rtl, first);
35964 if (first
35965 && TREE_CODE (decl) == FUNCTION_DECL
35966 && !TARGET_AIX
35967 && DEFAULT_ABI == ABI_AIX)
35969 rtx sym_ref = XEXP (rtl, 0);
35970 size_t len = strlen (XSTR (sym_ref, 0));
35971 char *str = XALLOCAVEC (char, len + 2);
35972 str[0] = '.';
35973 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
35974 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
35978 static inline bool
35979 compare_section_name (const char *section, const char *templ)
35981 int len;
35983 len = strlen (templ);
35984 return (strncmp (section, templ, len) == 0
35985 && (section[len] == 0 || section[len] == '.'));
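/* Added commentary (not in the upstream file): compare_section_name
   above accepts an exact match or a dotted extension of the template:
     (".sdata",     ".sdata") -> true   (exact match)
     (".sdata.foo", ".sdata") -> true   (next char is '.')
     (".sdata2",    ".sdata") -> false  (next char is '2')           */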
35988 bool
35989 rs6000_elf_in_small_data_p (const_tree decl)
35991 if (rs6000_sdata == SDATA_NONE)
35992 return false;
35994 /* We want to merge strings, so we never consider them small data. */
35995 if (TREE_CODE (decl) == STRING_CST)
35996 return false;
35998 /* Functions are never in the small data area. */
35999 if (TREE_CODE (decl) == FUNCTION_DECL)
36000 return false;
36002 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
36004 const char *section = DECL_SECTION_NAME (decl);
36005 if (compare_section_name (section, ".sdata")
36006 || compare_section_name (section, ".sdata2")
36007 || compare_section_name (section, ".gnu.linkonce.s")
36008 || compare_section_name (section, ".sbss")
36009 || compare_section_name (section, ".sbss2")
36010 || compare_section_name (section, ".gnu.linkonce.sb")
36011 || strcmp (section, ".PPC.EMB.sdata0") == 0
36012 || strcmp (section, ".PPC.EMB.sbss0") == 0)
36013 return true;
36015 else
36017 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
36019 if (size > 0
36020 && size <= g_switch_value
36021 /* If it's not public, and we're not going to reference it there,
36022 there's no need to put it in the small data section. */
36023 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
36024 return true;
36027 return false;
36030 #endif /* USING_ELFOS_H */
36032 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
36034 static bool
36035 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
36037 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
36040 /* Do not place thread-local symbols refs in the object blocks. */
36042 static bool
36043 rs6000_use_blocks_for_decl_p (const_tree decl)
36045 return !DECL_THREAD_LOCAL_P (decl);
36048 /* Return a REG that occurs in ADDR with coefficient 1.
36049 ADDR can be effectively incremented by incrementing REG.
36051 r0 is special and we must not select it as an address
36052 register by this routine since our caller will try to
36053 increment the returned register via an "la" instruction. */
36056 find_addr_reg (rtx addr)
36058 while (GET_CODE (addr) == PLUS)
36060 if (GET_CODE (XEXP (addr, 0)) == REG
36061 && REGNO (XEXP (addr, 0)) != 0)
36062 addr = XEXP (addr, 0);
36063 else if (GET_CODE (XEXP (addr, 1)) == REG
36064 && REGNO (XEXP (addr, 1)) != 0)
36065 addr = XEXP (addr, 1);
36066 else if (CONSTANT_P (XEXP (addr, 0)))
36067 addr = XEXP (addr, 1);
36068 else if (CONSTANT_P (XEXP (addr, 1)))
36069 addr = XEXP (addr, 0);
36070 else
36071 gcc_unreachable ();
36073 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
36074 return addr;
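/* Added commentary (not in the upstream file): two hypothetical
   examples of what find_addr_reg above extracts.  For
   (plus (reg 9) (const_int 16)) it returns (reg 9); for
   (plus (plus (reg 9) (reg 0)) (const_int 16)) it also returns
   (reg 9), because r0 must never be chosen.  */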
36077 void
36078 rs6000_fatal_bad_address (rtx op)
36080 fatal_insn ("bad address", op);
36083 #if TARGET_MACHO
36085 typedef struct branch_island_d {
36086 tree function_name;
36087 tree label_name;
36088 int line_number;
36089 } branch_island;
36092 static vec<branch_island, va_gc> *branch_islands;
36094 /* Remember to generate a branch island for far calls to the given
36095 function. */
36097 static void
36098 add_compiler_branch_island (tree label_name, tree function_name,
36099 int line_number)
36101 branch_island bi = {function_name, label_name, line_number};
36102 vec_safe_push (branch_islands, bi);
36105 /* Generate far-jump branch islands for everything recorded in
36106 branch_islands. Invoked immediately after the last instruction of
36107 the epilogue has been emitted; the branch islands must be appended
36108 to, and contiguous with, the function body. Mach-O stubs are
36109 generated in machopic_output_stub(). */
36111 static void
36112 macho_branch_islands (void)
36114 char tmp_buf[512];
36116 while (!vec_safe_is_empty (branch_islands))
36118 branch_island *bi = &branch_islands->last ();
36119 const char *label = IDENTIFIER_POINTER (bi->label_name);
36120 const char *name = IDENTIFIER_POINTER (bi->function_name);
36121 char name_buf[512];
36122 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
36123 if (name[0] == '*' || name[0] == '&')
36124 strcpy (name_buf, name+1);
36125 else
36127 name_buf[0] = '_';
36128 strcpy (name_buf+1, name);
36130 strcpy (tmp_buf, "\n");
36131 strcat (tmp_buf, label);
36132 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36133 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36134 dbxout_stabd (N_SLINE, bi->line_number);
36135 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36136 if (flag_pic)
36138 if (TARGET_LINK_STACK)
36140 char name[32];
36141 get_ppc476_thunk_name (name);
36142 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
36143 strcat (tmp_buf, name);
36144 strcat (tmp_buf, "\n");
36145 strcat (tmp_buf, label);
36146 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
36148 else
36150 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
36151 strcat (tmp_buf, label);
36152 strcat (tmp_buf, "_pic\n");
36153 strcat (tmp_buf, label);
36154 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
36157 strcat (tmp_buf, "\taddis r11,r11,ha16(");
36158 strcat (tmp_buf, name_buf);
36159 strcat (tmp_buf, " - ");
36160 strcat (tmp_buf, label);
36161 strcat (tmp_buf, "_pic)\n");
36163 strcat (tmp_buf, "\tmtlr r0\n");
36165 strcat (tmp_buf, "\taddi r12,r11,lo16(");
36166 strcat (tmp_buf, name_buf);
36167 strcat (tmp_buf, " - ");
36168 strcat (tmp_buf, label);
36169 strcat (tmp_buf, "_pic)\n");
36171 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
36173 else
36175 strcat (tmp_buf, ":\nlis r12,hi16(");
36176 strcat (tmp_buf, name_buf);
36177 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
36178 strcat (tmp_buf, name_buf);
36179 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
36181 output_asm_insn (tmp_buf, 0);
36182 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36183 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36184 dbxout_stabd (N_SLINE, bi->line_number);
36185 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36186 branch_islands->pop ();
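/* Added commentary (not in the upstream file): for a hypothetical
   island labelled L42 targeting _foo, the non-PIC branch of the loop
   above emits:

       L42:
       lis r12,hi16(_foo)
           ori r12,r12,lo16(_foo)
           mtctr r12
           bctr                                                      */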
36190 /* NO_PREVIOUS_DEF checks whether the function name is already in the
36191 branch island list. */
36193 static int
36194 no_previous_def (tree function_name)
36196 branch_island *bi;
36197 unsigned ix;
36199 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
36200 if (function_name == bi->function_name)
36201 return 0;
36202 return 1;
36205 /* GET_PREV_LABEL gets the label name from the previous definition of
36206 the function. */
36208 static tree
36209 get_prev_label (tree function_name)
36211 branch_island *bi;
36212 unsigned ix;
36214 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
36215 if (function_name == bi->function_name)
36216 return bi->label_name;
36217 return NULL_TREE;
36220 /* INSN is either a function call or a millicode call. It may have an
36221 unconditional jump in its delay slot.
36223 CALL_DEST is the routine we are calling. */
36225 char *
36226 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
36227 int cookie_operand_number)
36229 static char buf[256];
36230 if (darwin_emit_branch_islands
36231 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
36232 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
36234 tree labelname;
36235 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
36237 if (no_previous_def (funname))
36239 rtx label_rtx = gen_label_rtx ();
36240 char *label_buf, temp_buf[256];
36241 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
36242 CODE_LABEL_NUMBER (label_rtx));
36243 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
36244 labelname = get_identifier (label_buf);
36245 add_compiler_branch_island (labelname, funname, insn_line (insn));
36247 else
36248 labelname = get_prev_label (funname);
36250 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
36251 instruction will reach 'foo', otherwise link as 'bl L42'".
36252 "L42" should be a 'branch island', that will do a far jump to
36253 'foo'. Branch islands are generated in
36254 macho_branch_islands(). */
36255 sprintf (buf, "jbsr %%z%d,%.246s",
36256 dest_operand_number, IDENTIFIER_POINTER (labelname));
36258 else
36259 sprintf (buf, "bl %%z%d", dest_operand_number);
36260 return buf;
36263 /* Generate PIC and indirect symbol stubs. */
36265 void
36266 machopic_output_stub (FILE *file, const char *symb, const char *stub)
36268 unsigned int length;
36269 char *symbol_name, *lazy_ptr_name;
36270 char *local_label_0;
36271 static int label = 0;
36273 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
36274 symb = (*targetm.strip_name_encoding) (symb);
36277 length = strlen (symb);
36278 symbol_name = XALLOCAVEC (char, length + 32);
36279 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
36281 lazy_ptr_name = XALLOCAVEC (char, length + 32);
36282 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
36284 if (flag_pic == 2)
36285 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
36286 else
36287 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
36289 if (flag_pic == 2)
36291 fprintf (file, "\t.align 5\n");
36293 fprintf (file, "%s:\n", stub);
36294 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36296 label++;
36297 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
36298 sprintf (local_label_0, "\"L%011d$spb\"", label);
36300 fprintf (file, "\tmflr r0\n");
36301 if (TARGET_LINK_STACK)
36303 char name[32];
36304 get_ppc476_thunk_name (name);
36305 fprintf (file, "\tbl %s\n", name);
36306 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
36308 else
36310 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
36311 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
36313 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
36314 lazy_ptr_name, local_label_0);
36315 fprintf (file, "\tmtlr r0\n");
36316 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
36317 (TARGET_64BIT ? "ldu" : "lwzu"),
36318 lazy_ptr_name, local_label_0);
36319 fprintf (file, "\tmtctr r12\n");
36320 fprintf (file, "\tbctr\n");
36322 else
36324 fprintf (file, "\t.align 4\n");
36326 fprintf (file, "%s:\n", stub);
36327 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36329 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
36330 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
36331 (TARGET_64BIT ? "ldu" : "lwzu"),
36332 lazy_ptr_name);
36333 fprintf (file, "\tmtctr r12\n");
36334 fprintf (file, "\tbctr\n");
36337 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
36338 fprintf (file, "%s:\n", lazy_ptr_name);
36339 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36340 fprintf (file, "%sdyld_stub_binding_helper\n",
36341 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
36344 /* Legitimize PIC addresses. If the address is already
36345 position-independent, we return ORIG. Newly generated
36346 position-independent addresses go into a reg. This is REG if
36347 nonzero; otherwise we allocate register(s) as necessary.
36349 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
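/* Added commentary (not in the upstream file): SMALL_INT above is the
   usual unsigned-overflow trick for "fits in a signed 16-bit
   immediate":
     X = -0x8000:  -0x8000 + 0x8000 = 0x0000  < 0x10000  (accepted)
     X =  0x7fff:   0x7fff + 0x8000 = 0xffff  < 0x10000  (accepted)
     X =  0x8000:   0x8000 + 0x8000 = 0x10000            (rejected)  */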
36352 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
36353 rtx reg)
36355 rtx base, offset;
36357 if (reg == NULL && ! reload_in_progress && ! reload_completed)
36358 reg = gen_reg_rtx (Pmode);
36360 if (GET_CODE (orig) == CONST)
36362 rtx reg_temp;
36364 if (GET_CODE (XEXP (orig, 0)) == PLUS
36365 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
36366 return orig;
36368 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
36370 /* Use a different reg for the intermediate value, as
36371 it will be marked UNCHANGING. */
36372 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
36373 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
36374 Pmode, reg_temp);
36375 offset =
36376 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
36377 Pmode, reg);
36379 if (GET_CODE (offset) == CONST_INT)
36381 if (SMALL_INT (offset))
36382 return plus_constant (Pmode, base, INTVAL (offset));
36383 else if (! reload_in_progress && ! reload_completed)
36384 offset = force_reg (Pmode, offset);
36385 else
36387 rtx mem = force_const_mem (Pmode, orig);
36388 return machopic_legitimize_pic_address (mem, Pmode, reg);
36391 return gen_rtx_PLUS (Pmode, base, offset);
36394 /* Fall back on generic machopic code. */
36395 return machopic_legitimize_pic_address (orig, mode, reg);
36398 /* Output a .machine directive for the Darwin assembler, and call
36399 the generic start_file routine. */
36401 static void
36402 rs6000_darwin_file_start (void)
36404 static const struct
36406 const char *arg;
36407 const char *name;
36408 HOST_WIDE_INT if_set;
36409 } mapping[] = {
36410 { "ppc64", "ppc64", MASK_64BIT },
36411 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
36412 { "power4", "ppc970", 0 },
36413 { "G5", "ppc970", 0 },
36414 { "7450", "ppc7450", 0 },
36415 { "7400", "ppc7400", MASK_ALTIVEC },
36416 { "G4", "ppc7400", 0 },
36417 { "750", "ppc750", 0 },
36418 { "740", "ppc750", 0 },
36419 { "G3", "ppc750", 0 },
36420 { "604e", "ppc604e", 0 },
36421 { "604", "ppc604", 0 },
36422 { "603e", "ppc603", 0 },
36423 { "603", "ppc603", 0 },
36424 { "601", "ppc601", 0 },
36425 { NULL, "ppc", 0 } };
36426 const char *cpu_id = "";
36427 size_t i;
36429 rs6000_file_start ();
36430 darwin_file_start ();
36432 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
36434 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
36435 cpu_id = rs6000_default_cpu;
36437 if (global_options_set.x_rs6000_cpu_index)
36438 cpu_id = processor_target_table[rs6000_cpu_index].name;
36440 /* Look through the mapping array. Pick the first name that either
36441 matches the argument, has a bit set in IF_SET that is also set
36442 in the target flags, or has a NULL name. */
36444 i = 0;
36445 while (mapping[i].arg != NULL
36446 && strcmp (mapping[i].arg, cpu_id) != 0
36447 && (mapping[i].if_set & rs6000_isa_flags) == 0)
36448 i++;
36450 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
36453 #endif /* TARGET_MACHO */
36455 #if TARGET_ELF
36456 static int
36457 rs6000_elf_reloc_rw_mask (void)
36459 if (flag_pic)
36460 return 3;
36461 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
36462 return 2;
36463 else
36464 return 0;
36467 /* Record an element in the table of global constructors. SYMBOL is
36468 a SYMBOL_REF of the function to be called; PRIORITY is a number
36469 between 0 and MAX_INIT_PRIORITY.
36471 This differs from default_named_section_asm_out_constructor in
36472 that we have special handling for -mrelocatable. */
36474 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
36475 static void
36476 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
36478 const char *section = ".ctors";
36479 char buf[18];
36481 if (priority != DEFAULT_INIT_PRIORITY)
36483 sprintf (buf, ".ctors.%.5u",
36484 /* Invert the numbering so the linker puts us in the proper
36485 order; constructors are run from right to left, and the
36486 linker sorts in increasing order. */
36487 MAX_INIT_PRIORITY - priority);
36488 section = buf;
36491 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36492 assemble_align (POINTER_SIZE);
36494 if (DEFAULT_ABI == ABI_V4
36495 && (TARGET_RELOCATABLE || flag_pic > 1))
36497 fputs ("\t.long (", asm_out_file);
36498 output_addr_const (asm_out_file, symbol);
36499 fputs (")@fixup\n", asm_out_file);
36501 else
36502 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
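/* Illustrative sketch (not part of this file): the priority inversion used
   above.  Constructors are executed from the end of .ctors backwards, while
   the linker sorts .ctors.NNNNN suffixes in increasing order, so a lower
   (earlier-running) priority must map to a larger suffix.  MAX_PRI is a
   stand-in for MAX_INIT_PRIORITY.  */
#include <stdio.h>

#define MAX_PRI 65535

int
main (void)
{
  static const unsigned priorities[] = { 101, 200, 65535 };
  char buf[18];

  for (int i = 0; i < 3; i++)
    {
      /* Lower priority => larger suffix => placed later in the section
         => reached earlier by the right-to-left runtime walk.  */
      sprintf (buf, ".ctors.%.5u", MAX_PRI - priorities[i]);
      printf ("priority %5u -> section %s\n", priorities[i], buf);
    }
  return 0;
}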
36505 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
36506 static void
36507 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
36509 const char *section = ".dtors";
36510 char buf[18];
36512 if (priority != DEFAULT_INIT_PRIORITY)
36514 sprintf (buf, ".dtors.%.5u",
36515 /* Invert the numbering so the linker puts us in the proper
36516 order; constructors are run from right to left, and the
36517 linker sorts in increasing order. */
36518 MAX_INIT_PRIORITY - priority);
36519 section = buf;
36522 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36523 assemble_align (POINTER_SIZE);
36525 if (DEFAULT_ABI == ABI_V4
36526 && (TARGET_RELOCATABLE || flag_pic > 1))
36528 fputs ("\t.long (", asm_out_file);
36529 output_addr_const (asm_out_file, symbol);
36530 fputs (")@fixup\n", asm_out_file);
36532 else
36533 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
36536 void
36537 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
36539 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
36541 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
36542 ASM_OUTPUT_LABEL (file, name);
36543 fputs (DOUBLE_INT_ASM_OP, file);
36544 rs6000_output_function_entry (file, name);
36545 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
36546 if (DOT_SYMBOLS)
36548 fputs ("\t.size\t", file);
36549 assemble_name (file, name);
36550 fputs (",24\n\t.type\t.", file);
36551 assemble_name (file, name);
36552 fputs (",@function\n", file);
36553 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
36555 fputs ("\t.globl\t.", file);
36556 assemble_name (file, name);
36557 putc ('\n', file);
36560 else
36561 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36562 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36563 rs6000_output_function_entry (file, name);
36564 fputs (":\n", file);
36565 return;
36568 if (DEFAULT_ABI == ABI_V4
36569 && (TARGET_RELOCATABLE || flag_pic > 1)
36570 && !TARGET_SECURE_PLT
36571 && (!constant_pool_empty_p () || crtl->profile)
36572 && uses_TOC ())
36574 char buf[256];
36576 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36578 fprintf (file, "\t.long ");
36579 assemble_name (file, toc_label_name);
36580 need_toc_init = 1;
36581 putc ('-', file);
36582 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36583 assemble_name (file, buf);
36584 putc ('\n', file);
36587 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36588 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36590 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
36592 char buf[256];
36594 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36596 fprintf (file, "\t.quad .TOC.-");
36597 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36598 assemble_name (file, buf);
36599 putc ('\n', file);
36602 if (DEFAULT_ABI == ABI_AIX)
36604 const char *desc_name, *orig_name;
36606 orig_name = (*targetm.strip_name_encoding) (name);
36607 desc_name = orig_name;
36608 while (*desc_name == '.')
36609 desc_name++;
36611 if (TREE_PUBLIC (decl))
36612 fprintf (file, "\t.globl %s\n", desc_name);
36614 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
36615 fprintf (file, "%s:\n", desc_name);
36616 fprintf (file, "\t.long %s\n", orig_name);
36617 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
36618 fputs ("\t.long 0\n", file);
36619 fprintf (file, "\t.previous\n");
36621 ASM_OUTPUT_LABEL (file, name);
36624 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
36625 static void
36626 rs6000_elf_file_end (void)
36628 #ifdef HAVE_AS_GNU_ATTRIBUTE
36629 /* ??? The value emitted depends on options active at file end.
36630 Assume anyone using #pragma or attributes that might change
36631 options knows what they are doing. */
36632 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
36633 && rs6000_passes_float)
36635 int fp;
36637 if (TARGET_DF_FPR | TARGET_DF_SPE)
36638 fp = 1;
36639 else if (TARGET_SF_FPR | TARGET_SF_SPE)
36640 fp = 3;
36641 else
36642 fp = 2;
36643 if (rs6000_passes_long_double)
36645 if (!TARGET_LONG_DOUBLE_128)
36646 fp |= 2 * 4;
36647 else if (TARGET_IEEEQUAD)
36648 fp |= 3 * 4;
36649 else
36650 fp |= 1 * 4;
36652 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
36654 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
36656 if (rs6000_passes_vector)
36657 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
36658 (TARGET_ALTIVEC_ABI ? 2
36659 : TARGET_SPE_ABI ? 3
36660 : 1));
36661 if (rs6000_returns_struct)
36662 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
36663 aix_struct_return ? 2 : 1);
36665 #endif
36666 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
36667 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
36668 file_end_indicate_exec_stack ();
36669 #endif
36671 if (flag_split_stack)
36672 file_end_indicate_split_stack ();
36674 if (cpu_builtin_p)
36676 /* We have expanded a CPU builtin, so we need to emit a reference to
36677 the special symbol that LIBC uses to declare that it supports the
36678 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 fields in the TCB. */
36679 switch_to_section (data_section);
36680 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
36681 fprintf (asm_out_file, "\t%s %s\n",
36682 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
36685 #endif
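/* Illustrative sketch (not part of this file): how the FP value emitted by
   rs6000_elf_file_end above packs two fields.  The meanings follow the
   assignments in that function (bits 0-1: 1 = hard double, 2 = soft,
   3 = hard single; bits 2-3: 1 = 128-bit IBM, 2 = 64-bit, 3 = 128-bit IEEE
   long double); treat this as a reading of the code above, not as an
   authoritative ABI reference.  */
#include <stdio.h>

static void
decode_fp_attr (int fp)
{
  static const char *const scalar[] =
    { "unspecified", "hard double", "soft", "hard single" };
  static const char *const ldbl[] =
    { "unspecified", "128-bit IBM", "64-bit", "128-bit IEEE" };

  printf (".gnu_attribute 4, %d => scalar FP: %s, long double: %s\n",
          fp, scalar[fp & 3], ldbl[(fp >> 2) & 3]);
}

int
main (void)
{
  decode_fp_attr (1);           /* hard double, long double unspecified */
  decode_fp_attr (1 | 1 * 4);   /* hard double, IBM long double */
  decode_fp_attr (2 | 2 * 4);   /* soft float, 64-bit long double */
  return 0;
}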
36687 #if TARGET_XCOFF
36689 #ifndef HAVE_XCOFF_DWARF_EXTRAS
36690 #define HAVE_XCOFF_DWARF_EXTRAS 0
36691 #endif
36693 static enum unwind_info_type
36694 rs6000_xcoff_debug_unwind_info (void)
36696 return UI_NONE;
36699 static void
36700 rs6000_xcoff_asm_output_anchor (rtx symbol)
36702 char buffer[100];
36704 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
36705 SYMBOL_REF_BLOCK_OFFSET (symbol));
36706 fprintf (asm_out_file, "%s", SET_ASM_OP);
36707 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
36708 fprintf (asm_out_file, ",");
36709 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
36710 fprintf (asm_out_file, "\n");
36713 static void
36714 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
36716 fputs (GLOBAL_ASM_OP, stream);
36717 RS6000_OUTPUT_BASENAME (stream, name);
36718 putc ('\n', stream);
36721 /* A get_unnamed_decl callback, used for read-only sections. PTR
36722 points to the section string variable. */
36724 static void
36725 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
36727 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
36728 *(const char *const *) directive,
36729 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36732 /* Likewise for read-write sections. */
36734 static void
36735 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
36737 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
36738 *(const char *const *) directive,
36739 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36742 static void
36743 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
36745 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
36746 *(const char *const *) directive,
36747 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36750 /* A get_unnamed_section callback, used for switching to toc_section. */
36752 static void
36753 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
36755 if (TARGET_MINIMAL_TOC)
36757 /* toc_section is always selected at least once from
36758 rs6000_xcoff_file_start, so this is guaranteed to
36759 always be defined once and only once in each file. */
36760 if (!toc_initialized)
36762 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
36763 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
36764 toc_initialized = 1;
36766 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
36767 (TARGET_32BIT ? "" : ",3"));
36769 else
36770 fputs ("\t.toc\n", asm_out_file);
36773 /* Implement TARGET_ASM_INIT_SECTIONS. */
36775 static void
36776 rs6000_xcoff_asm_init_sections (void)
36778 read_only_data_section
36779 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36780 &xcoff_read_only_section_name);
36782 private_data_section
36783 = get_unnamed_section (SECTION_WRITE,
36784 rs6000_xcoff_output_readwrite_section_asm_op,
36785 &xcoff_private_data_section_name);
36787 tls_data_section
36788 = get_unnamed_section (SECTION_TLS,
36789 rs6000_xcoff_output_tls_section_asm_op,
36790 &xcoff_tls_data_section_name);
36792 tls_private_data_section
36793 = get_unnamed_section (SECTION_TLS,
36794 rs6000_xcoff_output_tls_section_asm_op,
36795 &xcoff_private_data_section_name);
36797 read_only_private_data_section
36798 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36799 &xcoff_private_data_section_name);
36801 toc_section
36802 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
36804 readonly_data_section = read_only_data_section;
36807 static int
36808 rs6000_xcoff_reloc_rw_mask (void)
36810 return 3;
36813 static void
36814 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
36815 tree decl ATTRIBUTE_UNUSED)
36817 int smclass;
36818 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
36820 if (flags & SECTION_EXCLUDE)
36821 smclass = 4;
36822 else if (flags & SECTION_DEBUG)
36824 fprintf (asm_out_file, "\t.dwsect %s\n", name);
36825 return;
36827 else if (flags & SECTION_CODE)
36828 smclass = 0;
36829 else if (flags & SECTION_TLS)
36830 smclass = 3;
36831 else if (flags & SECTION_WRITE)
36832 smclass = 2;
36833 else
36834 smclass = 1;
36836 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
36837 (flags & SECTION_CODE) ? "." : "",
36838 name, suffix[smclass], flags & SECTION_ENTSIZE);
36841 #define IN_NAMED_SECTION(DECL) \
36842 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
36843 && DECL_SECTION_NAME (DECL) != NULL)
36845 static section *
36846 rs6000_xcoff_select_section (tree decl, int reloc,
36847 unsigned HOST_WIDE_INT align)
36849 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
36850 named section. */
36851 if (align > BIGGEST_ALIGNMENT)
36853 resolve_unique_section (decl, reloc, true);
36854 if (IN_NAMED_SECTION (decl))
36855 return get_named_section (decl, NULL, reloc);
36858 if (decl_readonly_section (decl, reloc))
36860 if (TREE_PUBLIC (decl))
36861 return read_only_data_section;
36862 else
36863 return read_only_private_data_section;
36865 else
36867 #if HAVE_AS_TLS
36868 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
36870 if (TREE_PUBLIC (decl))
36871 return tls_data_section;
36872 else if (bss_initializer_p (decl))
36874 /* Convert to COMMON to emit in BSS. */
36875 DECL_COMMON (decl) = 1;
36876 return tls_comm_section;
36878 else
36879 return tls_private_data_section;
36881 else
36882 #endif
36883 if (TREE_PUBLIC (decl))
36884 return data_section;
36885 else
36886 return private_data_section;
36890 static void
36891 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
36893 const char *name;
36895 /* Use select_section for private data and uninitialized data with
36896 alignment <= BIGGEST_ALIGNMENT. */
36897 if (!TREE_PUBLIC (decl)
36898 || DECL_COMMON (decl)
36899 || (DECL_INITIAL (decl) == NULL_TREE
36900 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
36901 || DECL_INITIAL (decl) == error_mark_node
36902 || (flag_zero_initialized_in_bss
36903 && initializer_zerop (DECL_INITIAL (decl))))
36904 return;
36906 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36907 name = (*targetm.strip_name_encoding) (name);
36908 set_decl_section_name (decl, name);
36911 /* Select section for constant in constant pool.
36913 On RS/6000, all constants are in the private read-only data area.
36914 However, if this is being placed in the TOC it must be output as a
36915 toc entry. */
36917 static section *
36918 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
36919 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
36921 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
36922 return toc_section;
36923 else
36924 return read_only_private_data_section;
36927 /* Remove any trailing [DS] or the like from the symbol name. */
36929 static const char *
36930 rs6000_xcoff_strip_name_encoding (const char *name)
36932 size_t len;
36933 if (*name == '*')
36934 name++;
36935 len = strlen (name);
36936 if (name[len - 1] == ']')
36937 return ggc_alloc_string (name, len - 4);
36938 else
36939 return name;
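/* Illustrative sketch (not part of this file): the stripping above relies
   on every XCOFF mapping class having the fixed four-character form "[XX]"
   (e.g. "[DS]", "[UA]", "[RW]"), so a trailing ']' means the last four
   bytes can simply be dropped.  This sketch adds a length guard that the
   original omits.  */
#include <stdio.h>
#include <string.h>

static void
strip_mapping_class (const char *name, char *out)
{
  if (*name == '*')             /* skip the leading marker, as above */
    name++;
  size_t len = strlen (name);
  if (len > 4 && name[len - 1] == ']')
    len -= 4;                   /* drop "[DS]", "[UA]", ...  */
  memcpy (out, name, len);
  out[len] = '\0';
}

int
main (void)
{
  char buf[32];
  strip_mapping_class ("foo[DS]", buf);  printf ("%s\n", buf); /* foo */
  strip_mapping_class ("*bar[UA]", buf); printf ("%s\n", buf); /* bar */
  strip_mapping_class ("baz", buf);      printf ("%s\n", buf); /* baz */
  return 0;
}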
36942 /* Section attributes. AIX is always PIC. */
36944 static unsigned int
36945 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
36947 unsigned int align;
36948 unsigned int flags = default_section_type_flags (decl, name, reloc);
36950 /* Align to at least UNIT size. */
36951 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
36952 align = MIN_UNITS_PER_WORD;
36953 else
36954 /* Increase alignment of large objects if not already stricter. */
36955 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
36956 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
36957 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
36959 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
36962 /* Output at beginning of assembler file.
36964 Initialize the section names for the RS/6000 at this point.
36966 Specify filename, including full path, to assembler.
36968 We want to go into the TOC section so at least one .toc will be emitted.
36969 Also, in order to output proper .bs/.es pairs, we need at least one static
36970 [RW] section emitted.
36972 Finally, declare mcount when profiling to make the assembler happy. */
36974 static void
36975 rs6000_xcoff_file_start (void)
36977 rs6000_gen_section_name (&xcoff_bss_section_name,
36978 main_input_filename, ".bss_");
36979 rs6000_gen_section_name (&xcoff_private_data_section_name,
36980 main_input_filename, ".rw_");
36981 rs6000_gen_section_name (&xcoff_read_only_section_name,
36982 main_input_filename, ".ro_");
36983 rs6000_gen_section_name (&xcoff_tls_data_section_name,
36984 main_input_filename, ".tls_");
36985 rs6000_gen_section_name (&xcoff_tbss_section_name,
36986 main_input_filename, ".tbss_[UL]");
36988 fputs ("\t.file\t", asm_out_file);
36989 output_quoted_string (asm_out_file, main_input_filename);
36990 fputc ('\n', asm_out_file);
36991 if (write_symbols != NO_DEBUG)
36992 switch_to_section (private_data_section);
36993 switch_to_section (toc_section);
36994 switch_to_section (text_section);
36995 if (profile_flag)
36996 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
36997 rs6000_file_start ();
37000 /* Output at end of assembler file.
37001 On the RS/6000, referencing data should automatically pull in text. */
37003 static void
37004 rs6000_xcoff_file_end (void)
37006 switch_to_section (text_section);
37007 fputs ("_section_.text:\n", asm_out_file);
37008 switch_to_section (data_section);
37009 fputs (TARGET_32BIT
37010 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
37011 asm_out_file);
37014 struct declare_alias_data
37016 FILE *file;
37017 bool function_descriptor;
37020 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
37022 static bool
37023 rs6000_declare_alias (struct symtab_node *n, void *d)
37025 struct declare_alias_data *data = (struct declare_alias_data *)d;
37026 /* Main symbol is output specially, because varasm machinery does part of
37027 the job for us - we do not need to declare .globl/lglobs and such. */
37028 if (!n->alias || n->weakref)
37029 return false;
37031 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
37032 return false;
37034 /* Prevent assemble_alias from trying to use .set pseudo operation
37035 that does not behave as expected by the middle-end. */
37036 TREE_ASM_WRITTEN (n->decl) = true;
37038 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
37039 char *buffer = (char *) alloca (strlen (name) + 2);
37040 char *p;
37041 int dollar_inside = 0;
37043 strcpy (buffer, name);
37044 p = strchr (buffer, '$');
37045 while (p) {
37046 *p = '_';
37047 dollar_inside++;
37048 p = strchr (p + 1, '$');
37050 if (TREE_PUBLIC (n->decl))
37052 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
37054 if (dollar_inside) {
37055 if (data->function_descriptor)
37056 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
37057 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
37059 if (data->function_descriptor)
37061 fputs ("\t.globl .", data->file);
37062 RS6000_OUTPUT_BASENAME (data->file, buffer);
37063 putc ('\n', data->file);
37065 fputs ("\t.globl ", data->file);
37066 RS6000_OUTPUT_BASENAME (data->file, buffer);
37067 putc ('\n', data->file);
37069 #ifdef ASM_WEAKEN_DECL
37070 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
37071 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
37072 #endif
37074 else
37076 if (dollar_inside)
37078 if (data->function_descriptor)
37079 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
37080 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
37082 if (data->function_descriptor)
37084 fputs ("\t.lglobl .", data->file);
37085 RS6000_OUTPUT_BASENAME (data->file, buffer);
37086 putc ('\n', data->file);
37088 fputs ("\t.lglobl ", data->file);
37089 RS6000_OUTPUT_BASENAME (data->file, buffer);
37090 putc ('\n', data->file);
37092 if (data->function_descriptor)
37093 fputs (".", data->file);
37094 RS6000_OUTPUT_BASENAME (data->file, buffer);
37095 fputs (":\n", data->file);
37096 return false;
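/* Illustrative sketch (not part of this file): the '$' rewriting used in
   rs6000_declare_alias above.  AIX assembler symbols may not contain '$',
   so each '$' becomes '_', and when any were replaced a .rename directive
   maps the mangled label back to the original quoted name.  The symbol
   below is a made-up example.  */
#include <stdio.h>
#include <string.h>

int
main (void)
{
  const char *name = "alias$with$dollars";
  char buffer[64];
  char *p;
  int dollar_inside = 0;

  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p)
    {
      *p = '_';
      dollar_inside++;
      p = strchr (p + 1, '$');
    }

  if (dollar_inside)
    printf ("\t.rename %s,\"%s\"\n", buffer, name);
  printf ("\t.globl %s\n", buffer);
  return 0;
}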
37100 #ifdef HAVE_GAS_HIDDEN
37101 /* Helper function to calculate visibility of a DECL
37102 and return the value as a const string. */
37104 static const char *
37105 rs6000_xcoff_visibility (tree decl)
37107 static const char * const visibility_types[] = {
37108 "", ",protected", ",hidden", ",internal"
37111 enum symbol_visibility vis = DECL_VISIBILITY (decl);
37113 if (TREE_CODE (decl) == FUNCTION_DECL
37114 && cgraph_node::get (decl)
37115 && cgraph_node::get (decl)->instrumentation_clone
37116 && cgraph_node::get (decl)->instrumented_version)
37117 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
37119 return visibility_types[vis];
37121 #endif
37124 /* This macro produces the initial definition of a function name.
37125 On the RS/6000, we need to place an extra '.' in the function name and
37126 output the function descriptor.
37127 Dollar signs are converted to underscores.
37129 The csect for the function will have already been created when
37130 text_section was selected. We do have to go back to that csect, however.
37132 The third and fourth parameters to the .function pseudo-op (16 and 044)
37133 are placeholders which no longer have any use.
37135 Because AIX assembler's .set command has unexpected semantics, we output
37136 all aliases as alternative labels in front of the definition. */
37138 void
37139 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
37141 char *buffer = (char *) alloca (strlen (name) + 1);
37142 char *p;
37143 int dollar_inside = 0;
37144 struct declare_alias_data data = {file, false};
37146 strcpy (buffer, name);
37147 p = strchr (buffer, '$');
37148 while (p) {
37149 *p = '_';
37150 dollar_inside++;
37151 p = strchr (p + 1, '$');
37153 if (TREE_PUBLIC (decl))
37155 if (!RS6000_WEAK || !DECL_WEAK (decl))
37157 if (dollar_inside) {
37158 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
37159 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
37161 fputs ("\t.globl .", file);
37162 RS6000_OUTPUT_BASENAME (file, buffer);
37163 #ifdef HAVE_GAS_HIDDEN
37164 fputs (rs6000_xcoff_visibility (decl), file);
37165 #endif
37166 putc ('\n', file);
37169 else
37171 if (dollar_inside) {
37172 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
37173 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
37175 fputs ("\t.lglobl .", file);
37176 RS6000_OUTPUT_BASENAME (file, buffer);
37177 putc ('\n', file);
37179 fputs ("\t.csect ", file);
37180 RS6000_OUTPUT_BASENAME (file, buffer);
37181 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
37182 RS6000_OUTPUT_BASENAME (file, buffer);
37183 fputs (":\n", file);
37184 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37185 &data, true);
37186 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
37187 RS6000_OUTPUT_BASENAME (file, buffer);
37188 fputs (", TOC[tc0], 0\n", file);
37189 in_section = NULL;
37190 switch_to_section (function_section (decl));
37191 putc ('.', file);
37192 RS6000_OUTPUT_BASENAME (file, buffer);
37193 fputs (":\n", file);
37194 data.function_descriptor = true;
37195 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37196 &data, true);
37197 if (!DECL_IGNORED_P (decl))
37199 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
37200 xcoffout_declare_function (file, decl, buffer);
37201 else if (write_symbols == DWARF2_DEBUG)
37203 name = (*targetm.strip_name_encoding) (name);
37204 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
37207 return;
37211 /* Output assembly language to globalize a symbol from a DECL,
37212 possibly with visibility. */
37214 void
37215 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
37217 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
37218 fputs (GLOBAL_ASM_OP, stream);
37219 RS6000_OUTPUT_BASENAME (stream, name);
37220 #ifdef HAVE_GAS_HIDDEN
37221 fputs (rs6000_xcoff_visibility (decl), stream);
37222 #endif
37223 putc ('\n', stream);
37226 /* Output assembly language to define a symbol as COMMON from a DECL,
37227 possibly with visibility. */
37229 void
37230 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
37231 tree decl ATTRIBUTE_UNUSED,
37232 const char *name,
37233 unsigned HOST_WIDE_INT size,
37234 unsigned HOST_WIDE_INT align)
37236 unsigned HOST_WIDE_INT align2 = 2;
37238 if (align > 32)
37239 align2 = floor_log2 (align / BITS_PER_UNIT);
37240 else if (size > 4)
37241 align2 = 3;
37243 fputs (COMMON_ASM_OP, stream);
37244 RS6000_OUTPUT_BASENAME (stream, name);
37246 fprintf (stream,
37247 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
37248 size, align2);
37250 #ifdef HAVE_GAS_HIDDEN
37251 fputs (rs6000_xcoff_visibility (decl), stream);
37252 #endif
37253 putc ('\n', stream);
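/* Illustrative sketch (not part of this file): the log2 alignment computed
   above for the AIX .comm directive.  The default is 2 (4 bytes), bumped
   to 3 (8 bytes) for objects larger than 4 bytes, and otherwise derived
   from the requested bit alignment when it exceeds 32 bits.  */
#include <stdio.h>

static unsigned
comm_align2 (unsigned long long size, unsigned align_bits)
{
  unsigned align2 = 2;

  if (align_bits > 32)
    {
      unsigned bytes = align_bits / 8;
      align2 = 0;
      while (bytes >>= 1)       /* floor_log2 of the byte alignment */
        align2++;
    }
  else if (size > 4)
    align2 = 3;
  return align2;
}

int
main (void)
{
  printf ("size  4, align  32 bits -> %u\n", comm_align2 (4, 32));   /* 2 */
  printf ("size  8, align  32 bits -> %u\n", comm_align2 (8, 32));   /* 3 */
  printf ("size 16, align 128 bits -> %u\n", comm_align2 (16, 128)); /* 4 */
  return 0;
}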
37256 /* This macro produces the initial definition of a object (variable) name.
37257 Because AIX assembler's .set command has unexpected semantics, we output
37258 all aliases as alternative labels in front of the definition. */
37260 void
37261 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
37263 struct declare_alias_data data = {file, false};
37264 RS6000_OUTPUT_BASENAME (file, name);
37265 fputs (":\n", file);
37266 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37267 &data, true);
37270 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
37272 void
37273 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
37275 fputs (integer_asm_op (size, FALSE), file);
37276 assemble_name (file, label);
37277 fputs ("-$", file);
37280 /* Output a symbol offset relative to the dbase for the current object.
37281 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
37282 signed offsets.
37284 __gcc_unwind_dbase is embedded in all executables/libraries through
37285 libgcc/config/rs6000/crtdbase.S. */
37287 void
37288 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
37290 fputs (integer_asm_op (size, FALSE), file);
37291 assemble_name (file, label);
37292 fputs("-__gcc_unwind_dbase", file);
37295 #ifdef HAVE_AS_TLS
37296 static void
37297 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
37299 rtx symbol;
37300 int flags;
37301 const char *symname;
37303 default_encode_section_info (decl, rtl, first);
37305 /* Careful not to prod global register variables. */
37306 if (!MEM_P (rtl))
37307 return;
37308 symbol = XEXP (rtl, 0);
37309 if (GET_CODE (symbol) != SYMBOL_REF)
37310 return;
37312 flags = SYMBOL_REF_FLAGS (symbol);
37314 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
37315 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
37317 SYMBOL_REF_FLAGS (symbol) = flags;
37319 /* Append mapping class to extern decls. */
37320 symname = XSTR (symbol, 0);
37321 if (decl /* sync condition with assemble_external () */
37322 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
37323 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
37324 || TREE_CODE (decl) == FUNCTION_DECL)
37325 && symname[strlen (symname) - 1] != ']')
37327 char *newname = (char *) alloca (strlen (symname) + 5);
37328 strcpy (newname, symname);
37329 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
37330 ? "[DS]" : "[UA]"));
37331 XSTR (symbol, 0) = ggc_strdup (newname);
37334 #endif /* HAVE_AS_TLS */
37335 #endif /* TARGET_XCOFF */
37337 void
37338 rs6000_asm_weaken_decl (FILE *stream, tree decl,
37339 const char *name, const char *val)
37341 fputs ("\t.weak\t", stream);
37342 RS6000_OUTPUT_BASENAME (stream, name);
37343 if (decl && TREE_CODE (decl) == FUNCTION_DECL
37344 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
37346 if (TARGET_XCOFF)
37347 fputs ("[DS]", stream);
37348 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37349 if (TARGET_XCOFF)
37350 fputs (rs6000_xcoff_visibility (decl), stream);
37351 #endif
37352 fputs ("\n\t.weak\t.", stream);
37353 RS6000_OUTPUT_BASENAME (stream, name);
37355 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37356 if (TARGET_XCOFF)
37357 fputs (rs6000_xcoff_visibility (decl), stream);
37358 #endif
37359 fputc ('\n', stream);
37360 if (val)
37362 #ifdef ASM_OUTPUT_DEF
37363 ASM_OUTPUT_DEF (stream, name, val);
37364 #endif
37365 if (decl && TREE_CODE (decl) == FUNCTION_DECL
37366 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
37368 fputs ("\t.set\t.", stream);
37369 RS6000_OUTPUT_BASENAME (stream, name);
37370 fputs (",.", stream);
37371 RS6000_OUTPUT_BASENAME (stream, val);
37372 fputc ('\n', stream);
37378 /* Return true if INSN should not be copied. */
37380 static bool
37381 rs6000_cannot_copy_insn_p (rtx_insn *insn)
37383 return recog_memoized (insn) >= 0
37384 && get_attr_cannot_copy (insn);
37387 /* Compute a (partial) cost for rtx X. Return true if the complete
37388 cost has been computed, and false if subexpressions should be
37389 scanned. In either case, *TOTAL contains the cost result. */
37391 static bool
37392 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
37393 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
37395 int code = GET_CODE (x);
37397 switch (code)
37399 /* On the RS/6000, if it is valid in the insn, it is free. */
37400 case CONST_INT:
37401 if (((outer_code == SET
37402 || outer_code == PLUS
37403 || outer_code == MINUS)
37404 && (satisfies_constraint_I (x)
37405 || satisfies_constraint_L (x)))
37406 || (outer_code == AND
37407 && (satisfies_constraint_K (x)
37408 || (mode == SImode
37409 ? satisfies_constraint_L (x)
37410 : satisfies_constraint_J (x))))
37411 || ((outer_code == IOR || outer_code == XOR)
37412 && (satisfies_constraint_K (x)
37413 || (mode == SImode
37414 ? satisfies_constraint_L (x)
37415 : satisfies_constraint_J (x))))
37416 || outer_code == ASHIFT
37417 || outer_code == ASHIFTRT
37418 || outer_code == LSHIFTRT
37419 || outer_code == ROTATE
37420 || outer_code == ROTATERT
37421 || outer_code == ZERO_EXTRACT
37422 || (outer_code == MULT
37423 && satisfies_constraint_I (x))
37424 || ((outer_code == DIV || outer_code == UDIV
37425 || outer_code == MOD || outer_code == UMOD)
37426 && exact_log2 (INTVAL (x)) >= 0)
37427 || (outer_code == COMPARE
37428 && (satisfies_constraint_I (x)
37429 || satisfies_constraint_K (x)))
37430 || ((outer_code == EQ || outer_code == NE)
37431 && (satisfies_constraint_I (x)
37432 || satisfies_constraint_K (x)
37433 || (mode == SImode
37434 ? satisfies_constraint_L (x)
37435 : satisfies_constraint_J (x))))
37436 || (outer_code == GTU
37437 && satisfies_constraint_I (x))
37438 || (outer_code == LTU
37439 && satisfies_constraint_P (x)))
37441 *total = 0;
37442 return true;
37444 else if ((outer_code == PLUS
37445 && reg_or_add_cint_operand (x, VOIDmode))
37446 || (outer_code == MINUS
37447 && reg_or_sub_cint_operand (x, VOIDmode))
37448 || ((outer_code == SET
37449 || outer_code == IOR
37450 || outer_code == XOR)
37451 && (INTVAL (x)
37452 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
37454 *total = COSTS_N_INSNS (1);
37455 return true;
37457 /* FALLTHRU */
37459 case CONST_DOUBLE:
37460 case CONST_WIDE_INT:
37461 case CONST:
37462 case HIGH:
37463 case SYMBOL_REF:
37464 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37465 return true;
37467 case MEM:
37468 /* When optimizing for size, MEM should be slightly more expensive
37469 than generating address, e.g., (plus (reg) (const)).
37470 L1 cache latency is about two instructions. */
37471 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37472 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
37473 *total += COSTS_N_INSNS (100);
37474 return true;
37476 case LABEL_REF:
37477 *total = 0;
37478 return true;
37480 case PLUS:
37481 case MINUS:
37482 if (FLOAT_MODE_P (mode))
37483 *total = rs6000_cost->fp;
37484 else
37485 *total = COSTS_N_INSNS (1);
37486 return false;
37488 case MULT:
37489 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37490 && satisfies_constraint_I (XEXP (x, 1)))
37492 if (INTVAL (XEXP (x, 1)) >= -256
37493 && INTVAL (XEXP (x, 1)) <= 255)
37494 *total = rs6000_cost->mulsi_const9;
37495 else
37496 *total = rs6000_cost->mulsi_const;
37498 else if (mode == SFmode)
37499 *total = rs6000_cost->fp;
37500 else if (FLOAT_MODE_P (mode))
37501 *total = rs6000_cost->dmul;
37502 else if (mode == DImode)
37503 *total = rs6000_cost->muldi;
37504 else
37505 *total = rs6000_cost->mulsi;
37506 return false;
37508 case FMA:
37509 if (mode == SFmode)
37510 *total = rs6000_cost->fp;
37511 else
37512 *total = rs6000_cost->dmul;
37513 break;
37515 case DIV:
37516 case MOD:
37517 if (FLOAT_MODE_P (mode))
37519 *total = mode == DFmode ? rs6000_cost->ddiv
37520 : rs6000_cost->sdiv;
37521 return false;
37523 /* FALLTHRU */
37525 case UDIV:
37526 case UMOD:
37527 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37528 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
37530 if (code == DIV || code == MOD)
37531 /* Shift, addze */
37532 *total = COSTS_N_INSNS (2);
37533 else
37534 /* Shift */
37535 *total = COSTS_N_INSNS (1);
37537 else
37539 if (GET_MODE (XEXP (x, 1)) == DImode)
37540 *total = rs6000_cost->divdi;
37541 else
37542 *total = rs6000_cost->divsi;
37544 /* Add in shift and subtract for MOD unless we have a mod instruction. */
37545 if (!TARGET_MODULO && (code == MOD || code == UMOD))
37546 *total += COSTS_N_INSNS (2);
37547 return false;
37549 case CTZ:
37550 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
37551 return false;
37553 case FFS:
37554 *total = COSTS_N_INSNS (4);
37555 return false;
37557 case POPCOUNT:
37558 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
37559 return false;
37561 case PARITY:
37562 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
37563 return false;
37565 case NOT:
37566 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
37567 *total = 0;
37568 else
37569 *total = COSTS_N_INSNS (1);
37570 return false;
37572 case AND:
37573 if (CONST_INT_P (XEXP (x, 1)))
37575 rtx left = XEXP (x, 0);
37576 rtx_code left_code = GET_CODE (left);
37578 /* rotate-and-mask: 1 insn. */
37579 if ((left_code == ROTATE
37580 || left_code == ASHIFT
37581 || left_code == LSHIFTRT)
37582 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
37584 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
37585 if (!CONST_INT_P (XEXP (left, 1)))
37586 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
37587 *total += COSTS_N_INSNS (1);
37588 return true;
37591 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
37592 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
37593 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
37594 || (val & 0xffff) == val
37595 || (val & 0xffff0000) == val
37596 || ((val & 0xffff) == 0 && mode == SImode))
37598 *total = rtx_cost (left, mode, AND, 0, speed);
37599 *total += COSTS_N_INSNS (1);
37600 return true;
37603 /* 2 insns. */
37604 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
37606 *total = rtx_cost (left, mode, AND, 0, speed);
37607 *total += COSTS_N_INSNS (2);
37608 return true;
37612 *total = COSTS_N_INSNS (1);
37613 return false;
37615 case IOR:
37616 /* FIXME */
37617 *total = COSTS_N_INSNS (1);
37618 return true;
37620 case CLZ:
37621 case XOR:
37622 case ZERO_EXTRACT:
37623 *total = COSTS_N_INSNS (1);
37624 return false;
37626 case ASHIFT:
37627 /* The EXTSWSLI instruction is a combined instruction. Don't count both
37628 the sign extend and shift separately within the insn. */
37629 if (TARGET_EXTSWSLI && mode == DImode
37630 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
37631 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
37633 *total = 0;
37634 return false;
37636 /* fall through */
37638 case ASHIFTRT:
37639 case LSHIFTRT:
37640 case ROTATE:
37641 case ROTATERT:
37642 /* Handle mul_highpart. */
37643 if (outer_code == TRUNCATE
37644 && GET_CODE (XEXP (x, 0)) == MULT)
37646 if (mode == DImode)
37647 *total = rs6000_cost->muldi;
37648 else
37649 *total = rs6000_cost->mulsi;
37650 return true;
37652 else if (outer_code == AND)
37653 *total = 0;
37654 else
37655 *total = COSTS_N_INSNS (1);
37656 return false;
37658 case SIGN_EXTEND:
37659 case ZERO_EXTEND:
37660 if (GET_CODE (XEXP (x, 0)) == MEM)
37661 *total = 0;
37662 else
37663 *total = COSTS_N_INSNS (1);
37664 return false;
37666 case COMPARE:
37667 case NEG:
37668 case ABS:
37669 if (!FLOAT_MODE_P (mode))
37671 *total = COSTS_N_INSNS (1);
37672 return false;
37674 /* FALLTHRU */
37676 case FLOAT:
37677 case UNSIGNED_FLOAT:
37678 case FIX:
37679 case UNSIGNED_FIX:
37680 case FLOAT_TRUNCATE:
37681 *total = rs6000_cost->fp;
37682 return false;
37684 case FLOAT_EXTEND:
37685 if (mode == DFmode)
37686 *total = rs6000_cost->sfdf_convert;
37687 else
37688 *total = rs6000_cost->fp;
37689 return false;
37691 case UNSPEC:
37692 switch (XINT (x, 1))
37694 case UNSPEC_FRSP:
37695 *total = rs6000_cost->fp;
37696 return true;
37698 default:
37699 break;
37701 break;
37703 case CALL:
37704 case IF_THEN_ELSE:
37705 if (!speed)
37707 *total = COSTS_N_INSNS (1);
37708 return true;
37710 else if (FLOAT_MODE_P (mode)
37711 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
37713 *total = rs6000_cost->fp;
37714 return false;
37716 break;
37718 case NE:
37719 case EQ:
37720 case GTU:
37721 case LTU:
37722 /* Carry bit requires mode == Pmode.
37723 NEG or PLUS already counted so only add one. */
37724 if (mode == Pmode
37725 && (outer_code == NEG || outer_code == PLUS))
37727 *total = COSTS_N_INSNS (1);
37728 return true;
37730 if (outer_code == SET)
37732 if (XEXP (x, 1) == const0_rtx)
37734 if (TARGET_ISEL && !TARGET_MFCRF)
37735 *total = COSTS_N_INSNS (8);
37736 else
37737 *total = COSTS_N_INSNS (2);
37738 return true;
37740 else
37742 *total = COSTS_N_INSNS (3);
37743 return false;
37746 /* FALLTHRU */
37748 case GT:
37749 case LT:
37750 case UNORDERED:
37751 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
37753 if (TARGET_ISEL && !TARGET_MFCRF)
37754 *total = COSTS_N_INSNS (8);
37755 else
37756 *total = COSTS_N_INSNS (2);
37757 return true;
37759 /* CC COMPARE. */
37760 if (outer_code == COMPARE)
37762 *total = 0;
37763 return true;
37765 break;
37767 default:
37768 break;
37771 return false;
37774 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
37776 static bool
37777 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
37778 int opno, int *total, bool speed)
37780 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
37782 fprintf (stderr,
37783 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
37784 "opno = %d, total = %d, speed = %s, x:\n",
37785 ret ? "complete" : "scan inner",
37786 GET_MODE_NAME (mode),
37787 GET_RTX_NAME (outer_code),
37788 opno,
37789 *total,
37790 speed ? "true" : "false");
37792 debug_rtx (x);
37794 return ret;
37797 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
37799 static int
37800 rs6000_debug_address_cost (rtx x, machine_mode mode,
37801 addr_space_t as, bool speed)
37803 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
37805 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
37806 ret, speed ? "true" : "false");
37807 debug_rtx (x);
37809 return ret;
37813 /* A C expression returning the cost of moving data from a register of class
37814 CLASS1 to one of CLASS2. */
37816 static int
37817 rs6000_register_move_cost (machine_mode mode,
37818 reg_class_t from, reg_class_t to)
37820 int ret;
37822 if (TARGET_DEBUG_COST)
37823 dbg_cost_ctrl++;
37825 /* Moves from/to GENERAL_REGS. */
37826 if (reg_classes_intersect_p (to, GENERAL_REGS)
37827 || reg_classes_intersect_p (from, GENERAL_REGS))
37829 reg_class_t rclass = from;
37831 if (! reg_classes_intersect_p (to, GENERAL_REGS))
37832 rclass = to;
37834 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
37835 ret = (rs6000_memory_move_cost (mode, rclass, false)
37836 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
37838 /* It's more expensive to move CR_REGS than CR0_REGS because of the
37839 shift. */
37840 else if (rclass == CR_REGS)
37841 ret = 4;
37843 /* For those processors that have slow LR/CTR moves, make them more
37844 expensive than memory in order to bias spills to memory. */
37845 else if ((rs6000_cpu == PROCESSOR_POWER6
37846 || rs6000_cpu == PROCESSOR_POWER7
37847 || rs6000_cpu == PROCESSOR_POWER8
37848 || rs6000_cpu == PROCESSOR_POWER9)
37849 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
37850 ret = 6 * hard_regno_nregs (0, mode);
37852 else
37853 /* A move will cost one instruction per GPR moved. */
37854 ret = 2 * hard_regno_nregs (0, mode);
37857 /* If we have VSX, we can easily move between FPR or Altivec registers. */
37858 else if (VECTOR_MEM_VSX_P (mode)
37859 && reg_classes_intersect_p (to, VSX_REGS)
37860 && reg_classes_intersect_p (from, VSX_REGS))
37861 ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
37863 /* Moving between two similar registers is just one instruction. */
37864 else if (reg_classes_intersect_p (to, from))
37865 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
37867 /* Everything else has to go through GENERAL_REGS. */
37868 else
37869 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
37870 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
37872 if (TARGET_DEBUG_COST)
37874 if (dbg_cost_ctrl == 1)
37875 fprintf (stderr,
37876 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
37877 ret, GET_MODE_NAME (mode), reg_class_names[from],
37878 reg_class_names[to]);
37879 dbg_cost_ctrl--;
37882 return ret;
37885 /* A C expression returning the cost of moving data of MODE from a register to
37886 or from memory. */
37888 static int
37889 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
37890 bool in ATTRIBUTE_UNUSED)
37892 int ret;
37894 if (TARGET_DEBUG_COST)
37895 dbg_cost_ctrl++;
37897 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
37898 ret = 4 * hard_regno_nregs (0, mode);
37899 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
37900 || reg_classes_intersect_p (rclass, VSX_REGS)))
37901 ret = 4 * hard_regno_nregs (32, mode);
37902 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
37903 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
37904 else
37905 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
37907 if (TARGET_DEBUG_COST)
37909 if (dbg_cost_ctrl == 1)
37910 fprintf (stderr,
37911 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
37912 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
37913 dbg_cost_ctrl--;
37916 return ret;
37919 /* Returns a code for a target-specific builtin that implements
37920 reciprocal of the function, or NULL_TREE if not available. */
37922 static tree
37923 rs6000_builtin_reciprocal (tree fndecl)
37925 switch (DECL_FUNCTION_CODE (fndecl))
37927 case VSX_BUILTIN_XVSQRTDP:
37928 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
37929 return NULL_TREE;
37931 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
37933 case VSX_BUILTIN_XVSQRTSP:
37934 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
37935 return NULL_TREE;
37937 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
37939 default:
37940 return NULL_TREE;
37944 /* Load up a constant. If the mode is a vector mode, splat the value across
37945 all of the vector elements. */
37947 static rtx
37948 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
37950 rtx reg;
37952 if (mode == SFmode || mode == DFmode)
37954 rtx d = const_double_from_real_value (dconst, mode);
37955 reg = force_reg (mode, d);
37957 else if (mode == V4SFmode)
37959 rtx d = const_double_from_real_value (dconst, SFmode);
37960 rtvec v = gen_rtvec (4, d, d, d, d);
37961 reg = gen_reg_rtx (mode);
37962 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37964 else if (mode == V2DFmode)
37966 rtx d = const_double_from_real_value (dconst, DFmode);
37967 rtvec v = gen_rtvec (2, d, d);
37968 reg = gen_reg_rtx (mode);
37969 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37971 else
37972 gcc_unreachable ();
37974 return reg;
37977 /* Generate an FMA instruction. */
37979 static void
37980 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
37982 machine_mode mode = GET_MODE (target);
37983 rtx dst;
37985 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
37986 gcc_assert (dst != NULL);
37988 if (dst != target)
37989 emit_move_insn (target, dst);
37992 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
37994 static void
37995 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
37997 machine_mode mode = GET_MODE (dst);
37998 rtx r;
38000 /* This is a tad more complicated, since the fnma_optab is for
38001 a different expression: fma(-m1, m2, a), which is the same
38002 thing except in the case of signed zeros.
38004 Fortunately we know that if FMA is supported that FNMSUB is
38005 also supported in the ISA. Just expand it directly. */
38007 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
38009 r = gen_rtx_NEG (mode, a);
38010 r = gen_rtx_FMA (mode, m1, m2, r);
38011 r = gen_rtx_NEG (mode, r);
38012 emit_insn (gen_rtx_SET (dst, r));
38015 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
38016 add a reg_note saying that this was a division. Support both scalar and
38017 vector divide. Assumes no trapping math and finite arguments. */
38019 void
38020 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
38022 machine_mode mode = GET_MODE (dst);
38023 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
38024 int i;
38026 /* Low precision estimates guarantee 5 bits of accuracy. High
38027 precision estimates guarantee 14 bits of accuracy. SFmode
38028 requires 23 bits of accuracy. DFmode requires 52 bits of
38029 accuracy. Each pass at least doubles the accuracy, leading
38030 to the following. */
38031 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
38032 if (mode == DFmode || mode == V2DFmode)
38033 passes++;
38035 enum insn_code code = optab_handler (smul_optab, mode);
38036 insn_gen_fn gen_mul = GEN_FCN (code);
38038 gcc_assert (code != CODE_FOR_nothing);
38040 one = rs6000_load_constant_and_splat (mode, dconst1);
38042 /* x0 = 1./d estimate */
38043 x0 = gen_reg_rtx (mode);
38044 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
38045 UNSPEC_FRES)));
38047 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
38048 if (passes > 1) {
38050 /* e0 = 1. - d * x0 */
38051 e0 = gen_reg_rtx (mode);
38052 rs6000_emit_nmsub (e0, d, x0, one);
38054 /* x1 = x0 + e0 * x0 */
38055 x1 = gen_reg_rtx (mode);
38056 rs6000_emit_madd (x1, e0, x0, x0);
38058 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
38059 ++i, xprev = xnext, eprev = enext) {
38061 /* enext = eprev * eprev */
38062 enext = gen_reg_rtx (mode);
38063 emit_insn (gen_mul (enext, eprev, eprev));
38065 /* xnext = xprev + enext * xprev */
38066 xnext = gen_reg_rtx (mode);
38067 rs6000_emit_madd (xnext, enext, xprev, xprev);
38070 } else
38071 xprev = x0;
38073 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
38075 /* u = n * xprev */
38076 u = gen_reg_rtx (mode);
38077 emit_insn (gen_mul (u, n, xprev));
38079 /* v = n - (d * u) */
38080 v = gen_reg_rtx (mode);
38081 rs6000_emit_nmsub (v, d, u, n);
38083 /* dst = (v * xprev) + u */
38084 rs6000_emit_madd (dst, v, xprev, u);
38086 if (note_p)
38087 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
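/* Illustrative sketch (not part of this file): the Newton-Raphson scheme
   emitted above, replayed in plain C doubles.  recip_estimate is a crude
   stand-in for the hardware FRES estimate (a linear fit good to a few
   bits); the `1. - d * x' and `n - d * u' residuals correspond to the
   rs6000_emit_nmsub calls above.  Compile with -lm.  */
#include <stdio.h>
#include <math.h>

static double
recip_estimate (double d)
{
  int ex;
  double m = frexp (d, &ex);    /* d = m * 2^ex, 0.5 <= m < 1 */
  /* Linear approximation of 1/m, accurate to a few bits.  */
  return ldexp (48.0 / 17.0 - 32.0 / 17.0 * m, -ex);
}

static double
swdiv (double n, double d, int passes)
{
  double x = recip_estimate (d);        /* x0 ~ 1/d */

  if (passes > 1)
    {
      double e = 1.0 - d * x;           /* e0 = 1. - d * x0 */
      x = x + e * x;                    /* x1 = x0 + e0 * x0 */
      for (int i = 0; i < passes - 2; i++)
        {
          e = e * e;                    /* enext = eprev * eprev */
          x = x + e * x;                /* xnext = xprev + enext * xprev */
        }
    }

  double u = n * x;                     /* u = n * xprev */
  double v = n - d * u;                 /* v = n - d * u */
  return v * x + u;                     /* dst = v * xprev + u */
}

int
main (void)
{
  /* passes = 4 matches the DFmode, low-precision-estimate case above.  */
  printf ("355/113 = %.17g (hardware divide %.17g)\n",
          swdiv (355.0, 113.0, 4), 355.0 / 113.0);
  return 0;
}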
38090 /* Goldschmidt's Algorithm for single/double-precision floating point
38091 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
38093 void
38094 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
38096 machine_mode mode = GET_MODE (src);
38097 rtx e = gen_reg_rtx (mode);
38098 rtx g = gen_reg_rtx (mode);
38099 rtx h = gen_reg_rtx (mode);
38101 /* Low precision estimates guarantee 5 bits of accuracy. High
38102 precision estimates guarantee 14 bits of accuracy. SFmode
38103 requires 23 bits of accuracy. DFmode requires 52 bits of
38104 accuracy. Each pass at least doubles the accuracy, leading
38105 to the following. */
38106 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
38107 if (mode == DFmode || mode == V2DFmode)
38108 passes++;
38110 int i;
38111 rtx mhalf;
38112 enum insn_code code = optab_handler (smul_optab, mode);
38113 insn_gen_fn gen_mul = GEN_FCN (code);
38115 gcc_assert (code != CODE_FOR_nothing);
38117 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
38119 /* e = rsqrt estimate */
38120 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
38121 UNSPEC_RSQRT)));
38123 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
38124 if (!recip)
38126 rtx zero = force_reg (mode, CONST0_RTX (mode));
38128 if (mode == SFmode)
38130 rtx target = emit_conditional_move (e, GT, src, zero, mode,
38131 e, zero, mode, 0);
38132 if (target != e)
38133 emit_move_insn (e, target);
38135 else
38137 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
38138 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
38142 /* g = sqrt estimate. */
38143 emit_insn (gen_mul (g, e, src));
38144 /* h = 1/(2*sqrt) estimate. */
38145 emit_insn (gen_mul (h, e, mhalf));
38147 if (recip)
38149 if (passes == 1)
38151 rtx t = gen_reg_rtx (mode);
38152 rs6000_emit_nmsub (t, g, h, mhalf);
38153 /* Apply correction directly to 1/rsqrt estimate. */
38154 rs6000_emit_madd (dst, e, t, e);
38156 else
38158 for (i = 0; i < passes; i++)
38160 rtx t1 = gen_reg_rtx (mode);
38161 rtx g1 = gen_reg_rtx (mode);
38162 rtx h1 = gen_reg_rtx (mode);
38164 rs6000_emit_nmsub (t1, g, h, mhalf);
38165 rs6000_emit_madd (g1, g, t1, g);
38166 rs6000_emit_madd (h1, h, t1, h);
38168 g = g1;
38169 h = h1;
38171 /* Multiply by 2 for 1/rsqrt. */
38172 emit_insn (gen_add3_insn (dst, h, h));
38175 else
38177 rtx t = gen_reg_rtx (mode);
38178 rs6000_emit_nmsub (t, g, h, mhalf);
38179 rs6000_emit_madd (dst, g, t, g);
38182 return;
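/* Illustrative sketch (not part of this file): the Goldschmidt iteration
   emitted above for the recip (rsqrt) case with passes > 1, replayed in
   plain C doubles.  rsqrt_estimate stands in for the hardware UNSPEC_RSQRT
   estimate (here the well-known bit-trick seed); g converges to sqrt(src)
   and h to 1/(2*sqrt(src)), so the reciprocal square root falls out as
   h + h.  The zero filtering done above for sqrt(0.0) is omitted.
   Compile with -lm.  */
#include <stdio.h>
#include <math.h>
#include <stdint.h>

static double
rsqrt_estimate (double x)
{
  union { double d; uint64_t u; } v = { x };
  v.u = 0x5fe6eb50c7b537a9ULL - (v.u >> 1);  /* a few bits of accuracy */
  return v.d;
}

static void
goldschmidt (double src, int passes, double *sqrt_out, double *rsqrt_out)
{
  double e = rsqrt_estimate (src);
  double g = e * src;           /* g = sqrt estimate */
  double h = e * 0.5;           /* h = 1/(2*sqrt) estimate */

  for (int i = 0; i < passes; i++)
    {
      double t = 0.5 - g * h;   /* residual (rs6000_emit_nmsub above) */
      g = g + t * g;            /* refine sqrt */
      h = h + t * h;            /* refine 1/(2*sqrt) */
    }
  *sqrt_out = g;
  *rsqrt_out = h + h;           /* multiply by 2 for 1/rsqrt, as above */
}

int
main (void)
{
  double s, r;
  goldschmidt (2.0, 4, &s, &r);
  printf ("sqrt(2)  = %.17g (libm %.17g)\n", s, sqrt (2.0));
  printf ("rsqrt(2) = %.17g (libm %.17g)\n", r, 1.0 / sqrt (2.0));
  return 0;
}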
38185 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
38186 (Power7) targets. DST is the target, and SRC is the argument operand. */
38188 void
38189 rs6000_emit_popcount (rtx dst, rtx src)
38191 machine_mode mode = GET_MODE (dst);
38192 rtx tmp1, tmp2;
38194 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
38195 if (TARGET_POPCNTD)
38197 if (mode == SImode)
38198 emit_insn (gen_popcntdsi2 (dst, src));
38199 else
38200 emit_insn (gen_popcntddi2 (dst, src));
38201 return;
38204 tmp1 = gen_reg_rtx (mode);
38206 if (mode == SImode)
38208 emit_insn (gen_popcntbsi2 (tmp1, src));
38209 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
38210 NULL_RTX, 0);
38211 tmp2 = force_reg (SImode, tmp2);
38212 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
38214 else
38216 emit_insn (gen_popcntbdi2 (tmp1, src));
38217 tmp2 = expand_mult (DImode, tmp1,
38218 GEN_INT ((HOST_WIDE_INT)
38219 0x01010101 << 32 | 0x01010101),
38220 NULL_RTX, 0);
38221 tmp2 = force_reg (DImode, tmp2);
38222 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
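/* Illustrative sketch (not part of this file): the popcntb-plus-multiply
   sequence emitted above for SImode, in plain C.  per_byte_popcount is a
   software stand-in for the popcntb instruction; multiplying its result by
   0x01010101 accumulates all four byte counts into the most significant
   byte, which the final shift by 24 extracts.  */
#include <stdio.h>
#include <stdint.h>

static uint32_t
per_byte_popcount (uint32_t x)          /* stand-in for popcntb */
{
  uint32_t r = 0;
  for (int b = 0; b < 4; b++)
    {
      uint32_t byte = (x >> (8 * b)) & 0xff, c = 0;
      while (byte)
        {
          c += byte & 1;
          byte >>= 1;
        }
      r |= c << (8 * b);
    }
  return r;
}

int
main (void)
{
  uint32_t src = 0xf00f0003;                    /* 10 bits set */
  uint32_t tmp1 = per_byte_popcount (src);      /* per-byte counts */
  uint32_t tmp2 = tmp1 * 0x01010101u;           /* sum into top byte */
  printf ("popcount(0x%08x) = %u\n", src, tmp2 >> 24);
  return 0;
}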
38227 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
38228 target, and SRC is the argument operand. */
38230 void
38231 rs6000_emit_parity (rtx dst, rtx src)
38233 machine_mode mode = GET_MODE (dst);
38234 rtx tmp;
38236 tmp = gen_reg_rtx (mode);
38238 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
38239 if (TARGET_CMPB)
38241 if (mode == SImode)
38243 emit_insn (gen_popcntbsi2 (tmp, src));
38244 emit_insn (gen_paritysi2_cmpb (dst, tmp));
38246 else
38248 emit_insn (gen_popcntbdi2 (tmp, src));
38249 emit_insn (gen_paritydi2_cmpb (dst, tmp));
38251 return;
38254 if (mode == SImode)
38256 /* Is mult+shift >= shift+xor+shift+xor? */
38257 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
38259 rtx tmp1, tmp2, tmp3, tmp4;
38261 tmp1 = gen_reg_rtx (SImode);
38262 emit_insn (gen_popcntbsi2 (tmp1, src));
38264 tmp2 = gen_reg_rtx (SImode);
38265 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
38266 tmp3 = gen_reg_rtx (SImode);
38267 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
38269 tmp4 = gen_reg_rtx (SImode);
38270 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
38271 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
38273 else
38274 rs6000_emit_popcount (tmp, src);
38275 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
38277 else
38279 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
38280 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
38282 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
38284 tmp1 = gen_reg_rtx (DImode);
38285 emit_insn (gen_popcntbdi2 (tmp1, src));
38287 tmp2 = gen_reg_rtx (DImode);
38288 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
38289 tmp3 = gen_reg_rtx (DImode);
38290 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
38292 tmp4 = gen_reg_rtx (DImode);
38293 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
38294 tmp5 = gen_reg_rtx (DImode);
38295 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
38297 tmp6 = gen_reg_rtx (DImode);
38298 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
38299 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
38301 else
38302 rs6000_emit_popcount (tmp, src);
38303 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
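/* Illustrative sketch (not part of this file): the multiply-free parity
   fold chosen above for SImode when multiplies are expensive.  Starting
   from the per-byte counts (a popcntb stand-in, as in the popcount sketch),
   xor-folding the upper half onto the lower half twice leaves the overall
   parity in bit 0.  */
#include <stdio.h>
#include <stdint.h>

static uint32_t
per_byte_popcount (uint32_t x)          /* stand-in for popcntb */
{
  uint32_t r = 0;
  for (int b = 0; b < 4; b++)
    {
      uint32_t byte = (x >> (8 * b)) & 0xff, c = 0;
      while (byte)
        {
          c += byte & 1;
          byte >>= 1;
        }
      r |= c << (8 * b);
    }
  return r;
}

int
main (void)
{
  uint32_t src = 0xf00f0007;            /* 11 bits set: odd parity */
  uint32_t tmp1 = per_byte_popcount (src);
  uint32_t tmp3 = tmp1 ^ (tmp1 >> 16);  /* fold halfword counts */
  uint32_t tmp = tmp3 ^ (tmp3 >> 8);    /* fold byte counts */
  printf ("parity(0x%08x) = %u\n", src, tmp & 1);
  return 0;
}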
38307 /* Expand an Altivec constant permutation for little endian mode.
38308 There are two issues: First, the two input operands must be
38309 swapped so that together they form a double-wide array in LE
38310 order. Second, the vperm instruction has surprising behavior
38311 in LE mode: it interprets the elements of the source vectors
38312 in BE mode ("left to right") and interprets the elements of
38313 the destination vector in LE mode ("right to left"). To
38314 correct for this, we must subtract each element of the permute
38315 control vector from 31.
38317 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
38318 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
38319 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
38320 serve as the permute control vector. Then, in BE mode,
38322 vperm 9,10,11,12
38324 places the desired result in vr9. However, in LE mode the
38325 vector contents will be
38327 vr10 = 00000003 00000002 00000001 00000000
38328 vr11 = 00000007 00000006 00000005 00000004
38330 The result of the vperm using the same permute control vector is
38332 vr9 = 05000000 07000000 01000000 03000000
38334 That is, the leftmost 4 bytes of vr10 are interpreted as the
38335 source for the rightmost 4 bytes of vr9, and so on.
38337 If we change the permute control vector to
38339 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
38341 and issue
38343 vperm 9,11,10,12
38345 we get the desired
38347 vr9 = 00000006 00000004 00000002 00000000. */
38349 void
38350 altivec_expand_vec_perm_const_le (rtx operands[4])
38352 unsigned int i;
38353 rtx perm[16];
38354 rtx constv, unspec;
38355 rtx target = operands[0];
38356 rtx op0 = operands[1];
38357 rtx op1 = operands[2];
38358 rtx sel = operands[3];
38360 /* Unpack and adjust the constant selector. */
38361 for (i = 0; i < 16; ++i)
38363 rtx e = XVECEXP (sel, 0, i);
38364 unsigned int elt = 31 - (INTVAL (e) & 31);
38365 perm[i] = GEN_INT (elt);
38368 /* Expand to a permute, swapping the inputs and using the
38369 adjusted selector. */
38370 if (!REG_P (op0))
38371 op0 = force_reg (V16QImode, op0);
38372 if (!REG_P (op1))
38373 op1 = force_reg (V16QImode, op1);
38375 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
38376 constv = force_reg (V16QImode, constv);
38377 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
38378 UNSPEC_VPERM);
38379 if (!REG_P (target))
38381 rtx tmp = gen_reg_rtx (V16QImode);
38382 emit_move_insn (tmp, unspec);
38383 unspec = tmp;
38386 emit_move_insn (target, unspec);
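/* Illustrative sketch (not part of this file): the selector adjustment
   performed above.  Each constant selector element e becomes 31 - (e & 31)
   while the two inputs are swapped; running it on the selector from the
   worked example in the comment reproduces the vr12 value given there.  */
#include <stdio.h>

int
main (void)
{
  /* Selector that extracts words {0, 2, 4, 6} of the concatenated
     inputs, as in the example above.  */
  static const unsigned char sel[16] =
    { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
  unsigned char perm[16];

  for (int i = 0; i < 16; ++i)
    perm[i] = 31 - (sel[i] & 31);

  for (int i = 0; i < 16; ++i)
    printf ("%u%s", perm[i], i == 15 ? "\n" : ",");
  /* Prints 31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4.  */
  return 0;
}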
38389 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
38390 permute control vector. But here it's not a constant, so we must
38391 generate a vector NAND or NOR to do the adjustment. */
38393 void
38394 altivec_expand_vec_perm_le (rtx operands[4])
38396 rtx notx, iorx, unspec;
38397 rtx target = operands[0];
38398 rtx op0 = operands[1];
38399 rtx op1 = operands[2];
38400 rtx sel = operands[3];
38401 rtx tmp = target;
38402 rtx norreg = gen_reg_rtx (V16QImode);
38403 machine_mode mode = GET_MODE (target);
38405 /* Get everything in regs so the pattern matches. */
38406 if (!REG_P (op0))
38407 op0 = force_reg (mode, op0);
38408 if (!REG_P (op1))
38409 op1 = force_reg (mode, op1);
38410 if (!REG_P (sel))
38411 sel = force_reg (V16QImode, sel);
38412 if (!REG_P (target))
38413 tmp = gen_reg_rtx (mode);
38415 if (TARGET_P9_VECTOR)
38417 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
38418 UNSPEC_VPERMR);
38420 else
38422 /* Invert the selector with a VNAND if available, else a VNOR.
38423 The VNAND is preferred for future fusion opportunities. */
38424 notx = gen_rtx_NOT (V16QImode, sel);
38425 iorx = (TARGET_P8_VECTOR
38426 ? gen_rtx_IOR (V16QImode, notx, notx)
38427 : gen_rtx_AND (V16QImode, notx, notx));
38428 emit_insn (gen_rtx_SET (norreg, iorx));
38430 /* Permute with operands reversed and adjusted selector. */
38431 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
38432 UNSPEC_VPERM);
38435 /* Copy into target, possibly by way of a register. */
38436 if (!REG_P (target))
38438 emit_move_insn (tmp, unspec);
38439 unspec = tmp;
38442 emit_move_insn (target, unspec);
38445 /* Expand an Altivec constant permutation. Return true if we match
38446 an efficient implementation; false to fall back to VPERM. */
38448 bool
38449 altivec_expand_vec_perm_const (rtx operands[4])
38451 struct altivec_perm_insn {
38452 HOST_WIDE_INT mask;
38453 enum insn_code impl;
38454 unsigned char perm[16];
38456 static const struct altivec_perm_insn patterns[] = {
38457 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
38458 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
38459 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
38460 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
38461 { OPTION_MASK_ALTIVEC,
38462 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
38463 : CODE_FOR_altivec_vmrglb_direct),
38464 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
38465 { OPTION_MASK_ALTIVEC,
38466 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
38467 : CODE_FOR_altivec_vmrglh_direct),
38468 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
38469 { OPTION_MASK_ALTIVEC,
38470 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
38471 : CODE_FOR_altivec_vmrglw_direct),
38472 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
38473 { OPTION_MASK_ALTIVEC,
38474 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
38475 : CODE_FOR_altivec_vmrghb_direct),
38476 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
38477 { OPTION_MASK_ALTIVEC,
38478 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
38479 : CODE_FOR_altivec_vmrghh_direct),
38480 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
38481 { OPTION_MASK_ALTIVEC,
38482 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
38483 : CODE_FOR_altivec_vmrghw_direct),
38484 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
38485 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
38486 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
38487 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
38488 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
38491 unsigned int i, j, elt, which;
38492 unsigned char perm[16];
38493 rtx target, op0, op1, sel, x;
38494 bool one_vec;
38496 target = operands[0];
38497 op0 = operands[1];
38498 op1 = operands[2];
38499 sel = operands[3];
38501 /* Unpack the constant selector. */
38502 for (i = which = 0; i < 16; ++i)
38504 rtx e = XVECEXP (sel, 0, i);
38505 elt = INTVAL (e) & 31;
38506 which |= (elt < 16 ? 1 : 2);
38507 perm[i] = elt;
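/* After the loop, bit 0 of WHICH is set if any element selects from
   op0 (indices 0-15) and bit 1 if any selects from op1 (indices
   16-31). */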
38510 /* Simplify the constant selector based on operands. */
38511 switch (which)
38513 default:
38514 gcc_unreachable ();
38516 case 3:
38517 one_vec = false;
38518 if (!rtx_equal_p (op0, op1))
38519 break;
38520 /* FALLTHRU */
38522 case 2:
38523 for (i = 0; i < 16; ++i)
38524 perm[i] &= 15;
38525 op0 = op1;
38526 one_vec = true;
38527 break;
38529 case 1:
38530 op1 = op0;
38531 one_vec = true;
38532 break;
38535 /* Look for splat patterns. */
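/* E.g. a constant selector of {3,3,...,3} is a byte splat of element 3
   and maps to a single vspltb; the checks below catch the analogous
   halfword (vsplth) and word (vspltw) splats. */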
38536 if (one_vec)
38538 elt = perm[0];
38540 for (i = 0; i < 16; ++i)
38541 if (perm[i] != elt)
38542 break;
38543 if (i == 16)
38545 if (!BYTES_BIG_ENDIAN)
38546 elt = 15 - elt;
38547 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
38548 return true;
38551 if (elt % 2 == 0)
38553 for (i = 0; i < 16; i += 2)
38554 if (perm[i] != elt || perm[i + 1] != elt + 1)
38555 break;
38556 if (i == 16)
38558 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
38559 x = gen_reg_rtx (V8HImode);
38560 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
38561 GEN_INT (field)));
38562 emit_move_insn (target, gen_lowpart (V16QImode, x));
38563 return true;
38567 if (elt % 4 == 0)
38569 for (i = 0; i < 16; i += 4)
38570 if (perm[i] != elt
38571 || perm[i + 1] != elt + 1
38572 || perm[i + 2] != elt + 2
38573 || perm[i + 3] != elt + 3)
38574 break;
38575 if (i == 16)
38577 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
38578 x = gen_reg_rtx (V4SImode);
38579 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
38580 GEN_INT (field)));
38581 emit_move_insn (target, gen_lowpart (V16QImode, x));
38582 return true;
38587 /* Look for merge and pack patterns. */
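/* A selector may also match a pattern with the two inputs exchanged;
   SWAPPED records this so the operands can be swapped before the
   matched instruction is emitted. */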
38588 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
38590 bool swapped;
38592 if ((patterns[j].mask & rs6000_isa_flags) == 0)
38593 continue;
38595 elt = patterns[j].perm[0];
38596 if (perm[0] == elt)
38597 swapped = false;
38598 else if (perm[0] == elt + 16)
38599 swapped = true;
38600 else
38601 continue;
38602 for (i = 1; i < 16; ++i)
38604 elt = patterns[j].perm[i];
38605 if (swapped)
38606 elt = (elt >= 16 ? elt - 16 : elt + 16);
38607 else if (one_vec && elt >= 16)
38608 elt -= 16;
38609 if (perm[i] != elt)
38610 break;
38612 if (i == 16)
38614 enum insn_code icode = patterns[j].impl;
38615 machine_mode omode = insn_data[icode].operand[0].mode;
38616 machine_mode imode = insn_data[icode].operand[1].mode;
38618 /* For little-endian, don't use vpkuwum and vpkuhum if the
38619 underlying vector type is not V4SI and V8HI, respectively.
38620 For example, using vpkuwum with a V8HI picks up the even
38621 halfwords (BE numbering) when the even halfwords (LE
38622 numbering) are what we need. */
38623 if (!BYTES_BIG_ENDIAN
38624 && icode == CODE_FOR_altivec_vpkuwum_direct
38625 && ((GET_CODE (op0) == REG
38626 && GET_MODE (op0) != V4SImode)
38627 || (GET_CODE (op0) == SUBREG
38628 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
38629 continue;
38630 if (!BYTES_BIG_ENDIAN
38631 && icode == CODE_FOR_altivec_vpkuhum_direct
38632 && ((GET_CODE (op0) == REG
38633 && GET_MODE (op0) != V8HImode)
38634 || (GET_CODE (op0) == SUBREG
38635 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
38636 continue;
38638 /* For little-endian, the two input operands must be swapped
38639 (or swapped back) to ensure proper right-to-left numbering
38640 from 0 to 2N-1. */
38641 if (swapped ^ !BYTES_BIG_ENDIAN)
38642 std::swap (op0, op1);
38643 if (imode != V16QImode)
38645 op0 = gen_lowpart (imode, op0);
38646 op1 = gen_lowpart (imode, op1);
38648 if (omode == V16QImode)
38649 x = target;
38650 else
38651 x = gen_reg_rtx (omode);
38652 emit_insn (GEN_FCN (icode) (x, op0, op1));
38653 if (omode != V16QImode)
38654 emit_move_insn (target, gen_lowpart (V16QImode, x));
38655 return true;
38659 if (!BYTES_BIG_ENDIAN)
38661 altivec_expand_vec_perm_const_le (operands);
38662 return true;
38665 return false;
38668 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
38669 Return true if we match an efficient implementation. */
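/* PERM0 and PERM1 each index one of the four elements of the
   concatenated pair {op0, op1}: values 0-1 select from op0 and 2-3
   from op1, so e.g. {0, 3} keeps element 0 of op0 and element 1 of
   op1. */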
38671 static bool
38672 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
38673 unsigned char perm0, unsigned char perm1)
38675 rtx x;
38677 /* If both selectors come from the same operand, fold to single op. */
38678 if ((perm0 & 2) == (perm1 & 2))
38680 if (perm0 & 2)
38681 op0 = op1;
38682 else
38683 op1 = op0;
38685 /* If both operands are equal, fold to simpler permutation. */
38686 if (rtx_equal_p (op0, op1))
38688 perm0 = perm0 & 1;
38689 perm1 = (perm1 & 1) + 2;
38691 /* If the first selector comes from the second operand, swap. */
38692 else if (perm0 & 2)
38694 if (perm1 & 2)
38695 return false;
38696 perm0 -= 2;
38697 perm1 += 2;
38698 std::swap (op0, op1);
38700 /* If the second selector does not come from the second operand, fail. */
38701 else if ((perm1 & 2) == 0)
38702 return false;
38704 /* Success! */
38705 if (target != NULL)
38707 machine_mode vmode, dmode;
38708 rtvec v;
38710 vmode = GET_MODE (target);
38711 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
38712 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
38713 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
38714 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
38715 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
38716 emit_insn (gen_rtx_SET (target, x));
38718 return true;
38721 bool
38722 rs6000_expand_vec_perm_const (rtx operands[4])
38724 rtx target, op0, op1, sel;
38725 unsigned char perm0, perm1;
38727 target = operands[0];
38728 op0 = operands[1];
38729 op1 = operands[2];
38730 sel = operands[3];
38732 /* Unpack the constant selector. */
38733 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
38734 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
38736 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
38739 /* Test whether a constant permutation is supported. */
38741 static bool
38742 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
38743 const unsigned char *sel)
38745 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
38746 if (TARGET_ALTIVEC)
38747 return true;
38749 /* Check for ps_merge* or evmerge* insns. */
38750 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
38751 || (TARGET_SPE && vmode == V2SImode))
38753 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
38754 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
38755 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
38758 return false;
38761 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
38763 static void
38764 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
38765 machine_mode vmode, unsigned nelt, rtx perm[])
38767 machine_mode imode;
38768 rtx x;
38770 imode = vmode;
38771 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
38772 imode = mode_for_int_vector (vmode).require ();
38774 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
38775 x = expand_vec_perm (vmode, op0, op1, x, target);
38776 if (x != target)
38777 emit_move_insn (target, x);
38780 /* Expand an extract even operation. */
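/* For a V4SI target this builds the selector {0, 2, 4, 6}, i.e. the
   even-numbered elements of the double-width concatenation of OP0 and
   OP1. */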
38782 void
38783 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
38785 machine_mode vmode = GET_MODE (target);
38786 unsigned i, nelt = GET_MODE_NUNITS (vmode);
38787 rtx perm[16];
38789 for (i = 0; i < nelt; i++)
38790 perm[i] = GEN_INT (i * 2);
38792 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
38795 /* Expand a vector interleave operation. */
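/* For a V4SI target, HIGHP builds the selector {0, 4, 1, 5} (the high
   halves of OP0 and OP1 interleaved); !HIGHP builds {2, 6, 3, 7}. */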
38797 void
38798 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
38800 machine_mode vmode = GET_MODE (target);
38801 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
38802 rtx perm[16];
38804 high = (highp ? 0 : nelt / 2);
38805 for (i = 0; i < nelt / 2; i++)
38807 perm[i * 2] = GEN_INT (i + high);
38808 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
38811 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
38814 /* Scale a V2DF vector SRC by two raised to the power SCALE and place the result in TGT. */
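/* E.g. SCALE == 3 multiplies both lanes by 2^3 = 8.0, via the vector
   multiply by the constant {8.0, 8.0} built below. */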
38815 void
38816 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
38818 HOST_WIDE_INT hwi_scale (scale);
38819 REAL_VALUE_TYPE r_pow;
38820 rtvec v = rtvec_alloc (2);
38821 rtx elt;
38822 rtx scale_vec = gen_reg_rtx (V2DFmode);
38823 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
38824 elt = const_double_from_real_value (r_pow, DFmode);
38825 RTVEC_ELT (v, 0) = elt;
38826 RTVEC_ELT (v, 1) = elt;
38827 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
38828 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
38831 /* Return an RTX representing where to find the function value of a
38832 function returning MODE. */
38833 static rtx
38834 rs6000_complex_function_value (machine_mode mode)
38836 unsigned int regno;
38837 rtx r1, r2;
38838 machine_mode inner = GET_MODE_INNER (mode);
38839 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
38841 if (TARGET_FLOAT128_TYPE
38842 && (mode == KCmode
38843 || (mode == TCmode && TARGET_IEEEQUAD)))
38844 regno = ALTIVEC_ARG_RETURN;
38846 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38847 regno = FP_ARG_RETURN;
38849 else
38851 regno = GP_ARG_RETURN;
38853 /* 32-bit is OK since it'll go in r3/r4. */
38854 if (TARGET_32BIT && inner_bytes >= 4)
38855 return gen_rtx_REG (mode, regno);
38858 if (inner_bytes >= 8)
38859 return gen_rtx_REG (mode, regno);
38861 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
38862 const0_rtx);
38863 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
38864 GEN_INT (inner_bytes));
38865 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
38868 /* Return an rtx describing a return value of MODE as a PARALLEL
38869 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
38870 stride REG_STRIDE. */
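/* E.g. under the ELFv2 ABI a homogeneous aggregate of three doubles
   comes back as (parallel [(f1, 0) (f2, 8) (f3, 16)]) with stride 1. */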
38872 static rtx
38873 rs6000_parallel_return (machine_mode mode,
38874 int n_elts, machine_mode elt_mode,
38875 unsigned int regno, unsigned int reg_stride)
38877 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
38879 int i;
38880 for (i = 0; i < n_elts; i++)
38882 rtx r = gen_rtx_REG (elt_mode, regno);
38883 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
38884 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
38885 regno += reg_stride;
38888 return par;
38891 /* Target hook for TARGET_FUNCTION_VALUE.
38893 On the SPE, both floating-point values and vectors are returned in r3.
38895 On RS/6000 an integer value is in r3 and a floating-point value is in
38896 fp1, unless -msoft-float. */
38898 static rtx
38899 rs6000_function_value (const_tree valtype,
38900 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
38901 bool outgoing ATTRIBUTE_UNUSED)
38903 machine_mode mode;
38904 unsigned int regno;
38905 machine_mode elt_mode;
38906 int n_elts;
38908 /* Special handling for structs in darwin64. */
38909 if (TARGET_MACHO
38910 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
38912 CUMULATIVE_ARGS valcum;
38913 rtx valret;
38915 valcum.words = 0;
38916 valcum.fregno = FP_ARG_MIN_REG;
38917 valcum.vregno = ALTIVEC_ARG_MIN_REG;
38918 /* Do a trial code generation as if this were going to be passed as
38919 an argument; if any part goes in memory, we return NULL. */
38920 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
38921 if (valret)
38922 return valret;
38923 /* Otherwise fall through to standard ABI rules. */
38926 mode = TYPE_MODE (valtype);
38928 /* The ELFv2 ABI returns homogeneous floating-point or vector aggregates in registers. */
38929 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
38931 int first_reg, n_regs;
38933 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
38935 /* _Decimal128 must use even/odd register pairs. */
38936 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38937 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
38939 else
38941 first_reg = ALTIVEC_ARG_RETURN;
38942 n_regs = 1;
38945 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
38948 /* Some return value types need to be split under -mpowerpc64 with the 32-bit ABI. */
38949 if (TARGET_32BIT && TARGET_POWERPC64)
38950 switch (mode)
38952 default:
38953 break;
38954 case E_DImode:
38955 case E_SCmode:
38956 case E_DCmode:
38957 case E_TCmode:
38958 int count = GET_MODE_SIZE (mode) / 4;
38959 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
38962 if ((INTEGRAL_TYPE_P (valtype)
38963 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
38964 || POINTER_TYPE_P (valtype))
38965 mode = TARGET_32BIT ? SImode : DImode;
38967 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38968 /* _Decimal128 must use an even/odd register pair. */
38969 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38970 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
38971 && !FLOAT128_VECTOR_P (mode)
38972 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
38973 regno = FP_ARG_RETURN;
38974 else if (TREE_CODE (valtype) == COMPLEX_TYPE
38975 && targetm.calls.split_complex_arg)
38976 return rs6000_complex_function_value (mode);
38977 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38978 return register is used in both cases, and we won't see V2DImode/V2DFmode
38979 for pure altivec, combine the two cases. */
38980 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
38981 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
38982 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
38983 regno = ALTIVEC_ARG_RETURN;
38984 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38985 && (mode == DFmode || mode == DCmode
38986 || FLOAT128_IBM_P (mode) || mode == TCmode))
38987 return spe_build_register_parallel (mode, GP_ARG_RETURN);
38988 else
38989 regno = GP_ARG_RETURN;
38991 return gen_rtx_REG (mode, regno);
38994 /* Define how to find the value returned by a library function
38995 assuming the value has mode MODE. */
38996 rtx
38997 rs6000_libcall_value (machine_mode mode)
38999 unsigned int regno;
39001 /* A long long return value needs to be split under -mpowerpc64 with the 32-bit ABI. */
39002 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
39003 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
39005 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
39006 /* _Decimal128 must use an even/odd register pair. */
39007 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
39008 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
39009 && TARGET_HARD_FLOAT && TARGET_FPRS
39010 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
39011 regno = FP_ARG_RETURN;
39012 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
39013 return register is used in both cases, and we won't see V2DImode/V2DFmode
39014 for pure altivec, combine the two cases. */
39015 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
39016 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
39017 regno = ALTIVEC_ARG_RETURN;
39018 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
39019 return rs6000_complex_function_value (mode);
39020 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
39021 && (mode == DFmode || mode == DCmode
39022 || FLOAT128_IBM_P (mode) || mode == TCmode))
39023 return spe_build_register_parallel (mode, GP_ARG_RETURN);
39024 else
39025 regno = GP_ARG_RETURN;
39027 return gen_rtx_REG (mode, regno);
39031 /* Return true if we use LRA instead of the reload pass. */
39032 static bool
39033 rs6000_lra_p (void)
39035 return TARGET_LRA;
39038 /* Compute register pressure classes. We implement the target hook to avoid
39039 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
39040 lead to incorrect estimates of the number of available registers and therefore
39041 increased register pressure/spill. */
39042 static int
39043 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
39045 int n;
39047 n = 0;
39048 pressure_classes[n++] = GENERAL_REGS;
39049 if (TARGET_VSX)
39050 pressure_classes[n++] = VSX_REGS;
39051 else
39053 if (TARGET_ALTIVEC)
39054 pressure_classes[n++] = ALTIVEC_REGS;
39055 if (TARGET_HARD_FLOAT && TARGET_FPRS)
39056 pressure_classes[n++] = FLOAT_REGS;
39058 pressure_classes[n++] = CR_REGS;
39059 pressure_classes[n++] = SPECIAL_REGS;
39061 return n;
39064 /* Given FROM and TO register numbers, say whether this elimination is allowed.
39065 Frame pointer elimination is automatically handled.
39067 For the RS/6000, if frame pointer elimination is being done, we would like
39068 to convert ap into fp, not sp.
39070 We need r30 if -mminimal-toc was specified and there are constant pool
39071 references. */
39073 static bool
39074 rs6000_can_eliminate (const int from, const int to)
39076 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
39077 ? ! frame_pointer_needed
39078 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
39079 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
39080 || constant_pool_empty_p ()
39081 : true);
39084 /* Define the offset between two registers, FROM to be eliminated and its
39085 replacement TO, at the start of a routine. */
39086 HOST_WIDE_INT
39087 rs6000_initial_elimination_offset (int from, int to)
39089 rs6000_stack_t *info = rs6000_stack_info ();
39090 HOST_WIDE_INT offset;
39092 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39093 offset = info->push_p ? 0 : -info->total_size;
39094 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39096 offset = info->push_p ? 0 : -info->total_size;
39097 if (FRAME_GROWS_DOWNWARD)
39098 offset += info->fixed_size + info->vars_size + info->parm_size;
39100 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
39101 offset = FRAME_GROWS_DOWNWARD
39102 ? info->fixed_size + info->vars_size + info->parm_size
39103 : 0;
39104 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
39105 offset = info->total_size;
39106 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39107 offset = info->push_p ? info->total_size : 0;
39108 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
39109 offset = 0;
39110 else
39111 gcc_unreachable ();
39113 return offset;
39116 static rtx
39117 rs6000_dwarf_register_span (rtx reg)
39119 rtx parts[8];
39120 int i, words;
39121 unsigned regno = REGNO (reg);
39122 machine_mode mode = GET_MODE (reg);
39124 if (TARGET_SPE
39125 && regno < 32
39126 && (SPE_VECTOR_MODE (GET_MODE (reg))
39127 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
39128 && mode != SFmode && mode != SDmode && mode != SCmode)))
39130 else
39131 return NULL_RTX;
39133 regno = REGNO (reg);
39135 /* The duality of the SPE register size wreaks all kinds of havoc.
39136 This is a way of distinguishing r0 in 32-bits from r0 in
39137 64-bits. */
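/* E.g. a 64-bit value in r5 expands to two SImode pieces: the SPE high
   half of r5 followed by r5 itself on big-endian, and the reverse on
   little-endian. */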
39138 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
39139 gcc_assert (words <= 4);
39140 for (i = 0; i < words; i++, regno++)
39142 if (BYTES_BIG_ENDIAN)
39144 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39145 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
39147 else
39149 parts[2 * i] = gen_rtx_REG (SImode, regno);
39150 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39154 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
39157 /* Fill in sizes for SPE register high parts in the table used by the unwinder. */
39159 static void
39160 rs6000_init_dwarf_reg_sizes_extra (tree address)
39162 if (TARGET_SPE)
39164 int i;
39165 machine_mode mode = TYPE_MODE (char_type_node);
39166 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39167 rtx mem = gen_rtx_MEM (BLKmode, addr);
39168 rtx value = gen_int_mode (4, mode);
39170 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
39172 int column = DWARF_REG_TO_UNWIND_COLUMN
39173 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39174 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39176 emit_move_insn (adjust_address (mem, mode, offset), value);
39180 if (TARGET_MACHO && ! TARGET_ALTIVEC)
39182 int i;
39183 machine_mode mode = TYPE_MODE (char_type_node);
39184 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39185 rtx mem = gen_rtx_MEM (BLKmode, addr);
39186 rtx value = gen_int_mode (16, mode);
39188 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
39189 The unwinder still needs to know the size of Altivec registers. */
39191 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
39193 int column = DWARF_REG_TO_UNWIND_COLUMN
39194 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39195 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39197 emit_move_insn (adjust_address (mem, mode, offset), value);
39202 /* Map internal gcc register numbers to debug format register numbers.
39203 FORMAT specifies the type of debug register number to use:
39204 0 -- debug information, except for frame-related sections
39205 1 -- DWARF .debug_frame section
39206 2 -- DWARF .eh_frame section */
39208 unsigned int
39209 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
39211 /* We never use the GCC internal number for SPE high registers.
39212 Those are mapped to the 1200..1231 range for all debug formats. */
39213 if (SPE_HIGH_REGNO_P (regno))
39214 return regno - FIRST_SPE_HIGH_REGNO + 1200;
39216 /* Except for the above, we use the internal number for non-DWARF
39217 debug information, and also for .eh_frame. */
39218 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
39219 return regno;
39221 /* On some platforms, we use the standard DWARF register
39222 numbering for .debug_info and .debug_frame. */
39223 #ifdef RS6000_USE_DWARF_NUMBERING
39224 if (regno <= 63)
39225 return regno;
39226 if (regno == LR_REGNO)
39227 return 108;
39228 if (regno == CTR_REGNO)
39229 return 109;
39230 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
39231 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
39232 The actual code emitted saves the whole of CR, so we map CR2_REGNO
39233 to the DWARF reg for CR. */
39234 if (format == 1 && regno == CR2_REGNO)
39235 return 64;
39236 if (CR_REGNO_P (regno))
39237 return regno - CR0_REGNO + 86;
39238 if (regno == CA_REGNO)
39239 return 101; /* XER */
39240 if (ALTIVEC_REGNO_P (regno))
39241 return regno - FIRST_ALTIVEC_REGNO + 1124;
39242 if (regno == VRSAVE_REGNO)
39243 return 356;
39244 if (regno == VSCR_REGNO)
39245 return 67;
39246 if (regno == SPE_ACC_REGNO)
39247 return 99;
39248 if (regno == SPEFSCR_REGNO)
39249 return 612;
39250 #endif
39251 return regno;
39254 /* Target hook for eh_return_filter_mode. */
39255 static scalar_int_mode
39256 rs6000_eh_return_filter_mode (void)
39258 return TARGET_32BIT ? SImode : word_mode;
39261 /* Target hook for scalar_mode_supported_p. */
39262 static bool
39263 rs6000_scalar_mode_supported_p (scalar_mode mode)
39265 /* -m32 does not support TImode. This is the default, from
39266 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
39267 same ABI as for -m32. But default_scalar_mode_supported_p allows
39268 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
39269 for -mpowerpc64. */
39270 if (TARGET_32BIT && mode == TImode)
39271 return false;
39273 if (DECIMAL_FLOAT_MODE_P (mode))
39274 return default_decimal_float_supported_p ();
39275 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
39276 return true;
39277 else
39278 return default_scalar_mode_supported_p (mode);
39281 /* Target hook for vector_mode_supported_p. */
39282 static bool
39283 rs6000_vector_mode_supported_p (machine_mode mode)
39286 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
39287 return true;
39289 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
39290 return true;
39292 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
39293 128-bit, the compiler might try to widen IEEE 128-bit to IBM
39294 double-double. */
39295 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
39296 return true;
39298 else
39299 return false;
39302 /* Target hook for floatn_mode. */
39303 static opt_scalar_float_mode
39304 rs6000_floatn_mode (int n, bool extended)
39306 if (extended)
39308 switch (n)
39310 case 32:
39311 return DFmode;
39313 case 64:
39314 if (TARGET_FLOAT128_KEYWORD)
39315 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39316 else
39317 return opt_scalar_float_mode ();
39319 case 128:
39320 return opt_scalar_float_mode ();
39322 default:
39323 /* Those are the only valid _FloatNx types. */
39324 gcc_unreachable ();
39327 else
39329 switch (n)
39331 case 32:
39332 return SFmode;
39334 case 64:
39335 return DFmode;
39337 case 128:
39338 if (TARGET_FLOAT128_KEYWORD)
39339 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39340 else
39341 return opt_scalar_float_mode ();
39343 default:
39344 return opt_scalar_float_mode ();
39350 /* Target hook for c_mode_for_suffix. */
39351 static machine_mode
39352 rs6000_c_mode_for_suffix (char suffix)
39354 if (TARGET_FLOAT128_TYPE)
39356 if (suffix == 'q' || suffix == 'Q')
39357 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39359 /* At the moment, we are not defining a suffix for IBM extended double.
39360 If/when the default for -mabi=ieeelongdouble is changed, and we want
39361 to support __ibm128 constants in legacy library code, we may need to
39362 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
39363 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
39364 __float80 constants. */
39367 return VOIDmode;
39370 /* Target hook for invalid_arg_for_unprototyped_fn. */
39371 static const char *
39372 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
39374 return (!rs6000_darwin64_abi
39375 && typelist == 0
39376 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
39377 && (funcdecl == NULL_TREE
39378 || (TREE_CODE (funcdecl) == FUNCTION_DECL
39379 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
39380 ? N_("AltiVec argument passed to unprototyped function")
39381 : NULL;
39384 /* For TARGET_SECURE_PLT 32-bit PIC code we can save the PIC register
39385 setup by using the hidden function __stack_chk_fail_local instead of
39386 calling __stack_chk_fail directly. Otherwise it is better to call
39387 __stack_chk_fail directly. */
39389 static tree ATTRIBUTE_UNUSED
39390 rs6000_stack_protect_fail (void)
39392 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
39393 ? default_hidden_stack_protect_fail ()
39394 : default_external_stack_protect_fail ();
39397 void
39398 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
39399 int num_operands ATTRIBUTE_UNUSED)
39401 if (rs6000_warn_cell_microcode)
39403 const char *temp;
39404 int insn_code_number = recog_memoized (insn);
39405 location_t location = INSN_LOCATION (insn);
39407 /* Punt on insns we cannot recognize. */
39408 if (insn_code_number < 0)
39409 return;
39411 /* get_insn_template can modify recog_data, so save and restore it. */
39412 struct recog_data_d recog_data_save = recog_data;
39413 for (int i = 0; i < recog_data.n_operands; i++)
39414 recog_data.operand[i] = copy_rtx (recog_data.operand[i]);
39415 temp = get_insn_template (insn_code_number, insn);
39416 recog_data = recog_data_save;
39418 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
39419 warning_at (location, OPT_mwarn_cell_microcode,
39420 "emitting microcode insn %s\t[%s] #%d",
39421 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39422 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
39423 warning_at (location, OPT_mwarn_cell_microcode,
39424 "emitting conditional microcode insn %s\t[%s] #%d",
39425 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39429 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
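/* That is 1 << 41 (0x20000000000) for 64-bit and 1 << 29 (0x20000000)
   for 32-bit. */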
39431 #if TARGET_ELF
39432 static unsigned HOST_WIDE_INT
39433 rs6000_asan_shadow_offset (void)
39435 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
39437 #endif
39439 /* Mask options that we want to support inside attribute((target)) and
39440 #pragma GCC target operations. Note that we do not include things like
39441 64/32-bit, endianness, hard/soft floating point, etc. that would have
39442 different calling sequences. */
39444 struct rs6000_opt_mask {
39445 const char *name; /* option name */
39446 HOST_WIDE_INT mask; /* mask to set */
39447 bool invert; /* invert sense of mask */
39448 bool valid_target; /* option is a target option */
39451 static struct rs6000_opt_mask const rs6000_opt_masks[] =
39453 { "altivec", OPTION_MASK_ALTIVEC, false, true },
39454 { "cmpb", OPTION_MASK_CMPB, false, true },
39455 { "crypto", OPTION_MASK_CRYPTO, false, true },
39456 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
39457 { "dlmzb", OPTION_MASK_DLMZB, false, true },
39458 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
39459 false, true },
39460 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
39461 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
39462 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
39463 { "fprnd", OPTION_MASK_FPRND, false, true },
39464 { "hard-dfp", OPTION_MASK_DFP, false, true },
39465 { "htm", OPTION_MASK_HTM, false, true },
39466 { "isel", OPTION_MASK_ISEL, false, true },
39467 { "mfcrf", OPTION_MASK_MFCRF, false, true },
39468 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
39469 { "modulo", OPTION_MASK_MODULO, false, true },
39470 { "mulhw", OPTION_MASK_MULHW, false, true },
39471 { "multiple", OPTION_MASK_MULTIPLE, false, true },
39472 { "popcntb", OPTION_MASK_POPCNTB, false, true },
39473 { "popcntd", OPTION_MASK_POPCNTD, false, true },
39474 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
39475 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
39476 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
39477 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
39478 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
39479 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
39480 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
39481 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
39482 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
39483 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
39484 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
39485 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
39486 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
39487 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
39488 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
39489 { "string", OPTION_MASK_STRING, false, true },
39490 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
39491 { "update", OPTION_MASK_NO_UPDATE, true , true },
39492 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
39493 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
39494 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
39495 { "vsx", OPTION_MASK_VSX, false, true },
39496 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
39497 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
39498 #ifdef OPTION_MASK_64BIT
39499 #if TARGET_AIX_OS
39500 { "aix64", OPTION_MASK_64BIT, false, false },
39501 { "aix32", OPTION_MASK_64BIT, true, false },
39502 #else
39503 { "64", OPTION_MASK_64BIT, false, false },
39504 { "32", OPTION_MASK_64BIT, true, false },
39505 #endif
39506 #endif
39507 #ifdef OPTION_MASK_EABI
39508 { "eabi", OPTION_MASK_EABI, false, false },
39509 #endif
39510 #ifdef OPTION_MASK_LITTLE_ENDIAN
39511 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
39512 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
39513 #endif
39514 #ifdef OPTION_MASK_RELOCATABLE
39515 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
39516 #endif
39517 #ifdef OPTION_MASK_STRICT_ALIGN
39518 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
39519 #endif
39520 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
39521 { "string", OPTION_MASK_STRING, false, false },
39524 /* Builtin mask mapping for printing the flags. */
39525 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
39527 { "altivec", RS6000_BTM_ALTIVEC, false, false },
39528 { "vsx", RS6000_BTM_VSX, false, false },
39529 { "spe", RS6000_BTM_SPE, false, false },
39530 { "paired", RS6000_BTM_PAIRED, false, false },
39531 { "fre", RS6000_BTM_FRE, false, false },
39532 { "fres", RS6000_BTM_FRES, false, false },
39533 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
39534 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
39535 { "popcntd", RS6000_BTM_POPCNTD, false, false },
39536 { "cell", RS6000_BTM_CELL, false, false },
39537 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
39538 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
39539 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
39540 { "crypto", RS6000_BTM_CRYPTO, false, false },
39541 { "htm", RS6000_BTM_HTM, false, false },
39542 { "hard-dfp", RS6000_BTM_DFP, false, false },
39543 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
39544 { "long-double-128", RS6000_BTM_LDBL128, false, false },
39545 { "float128", RS6000_BTM_FLOAT128, false, false },
39548 /* Option variables that we want to support inside attribute((target)) and
39549 #pragma GCC target operations. */
39551 struct rs6000_opt_var {
39552 const char *name; /* option name */
39553 size_t global_offset; /* offset of the option in global_options. */
39554 size_t target_offset; /* offset of the option in target options. */
39557 static struct rs6000_opt_var const rs6000_opt_vars[] =
39559 { "friz",
39560 offsetof (struct gcc_options, x_TARGET_FRIZ),
39561 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
39562 { "avoid-indexed-addresses",
39563 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
39564 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
39565 { "paired",
39566 offsetof (struct gcc_options, x_rs6000_paired_float),
39567 offsetof (struct cl_target_option, x_rs6000_paired_float), },
39568 { "longcall",
39569 offsetof (struct gcc_options, x_rs6000_default_long_calls),
39570 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
39571 { "optimize-swaps",
39572 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
39573 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
39574 { "allow-movmisalign",
39575 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
39576 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
39577 { "allow-df-permute",
39578 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
39579 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
39580 { "sched-groups",
39581 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
39582 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
39583 { "always-hint",
39584 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
39585 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
39586 { "align-branch-targets",
39587 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
39588 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
39589 { "vectorize-builtins",
39590 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
39591 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
39592 { "tls-markers",
39593 offsetof (struct gcc_options, x_tls_markers),
39594 offsetof (struct cl_target_option, x_tls_markers), },
39595 { "sched-prolog",
39596 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39597 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39598 { "sched-epilog",
39599 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39600 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39601 { "gen-cell-microcode",
39602 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
39603 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
39604 { "warn-cell-microcode",
39605 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
39606 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
39609 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
39610 parsing. Return true if there were no errors. */
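/* E.g. attribute((target("no-vsx,popcntd"))) clears OPTION_MASK_VSX and
   sets OPTION_MASK_POPCNTD in rs6000_isa_flags, while "cpu=" and
   "tune=" arguments instead set rs6000_cpu_index and
   rs6000_tune_index. */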
39612 static bool
39613 rs6000_inner_target_options (tree args, bool attr_p)
39615 bool ret = true;
39617 if (args == NULL_TREE)
39620 else if (TREE_CODE (args) == STRING_CST)
39622 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39623 char *q;
39625 while ((q = strtok (p, ",")) != NULL)
39627 bool error_p = false;
39628 bool not_valid_p = false;
39629 const char *cpu_opt = NULL;
39631 p = NULL;
39632 if (strncmp (q, "cpu=", 4) == 0)
39634 int cpu_index = rs6000_cpu_name_lookup (q+4);
39635 if (cpu_index >= 0)
39636 rs6000_cpu_index = cpu_index;
39637 else
39639 error_p = true;
39640 cpu_opt = q+4;
39643 else if (strncmp (q, "tune=", 5) == 0)
39645 int tune_index = rs6000_cpu_name_lookup (q+5);
39646 if (tune_index >= 0)
39647 rs6000_tune_index = tune_index;
39648 else
39650 error_p = true;
39651 cpu_opt = q+5;
39654 else
39656 size_t i;
39657 bool invert = false;
39658 char *r = q;
39660 error_p = true;
39661 if (strncmp (r, "no-", 3) == 0)
39663 invert = true;
39664 r += 3;
39667 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
39668 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
39670 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
39672 if (!rs6000_opt_masks[i].valid_target)
39673 not_valid_p = true;
39674 else
39676 error_p = false;
39677 rs6000_isa_flags_explicit |= mask;
39679 /* VSX needs altivec, so -mvsx automagically sets
39680 altivec and disables -mavoid-indexed-addresses. */
39681 if (!invert)
39683 if (mask == OPTION_MASK_VSX)
39685 mask |= OPTION_MASK_ALTIVEC;
39686 TARGET_AVOID_XFORM = 0;
39690 if (rs6000_opt_masks[i].invert)
39691 invert = !invert;
39693 if (invert)
39694 rs6000_isa_flags &= ~mask;
39695 else
39696 rs6000_isa_flags |= mask;
39698 break;
39701 if (error_p && !not_valid_p)
39703 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
39704 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
39706 size_t j = rs6000_opt_vars[i].global_offset;
39707 *((int *) ((char *)&global_options + j)) = !invert;
39708 error_p = false;
39709 not_valid_p = false;
39710 break;
39715 if (error_p)
39717 const char *eprefix, *esuffix;
39719 ret = false;
39720 if (attr_p)
39722 eprefix = "__attribute__((__target__(";
39723 esuffix = ")))";
39725 else
39727 eprefix = "#pragma GCC target ";
39728 esuffix = "";
39731 if (cpu_opt)
39732 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
39733 q, esuffix);
39734 else if (not_valid_p)
39735 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
39736 else
39737 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
39742 else if (TREE_CODE (args) == TREE_LIST)
39746 tree value = TREE_VALUE (args);
39747 if (value)
39749 bool ret2 = rs6000_inner_target_options (value, attr_p);
39750 if (!ret2)
39751 ret = false;
39753 args = TREE_CHAIN (args);
39755 while (args != NULL_TREE);
39758 else
39760 error ("attribute %<target%> argument not a string");
39761 return false;
39764 return ret;
39767 /* Print out the target options as a list for -mdebug=target. */
39769 static void
39770 rs6000_debug_target_options (tree args, const char *prefix)
39772 if (args == NULL_TREE)
39773 fprintf (stderr, "%s<NULL>", prefix);
39775 else if (TREE_CODE (args) == STRING_CST)
39777 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39778 char *q;
39780 while ((q = strtok (p, ",")) != NULL)
39782 p = NULL;
39783 fprintf (stderr, "%s\"%s\"", prefix, q);
39784 prefix = ", ";
39788 else if (TREE_CODE (args) == TREE_LIST)
39792 tree value = TREE_VALUE (args);
39793 if (value)
39795 rs6000_debug_target_options (value, prefix);
39796 prefix = ", ";
39798 args = TREE_CHAIN (args);
39800 while (args != NULL_TREE);
39803 else
39804 gcc_unreachable ();
39806 return;
39810 /* Hook to validate attribute((target("..."))). */
39812 static bool
39813 rs6000_valid_attribute_p (tree fndecl,
39814 tree ARG_UNUSED (name),
39815 tree args,
39816 int flags)
39818 struct cl_target_option cur_target;
39819 bool ret;
39820 tree old_optimize = build_optimization_node (&global_options);
39821 tree new_target, new_optimize;
39822 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39824 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
39826 if (TARGET_DEBUG_TARGET)
39828 tree tname = DECL_NAME (fndecl);
39829 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
39830 if (tname)
39831 fprintf (stderr, "function: %.*s\n",
39832 (int) IDENTIFIER_LENGTH (tname),
39833 IDENTIFIER_POINTER (tname));
39834 else
39835 fprintf (stderr, "function: unknown\n");
39837 fprintf (stderr, "args:");
39838 rs6000_debug_target_options (args, " ");
39839 fprintf (stderr, "\n");
39841 if (flags)
39842 fprintf (stderr, "flags: 0x%x\n", flags);
39844 fprintf (stderr, "--------------------\n");
39847 old_optimize = build_optimization_node (&global_options);
39848 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39850 /* If the function changed the optimization levels as well as setting target
39851 options, start with the optimizations specified. */
39852 if (func_optimize && func_optimize != old_optimize)
39853 cl_optimization_restore (&global_options,
39854 TREE_OPTIMIZATION (func_optimize));
39856 /* The target attributes may also change some optimization flags, so update
39857 the optimization options if necessary. */
39858 cl_target_option_save (&cur_target, &global_options);
39859 rs6000_cpu_index = rs6000_tune_index = -1;
39860 ret = rs6000_inner_target_options (args, true);
39862 /* Set up any additional state. */
39863 if (ret)
39865 ret = rs6000_option_override_internal (false);
39866 new_target = build_target_option_node (&global_options);
39868 else
39869 new_target = NULL;
39871 new_optimize = build_optimization_node (&global_options);
39873 if (!new_target)
39874 ret = false;
39876 else if (fndecl)
39878 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
39880 if (old_optimize != new_optimize)
39881 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
39884 cl_target_option_restore (&global_options, &cur_target);
39886 if (old_optimize != new_optimize)
39887 cl_optimization_restore (&global_options,
39888 TREE_OPTIMIZATION (old_optimize));
39890 return ret;
39894 /* Hook to validate the current #pragma GCC target and set the state, and
39895 update the macros based on what was changed. If ARGS is NULL, then
39896 POP_TARGET is used to reset the options. */
39898 bool
39899 rs6000_pragma_target_parse (tree args, tree pop_target)
39901 tree prev_tree = build_target_option_node (&global_options);
39902 tree cur_tree;
39903 struct cl_target_option *prev_opt, *cur_opt;
39904 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
39905 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
39907 if (TARGET_DEBUG_TARGET)
39909 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
39910 fprintf (stderr, "args:");
39911 rs6000_debug_target_options (args, " ");
39912 fprintf (stderr, "\n");
39914 if (pop_target)
39916 fprintf (stderr, "pop_target:\n");
39917 debug_tree (pop_target);
39919 else
39920 fprintf (stderr, "pop_target: <NULL>\n");
39922 fprintf (stderr, "--------------------\n");
39925 if (! args)
39927 cur_tree = ((pop_target)
39928 ? pop_target
39929 : target_option_default_node);
39930 cl_target_option_restore (&global_options,
39931 TREE_TARGET_OPTION (cur_tree));
39933 else
39935 rs6000_cpu_index = rs6000_tune_index = -1;
39936 if (!rs6000_inner_target_options (args, false)
39937 || !rs6000_option_override_internal (false)
39938 || (cur_tree = build_target_option_node (&global_options))
39939 == NULL_TREE)
39941 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
39942 fprintf (stderr, "invalid pragma\n");
39944 return false;
39948 target_option_current_node = cur_tree;
39950 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
39951 change the macros that are defined. */
39952 if (rs6000_target_modify_macros_ptr)
39954 prev_opt = TREE_TARGET_OPTION (prev_tree);
39955 prev_bumask = prev_opt->x_rs6000_builtin_mask;
39956 prev_flags = prev_opt->x_rs6000_isa_flags;
39958 cur_opt = TREE_TARGET_OPTION (cur_tree);
39959 cur_flags = cur_opt->x_rs6000_isa_flags;
39960 cur_bumask = cur_opt->x_rs6000_builtin_mask;
39962 diff_bumask = (prev_bumask ^ cur_bumask);
39963 diff_flags = (prev_flags ^ cur_flags);
39965 if ((diff_flags != 0) || (diff_bumask != 0))
39967 /* Delete old macros. */
39968 rs6000_target_modify_macros_ptr (false,
39969 prev_flags & diff_flags,
39970 prev_bumask & diff_bumask);
39972 /* Define new macros. */
39973 rs6000_target_modify_macros_ptr (true,
39974 cur_flags & diff_flags,
39975 cur_bumask & diff_bumask);
39979 return true;
39983 /* Remember the last target of rs6000_set_current_function. */
39984 static GTY(()) tree rs6000_previous_fndecl;
39986 /* Establish appropriate back-end context for processing the function
39987 FNDECL. The argument might be NULL to indicate processing at top
39988 level, outside of any function scope. */
39989 static void
39990 rs6000_set_current_function (tree fndecl)
39992 tree old_tree = (rs6000_previous_fndecl
39993 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
39994 : NULL_TREE);
39996 tree new_tree = (fndecl
39997 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
39998 : NULL_TREE);
40000 if (TARGET_DEBUG_TARGET)
40002 bool print_final = false;
40003 fprintf (stderr, "\n==================== rs6000_set_current_function");
40005 if (fndecl)
40006 fprintf (stderr, ", fndecl %s (%p)",
40007 (DECL_NAME (fndecl)
40008 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
40009 : "<unknown>"), (void *)fndecl);
40011 if (rs6000_previous_fndecl)
40012 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
40014 fprintf (stderr, "\n");
40015 if (new_tree)
40017 fprintf (stderr, "\nnew fndecl target specific options:\n");
40018 debug_tree (new_tree);
40019 print_final = true;
40022 if (old_tree)
40024 fprintf (stderr, "\nold fndecl target specific options:\n");
40025 debug_tree (old_tree);
40026 print_final = true;
40029 if (print_final)
40030 fprintf (stderr, "--------------------\n");
40033 /* Only change the context if the function changes. This hook is called
40034 several times in the course of compiling a function, and we don't want to
40035 slow things down too much or call target_reinit when it isn't safe. */
40036 if (fndecl && fndecl != rs6000_previous_fndecl)
40038 rs6000_previous_fndecl = fndecl;
40039 if (old_tree == new_tree)
40042 else if (new_tree && new_tree != target_option_default_node)
40044 cl_target_option_restore (&global_options,
40045 TREE_TARGET_OPTION (new_tree));
40046 if (TREE_TARGET_GLOBALS (new_tree))
40047 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
40048 else
40049 TREE_TARGET_GLOBALS (new_tree)
40050 = save_target_globals_default_opts ();
40053 else if (old_tree && old_tree != target_option_default_node)
40055 new_tree = target_option_current_node;
40056 cl_target_option_restore (&global_options,
40057 TREE_TARGET_OPTION (new_tree));
40058 if (TREE_TARGET_GLOBALS (new_tree))
40059 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
40060 else if (new_tree == target_option_default_node)
40061 restore_target_globals (&default_target_globals);
40062 else
40063 TREE_TARGET_GLOBALS (new_tree)
40064 = save_target_globals_default_opts ();
40070 /* Save the current options. */
40072 static void
40073 rs6000_function_specific_save (struct cl_target_option *ptr,
40074 struct gcc_options *opts)
40076 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
40077 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
40080 /* Restore the current options. */
40082 static void
40083 rs6000_function_specific_restore (struct gcc_options *opts,
40084 struct cl_target_option *ptr)
40087 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
40088 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
40089 (void) rs6000_option_override_internal (false);
40092 /* Print the current options. */
40094 static void
40095 rs6000_function_specific_print (FILE *file, int indent,
40096 struct cl_target_option *ptr)
40098 rs6000_print_isa_options (file, indent, "Isa options set",
40099 ptr->x_rs6000_isa_flags);
40101 rs6000_print_isa_options (file, indent, "Isa options explicit",
40102 ptr->x_rs6000_isa_flags_explicit);
40105 /* Helper function to print the current isa or misc options on a line. */
40107 static void
40108 rs6000_print_options_internal (FILE *file,
40109 int indent,
40110 const char *string,
40111 HOST_WIDE_INT flags,
40112 const char *prefix,
40113 const struct rs6000_opt_mask *opts,
40114 size_t num_elements)
40116 size_t i;
40117 size_t start_column = 0;
40118 size_t cur_column;
40119 size_t max_column = 120;
40120 size_t prefix_len = strlen (prefix);
40121 size_t comma_len = 0;
40122 const char *comma = "";
40124 if (indent)
40125 start_column += fprintf (file, "%*s", indent, "");
40127 if (!flags)
40129 fprintf (file, DEBUG_FMT_S, string, "<none>");
40130 return;
40133 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
40135 /* Print the various mask options. */
40136 cur_column = start_column;
40137 for (i = 0; i < num_elements; i++)
40139 bool invert = opts[i].invert;
40140 const char *name = opts[i].name;
40141 const char *no_str = "";
40142 HOST_WIDE_INT mask = opts[i].mask;
40143 size_t len = comma_len + prefix_len + strlen (name);
40145 if (!invert)
40147 if ((flags & mask) == 0)
40149 no_str = "no-";
40150 len += sizeof ("no-") - 1;
40153 flags &= ~mask;
40156 else
40158 if ((flags & mask) != 0)
40160 no_str = "no-";
40161 len += sizeof ("no-") - 1;
40164 flags |= mask;
40167 cur_column += len;
40168 if (cur_column > max_column)
40170 fprintf (file, ", \\\n%*s", (int)start_column, "");
40171 cur_column = start_column + len;
40172 comma = "";
40175 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
40176 comma = ", ";
40177 comma_len = sizeof (", ") - 1;
40180 fputs ("\n", file);
40183 /* Helper function to print the current isa options on a line. */
40185 static void
40186 rs6000_print_isa_options (FILE *file, int indent, const char *string,
40187 HOST_WIDE_INT flags)
40189 rs6000_print_options_internal (file, indent, string, flags, "-m",
40190 &rs6000_opt_masks[0],
40191 ARRAY_SIZE (rs6000_opt_masks));
40194 static void
40195 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
40196 HOST_WIDE_INT flags)
40198 rs6000_print_options_internal (file, indent, string, flags, "",
40199 &rs6000_builtin_mask_names[0],
40200 ARRAY_SIZE (rs6000_builtin_mask_names));
40203 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
40204 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
40205 -mvsx-timode, -mupper-regs-df).
40207 If the user used -mno-power8-vector, we need to turn off all of the implicit
40208 ISA 2.07 and 3.0 options that relate to the vector unit.
40210 If the user used -mno-power9-vector, we need to turn off all of the implicit
40211 ISA 3.0 options that relate to the vector unit.
40213 This function does not handle explicit options such as the user specifying
40214 -mdirect-move. These are handled in rs6000_option_override_internal, and
40215 the appropriate error is given if needed.
40217 We return a mask of all of the implicit options that should not be enabled
40218 by default. */
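/* E.g. "-mno-vsx -mdirect-move" reports "-mno-vsx turns off
   -mdirect-move", while a plain "-mno-vsx" silently drops the dependent
   flags from the default set. */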
40220 static HOST_WIDE_INT
40221 rs6000_disable_incompatible_switches (void)
40223 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
40224 size_t i, j;
40226 static const struct {
40227 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
40228 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
40229 const char *const name; /* name of the switch. */
40230 } flags[] = {
40231 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
40232 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
40233 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
40236 for (i = 0; i < ARRAY_SIZE (flags); i++)
40238 HOST_WIDE_INT no_flag = flags[i].no_flag;
40240 if ((rs6000_isa_flags & no_flag) == 0
40241 && (rs6000_isa_flags_explicit & no_flag) != 0)
40243 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
40244 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
40245 & rs6000_isa_flags
40246 & dep_flags);
40248 if (set_flags)
40250 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
40251 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
40253 set_flags &= ~rs6000_opt_masks[j].mask;
40254 error ("-mno-%s turns off -m%s",
40255 flags[i].name,
40256 rs6000_opt_masks[j].name);
40259 gcc_assert (!set_flags);
40262 rs6000_isa_flags &= ~dep_flags;
40263 ignore_masks |= no_flag | dep_flags;
40267 if (!TARGET_P9_VECTOR
40268 && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0
40269 && TARGET_P9_DFORM_BOTH > 0)
40271 error ("-mno-power9-vector turns off -mpower9-dform");
40272 TARGET_P9_DFORM_BOTH = 0;
40275 return ignore_masks;
40279 /* Hook to determine if one function can safely inline another. */
40281 static bool
40282 rs6000_can_inline_p (tree caller, tree callee)
40284 bool ret = false;
40285 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
40286 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
40288 /* If callee has no option attributes, then it is ok to inline. */
40289 if (!callee_tree)
40290 ret = true;
40292 /* If caller has no option attributes, but callee does, then it is not ok to
40293 inline. */
40294 else if (!caller_tree)
40295 ret = false;
40297 else
40299 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
40300 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
40302 /* Callee's options should be a subset of the caller's, i.e. a vsx function
40303 can inline an altivec function but a non-vsx function can't inline a
40304 vsx function. */
40305 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
40306 == callee_opts->x_rs6000_isa_flags)
40307 ret = true;
40310 if (TARGET_DEBUG_TARGET)
40311 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
40312 (DECL_NAME (caller)
40313 ? IDENTIFIER_POINTER (DECL_NAME (caller))
40314 : "<unknown>"),
40315 (DECL_NAME (callee)
40316 ? IDENTIFIER_POINTER (DECL_NAME (callee))
40317 : "<unknown>"),
40318 (ret ? "can" : "cannot"));
40320 return ret;
40323 /* Allocate a stack temp and fix up the address so it meets the particular
40324 memory requirements (either offsettable or REG+REG addressing). */
40326 rtx
40327 rs6000_allocate_stack_temp (machine_mode mode,
40328 bool offsettable_p,
40329 bool reg_reg_p)
40331 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40332 rtx addr = XEXP (stack, 0);
40333 int strict_p = (reload_in_progress || reload_completed);
40335 if (!legitimate_indirect_address_p (addr, strict_p))
40337 if (offsettable_p
40338 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
40339 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40341 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
40342 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40345 return stack;
40348 /* Given a memory reference, if it does not use reg or reg+reg addressing,
40349 convert it to such a form, to accommodate memory reference instructions
40350 like STFIWX that only accept reg+reg addressing. */
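/* Pre-increment, pre-decrement and pre-modify addresses are handled by
   materializing the update with an explicit add first, since the plain
   reg or reg+reg form cannot carry the side effect. */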
40352 rtx
40353 rs6000_address_for_fpconvert (rtx x)
40355 int strict_p = (reload_in_progress || reload_completed);
40356 rtx addr;
40358 gcc_assert (MEM_P (x));
40359 addr = XEXP (x, 0);
40360 if (! legitimate_indirect_address_p (addr, strict_p)
40361 && ! legitimate_indexed_address_p (addr, strict_p))
40363 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
40365 rtx reg = XEXP (addr, 0);
40366 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
40367 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
40368 gcc_assert (REG_P (reg));
40369 emit_insn (gen_add3_insn (reg, reg, size_rtx));
40370 addr = reg;
40372 else if (GET_CODE (addr) == PRE_MODIFY)
40374 rtx reg = XEXP (addr, 0);
40375 rtx expr = XEXP (addr, 1);
40376 gcc_assert (REG_P (reg));
40377 gcc_assert (GET_CODE (expr) == PLUS);
40378 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
40379 addr = reg;
40382 x = replace_equiv_address (x, copy_addr_to_reg (addr));
40385 return x;
40388 /* Given a memory reference, if it is not in the form for altivec memory
40389 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
40390 convert to the altivec format. */
40392 rtx
40393 rs6000_address_for_altivec (rtx x)
40395 gcc_assert (MEM_P (x));
40396 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
40398 rtx addr = XEXP (x, 0);
40399 int strict_p = (reload_in_progress || reload_completed);
40401 if (!legitimate_indexed_address_p (addr, strict_p)
40402 && !legitimate_indirect_address_p (addr, strict_p))
40403 addr = copy_to_mode_reg (Pmode, addr);
40405 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
40406 x = change_address (x, GET_MODE (x), addr);
40409 return x;
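/* Illustrative sketch (not part of this file; helper name is hypothetical):
   the AND with -16 above models the hardware behavior of lvx/stvx, which
   ignore the low four bits of the effective address.  */
static inline unsigned HOST_WIDE_INT
altivec_effective_address (unsigned HOST_WIDE_INT ea)
{
  /* Clearing the low 4 bits truncates EA to a 16-byte boundary, exactly
     what (and addr (const_int -16)) expresses in RTL.  */
  return ea & ~(unsigned HOST_WIDE_INT) 15;
}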
40412 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
40414 On the RS/6000, all integer constants are acceptable; most won't be valid
40415 for particular insns, though. Only easy FP constants are acceptable. */
40417 static bool
40418 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
40420 if (TARGET_ELF && tls_referenced_p (x))
40421 return false;
40423 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
40424 || GET_MODE (x) == VOIDmode
40425 || (TARGET_POWERPC64 && mode == DImode)
40426 || easy_fp_constant (x, mode)
40427 || easy_vector_constant (x, mode));
40431 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
40433 static bool
40434 chain_already_loaded (rtx_insn *last)
40436 for (; last != NULL; last = PREV_INSN (last))
40438 if (NONJUMP_INSN_P (last))
40440 rtx patt = PATTERN (last);
40442 if (GET_CODE (patt) == SET)
40444 rtx lhs = XEXP (patt, 0);
40446 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
40447 return true;
40451 return false;
40454 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
40456 void
40457 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40459 const bool direct_call_p
40460 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
40461 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
40462 rtx toc_load = NULL_RTX;
40463 rtx toc_restore = NULL_RTX;
40464 rtx func_addr;
40465 rtx abi_reg = NULL_RTX;
40466 rtx call[4];
40467 int n_call;
40468 rtx insn;
40470 /* Handle longcall attributes. */
40471 if (INTVAL (cookie) & CALL_LONG)
40472 func_desc = rs6000_longcall_ref (func_desc);
40474 /* Handle indirect calls. */
40475 if (GET_CODE (func_desc) != SYMBOL_REF
40476 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
40478 /* Save the TOC into its reserved slot before the call,
40479 and prepare to restore it after the call. */
40480 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
40481 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
40482 rtx stack_toc_mem = gen_frame_mem (Pmode,
40483 gen_rtx_PLUS (Pmode, stack_ptr,
40484 stack_toc_offset));
40485 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
40486 gen_rtvec (1, stack_toc_offset),
40487 UNSPEC_TOCSLOT);
40488 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
40490 /* Can we optimize saving the TOC in the prologue or
40491 do we need to do it at every call? */
40492 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
40493 cfun->machine->save_toc_in_prologue = true;
40494 else
40496 MEM_VOLATILE_P (stack_toc_mem) = 1;
40497 emit_move_insn (stack_toc_mem, toc_reg);
40500 if (DEFAULT_ABI == ABI_ELFv2)
40502 /* A function pointer in the ELFv2 ABI is just a plain address, but
40503 the ABI requires it to be loaded into r12 before the call. */
40504 func_addr = gen_rtx_REG (Pmode, 12);
40505 emit_move_insn (func_addr, func_desc);
40506 abi_reg = func_addr;
40508 else
40510 /* A function pointer under AIX is a pointer to a data area whose
40511 first word contains the actual address of the function, whose
40512 second word contains a pointer to its TOC, and whose third word
40513 contains a value to place in the static chain register (r11).
40514 Note that if we load the static chain, our "trampoline" need
40515 not have any executable code. */
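/* Illustrative descriptor layout (offsets in Pmode words, matching the
   GET_MODE_SIZE (Pmode) arithmetic below):
   word 0: entry-point address    -> loaded into func_addr
   word 1: callee's TOC pointer   -> loaded just before the call
   word 2: static chain value     -> loaded into r11 when needed  */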
40517 /* Load up address of the actual function. */
40518 func_desc = force_reg (Pmode, func_desc);
40519 func_addr = gen_reg_rtx (Pmode);
40520 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
40522 /* Prepare to load the TOC of the called function. Note that the
40523 TOC load must happen immediately before the actual call so
40524 that unwinding the TOC registers works correctly. See the
40525 comment in frob_update_context. */
40526 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
40527 rtx func_toc_mem = gen_rtx_MEM (Pmode,
40528 gen_rtx_PLUS (Pmode, func_desc,
40529 func_toc_offset));
40530 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
40532 /* If we have a static chain, load it up. But, if the call was
40533 originally direct, the 3rd word has not been written since no
40534 trampoline has been built, so we ought not to load it, lest we
40535 overwrite a static chain value. */
40536 if (!direct_call_p
40537 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
40538 && !chain_already_loaded (get_current_sequence ()->next->last))
40540 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
40541 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
40542 rtx func_sc_mem = gen_rtx_MEM (Pmode,
40543 gen_rtx_PLUS (Pmode, func_desc,
40544 func_sc_offset));
40545 emit_move_insn (sc_reg, func_sc_mem);
40546 abi_reg = sc_reg;
40550 else
40552 /* Direct calls use the TOC: for local calls, the callee will
40553 assume the TOC register is set; for non-local calls, the
40554 PLT stub needs the TOC register. */
40555 abi_reg = toc_reg;
40556 func_addr = func_desc;
40559 /* Create the call. */
40560 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
40561 if (value != NULL_RTX)
40562 call[0] = gen_rtx_SET (value, call[0]);
40563 n_call = 1;
40565 if (toc_load)
40566 call[n_call++] = toc_load;
40567 if (toc_restore)
40568 call[n_call++] = toc_restore;
40570 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
40572 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
40573 insn = emit_call_insn (insn);
40575 /* Mention all registers defined by the ABI to hold information
40576 as uses in CALL_INSN_FUNCTION_USAGE. */
40577 if (abi_reg)
40578 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
40581 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
40583 void
40584 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40586 rtx call[2];
40587 rtx insn;
40589 gcc_assert (INTVAL (cookie) == 0);
40591 /* Create the call. */
40592 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
40593 if (value != NULL_RTX)
40594 call[0] = gen_rtx_SET (value, call[0]);
40596 call[1] = simple_return_rtx;
40598 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
40599 insn = emit_call_insn (insn);
40601 /* Note use of the TOC register. */
40602 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
40605 /* Return whether we need to always update the saved TOC pointer when we update
40606 the stack pointer. */
40608 static bool
40609 rs6000_save_toc_in_prologue_p (void)
40611 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
40614 #ifdef HAVE_GAS_HIDDEN
40615 # define USE_HIDDEN_LINKONCE 1
40616 #else
40617 # define USE_HIDDEN_LINKONCE 0
40618 #endif
40620 /* Fills in the label name that should be used for a 476 link stack thunk. */
40622 void
40623 get_ppc476_thunk_name (char name[32])
40625 gcc_assert (TARGET_LINK_STACK);
40627 if (USE_HIDDEN_LINKONCE)
40628 sprintf (name, "__ppc476.get_thunk");
40629 else
40630 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
40633 /* This function emits the simple thunk routine that is used to preserve
40634 the link stack on the 476 cpu. */
40636 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
40637 static void
40638 rs6000_code_end (void)
40640 char name[32];
40641 tree decl;
40643 if (!TARGET_LINK_STACK)
40644 return;
40646 get_ppc476_thunk_name (name);
40648 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
40649 build_function_type_list (void_type_node, NULL_TREE));
40650 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
40651 NULL_TREE, void_type_node);
40652 TREE_PUBLIC (decl) = 1;
40653 TREE_STATIC (decl) = 1;
40655 #if RS6000_WEAK
40656 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
40658 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
40659 targetm.asm_out.unique_section (decl, 0);
40660 switch_to_section (get_named_section (decl, NULL, 0));
40661 DECL_WEAK (decl) = 1;
40662 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
40663 targetm.asm_out.globalize_label (asm_out_file, name);
40664 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
40665 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
40667 else
40668 #endif
40670 switch_to_section (text_section);
40671 ASM_OUTPUT_LABEL (asm_out_file, name);
40674 DECL_INITIAL (decl) = make_node (BLOCK);
40675 current_function_decl = decl;
40676 allocate_struct_function (decl, false);
40677 init_function_start (decl);
40678 first_function_block_is_cold = false;
40679 /* Make sure unwind info is emitted for the thunk if needed. */
40680 final_start_function (emit_barrier (), asm_out_file, 1);
40682 fputs ("\tblr\n", asm_out_file);
40684 final_end_function ();
40685 init_insn_lengths ();
40686 free_after_compilation (cfun);
40687 set_cfun (NULL);
40688 current_function_decl = NULL;
40691 /* Add r30 to hard reg set if the prologue sets it up and it is not
40692 pic_offset_table_rtx. */
40694 static void
40695 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
40697 if (!TARGET_SINGLE_PIC_BASE
40698 && TARGET_TOC
40699 && TARGET_MINIMAL_TOC
40700 && !constant_pool_empty_p ())
40701 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
40702 if (cfun->machine->split_stack_argp_used)
40703 add_to_hard_reg_set (&set->set, Pmode, 12);
40707 /* Helper function for rs6000_split_logical to emit a logical instruction after
40708 splitting the operation into single GPR registers.
40710 DEST is the destination register.
40711 OP1 and OP2 are the input source registers.
40712 CODE is the base operation (AND, IOR, XOR, NOT).
40713 MODE is the machine mode.
40714 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40715 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40716 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40718 static void
40719 rs6000_split_logical_inner (rtx dest,
40720 rtx op1,
40721 rtx op2,
40722 enum rtx_code code,
40723 machine_mode mode,
40724 bool complement_final_p,
40725 bool complement_op1_p,
40726 bool complement_op2_p)
40728 rtx bool_rtx;
40730 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
40731 if (op2 && GET_CODE (op2) == CONST_INT
40732 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
40733 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40735 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
40736 HOST_WIDE_INT value = INTVAL (op2) & mask;
40738 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
40739 if (code == AND)
40741 if (value == 0)
40743 emit_insn (gen_rtx_SET (dest, const0_rtx));
40744 return;
40747 else if (value == mask)
40749 if (!rtx_equal_p (dest, op1))
40750 emit_insn (gen_rtx_SET (dest, op1));
40751 return;
40755 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
40756 into separate ORI/ORIS or XORI/XORIS instructions. */
40757 else if (code == IOR || code == XOR)
40759 if (value == 0)
40761 if (!rtx_equal_p (dest, op1))
40762 emit_insn (gen_rtx_SET (dest, op1));
40763 return;
40768 if (code == AND && mode == SImode
40769 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40771 emit_insn (gen_andsi3 (dest, op1, op2));
40772 return;
40775 if (complement_op1_p)
40776 op1 = gen_rtx_NOT (mode, op1);
40778 if (complement_op2_p)
40779 op2 = gen_rtx_NOT (mode, op2);
40781 /* For canonical RTL, if only one arm is inverted it is the first. */
40782 if (!complement_op1_p && complement_op2_p)
40783 std::swap (op1, op2);
40785 bool_rtx = ((code == NOT)
40786 ? gen_rtx_NOT (mode, op1)
40787 : gen_rtx_fmt_ee (code, mode, op1, op2));
40789 if (complement_final_p)
40790 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
40792 emit_insn (gen_rtx_SET (dest, bool_rtx));
40795 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
40796 operations are split immediately during RTL generation to allow for more
40797 optimizations of the AND/IOR/XOR.
40799 OPERANDS is an array containing the destination and two input operands.
40800 CODE is the base operation (AND, IOR, XOR, NOT).
40801 MODE is the machine mode.
40802 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40803 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40804 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40808 static void
40809 rs6000_split_logical_di (rtx operands[3],
40810 enum rtx_code code,
40811 bool complement_final_p,
40812 bool complement_op1_p,
40813 bool complement_op2_p)
40815 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
40816 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
40817 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
40818 enum hi_lo { hi = 0, lo = 1 };
40819 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
40820 size_t i;
40822 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
40823 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
40824 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
40825 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
40827 if (code == NOT)
40828 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
40829 else
40831 if (GET_CODE (operands[2]) != CONST_INT)
40833 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
40834 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
40836 else
40838 HOST_WIDE_INT value = INTVAL (operands[2]);
40839 HOST_WIDE_INT value_hi_lo[2];
40841 gcc_assert (!complement_final_p);
40842 gcc_assert (!complement_op1_p);
40843 gcc_assert (!complement_op2_p);
40845 value_hi_lo[hi] = value >> 32;
40846 value_hi_lo[lo] = value & lower_32bits;
40848 for (i = 0; i < 2; i++)
40850 HOST_WIDE_INT sub_value = value_hi_lo[i];
40852 if (sub_value & sign_bit)
40853 sub_value |= upper_32bits;
40855 op2_hi_lo[i] = GEN_INT (sub_value);
40857 /* If this is an AND instruction, check to see if we need to load
40858 the value in a register. */
40859 if (code == AND && sub_value != -1 && sub_value != 0
40860 && !and_operand (op2_hi_lo[i], SImode))
40861 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
40866 for (i = 0; i < 2; i++)
40868 /* Split large IOR/XOR operations. */
40869 if ((code == IOR || code == XOR)
40870 && GET_CODE (op2_hi_lo[i]) == CONST_INT
40871 && !complement_final_p
40872 && !complement_op1_p
40873 && !complement_op2_p
40874 && !logical_const_operand (op2_hi_lo[i], SImode))
40876 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
40877 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
40878 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
40879 rtx tmp = gen_reg_rtx (SImode);
40881 /* Make sure the constant is sign extended. */
40882 if ((hi_16bits & sign_bit) != 0)
40883 hi_16bits |= upper_32bits;
40885 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
40886 code, SImode, false, false, false);
40888 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
40889 code, SImode, false, false, false);
40891 else
40892 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
40893 code, SImode, complement_final_p,
40894 complement_op1_p, complement_op2_p);
40897 return;
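/* Worked example (illustrative): on a 32-bit target, a DImode
     dest = op1 ^ 0x123456789abcdef0
   is split into the SImode halves 0x12345678 and 0x9abcdef0.  Neither
   half is a 16-bit logical constant, so each is further split by the
   loop above into a pair of instructions, e.g. for the low half:
     xoris lo,lo,0x9abc   # high 16 bits (sign extension corrected)
     xori  lo,lo,0xdef0   # low 16 bits  */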
40900 /* Split the insns that make up boolean operations operating on multiple GPR
40901 registers. The boolean MD patterns ensure that the inputs either are
40902 exactly the same as the output registers, or there is no overlap.
40904 OPERANDS is an array containing the destination and two input operands.
40905 CODE is the base operation (AND, IOR, XOR, NOT).
40906 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40907 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40908 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40910 void
40911 rs6000_split_logical (rtx operands[3],
40912 enum rtx_code code,
40913 bool complement_final_p,
40914 bool complement_op1_p,
40915 bool complement_op2_p)
40917 machine_mode mode = GET_MODE (operands[0]);
40918 machine_mode sub_mode;
40919 rtx op0, op1, op2;
40920 int sub_size, regno0, regno1, nregs, i;
40922 /* If this is DImode, use the specialized version that can run before
40923 register allocation. */
40924 if (mode == DImode && !TARGET_POWERPC64)
40926 rs6000_split_logical_di (operands, code, complement_final_p,
40927 complement_op1_p, complement_op2_p);
40928 return;
40931 op0 = operands[0];
40932 op1 = operands[1];
40933 op2 = (code == NOT) ? NULL_RTX : operands[2];
40934 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
40935 sub_size = GET_MODE_SIZE (sub_mode);
40936 regno0 = REGNO (op0);
40937 regno1 = REGNO (op1);
40939 gcc_assert (reload_completed);
40940 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40941 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40943 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
40944 gcc_assert (nregs > 1);
40946 if (op2 && REG_P (op2))
40947 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
40949 for (i = 0; i < nregs; i++)
40951 int offset = i * sub_size;
40952 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
40953 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
40954 rtx sub_op2 = ((code == NOT)
40955 ? NULL_RTX
40956 : simplify_subreg (sub_mode, op2, mode, offset));
40958 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
40959 complement_final_p, complement_op1_p,
40960 complement_op2_p);
40963 return;
40967 /* Return true if the peephole2 can combine a load involving a combination of
40968 an addis instruction and a load with an offset that can be fused together on
40969 a power8. */
40971 bool
40972 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
40973 rtx addis_value, /* addis value. */
40974 rtx target, /* target register that is loaded. */
40975 rtx mem) /* bottom part of the memory addr. */
40977 rtx addr;
40978 rtx base_reg;
40980 /* Validate arguments. */
40981 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
40982 return false;
40984 if (!base_reg_operand (target, GET_MODE (target)))
40985 return false;
40987 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
40988 return false;
40990 /* Allow sign/zero extension. */
40991 if (GET_CODE (mem) == ZERO_EXTEND
40992 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
40993 mem = XEXP (mem, 0);
40995 if (!MEM_P (mem))
40996 return false;
40998 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
40999 return false;
41001 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
41002 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
41003 return false;
41005 /* Validate that the register used to load the high value is either the
41006 register being loaded, or we can safely replace its use.
41008 This function is only called from the peephole2 pass and we assume that
41009 there are 2 instructions in the peephole (addis and load), so we want to
41010 check if the target register was not used in the memory address and the
41011 register to hold the addis result is dead after the peephole. */
41012 if (REGNO (addis_reg) != REGNO (target))
41014 if (reg_mentioned_p (target, mem))
41015 return false;
41017 if (!peep2_reg_dead_p (2, addis_reg))
41018 return false;
41020 /* If the target register being loaded is the stack pointer, we must
41021 avoid loading any other value into it, even temporarily. */
41022 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
41023 return false;
41026 base_reg = XEXP (addr, 0);
41027 return REGNO (addis_reg) == REGNO (base_reg);
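/* Illustrative shape of a fusible power8 pair (register numbers are
   hypothetical); the checks above ensure the addis target is either the
   loaded register itself or provably dead after the sequence:
     addis r10,r2,sym@toc@ha
     lwz   r10,sym@toc@l(r10)  */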
41030 /* During the peephole2 pass, adjust and expand the insns for a load fusion
41031 sequence. We adjust the addis register to use the target register. If the
41032 load sign extends, we adjust the code to do the zero extending load, and an
41033 explicit sign extension later since the fusion only covers zero extending
41034 loads.
41036 The operands are:
41037 operands[0] register set with addis (to be replaced with target)
41038 operands[1] value set via addis
41039 operands[2] target register being loaded
41040 operands[3] D-form memory reference using operands[0]. */
41042 void
41043 expand_fusion_gpr_load (rtx *operands)
41045 rtx addis_value = operands[1];
41046 rtx target = operands[2];
41047 rtx orig_mem = operands[3];
41048 rtx new_addr, new_mem, orig_addr, offset;
41049 enum rtx_code plus_or_lo_sum;
41050 machine_mode target_mode = GET_MODE (target);
41051 machine_mode extend_mode = target_mode;
41052 machine_mode ptr_mode = Pmode;
41053 enum rtx_code extend = UNKNOWN;
41055 if (GET_CODE (orig_mem) == ZERO_EXTEND
41056 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
41058 extend = GET_CODE (orig_mem);
41059 orig_mem = XEXP (orig_mem, 0);
41060 target_mode = GET_MODE (orig_mem);
41063 gcc_assert (MEM_P (orig_mem));
41065 orig_addr = XEXP (orig_mem, 0);
41066 plus_or_lo_sum = GET_CODE (orig_addr);
41067 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41069 offset = XEXP (orig_addr, 1);
41070 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41071 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41073 if (extend != UNKNOWN)
41074 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
41076 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
41077 UNSPEC_FUSION_GPR);
41078 emit_insn (gen_rtx_SET (target, new_mem));
41080 if (extend == SIGN_EXTEND)
41082 int sub_off = ((BYTES_BIG_ENDIAN)
41083 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
41084 : 0);
41085 rtx sign_reg
41086 = simplify_subreg (target_mode, target, extend_mode, sub_off);
41088 emit_insn (gen_rtx_SET (target,
41089 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
41092 return;
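/* Illustrative result (hypothetical registers): a fused sign-extending
   SImode-to-DImode load becomes a zero-extending fused load followed by
   an explicit sign extension, since only zero-extending loads fuse:
     addis r9,r2,sym@toc@ha
     lwz   r9,sym@toc@l(r9)
     extsw r9,r9  */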
41095 /* Emit the addis instruction that will be part of a fused instruction
41096 sequence. */
41098 void
41099 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
41100 const char *mode_name)
41102 rtx fuse_ops[10];
41103 char insn_template[80];
41104 const char *addis_str = NULL;
41105 const char *comment_str = ASM_COMMENT_START;
41107 if (*comment_str == ' ')
41108 comment_str++;
41110 /* Emit the addis instruction. */
41111 fuse_ops[0] = target;
41112 if (satisfies_constraint_L (addis_value))
41114 fuse_ops[1] = addis_value;
41115 addis_str = "lis %0,%v1";
41118 else if (GET_CODE (addis_value) == PLUS)
41120 rtx op0 = XEXP (addis_value, 0);
41121 rtx op1 = XEXP (addis_value, 1);
41123 if (REG_P (op0) && CONST_INT_P (op1)
41124 && satisfies_constraint_L (op1))
41126 fuse_ops[1] = op0;
41127 fuse_ops[2] = op1;
41128 addis_str = "addis %0,%1,%v2";
41132 else if (GET_CODE (addis_value) == HIGH)
41134 rtx value = XEXP (addis_value, 0);
41135 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
41137 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
41138 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
41139 if (TARGET_ELF)
41140 addis_str = "addis %0,%2,%1@toc@ha";
41142 else if (TARGET_XCOFF)
41143 addis_str = "addis %0,%1@u(%2)";
41145 else
41146 gcc_unreachable ();
41149 else if (GET_CODE (value) == PLUS)
41151 rtx op0 = XEXP (value, 0);
41152 rtx op1 = XEXP (value, 1);
41154 if (GET_CODE (op0) == UNSPEC
41155 && XINT (op0, 1) == UNSPEC_TOCREL
41156 && CONST_INT_P (op1))
41158 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
41159 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
41160 fuse_ops[3] = op1;
41161 if (TARGET_ELF)
41162 addis_str = "addis %0,%2,%1+%3@toc@ha";
41164 else if (TARGET_XCOFF)
41165 addis_str = "addis %0,%1+%3@u(%2)";
41167 else
41168 gcc_unreachable ();
41172 else if (satisfies_constraint_L (value))
41174 fuse_ops[1] = value;
41175 addis_str = "lis %0,%v1";
41178 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
41180 fuse_ops[1] = value;
41181 addis_str = "lis %0,%1@ha";
41185 if (!addis_str)
41186 fatal_insn ("Could not generate addis value for fusion", addis_value);
41188 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
41189 comment, mode_name);
41190 output_asm_insn (insn_template, fuse_ops);
41193 /* Emit a D-form load or store instruction that is the second instruction
41194 of a fusion sequence. */
41196 void
41197 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
41198 const char *insn_str)
41200 rtx fuse_ops[10];
41201 char insn_template[80];
41203 fuse_ops[0] = load_store_reg;
41204 fuse_ops[1] = addis_reg;
41206 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
41208 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
41209 fuse_ops[2] = offset;
41210 output_asm_insn (insn_template, fuse_ops);
41213 else if (GET_CODE (offset) == UNSPEC
41214 && XINT (offset, 1) == UNSPEC_TOCREL)
41216 if (TARGET_ELF)
41217 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
41219 else if (TARGET_XCOFF)
41220 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41222 else
41223 gcc_unreachable ();
41225 fuse_ops[2] = XVECEXP (offset, 0, 0);
41226 output_asm_insn (insn_template, fuse_ops);
41229 else if (GET_CODE (offset) == PLUS
41230 && GET_CODE (XEXP (offset, 0)) == UNSPEC
41231 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
41232 && CONST_INT_P (XEXP (offset, 1)))
41234 rtx tocrel_unspec = XEXP (offset, 0);
41235 if (TARGET_ELF)
41236 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
41238 else if (TARGET_XCOFF)
41239 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
41241 else
41242 gcc_unreachable ();
41244 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
41245 fuse_ops[3] = XEXP (offset, 1);
41246 output_asm_insn (insn_template, fuse_ops);
41249 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
41251 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41253 fuse_ops[2] = offset;
41254 output_asm_insn (insn_template, fuse_ops);
41257 else
41258 fatal_insn ("Unable to generate load/store offset for fusion", offset);
41260 return;
41263 /* Wrap a TOC address that can be fused to indicate that special fusion
41264 processing is needed. */
41266 static rtx
41267 fusion_wrap_memory_address (rtx old_mem)
41269 rtx old_addr = XEXP (old_mem, 0);
41270 rtvec v = gen_rtvec (1, old_addr);
41271 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
41272 return replace_equiv_address_nv (old_mem, new_addr, false);
41275 /* Given an address, convert it into the addis and load offset parts. Addresses
41276 created during the peephole2 process look like:
41277 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
41278 (unspec [(...)] UNSPEC_TOCREL))
41280 Addresses created via toc fusion look like:
41281 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
41283 static void
41284 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
41286 rtx hi, lo;
41288 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
41290 lo = XVECEXP (addr, 0, 0);
41291 hi = gen_rtx_HIGH (Pmode, lo);
41293 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
41295 hi = XEXP (addr, 0);
41296 lo = XEXP (addr, 1);
41298 else
41299 gcc_unreachable ();
41301 *p_hi = hi;
41302 *p_lo = lo;
41305 /* Return a string to fuse an addis instruction with a gpr load into the same
41306 register that the addis instruction set. The address that is used
41307 is the logical address that was formed during peephole2:
41308 (lo_sum (high) (low-part))
41310 Or the address is the TOC address that is wrapped before register allocation:
41311 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
41313 The code is complicated, so we call output_asm_insn directly, and just
41314 return "". */
41316 const char *
41317 emit_fusion_gpr_load (rtx target, rtx mem)
41319 rtx addis_value;
41320 rtx addr;
41321 rtx load_offset;
41322 const char *load_str = NULL;
41323 const char *mode_name = NULL;
41324 machine_mode mode;
41326 if (GET_CODE (mem) == ZERO_EXTEND)
41327 mem = XEXP (mem, 0);
41329 gcc_assert (REG_P (target) && MEM_P (mem));
41331 addr = XEXP (mem, 0);
41332 fusion_split_address (addr, &addis_value, &load_offset);
41334 /* Now emit the load instruction to the same register. */
41335 mode = GET_MODE (mem);
41336 switch (mode)
41338 case E_QImode:
41339 mode_name = "char";
41340 load_str = "lbz";
41341 break;
41343 case E_HImode:
41344 mode_name = "short";
41345 load_str = "lhz";
41346 break;
41348 case E_SImode:
41349 case E_SFmode:
41350 mode_name = (mode == SFmode) ? "float" : "int";
41351 load_str = "lwz";
41352 break;
41354 case E_DImode:
41355 case E_DFmode:
41356 gcc_assert (TARGET_POWERPC64);
41357 mode_name = (mode == DFmode) ? "double" : "long";
41358 load_str = "ld";
41359 break;
41361 default:
41362 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
41365 /* Emit the addis instruction. */
41366 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
41368 /* Emit the D-form load instruction. */
41369 emit_fusion_load_store (target, target, load_offset, load_str);
41371 return "";
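/* For example (illustrative), for an int load with a TOC-relative address
   on ELF, the emit_fusion_addis and emit_fusion_load_store calls above
   produce:
     addis r10,r2,sym@toc@ha		# gpr load fusion, type int
     lwz r10,sym@toc@l(r10)  */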
41375 /* Return true if the peephole2 can combine a load/store involving a
41376 combination of an addis instruction and the memory operation. This was
41377 added to the ISA 3.0 (power9) hardware. */
41379 bool
41380 fusion_p9_p (rtx addis_reg, /* register set via addis. */
41381 rtx addis_value, /* addis value. */
41382 rtx dest, /* destination (memory or register). */
41383 rtx src) /* source (register or memory). */
41385 rtx addr, mem, offset;
41386 machine_mode mode = GET_MODE (src);
41388 /* Validate arguments. */
41389 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
41390 return false;
41392 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
41393 return false;
41395 /* Ignore extend operations that are part of the load. */
41396 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
41397 src = XEXP (src, 0);
41399 /* Test for memory<-register or register<-memory. */
41400 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
41402 if (!MEM_P (dest))
41403 return false;
41405 mem = dest;
41408 else if (MEM_P (src))
41410 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
41411 return false;
41413 mem = src;
41416 else
41417 return false;
41419 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
41420 if (GET_CODE (addr) == PLUS)
41422 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41423 return false;
41425 return satisfies_constraint_I (XEXP (addr, 1));
41428 else if (GET_CODE (addr) == LO_SUM)
41430 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41431 return false;
41433 offset = XEXP (addr, 1);
41434 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
41435 return small_toc_ref (offset, GET_MODE (offset));
41437 else if (TARGET_ELF && !TARGET_POWERPC64)
41438 return CONSTANT_P (offset);
41441 return false;
41444 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41445 load sequence.
41447 The operands are:
41448 operands[0] register set with addis
41449 operands[1] value set via addis
41450 operands[2] target register being loaded
41451 operands[3] D-form memory reference using operands[0].
41453 This is similar to the fusion introduced with power8, except it scales to
41454 both loads/stores and does not require the result register to be the same as
41455 the base register. At the moment, we only do this if the register set
41456 with addis is dead. */
41458 void
41459 expand_fusion_p9_load (rtx *operands)
41461 rtx tmp_reg = operands[0];
41462 rtx addis_value = operands[1];
41463 rtx target = operands[2];
41464 rtx orig_mem = operands[3];
41465 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
41466 enum rtx_code plus_or_lo_sum;
41467 machine_mode target_mode = GET_MODE (target);
41468 machine_mode extend_mode = target_mode;
41469 machine_mode ptr_mode = Pmode;
41470 enum rtx_code extend = UNKNOWN;
41472 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
41474 extend = GET_CODE (orig_mem);
41475 orig_mem = XEXP (orig_mem, 0);
41476 target_mode = GET_MODE (orig_mem);
41479 gcc_assert (MEM_P (orig_mem));
41481 orig_addr = XEXP (orig_mem, 0);
41482 plus_or_lo_sum = GET_CODE (orig_addr);
41483 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41485 offset = XEXP (orig_addr, 1);
41486 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41487 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41489 if (extend != UNKNOWN)
41490 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
41492 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
41493 UNSPEC_FUSION_P9);
41495 set = gen_rtx_SET (target, new_mem);
41496 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41497 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41498 emit_insn (insn);
41500 return;
41503 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41504 store sequence.
41506 The operands are:
41507 operands[0] register set with addis
41508 operands[1] value set via addis
41509 operands[2] target D-form memory being stored to
41510 operands[3] register being stored
41512 This is similar to the fusion introduced with power8, except it scales to
41513 both loads/stores and does not require the result register to be the same as
41514 the base register. At the moment, we only do this if the register set
41515 with addis is dead. */
41517 void
41518 expand_fusion_p9_store (rtx *operands)
41520 rtx tmp_reg = operands[0];
41521 rtx addis_value = operands[1];
41522 rtx orig_mem = operands[2];
41523 rtx src = operands[3];
41524 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
41525 enum rtx_code plus_or_lo_sum;
41526 machine_mode target_mode = GET_MODE (orig_mem);
41527 machine_mode ptr_mode = Pmode;
41529 gcc_assert (MEM_P (orig_mem));
41531 orig_addr = XEXP (orig_mem, 0);
41532 plus_or_lo_sum = GET_CODE (orig_addr);
41533 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41535 offset = XEXP (orig_addr, 1);
41536 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41537 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41539 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
41540 UNSPEC_FUSION_P9);
41542 set = gen_rtx_SET (new_mem, new_src);
41543 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41544 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41545 emit_insn (insn);
41547 return;
41550 /* Return a string to fuse an addis instruction with a load using extended
41551 fusion. The address that is used is the logical address that was formed
41552 during peephole2: (lo_sum (high) (low-part))
41554 The code is complicated, so we call output_asm_insn directly, and just
41555 return "". */
41557 const char *
41558 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
41560 machine_mode mode = GET_MODE (reg);
41561 rtx hi;
41562 rtx lo;
41563 rtx addr;
41564 const char *load_string;
41565 int r;
41567 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
41569 mem = XEXP (mem, 0);
41570 mode = GET_MODE (mem);
41573 if (GET_CODE (reg) == SUBREG)
41575 gcc_assert (SUBREG_BYTE (reg) == 0);
41576 reg = SUBREG_REG (reg);
41579 if (!REG_P (reg))
41580 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
41582 r = REGNO (reg);
41583 if (FP_REGNO_P (r))
41585 if (mode == SFmode)
41586 load_string = "lfs";
41587 else if (mode == DFmode || mode == DImode)
41588 load_string = "lfd";
41589 else
41590 gcc_unreachable ();
41592 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41594 if (mode == SFmode)
41595 load_string = "lxssp";
41596 else if (mode == DFmode || mode == DImode)
41597 load_string = "lxsd";
41598 else
41599 gcc_unreachable ();
41601 else if (INT_REGNO_P (r))
41603 switch (mode)
41605 case E_QImode:
41606 load_string = "lbz";
41607 break;
41608 case E_HImode:
41609 load_string = "lhz";
41610 break;
41611 case E_SImode:
41612 case E_SFmode:
41613 load_string = "lwz";
41614 break;
41615 case E_DImode:
41616 case E_DFmode:
41617 if (!TARGET_POWERPC64)
41618 gcc_unreachable ();
41619 load_string = "ld";
41620 break;
41621 default:
41622 gcc_unreachable ();
41625 else
41626 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
41628 if (!MEM_P (mem))
41629 fatal_insn ("emit_fusion_p9_load not MEM", mem);
41631 addr = XEXP (mem, 0);
41632 fusion_split_address (addr, &hi, &lo);
41634 /* Emit the addis instruction. */
41635 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
41637 /* Emit the D-form load instruction. */
41638 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
41640 return "";
41643 /* Return a string to fuse an addis instruction with a store using extended
41644 fusion. The address that is used is the logical address that was formed
41645 during peephole2: (lo_sum (high) (low-part))
41647 The code is complicated, so we call output_asm_insn directly, and just
41648 return "". */
41650 const char *
41651 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
41653 machine_mode mode = GET_MODE (reg);
41654 rtx hi;
41655 rtx lo;
41656 rtx addr;
41657 const char *store_string;
41658 int r;
41660 if (GET_CODE (reg) == SUBREG)
41662 gcc_assert (SUBREG_BYTE (reg) == 0);
41663 reg = SUBREG_REG (reg);
41666 if (!REG_P (reg))
41667 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
41669 r = REGNO (reg);
41670 if (FP_REGNO_P (r))
41672 if (mode == SFmode)
41673 store_string = "stfs";
41674 else if (mode == DFmode)
41675 store_string = "stfd";
41676 else
41677 gcc_unreachable ();
41679 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41681 if (mode == SFmode)
41682 store_string = "stxssp";
41683 else if (mode == DFmode || mode == DImode)
41684 store_string = "stxsd";
41685 else
41686 gcc_unreachable ();
41688 else if (INT_REGNO_P (r))
41690 switch (mode)
41692 case E_QImode:
41693 store_string = "stb";
41694 break;
41695 case E_HImode:
41696 store_string = "sth";
41697 break;
41698 case E_SImode:
41699 case E_SFmode:
41700 store_string = "stw";
41701 break;
41702 case E_DImode:
41703 case E_DFmode:
41704 if (!TARGET_POWERPC64)
41705 gcc_unreachable ();
41706 store_string = "std";
41707 break;
41708 default:
41709 gcc_unreachable ();
41712 else
41713 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
41715 if (!MEM_P (mem))
41716 fatal_insn ("emit_fusion_p9_store not MEM", mem);
41718 addr = XEXP (mem, 0);
41719 fusion_split_address (addr, &hi, &lo);
41721 /* Emit the addis instruction. */
41722 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
41724 /* Emit the D-form store instruction. */
41725 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
41727 return "";
41731 /* Analyze vector computations and remove unnecessary doubleword
41732 swaps (xxswapdi instructions). This pass is performed only
41733 for little-endian VSX code generation.
41735 For this specific case, loads and stores of 4x32 and 2x64 vectors
41736 are inefficient. These are implemented using the lxvd2x and
41737 stxvd2x instructions, which invert the order of doublewords in
41738 a vector register. Thus the code generation inserts an xxswapdi
41739 after each such load, and prior to each such store. (For spill
41740 code after register assignment, an additional xxswapdi is inserted
41741 following each store in order to return a hard register to its
41742 unpermuted value.)
41744 The extra xxswapdi instructions reduce performance. This can be
41745 particularly bad for vectorized code. The purpose of this pass
41746 is to reduce the number of xxswapdi instructions required for
41747 correctness.
41749 The primary insight is that much code that operates on vectors
41750 does not care about the relative order of elements in a register,
41751 so long as the correct memory order is preserved. If we have
41752 a computation where all input values are provided by lxvd2x/xxswapdi
41753 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
41754 and all intermediate computations are pure SIMD (independent of
41755 element order), then all the xxswapdi's associated with the loads
41756 and stores may be removed.
41758 This pass uses some of the infrastructure and logical ideas from
41759 the "web" pass in web.c. We create maximal webs of computations
41760 fitting the description above using union-find. Each such web is
41761 then optimized by removing its unnecessary xxswapdi instructions.
41763 The pass is placed prior to global optimization so that we can
41764 perform the optimization in the safest and simplest way possible;
41765 that is, by replacing each xxswapdi insn with a register copy insn.
41766 Subsequent forward propagation will remove copies where possible.
41768 There are some operations sensitive to element order for which we
41769 can still allow the operation, provided we modify those operations.
41770 These include CONST_VECTORs, for which we must swap the first and
41771 second halves of the constant vector; and SUBREGs, for which we
41772 must adjust the byte offset to account for the swapped doublewords.
41773 A remaining opportunity would be non-immediate-form splats, for
41774 which we should adjust the selected lane of the input. We should
41775 also make code generation adjustments for sum-across operations,
41776 since this is a common vectorizer reduction.
41778 Because we run prior to the first split, we can see loads and stores
41779 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
41780 vector loads and stores that have not yet been split into a permuting
41781 load/store and a swap. (One way this can happen is with a builtin
41782 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
41783 than deleting a swap, we convert the load/store into a permuting
41784 load/store (which effectively removes the swap). */
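/* Illustrative web (little-endian, hypothetical operands): the sequence
     lxvd2x vs0,0,r3
     xxswapd vs0,vs0
     xvadddp vs0,vs0,vs1
     xxswapd vs0,vs0
     stxvd2x vs0,0,r4
   qualifies because xvadddp is lane-insensitive; each xxswapd is
   replaced with a register copy, which forward propagation deletes.  */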
41786 /* Notes on Permutes
41788 We do not currently handle computations that contain permutes. There
41789 is a general transformation that can be performed correctly, but it
41790 may introduce more expensive code than it replaces. To handle these
41791 would require a cost model to determine when to perform the optimization.
41792 This commentary records how this could be done if desired.
41794 The most general permute is something like this (example for V16QI):
41796 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
41797 (parallel [(const_int a0) (const_int a1)
41798 ...
41799 (const_int a14) (const_int a15)]))
41801 where a0,...,a15 are in [0,31] and select elements from op1 and op2
41802 to produce the result.
41804 Regardless of mode, we can convert the PARALLEL to a mask of 16
41805 byte-element selectors. Let's call this M, with M[i] representing
41806 the ith byte-element selector value. Then if we swap doublewords
41807 throughout the computation, we can get correct behavior by replacing
41808 M with M' as follows:
41810 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
41811 { ((M[i]+8)%16)+16 : M[i] in [16,31]
41813 This seems promising at first, since we are just replacing one mask
41814 with another. But certain masks are preferable to others. If M
41815 is a mask that matches a vmrghh pattern, for example, M' certainly
41816 will not. Instead of a single vmrghh, we would generate a load of
41817 M' and a vperm. So we would need to know how many xxswapd's we can
41818 remove as a result of this transformation to determine if it's
41819 profitable; and preferably the logic would need to be aware of all
41820 the special preferable masks.
41822 Another form of permute is an UNSPEC_VPERM, in which the mask is
41823 already in a register. In some cases, this mask may be a constant
41824 that we can discover with ud-chains, in which case the above
41825 transformation is ok. However, the common usage here is for the
41826 mask to be produced by an UNSPEC_LVSL, in which case the mask
41827 cannot be known at compile time. In such a case we would have to
41828 generate several instructions to compute M' as above at run time,
41829 and a cost model is needed again.
41831 However, when the mask M for an UNSPEC_VPERM is loaded from the
41832 constant pool, we can replace M with M' as above at no cost
41833 beyond adding a constant pool entry. */
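/* Illustrative helper (not part of this file; name is hypothetical):
   the M -> M' remapping described above for a 16-entry byte-selector
   mask.  */
static inline unsigned char
swap_selector_element (unsigned char m)
{
  /* Selectors 0..15 pick bytes of op1, 16..31 bytes of op2; swapping
     doublewords rotates each 16-byte range by 8.  */
  return (m < 16) ? (m + 8) % 16 : ((m + 8) % 16) + 16;
}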
41835 /* This is based on the union-find logic in web.c. web_entry_base is
41836 defined in df.h. */
41837 class swap_web_entry : public web_entry_base
41839 public:
41840 /* Pointer to the insn. */
41841 rtx_insn *insn;
41842 /* Set if insn contains a mention of a vector register. All other
41843 fields are undefined if this field is unset. */
41844 unsigned int is_relevant : 1;
41845 /* Set if insn is a load. */
41846 unsigned int is_load : 1;
41847 /* Set if insn is a store. */
41848 unsigned int is_store : 1;
41849 /* Set if insn is a doubleword swap. This can either be a register swap
41850 or a permuting load or store (test is_load and is_store for this). */
41851 unsigned int is_swap : 1;
41852 /* Set if the insn has a live-in use of a parameter register. */
41853 unsigned int is_live_in : 1;
41854 /* Set if the insn has a live-out def of a return register. */
41855 unsigned int is_live_out : 1;
41856 /* Set if the insn contains a subreg reference of a vector register. */
41857 unsigned int contains_subreg : 1;
41858 /* Set if the insn contains a 128-bit integer operand. */
41859 unsigned int is_128_int : 1;
41860 /* Set if this is a call-insn. */
41861 unsigned int is_call : 1;
41862 /* Set if this insn does not perform a vector operation for which
41863 element order matters, or if we know how to fix it up if it does.
41864 Undefined if is_swap is set. */
41865 unsigned int is_swappable : 1;
41866 /* A nonzero value indicates what kind of special handling for this
41867 insn is required if doublewords are swapped. Undefined if
41868 is_swappable is not set. */
41869 unsigned int special_handling : 4;
41870 /* Set if the web represented by this entry cannot be optimized. */
41871 unsigned int web_not_optimizable : 1;
41872 /* Set if this insn should be deleted. */
41873 unsigned int will_delete : 1;
41876 enum special_handling_values {
41877 SH_NONE = 0,
41878 SH_CONST_VECTOR,
41879 SH_SUBREG,
41880 SH_NOSWAP_LD,
41881 SH_NOSWAP_ST,
41882 SH_EXTRACT,
41883 SH_SPLAT,
41884 SH_XXPERMDI,
41885 SH_CONCAT,
41886 SH_VPERM
41889 /* Union INSN with all insns containing definitions that reach USE.
41890 Detect whether USE is live-in to the current function. */
41891 static void
41892 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
41894 struct df_link *link = DF_REF_CHAIN (use);
41896 if (!link)
41897 insn_entry[INSN_UID (insn)].is_live_in = 1;
41899 while (link)
41901 if (DF_REF_IS_ARTIFICIAL (link->ref))
41902 insn_entry[INSN_UID (insn)].is_live_in = 1;
41904 if (DF_REF_INSN_INFO (link->ref))
41906 rtx def_insn = DF_REF_INSN (link->ref);
41907 (void)unionfind_union (insn_entry + INSN_UID (insn),
41908 insn_entry + INSN_UID (def_insn));
41911 link = link->next;
41915 /* Union INSN with all insns containing uses reached from DEF.
41916 Detect whether DEF is live-out from the current function. */
41917 static void
41918 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
41920 struct df_link *link = DF_REF_CHAIN (def);
41922 if (!link)
41923 insn_entry[INSN_UID (insn)].is_live_out = 1;
41925 while (link)
41927 /* This could be an eh use or some other artificial use;
41928 we treat these all the same (killing the optimization). */
41929 if (DF_REF_IS_ARTIFICIAL (link->ref))
41930 insn_entry[INSN_UID (insn)].is_live_out = 1;
41932 if (DF_REF_INSN_INFO (link->ref))
41934 rtx use_insn = DF_REF_INSN (link->ref);
41935 (void)unionfind_union (insn_entry + INSN_UID (insn),
41936 insn_entry + INSN_UID (use_insn));
41939 link = link->next;
41943 /* Return 1 iff INSN is a load insn, including permuting loads that
41944 represent an lxvd2x instruction; else return 0. */
41945 static unsigned int
41946 insn_is_load_p (rtx insn)
41948 rtx body = PATTERN (insn);
41950 if (GET_CODE (body) == SET)
41952 if (GET_CODE (SET_SRC (body)) == MEM)
41953 return 1;
41955 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
41956 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
41957 return 1;
41959 return 0;
41962 if (GET_CODE (body) != PARALLEL)
41963 return 0;
41965 rtx set = XVECEXP (body, 0, 0);
41967 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
41968 return 1;
41970 return 0;
41973 /* Return 1 iff INSN is a store insn, including permuting stores that
41974 represent an stxvd2x instruction; else return 0. */
41975 static unsigned int
41976 insn_is_store_p (rtx insn)
41978 rtx body = PATTERN (insn);
41979 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
41980 return 1;
41981 if (GET_CODE (body) != PARALLEL)
41982 return 0;
41983 rtx set = XVECEXP (body, 0, 0);
41984 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
41985 return 1;
41986 return 0;
41989 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
41990 a permuting load, or a permuting store. */
41991 static unsigned int
41992 insn_is_swap_p (rtx insn)
41994 rtx body = PATTERN (insn);
41995 if (GET_CODE (body) != SET)
41996 return 0;
41997 rtx rhs = SET_SRC (body);
41998 if (GET_CODE (rhs) != VEC_SELECT)
41999 return 0;
42000 rtx parallel = XEXP (rhs, 1);
42001 if (GET_CODE (parallel) != PARALLEL)
42002 return 0;
42003 unsigned int len = XVECLEN (parallel, 0);
42004 if (len != 2 && len != 4 && len != 8 && len != 16)
42005 return 0;
42006 for (unsigned int i = 0; i < len / 2; ++i)
42008 rtx op = XVECEXP (parallel, 0, i);
42009 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
42010 return 0;
42012 for (unsigned int i = len / 2; i < len; ++i)
42014 rtx op = XVECEXP (parallel, 0, i);
42015 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
42016 return 0;
42018 return 1;
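/* For example (illustrative), a V4SI doubleword swap is the selection
     (vec_select:V4SI (reg:V4SI x)
                      (parallel [(const_int 2) (const_int 3)
                                 (const_int 0) (const_int 1)]))
   which the two loops above verify: element i of the first half must
   select len/2 + i, and element i of the second half i - len/2.  */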
42021 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
42022 static bool
42023 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
42025 unsigned uid = INSN_UID (insn);
42026 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
42027 return false;
42029 /* Find the unique use in the swap and locate its def. If the def
42030 isn't unique, punt. */
42031 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42032 df_ref use;
42033 FOR_EACH_INSN_INFO_USE (use, insn_info)
42035 struct df_link *def_link = DF_REF_CHAIN (use);
42036 if (!def_link || def_link->next)
42037 return false;
42039 rtx def_insn = DF_REF_INSN (def_link->ref);
42040 unsigned uid2 = INSN_UID (def_insn);
42041 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
42042 return false;
42044 rtx body = PATTERN (def_insn);
42045 if (GET_CODE (body) != SET
42046 || GET_CODE (SET_SRC (body)) != VEC_SELECT
42047 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
42048 return false;
42050 rtx mem = XEXP (SET_SRC (body), 0);
42051 rtx base_reg = XEXP (mem, 0);
42053 df_ref base_use;
42054 insn_info = DF_INSN_INFO_GET (def_insn);
42055 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
42057 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
42058 continue;
42060 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
42061 if (!base_def_link || base_def_link->next)
42062 return false;
42064 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
42065 rtx tocrel_body = PATTERN (tocrel_insn);
42066 rtx base, offset;
42067 if (GET_CODE (tocrel_body) != SET)
42068 return false;
42069 /* There is an extra level of indirection for small/large
42070 code models. */
42071 rtx tocrel_expr = SET_SRC (tocrel_body);
42072 if (GET_CODE (tocrel_expr) == MEM)
42073 tocrel_expr = XEXP (tocrel_expr, 0);
42074 if (!toc_relative_expr_p (tocrel_expr, false))
42075 return false;
42076 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
42077 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
42078 return false;
42081 return true;
42084 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
42085 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
42086 static bool
42087 v2df_reduction_p (rtx op)
42089 if (GET_MODE (op) != V2DFmode)
42090 return false;
42092 enum rtx_code code = GET_CODE (op);
42093 if (code != PLUS && code != SMIN && code != SMAX)
42094 return false;
42096 rtx concat = XEXP (op, 0);
42097 if (GET_CODE (concat) != VEC_CONCAT)
42098 return false;
42100 rtx select0 = XEXP (concat, 0);
42101 rtx select1 = XEXP (concat, 1);
42102 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
42103 return false;
42105 rtx reg0 = XEXP (select0, 0);
42106 rtx reg1 = XEXP (select1, 0);
42107 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
42108 return false;
42110 rtx parallel0 = XEXP (select0, 1);
42111 rtx parallel1 = XEXP (select1, 1);
42112 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
42113 return false;
42115 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
42116 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
42117 return false;
42119 return true;
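/* Shape accepted above (illustrative; the PLUS case, with the second
   operand of the PLUS/SMIN/SMAX left unchecked by this predicate):
     (plus (vec_concat (vec_select (reg x) (parallel [(const_int 1)]))
                       (vec_select (reg x) (parallel [(const_int 0)])))
           ...)  */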
42122 /* Return 1 iff OP is an operand that will not be affected by having
42123 vector doublewords swapped in memory. */
42124 static unsigned int
42125 rtx_is_swappable_p (rtx op, unsigned int *special)
42127 enum rtx_code code = GET_CODE (op);
42128 int i, j;
42129 rtx parallel;
42131 switch (code)
42133 case LABEL_REF:
42134 case SYMBOL_REF:
42135 case CLOBBER:
42136 case REG:
42137 return 1;
42139 case VEC_CONCAT:
42140 case ASM_INPUT:
42141 case ASM_OPERANDS:
42142 return 0;
42144 case CONST_VECTOR:
42146 *special = SH_CONST_VECTOR;
42147 return 1;
42150 case VEC_DUPLICATE:
42151 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
42152 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
42153 it represents a vector splat for which we can do special
42154 handling. */
42155 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
42156 return 1;
42157 else if (REG_P (XEXP (op, 0))
42158 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42159 /* This catches V2DF and V2DI splat, at a minimum. */
42160 return 1;
42161 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
42162 && REG_P (XEXP (XEXP (op, 0), 0))
42163 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42164 /* This catches splat of a truncated value. */
42165 return 1;
42166 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
42167 /* If the duplicated item is from a select, defer to the select
42168 processing to see if we can change the lane for the splat. */
42169 return rtx_is_swappable_p (XEXP (op, 0), special);
42170 else
42171 return 0;
42173 case VEC_SELECT:
42174 /* A vec_extract operation is ok if we change the lane. */
42175 if (GET_CODE (XEXP (op, 0)) == REG
42176 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
42177 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42178 && XVECLEN (parallel, 0) == 1
42179 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
42181 *special = SH_EXTRACT;
42182 return 1;
42184 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
42185 XXPERMDI is a swap operation, it will be identified by
42186 insn_is_swap_p and therefore we won't get here. */
42187 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
42188 && (GET_MODE (XEXP (op, 0)) == V4DFmode
42189 || GET_MODE (XEXP (op, 0)) == V4DImode)
42190 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42191 && XVECLEN (parallel, 0) == 2
42192 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
42193 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
42195 *special = SH_XXPERMDI;
42196 return 1;
42198 else if (v2df_reduction_p (op))
42199 return 1;
42200 else
42201 return 0;
42203 case UNSPEC:
42205 /* Various operations are unsafe for this optimization, at least
42206 without significant additional work. Permutes are obviously
42207 problematic, as both the permute control vector and the ordering
42208 of the target values are invalidated by doubleword swapping.
42209 Vector pack and unpack modify the number of vector lanes.
42210 Merge-high/low will not operate correctly on swapped operands.
42211 Vector shifts across element boundaries are clearly uncool,
42212 as are vector select and concatenate operations. Vector
42213 sum-across instructions define one operand with a specific
42214 order-dependent element, so additional fixup code would be
42215 needed to make those work. Vector set and non-immediate-form
42216 vector splat are element-order sensitive. A few of these
42217 cases might be workable with special handling if required.
42218 Adding cost modeling would be appropriate in some cases. */
42219 int val = XINT (op, 1);
42220 switch (val)
42222 default:
42223 break;
42224 case UNSPEC_VMRGH_DIRECT:
42225 case UNSPEC_VMRGL_DIRECT:
42226 case UNSPEC_VPACK_SIGN_SIGN_SAT:
42227 case UNSPEC_VPACK_SIGN_UNS_SAT:
42228 case UNSPEC_VPACK_UNS_UNS_MOD:
42229 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
42230 case UNSPEC_VPACK_UNS_UNS_SAT:
42231 case UNSPEC_VPERM:
42232 case UNSPEC_VPERM_UNS:
42233 case UNSPEC_VPERMHI:
42234 case UNSPEC_VPERMSI:
42235 case UNSPEC_VPKPX:
42236 case UNSPEC_VSLDOI:
42237 case UNSPEC_VSLO:
42238 case UNSPEC_VSRO:
42239 case UNSPEC_VSUM2SWS:
42240 case UNSPEC_VSUM4S:
42241 case UNSPEC_VSUM4UBS:
42242 case UNSPEC_VSUMSWS:
42243 case UNSPEC_VSUMSWS_DIRECT:
42244 case UNSPEC_VSX_CONCAT:
42245 case UNSPEC_VSX_SET:
42246 case UNSPEC_VSX_SLDWI:
42247 case UNSPEC_VUNPACK_HI_SIGN:
42248 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
42249 case UNSPEC_VUNPACK_LO_SIGN:
42250 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
42251 case UNSPEC_VUPKHPX:
42252 case UNSPEC_VUPKHS_V4SF:
42253 case UNSPEC_VUPKHU_V4SF:
42254 case UNSPEC_VUPKLPX:
42255 case UNSPEC_VUPKLS_V4SF:
42256 case UNSPEC_VUPKLU_V4SF:
42257 case UNSPEC_VSX_CVDPSPN:
42258 case UNSPEC_VSX_CVSPDP:
42259 case UNSPEC_VSX_CVSPDPN:
42260 case UNSPEC_VSX_EXTRACT:
42261 case UNSPEC_VSX_VSLO:
42262 case UNSPEC_VSX_VEC_INIT:
42263 return 0;
42264 case UNSPEC_VSPLT_DIRECT:
42265 case UNSPEC_VSX_XXSPLTD:
42266 *special = SH_SPLAT;
42267 return 1;
42268 case UNSPEC_REDUC_PLUS:
42269 case UNSPEC_REDUC:
42270 return 1;
42274 default:
42275 break;
42278 const char *fmt = GET_RTX_FORMAT (code);
42279 int ok = 1;
42281 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42282 if (fmt[i] == 'e' || fmt[i] == 'u')
42284 unsigned int special_op = SH_NONE;
42285 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
42286 if (special_op == SH_NONE)
42287 continue;
42288 /* Ensure we never have two kinds of special handling
42289 for the same insn. */
42290 if (*special != SH_NONE && *special != special_op)
42291 return 0;
42292 *special = special_op;
42294 else if (fmt[i] == 'E')
42295 for (j = 0; j < XVECLEN (op, i); ++j)
42297 unsigned int special_op = SH_NONE;
42298 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
42299 if (special_op == SH_NONE)
42300 continue;
42301 /* Ensure we never have two kinds of special handling
42302 for the same insn. */
42303 if (*special != SH_NONE && *special != special_op)
42304 return 0;
42305 *special = special_op;
42308 return ok;
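/* For example (illustrative RTL, not from this file): a pure
   lanewise operation such as
     (set (reg:V4SI x) (plus:V4SI (reg:V4SI a) (reg:V4SI b)))
   computes the same result whether or not the doublewords of A, B,
   and X are held in swapped order, so it needs no special handling.
   Lane-numbered operations (extracts, splats, xxpermdi) are only
   swappable because the *SPECIAL codes above record how their lane
   numbers can be rewritten.  */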
42311 /* Return 1 iff INSN is an insn that will not be affected by
42312 having vector doublewords swapped in memory (in which case
42313 *SPECIAL is unchanged), or that can be modified to be correct
42314 if vector doublewords are swapped in memory (in which case
42315 *SPECIAL is changed to a value indicating how). */
42316 static unsigned int
42317 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
42318 unsigned int *special)
42320 /* Calls are always bad. */
42321 if (GET_CODE (insn) == CALL_INSN)
42322 return 0;
42324 /* Loads and stores seen here are not permuting, but we can still
42325 fix them up by converting them to permuting ones. Exceptions:
42326 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
42327 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
42328 for the SET source. Also we must now make an exception for lvx
42329 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
42330 explicit "& -16") since this leads to unrecognizable insns. */
42331 rtx body = PATTERN (insn);
42332 int i = INSN_UID (insn);
42334 if (insn_entry[i].is_load)
42336 if (GET_CODE (body) == SET)
42338 rtx rhs = SET_SRC (body);
42339 /* Even without a swap, the RHS might be a vec_select for, say,
42340 a byte-reversing load. */
42341 if (GET_CODE (rhs) != MEM)
42342 return 0;
42343 if (GET_CODE (XEXP (rhs, 0)) == AND)
42344 return 0;
42346 *special = SH_NOSWAP_LD;
42347 return 1;
42349 else
42350 return 0;
42353 if (insn_entry[i].is_store)
42355 if (GET_CODE (body) == SET
42356 && GET_CODE (SET_SRC (body)) != UNSPEC)
42358 rtx lhs = SET_DEST (body);
42359 /* Even without a swap, the LHS might be a vec_select for, say,
42360 a byte-reversing store. */
42361 if (GET_CODE (lhs) != MEM)
42362 return 0;
42363 if (GET_CODE (XEXP (lhs, 0)) == AND)
42364 return 0;
42366 *special = SH_NOSWAP_ST;
42367 return 1;
42369 else
42370 return 0;
42373 /* A convert to single precision can be left as is provided that
42374 all of its uses are in xxspltw instructions that splat BE element
42375 zero. */
42376 if (GET_CODE (body) == SET
42377 && GET_CODE (SET_SRC (body)) == UNSPEC
42378 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
42380 df_ref def;
42381 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42383 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42385 struct df_link *link = DF_REF_CHAIN (def);
42386 if (!link)
42387 return 0;
42389 for (; link; link = link->next) {
42390 rtx use_insn = DF_REF_INSN (link->ref);
42391 rtx use_body = PATTERN (use_insn);
42392 if (GET_CODE (use_body) != SET
42393 || GET_CODE (SET_SRC (use_body)) != UNSPEC
42394 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
42395 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
42396 return 0;
42400 return 1;
42403 /* A concatenation of two doublewords is ok if we reverse the
42404 order of the inputs. */
42405 if (GET_CODE (body) == SET
42406 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
42407 && (GET_MODE (SET_SRC (body)) == V2DFmode
42408 || GET_MODE (SET_SRC (body)) == V2DImode))
42410 *special = SH_CONCAT;
42411 return 1;
42414 /* V2DF reductions are always swappable. */
42415 if (GET_CODE (body) == PARALLEL)
42417 rtx expr = XVECEXP (body, 0, 0);
42418 if (GET_CODE (expr) == SET
42419 && v2df_reduction_p (SET_SRC (expr)))
42420 return 1;
42423 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
42424 constant pool. */
42425 if (GET_CODE (body) == SET
42426 && GET_CODE (SET_SRC (body)) == UNSPEC
42427 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
42428 && XVECLEN (SET_SRC (body), 0) == 3
42429 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
42431 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
42432 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42433 df_ref use;
42434 FOR_EACH_INSN_INFO_USE (use, insn_info)
42435 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42437 struct df_link *def_link = DF_REF_CHAIN (use);
42438 /* Punt if multiple definitions for this reg. */
42439 if (def_link && !def_link->next
42440 && const_load_sequence_p (insn_entry,
42441 DF_REF_INSN (def_link->ref)))
42443 *special = SH_VPERM;
42444 return 1;
42449 /* Otherwise check the operands for vector lane violations. */
42450 return rtx_is_swappable_p (body, special);
42453 enum chain_purpose { FOR_LOADS, FOR_STORES };
42455 /* Return true if the UD or DU chain headed by LINK is non-empty,
42456 and every entry on the chain references an insn that is a
42457 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
42458 register swap must have only permuting loads as reaching defs.
42459 If PURPOSE is FOR_STORES, each such register swap must have only
42460 register swaps or permuting stores as reached uses. */
42461 static bool
42462 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
42463 enum chain_purpose purpose)
42465 if (!link)
42466 return false;
42468 for (; link; link = link->next)
42470 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
42471 continue;
42473 if (DF_REF_IS_ARTIFICIAL (link->ref))
42474 return false;
42476 rtx reached_insn = DF_REF_INSN (link->ref);
42477 unsigned uid = INSN_UID (reached_insn);
42478 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
42480 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
42481 || insn_entry[uid].is_store)
42482 return false;
42484 if (purpose == FOR_LOADS)
42486 df_ref use;
42487 FOR_EACH_INSN_INFO_USE (use, insn_info)
42489 struct df_link *swap_link = DF_REF_CHAIN (use);
42491 while (swap_link)
42493 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42494 return false;
42496 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
42497 unsigned uid2 = INSN_UID (swap_def_insn);
42499 /* Only permuting loads are allowed. */
42500 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
42501 return false;
42503 swap_link = swap_link->next;
42507 else if (purpose == FOR_STORES)
42509 df_ref def;
42510 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42512 struct df_link *swap_link = DF_REF_CHAIN (def);
42514 while (swap_link)
42516 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42517 return false;
42519 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
42520 unsigned uid2 = INSN_UID (swap_use_insn);
42522 /* Permuting stores or register swaps are allowed. */
42523 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
42524 return false;
42526 swap_link = swap_link->next;
42532 return true;
42535 /* Mark the xxswapdi instructions associated with permuting loads and
42536 stores for removal. Note that we only flag them for deletion here,
42537 as there is a possibility of a swap being reached from multiple
42538 loads, etc. */
42539 static void
42540 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
42542 rtx insn = insn_entry[i].insn;
42543 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42545 if (insn_entry[i].is_load)
42547 df_ref def;
42548 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42550 struct df_link *link = DF_REF_CHAIN (def);
42552 /* We know by now that these are swaps, so we can delete
42553 them confidently. */
42554 while (link)
42556 rtx use_insn = DF_REF_INSN (link->ref);
42557 insn_entry[INSN_UID (use_insn)].will_delete = 1;
42558 link = link->next;
42562 else if (insn_entry[i].is_store)
42564 df_ref use;
42565 FOR_EACH_INSN_INFO_USE (use, insn_info)
42567 /* Ignore uses for addressability. */
42568 machine_mode mode = GET_MODE (DF_REF_REG (use));
42569 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
42570 continue;
42572 struct df_link *link = DF_REF_CHAIN (use);
42574 /* We know by now that these are swaps, so we can delete
42575 them confidently. */
42576 while (link)
42578 rtx def_insn = DF_REF_INSN (link->ref);
42579 insn_entry[INSN_UID (def_insn)].will_delete = 1;
42580 link = link->next;
42586 /* OP is either a CONST_VECTOR or an expression containing one.
42587 Swap the first half of the vector with the second in the first
42588 case. Recurse to find it in the second. */
42589 static void
42590 swap_const_vector_halves (rtx op)
42592 int i;
42593 enum rtx_code code = GET_CODE (op);
42594 if (GET_CODE (op) == CONST_VECTOR)
42596 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
42597 for (i = 0; i < half_units; ++i)
42599 rtx temp = CONST_VECTOR_ELT (op, i);
42600 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
42601 CONST_VECTOR_ELT (op, i + half_units) = temp;
42604 else
42606 int j;
42607 const char *fmt = GET_RTX_FORMAT (code);
42608 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42609 if (fmt[i] == 'e' || fmt[i] == 'u')
42610 swap_const_vector_halves (XEXP (op, i));
42611 else if (fmt[i] == 'E')
42612 for (j = 0; j < XVECLEN (op, i); ++j)
42613 swap_const_vector_halves (XVECEXP (op, i, j));
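/* For example, swapping halves of the V4SI constant
     (const_vector:V4SI [0 1 2 3])
   yields (const_vector:V4SI [2 3 0 1]): elements 0..1 (the first
   doubleword) change places with elements 2..3 (the second).  */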
42617 /* Find all subregs of a vector expression that perform a narrowing,
42618 and adjust the subreg index to account for doubleword swapping. */
42619 static void
42620 adjust_subreg_index (rtx op)
42622 enum rtx_code code = GET_CODE (op);
42623 if (code == SUBREG
42624 && (GET_MODE_SIZE (GET_MODE (op))
42625 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
42627 unsigned int index = SUBREG_BYTE (op);
42628 if (index < 8)
42629 index += 8;
42630 else
42631 index -= 8;
42632 SUBREG_BYTE (op) = index;
42635 const char *fmt = GET_RTX_FORMAT (code);
42636 int i,j;
42637 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42638 if (fmt[i] == 'e' || fmt[i] == 'u')
42639 adjust_subreg_index (XEXP (op, i));
42640 else if (fmt[i] == 'E')
42641 for (j = 0; j < XVECLEN (op, i); ++j)
42642 adjust_subreg_index (XVECEXP (op, i, j));
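/* For example, (subreg:DF (reg:V2DF x) 0) names the doubleword in
   bytes 0..7; once the register holds its doublewords in swapped
   order, that value lives in bytes 8..15, so SUBREG_BYTE 0 becomes
   8 and vice versa.  */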
42645 /* Convert the non-permuting load INSN to a permuting one. */
42646 static void
42647 permute_load (rtx_insn *insn)
42649 rtx body = PATTERN (insn);
42650 rtx mem_op = SET_SRC (body);
42651 rtx tgt_reg = SET_DEST (body);
42652 machine_mode mode = GET_MODE (tgt_reg);
42653 int n_elts = GET_MODE_NUNITS (mode);
42654 int half_elts = n_elts / 2;
42655 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42656 int i, j;
42657 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42658 XVECEXP (par, 0, i) = GEN_INT (j);
42659 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42660 XVECEXP (par, 0, i) = GEN_INT (j);
42661 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
42662 SET_SRC (body) = sel;
42663 INSN_CODE (insn) = -1; /* Force re-recognition. */
42664 df_insn_rescan (insn);
42666 if (dump_file)
42667 fprintf (dump_file, "Replacing load %d with permuted load\n",
42668 INSN_UID (insn));
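/* For example (illustrative RTL), a V4SI load
     (set (reg:V4SI x) (mem:V4SI addr))
   is rewritten as
     (set (reg:V4SI x)
          (vec_select:V4SI (mem:V4SI addr)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
   which matches the little-endian permuting load (e.g. lxvd2x).  */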
42671 /* Convert the non-permuting store INSN to a permuting one. */
42672 static void
42673 permute_store (rtx_insn *insn)
42675 rtx body = PATTERN (insn);
42676 rtx src_reg = SET_SRC (body);
42677 machine_mode mode = GET_MODE (src_reg);
42678 int n_elts = GET_MODE_NUNITS (mode);
42679 int half_elts = n_elts / 2;
42680 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42681 int i, j;
42682 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42683 XVECEXP (par, 0, i) = GEN_INT (j);
42684 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42685 XVECEXP (par, 0, i) = GEN_INT (j);
42686 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
42687 SET_SRC (body) = sel;
42688 INSN_CODE (insn) = -1; /* Force re-recognition. */
42689 df_insn_rescan (insn);
42691 if (dump_file)
42692 fprintf (dump_file, "Replacing store %d with permuted store\n",
42693 INSN_UID (insn));
42696 /* Given INSN that contains a vector extract operation, adjust the index
42697 of the extracted lane to account for the doubleword swap. */
42698 static void
42699 adjust_extract (rtx_insn *insn)
42701 rtx pattern = PATTERN (insn);
42702 if (GET_CODE (pattern) == PARALLEL)
42703 pattern = XVECEXP (pattern, 0, 0);
42704 rtx src = SET_SRC (pattern);
42705 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
42706 account for that. */
42707 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
42708 rtx par = XEXP (sel, 1);
42709 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
42710 int lane = INTVAL (XVECEXP (par, 0, 0));
42711 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42712 XVECEXP (par, 0, 0) = GEN_INT (lane);
42713 INSN_CODE (insn) = -1; /* Force re-recognition. */
42714 df_insn_rescan (insn);
42716 if (dump_file)
42717 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
42720 /* Given INSN that contains a vector direct-splat operation, adjust the index
42721 of the source lane to account for the doubleword swap. */
42722 static void
42723 adjust_splat (rtx_insn *insn)
42725 rtx body = PATTERN (insn);
42726 rtx unspec = XEXP (body, 1);
42727 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
42728 int lane = INTVAL (XVECEXP (unspec, 0, 1));
42729 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42730 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
42731 INSN_CODE (insn) = -1; /* Force re-recognition. */
42732 df_insn_rescan (insn);
42734 if (dump_file)
42735 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
42738 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
42739 swap), reverse the order of the source operands and adjust the indices
42740 of the source lanes to account for doubleword reversal. */
42741 static void
42742 adjust_xxpermdi (rtx_insn *insn)
42744 rtx set = PATTERN (insn);
42745 rtx select = XEXP (set, 1);
42746 rtx concat = XEXP (select, 0);
42747 rtx src0 = XEXP (concat, 0);
42748 XEXP (concat, 0) = XEXP (concat, 1);
42749 XEXP (concat, 1) = src0;
42750 rtx parallel = XEXP (select, 1);
42751 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
42752 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
42753 int new_lane0 = 3 - lane1;
42754 int new_lane1 = 3 - lane0;
42755 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
42756 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
42757 INSN_CODE (insn) = -1; /* Force re-recognition. */
42758 df_insn_rescan (insn);
42760 if (dump_file)
42761 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
42764 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
42765 reverse the order of those inputs. */
42766 static void
42767 adjust_concat (rtx_insn *insn)
42769 rtx set = PATTERN (insn);
42770 rtx concat = XEXP (set, 1);
42771 rtx src0 = XEXP (concat, 0);
42772 XEXP (concat, 0) = XEXP (concat, 1);
42773 XEXP (concat, 1) = src0;
42774 INSN_CODE (insn) = -1; /* Force re-recognition. */
42775 df_insn_rescan (insn);
42777 if (dump_file)
42778 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
42781 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
42782 constant pool to reflect swapped doublewords. */
42783 static void
42784 adjust_vperm (rtx_insn *insn)
42786 /* We previously determined that the UNSPEC_VPERM was fed by a
42787 swap of a swapping load of a TOC-relative constant pool symbol.
42788 Find the MEM in the swapping load and replace it with a MEM for
42789 the adjusted mask constant. */
42790 rtx set = PATTERN (insn);
42791 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
42793 /* Find the swap. */
42794 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42795 df_ref use;
42796 rtx_insn *swap_insn = 0;
42797 FOR_EACH_INSN_INFO_USE (use, insn_info)
42798 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42800 struct df_link *def_link = DF_REF_CHAIN (use);
42801 gcc_assert (def_link && !def_link->next);
42802 swap_insn = DF_REF_INSN (def_link->ref);
42803 break;
42805 gcc_assert (swap_insn);
42807 /* Find the load. */
42808 insn_info = DF_INSN_INFO_GET (swap_insn);
42809 rtx_insn *load_insn = 0;
42810 FOR_EACH_INSN_INFO_USE (use, insn_info)
42812 struct df_link *def_link = DF_REF_CHAIN (use);
42813 gcc_assert (def_link && !def_link->next);
42814 load_insn = DF_REF_INSN (def_link->ref);
42815 break;
42817 gcc_assert (load_insn);
42819 /* Find the TOC-relative symbol access. */
42820 insn_info = DF_INSN_INFO_GET (load_insn);
42821 rtx_insn *tocrel_insn = 0;
42822 FOR_EACH_INSN_INFO_USE (use, insn_info)
42824 struct df_link *def_link = DF_REF_CHAIN (use);
42825 gcc_assert (def_link && !def_link->next);
42826 tocrel_insn = DF_REF_INSN (def_link->ref);
42827 break;
42829 gcc_assert (tocrel_insn);
42831 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
42832 to set tocrel_base; otherwise it would be unnecessary as we've
42833 already established it will return true. */
42834 rtx base, offset;
42835 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
42836 /* There is an extra level of indirection for small/large code models. */
42837 if (GET_CODE (tocrel_expr) == MEM)
42838 tocrel_expr = XEXP (tocrel_expr, 0);
42839 if (!toc_relative_expr_p (tocrel_expr, false))
42840 gcc_unreachable ();
42841 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
42842 rtx const_vector = get_pool_constant (base);
42843 /* With the extra indirection, get_pool_constant will produce the
42844 real constant from the reg_equal expression, so get the real
42845 constant. */
42846 if (GET_CODE (const_vector) == SYMBOL_REF)
42847 const_vector = get_pool_constant (const_vector);
42848 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
42850 /* Create an adjusted mask from the initial mask. */
42851 unsigned int new_mask[16], i, val;
42852 for (i = 0; i < 16; ++i) {
42853 val = INTVAL (XVECEXP (const_vector, 0, i));
42854 if (val < 16)
42855 new_mask[i] = (val + 8) % 16;
42856 else
42857 new_mask[i] = ((val + 8) % 16) + 16;
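/* For example, byte selector 0 (byte 0 of the first input) becomes
   8, selector 8 becomes 0, and selector 16 (byte 0 of the second
   input) becomes 24: (val + 8) % 16 swaps the doublewords within
   each input, and the + 16 keeps second-input selectors in the
   16..31 range.  */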
42860 /* Create a new CONST_VECTOR and a MEM that references it. */
42861 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
42862 for (i = 0; i < 16; ++i)
42863 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
42864 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
42865 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
42866 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
42867 can't recognize. Force the SYMBOL_REF into a register. */
42868 if (!REG_P (XEXP (new_mem, 0))) {
42869 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
42870 XEXP (new_mem, 0) = base_reg;
42871 /* Move the newly created insn ahead of the load insn. */
42872 rtx_insn *force_insn = get_last_insn ();
42873 remove_insn (force_insn);
42874 rtx_insn *before_load_insn = PREV_INSN (load_insn);
42875 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
42876 df_insn_rescan (before_load_insn);
42877 df_insn_rescan (force_insn);
42880 /* Replace the MEM in the load instruction and rescan it. */
42881 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
42882 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
42883 df_insn_rescan (load_insn);
42885 if (dump_file)
42886 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
42889 /* The insn described by INSN_ENTRY[I] can be swapped, but only
42890 with special handling. Take care of that here. */
42891 static void
42892 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
42894 rtx_insn *insn = insn_entry[i].insn;
42895 rtx body = PATTERN (insn);
42897 switch (insn_entry[i].special_handling)
42899 default:
42900 gcc_unreachable ();
42901 case SH_CONST_VECTOR:
42903 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
42904 gcc_assert (GET_CODE (body) == SET);
42905 rtx rhs = SET_SRC (body);
42906 swap_const_vector_halves (rhs);
42907 if (dump_file)
42908 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
42909 break;
42911 case SH_SUBREG:
42912 /* A subreg of the same size is already safe. For subregs that
42913 select a smaller portion of a reg, adjust the index for
42914 swapped doublewords. */
42915 adjust_subreg_index (body);
42916 if (dump_file)
42917 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
42918 break;
42919 case SH_NOSWAP_LD:
42920 /* Convert a non-permuting load to a permuting one. */
42921 permute_load (insn);
42922 break;
42923 case SH_NOSWAP_ST:
42924 /* Convert a non-permuting store to a permuting one. */
42925 permute_store (insn);
42926 break;
42927 case SH_EXTRACT:
42928 /* Change the lane on an extract operation. */
42929 adjust_extract (insn);
42930 break;
42931 case SH_SPLAT:
42932 /* Change the lane on a direct-splat operation. */
42933 adjust_splat (insn);
42934 break;
42935 case SH_XXPERMDI:
42936 /* Change the lanes on an XXPERMDI operation. */
42937 adjust_xxpermdi (insn);
42938 break;
42939 case SH_CONCAT:
42940 /* Reverse the order of a concatenation operation. */
42941 adjust_concat (insn);
42942 break;
42943 case SH_VPERM:
42944 /* Change the mask loaded from the constant pool for a VPERM. */
42945 adjust_vperm (insn);
42946 break;
42950 /* Find the insn from the Ith table entry, which is known to be a
42951 register swap Y = SWAP(X). Replace it with a copy Y = X. */
42952 static void
42953 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
42955 rtx_insn *insn = insn_entry[i].insn;
42956 rtx body = PATTERN (insn);
42957 rtx src_reg = XEXP (SET_SRC (body), 0);
42958 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
42959 rtx_insn *new_insn = emit_insn_before (copy, insn);
42960 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
42961 df_insn_rescan (new_insn);
42963 if (dump_file)
42965 unsigned int new_uid = INSN_UID (new_insn);
42966 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
42969 df_insn_delete (insn);
42970 remove_insn (insn);
42971 insn->set_deleted ();
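/* For example (illustrative RTL),
     (set (reg:V2DI y)
          (vec_select:V2DI (reg:V2DI x)
                           (parallel [(const_int 1) (const_int 0)])))
   becomes the plain copy (set (reg:V2DI y) (reg:V2DI x)), which
   later passes can typically propagate away.  */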
42974 /* Dump the swap table to DUMP_FILE. */
42975 static void
42976 dump_swap_insn_table (swap_web_entry *insn_entry)
42978 int e = get_max_uid ();
42979 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
42981 for (int i = 0; i < e; ++i)
42982 if (insn_entry[i].is_relevant)
42984 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
42985 fprintf (dump_file, "%6d %6d ", i,
42986 pred_entry && pred_entry->insn
42987 ? INSN_UID (pred_entry->insn) : 0);
42988 if (insn_entry[i].is_load)
42989 fputs ("load ", dump_file);
42990 if (insn_entry[i].is_store)
42991 fputs ("store ", dump_file);
42992 if (insn_entry[i].is_swap)
42993 fputs ("swap ", dump_file);
42994 if (insn_entry[i].is_live_in)
42995 fputs ("live-in ", dump_file);
42996 if (insn_entry[i].is_live_out)
42997 fputs ("live-out ", dump_file);
42998 if (insn_entry[i].contains_subreg)
42999 fputs ("subreg ", dump_file);
43000 if (insn_entry[i].is_128_int)
43001 fputs ("int128 ", dump_file);
43002 if (insn_entry[i].is_call)
43003 fputs ("call ", dump_file);
43004 if (insn_entry[i].is_swappable)
43006 fputs ("swappable ", dump_file);
43007 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
43008 fputs ("special:constvec ", dump_file);
43009 else if (insn_entry[i].special_handling == SH_SUBREG)
43010 fputs ("special:subreg ", dump_file);
43011 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
43012 fputs ("special:load ", dump_file);
43013 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
43014 fputs ("special:store ", dump_file);
43015 else if (insn_entry[i].special_handling == SH_EXTRACT)
43016 fputs ("special:extract ", dump_file);
43017 else if (insn_entry[i].special_handling == SH_SPLAT)
43018 fputs ("special:splat ", dump_file);
43019 else if (insn_entry[i].special_handling == SH_XXPERMDI)
43020 fputs ("special:xxpermdi ", dump_file);
43021 else if (insn_entry[i].special_handling == SH_CONCAT)
43022 fputs ("special:concat ", dump_file);
43023 else if (insn_entry[i].special_handling == SH_VPERM)
43024 fputs ("special:vperm ", dump_file);
43026 if (insn_entry[i].web_not_optimizable)
43027 fputs ("unoptimizable ", dump_file);
43028 if (insn_entry[i].will_delete)
43029 fputs ("delete ", dump_file);
43030 fputs ("\n", dump_file);
43032 fputs ("\n", dump_file);
43035 /* Return RTX with its address canonicalized to (reg) or (plus reg reg).
43036 Here RTX is an (and addr (const_int -16)). Always return a new copy
43037 to avoid problems with combine. */
43038 static rtx
43039 alignment_with_canonical_addr (rtx align)
43041 rtx canon;
43042 rtx addr = XEXP (align, 0);
43044 if (REG_P (addr))
43045 canon = addr;
43047 else if (GET_CODE (addr) == PLUS)
43049 rtx addrop0 = XEXP (addr, 0);
43050 rtx addrop1 = XEXP (addr, 1);
43052 if (!REG_P (addrop0))
43053 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
43055 if (!REG_P (addrop1))
43056 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
43058 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
43061 else
43062 canon = force_reg (GET_MODE (addr), addr);
43064 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
43067 /* Check whether an rtx is an alignment mask, and if so, return
43068 a fully-expanded rtx for the masking operation. */
43069 static rtx
43070 alignment_mask (rtx_insn *insn)
43072 rtx body = PATTERN (insn);
43074 if (GET_CODE (body) != SET
43075 || GET_CODE (SET_SRC (body)) != AND
43076 || !REG_P (XEXP (SET_SRC (body), 0)))
43077 return 0;
43079 rtx mask = XEXP (SET_SRC (body), 1);
43081 if (GET_CODE (mask) == CONST_INT)
43083 if (INTVAL (mask) == -16)
43084 return alignment_with_canonical_addr (SET_SRC (body));
43085 else
43086 return 0;
43089 if (!REG_P (mask))
43090 return 0;
43092 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43093 df_ref use;
43094 rtx real_mask = 0;
43096 FOR_EACH_INSN_INFO_USE (use, insn_info)
43098 if (!rtx_equal_p (DF_REF_REG (use), mask))
43099 continue;
43101 struct df_link *def_link = DF_REF_CHAIN (use);
43102 if (!def_link || def_link->next)
43103 return 0;
43105 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
43106 rtx const_body = PATTERN (const_insn);
43107 if (GET_CODE (const_body) != SET)
43108 return 0;
43110 real_mask = SET_SRC (const_body);
43112 if (GET_CODE (real_mask) != CONST_INT
43113 || INTVAL (real_mask) != -16)
43114 return 0;
43117 if (real_mask == 0)
43118 return 0;
43120 return alignment_with_canonical_addr (SET_SRC (body));
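/* For example, given
     (set (reg:DI t) (and:DI (reg:DI base) (const_int -16)))
   this returns a fresh copy of (and:DI (reg:DI base) (const_int -16));
   a mask held in a register is accepted only when its unique
   reaching definition is the constant -16.  */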
43123 /* Given INSN that's a load or store based at BASE_REG, look for a
43124 feeding computation that aligns its address on a 16-byte boundary. */
43125 static rtx
43126 find_alignment_op (rtx_insn *insn, rtx base_reg)
43128 df_ref base_use;
43129 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43130 rtx and_operation = 0;
43132 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
43134 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
43135 continue;
43137 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
43138 if (!base_def_link || base_def_link->next)
43139 break;
43141 /* With stack-protector code enabled, and possibly in other
43142 circumstances, there may not be an associated insn for
43143 the def. */
43144 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
43145 break;
43147 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
43148 and_operation = alignment_mask (and_insn);
43149 if (and_operation != 0)
43150 break;
43153 return and_operation;
43156 struct del_info { bool replace; rtx_insn *replace_insn; };
43158 /* If INSN is the load for an lvx pattern, put it in canonical form. */
43159 static void
43160 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
43162 rtx body = PATTERN (insn);
43163 gcc_assert (GET_CODE (body) == SET
43164 && GET_CODE (SET_SRC (body)) == VEC_SELECT
43165 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
43167 rtx mem = XEXP (SET_SRC (body), 0);
43168 rtx base_reg = XEXP (mem, 0);
43170 rtx and_operation = find_alignment_op (insn, base_reg);
43172 if (and_operation != 0)
43174 df_ref def;
43175 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43176 FOR_EACH_INSN_INFO_DEF (def, insn_info)
43178 struct df_link *link = DF_REF_CHAIN (def);
43179 if (!link || link->next)
43180 break;
43182 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
43183 if (!insn_is_swap_p (swap_insn)
43184 || insn_is_load_p (swap_insn)
43185 || insn_is_store_p (swap_insn))
43186 break;
43188 /* Expected lvx pattern found. Change the swap to
43189 a copy, and propagate the AND operation into the
43190 load. */
43191 to_delete[INSN_UID (swap_insn)].replace = true;
43192 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
43194 XEXP (mem, 0) = and_operation;
43195 SET_SRC (body) = mem;
43196 INSN_CODE (insn) = -1; /* Force re-recognition. */
43197 df_insn_rescan (insn);
43199 if (dump_file)
43200 fprintf (dump_file, "lvx opportunity found at %d\n",
43201 INSN_UID (insn));
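/* For example (illustrative RTL), a swapping load
     (set (reg:V4SI x)
          (vec_select:V4SI (mem:V4SI (reg:DI b)) (parallel ...)))
   whose base B was computed as (and:DI (reg:DI a) (const_int -16))
   and whose only use is a register swap becomes
     (set (reg:V4SI x) (mem:V4SI (and:DI (reg:DI a) (const_int -16))))
   -- the canonical lvx form -- while the dependent swap is queued
   to be turned into a copy.  */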
43206 /* If INSN is the store for an stvx pattern, put it in canonical form. */
43207 static void
43208 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
43210 rtx body = PATTERN (insn);
43211 gcc_assert (GET_CODE (body) == SET
43212 && GET_CODE (SET_DEST (body)) == MEM
43213 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
43214 rtx mem = SET_DEST (body);
43215 rtx base_reg = XEXP (mem, 0);
43217 rtx and_operation = find_alignment_op (insn, base_reg);
43219 if (and_operation != 0)
43221 rtx src_reg = XEXP (SET_SRC (body), 0);
43222 df_ref src_use;
43223 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43224 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
43226 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
43227 continue;
43229 struct df_link *link = DF_REF_CHAIN (src_use);
43230 if (!link || link->next)
43231 break;
43233 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
43234 if (!insn_is_swap_p (swap_insn)
43235 || insn_is_load_p (swap_insn)
43236 || insn_is_store_p (swap_insn))
43237 break;
43239 /* Expected stvx pattern found. Change the swap to
43240 a copy, and propagate the AND operation into the
43241 store. */
43242 to_delete[INSN_UID (swap_insn)].replace = true;
43243 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
43245 XEXP (mem, 0) = and_operation;
43246 SET_SRC (body) = src_reg;
43247 INSN_CODE (insn) = -1; /* Force re-recognition. */
43248 df_insn_rescan (insn);
43250 if (dump_file)
43251 fprintf (dump_file, "stvx opportunity found at %d\n",
43252 INSN_UID (insn));
43257 /* Look for patterns created from builtin lvx and stvx calls, and
43258 canonicalize them to be properly recognized as such. */
43259 static void
43260 recombine_lvx_stvx_patterns (function *fun)
43262 int i;
43263 basic_block bb;
43264 rtx_insn *insn;
43266 int num_insns = get_max_uid ();
43267 del_info *to_delete = XCNEWVEC (del_info, num_insns);
43269 FOR_ALL_BB_FN (bb, fun)
43270 FOR_BB_INSNS (bb, insn)
43272 if (!NONDEBUG_INSN_P (insn))
43273 continue;
43275 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
43276 recombine_lvx_pattern (insn, to_delete);
43277 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
43278 recombine_stvx_pattern (insn, to_delete);
43281 /* Turning swaps into copies is delayed until now, to avoid problems
43282 with deleting instructions during the insn walk. */
43283 for (i = 0; i < num_insns; i++)
43284 if (to_delete[i].replace)
43286 rtx swap_body = PATTERN (to_delete[i].replace_insn);
43287 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
43288 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
43289 rtx_insn *new_insn = emit_insn_before (copy,
43290 to_delete[i].replace_insn);
43291 set_block_for_insn (new_insn,
43292 BLOCK_FOR_INSN (to_delete[i].replace_insn));
43293 df_insn_rescan (new_insn);
43294 df_insn_delete (to_delete[i].replace_insn);
43295 remove_insn (to_delete[i].replace_insn);
43296 to_delete[i].replace_insn->set_deleted ();
43299 free (to_delete);
43302 /* Main entry point for this pass. */
43303 unsigned int
43304 rs6000_analyze_swaps (function *fun)
43306 swap_web_entry *insn_entry;
43307 basic_block bb;
43308 rtx_insn *insn, *curr_insn = 0;
43310 /* Dataflow analysis for use-def chains. */
43311 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
43312 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
43313 df_analyze ();
43314 df_set_flags (DF_DEFER_INSN_RESCAN);
43316 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
43317 recombine_lvx_stvx_patterns (fun);
43319 /* Allocate structure to represent webs of insns. */
43320 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
43322 /* Walk the insns to gather basic data. */
43323 FOR_ALL_BB_FN (bb, fun)
43324 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
43326 unsigned int uid = INSN_UID (insn);
43327 if (NONDEBUG_INSN_P (insn))
43329 insn_entry[uid].insn = insn;
43331 if (GET_CODE (insn) == CALL_INSN)
43332 insn_entry[uid].is_call = 1;
43334 /* Walk the uses and defs to see if we mention vector regs.
43335 Record any constraints on optimization of such mentions. */
43336 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43337 df_ref mention;
43338 FOR_EACH_INSN_INFO_USE (mention, insn_info)
43340 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43341 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
43343 /* If a use gets its value from a call insn, it will be
43344 a hard register and will look like (reg:V4SI 3 3).
43345 The df analysis creates two mentions for GPR3 and GPR4,
43346 both DImode. We must recognize this and treat it as a
43347 vector mention to ensure the call is unioned with this
43348 use. */
43349 if (mode == DImode && DF_REF_INSN_INFO (mention))
43351 rtx feeder = DF_REF_INSN (mention);
43352 /* FIXME: It is pretty hard to get from the df mention
43353 to the mode of the use in the insn. We arbitrarily
43354 pick a vector mode here, even though the use might
43355 be a real DImode. We can be too conservative
43356 (create a web larger than necessary) because of
43357 this, so consider eventually fixing this. */
43358 if (GET_CODE (feeder) == CALL_INSN)
43359 mode = V4SImode;
43362 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
43364 insn_entry[uid].is_relevant = 1;
43365 if (mode == TImode || mode == V1TImode
43366 || FLOAT128_VECTOR_P (mode))
43367 insn_entry[uid].is_128_int = 1;
43368 if (DF_REF_INSN_INFO (mention))
43369 insn_entry[uid].contains_subreg
43370 = !rtx_equal_p (DF_REF_REG (mention),
43371 DF_REF_REAL_REG (mention));
43372 union_defs (insn_entry, insn, mention);
43375 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
43377 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43378 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
43380 /* If we're loading up a hard vector register for a call,
43381 it looks like (set (reg:V4SI 9 9) (...)). The df
43382 analysis creates two mentions for GPR9 and GPR10, both
43383 DImode. So relying on the mode from the mentions
43384 isn't sufficient to ensure we union the call into the
43385 web with the parameter setup code. */
43386 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
43387 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
43388 mode = GET_MODE (SET_DEST (PATTERN (insn)));
43390 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
43392 insn_entry[uid].is_relevant = 1;
43393 if (mode == TImode || mode == V1TImode
43394 || FLOAT128_VECTOR_P (mode))
43395 insn_entry[uid].is_128_int = 1;
43396 if (DF_REF_INSN_INFO (mention))
43397 insn_entry[uid].contains_subreg
43398 = !rtx_equal_p (DF_REF_REG (mention),
43399 DF_REF_REAL_REG (mention));
43400 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
43401 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
43402 insn_entry[uid].is_live_out = 1;
43403 union_uses (insn_entry, insn, mention);
43407 if (insn_entry[uid].is_relevant)
43409 /* Determine if this is a load or store. */
43410 insn_entry[uid].is_load = insn_is_load_p (insn);
43411 insn_entry[uid].is_store = insn_is_store_p (insn);
43413 /* Determine if this is a doubleword swap. If not,
43414 determine whether it can legally be swapped. */
43415 if (insn_is_swap_p (insn))
43416 insn_entry[uid].is_swap = 1;
43417 else
43419 unsigned int special = SH_NONE;
43420 insn_entry[uid].is_swappable
43421 = insn_is_swappable_p (insn_entry, insn, &special);
43422 if (special != SH_NONE && insn_entry[uid].contains_subreg)
43423 insn_entry[uid].is_swappable = 0;
43424 else if (special != SH_NONE)
43425 insn_entry[uid].special_handling = special;
43426 else if (insn_entry[uid].contains_subreg)
43427 insn_entry[uid].special_handling = SH_SUBREG;
43433 if (dump_file)
43435 fprintf (dump_file, "\nSwap insn entry table when first built\n");
43436 dump_swap_insn_table (insn_entry);
43439 /* Record unoptimizable webs. */
43440 unsigned e = get_max_uid (), i;
43441 for (i = 0; i < e; ++i)
43443 if (!insn_entry[i].is_relevant)
43444 continue;
43446 swap_web_entry *root
43447 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
43449 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
43450 || (insn_entry[i].contains_subreg
43451 && insn_entry[i].special_handling != SH_SUBREG)
43452 || insn_entry[i].is_128_int || insn_entry[i].is_call
43453 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
43454 root->web_not_optimizable = 1;
43456 /* If we have loads or stores that aren't permuting then the
43457 optimization isn't appropriate. */
43458 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
43459 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
43460 root->web_not_optimizable = 1;
43462 /* If we have permuting loads or stores that are not accompanied
43463 by a register swap, the optimization isn't appropriate. */
43464 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
43466 rtx insn = insn_entry[i].insn;
43467 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43468 df_ref def;
43470 FOR_EACH_INSN_INFO_DEF (def, insn_info)
43472 struct df_link *link = DF_REF_CHAIN (def);
43474 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
43476 root->web_not_optimizable = 1;
43477 break;
43481 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
43483 rtx insn = insn_entry[i].insn;
43484 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43485 df_ref use;
43487 FOR_EACH_INSN_INFO_USE (use, insn_info)
43489 struct df_link *link = DF_REF_CHAIN (use);
43491 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
43493 root->web_not_optimizable = 1;
43494 break;
43500 if (dump_file)
43502 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
43503 dump_swap_insn_table (insn_entry);
43506 /* For each load and store in an optimizable web (which implies
43507 the loads and stores are permuting), find the associated
43508 register swaps and mark them for removal. Due to various
43509 optimizations we may mark the same swap more than once. Also
43510 perform special handling for swappable insns that require it. */
43511 for (i = 0; i < e; ++i)
43512 if ((insn_entry[i].is_load || insn_entry[i].is_store)
43513 && insn_entry[i].is_swap)
43515 swap_web_entry* root_entry
43516 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
43517 if (!root_entry->web_not_optimizable)
43518 mark_swaps_for_removal (insn_entry, i);
43520 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
43522 swap_web_entry* root_entry
43523 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
43524 if (!root_entry->web_not_optimizable)
43525 handle_special_swappables (insn_entry, i);
43528 /* Now delete the swaps marked for removal. */
43529 for (i = 0; i < e; ++i)
43530 if (insn_entry[i].will_delete)
43531 replace_swap_with_copy (insn_entry, i);
43533 /* Clean up. */
43534 free (insn_entry);
43535 return 0;
43538 const pass_data pass_data_analyze_swaps =
43540 RTL_PASS, /* type */
43541 "swaps", /* name */
43542 OPTGROUP_NONE, /* optinfo_flags */
43543 TV_NONE, /* tv_id */
43544 0, /* properties_required */
43545 0, /* properties_provided */
43546 0, /* properties_destroyed */
43547 0, /* todo_flags_start */
43548 TODO_df_finish, /* todo_flags_finish */
43551 class pass_analyze_swaps : public rtl_opt_pass
43553 public:
43554 pass_analyze_swaps(gcc::context *ctxt)
43555 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
43558 /* opt_pass methods: */
43559 virtual bool gate (function *)
43561 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
43562 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
43565 virtual unsigned int execute (function *fun)
43567 return rs6000_analyze_swaps (fun);
43570 opt_pass *clone ()
43572 return new pass_analyze_swaps (m_ctxt);
43575 }; // class pass_analyze_swaps
43577 rtl_opt_pass *
43578 make_pass_analyze_swaps (gcc::context *ctxt)
43580 return new pass_analyze_swaps (ctxt);
43583 #ifdef RS6000_GLIBC_ATOMIC_FENV
43584 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
43585 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
43586 #endif
43588 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
43590 static void
43591 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
43593 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
43595 #ifdef RS6000_GLIBC_ATOMIC_FENV
43596 if (atomic_hold_decl == NULL_TREE)
43598 atomic_hold_decl
43599 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
43600 get_identifier ("__atomic_feholdexcept"),
43601 build_function_type_list (void_type_node,
43602 double_ptr_type_node,
43603 NULL_TREE));
43604 TREE_PUBLIC (atomic_hold_decl) = 1;
43605 DECL_EXTERNAL (atomic_hold_decl) = 1;
43608 if (atomic_clear_decl == NULL_TREE)
43610 atomic_clear_decl
43611 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
43612 get_identifier ("__atomic_feclearexcept"),
43613 build_function_type_list (void_type_node,
43614 NULL_TREE));
43615 TREE_PUBLIC (atomic_clear_decl) = 1;
43616 DECL_EXTERNAL (atomic_clear_decl) = 1;
43619 tree const_double = build_qualified_type (double_type_node,
43620 TYPE_QUAL_CONST);
43621 tree const_double_ptr = build_pointer_type (const_double);
43622 if (atomic_update_decl == NULL_TREE)
43624 atomic_update_decl
43625 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
43626 get_identifier ("__atomic_feupdateenv"),
43627 build_function_type_list (void_type_node,
43628 const_double_ptr,
43629 NULL_TREE));
43630 TREE_PUBLIC (atomic_update_decl) = 1;
43631 DECL_EXTERNAL (atomic_update_decl) = 1;
43634 tree fenv_var = create_tmp_var_raw (double_type_node);
43635 TREE_ADDRESSABLE (fenv_var) = 1;
43636 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
43638 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
43639 *clear = build_call_expr (atomic_clear_decl, 0);
43640 *update = build_call_expr (atomic_update_decl, 1,
43641 fold_convert (const_double_ptr, fenv_addr));
43642 #endif
43643 return;
43646 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
43647 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
43648 tree call_mffs = build_call_expr (mffs, 0);
43650 /* Generates the equivalent of feholdexcept (&fenv_var)
43652 *fenv_var = __builtin_mffs ();
43653 double fenv_hold;
43654 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
43655 __builtin_mtfsf (0xff, fenv_hold); */
43657 /* Mask to clear everything except for the rounding modes and non-IEEE
43658 arithmetic flag. */
43659 const unsigned HOST_WIDE_INT hold_exception_mask =
43660 HOST_WIDE_INT_C (0xffffffff00000007);
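/* In the FPSCR image produced by mffs, the low-order three bits are
   the non-IEEE mode bit (NI) and the two rounding-control bits (RN),
   hence the 0x7; the all-ones upper word merely preserves the high
   half of the double, which mtfsf does not consume.  */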
43662 tree fenv_var = create_tmp_var_raw (double_type_node);
43664 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
43666 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
43667 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
43668 build_int_cst (uint64_type_node,
43669 hold_exception_mask));
43671 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
43672 fenv_llu_and);
43674 tree hold_mtfsf = build_call_expr (mtfsf, 2,
43675 build_int_cst (unsigned_type_node, 0xff),
43676 fenv_hold_mtfsf);
43678 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
43680 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
43682 double fenv_clear = __builtin_mffs ();
43683 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
43684 __builtin_mtfsf (0xff, fenv_clear); */
43686 /* Mask that clears the entire lower word of the FPSCR (all
43687 exception, enable, and control bits). */
43688 const unsigned HOST_WIDE_INT clear_exception_mask =
43689 HOST_WIDE_INT_C (0xffffffff00000000);
43691 tree fenv_clear = create_tmp_var_raw (double_type_node);
43693 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
43695 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
43696 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
43697 fenv_clean_llu,
43698 build_int_cst (uint64_type_node,
43699 clear_exception_mask));
43701 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
43702 fenv_clear_llu_and);
43704 tree clear_mtfsf = build_call_expr (mtfsf, 2,
43705 build_int_cst (unsigned_type_node, 0xff),
43706 fenv_clear_mtfsf);
43708 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
43710 /* Generates the equivalent of feupdateenv (&fenv_var)
43712 double old_fenv = __builtin_mffs ();
43713 double fenv_update;
43714 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
43715 (*(uint64_t*)fenv_var & 0x1ff80fff);
43716 __builtin_mtfsf (0xff, fenv_update); */
43718 const unsigned HOST_WIDE_INT update_exception_mask =
43719 HOST_WIDE_INT_C (0xffffffff1fffff00);
43720 const unsigned HOST_WIDE_INT new_exception_mask =
43721 HOST_WIDE_INT_C (0x1ff80fff);
43723 tree old_fenv = create_tmp_var_raw (double_type_node);
43724 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
43726 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
43727 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
43728 build_int_cst (uint64_type_node,
43729 update_exception_mask));
43731 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
43732 build_int_cst (uint64_type_node,
43733 new_exception_mask));
43735 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
43736 old_llu_and, new_llu_and);
43738 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
43739 new_llu_mask);
43741 tree update_mtfsf = build_call_expr (mtfsf, 2,
43742 build_int_cst (unsigned_type_node, 0xff),
43743 fenv_update_mtfsf);
43745 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
43748 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
43750 static bool
43751 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
43752 optimization_type opt_type)
43754 switch (op)
43756 case rsqrt_optab:
43757 return (opt_type == OPTIMIZE_FOR_SPEED
43758 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
43760 default:
43761 return true;
43765 struct gcc_target targetm = TARGET_INITIALIZER;
43767 #include "gt-powerpcspe.h"