/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "memmodel.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "cfgloop.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "attribs.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "ira.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "insn-attr.h"
43 #include "flags.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "print-tree.h"
49 #include "varasm.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "output.h"
53 #include "dbxout.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "reload.h"
57 #include "sched-int.h"
58 #include "gimplify.h"
59 #include "gimple-fold.h"
60 #include "gimple-iterator.h"
61 #include "gimple-ssa.h"
62 #include "gimple-walk.h"
63 #include "intl.h"
64 #include "params.h"
65 #include "tm-constrs.h"
66 #include "tree-vectorizer.h"
67 #include "target-globals.h"
68 #include "builtins.h"
69 #include "context.h"
70 #include "tree-pass.h"
71 #include "except.h"
72 #if TARGET_XCOFF
73 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
74 #endif
75 #if TARGET_MACHO
76 #include "gstab.h" /* for N_SLINE */
77 #endif
78 #include "case-cfn-macros.h"
79 #include "ppc-auxv.h"
81 /* This file should be included last. */
82 #include "target-def.h"
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))

static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);
/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV          = 0x001,        /* Use divide estimate */
  RECIP_DF_DIV          = 0x002,
  RECIP_V4SF_DIV        = 0x004,
  RECIP_V2DF_DIV        = 0x008,

  RECIP_SF_RSQRT        = 0x010,        /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT        = 0x020,
  RECIP_V4SF_RSQRT      = 0x040,
  RECIP_V2DF_RSQRT      = 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE            = 0,
  RECIP_ALL             = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                           | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                           | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION  = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION   = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all",      RECIP_ALL },
  { "none",     RECIP_NONE },
  { "div",      (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                 | RECIP_V2DF_DIV) },
  { "divf",     (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",     (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",    (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
                 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",   (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",   (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",      PPC_PLATFORM_POWER9 },
  { "power8",      PPC_PLATFORM_POWER8 },
  { "power7",      PPC_PLATFORM_POWER7 },
  { "power6x",     PPC_PLATFORM_POWER6X },
  { "power6",      PPC_PLATFORM_POWER6 },
  { "power5+",     PPC_PLATFORM_POWER5_PLUS },
  { "power5",      PPC_PLATFORM_POWER5 },
  { "ppc970",      PPC_PLATFORM_PPC970 },
  { "power4",      PPC_PLATFORM_POWER4 },
  { "ppca2",       PPC_PLATFORM_PPCA2 },
  { "ppc476",      PPC_PLATFORM_PPC476 },
  { "ppc464",      PPC_PLATFORM_PPC464 },
  { "ppc440",      PPC_PLATFORM_PPC440 },
  { "ppc405",      PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",           PPC_FEATURE_HAS_4xxMAC,         0 },
  { "altivec",          PPC_FEATURE_HAS_ALTIVEC,        0 },
  { "arch_2_05",        PPC_FEATURE_ARCH_2_05,          0 },
  { "arch_2_06",        PPC_FEATURE_ARCH_2_06,          0 },
  { "archpmu",          PPC_FEATURE_PERFMON_COMPAT,     0 },
  { "booke",            PPC_FEATURE_BOOKE,              0 },
  { "cellbe",           PPC_FEATURE_CELL_BE,            0 },
  { "dfp",              PPC_FEATURE_HAS_DFP,            0 },
  { "efpdouble",        PPC_FEATURE_HAS_EFP_DOUBLE,     0 },
  { "efpsingle",        PPC_FEATURE_HAS_EFP_SINGLE,     0 },
  { "fpu",              PPC_FEATURE_HAS_FPU,            0 },
  { "ic_snoop",         PPC_FEATURE_ICACHE_SNOOP,       0 },
  { "mmu",              PPC_FEATURE_HAS_MMU,            0 },
  { "notb",             PPC_FEATURE_NO_TB,              0 },
  { "pa6t",             PPC_FEATURE_PA6T,               0 },
  { "power4",           PPC_FEATURE_POWER4,             0 },
  { "power5",           PPC_FEATURE_POWER5,             0 },
  { "power5+",          PPC_FEATURE_POWER5_PLUS,        0 },
  { "power6x",          PPC_FEATURE_POWER6_EXT,         0 },
  { "ppc32",            PPC_FEATURE_32,                 0 },
  { "ppc601",           PPC_FEATURE_601_INSTR,          0 },
  { "ppc64",            PPC_FEATURE_64,                 0 },
  { "ppcle",            PPC_FEATURE_PPC_LE,             0 },
  { "smt",              PPC_FEATURE_SMT,                0 },
  { "spe",              PPC_FEATURE_HAS_SPE,            0 },
  { "true_le",          PPC_FEATURE_TRUE_LE,            0 },
  { "ucache",           PPC_FEATURE_UNIFIED_CACHE,      0 },
  { "vsx",              PPC_FEATURE_HAS_VSX,            0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",        PPC_FEATURE2_ARCH_2_07,         1 },
  { "dscr",             PPC_FEATURE2_HAS_DSCR,          1 },
  { "ebb",              PPC_FEATURE2_HAS_EBB,           1 },
  { "htm",              PPC_FEATURE2_HAS_HTM,           1 },
  { "htm-nosc",         PPC_FEATURE2_HTM_NOSC,          1 },
  { "isel",             PPC_FEATURE2_HAS_ISEL,          1 },
  { "tar",              PPC_FEATURE2_HAS_TAR,           1 },
  { "vcrypto",          PPC_FEATURE2_HAS_VEC_CRYPTO,    1 },
  { "arch_3_00",        PPC_FEATURE2_ARCH_3_00,         1 },
  { "ieee128",          PPC_FEATURE2_HAS_IEEE128,       1 }
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in powerpcspe-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in powerpcspe-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
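
/* Given the enum order above, IS_STD_REG_TYPE accepts GPR_REG_TYPE,
   VSX_REG_TYPE, ALTIVEC_REG_TYPE and FPR_REG_TYPE, while
   IS_FP_VECT_REG_TYPE accepts only the last three.  */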
/* Register classes we care about in secondary reload or when determining
   legitimate addresses.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};
/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",      FIRST_GPR_REGNO },      /* RELOAD_REG_GPR.  */
  { "Fpr",      FIRST_FPR_REGNO },      /* RELOAD_REG_FPR.  */
  { "VMX",      FIRST_ALTIVEC_REGNO },  /* RELOAD_REG_VMX.  */
  { "Any",      -1 },                   /* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;
#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET  0x80    /* quad offset is limited.  */
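
/* As an illustration (not in the original source): a mode that is valid in
   GPRs with both reg+offset and reg+reg addressing would carry an addr_mask
   of (RELOAD_REG_VALID | RELOAD_REG_OFFSET | RELOAD_REG_INDEXED) in its
   RELOAD_REG_GPR slot.  */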
/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;           /* INSN to reload for loading.  */
  enum insn_code reload_store;          /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;        /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;        /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;        /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;         /* INSN for fusing gpr ADDIS/loads.  */
                                        /* INSNs for fusing addi with loads
                                           or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
                                        /* INSNs for fusing addis with loads
                                           or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;                 /* Scalar value can go in VMX.  */
  bool fused_toc;                       /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}
/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}
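
/* Sketch of how a legitimate-address check might consult these predicates
   (hypothetical caller, not part of this file):

     if (GET_CODE (addr) == PRE_INC && !mode_supports_pre_incdec_p (mode))
       return false;
*/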
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */
bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
        {
          out_set = single_set (out_insn);
          if (!out_set)
            {
              out_pat = PATTERN (out_insn);
              if (GET_CODE (out_pat) == PARALLEL)
                {
                  for (i = 0; i < XVECLEN (out_pat, 0); i++)
                    {
                      out_exp = XVECEXP (out_pat, 0, i);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
        return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
        {
          in_exp = XVECEXP (in_pat, 0, i);
          if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
            continue;
          else if (GET_CODE (in_exp) != SET)
            return false;

          if (MEM_P (SET_DEST (in_exp)))
            {
              out_set = single_set (out_insn);
              if (!out_set)
                {
                  out_pat = PATTERN (out_insn);
                  if (GET_CODE (out_pat) != PARALLEL)
                    return false;
                  for (j = 0; j < XVECLEN (out_pat, 0); j++)
                    {
                      out_exp = XVECEXP (out_pat, 0, j);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }

  return store_data_bypass_p (out_insn, in_insn);
}
/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}
/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
  const int sfdf_convert;       /* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add).  */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),    /* mulsi_const */
  COSTS_N_INSNS (6/2),    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),   /* divsi */
  COSTS_N_INSNS (70/2),   /* divdi */
  COSTS_N_INSNS (10/2),   /* fp */
  COSTS_N_INSNS (10/2),   /* dmul */
  COSTS_N_INSNS (74/2),   /* sdiv */
  COSTS_N_INSNS (74/2),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  6,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};
/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  16,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
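
/* Illustrative note (not in the original source): these tables are consumed
   through the rs6000_cost pointer; e.g. the rtx cost hook can charge
   rs6000_cost->mulsi for a SImode multiply.  */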
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
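
/* With the definitions above, each RS6000_BUILTIN_* line pulled in from
   powerpcspe-builtin.def expands to one initializer row of the table below.
   For a hypothetical entry

     RS6000_BUILTIN_2 (FOO, "foo", SOME_MASK, SOME_ATTR, CODE_FOR_foo)

   the expansion is

     { "foo", CODE_FOR_foo, SOME_MASK, SOME_ATTR },  */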
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "powerpcspe-builtin.def"
};
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
                                             machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
                                                   machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
                                             machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];         /* return value + 3 arguments.  */
  unsigned char uns_p[4];       /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31",
 "mq", "lr", "ctr", "ap",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
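
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 and
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001.  */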
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1704 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1705 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1707 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1708 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1709 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1710 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1711 rs6000_builtin_support_vector_misalignment
1712 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1713 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1716 rs6000_builtin_vectorization_cost
1717 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1718 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1719 rs6000_preferred_simd_mode
1720 #undef TARGET_VECTORIZE_INIT_COST
1721 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1722 #undef TARGET_VECTORIZE_ADD_STMT_COST
1723 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1724 #undef TARGET_VECTORIZE_FINISH_COST
1725 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1726 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1727 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1729 #undef TARGET_INIT_BUILTINS
1730 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1731 #undef TARGET_BUILTIN_DECL
1732 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1734 #undef TARGET_FOLD_BUILTIN
1735 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1736 #undef TARGET_GIMPLE_FOLD_BUILTIN
1737 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1739 #undef TARGET_EXPAND_BUILTIN
1740 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1742 #undef TARGET_MANGLE_TYPE
1743 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1745 #undef TARGET_INIT_LIBFUNCS
1746 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1748 #if TARGET_MACHO
1749 #undef TARGET_BINDS_LOCAL_P
1750 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1751 #endif
1753 #undef TARGET_MS_BITFIELD_LAYOUT_P
1754 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1756 #undef TARGET_ASM_OUTPUT_MI_THUNK
1757 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1759 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1760 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1762 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1763 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1765 #undef TARGET_REGISTER_MOVE_COST
1766 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1767 #undef TARGET_MEMORY_MOVE_COST
1768 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1769 #undef TARGET_CANNOT_COPY_INSN_P
1770 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1771 #undef TARGET_RTX_COSTS
1772 #define TARGET_RTX_COSTS rs6000_rtx_costs
1773 #undef TARGET_ADDRESS_COST
1774 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1776 #undef TARGET_DWARF_REGISTER_SPAN
1777 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1779 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1780 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1782 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1783 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1785 #undef TARGET_PROMOTE_FUNCTION_MODE
1786 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1788 #undef TARGET_RETURN_IN_MEMORY
1789 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1791 #undef TARGET_RETURN_IN_MSB
1792 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1794 #undef TARGET_SETUP_INCOMING_VARARGS
1795 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1797 /* Always strict argument naming on rs6000. */
1798 #undef TARGET_STRICT_ARGUMENT_NAMING
1799 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1800 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1801 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1802 #undef TARGET_SPLIT_COMPLEX_ARG
1803 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1804 #undef TARGET_MUST_PASS_IN_STACK
1805 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1806 #undef TARGET_PASS_BY_REFERENCE
1807 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1808 #undef TARGET_ARG_PARTIAL_BYTES
1809 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1810 #undef TARGET_FUNCTION_ARG_ADVANCE
1811 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1812 #undef TARGET_FUNCTION_ARG
1813 #define TARGET_FUNCTION_ARG rs6000_function_arg
1814 #undef TARGET_FUNCTION_ARG_PADDING
1815 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1816 #undef TARGET_FUNCTION_ARG_BOUNDARY
1817 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1819 #undef TARGET_BUILD_BUILTIN_VA_LIST
1820 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1822 #undef TARGET_EXPAND_BUILTIN_VA_START
1823 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1825 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1826 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1828 #undef TARGET_EH_RETURN_FILTER_MODE
1829 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1831 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1832 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1834 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1835 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1837 #undef TARGET_FLOATN_MODE
1838 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1840 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1841 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1843 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1844 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1846 #undef TARGET_MD_ASM_ADJUST
1847 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1849 #undef TARGET_OPTION_OVERRIDE
1850 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1852 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1853 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1854 rs6000_builtin_vectorized_function
1856 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1857 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1858 rs6000_builtin_md_vectorized_function
1860 #undef TARGET_STACK_PROTECT_GUARD
1861 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1863 #if !TARGET_MACHO
1864 #undef TARGET_STACK_PROTECT_FAIL
1865 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1866 #endif
1868 #ifdef HAVE_AS_TLS
1869 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1870 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1871 #endif
1873 /* Use a 32-bit anchor range. This leads to sequences like:
1875 addis tmp,anchor,high
1876 add dest,tmp,low
1878 where tmp itself acts as an anchor, and can be shared between
1879 accesses to the same 64k page. */
1880 #undef TARGET_MIN_ANCHOR_OFFSET
1881 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1882 #undef TARGET_MAX_ANCHOR_OFFSET
1883 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
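/* Note the spelling "-0x7fffffff - 1" rather than "-0x80000000" above:
   on hosts with 32-bit int the latter would be the negation of an
   *unsigned* constant, whereas this form yields the intended signed
   minimum, giving the full [-2^31, 2^31 - 1] anchor range.  */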
1884 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1885 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1886 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1887 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1889 #undef TARGET_BUILTIN_RECIPROCAL
1890 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1892 #undef TARGET_EXPAND_TO_RTL_HOOK
1893 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1895 #undef TARGET_INSTANTIATE_DECLS
1896 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1898 #undef TARGET_SECONDARY_RELOAD
1899 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1901 #undef TARGET_LEGITIMATE_ADDRESS_P
1902 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1904 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1905 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1907 #undef TARGET_LRA_P
1908 #define TARGET_LRA_P rs6000_lra_p
1910 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1911 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1913 #undef TARGET_CAN_ELIMINATE
1914 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1916 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1917 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1919 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1920 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1922 #undef TARGET_TRAMPOLINE_INIT
1923 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1925 #undef TARGET_FUNCTION_VALUE
1926 #define TARGET_FUNCTION_VALUE rs6000_function_value
1928 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1929 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1931 #undef TARGET_OPTION_SAVE
1932 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1934 #undef TARGET_OPTION_RESTORE
1935 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1937 #undef TARGET_OPTION_PRINT
1938 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1940 #undef TARGET_CAN_INLINE_P
1941 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1943 #undef TARGET_SET_CURRENT_FUNCTION
1944 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1946 #undef TARGET_LEGITIMATE_CONSTANT_P
1947 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1949 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1950 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1952 #undef TARGET_CAN_USE_DOLOOP_P
1953 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1955 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1956 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1958 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1959 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1960 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1961 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1962 #undef TARGET_UNWIND_WORD_MODE
1963 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1965 #undef TARGET_OFFLOAD_OPTIONS
1966 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1968 #undef TARGET_C_MODE_FOR_SUFFIX
1969 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1971 #undef TARGET_INVALID_BINARY_OP
1972 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1974 #undef TARGET_OPTAB_SUPPORTED_P
1975 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1977 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1978 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1980 #undef TARGET_HARD_REGNO_NREGS
1981 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1982 #undef TARGET_HARD_REGNO_MODE_OK
1983 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1985 #undef TARGET_MODES_TIEABLE_P
1986 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1988 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1989 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1990 rs6000_hard_regno_call_part_clobbered
1992 #undef TARGET_SLOW_UNALIGNED_ACCESS
1993 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1996 /* Processor table. */
1997 struct rs6000_ptt
1999 const char *const name; /* Canonical processor name. */
2000 const enum processor_type processor; /* Processor type enum value. */
2001 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
2004 static struct rs6000_ptt const processor_target_table[] =
2006 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
2007 #include "powerpcspe-cpus.def"
2008 #undef RS6000_CPU
2011 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
2012 name is invalid. */
2014 static int
2015 rs6000_cpu_name_lookup (const char *name)
2017 size_t i;
2019 if (name != NULL)
2021 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2022 if (! strcmp (name, processor_target_table[i].name))
2023 return (int)i;
2026 return -1;
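/* A minimal usage sketch; the cpu string here is illustrative and must
   match an entry generated from powerpcspe-cpus.def:

     int idx = rs6000_cpu_name_lookup ("8548");
     if (idx >= 0)
       flags = processor_target_table[idx].target_enable;  */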
2030 /* Return number of consecutive hard regs needed starting at reg REGNO
2031 to hold something of mode MODE.
2032 This is ordinarily the length in words of a value of mode MODE
2033 but can be less for certain modes in special long registers.
2035 For the SPE, GPRs are 64 bits but only 32 bits are visible in
2036 scalar instructions. The upper 32 bits are only available to the
2037 SIMD instructions.
2039 POWER and PowerPC GPRs hold 32 bits worth;
2040 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2042 static int
2043 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2045 unsigned HOST_WIDE_INT reg_size;
2047 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2048 128-bit floating point that can go in vector registers, which has VSX
2049 memory addressing. */
2050 if (FP_REGNO_P (regno))
2051 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2052 ? UNITS_PER_VSX_WORD
2053 : UNITS_PER_FP_WORD);
2055 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2056 reg_size = UNITS_PER_SPE_WORD;
2058 else if (ALTIVEC_REGNO_P (regno))
2059 reg_size = UNITS_PER_ALTIVEC_WORD;
2061 /* The value returned for SCmode in the E500 double case is 2 for
2062 ABI compatibility; storing an SCmode value in a single register
2063 would require function_arg and rs6000_spe_function_arg to handle
2064 SCmode so as to pass the value correctly in a pair of
2065 registers. */
2066 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
2067 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
2068 reg_size = UNITS_PER_FP_WORD;
2070 else
2071 reg_size = UNITS_PER_WORD;
2073 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
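/* For example, on a 32-bit target (UNITS_PER_WORD == 4) a DFmode value
   needs (8 + 4 - 1) / 4 == 2 GPRs, but only a single FPR, since
   UNITS_PER_FP_WORD is 8; the rounding-up division above covers both.  */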
2076 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2077 MODE. */
2078 static int
2079 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2081 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2083 if (COMPLEX_MODE_P (mode))
2084 mode = GET_MODE_INNER (mode);
2086 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2087 register combinations, and use PTImode where we need to deal with quad
2088 word memory operations. Don't allow quad words in the argument or frame
2089 pointer registers, just registers 0..31. */
2090 if (mode == PTImode)
2091 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2092 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2093 && ((regno & 1) == 0));
2095 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2096 implementations. Don't allow an item to be split between a FP register
2097 and an Altivec register. Allow TImode in all VSX registers if the user
2098 asked for it. */
2099 if (TARGET_VSX && VSX_REGNO_P (regno)
2100 && (VECTOR_MEM_VSX_P (mode)
2101 || FLOAT128_VECTOR_P (mode)
2102 || reg_addr[mode].scalar_in_vmx_p
2103 || (TARGET_VSX_TIMODE && mode == TImode)
2104 || (TARGET_VADDUQM && mode == V1TImode)))
2106 if (FP_REGNO_P (regno))
2107 return FP_REGNO_P (last_regno);
2109 if (ALTIVEC_REGNO_P (regno))
2111 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2112 return 0;
2114 return ALTIVEC_REGNO_P (last_regno);
2118 /* The GPRs can hold any mode, but values bigger than one register
2119 cannot go past R31. */
2120 if (INT_REGNO_P (regno))
2121 return INT_REGNO_P (last_regno);
2123 /* The float registers (except for VSX vector modes) can only hold floating
2124 modes and DImode. */
2125 if (FP_REGNO_P (regno))
2127 if (FLOAT128_VECTOR_P (mode))
2128 return false;
2130 if (SCALAR_FLOAT_MODE_P (mode)
2131 && (mode != TDmode || (regno % 2) == 0)
2132 && FP_REGNO_P (last_regno))
2133 return 1;
2135 if (GET_MODE_CLASS (mode) == MODE_INT)
2137 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2138 return 1;
2140 if (TARGET_VSX_SMALL_INTEGER)
2142 if (mode == SImode)
2143 return 1;
2145 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
2146 return 1;
2150 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2151 && PAIRED_VECTOR_MODE (mode))
2152 return 1;
2154 return 0;
2157 /* The CR register can only hold CC modes. */
2158 if (CR_REGNO_P (regno))
2159 return GET_MODE_CLASS (mode) == MODE_CC;
2161 if (CA_REGNO_P (regno))
2162 return mode == Pmode || mode == SImode;
2164 /* AltiVec modes can go only in AltiVec registers. */
2165 if (ALTIVEC_REGNO_P (regno))
2166 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2167 || mode == V1TImode);
2169 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2170 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2171 return 1;
2173 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
2174 and it must be able to fit within the register set. */
2176 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2179 /* Implement TARGET_HARD_REGNO_NREGS. */
2181 static unsigned int
2182 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2184 return rs6000_hard_regno_nregs[mode][regno];
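/* With the macro turned into a hook, target-independent code reaches
   this information through the target vector; a minimal sketch of a
   generic caller:

     unsigned int nregs = targetm.hard_regno_nregs (regno, mode);  */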
2187 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2189 static bool
2190 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2192 return rs6000_hard_regno_mode_ok_p[mode][regno];
2195 /* Implement TARGET_MODES_TIEABLE_P.
2197 PTImode cannot tie with other modes because PTImode is restricted to even
2198 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2199 57744).
2201 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2202 128-bit floating point on VSX systems ties with other vectors. */
2204 static bool
2205 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2207 if (mode1 == PTImode)
2208 return mode2 == PTImode;
2209 if (mode2 == PTImode)
2210 return false;
2212 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2213 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2214 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2215 return false;
2217 if (SCALAR_FLOAT_MODE_P (mode1))
2218 return SCALAR_FLOAT_MODE_P (mode2);
2219 if (SCALAR_FLOAT_MODE_P (mode2))
2220 return false;
2222 if (GET_MODE_CLASS (mode1) == MODE_CC)
2223 return GET_MODE_CLASS (mode2) == MODE_CC;
2224 if (GET_MODE_CLASS (mode2) == MODE_CC)
2225 return false;
2227 if (SPE_VECTOR_MODE (mode1))
2228 return SPE_VECTOR_MODE (mode2);
2229 if (SPE_VECTOR_MODE (mode2))
2230 return false;
2232 return true;
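/* For instance, per the tests above, SFmode and DFmode tie (both scalar
   float), V4SImode and V2DFmode tie (both Altivec/VSX vectors), but
   TImode and PTImode do not, since PTImode ties only with itself.  */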
2235 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2237 static bool
2238 rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
2240 if (TARGET_32BIT
2241 && TARGET_POWERPC64
2242 && GET_MODE_SIZE (mode) > 4
2243 && INT_REGNO_P (regno))
2244 return true;
2246 if (TARGET_VSX
2247 && FP_REGNO_P (regno)
2248 && GET_MODE_SIZE (mode) > 8
2249 && !FLOAT128_2REG_P (mode))
2250 return true;
2252 return false;
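/* Concretely: with -m32 -mpowerpc64, a DImode value lives in a single
   64-bit GPR, but only the low 32 bits are preserved across a call, so
   the first test above reports the register as part-clobbered.  */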
2255 /* Print interesting facts about registers. */
2256 static void
2257 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2259 int r, m;
2261 for (r = first_regno; r <= last_regno; ++r)
2263 const char *comma = "";
2264 int len;
2266 if (first_regno == last_regno)
2267 fprintf (stderr, "%s:\t", reg_name);
2268 else
2269 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2271 len = 8;
2272 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2273 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2275 if (len > 70)
2277 fprintf (stderr, ",\n\t");
2278 len = 8;
2279 comma = "";
2282 if (rs6000_hard_regno_nregs[m][r] > 1)
2283 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2284 rs6000_hard_regno_nregs[m][r]);
2285 else
2286 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2288 comma = ", ";
2291 if (call_used_regs[r])
2293 if (len > 70)
2295 fprintf (stderr, ",\n\t");
2296 len = 8;
2297 comma = "";
2300 len += fprintf (stderr, "%s%s", comma, "call-used");
2301 comma = ", ";
2304 if (fixed_regs[r])
2306 if (len > 70)
2308 fprintf (stderr, ",\n\t");
2309 len = 8;
2310 comma = "";
2313 len += fprintf (stderr, "%s%s", comma, "fixed");
2314 comma = ", ";
2317 if (len > 70)
2319 fprintf (stderr, ",\n\t");
2320 comma = "";
2323 len += fprintf (stderr, "%sreg-class = %s", comma,
2324 reg_class_names[(int)rs6000_regno_regclass[r]]);
2325 comma = ", ";
2327 if (len > 70)
2329 fprintf (stderr, ",\n\t");
2330 comma = "";
2333 fprintf (stderr, "%sregno = %d\n", comma, r);
2337 static const char *
2338 rs6000_debug_vector_unit (enum rs6000_vector v)
2340 const char *ret;
2342 switch (v)
2344 case VECTOR_NONE: ret = "none"; break;
2345 case VECTOR_ALTIVEC: ret = "altivec"; break;
2346 case VECTOR_VSX: ret = "vsx"; break;
2347 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2348 case VECTOR_PAIRED: ret = "paired"; break;
2349 case VECTOR_SPE: ret = "spe"; break;
2350 case VECTOR_OTHER: ret = "other"; break;
2351 default: ret = "unknown"; break;
2354 return ret;
2357 /* Inner function printing just the address mask for a particular reload
2358 register class. */
2359 DEBUG_FUNCTION char *
2360 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2362 static char ret[8];
2363 char *p = ret;
2365 if ((mask & RELOAD_REG_VALID) != 0)
2366 *p++ = 'v';
2367 else if (keep_spaces)
2368 *p++ = ' ';
2370 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2371 *p++ = 'm';
2372 else if (keep_spaces)
2373 *p++ = ' ';
2375 if ((mask & RELOAD_REG_INDEXED) != 0)
2376 *p++ = 'i';
2377 else if (keep_spaces)
2378 *p++ = ' ';
2380 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2381 *p++ = 'O';
2382 else if ((mask & RELOAD_REG_OFFSET) != 0)
2383 *p++ = 'o';
2384 else if (keep_spaces)
2385 *p++ = ' ';
2387 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2388 *p++ = '+';
2389 else if (keep_spaces)
2390 *p++ = ' ';
2392 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2393 *p++ = '+';
2394 else if (keep_spaces)
2395 *p++ = ' ';
2397 if ((mask & RELOAD_REG_AND_M16) != 0)
2398 *p++ = '&';
2399 else if (keep_spaces)
2400 *p++ = ' ';
2402 *p = '\0';
2404 return ret;
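/* Illustrative output (keep_spaces = true): a mask with VALID, INDEXED,
   OFFSET, PRE_INCDEC and PRE_MODIFY set comes back as "v io++ ", one
   column per flag in the order tested above.  */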
2407 /* Print the address masks in a human readable fashion. */
2408 DEBUG_FUNCTION void
2409 rs6000_debug_print_mode (ssize_t m)
2411 ssize_t rc;
2412 int spaces = 0;
2413 bool fuse_extra_p;
2415 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2416 for (rc = 0; rc < N_RELOAD_REG; rc++)
2417 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2418 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2420 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2421 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2422 fprintf (stderr, " Reload=%c%c",
2423 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2424 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2425 else
2426 spaces += sizeof (" Reload=sl") - 1;
2428 if (reg_addr[m].scalar_in_vmx_p)
2430 fprintf (stderr, "%*s Upper=y", spaces, "");
2431 spaces = 0;
2433 else
2434 spaces += sizeof (" Upper=y") - 1;
2436 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2437 || reg_addr[m].fused_toc);
2438 if (!fuse_extra_p)
2440 for (rc = 0; rc < N_RELOAD_REG; rc++)
2442 if (rc != RELOAD_REG_ANY)
2444 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2446 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2447 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2448 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2450 fuse_extra_p = true;
2451 break;
2457 if (fuse_extra_p)
2459 fprintf (stderr, "%*s Fuse:", spaces, "");
2460 spaces = 0;
2462 for (rc = 0; rc < N_RELOAD_REG; rc++)
2464 if (rc != RELOAD_REG_ANY)
2466 char load, store;
2468 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2469 load = 'l';
2470 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2471 load = 'L';
2472 else
2473 load = '-';
2475 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2476 store = 's';
2477 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2478 store = 'S';
2479 else
2480 store = '-';
2482 if (load == '-' && store == '-')
2483 spaces += 5;
2484 else
2486 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2487 reload_reg_map[rc].name[0], load, store);
2488 spaces = 0;
2493 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2495 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2496 spaces = 0;
2498 else
2499 spaces += sizeof (" P8gpr") - 1;
2501 if (reg_addr[m].fused_toc)
2503 fprintf (stderr, "%*sToc", (spaces + 1), "");
2504 spaces = 0;
2506 else
2507 spaces += sizeof (" Toc") - 1;
2509 else
2510 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2512 if (rs6000_vector_unit[m] != VECTOR_NONE
2513 || rs6000_vector_mem[m] != VECTOR_NONE)
2515 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2516 spaces, "",
2517 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2518 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2521 fputs ("\n", stderr);
2524 #define DEBUG_FMT_ID "%-32s= "
2525 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2526 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2527 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
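/* These rely on C string-literal concatenation; e.g.
   fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size) uses the
   format "%-32s= %d\n", printing the label left justified in a
   32-column field followed by the value.  */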
2529 /* Print various interesting information with -mdebug=reg. */
2530 static void
2531 rs6000_debug_reg_global (void)
2533 static const char *const tf[2] = { "false", "true" };
2534 const char *nl = (const char *)0;
2535 int m;
2536 size_t m1, m2, v;
2537 char costly_num[20];
2538 char nop_num[20];
2539 char flags_buffer[40];
2540 const char *costly_str;
2541 const char *nop_str;
2542 const char *trace_str;
2543 const char *abi_str;
2544 const char *cmodel_str;
2545 struct cl_target_option cl_opts;
2547 /* Modes we want tieable information on. */
2548 static const machine_mode print_tieable_modes[] = {
2549 QImode,
2550 HImode,
2551 SImode,
2552 DImode,
2553 TImode,
2554 PTImode,
2555 SFmode,
2556 DFmode,
2557 TFmode,
2558 IFmode,
2559 KFmode,
2560 SDmode,
2561 DDmode,
2562 TDmode,
2563 V8QImode,
2564 V4HImode,
2565 V2SImode,
2566 V16QImode,
2567 V8HImode,
2568 V4SImode,
2569 V2DImode,
2570 V1TImode,
2571 V32QImode,
2572 V16HImode,
2573 V8SImode,
2574 V4DImode,
2575 V2TImode,
2576 V2SFmode,
2577 V4SFmode,
2578 V2DFmode,
2579 V8SFmode,
2580 V4DFmode,
2581 CCmode,
2582 CCUNSmode,
2583 CCEQmode,
2586 /* Virtual regs we are interested in. */
2587 static const struct {
2588 int regno; /* register number. */
2589 const char *name; /* register name. */
2590 } virtual_regs[] = {
2591 { STACK_POINTER_REGNUM, "stack pointer:" },
2592 { TOC_REGNUM, "toc: " },
2593 { STATIC_CHAIN_REGNUM, "static chain: " },
2594 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2595 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2596 { ARG_POINTER_REGNUM, "arg pointer: " },
2597 { FRAME_POINTER_REGNUM, "frame pointer:" },
2598 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2599 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2600 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2601 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2602 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2603 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2604 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2605 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2606 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2609 fputs ("\nHard register information:\n", stderr);
2610 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2611 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2612 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2613 LAST_ALTIVEC_REGNO,
2614 "vs");
2615 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2616 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2617 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2618 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2619 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2620 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2621 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2622 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2624 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2625 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2626 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2628 fprintf (stderr,
2629 "\n"
2630 "d reg_class = %s\n"
2631 "f reg_class = %s\n"
2632 "v reg_class = %s\n"
2633 "wa reg_class = %s\n"
2634 "wb reg_class = %s\n"
2635 "wd reg_class = %s\n"
2636 "we reg_class = %s\n"
2637 "wf reg_class = %s\n"
2638 "wg reg_class = %s\n"
2639 "wh reg_class = %s\n"
2640 "wi reg_class = %s\n"
2641 "wj reg_class = %s\n"
2642 "wk reg_class = %s\n"
2643 "wl reg_class = %s\n"
2644 "wm reg_class = %s\n"
2645 "wo reg_class = %s\n"
2646 "wp reg_class = %s\n"
2647 "wq reg_class = %s\n"
2648 "wr reg_class = %s\n"
2649 "ws reg_class = %s\n"
2650 "wt reg_class = %s\n"
2651 "wu reg_class = %s\n"
2652 "wv reg_class = %s\n"
2653 "ww reg_class = %s\n"
2654 "wx reg_class = %s\n"
2655 "wy reg_class = %s\n"
2656 "wz reg_class = %s\n"
2657 "wA reg_class = %s\n"
2658 "wH reg_class = %s\n"
2659 "wI reg_class = %s\n"
2660 "wJ reg_class = %s\n"
2661 "wK reg_class = %s\n"
2662 "\n",
2663 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2664 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2665 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2666 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2667 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2668 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2669 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2670 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2671 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2672 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2673 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2674 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2675 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2676 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2677 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2678 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2679 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2680 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2681 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2682 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2683 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2684 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2685 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2686 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2687 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2688 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2689 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2690 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2691 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2692 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2693 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2694 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2696 nl = "\n";
2697 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2698 rs6000_debug_print_mode (m);
2700 fputs ("\n", stderr);
2702 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2704 machine_mode mode1 = print_tieable_modes[m1];
2705 bool first_time = true;
2707 nl = (const char *)0;
2708 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2710 machine_mode mode2 = print_tieable_modes[m2];
2711 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2713 if (first_time)
2715 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2716 nl = "\n";
2717 first_time = false;
2720 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2724 if (!first_time)
2725 fputs ("\n", stderr);
2728 if (nl)
2729 fputs (nl, stderr);
2731 if (rs6000_recip_control)
2733 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2735 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2736 if (rs6000_recip_bits[m])
2738 fprintf (stderr,
2739 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2740 GET_MODE_NAME (m),
2741 (RS6000_RECIP_AUTO_RE_P (m)
2742 ? "auto"
2743 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2744 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2745 ? "auto"
2746 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2749 fputs ("\n", stderr);
2752 if (rs6000_cpu_index >= 0)
2754 const char *name = processor_target_table[rs6000_cpu_index].name;
2755 HOST_WIDE_INT flags
2756 = processor_target_table[rs6000_cpu_index].target_enable;
2758 sprintf (flags_buffer, "-mcpu=%s flags", name);
2759 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2761 else
2762 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2764 if (rs6000_tune_index >= 0)
2766 const char *name = processor_target_table[rs6000_tune_index].name;
2767 HOST_WIDE_INT flags
2768 = processor_target_table[rs6000_tune_index].target_enable;
2770 sprintf (flags_buffer, "-mtune=%s flags", name);
2771 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2773 else
2774 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2776 cl_target_option_save (&cl_opts, &global_options);
2777 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2778 rs6000_isa_flags);
2780 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2781 rs6000_isa_flags_explicit);
2783 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2784 rs6000_builtin_mask);
2786 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2788 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2789 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2791 switch (rs6000_sched_costly_dep)
2793 case max_dep_latency:
2794 costly_str = "max_dep_latency";
2795 break;
2797 case no_dep_costly:
2798 costly_str = "no_dep_costly";
2799 break;
2801 case all_deps_costly:
2802 costly_str = "all_deps_costly";
2803 break;
2805 case true_store_to_load_dep_costly:
2806 costly_str = "true_store_to_load_dep_costly";
2807 break;
2809 case store_to_load_dep_costly:
2810 costly_str = "store_to_load_dep_costly";
2811 break;
2813 default:
2814 costly_str = costly_num;
2815 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2816 break;
2819 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2821 switch (rs6000_sched_insert_nops)
2823 case sched_finish_regroup_exact:
2824 nop_str = "sched_finish_regroup_exact";
2825 break;
2827 case sched_finish_pad_groups:
2828 nop_str = "sched_finish_pad_groups";
2829 break;
2831 case sched_finish_none:
2832 nop_str = "sched_finish_none";
2833 break;
2835 default:
2836 nop_str = nop_num;
2837 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2838 break;
2841 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2843 switch (rs6000_sdata)
2845 default:
2846 case SDATA_NONE:
2847 break;
2849 case SDATA_DATA:
2850 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2851 break;
2853 case SDATA_SYSV:
2854 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2855 break;
2857 case SDATA_EABI:
2858 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2859 break;
2863 switch (rs6000_traceback)
2865 case traceback_default: trace_str = "default"; break;
2866 case traceback_none: trace_str = "none"; break;
2867 case traceback_part: trace_str = "part"; break;
2868 case traceback_full: trace_str = "full"; break;
2869 default: trace_str = "unknown"; break;
2872 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2874 switch (rs6000_current_cmodel)
2876 case CMODEL_SMALL: cmodel_str = "small"; break;
2877 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2878 case CMODEL_LARGE: cmodel_str = "large"; break;
2879 default: cmodel_str = "unknown"; break;
2882 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2884 switch (rs6000_current_abi)
2886 case ABI_NONE: abi_str = "none"; break;
2887 case ABI_AIX: abi_str = "aix"; break;
2888 case ABI_ELFv2: abi_str = "ELFv2"; break;
2889 case ABI_V4: abi_str = "V4"; break;
2890 case ABI_DARWIN: abi_str = "darwin"; break;
2891 default: abi_str = "unknown"; break;
2894 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2896 if (rs6000_altivec_abi)
2897 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2899 if (rs6000_spe_abi)
2900 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2902 if (rs6000_darwin64_abi)
2903 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2905 if (rs6000_float_gprs)
2906 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2908 fprintf (stderr, DEBUG_FMT_S, "fprs",
2909 (TARGET_FPRS ? "true" : "false"));
2911 fprintf (stderr, DEBUG_FMT_S, "single_float",
2912 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2914 fprintf (stderr, DEBUG_FMT_S, "double_float",
2915 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2917 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2918 (TARGET_SOFT_FLOAT ? "true" : "false"));
2920 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2921 (TARGET_E500_SINGLE ? "true" : "false"));
2923 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2924 (TARGET_E500_DOUBLE ? "true" : "false"));
2926 if (TARGET_LINK_STACK)
2927 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2929 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2931 if (TARGET_P8_FUSION)
2933 char options[80];
2935 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2936 if (TARGET_TOC_FUSION)
2937 strcat (options, ", toc");
2939 if (TARGET_P8_FUSION_SIGN)
2940 strcat (options, ", sign");
2942 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2945 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2946 TARGET_SECURE_PLT ? "secure" : "bss");
2947 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2948 aix_struct_return ? "aix" : "sysv");
2949 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2950 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2951 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2952 tf[!!rs6000_align_branch_targets]);
2953 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2954 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2955 rs6000_long_double_type_size);
2956 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2957 (int)rs6000_sched_restricted_insns_priority);
2958 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2959 (int)END_BUILTINS);
2960 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2961 (int)RS6000_BUILTIN_COUNT);
2963 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2964 (int)TARGET_FLOAT128_ENABLE_TYPE);
2966 if (TARGET_VSX)
2967 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2968 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2970 if (TARGET_DIRECT_MOVE_128)
2971 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2972 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2976 /* Update the addr mask bits in reg_addr to help secondary reload and the
2977 legitimate address support figure out the appropriate addressing to
2978 use. */
2980 static void
2981 rs6000_setup_reg_addr_masks (void)
2983 ssize_t rc, reg, m, nregs;
2984 addr_mask_type any_addr_mask, addr_mask;
2986 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2988 machine_mode m2 = (machine_mode) m;
2989 bool complex_p = false;
2990 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2991 size_t msize;
2993 if (COMPLEX_MODE_P (m2))
2995 complex_p = true;
2996 m2 = GET_MODE_INNER (m2);
2999 msize = GET_MODE_SIZE (m2);
3001 /* SDmode is special in that we want to access it only via REG+REG
3002 addressing on power7 and above, since we want to use the LFIWZX and
3003 STFIWX instructions to load and store it. */
3004 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
3006 any_addr_mask = 0;
3007 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
3009 addr_mask = 0;
3010 reg = reload_reg_map[rc].reg;
3012 /* Can mode values go in the GPR/FPR/Altivec registers? */
3013 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
3015 bool small_int_vsx_p = (small_int_p
3016 && (rc == RELOAD_REG_FPR
3017 || rc == RELOAD_REG_VMX));
3019 nregs = rs6000_hard_regno_nregs[m][reg];
3020 addr_mask |= RELOAD_REG_VALID;
3022 /* Indicate if the mode takes more than 1 physical register. If
3023 it takes a single register, indicate it can do REG+REG
3024 addressing. Small integers in VSX registers can only do
3025 REG+REG addressing. */
3026 if (small_int_vsx_p)
3027 addr_mask |= RELOAD_REG_INDEXED;
3028 else if (nregs > 1 || m == BLKmode || complex_p)
3029 addr_mask |= RELOAD_REG_MULTIPLE;
3030 else
3031 addr_mask |= RELOAD_REG_INDEXED;
3033 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
3034 addressing. Restrict addressing on SPE for 64-bit types
3035 because of the SUBREG hackery used to address 64-bit floats in
3036 '32-bit' GPRs. If we allow scalars into Altivec registers,
3037 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
3039 if (TARGET_UPDATE
3040 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
3041 && msize <= 8
3042 && !VECTOR_MODE_P (m2)
3043 && !FLOAT128_VECTOR_P (m2)
3044 && !complex_p
3045 && !small_int_vsx_p
3046 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
3047 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
3048 && !(TARGET_E500_DOUBLE && msize == 8))
3050 addr_mask |= RELOAD_REG_PRE_INCDEC;
3052 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
3053 we don't allow PRE_MODIFY for some multi-register
3054 operations. */
3055 switch (m)
3057 default:
3058 addr_mask |= RELOAD_REG_PRE_MODIFY;
3059 break;
3061 case E_DImode:
3062 if (TARGET_POWERPC64)
3063 addr_mask |= RELOAD_REG_PRE_MODIFY;
3064 break;
3066 case E_DFmode:
3067 case E_DDmode:
3068 if (TARGET_DF_INSN)
3069 addr_mask |= RELOAD_REG_PRE_MODIFY;
3070 break;
3075 /* GPR and FPR registers can do REG+OFFSET addressing, except
3076 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
3077 for 64-bit scalars and 32-bit SFmode to altivec registers. */
3078 if ((addr_mask != 0) && !indexed_only_p
3079 && msize <= 8
3080 && (rc == RELOAD_REG_GPR
3081 || ((msize == 8 || m2 == SFmode)
3082 && (rc == RELOAD_REG_FPR
3083 || (rc == RELOAD_REG_VMX
3084 && TARGET_P9_DFORM_SCALAR)))))
3085 addr_mask |= RELOAD_REG_OFFSET;
3087 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
3088 instructions are enabled. The offset for 128-bit VSX registers is
3089 only 12 bits. While GPRs can handle the full offset range, VSX
3090 registers can only handle the restricted range. */
3091 else if ((addr_mask != 0) && !indexed_only_p
3092 && msize == 16 && TARGET_P9_DFORM_VECTOR
3093 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
3094 || (m2 == TImode && TARGET_VSX_TIMODE)))
3096 addr_mask |= RELOAD_REG_OFFSET;
3097 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
3098 addr_mask |= RELOAD_REG_QUAD_OFFSET;
3101 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3102 addressing on 128-bit types. */
3103 if (rc == RELOAD_REG_VMX && msize == 16
3104 && (addr_mask & RELOAD_REG_VALID) != 0)
3105 addr_mask |= RELOAD_REG_AND_M16;
3107 reg_addr[m].addr_mask[rc] = addr_mask;
3108 any_addr_mask |= addr_mask;
3111 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
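/* At this point each reg_addr[m].addr_mask[rc] summarizes, for one mode
   and reload register class, which addressing forms reload may use.
   As an illustration, a 16-byte vector mode in the VMX class typically
   ends up with VALID, INDEXED and AND_M16 set, and gains OFFSET (plus
   QUAD_OFFSET) only when ISA 3.0 D-form vector addressing is enabled.  */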
3116 /* Initialize the various global tables that are based on register size. */
3117 static void
3118 rs6000_init_hard_regno_mode_ok (bool global_init_p)
3120 ssize_t r, m, c;
3121 int align64;
3122 int align32;
3124 /* Precalculate REGNO_REG_CLASS. */
3125 rs6000_regno_regclass[0] = GENERAL_REGS;
3126 for (r = 1; r < 32; ++r)
3127 rs6000_regno_regclass[r] = BASE_REGS;
3129 for (r = 32; r < 64; ++r)
3130 rs6000_regno_regclass[r] = FLOAT_REGS;
3132 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
3133 rs6000_regno_regclass[r] = NO_REGS;
3135 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3136 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3138 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3139 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3140 rs6000_regno_regclass[r] = CR_REGS;
3142 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3143 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3144 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3145 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3146 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3147 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
3148 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
3149 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3150 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3151 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3152 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3153 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3155 /* Precalculate register class to simpler reload register class. We don't
3156 need all of the register classes that are combinations of different
3157 classes, just the simple ones that have constraint letters. */
3158 for (c = 0; c < N_REG_CLASSES; c++)
3159 reg_class_to_reg_type[c] = NO_REG_TYPE;
3161 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3162 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3163 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3164 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3165 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3166 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3167 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3168 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3169 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3170 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3171 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
3172 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
3174 if (TARGET_VSX)
3176 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3177 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3179 else
3181 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3182 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3185 /* Precalculate the valid memory formats as well as the vector information;
3186 this must be set up before the rs6000_hard_regno_nregs_internal calls
3187 below. */
3188 gcc_assert ((int)VECTOR_NONE == 0);
3189 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3190 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
3192 gcc_assert ((int)CODE_FOR_nothing == 0);
3193 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3195 gcc_assert ((int)NO_REGS == 0);
3196 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3198 /* The VSX hardware allows native alignment for vectors, but we control whether
3199 the compiler believes it can use native alignment or must still use 128-bit alignment. */
3200 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3202 align64 = 64;
3203 align32 = 32;
3205 else
3207 align64 = 128;
3208 align32 = 128;
3211 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3212 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3213 if (TARGET_FLOAT128_TYPE)
3215 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3216 rs6000_vector_align[KFmode] = 128;
3218 if (FLOAT128_IEEE_P (TFmode))
3220 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3221 rs6000_vector_align[TFmode] = 128;
3225 /* V2DF mode, VSX only. */
3226 if (TARGET_VSX)
3228 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3229 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3230 rs6000_vector_align[V2DFmode] = align64;
3233 /* V4SF mode, either VSX or Altivec. */
3234 if (TARGET_VSX)
3236 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3237 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3238 rs6000_vector_align[V4SFmode] = align32;
3240 else if (TARGET_ALTIVEC)
3242 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3243 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3244 rs6000_vector_align[V4SFmode] = align32;
3247 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3248 and stores. */
3249 if (TARGET_ALTIVEC)
3251 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3252 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3253 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3254 rs6000_vector_align[V4SImode] = align32;
3255 rs6000_vector_align[V8HImode] = align32;
3256 rs6000_vector_align[V16QImode] = align32;
3258 if (TARGET_VSX)
3260 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3261 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3262 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3264 else
3266 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3267 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3268 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3272 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3273 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3274 if (TARGET_VSX)
3276 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3277 rs6000_vector_unit[V2DImode]
3278 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3279 rs6000_vector_align[V2DImode] = align64;
3281 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3282 rs6000_vector_unit[V1TImode]
3283 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3284 rs6000_vector_align[V1TImode] = 128;
3287 /* DFmode, see if we want to use the VSX unit. Memory is handled
3288 differently, so don't set rs6000_vector_mem. */
3289 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3291 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3292 rs6000_vector_align[DFmode] = 64;
3295 /* SFmode, see if we want to use the VSX unit. */
3296 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3298 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3299 rs6000_vector_align[SFmode] = 32;
3302 /* Allow TImode in VSX register and set the VSX memory macros. */
3303 if (TARGET_VSX && TARGET_VSX_TIMODE)
3305 rs6000_vector_mem[TImode] = VECTOR_VSX;
3306 rs6000_vector_align[TImode] = align64;
3309 /* TODO add SPE and paired floating point vector support. */
3311 /* Register class constraints for the constraints that depend on compile
3312 switches. When the VSX code was added, different constraints were added
3313 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3314 of the VSX registers are used. The register classes for scalar floating
3315 point types are set based on whether we allow that type into the upper
3316 (Altivec) registers. GCC has register classes to target the Altivec
3317 registers for load/store operations, to select using a VSX memory
3318 operation instead of the traditional floating point operation. The
3319 constraints are:
3321 d - Register class to use with traditional DFmode instructions.
3322 f - Register class to use with traditional SFmode instructions.
3323 v - Altivec register.
3324 wa - Any VSX register.
3325 wc - Reserved to represent individual CR bits (used in LLVM).
3326 wd - Preferred register class for V2DFmode.
3327 wf - Preferred register class for V4SFmode.
3328 wg - Float register for power6x move insns.
3329 wh - FP register for direct move instructions.
3330 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3331 wj - FP or VSX register to hold 64-bit integers for direct moves.
3332 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3333 wl - Float register if we can do 32-bit signed int loads.
3334 wm - VSX register for ISA 2.07 direct move operations.
3335 wn - always NO_REGS.
3336 wr - GPR if 64-bit mode is permitted.
3337 ws - Register class to do ISA 2.06 DF operations.
3338 wt - VSX register for TImode in VSX registers.
3339 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3340 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3341 ww - Register class to do SF conversions in with VSX operations.
3342 wx - Float register if we can do 32-bit int stores.
3343 wy - Register class to do ISA 2.07 SF operations.
3344 wz - Float register if we can do 32-bit unsigned int loads.
3345 wH - Altivec register if SImode is allowed in VSX registers.
3346 wI - VSX register if SImode is allowed in VSX registers.
3347 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3348 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
3350 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3351 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3353 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3354 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3356 if (TARGET_VSX)
3358 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3359 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3360 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3362 if (TARGET_VSX_TIMODE)
3363 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3365 if (TARGET_UPPER_REGS_DF) /* DFmode */
3367 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3368 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3370 else
3371 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3373 if (TARGET_UPPER_REGS_DI) /* DImode */
3374 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3375 else
3376 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3379 /* Add conditional constraints based on various options, to allow us to
3380 collapse multiple insn patterns. */
3381 if (TARGET_ALTIVEC)
3382 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3384 if (TARGET_MFPGPR) /* DFmode */
3385 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3387 if (TARGET_LFIWAX)
3388 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3390 if (TARGET_DIRECT_MOVE)
3392 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3393 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3394 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3395 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3396 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3397 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3400 if (TARGET_POWERPC64)
3402 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3403 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3406 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3408 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3409 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3410 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3412 else if (TARGET_P8_VECTOR)
3414 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3415 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3417 else if (TARGET_VSX)
3418 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3420 if (TARGET_STFIWX)
3421 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3423 if (TARGET_LFIWZX)
3424 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3426 if (TARGET_FLOAT128_TYPE)
3428 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3429 if (FLOAT128_IEEE_P (TFmode))
3430 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3433 /* Support for new D-form instructions. */
3434 if (TARGET_P9_DFORM_SCALAR)
3435 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3437 /* Support for ISA 3.0 (power9) vectors. */
3438 if (TARGET_P9_VECTOR)
3439 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3441 /* Support for new direct moves (ISA 3.0 + 64bit). */
3442 if (TARGET_DIRECT_MOVE_128)
3443 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3445 /* Support small integers in VSX registers. */
3446 if (TARGET_VSX_SMALL_INTEGER)
3448 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3449 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3450 if (TARGET_P9_VECTOR)
3452 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3453 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3457 /* Set up the reload helper and direct move functions. */
3458 if (TARGET_VSX || TARGET_ALTIVEC)
3460 if (TARGET_64BIT)
3462 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3463 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3464 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3465 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3466 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3467 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3468 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3469 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3470 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3471 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3472 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3473 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3474 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3475 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3476 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3477 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3478 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3479 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3480 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3481 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3483 if (FLOAT128_VECTOR_P (KFmode))
3485 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3486 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3489 if (FLOAT128_VECTOR_P (TFmode))
3491 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3492 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3495 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3496 available. */
3497 if (TARGET_NO_SDMODE_STACK)
3499 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3500 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3503 if (TARGET_VSX_TIMODE)
3505 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3506 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3509 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3511 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3512 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3513 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3514 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3515 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3516 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3517 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3518 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3519 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3521 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3522 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3523 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3524 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3525 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3526 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3527 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3528 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3529 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3531 if (FLOAT128_VECTOR_P (KFmode))
3533 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3534 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3537 if (FLOAT128_VECTOR_P (TFmode))
3539 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3540 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3544 else
3546 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3547 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3548 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3549 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3550 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3551 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3552 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3553 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3554 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3555 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3556 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3557 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3558 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3559 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3560 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3561 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3562 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3563 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3564 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3565 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3567 if (FLOAT128_VECTOR_P (KFmode))
3569 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3570 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3573 if (FLOAT128_IEEE_P (TFmode))
3575 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3576 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3579 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3580 available. */
3581 if (TARGET_NO_SDMODE_STACK)
3583 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3584 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3587 if (TARGET_VSX_TIMODE)
3589 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3590 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3593 if (TARGET_DIRECT_MOVE)
3595 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3596 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3597 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3601 if (TARGET_UPPER_REGS_DF)
3602 reg_addr[DFmode].scalar_in_vmx_p = true;
3604 if (TARGET_UPPER_REGS_DI)
3605 reg_addr[DImode].scalar_in_vmx_p = true;
3607 if (TARGET_UPPER_REGS_SF)
3608 reg_addr[SFmode].scalar_in_vmx_p = true;
3610 if (TARGET_VSX_SMALL_INTEGER)
3612 reg_addr[SImode].scalar_in_vmx_p = true;
3613 if (TARGET_P9_VECTOR)
3615 reg_addr[HImode].scalar_in_vmx_p = true;
3616 reg_addr[QImode].scalar_in_vmx_p = true;
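/* A minimal sketch (an assumed consumer, not code from this file) of
   how the handlers recorded in reg_addr above are typically consulted;
   secondary reload picks the mode- and direction-specific expander:

	static enum insn_code
	reload_handler_sketch (machine_mode mode, bool store_p)
	{
	  return (store_p
		  ? reg_addr[mode].reload_store
		  : reg_addr[mode].reload_load);
	}

   An entry that was never assigned simply means no special reload
   handler is needed for that mode.  */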
3621 /* Setup the fusion operations. */
3622 if (TARGET_P8_FUSION)
3624 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3625 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3626 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3627 if (TARGET_64BIT)
3628 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3631 if (TARGET_P9_FUSION)
3633 struct fuse_insns {
3634 enum machine_mode mode; /* mode of the fused type. */
3635 enum machine_mode pmode; /* pointer mode. */
3636 enum rs6000_reload_reg_type rtype; /* register type. */
3637 enum insn_code load; /* load insn. */
3638 enum insn_code store; /* store insn. */
3641 static const struct fuse_insns addis_insns[] = {
3642 { E_SFmode, E_DImode, RELOAD_REG_FPR,
3643 CODE_FOR_fusion_vsx_di_sf_load,
3644 CODE_FOR_fusion_vsx_di_sf_store },
3646 { E_SFmode, E_SImode, RELOAD_REG_FPR,
3647 CODE_FOR_fusion_vsx_si_sf_load,
3648 CODE_FOR_fusion_vsx_si_sf_store },
3650 { E_DFmode, E_DImode, RELOAD_REG_FPR,
3651 CODE_FOR_fusion_vsx_di_df_load,
3652 CODE_FOR_fusion_vsx_di_df_store },
3654 { E_DFmode, E_SImode, RELOAD_REG_FPR,
3655 CODE_FOR_fusion_vsx_si_df_load,
3656 CODE_FOR_fusion_vsx_si_df_store },
3658 { E_DImode, E_DImode, RELOAD_REG_FPR,
3659 CODE_FOR_fusion_vsx_di_di_load,
3660 CODE_FOR_fusion_vsx_di_di_store },
3662 { E_DImode, E_SImode, RELOAD_REG_FPR,
3663 CODE_FOR_fusion_vsx_si_di_load,
3664 CODE_FOR_fusion_vsx_si_di_store },
3666 { E_QImode, E_DImode, RELOAD_REG_GPR,
3667 CODE_FOR_fusion_gpr_di_qi_load,
3668 CODE_FOR_fusion_gpr_di_qi_store },
3670 { E_QImode, E_SImode, RELOAD_REG_GPR,
3671 CODE_FOR_fusion_gpr_si_qi_load,
3672 CODE_FOR_fusion_gpr_si_qi_store },
3674 { E_HImode, E_DImode, RELOAD_REG_GPR,
3675 CODE_FOR_fusion_gpr_di_hi_load,
3676 CODE_FOR_fusion_gpr_di_hi_store },
3678 { E_HImode, E_SImode, RELOAD_REG_GPR,
3679 CODE_FOR_fusion_gpr_si_hi_load,
3680 CODE_FOR_fusion_gpr_si_hi_store },
3682 { E_SImode, E_DImode, RELOAD_REG_GPR,
3683 CODE_FOR_fusion_gpr_di_si_load,
3684 CODE_FOR_fusion_gpr_di_si_store },
3686 { E_SImode, E_SImode, RELOAD_REG_GPR,
3687 CODE_FOR_fusion_gpr_si_si_load,
3688 CODE_FOR_fusion_gpr_si_si_store },
3690 { E_SFmode, E_DImode, RELOAD_REG_GPR,
3691 CODE_FOR_fusion_gpr_di_sf_load,
3692 CODE_FOR_fusion_gpr_di_sf_store },
3694 { E_SFmode, E_SImode, RELOAD_REG_GPR,
3695 CODE_FOR_fusion_gpr_si_sf_load,
3696 CODE_FOR_fusion_gpr_si_sf_store },
3698 { E_DImode, E_DImode, RELOAD_REG_GPR,
3699 CODE_FOR_fusion_gpr_di_di_load,
3700 CODE_FOR_fusion_gpr_di_di_store },
3702 { E_DFmode, E_DImode, RELOAD_REG_GPR,
3703 CODE_FOR_fusion_gpr_di_df_load,
3704 CODE_FOR_fusion_gpr_di_df_store },
3707 machine_mode cur_pmode = Pmode;
3708 size_t i;
3710 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3712 machine_mode xmode = addis_insns[i].mode;
3713 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3715 if (addis_insns[i].pmode != cur_pmode)
3716 continue;
3718 if (rtype == RELOAD_REG_FPR
3719 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3720 continue;
3722 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3723 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3725 if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
3727 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3728 = addis_insns[i].load;
3729 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3730 = addis_insns[i].store;
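/* For reference (assumed instruction sequences, not taken from this
   file), the addis fusion set up above targets back-to-back pairs such
   as

	addis 9,2,.LC0@toc@ha	# high 16 bits of the address
	lfd   0,.LC0@toc@l(9)	# dependent D-form load

   which power8/power9 cores can dispatch as a single fused operation
   when the two instructions stay adjacent.  */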
3735 /* Note which types we support for fusing a TOC setup plus a memory insn.
3736 We only do fused TOCs for medium/large code models. */
3737 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3738 && (TARGET_CMODEL != CMODEL_SMALL))
3740 reg_addr[QImode].fused_toc = true;
3741 reg_addr[HImode].fused_toc = true;
3742 reg_addr[SImode].fused_toc = true;
3743 reg_addr[DImode].fused_toc = true;
3744 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3746 if (TARGET_SINGLE_FLOAT)
3747 reg_addr[SFmode].fused_toc = true;
3748 if (TARGET_DOUBLE_FLOAT)
3749 reg_addr[DFmode].fused_toc = true;
3753 /* Precalculate HARD_REGNO_NREGS. */
3754 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3755 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3756 rs6000_hard_regno_nregs[m][r]
3757 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3759 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3760 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3761 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3762 if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3763 rs6000_hard_regno_mode_ok_p[m][r] = true;
3765 /* Precalculate CLASS_MAX_NREGS sizes. */
3766 for (c = 0; c < LIM_REG_CLASSES; ++c)
3768 int reg_size;
3770 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3771 reg_size = UNITS_PER_VSX_WORD;
3773 else if (c == ALTIVEC_REGS)
3774 reg_size = UNITS_PER_ALTIVEC_WORD;
3776 else if (c == FLOAT_REGS)
3777 reg_size = UNITS_PER_FP_WORD;
3779 else
3780 reg_size = UNITS_PER_WORD;
3782 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3784 machine_mode m2 = (machine_mode)m;
3785 int reg_size2 = reg_size;
3787 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3788 in VSX. */
3789 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3790 reg_size2 = UNITS_PER_FP_WORD;
3792 rs6000_class_max_nregs[m][c]
3793 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
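/* Worked example of the ceiling division above (illustrative sizes for
   a 64-bit VSX target):

	V2DFmode (16 bytes) in a 16-byte VSX register: (16+15)/16 = 1
	IBM long double (16 bytes), forced by the FLOAT128_2REG_P check
	to 8-byte FP words:			       (16+7)/8   = 2
	DImode (8 bytes) in an 8-byte GPR:	       (8+7)/8    = 1  */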
3797 if (TARGET_E500_DOUBLE)
3798 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3800 /* Calculate which modes to automatically generate code to use the
3801 reciprocal divide and square root instructions. In the future, possibly
3802 automatically generate the instructions even if the user did not specify
3803 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3804 not accurate enough. */
3805 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3806 if (TARGET_FRES)
3807 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3808 if (TARGET_FRE)
3809 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3810 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3811 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3812 if (VECTOR_UNIT_VSX_P (V2DFmode))
3813 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3815 if (TARGET_FRSQRTES)
3816 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3817 if (TARGET_FRSQRTE)
3818 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3819 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3820 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3821 if (VECTOR_UNIT_VSX_P (V2DFmode))
3822 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3824 if (rs6000_recip_control)
3826 if (!flag_finite_math_only)
3827 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3828 if (flag_trapping_math)
3829 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3830 if (!flag_reciprocal_math)
3831 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3832 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3834 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3835 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3836 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3838 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3839 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3840 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3842 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3843 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3844 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3846 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3847 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3848 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3850 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3851 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3852 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3854 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3855 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3856 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3858 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3859 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3860 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3862 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3863 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3864 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
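/* Illustrative user-level effect, an assumption rather than anything
   from this file: with -O2 -mcpu=power8 -mrecip -ffast-math the AUTO
   bits set above allow a loop like

	static void
	recip_div_sketch (float *restrict a, const float *restrict b,
			  int n)
	{
	  for (int i = 0; i < n; i++)
	    a[i] = a[i] / b[i];
	}

   to be vectorized with the V4SF reciprocal estimate plus
   Newton-Raphson refinement instead of the much slower vector
   divide.  */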
3868 /* Update the addr mask bits in reg_addr to help secondary reload and the
3869 legitimate address support figure out the appropriate addressing to
3870 use. */
3871 rs6000_setup_reg_addr_masks ();
3873 if (global_init_p || TARGET_DEBUG_TARGET)
3875 if (TARGET_DEBUG_REG)
3876 rs6000_debug_reg_global ();
3878 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3879 fprintf (stderr,
3880 "SImode variable mult cost = %d\n"
3881 "SImode constant mult cost = %d\n"
3882 "SImode short constant mult cost = %d\n"
3883 "DImode multiplication cost = %d\n"
3884 "SImode division cost = %d\n"
3885 "DImode division cost = %d\n"
3886 "Simple fp operation cost = %d\n"
3887 "DFmode multiplication cost = %d\n"
3888 "SFmode division cost = %d\n"
3889 "DFmode division cost = %d\n"
3890 "cache line size = %d\n"
3891 "l1 cache size = %d\n"
3892 "l2 cache size = %d\n"
3893 "simultaneous prefetches = %d\n"
3894 "\n",
3895 rs6000_cost->mulsi,
3896 rs6000_cost->mulsi_const,
3897 rs6000_cost->mulsi_const9,
3898 rs6000_cost->muldi,
3899 rs6000_cost->divsi,
3900 rs6000_cost->divdi,
3901 rs6000_cost->fp,
3902 rs6000_cost->dmul,
3903 rs6000_cost->sdiv,
3904 rs6000_cost->ddiv,
3905 rs6000_cost->cache_line_size,
3906 rs6000_cost->l1_cache_size,
3907 rs6000_cost->l2_cache_size,
3908 rs6000_cost->simultaneous_prefetches);
3912 #if TARGET_MACHO
3913 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3915 static void
3916 darwin_rs6000_override_options (void)
3918 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3919 off. */
3920 rs6000_altivec_abi = 1;
3921 TARGET_ALTIVEC_VRSAVE = 1;
3922 rs6000_current_abi = ABI_DARWIN;
3924 if (DEFAULT_ABI == ABI_DARWIN
3925 && TARGET_64BIT)
3926 darwin_one_byte_bool = 1;
3928 if (TARGET_64BIT && ! TARGET_POWERPC64)
3930 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3931 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3933 if (flag_mkernel)
3935 rs6000_default_long_calls = 1;
3936 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3939 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3940 Altivec. */
3941 if (!flag_mkernel && !flag_apple_kext
3942 && TARGET_64BIT
3943 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3944 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3946 /* Unless the user (not the configurer) has explicitly overridden
3947 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to the
3948 G4 unless targeting the kernel. */
3949 if (!flag_mkernel
3950 && !flag_apple_kext
3951 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3952 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3953 && ! global_options_set.x_rs6000_cpu_index)
3955 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3958 #endif
3960 /* If not otherwise specified by a target, make 'long double' equivalent to
3961 'double'. */
3963 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3964 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3965 #endif
3967 /* Return the builtin mask of the various options in use that could affect
3968 which builtins are available. In the past we used target_flags, but we've
3969 run out of bits, and some options like SPE and PAIRED are no longer in
3970 target_flags. */
3972 HOST_WIDE_INT
3973 rs6000_builtin_mask_calculate (void)
3975 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3976 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3977 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3978 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3979 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3980 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3981 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3982 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3983 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3984 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3985 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3986 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3987 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3988 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3989 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3990 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3991 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3992 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3993 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3994 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3995 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3996 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
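/* A small sketch of how the mask is typically consumed (an assumed
   helper for illustration; the real checks live in the builtin
   expansion machinery): a builtin carrying RS6000_BTM_* requirement
   bits is usable only when all of them are currently enabled.  */

static bool ATTRIBUTE_UNUSED
builtin_enabled_sketch (HOST_WIDE_INT required_mask)
{
  return (rs6000_builtin_mask_calculate () & required_mask)
	 == required_mask;
}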
3999 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
4000 to clobber the XER[CA] bit because clobbering that bit without telling
4001 the compiler worked just fine with versions of GCC before GCC 5, and
4002 breaking a lot of older code in ways that are hard to track down is
4003 not such a great idea. */
4005 static rtx_insn *
4006 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
4007 vec<const char *> &/*constraints*/,
4008 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
4010 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
4011 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
4012 return NULL;
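/* An illustrative consequence of the hook above (example user code,
   not part of this file): asm such as the classic "normalize to 0/1"
   idiom below clobbers XER[CA] via addic/subfe without declaring it,
   and keeps working because the hook records the clobber for every
   asm statement.  */

static long ATTRIBUTE_UNUSED
asm_clobbers_ca_sketch (long x)
{
  long r;
  /* r = (x != 0); addic sets CA from x-1 and subfe consumes it.  */
  __asm__ ("addic %0,%1,-1\n\tsubfe %0,%0,%1" : "=&r" (r) : "r" (x));
  return r;
}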
4015 /* Override command line options.
4017 Combine build-specific configuration information with options
4018 specified on the command line to set various state variables which
4019 influence code generation, optimization, and expansion of built-in
4020 functions. Assure that command-line configuration preferences are
4021 compatible with each other and with the build configuration; issue
4022 warnings while adjusting configuration or error messages while
4023 rejecting configuration.
4025 Upon entry to this function:
4027 This function is called once at the beginning of
4028 compilation, and then again at the start and end of compiling
4029 each section of code that has a different configuration, as
4030 indicated, for example, by adding the
4032 __attribute__((__target__("cpu=power9")))
4034 qualifier to a function definition or, for example, by bracketing
4035 code between
4037 #pragma GCC target("altivec")
4041 #pragma GCC reset_options
4043 directives. Parameter global_init_p is true for the initial
4044 invocation, which initializes global variables, and false for all
4045 subsequent invocations.
4048 Various global state information is assumed to be valid. This
4049 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
4050 default CPU specified at build configure time, TARGET_DEFAULT,
4051 representing the default set of option flags for the default
4052 target, and global_options_set.x_rs6000_isa_flags, representing
4053 which options were requested on the command line.
4055 Upon return from this function:
4057 rs6000_isa_flags_explicit has a non-zero bit for each flag that
4058 was set by name on the command line. Additionally, if certain
4059 attributes are automatically enabled or disabled by this function
4060 in order to assure compatibility between options and
4061 configuration, the flags associated with those attributes are
4062 also set. By setting these "explicit bits", we avoid the risk
4063 that other code might accidentally overwrite these particular
4064 attributes with "default values".
4066 The various bits of rs6000_isa_flags are set to indicate the
4067 target options that have been selected for the most current
4068 compilation efforts. This has the effect of also turning on the
4069 associated TARGET_XXX values since these are macros which are
4070 generally defined to test the corresponding bit of the
4071 rs6000_isa_flags variable.
4073 The variable rs6000_builtin_mask is set to represent the target
4074 options for the most current compilation efforts, consistent with
4075 the current contents of rs6000_isa_flags. This variable controls
4076 expansion of built-in functions.
4078 Various other global variables and fields of global structures
4079 (over 50 in all) are initialized to reflect the desired options
4080 for the most current compilation efforts. */
4082 static bool
4083 rs6000_option_override_internal (bool global_init_p)
4085 bool ret = true;
4086 bool have_cpu = false;
4088 /* The default cpu requested at configure time, if any. */
4089 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
4091 HOST_WIDE_INT set_masks;
4092 HOST_WIDE_INT ignore_masks;
4093 int cpu_index;
4094 int tune_index;
4095 struct cl_target_option *main_target_opt
4096 = ((global_init_p || target_option_default_node == NULL)
4097 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
4099 /* Print defaults. */
4100 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
4101 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
4103 /* Remember the explicit arguments. */
4104 if (global_init_p)
4105 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
4107 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4108 library functions, so warn about it. The flag may be useful for
4109 performance studies from time to time though, so don't disable it
4110 entirely. */
4111 if (global_options_set.x_rs6000_alignment_flags
4112 && rs6000_alignment_flags == MASK_ALIGN_POWER
4113 && DEFAULT_ABI == ABI_DARWIN
4114 && TARGET_64BIT)
4115 warning (0, "-malign-power is not supported for 64-bit Darwin;"
4116 " it is incompatible with the installed C and C++ libraries");
4118 /* Numerous experiments show that IRA based loop pressure
4119 calculation works better for RTL loop invariant motion on targets
4120 with enough (>= 32) registers. It is an expensive optimization,
4121 so it is on only for peak performance. */
4122 if (optimize >= 3 && global_init_p
4123 && !global_options_set.x_flag_ira_loop_pressure)
4124 flag_ira_loop_pressure = 1;
4126 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4127 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
4128 options were already specified. */
4129 if (flag_sanitize & SANITIZE_USER_ADDRESS
4130 && !global_options_set.x_flag_asynchronous_unwind_tables)
4131 flag_asynchronous_unwind_tables = 1;
4133 /* Set the pointer size. */
4134 if (TARGET_64BIT)
4136 rs6000_pmode = DImode;
4137 rs6000_pointer_size = 64;
4139 else
4141 rs6000_pmode = SImode;
4142 rs6000_pointer_size = 32;
4145 /* Some OSs don't support saving the high part of 64-bit registers on context
4146 switch. Other OSs don't support saving Altivec registers. On those OSs,
4147 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4148 if the user wants either, the user must explicitly specify them and we
4149 won't interfere with the user's specification. */
4151 set_masks = POWERPC_MASKS;
4152 #ifdef OS_MISSING_POWERPC64
4153 if (OS_MISSING_POWERPC64)
4154 set_masks &= ~OPTION_MASK_POWERPC64;
4155 #endif
4156 #ifdef OS_MISSING_ALTIVEC
4157 if (OS_MISSING_ALTIVEC)
4158 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4159 | OTHER_VSX_VECTOR_MASKS);
4160 #endif
4162 /* Don't let the processor default override options given explicitly. */
4163 set_masks &= ~rs6000_isa_flags_explicit;
4165 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
4166 the cpu in a target attribute or pragma, but did not specify a tuning
4167 option, use the cpu for the tuning option rather than the option specified
4168 with -mtune on the command line. Process a '--with-cpu' configuration
4169 request as an implicit -mcpu. */
4170 if (rs6000_cpu_index >= 0)
4172 cpu_index = rs6000_cpu_index;
4173 have_cpu = true;
4175 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4177 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
4178 have_cpu = true;
4180 else if (implicit_cpu)
4182 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
4183 have_cpu = true;
4185 else
4187 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4188 const char *default_cpu = ((!TARGET_POWERPC64)
4189 ? "powerpc"
4190 : ((BYTES_BIG_ENDIAN)
4191 ? "powerpc64"
4192 : "powerpc64le"));
4194 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4195 have_cpu = false;
4198 gcc_assert (cpu_index >= 0);
4200 if (have_cpu)
4202 #ifndef HAVE_AS_POWER9
4203 if (processor_target_table[rs6000_cpu_index].processor
4204 == PROCESSOR_POWER9)
4206 have_cpu = false;
4207 warning (0, "will not generate power9 instructions because "
4208 "assembler lacks power9 support");
4210 #endif
4211 #ifndef HAVE_AS_POWER8
4212 if (processor_target_table[rs6000_cpu_index].processor
4213 == PROCESSOR_POWER8)
4215 have_cpu = false;
4216 warning (0, "will not generate power8 instructions because "
4217 "assembler lacks power8 support");
4219 #endif
4220 #ifndef HAVE_AS_POPCNTD
4221 if (processor_target_table[rs6000_cpu_index].processor
4222 == PROCESSOR_POWER7)
4224 have_cpu = false;
4225 warning (0, "will not generate power7 instructions because "
4226 "assembler lacks power7 support");
4228 #endif
4229 #ifndef HAVE_AS_DFP
4230 if (processor_target_table[rs6000_cpu_index].processor
4231 == PROCESSOR_POWER6)
4233 have_cpu = false;
4234 warning (0, "will not generate power6 instructions because "
4235 "assembler lacks power6 support");
4237 #endif
4238 #ifndef HAVE_AS_POPCNTB
4239 if (processor_target_table[rs6000_cpu_index].processor
4240 == PROCESSOR_POWER5)
4242 have_cpu = false;
4243 warning (0, "will not generate power5 instructions because "
4244 "assembler lacks power5 support");
4246 #endif
4248 if (!have_cpu)
4250 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4251 const char *default_cpu = (!TARGET_POWERPC64
4252 ? "powerpc"
4253 : (BYTES_BIG_ENDIAN
4254 ? "powerpc64"
4255 : "powerpc64le"));
4257 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4261 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4262 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4263 with those from the cpu, except for options that were explicitly set. If
4264 we don't have a cpu, do not override the target bits set in
4265 TARGET_DEFAULT. */
4266 if (have_cpu)
4268 rs6000_isa_flags &= ~set_masks;
4269 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4270 & set_masks);
4272 else
4274 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4275 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4276 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4277 to using rs6000_isa_flags, we need to do the initialization here.
4279 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4280 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4281 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
4282 : processor_target_table[cpu_index].target_enable);
4283 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4286 if (rs6000_tune_index >= 0)
4287 tune_index = rs6000_tune_index;
4288 else if (have_cpu)
4289 rs6000_tune_index = tune_index = cpu_index;
4290 else
4292 size_t i;
4293 enum processor_type tune_proc
4294 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4296 tune_index = -1;
4297 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4298 if (processor_target_table[i].processor == tune_proc)
4300 rs6000_tune_index = tune_index = i;
4301 break;
4305 gcc_assert (tune_index >= 0);
4306 rs6000_cpu = processor_target_table[tune_index].processor;
4308 /* Pick defaults for SPE related control flags. Do this early to make sure
4309 that the TARGET_ macros are representative ASAP. */
4311 int spe_capable_cpu =
4312 (rs6000_cpu == PROCESSOR_PPC8540
4313 || rs6000_cpu == PROCESSOR_PPC8548);
4315 if (!global_options_set.x_rs6000_spe_abi)
4316 rs6000_spe_abi = spe_capable_cpu;
4318 if (!global_options_set.x_rs6000_spe)
4319 rs6000_spe = spe_capable_cpu;
4321 if (!global_options_set.x_rs6000_float_gprs)
4322 rs6000_float_gprs =
4323 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
4324 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
4325 : 0);
4328 if (global_options_set.x_rs6000_spe_abi
4329 && rs6000_spe_abi
4330 && !TARGET_SPE_ABI)
4331 error ("not configured for SPE ABI");
4333 if (global_options_set.x_rs6000_spe
4334 && rs6000_spe
4335 && !TARGET_SPE)
4336 error ("not configured for SPE instruction set");
4338 if (main_target_opt != NULL
4339 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
4340 || (main_target_opt->x_rs6000_spe != rs6000_spe)
4341 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
4342 error ("target attribute or pragma changes SPE ABI");
4344 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4345 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4346 || rs6000_cpu == PROCESSOR_PPCE5500)
4348 if (TARGET_ALTIVEC)
4349 error ("AltiVec not supported in this target");
4350 if (TARGET_SPE)
4351 error ("SPE not supported in this target");
4353 if (rs6000_cpu == PROCESSOR_PPCE6500)
4355 if (TARGET_SPE)
4356 error ("SPE not supported in this target");
4359 /* Disable Cell microcode if we are optimizing for the Cell
4360 and not optimizing for size. */
4361 if (rs6000_gen_cell_microcode == -1)
4362 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4363 && !optimize_size);
4365 /* If we are optimizing big endian systems for space and it's OK to
4366 use instructions that would be microcoded on the Cell, use the
4367 load/store multiple and string instructions. */
4368 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4369 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4370 | OPTION_MASK_STRING);
4372 /* Don't allow -mmultiple or -mstring on little endian systems
4373 unless the cpu is a 750, because the hardware doesn't support the
4374 instructions used in little endian mode, and they cause an alignment
4375 trap. The 750 does not cause an alignment trap (except when the
4376 target address is unaligned). */
4378 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4380 if (TARGET_MULTIPLE)
4382 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4383 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4384 warning (0, "-mmultiple is not supported on little endian systems");
4387 if (TARGET_STRING)
4389 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4390 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4391 warning (0, "-mstring is not supported on little endian systems");
4395 /* If little-endian, default to -mstrict-align on older processors.
4396 Testing for htm matches power8 and later. */
4397 if (!BYTES_BIG_ENDIAN
4398 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4399 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4401 /* -maltivec={le,be} implies -maltivec. */
4402 if (rs6000_altivec_element_order != 0)
4403 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4405 /* Disallow -maltivec=le in big endian mode for now. This is not
4406 known to be useful for anyone. */
4407 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4409 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4410 rs6000_altivec_element_order = 0;
4413 /* Add some warnings for VSX. */
4414 if (TARGET_VSX)
4416 const char *msg = NULL;
4417 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4418 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4420 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4421 msg = N_("-mvsx requires hardware floating point");
4422 else
4424 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4425 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4428 else if (TARGET_PAIRED_FLOAT)
4429 msg = N_("-mvsx and -mpaired are incompatible");
4430 else if (TARGET_AVOID_XFORM > 0)
4431 msg = N_("-mvsx needs indexed addressing");
4432 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4433 & OPTION_MASK_ALTIVEC))
4435 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4436 msg = N_("-mvsx and -mno-altivec are incompatible");
4437 else
4438 msg = N_("-mno-altivec disables vsx");
4441 if (msg)
4443 warning (0, msg);
4444 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4445 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4449 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4450 the -mcpu setting to enable options that conflict. */
4451 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4452 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4453 | OPTION_MASK_ALTIVEC
4454 | OPTION_MASK_VSX)) != 0)
4455 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4456 | OPTION_MASK_DIRECT_MOVE)
4457 & ~rs6000_isa_flags_explicit);
4459 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4460 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4462 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4463 off all of the options that depend on those flags. */
4464 ignore_masks = rs6000_disable_incompatible_switches ();
4466 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4467 unless the user explicitly used the -mno-<option> to disable the code. */
4468 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4469 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0)
4470 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4471 else if (TARGET_P9_MINMAX)
4473 if (have_cpu)
4475 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
4477 /* Legacy behavior: allow -mcpu=power9 with certain
4478 capabilities explicitly disabled. */
4479 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4480 /* However, reject this automatic fix if certain
4481 capabilities required for TARGET_P9_MINMAX support
4482 have been explicitly disabled. */
4483 if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4484 | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags)
4485 != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4486 | OPTION_MASK_UPPER_REGS_DF))
4487 error ("-mpower9-minmax incompatible with explicitly disabled options");
4489 else
4490 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4491 "<xxx> less than power9");
4493 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4494 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4495 & rs6000_isa_flags_explicit))
4496 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4497 were explicitly cleared. */
4498 error ("-mpower9-minmax incompatible with explicitly disabled options");
4499 else
4500 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4502 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4503 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4504 else if (TARGET_VSX)
4505 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4506 else if (TARGET_POPCNTD)
4507 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4508 else if (TARGET_DFP)
4509 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4510 else if (TARGET_CMPB)
4511 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4512 else if (TARGET_FPRND)
4513 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4514 else if (TARGET_POPCNTB)
4515 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4516 else if (TARGET_ALTIVEC)
4517 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4519 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4521 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4522 error ("-mcrypto requires -maltivec");
4523 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4526 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4528 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4529 error ("-mdirect-move requires -mvsx");
4530 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4533 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4535 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4536 error ("-mpower8-vector requires -maltivec");
4537 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4540 if (TARGET_P8_VECTOR && !TARGET_VSX)
4542 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4543 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4544 error ("-mpower8-vector requires -mvsx");
4545 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4547 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4548 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4549 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4551 else
4553 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4554 not explicit. */
4555 rs6000_isa_flags |= OPTION_MASK_VSX;
4556 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4560 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4562 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4563 error ("-mvsx-timode requires -mvsx");
4564 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4567 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4569 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4570 error ("-mhard-dfp requires -mhard-float");
4571 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4574 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4575 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4576 set the individual option. */
4577 if (TARGET_UPPER_REGS > 0)
4579 if (TARGET_VSX
4580 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4582 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4583 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4585 if (TARGET_VSX
4586 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4588 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4589 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4591 if (TARGET_P8_VECTOR
4592 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4594 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4595 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4598 else if (TARGET_UPPER_REGS == 0)
4600 if (TARGET_VSX
4601 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4603 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4604 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4606 if (TARGET_VSX
4607 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4609 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4610 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4612 if (TARGET_P8_VECTOR
4613 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4615 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4616 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4620 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4622 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4623 error ("-mupper-regs-df requires -mvsx");
4624 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4627 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4629 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4630 error ("-mupper-regs-di requires -mvsx");
4631 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4634 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4636 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4637 error ("-mupper-regs-sf requires -mpower8-vector");
4638 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4641 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4642 silently turn off quad memory mode. */
4643 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4645 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4646 warning (0, N_("-mquad-memory requires 64-bit mode"));
4648 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4649 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4651 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4652 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4655 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4656 the words are reversed, but atomic operations can still be done by
4657 swapping the words. */
4658 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4660 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4661 warning (0, N_("-mquad-memory is not available in little endian mode"));
4663 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4666 /* Assume that if the user asked for normal quad memory instructions, they
4667 want the atomic versions as well, unless they explicitly told us not to
4668 use quad word atomic instructions. */
4669 if (TARGET_QUAD_MEMORY
4670 && !TARGET_QUAD_MEMORY_ATOMIC
4671 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4672 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
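/* For reference (assumptions about the generated code, not taken from
   this file): the non-atomic instructions governed here are lq/stq,
   while the atomic forms are lqarx/stqcx.; only the latter remain
   usable on little endian, e.g. to expand a 16-byte compare-and-swap

	static bool
	cas16_sketch (__int128 *p, __int128 *old, __int128 desired)
	{
	  return __atomic_compare_exchange_n (p, old, desired, false,
					      __ATOMIC_SEQ_CST,
					      __ATOMIC_SEQ_CST);
	}

   inline rather than through a library call.  */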
4674 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4675 generating power8 instructions. */
4676 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4677 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4678 & OPTION_MASK_P8_FUSION);
4680 /* Setting additional fusion flags turns on base fusion. */
4681 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4683 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4685 if (TARGET_P8_FUSION_SIGN)
4686 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4688 if (TARGET_TOC_FUSION)
4689 error ("-mtoc-fusion requires -mpower8-fusion");
4691 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4693 else
4694 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4697 /* Power9 fusion is a superset of power8 fusion. */
4698 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4700 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4702 /* We prefer to not mention undocumented options in
4703 error messages. However, if users have managed to select
4704 power9-fusion without selecting power8-fusion, they
4705 already know about undocumented flags. */
4706 error ("-mpower9-fusion requires -mpower8-fusion");
4707 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4709 else
4710 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4713 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4714 generating power9 instructions. */
4715 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4716 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4717 & OPTION_MASK_P9_FUSION);
4719 /* Power8 does not fuse sign extended loads with the addis. If we are
4720 optimizing at high levels for speed, convert a sign extended load into a
4721 zero extending load, and an explicit sign extension. */
4722 if (TARGET_P8_FUSION
4723 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4724 && optimize_function_for_speed_p (cfun)
4725 && optimize >= 3)
4726 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
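/* Illustrative example of the transformation enabled above (assumed
   instruction sequences, not taken from this file): a sign-extending
   halfword load

	lha   9,0(3)

   cannot be fused with a preceding addis on power8, so at -O3 it can
   instead be emitted as a fusable zero-extending load followed by an
   explicit sign extension:

	lhz   9,0(3)
	extsh 9,9
*/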
4728 /* TOC fusion requires 64-bit mode and a medium/large code model. */
4729 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4731 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4732 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4733 warning (0, N_("-mtoc-fusion requires 64-bit"));
4736 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4738 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4739 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4740 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4743 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4744 model. */
4745 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4746 && (TARGET_CMODEL != CMODEL_SMALL)
4747 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4748 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4750 /* ISA 3.0 vector instructions include ISA 2.07. */
4751 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4753 /* We prefer to not mention undocumented options in
4754 error messages. However, if users have managed to select
4755 power9-vector without selecting power8-vector, they
4756 already know about undocumented flags. */
4757 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4758 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4759 error ("-mpower9-vector requires -mpower8-vector");
4760 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4762 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4763 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4764 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4766 else
4768 /* OPTION_MASK_P9_VECTOR is explicit and
4769 OPTION_MASK_P8_VECTOR is not explicit. */
4770 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4771 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4775 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4776 -mpower9-dform-vector. */
4777 if (TARGET_P9_DFORM_BOTH > 0)
4779 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4780 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4782 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4783 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4785 else if (TARGET_P9_DFORM_BOTH == 0)
4787 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4788 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4790 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4791 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4794 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4795 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4797 /* We prefer to not mention undocumented options in
4798 error messages. However, if users have managed to select
4799 power9-dform without selecting power9-vector, they
4800 already know about undocumented flags. */
4801 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4802 && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR
4803 | OPTION_MASK_P9_DFORM_VECTOR)))
4804 error ("-mpower9-dform requires -mpower9-vector");
4805 else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4807 rs6000_isa_flags &=
4808 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4809 rs6000_isa_flags_explicit |=
4810 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4812 else
4814 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4815 OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
4816 may be explicit. */
4817 rs6000_isa_flags |= OPTION_MASK_P9_VECTOR;
4818 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4822 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
4823 && !TARGET_DIRECT_MOVE)
4825 /* We prefer to not mention undocumented options in
4826 error messages. However, if users have managed to select
4827 power9-dform without selecting direct-move, they
4828 already know about undocumented flags. */
4829 if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4830 && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)
4831 || (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)
4832 || (TARGET_P9_DFORM_BOTH == 1)))
4833 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4834 " require -mdirect-move");
4835 else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
4837 rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
4838 rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
4840 else
4842 rs6000_isa_flags &=
4843 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4844 rs6000_isa_flags_explicit |=
4845 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4849 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4851 /* We prefer to not mention undocumented options in
4852 error messages. However, if users have managed to select
4853 power9-dform without selecting upper-regs-df, they
4854 already know about undocumented flags. */
4855 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4856 error ("-mpower9-dform requires -mupper-regs-df");
4857 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4860 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4862 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4863 error ("-mpower9-dform requires -mupper-regs-sf");
4864 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4867 /* Enable LRA by default. */
4868 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4869 rs6000_isa_flags |= OPTION_MASK_LRA;
4871 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4872 but do show up with -mno-lra. Given that -mlra will become the default once
4873 PR 69847 is fixed, turn off the options with problems by default if
4874 -mno-lra was used, and warn if the user explicitly asked for the option.
4876 Enable -mpower9-dform-vector by default if LRA and other power9 options.
4877 Enable -mvsx-timode by default if LRA and VSX. */
4878 if (!TARGET_LRA)
4880 if (TARGET_VSX_TIMODE)
4882 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4883 warning (0, "-mvsx-timode might need -mlra");
4885 else
4886 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4890 else
4892 if (TARGET_VSX && !TARGET_VSX_TIMODE
4893 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4894 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4897 /* Set -mallow-movmisalign explicitly to on if we have full ISA 2.07
4898 support. If we only have ISA 2.06 support and the user did not specify
4899 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4900 but we don't enable the full vectorization support. */
4901 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4902 TARGET_ALLOW_MOVMISALIGN = 1;
4904 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4906 if (TARGET_ALLOW_MOVMISALIGN > 0
4907 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4908 error ("-mallow-movmisalign requires -mvsx");
4910 TARGET_ALLOW_MOVMISALIGN = 0;
4913 /* Determine when unaligned vector accesses are permitted, and when
4914 they are preferred over masked Altivec loads. Note that if
4915 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4916 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4917 not true. */
4918 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4920 if (!TARGET_VSX)
4922 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4923 error ("-mefficient-unaligned-vsx requires -mvsx");
4925 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4928 else if (!TARGET_ALLOW_MOVMISALIGN)
4930 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4931 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4933 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4937 /* Check whether we should allow small integers into VSX registers. We
4938 require direct move to prevent the register allocator from having to
4939 move variables through memory. SImode can be used on ISA 2.07,
4940 while HImode and QImode require ISA 3.0. */
4941 if (TARGET_VSX_SMALL_INTEGER
4942 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4944 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4945 error ("-mvsx-small-integer requires -mpower8-vector, "
4946 "-mupper-regs-di, and -mdirect-move");
4948 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4951 /* Set long double size before the IEEE 128-bit tests. */
4952 if (!global_options_set.x_rs6000_long_double_type_size)
4954 if (main_target_opt != NULL
4955 && (main_target_opt->x_rs6000_long_double_type_size
4956 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4957 error ("target attribute or pragma changes long double size");
4958 else
4959 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4962 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4963 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4964 pick up this default. */
4965 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4966 if (!global_options_set.x_rs6000_ieeequad)
4967 rs6000_ieeequad = 1;
4968 #endif
4970 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4971 systems, but don't enable the __float128 keyword. */
4972 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4973 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4974 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4975 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4977 /* IEEE 128-bit floating point requires VSX support. */
4978 if (!TARGET_VSX)
4980 if (TARGET_FLOAT128_KEYWORD)
4982 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4983 error ("-mfloat128 requires VSX support");
4985 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4986 | OPTION_MASK_FLOAT128_KEYWORD
4987 | OPTION_MASK_FLOAT128_HW);
4990 else if (TARGET_FLOAT128_TYPE)
4992 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4993 error ("-mfloat128-type requires VSX support");
4995 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4996 | OPTION_MASK_FLOAT128_KEYWORD
4997 | OPTION_MASK_FLOAT128_HW);
5001 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
5002 128-bit floating point support to be enabled. */
5003 if (!TARGET_FLOAT128_TYPE)
5005 if (TARGET_FLOAT128_KEYWORD)
5007 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
5009 error ("-mfloat128 requires -mfloat128-type");
5010 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
5011 | OPTION_MASK_FLOAT128_KEYWORD
5012 | OPTION_MASK_FLOAT128_HW);
5014 else
5015 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
5018 if (TARGET_FLOAT128_HW)
5020 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5022 error ("-mfloat128-hardware requires -mfloat128-type");
5023 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5025 else
5026 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
5027 | OPTION_MASK_FLOAT128_KEYWORD
5028 | OPTION_MASK_FLOAT128_HW);
5032 /* If we have -mfloat128-type and full ISA 3.0 support, enable
5033 -mfloat128-hardware by default. However, don't enable the __float128
5034 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
5035 -mfloat128 option as well if it was not already set. */
5036 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
5037 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
5038 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
5039 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
5041 if (TARGET_FLOAT128_HW
5042 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
5044 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5045 error ("-mfloat128-hardware requires full ISA 3.0 support");
5047 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5050 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
5052 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5053 error ("-mfloat128-hardware requires -m64");
5055 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5058 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
5059 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
5060 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
5061 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
5063 /* Print the options after updating the defaults. */
5064 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5065 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
5067 /* E500mc does "better" if we inline more aggressively. Respect the
5068 user's opinion, though. */
5069 if (rs6000_block_move_inline_limit == 0
5070 && (rs6000_cpu == PROCESSOR_PPCE500MC
5071 || rs6000_cpu == PROCESSOR_PPCE500MC64
5072 || rs6000_cpu == PROCESSOR_PPCE5500
5073 || rs6000_cpu == PROCESSOR_PPCE6500))
5074 rs6000_block_move_inline_limit = 128;
5076 /* store_one_arg depends on expand_block_move to handle at least the
5077 size of reg_parm_stack_space. */
5078 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
5079 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
5081 if (global_init_p)
5083 /* If the appropriate debug option is enabled, replace the target hooks
5084 with debug versions that call the real version and then print
5085 debugging information. */
5086 if (TARGET_DEBUG_COST)
5088 targetm.rtx_costs = rs6000_debug_rtx_costs;
5089 targetm.address_cost = rs6000_debug_address_cost;
5090 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
5093 if (TARGET_DEBUG_ADDR)
5095 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
5096 targetm.legitimize_address = rs6000_debug_legitimize_address;
5097 rs6000_secondary_reload_class_ptr
5098 = rs6000_debug_secondary_reload_class;
5099 rs6000_secondary_memory_needed_ptr
5100 = rs6000_debug_secondary_memory_needed;
5101 rs6000_cannot_change_mode_class_ptr
5102 = rs6000_debug_cannot_change_mode_class;
5103 rs6000_preferred_reload_class_ptr
5104 = rs6000_debug_preferred_reload_class;
5105 rs6000_legitimize_reload_address_ptr
5106 = rs6000_debug_legitimize_reload_address;
5107 rs6000_mode_dependent_address_ptr
5108 = rs6000_debug_mode_dependent_address;
5111 if (rs6000_veclibabi_name)
5113 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
5114 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
5115 else
5117 error ("unknown vectorization library ABI type (%s) for "
5118 "-mveclibabi= switch", rs6000_veclibabi_name);
5119 ret = false;
5124 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
5125 target attribute or pragma which automatically enables both options,
5126 unless the altivec ABI was set. This is set by default for 64-bit, but
5127 not for 32-bit. */
5128 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5129 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
5130 | OPTION_MASK_FLOAT128_TYPE
5131 | OPTION_MASK_FLOAT128_KEYWORD)
5132 & ~rs6000_isa_flags_explicit);
5134 /* Enable Altivec ABI for AIX -maltivec. */
5135 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
5137 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5138 error ("target attribute or pragma changes AltiVec ABI");
5139 else
5140 rs6000_altivec_abi = 1;
5143 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
5144 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
5145 be explicitly overridden in either case. */
5146 if (TARGET_ELF)
5148 if (!global_options_set.x_rs6000_altivec_abi
5149 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
5151 if (main_target_opt != NULL &&
5152 !main_target_opt->x_rs6000_altivec_abi)
5153 error ("target attribute or pragma changes AltiVec ABI");
5154 else
5155 rs6000_altivec_abi = 1;
5159 /* Set the Darwin64 ABI as default for 64-bit Darwin.
5160 So far, the only darwin64 targets are also MACH-O. */
5161 if (TARGET_MACHO
5162 && DEFAULT_ABI == ABI_DARWIN
5163 && TARGET_64BIT)
5165 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
5166 error ("target attribute or pragma changes darwin64 ABI");
5167 else
5169 rs6000_darwin64_abi = 1;
5170 /* Default to natural alignment, for better performance. */
5171 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
5175 /* Place FP constants in the constant pool instead of TOC
5176 if section anchors enabled. */
5177 if (flag_section_anchors
5178 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
5179 TARGET_NO_FP_IN_TOC = 1;
5181 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5182 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
5184 #ifdef SUBTARGET_OVERRIDE_OPTIONS
5185 SUBTARGET_OVERRIDE_OPTIONS;
5186 #endif
5187 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
5188 SUBSUBTARGET_OVERRIDE_OPTIONS;
5189 #endif
5190 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
5191 SUB3TARGET_OVERRIDE_OPTIONS;
5192 #endif
5194 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5195 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
5197 /* For the E500 family of cores, reset the single/double FP flags to let us
5198 check that they remain constant across attributes or pragmas. Also,
5199 clear a possible request for string instructions, which are not supported
5200 and which we might have silently queried above for -Os.
5202 For other families, clear ISEL in case it was set implicitly. */
5205 switch (rs6000_cpu)
5207 case PROCESSOR_PPC8540:
5208 case PROCESSOR_PPC8548:
5209 case PROCESSOR_PPCE500MC:
5210 case PROCESSOR_PPCE500MC64:
5211 case PROCESSOR_PPCE5500:
5212 case PROCESSOR_PPCE6500:
5214 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
5215 rs6000_double_float = TARGET_E500_DOUBLE;
5217 rs6000_isa_flags &= ~OPTION_MASK_STRING;
5219 break;
5221 default:
5223 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
5224 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
5226 break;
5229 if (main_target_opt)
5231 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
5232 error ("target attribute or pragma changes single precision floating "
5233 "point");
5234 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
5235 error ("target attribute or pragma changes double precision floating "
5236 "point");
5239 /* Detect invalid option combinations with E500. */
5240 CHECK_E500_OPTIONS;
5242 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
5243 && rs6000_cpu != PROCESSOR_POWER5
5244 && rs6000_cpu != PROCESSOR_POWER6
5245 && rs6000_cpu != PROCESSOR_POWER7
5246 && rs6000_cpu != PROCESSOR_POWER8
5247 && rs6000_cpu != PROCESSOR_POWER9
5248 && rs6000_cpu != PROCESSOR_PPCA2
5249 && rs6000_cpu != PROCESSOR_CELL
5250 && rs6000_cpu != PROCESSOR_PPC476);
5251 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
5252 || rs6000_cpu == PROCESSOR_POWER5
5253 || rs6000_cpu == PROCESSOR_POWER7
5254 || rs6000_cpu == PROCESSOR_POWER8);
5255 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
5256 || rs6000_cpu == PROCESSOR_POWER5
5257 || rs6000_cpu == PROCESSOR_POWER6
5258 || rs6000_cpu == PROCESSOR_POWER7
5259 || rs6000_cpu == PROCESSOR_POWER8
5260 || rs6000_cpu == PROCESSOR_POWER9
5261 || rs6000_cpu == PROCESSOR_PPCE500MC
5262 || rs6000_cpu == PROCESSOR_PPCE500MC64
5263 || rs6000_cpu == PROCESSOR_PPCE5500
5264 || rs6000_cpu == PROCESSOR_PPCE6500);
5266 /* Allow debug switches to override the above settings. These are set to -1
5267 in powerpcspe.opt to indicate the user hasn't directly set the switch. */
5268 if (TARGET_ALWAYS_HINT >= 0)
5269 rs6000_always_hint = TARGET_ALWAYS_HINT;
5271 if (TARGET_SCHED_GROUPS >= 0)
5272 rs6000_sched_groups = TARGET_SCHED_GROUPS;
5274 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
5275 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
5277 rs6000_sched_restricted_insns_priority
5278 = (rs6000_sched_groups ? 1 : 0);
5280 /* Handle -msched-costly-dep option. */
5281 rs6000_sched_costly_dep
5282 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
5284 if (rs6000_sched_costly_dep_str)
5286 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
5287 rs6000_sched_costly_dep = no_dep_costly;
5288 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
5289 rs6000_sched_costly_dep = all_deps_costly;
5290 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
5291 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
5292 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
5293 rs6000_sched_costly_dep = store_to_load_dep_costly;
5294 else
5295 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
5296 atoi (rs6000_sched_costly_dep_str));
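/* Illustrative usage (the semantics of the numeric form are assumed here):
   -msched-costly-dep=store_to_load selects store_to_load_dep_costly above,
   while a string that matches none of the keywords, e.g.
   -msched-costly-dep=20, falls through to atoi and is treated as a raw
   numeric dependence-cost threshold. */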
5299 /* Handle -minsert-sched-nops option. */
5300 rs6000_sched_insert_nops
5301 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
5303 if (rs6000_sched_insert_nops_str)
5305 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
5306 rs6000_sched_insert_nops = sched_finish_none;
5307 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
5308 rs6000_sched_insert_nops = sched_finish_pad_groups;
5309 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
5310 rs6000_sched_insert_nops = sched_finish_regroup_exact;
5311 else
5312 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
5313 atoi (rs6000_sched_insert_nops_str));
5316 /* Handle stack protector. */
5317 if (!global_options_set.x_rs6000_stack_protector_guard)
5318 #ifdef TARGET_THREAD_SSP_OFFSET
5319 rs6000_stack_protector_guard = SSP_TLS;
5320 #else
5321 rs6000_stack_protector_guard = SSP_GLOBAL;
5322 #endif
5324 #ifdef TARGET_THREAD_SSP_OFFSET
5325 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
5326 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
5327 #endif
5329 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
5331 char *endp;
5332 const char *str = rs6000_stack_protector_guard_offset_str;
5334 errno = 0;
5335 long offset = strtol (str, &endp, 0);
5336 if (!*str || *endp || errno)
5337 error ("%qs is not a valid number "
5338 "in -mstack-protector-guard-offset=", str);
5340 if (!IN_RANGE (offset, -0x8000, 0x7fff)
5341 || (TARGET_64BIT && (offset & 3)))
5342 error ("%qs is not a valid offset "
5343 "in -mstack-protector-guard-offset=", str);
5345 rs6000_stack_protector_guard_offset = offset;
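/* Worked example (illustrative values, tracing the checks above):
   -mstack-protector-guard-offset=0x7010 parses via strtol to 28688, which
   lies within [-0x8000, 0x7fff] and is a multiple of 4, so it is accepted
   even for -m64; -mstack-protector-guard-offset=0x8000 (32768) fails the
   range check and triggers the error above. */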
5348 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
5350 const char *str = rs6000_stack_protector_guard_reg_str;
5351 int reg = decode_reg_name (str);
5353 if (!IN_RANGE (reg, 1, 31))
5354 error ("%qs is not a valid base register "
5355 "in -mstack-protector-guard-reg=", str);
5357 rs6000_stack_protector_guard_reg = reg;
5360 if (rs6000_stack_protector_guard == SSP_TLS
5361 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
5362 error ("-mstack-protector-guard=tls needs a valid base register");
5364 if (global_init_p)
5366 #ifdef TARGET_REGNAMES
5367 /* If the user desires alternate register names, copy in the
5368 alternate names now. */
5369 if (TARGET_REGNAMES)
5370 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
5371 #endif
5373 /* Set aix_struct_return last, after the ABI is determined.
5374 If -maix-struct-return or -msvr4-struct-return was explicitly
5375 used, don't override with the ABI default. */
5376 if (!global_options_set.x_aix_struct_return)
5377 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
5379 #if 0
5380 /* IBM XL compiler defaults to unsigned bitfields. */
5381 if (TARGET_XL_COMPAT)
5382 flag_signed_bitfields = 0;
5383 #endif
5385 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
5386 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
5388 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
5390 /* We can only guarantee the availability of DI pseudo-ops when
5391 assembling for 64-bit targets. */
5392 if (!TARGET_64BIT)
5394 targetm.asm_out.aligned_op.di = NULL;
5395 targetm.asm_out.unaligned_op.di = NULL;
5399 /* Set branch target alignment, if not optimizing for size. */
5400 if (!optimize_size)
5402 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
5403 aligned 8-byte to avoid misprediction by the branch predictor. */
5404 if (rs6000_cpu == PROCESSOR_TITAN
5405 || rs6000_cpu == PROCESSOR_CELL)
5407 if (align_functions <= 0)
5408 align_functions = 8;
5409 if (align_jumps <= 0)
5410 align_jumps = 8;
5411 if (align_loops <= 0)
5412 align_loops = 8;
5414 if (rs6000_align_branch_targets)
5416 if (align_functions <= 0)
5417 align_functions = 16;
5418 if (align_jumps <= 0)
5419 align_jumps = 16;
5420 if (align_loops <= 0)
5422 can_override_loop_align = 1;
5423 align_loops = 16;
5426 if (align_jumps_max_skip <= 0)
5427 align_jumps_max_skip = 15;
5428 if (align_loops_max_skip <= 0)
5429 align_loops_max_skip = 15;
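/* Example of the resulting layout (illustrative only): with
   rs6000_align_branch_targets, align_loops = 16 and align_loops_max_skip = 15,
   so a loop head is padded to the next 16-byte boundary whenever that costs
   at most 15 bytes of nops (always true for a 16-byte request), while Cell
   and Titan settle for 8-byte alignment. */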
5432 /* Arrange to save and restore machine status around nested functions. */
5433 init_machine_status = rs6000_init_machine_status;
5435 /* We should always be splitting complex arguments, but we can't break
5436 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5437 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5438 targetm.calls.split_complex_arg = NULL;
5440 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5441 if (DEFAULT_ABI == ABI_AIX)
5442 targetm.calls.custom_function_descriptors = 0;
5445 /* Initialize rs6000_cost with the appropriate target costs. */
5446 if (optimize_size)
5447 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5448 else
5449 switch (rs6000_cpu)
5451 case PROCESSOR_RS64A:
5452 rs6000_cost = &rs64a_cost;
5453 break;
5455 case PROCESSOR_MPCCORE:
5456 rs6000_cost = &mpccore_cost;
5457 break;
5459 case PROCESSOR_PPC403:
5460 rs6000_cost = &ppc403_cost;
5461 break;
5463 case PROCESSOR_PPC405:
5464 rs6000_cost = &ppc405_cost;
5465 break;
5467 case PROCESSOR_PPC440:
5468 rs6000_cost = &ppc440_cost;
5469 break;
5471 case PROCESSOR_PPC476:
5472 rs6000_cost = &ppc476_cost;
5473 break;
5475 case PROCESSOR_PPC601:
5476 rs6000_cost = &ppc601_cost;
5477 break;
5479 case PROCESSOR_PPC603:
5480 rs6000_cost = &ppc603_cost;
5481 break;
5483 case PROCESSOR_PPC604:
5484 rs6000_cost = &ppc604_cost;
5485 break;
5487 case PROCESSOR_PPC604e:
5488 rs6000_cost = &ppc604e_cost;
5489 break;
5491 case PROCESSOR_PPC620:
5492 rs6000_cost = &ppc620_cost;
5493 break;
5495 case PROCESSOR_PPC630:
5496 rs6000_cost = &ppc630_cost;
5497 break;
5499 case PROCESSOR_CELL:
5500 rs6000_cost = &ppccell_cost;
5501 break;
5503 case PROCESSOR_PPC750:
5504 case PROCESSOR_PPC7400:
5505 rs6000_cost = &ppc750_cost;
5506 break;
5508 case PROCESSOR_PPC7450:
5509 rs6000_cost = &ppc7450_cost;
5510 break;
5512 case PROCESSOR_PPC8540:
5513 case PROCESSOR_PPC8548:
5514 rs6000_cost = &ppc8540_cost;
5515 break;
5517 case PROCESSOR_PPCE300C2:
5518 case PROCESSOR_PPCE300C3:
5519 rs6000_cost = &ppce300c2c3_cost;
5520 break;
5522 case PROCESSOR_PPCE500MC:
5523 rs6000_cost = &ppce500mc_cost;
5524 break;
5526 case PROCESSOR_PPCE500MC64:
5527 rs6000_cost = &ppce500mc64_cost;
5528 break;
5530 case PROCESSOR_PPCE5500:
5531 rs6000_cost = &ppce5500_cost;
5532 break;
5534 case PROCESSOR_PPCE6500:
5535 rs6000_cost = &ppce6500_cost;
5536 break;
5538 case PROCESSOR_TITAN:
5539 rs6000_cost = &titan_cost;
5540 break;
5542 case PROCESSOR_POWER4:
5543 case PROCESSOR_POWER5:
5544 rs6000_cost = &power4_cost;
5545 break;
5547 case PROCESSOR_POWER6:
5548 rs6000_cost = &power6_cost;
5549 break;
5551 case PROCESSOR_POWER7:
5552 rs6000_cost = &power7_cost;
5553 break;
5555 case PROCESSOR_POWER8:
5556 rs6000_cost = &power8_cost;
5557 break;
5559 case PROCESSOR_POWER9:
5560 rs6000_cost = &power9_cost;
5561 break;
5563 case PROCESSOR_PPCA2:
5564 rs6000_cost = &ppca2_cost;
5565 break;
5567 default:
5568 gcc_unreachable ();
5571 if (global_init_p)
5573 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5574 rs6000_cost->simultaneous_prefetches,
5575 global_options.x_param_values,
5576 global_options_set.x_param_values);
5577 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5578 global_options.x_param_values,
5579 global_options_set.x_param_values);
5580 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5581 rs6000_cost->cache_line_size,
5582 global_options.x_param_values,
5583 global_options_set.x_param_values);
5584 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5585 global_options.x_param_values,
5586 global_options_set.x_param_values);
5588 /* Increase loop peeling limits based on performance analysis. */
5589 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5590 global_options.x_param_values,
5591 global_options_set.x_param_values);
5592 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5593 global_options.x_param_values,
5594 global_options_set.x_param_values);
5596 /* Use the 'model' -fsched-pressure algorithm by default. */
5597 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5598 SCHED_PRESSURE_MODEL,
5599 global_options.x_param_values,
5600 global_options_set.x_param_values);
5602 /* If using typedef char *va_list, signal that
5603 __builtin_va_start (&ap, 0) can be optimized to
5604 ap = __builtin_next_arg (0). */
5605 if (DEFAULT_ABI != ABI_V4)
5606 targetm.expand_builtin_va_start = NULL;
5609 /* Set up single/double float flags.
5610 If TARGET_HARD_FLOAT is set, but neither single or double is set,
5611 then set both flags. */
5612 if (TARGET_HARD_FLOAT && TARGET_FPRS
5613 && rs6000_single_float == 0 && rs6000_double_float == 0)
5614 rs6000_single_float = rs6000_double_float = 1;
5616 /* If not explicitly specified via option, decide whether to generate indexed
5617 load/store instructions. A value of -1 indicates that the
5618 initial value of this variable has not been overwritten. During
5619 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5620 if (TARGET_AVOID_XFORM == -1)
5621 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5622 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5623 need indexed accesses and the type used is the scalar type of the element
5624 being loaded or stored. */
5625 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5626 && !TARGET_ALTIVEC);
5628 /* Set the -mrecip options. */
5629 if (rs6000_recip_name)
5631 char *p = ASTRDUP (rs6000_recip_name);
5632 char *q;
5633 unsigned int mask, i;
5634 bool invert;
5636 while ((q = strtok (p, ",")) != NULL)
5638 p = NULL;
5639 if (*q == '!')
5641 invert = true;
5642 q++;
5644 else
5645 invert = false;
5647 if (!strcmp (q, "default"))
5648 mask = ((TARGET_RECIP_PRECISION)
5649 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5650 else
5652 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5653 if (!strcmp (q, recip_options[i].string))
5655 mask = recip_options[i].mask;
5656 break;
5659 if (i == ARRAY_SIZE (recip_options))
5661 error ("unknown option for -mrecip=%s", q);
5662 invert = false;
5663 mask = 0;
5664 ret = false;
5668 if (invert)
5669 rs6000_recip_control &= ~mask;
5670 else
5671 rs6000_recip_control |= mask;
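/* Illustrative parse (the exact recip_options[] entry names are assumed
   here): -mrecip=default,!rsqrtd would first OR in the default mask
   (RECIP_HIGH_PRECISION under -mrecip-precision, RECIP_LOW_PRECISION
   otherwise) and then, because of the leading '!', clear the bits of a
   hypothetical "rsqrtd" table entry. */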
5675 /* Set the builtin mask of the various options that could affect which
5676 builtins are available. In the past we used target_flags, but we've run
5677 out of bits, and some options like SPE and PAIRED are no longer in
5678 target_flags. */
5679 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5680 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5681 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5682 rs6000_builtin_mask);
5684 /* Initialize all of the registers. */
5685 rs6000_init_hard_regno_mode_ok (global_init_p);
5687 /* Save the initial options in case the user uses function-specific options. */
5688 if (global_init_p)
5689 target_option_default_node = target_option_current_node
5690 = build_target_option_node (&global_options);
5692 /* If not explicitly specified via option, decide whether to generate the
5693 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
5694 if (TARGET_LINK_STACK == -1)
5695 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5697 return ret;
5700 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5701 define the target cpu type. */
5703 static void
5704 rs6000_option_override (void)
5706 (void) rs6000_option_override_internal (true);
5710 /* Implement targetm.vectorize.builtin_mask_for_load. */
5711 static tree
5712 rs6000_builtin_mask_for_load (void)
5714 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5715 if ((TARGET_ALTIVEC && !TARGET_VSX)
5716 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5717 return altivec_builtin_mask_for_load;
5718 else
5719 return 0;
5722 /* Implement LOOP_ALIGN. */
5723 int
5724 rs6000_loop_align (rtx label)
5726 basic_block bb;
5727 int ninsns;
5729 /* Don't override loop alignment if -falign-loops was specified. */
5730 if (!can_override_loop_align)
5731 return align_loops_log;
5733 bb = BLOCK_FOR_INSN (label);
5734 ninsns = num_loop_insns (bb->loop_father);
5736 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5737 if (ninsns > 4 && ninsns <= 8
5738 && (rs6000_cpu == PROCESSOR_POWER4
5739 || rs6000_cpu == PROCESSOR_POWER5
5740 || rs6000_cpu == PROCESSOR_POWER6
5741 || rs6000_cpu == PROCESSOR_POWER7
5742 || rs6000_cpu == PROCESSOR_POWER8
5743 || rs6000_cpu == PROCESSOR_POWER9))
5744 return 5;
5745 else
5746 return align_loops_log;
5749 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5750 static int
5751 rs6000_loop_align_max_skip (rtx_insn *label)
5753 return (1 << rs6000_loop_align (label)) - 1;
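/* Worked example (illustrative): when rs6000_loop_align returns 5 for a
   small hot loop on POWER4..POWER9, the loop is aligned to 1 << 5 = 32
   bytes, and rs6000_loop_align_max_skip allows up to (1 << 5) - 1 = 31
   padding bytes to reach that boundary. */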
5756 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5757 after applying N iterations. This routine does not determine
5758 how many iterations are required to reach the desired alignment. */
5760 static bool
5761 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5763 if (is_packed)
5764 return false;
5766 if (TARGET_32BIT)
5768 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5769 return true;
5771 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5772 return true;
5774 return false;
5776 else
5778 if (TARGET_MACHO)
5779 return false;
5781 /* Assuming that all other types are naturally aligned. CHECKME! */
5782 return true;
5786 /* Return true if the vector misalignment factor is supported by the
5787 target. */
5788 static bool
5789 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5790 const_tree type,
5791 int misalignment,
5792 bool is_packed)
5794 if (TARGET_VSX)
5796 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5797 return true;
5799 /* Return false if the movmisalign pattern is not supported for this mode. */
5800 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5801 return false;
5803 if (misalignment == -1)
5805 /* Misalignment factor is unknown at compile time but we know
5806 it's word aligned. */
5807 if (rs6000_vector_alignment_reachable (type, is_packed))
5809 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5811 if (element_size == 64 || element_size == 32)
5812 return true;
5815 return false;
5818 /* VSX supports word-aligned vector. */
5819 if (misalignment % 4 == 0)
5820 return true;
5822 return false;
5825 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5826 static int
5827 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5828 tree vectype, int misalign)
5830 unsigned elements;
5831 tree elem_type;
5833 switch (type_of_cost)
5835 case scalar_stmt:
5836 case scalar_load:
5837 case scalar_store:
5838 case vector_stmt:
5839 case vector_load:
5840 case vector_store:
5841 case vec_to_scalar:
5842 case scalar_to_vec:
5843 case cond_branch_not_taken:
5844 return 1;
5846 case vec_perm:
5847 if (TARGET_VSX)
5848 return 3;
5849 else
5850 return 1;
5852 case vec_promote_demote:
5853 if (TARGET_VSX)
5854 return 4;
5855 else
5856 return 1;
5858 case cond_branch_taken:
5859 return 3;
5861 case unaligned_load:
5862 if (TARGET_P9_VECTOR)
5863 return 3;
5865 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5866 return 1;
5868 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5870 elements = TYPE_VECTOR_SUBPARTS (vectype);
5871 if (elements == 2)
5872 /* Double word aligned. */
5873 return 2;
5875 if (elements == 4)
5877 switch (misalign)
5879 case 8:
5880 /* Double word aligned. */
5881 return 2;
5883 case -1:
5884 /* Unknown misalignment. */
5885 case 4:
5886 case 12:
5887 /* Word aligned. */
5888 return 22;
5890 default:
5891 gcc_unreachable ();
5896 if (TARGET_ALTIVEC)
5897 /* Misaligned loads are not supported. */
5898 gcc_unreachable ();
5900 return 2;
5902 case unaligned_store:
5903 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5904 return 1;
5906 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5908 elements = TYPE_VECTOR_SUBPARTS (vectype);
5909 if (elements == 2)
5910 /* Double word aligned. */
5911 return 2;
5913 if (elements == 4)
5915 switch (misalign)
5917 case 8:
5918 /* Double word aligned. */
5919 return 2;
5921 case -1:
5922 /* Unknown misalignment. */
5923 case 4:
5924 case 12:
5925 /* Word aligned. */
5926 return 23;
5928 default:
5929 gcc_unreachable ();
5934 if (TARGET_ALTIVEC)
5935 /* Misaligned stores are not supported. */
5936 gcc_unreachable ();
5938 return 2;
5940 case vec_construct:
5941 /* This is a rough approximation assuming non-constant elements
5942 constructed into a vector via element insertion. FIXME:
5943 vec_construct is not granular enough for uniformly good
5944 decisions. If the initialization is a splat, this is
5945 cheaper than we estimate. Improve this someday. */
5946 elem_type = TREE_TYPE (vectype);
5947 /* 32-bit scalar floats loaded into registers are stored as double
5948 precision, so we need 2 permutes, 2 converts, and 1 merge
5949 to construct a vector of short floats from them. */
5950 if (SCALAR_FLOAT_TYPE_P (elem_type)
5951 && TYPE_PRECISION (elem_type) == 32)
5952 return 5;
5953 /* On POWER9, integer vector types are built up in GPRs and then
5954 use a direct move (2 cycles). For POWER8 this is even worse,
5955 as we need two direct moves and a merge, and the direct moves
5956 are five cycles. */
5957 else if (INTEGRAL_TYPE_P (elem_type))
5959 if (TARGET_P9_VECTOR)
5960 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5961 else
5962 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11;
5964 else
5965 /* V2DFmode doesn't need a direct move. */
5966 return 2;
5968 default:
5969 gcc_unreachable ();
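/* Worked cost examples (illustrative, tracing the switch above): a
   V4SImode vec_construct costs 4 - 1 + 2 = 5 on POWER9 but 4 - 1 + 11 = 14
   when only the ISA 2.07 direct moves are available, and an unaligned
   V4SF load that is merely word aligned (misalign == 4) is priced at 22
   to steer the vectorizer away from it. */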
5973 /* Implement targetm.vectorize.preferred_simd_mode. */
5975 static machine_mode
5976 rs6000_preferred_simd_mode (scalar_mode mode)
5978 if (TARGET_VSX)
5979 switch (mode)
5981 case E_DFmode:
5982 return V2DFmode;
5983 default:;
5985 if (TARGET_ALTIVEC || TARGET_VSX)
5986 switch (mode)
5988 case E_SFmode:
5989 return V4SFmode;
5990 case E_TImode:
5991 return V1TImode;
5992 case E_DImode:
5993 return V2DImode;
5994 case E_SImode:
5995 return V4SImode;
5996 case E_HImode:
5997 return V8HImode;
5998 case E_QImode:
5999 return V16QImode;
6000 default:;
6002 if (TARGET_SPE)
6003 switch (mode)
6005 case E_SFmode:
6006 return V2SFmode;
6007 case E_SImode:
6008 return V2SImode;
6009 default:;
6011 if (TARGET_PAIRED_FLOAT
6012 && mode == SFmode)
6013 return V2SFmode;
6014 return word_mode;
6017 typedef struct _rs6000_cost_data
6019 struct loop *loop_info;
6020 unsigned cost[3];
6021 } rs6000_cost_data;
6023 /* Test for likely overcommitment of vector hardware resources. If a
6024 loop iteration is relatively large, and too large a percentage of
6025 instructions in the loop are vectorized, the cost model may not
6026 adequately reflect delays from unavailable vector resources.
6027 Penalize the loop body cost for this case. */
6029 static void
6030 rs6000_density_test (rs6000_cost_data *data)
6032 const int DENSITY_PCT_THRESHOLD = 85;
6033 const int DENSITY_SIZE_THRESHOLD = 70;
6034 const int DENSITY_PENALTY = 10;
6035 struct loop *loop = data->loop_info;
6036 basic_block *bbs = get_loop_body (loop);
6037 int nbbs = loop->num_nodes;
6038 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
6039 int i, density_pct;
6041 for (i = 0; i < nbbs; i++)
6043 basic_block bb = bbs[i];
6044 gimple_stmt_iterator gsi;
6046 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6048 gimple *stmt = gsi_stmt (gsi);
6049 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6051 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6052 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
6053 not_vec_cost++;
6057 free (bbs);
6058 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
6060 if (density_pct > DENSITY_PCT_THRESHOLD
6061 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
6063 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
6064 if (dump_enabled_p ())
6065 dump_printf_loc (MSG_NOTE, vect_location,
6066 "density %d%%, cost %d exceeds threshold, penalizing "
6067 "loop body cost by %d%%", density_pct,
6068 vec_cost + not_vec_cost, DENSITY_PENALTY);
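/* Numeric sketch (illustrative values): with vec_cost = 90 and
   not_vec_cost = 10, density_pct = 9000 / 100 = 90, which exceeds
   DENSITY_PCT_THRESHOLD (85) while the total size 100 exceeds
   DENSITY_SIZE_THRESHOLD (70), so the body cost is scaled to
   90 * 110 / 100 = 99. */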
6072 /* Implement targetm.vectorize.init_cost. */
6074 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
6075 instruction is needed by the vectorization. */
6076 static bool rs6000_vect_nonmem;
6078 static void *
6079 rs6000_init_cost (struct loop *loop_info)
6081 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
6082 data->loop_info = loop_info;
6083 data->cost[vect_prologue] = 0;
6084 data->cost[vect_body] = 0;
6085 data->cost[vect_epilogue] = 0;
6086 rs6000_vect_nonmem = false;
6087 return data;
6090 /* Implement targetm.vectorize.add_stmt_cost. */
6092 static unsigned
6093 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6094 struct _stmt_vec_info *stmt_info, int misalign,
6095 enum vect_cost_model_location where)
6097 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6098 unsigned retval = 0;
6100 if (flag_vect_cost_model)
6102 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6103 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
6104 misalign);
6105 /* Statements in an inner loop relative to the loop being
6106 vectorized are weighted more heavily. The value here is
6107 arbitrary and could potentially be improved with analysis. */
6108 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6109 count *= 50; /* FIXME. */
6111 retval = (unsigned) (count * stmt_cost);
6112 cost_data->cost[where] += retval;
6114 /* Check whether we're doing something other than just a copy loop.
6115 Not all such loops may be profitably vectorized; see
6116 rs6000_finish_cost. */
6117 if ((kind == vec_to_scalar || kind == vec_perm
6118 || kind == vec_promote_demote || kind == vec_construct
6119 || kind == scalar_to_vec)
6120 || (where == vect_body && kind == vector_stmt))
6121 rs6000_vect_nonmem = true;
6124 return retval;
6127 /* Implement targetm.vectorize.finish_cost. */
6129 static void
6130 rs6000_finish_cost (void *data, unsigned *prologue_cost,
6131 unsigned *body_cost, unsigned *epilogue_cost)
6133 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6135 if (cost_data->loop_info)
6136 rs6000_density_test (cost_data);
6138 /* Don't vectorize minimum-vectorization-factor, simple copy loops
6139 that require versioning for any reason. The vectorization is at
6140 best a wash inside the loop, and the versioning checks make
6141 profitability highly unlikely and potentially quite harmful. */
6142 if (cost_data->loop_info)
6144 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
6145 if (!rs6000_vect_nonmem
6146 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
6147 && LOOP_REQUIRES_VERSIONING (vec_info))
6148 cost_data->cost[vect_body] += 10000;
6151 *prologue_cost = cost_data->cost[vect_prologue];
6152 *body_cost = cost_data->cost[vect_body];
6153 *epilogue_cost = cost_data->cost[vect_epilogue];
6156 /* Implement targetm.vectorize.destroy_cost_data. */
6158 static void
6159 rs6000_destroy_cost_data (void *data)
6161 free (data);
6164 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
6165 library with vectorized intrinsics. */
6167 static tree
6168 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
6169 tree type_in)
6171 char name[32];
6172 const char *suffix = NULL;
6173 tree fntype, new_fndecl, bdecl = NULL_TREE;
6174 int n_args = 1;
6175 const char *bname;
6176 machine_mode el_mode, in_mode;
6177 int n, in_n;
6179 /* Libmass is suitable for unsafe math only as it does not correctly support
6180 parts of IEEE with the required precision such as denormals. Only support
6181 it if we have VSX to use the simd d2 or f4 functions.
6182 XXX: Add variable length support. */
6183 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
6184 return NULL_TREE;
6186 el_mode = TYPE_MODE (TREE_TYPE (type_out));
6187 n = TYPE_VECTOR_SUBPARTS (type_out);
6188 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6189 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6190 if (el_mode != in_mode
6191 || n != in_n)
6192 return NULL_TREE;
6194 switch (fn)
6196 CASE_CFN_ATAN2:
6197 CASE_CFN_HYPOT:
6198 CASE_CFN_POW:
6199 n_args = 2;
6200 gcc_fallthrough ();
6202 CASE_CFN_ACOS:
6203 CASE_CFN_ACOSH:
6204 CASE_CFN_ASIN:
6205 CASE_CFN_ASINH:
6206 CASE_CFN_ATAN:
6207 CASE_CFN_ATANH:
6208 CASE_CFN_CBRT:
6209 CASE_CFN_COS:
6210 CASE_CFN_COSH:
6211 CASE_CFN_ERF:
6212 CASE_CFN_ERFC:
6213 CASE_CFN_EXP2:
6214 CASE_CFN_EXP:
6215 CASE_CFN_EXPM1:
6216 CASE_CFN_LGAMMA:
6217 CASE_CFN_LOG10:
6218 CASE_CFN_LOG1P:
6219 CASE_CFN_LOG2:
6220 CASE_CFN_LOG:
6221 CASE_CFN_SIN:
6222 CASE_CFN_SINH:
6223 CASE_CFN_SQRT:
6224 CASE_CFN_TAN:
6225 CASE_CFN_TANH:
6226 if (el_mode == DFmode && n == 2)
6228 bdecl = mathfn_built_in (double_type_node, fn);
6229 suffix = "d2"; /* pow -> powd2 */
6231 else if (el_mode == SFmode && n == 4)
6233 bdecl = mathfn_built_in (float_type_node, fn);
6234 suffix = "4"; /* powf -> powf4 */
6236 else
6237 return NULL_TREE;
6238 if (!bdecl)
6239 return NULL_TREE;
6240 break;
6242 default:
6243 return NULL_TREE;
6246 gcc_assert (suffix != NULL);
6247 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
6248 if (!bname)
6249 return NULL_TREE;
6251 strcpy (name, bname + sizeof ("__builtin_") - 1);
6252 strcat (name, suffix);
6254 if (n_args == 1)
6255 fntype = build_function_type_list (type_out, type_in, NULL);
6256 else if (n_args == 2)
6257 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
6258 else
6259 gcc_unreachable ();
6261 /* Build a function declaration for the vectorized function. */
6262 new_fndecl = build_decl (BUILTINS_LOCATION,
6263 FUNCTION_DECL, get_identifier (name), fntype);
6264 TREE_PUBLIC (new_fndecl) = 1;
6265 DECL_EXTERNAL (new_fndecl) = 1;
6266 DECL_IS_NOVOPS (new_fndecl) = 1;
6267 TREE_READONLY (new_fndecl) = 1;
6269 return new_fndecl;
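/* Example of the name mangling performed above (illustrative): for
   CFN_POW with V2DFmode in and out, bdecl is __builtin_pow, the
   "__builtin_" prefix is stripped and "d2" appended, yielding an external
   declaration of "powd2" taking two V2DF arguments and returning V2DF. */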
6272 /* Returns a function decl for a vectorized version of the builtin function
6273 with builtin function code FN and the result vector type TYPE, or NULL_TREE
6274 if it is not available. */
6276 static tree
6277 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
6278 tree type_in)
6280 machine_mode in_mode, out_mode;
6281 int in_n, out_n;
6283 if (TARGET_DEBUG_BUILTIN)
6284 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6285 combined_fn_name (combined_fn (fn)),
6286 GET_MODE_NAME (TYPE_MODE (type_out)),
6287 GET_MODE_NAME (TYPE_MODE (type_in)));
6289 if (TREE_CODE (type_out) != VECTOR_TYPE
6290 || TREE_CODE (type_in) != VECTOR_TYPE
6291 || !TARGET_VECTORIZE_BUILTINS)
6292 return NULL_TREE;
6294 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6295 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6296 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6297 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6299 switch (fn)
6301 CASE_CFN_COPYSIGN:
6302 if (VECTOR_UNIT_VSX_P (V2DFmode)
6303 && out_mode == DFmode && out_n == 2
6304 && in_mode == DFmode && in_n == 2)
6305 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
6306 if (VECTOR_UNIT_VSX_P (V4SFmode)
6307 && out_mode == SFmode && out_n == 4
6308 && in_mode == SFmode && in_n == 4)
6309 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
6310 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6311 && out_mode == SFmode && out_n == 4
6312 && in_mode == SFmode && in_n == 4)
6313 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
6314 break;
6315 CASE_CFN_CEIL:
6316 if (VECTOR_UNIT_VSX_P (V2DFmode)
6317 && out_mode == DFmode && out_n == 2
6318 && in_mode == DFmode && in_n == 2)
6319 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
6320 if (VECTOR_UNIT_VSX_P (V4SFmode)
6321 && out_mode == SFmode && out_n == 4
6322 && in_mode == SFmode && in_n == 4)
6323 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
6324 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6325 && out_mode == SFmode && out_n == 4
6326 && in_mode == SFmode && in_n == 4)
6327 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
6328 break;
6329 CASE_CFN_FLOOR:
6330 if (VECTOR_UNIT_VSX_P (V2DFmode)
6331 && out_mode == DFmode && out_n == 2
6332 && in_mode == DFmode && in_n == 2)
6333 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
6334 if (VECTOR_UNIT_VSX_P (V4SFmode)
6335 && out_mode == SFmode && out_n == 4
6336 && in_mode == SFmode && in_n == 4)
6337 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
6338 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6339 && out_mode == SFmode && out_n == 4
6340 && in_mode == SFmode && in_n == 4)
6341 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
6342 break;
6343 CASE_CFN_FMA:
6344 if (VECTOR_UNIT_VSX_P (V2DFmode)
6345 && out_mode == DFmode && out_n == 2
6346 && in_mode == DFmode && in_n == 2)
6347 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
6348 if (VECTOR_UNIT_VSX_P (V4SFmode)
6349 && out_mode == SFmode && out_n == 4
6350 && in_mode == SFmode && in_n == 4)
6351 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
6352 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6353 && out_mode == SFmode && out_n == 4
6354 && in_mode == SFmode && in_n == 4)
6355 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
6356 break;
6357 CASE_CFN_TRUNC:
6358 if (VECTOR_UNIT_VSX_P (V2DFmode)
6359 && out_mode == DFmode && out_n == 2
6360 && in_mode == DFmode && in_n == 2)
6361 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
6362 if (VECTOR_UNIT_VSX_P (V4SFmode)
6363 && out_mode == SFmode && out_n == 4
6364 && in_mode == SFmode && in_n == 4)
6365 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
6366 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6367 && out_mode == SFmode && out_n == 4
6368 && in_mode == SFmode && in_n == 4)
6369 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
6370 break;
6371 CASE_CFN_NEARBYINT:
6372 if (VECTOR_UNIT_VSX_P (V2DFmode)
6373 && flag_unsafe_math_optimizations
6374 && out_mode == DFmode && out_n == 2
6375 && in_mode == DFmode && in_n == 2)
6376 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
6377 if (VECTOR_UNIT_VSX_P (V4SFmode)
6378 && flag_unsafe_math_optimizations
6379 && out_mode == SFmode && out_n == 4
6380 && in_mode == SFmode && in_n == 4)
6381 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
6382 break;
6383 CASE_CFN_RINT:
6384 if (VECTOR_UNIT_VSX_P (V2DFmode)
6385 && !flag_trapping_math
6386 && out_mode == DFmode && out_n == 2
6387 && in_mode == DFmode && in_n == 2)
6388 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
6389 if (VECTOR_UNIT_VSX_P (V4SFmode)
6390 && !flag_trapping_math
6391 && out_mode == SFmode && out_n == 4
6392 && in_mode == SFmode && in_n == 4)
6393 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
6394 break;
6395 default:
6396 break;
6399 /* Generate calls to libmass if appropriate. */
6400 if (rs6000_veclib_handler)
6401 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
6403 return NULL_TREE;
6406 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6408 static tree
6409 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
6410 tree type_in)
6412 machine_mode in_mode, out_mode;
6413 int in_n, out_n;
6415 if (TARGET_DEBUG_BUILTIN)
6416 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6417 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6418 GET_MODE_NAME (TYPE_MODE (type_out)),
6419 GET_MODE_NAME (TYPE_MODE (type_in)));
6421 if (TREE_CODE (type_out) != VECTOR_TYPE
6422 || TREE_CODE (type_in) != VECTOR_TYPE
6423 || !TARGET_VECTORIZE_BUILTINS)
6424 return NULL_TREE;
6426 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6427 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6428 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6429 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6431 enum rs6000_builtins fn
6432 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6433 switch (fn)
6435 case RS6000_BUILTIN_RSQRTF:
6436 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6437 && out_mode == SFmode && out_n == 4
6438 && in_mode == SFmode && in_n == 4)
6439 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6440 break;
6441 case RS6000_BUILTIN_RSQRT:
6442 if (VECTOR_UNIT_VSX_P (V2DFmode)
6443 && out_mode == DFmode && out_n == 2
6444 && in_mode == DFmode && in_n == 2)
6445 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6446 break;
6447 case RS6000_BUILTIN_RECIPF:
6448 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6449 && out_mode == SFmode && out_n == 4
6450 && in_mode == SFmode && in_n == 4)
6451 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6452 break;
6453 case RS6000_BUILTIN_RECIP:
6454 if (VECTOR_UNIT_VSX_P (V2DFmode)
6455 && out_mode == DFmode && out_n == 2
6456 && in_mode == DFmode && in_n == 2)
6457 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6458 break;
6459 default:
6460 break;
6462 return NULL_TREE;
6465 /* Default CPU string for rs6000*_file_start functions. */
6466 static const char *rs6000_default_cpu;
6468 /* Do anything needed at the start of the asm file. */
6470 static void
6471 rs6000_file_start (void)
6473 char buffer[80];
6474 const char *start = buffer;
6475 FILE *file = asm_out_file;
6477 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6479 default_file_start ();
6481 if (flag_verbose_asm)
6483 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6485 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6487 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6488 start = "";
6491 if (global_options_set.x_rs6000_cpu_index)
6493 fprintf (file, "%s -mcpu=%s", start,
6494 processor_target_table[rs6000_cpu_index].name);
6495 start = "";
6498 if (global_options_set.x_rs6000_tune_index)
6500 fprintf (file, "%s -mtune=%s", start,
6501 processor_target_table[rs6000_tune_index].name);
6502 start = "";
6505 if (PPC405_ERRATUM77)
6507 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6508 start = "";
6511 #ifdef USING_ELFOS_H
6512 switch (rs6000_sdata)
6514 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6515 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6516 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6517 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6520 if (rs6000_sdata && g_switch_value)
6522 fprintf (file, "%s -G %d", start,
6523 g_switch_value);
6524 start = "";
6526 #endif
6528 if (*start == '\0')
6529 putc ('\n', file);
6532 #ifdef USING_ELFOS_H
6533 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6534 && !global_options_set.x_rs6000_cpu_index)
6536 fputs ("\t.machine ", asm_out_file);
6537 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6538 fputs ("power9\n", asm_out_file);
6539 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6540 fputs ("power8\n", asm_out_file);
6541 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6542 fputs ("power7\n", asm_out_file);
6543 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6544 fputs ("power6\n", asm_out_file);
6545 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6546 fputs ("power5\n", asm_out_file);
6547 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6548 fputs ("power4\n", asm_out_file);
6549 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6550 fputs ("ppc64\n", asm_out_file);
6551 else
6552 fputs ("ppc\n", asm_out_file);
6554 #endif
6556 if (DEFAULT_ABI == ABI_ELFv2)
6557 fprintf (file, "\t.abiversion 2\n");
6561 /* Return nonzero if this function is known to have a null epilogue. */
6563 int
6564 direct_return (void)
6566 if (reload_completed)
6568 rs6000_stack_t *info = rs6000_stack_info ();
6570 if (info->first_gp_reg_save == 32
6571 && info->first_fp_reg_save == 64
6572 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6573 && ! info->lr_save_p
6574 && ! info->cr_save_p
6575 && info->vrsave_size == 0
6576 && ! info->push_p)
6577 return 1;
6580 return 0;
6583 /* Return the number of instructions it takes to form a constant in an
6584 integer register. */
6586 static int
6587 num_insns_constant_wide (HOST_WIDE_INT value)
6589 /* signed constant loadable with addi */
6590 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6591 return 1;
6593 /* constant loadable with addis */
6594 else if ((value & 0xffff) == 0
6595 && (value >> 31 == -1 || value >> 31 == 0))
6596 return 1;
6598 else if (TARGET_POWERPC64)
6600 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6601 HOST_WIDE_INT high = value >> 31;
6603 if (high == 0 || high == -1)
6604 return 2;
6606 high >>= 1;
6608 if (low == 0)
6609 return num_insns_constant_wide (high) + 1;
6610 else if (high == 0)
6611 return num_insns_constant_wide (low) + 1;
6612 else
6613 return (num_insns_constant_wide (high)
6614 + num_insns_constant_wide (low) + 1);
6617 else
6618 return 2;
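/* Worked examples (illustrative): 0x7fff fits the addi test and costs 1
   insn; 0x12345678 needs lis + ori, so 2; and on a 64-bit target
   0x1234567800000000 has a zero low half, giving
   num_insns_constant_wide (0x12345678) + 1 = 3, roughly lis, ori and a
   shift. */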
6621 int
6622 num_insns_constant (rtx op, machine_mode mode)
6624 HOST_WIDE_INT low, high;
6626 switch (GET_CODE (op))
6628 case CONST_INT:
6629 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6630 && rs6000_is_valid_and_mask (op, mode))
6631 return 2;
6632 else
6633 return num_insns_constant_wide (INTVAL (op));
6635 case CONST_WIDE_INT:
6637 int i;
6638 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6639 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6640 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6641 return ins;
6644 case CONST_DOUBLE:
6645 if (mode == SFmode || mode == SDmode)
6647 long l;
6649 if (DECIMAL_FLOAT_MODE_P (mode))
6650 REAL_VALUE_TO_TARGET_DECIMAL32
6651 (*CONST_DOUBLE_REAL_VALUE (op), l);
6652 else
6653 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6654 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6657 long l[2];
6658 if (DECIMAL_FLOAT_MODE_P (mode))
6659 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6660 else
6661 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6662 high = l[WORDS_BIG_ENDIAN == 0];
6663 low = l[WORDS_BIG_ENDIAN != 0];
6665 if (TARGET_32BIT)
6666 return (num_insns_constant_wide (low)
6667 + num_insns_constant_wide (high));
6668 else
6670 if ((high == 0 && low >= 0)
6671 || (high == -1 && low < 0))
6672 return num_insns_constant_wide (low);
6674 else if (rs6000_is_valid_and_mask (op, mode))
6675 return 2;
6677 else if (low == 0)
6678 return num_insns_constant_wide (high) + 1;
6680 else
6681 return (num_insns_constant_wide (high)
6682 + num_insns_constant_wide (low) + 1);
6685 default:
6686 gcc_unreachable ();
6690 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6691 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6692 corresponding element of the vector, but for V4SFmode and V2SFmode,
6693 the corresponding "float" is interpreted as an SImode integer. */
6695 HOST_WIDE_INT
6696 const_vector_elt_as_int (rtx op, unsigned int elt)
6698 rtx tmp;
6700 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6701 gcc_assert (GET_MODE (op) != V2DImode
6702 && GET_MODE (op) != V2DFmode);
6704 tmp = CONST_VECTOR_ELT (op, elt);
6705 if (GET_MODE (op) == V4SFmode
6706 || GET_MODE (op) == V2SFmode)
6707 tmp = gen_lowpart (SImode, tmp);
6708 return INTVAL (tmp);
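/* Example (illustrative): for a V4SFmode constant, the element 1.0f is
   returned as its SImode bit pattern 0x3f800000 rather than as a float,
   so the vspltis* recognizers below can reason about raw bits. */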
6711 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6712 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6713 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6714 all items are set to the same value and contain COPIES replicas of the
6715 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6716 operand and the others are set to the value of the operand's msb. */
6718 static bool
6719 vspltis_constant (rtx op, unsigned step, unsigned copies)
6721 machine_mode mode = GET_MODE (op);
6722 machine_mode inner = GET_MODE_INNER (mode);
6724 unsigned i;
6725 unsigned nunits;
6726 unsigned bitsize;
6727 unsigned mask;
6729 HOST_WIDE_INT val;
6730 HOST_WIDE_INT splat_val;
6731 HOST_WIDE_INT msb_val;
6733 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6734 return false;
6736 nunits = GET_MODE_NUNITS (mode);
6737 bitsize = GET_MODE_BITSIZE (inner);
6738 mask = GET_MODE_MASK (inner);
6740 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6741 splat_val = val;
6742 msb_val = val >= 0 ? 0 : -1;
6744 /* Construct the value to be splatted, if possible. If not, return 0. */
6745 for (i = 2; i <= copies; i *= 2)
6747 HOST_WIDE_INT small_val;
6748 bitsize /= 2;
6749 small_val = splat_val >> bitsize;
6750 mask >>= bitsize;
6751 if (splat_val != ((HOST_WIDE_INT)
6752 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6753 | (small_val & mask)))
6754 return false;
6755 splat_val = small_val;
6758 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6759 if (EASY_VECTOR_15 (splat_val))
6762 /* Also check if we can splat, and then add the result to itself. Do so if
6763 the value is positive, or if the splat instruction is using OP's mode;
6764 for splat_val < 0, the splat and the add should use the same mode. */
6765 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6766 && (splat_val >= 0 || (step == 1 && copies == 1)))
6769 /* Also check if we are loading up the most significant bit, which can be done by
6770 loading up -1 and shifting the value left by -1. */
6771 else if (EASY_VECTOR_MSB (splat_val, inner))
6774 else
6775 return false;
6777 /* Check if VAL is present in every STEP-th element, and the
6778 other elements are filled with its most significant bit. */
6779 for (i = 1; i < nunits; ++i)
6781 HOST_WIDE_INT desired_val;
6782 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6783 if ((i & (step - 1)) == 0)
6784 desired_val = val;
6785 else
6786 desired_val = msb_val;
6788 if (desired_val != const_vector_elt_as_int (op, elt))
6789 return false;
6792 return true;
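/* Examples (illustrative): a V4SImode splat of 5 succeeds with step == 1,
   copies == 1 (vspltisw 5), while a V4SImode constant whose every element
   is 0x00010001 succeeds with copies == 2, since vspltish 1 produces
   exactly that bit pattern in each 32-bit lane. */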
6795 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6796 instruction, filling in the bottom elements with 0 or -1.
6798 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6799 for the number of zeroes to shift in, or negative for the number of 0xff
6800 bytes to shift in.
6802 OP is a CONST_VECTOR. */
6804 int
6805 vspltis_shifted (rtx op)
6807 machine_mode mode = GET_MODE (op);
6808 machine_mode inner = GET_MODE_INNER (mode);
6810 unsigned i, j;
6811 unsigned nunits;
6812 unsigned mask;
6814 HOST_WIDE_INT val;
6816 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6817 return false;
6819 /* We need to create pseudo registers to do the shift, so don't recognize
6820 shift vector constants after reload. */
6821 if (!can_create_pseudo_p ())
6822 return false;
6824 nunits = GET_MODE_NUNITS (mode);
6825 mask = GET_MODE_MASK (inner);
6827 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6829 /* Check if the value can really be the operand of a vspltis[bhw]. */
6830 if (EASY_VECTOR_15 (val))
6833 /* Also check if we are loading up the most significant bit which can be done
6834 by loading up -1 and shifting the value left by -1. */
6835 else if (EASY_VECTOR_MSB (val, inner))
6838 else
6839 return 0;
6841 /* Check if VAL is present in every STEP-th element until we find elements
6842 that are 0 or all 1 bits. */
6843 for (i = 1; i < nunits; ++i)
6845 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6846 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6848 /* If the value isn't the splat value, check for the remaining elements
6849 being 0/-1. */
6850 if (val != elt_val)
6852 if (elt_val == 0)
6854 for (j = i+1; j < nunits; ++j)
6856 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6857 if (const_vector_elt_as_int (op, elt2) != 0)
6858 return 0;
6861 return (nunits - i) * GET_MODE_SIZE (inner);
6864 else if ((elt_val & mask) == mask)
6866 for (j = i+1; j < nunits; ++j)
6868 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6869 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6870 return 0;
6873 return -((nunits - i) * GET_MODE_SIZE (inner));
6876 else
6877 return 0;
6881 /* If all elements are equal, we don't need to do VSLDOI. */
6882 return 0;
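/* Example (illustrative): the big-endian V4SImode constant { 5, 0, 0, 0 }
   splats 5 and then finds three trailing zero elements, so the function
   returns (4 - 1) * 4 = 12, i.e. vspltisw 5 followed by a VSLDOI that
   shifts in 12 zero bytes. */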
6886 /* Return true if OP is of the given MODE and can be synthesized
6887 with a vspltisb, vspltish or vspltisw. */
6889 bool
6890 easy_altivec_constant (rtx op, machine_mode mode)
6892 unsigned step, copies;
6894 if (mode == VOIDmode)
6895 mode = GET_MODE (op);
6896 else if (mode != GET_MODE (op))
6897 return false;
6899 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6900 constants. */
6901 if (mode == V2DFmode)
6902 return zero_constant (op, mode);
6904 else if (mode == V2DImode)
6906 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6907 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6908 return false;
6910 if (zero_constant (op, mode))
6911 return true;
6913 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6914 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6915 return true;
6917 return false;
6920 /* V1TImode is a special container for TImode. Ignore for now. */
6921 else if (mode == V1TImode)
6922 return false;
6924 /* Start with a vspltisw. */
6925 step = GET_MODE_NUNITS (mode) / 4;
6926 copies = 1;
6928 if (vspltis_constant (op, step, copies))
6929 return true;
6931 /* Then try with a vspltish. */
6932 if (step == 1)
6933 copies <<= 1;
6934 else
6935 step >>= 1;
6937 if (vspltis_constant (op, step, copies))
6938 return true;
6940 /* And finally a vspltisb. */
6941 if (step == 1)
6942 copies <<= 1;
6943 else
6944 step >>= 1;
6946 if (vspltis_constant (op, step, copies))
6947 return true;
6949 if (vspltis_shifted (op) != 0)
6950 return true;
6952 return false;
6955 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6956 result is OP. Abort if it is not possible. */
6958 rtx
6959 gen_easy_altivec_constant (rtx op)
6961 machine_mode mode = GET_MODE (op);
6962 int nunits = GET_MODE_NUNITS (mode);
6963 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6964 unsigned step = nunits / 4;
6965 unsigned copies = 1;
6967 /* Start with a vspltisw. */
6968 if (vspltis_constant (op, step, copies))
6969 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6971 /* Then try with a vspltish. */
6972 if (step == 1)
6973 copies <<= 1;
6974 else
6975 step >>= 1;
6977 if (vspltis_constant (op, step, copies))
6978 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6980 /* And finally a vspltisb. */
6981 if (step == 1)
6982 copies <<= 1;
6983 else
6984 step >>= 1;
6986 if (vspltis_constant (op, step, copies))
6987 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6989 gcc_unreachable ();
6992 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6993 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6995 Return the number of instructions needed (1 or 2) via the pointer
6996 NUM_INSNS_PTR.
6998 Return the constant that is being split via CONSTANT_PTR. */
7000 bool
7001 xxspltib_constant_p (rtx op,
7002 machine_mode mode,
7003 int *num_insns_ptr,
7004 int *constant_ptr)
7006 size_t nunits = GET_MODE_NUNITS (mode);
7007 size_t i;
7008 HOST_WIDE_INT value;
7009 rtx element;
7011 /* Set the returned values to out-of-bounds values. */
7012 *num_insns_ptr = -1;
7013 *constant_ptr = 256;
7015 if (!TARGET_P9_VECTOR)
7016 return false;
7018 if (mode == VOIDmode)
7019 mode = GET_MODE (op);
7021 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
7022 return false;
7024 /* Handle (vec_duplicate <constant>). */
7025 if (GET_CODE (op) == VEC_DUPLICATE)
7027 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
7028 && mode != V2DImode)
7029 return false;
7031 element = XEXP (op, 0);
7032 if (!CONST_INT_P (element))
7033 return false;
7035 value = INTVAL (element);
7036 if (!IN_RANGE (value, -128, 127))
7037 return false;
7040 /* Handle (const_vector [...]). */
7041 else if (GET_CODE (op) == CONST_VECTOR)
7043 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
7044 && mode != V2DImode)
7045 return false;
7047 element = CONST_VECTOR_ELT (op, 0);
7048 if (!CONST_INT_P (element))
7049 return false;
7051 value = INTVAL (element);
7052 if (!IN_RANGE (value, -128, 127))
7053 return false;
7055 for (i = 1; i < nunits; i++)
7057 element = CONST_VECTOR_ELT (op, i);
7058 if (!CONST_INT_P (element))
7059 return false;
7061 if (value != INTVAL (element))
7062 return false;
7066 /* Handle integer constants being loaded into the upper part of the VSX
7067 register as a scalar. If the value isn't 0/-1, only allow it if the mode
7068 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
7069 else if (CONST_INT_P (op))
7071 if (!SCALAR_INT_MODE_P (mode))
7072 return false;
7074 value = INTVAL (op);
7075 if (!IN_RANGE (value, -128, 127))
7076 return false;
7078 if (!IN_RANGE (value, -1, 0))
7080 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
7081 return false;
7083 if (EASY_VECTOR_15 (value))
7084 return false;
7088 else
7089 return false;
7091 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
7092 sign extend. Special case 0/-1 to allow getting any VSX register instead
7093 of an Altivec register. */
7094 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
7095 && EASY_VECTOR_15 (value))
7096 return false;
7098 /* Return # of instructions and the constant byte for XXSPLTIB. */
7099 if (mode == V16QImode)
7100 *num_insns_ptr = 1;
7102 else if (IN_RANGE (value, -1, 0))
7103 *num_insns_ptr = 1;
7105 else
7106 *num_insns_ptr = 2;
7108 *constant_ptr = (int) value;
7109 return true;
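/* For example, a V8HImode vector with every element equal to 100 is not
   an easy vspltish constant (100 is outside the -16..15 splat immediate
   range), so it is accepted here with *NUM_INSNS_PTR == 2: an XXSPLTIB
   of 100 followed by a byte-to-halfword sign extension.  */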
7112 const char *
7113 output_vec_const_move (rtx *operands)
7115 int cst, cst2, shift;
7116 machine_mode mode;
7117 rtx dest, vec;
7119 dest = operands[0];
7120 vec = operands[1];
7121 mode = GET_MODE (dest);
7123 if (TARGET_VSX)
7125 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
7126 int xxspltib_value = 256;
7127 int num_insns = -1;
7129 if (zero_constant (vec, mode))
7131 if (TARGET_P9_VECTOR)
7132 return "xxspltib %x0,0";
7134 else if (dest_vmx_p)
7135 return "vspltisw %0,0";
7137 else
7138 return "xxlxor %x0,%x0,%x0";
7141 if (all_ones_constant (vec, mode))
7143 if (TARGET_P9_VECTOR)
7144 return "xxspltib %x0,255";
7146 else if (dest_vmx_p)
7147 return "vspltisw %0,-1";
7149 else if (TARGET_P8_VECTOR)
7150 return "xxlorc %x0,%x0,%x0";
7152 else
7153 gcc_unreachable ();
7156 if (TARGET_P9_VECTOR
7157 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
7159 if (num_insns == 1)
7161 operands[2] = GEN_INT (xxspltib_value & 0xff);
7162 return "xxspltib %x0,%2";
7165 return "#";
7169 if (TARGET_ALTIVEC)
7171 rtx splat_vec;
7173 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
7174 if (zero_constant (vec, mode))
7175 return "vspltisw %0,0";
7177 if (all_ones_constant (vec, mode))
7178 return "vspltisw %0,-1";
7180 /* Do we need to construct a value using VSLDOI? */
7181 shift = vspltis_shifted (vec);
7182 if (shift != 0)
7183 return "#";
7185 splat_vec = gen_easy_altivec_constant (vec);
7186 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
7187 operands[1] = XEXP (splat_vec, 0);
7188 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
7189 return "#";
7191 switch (GET_MODE (splat_vec))
7193 case E_V4SImode:
7194 return "vspltisw %0,%1";
7196 case E_V8HImode:
7197 return "vspltish %0,%1";
7199 case E_V16QImode:
7200 return "vspltisb %0,%1";
7202 default:
7203 gcc_unreachable ();
7207 gcc_assert (TARGET_SPE);
7209 /* Vector constant 0 is handled as a splitter of V2SI, and in the
7210 pattern of V1DI, V4HI, and V2SF.
7212 FIXME: We should probably return # and add post reload
7213 splitters for these, but this way is so easy ;-). */
7214 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
7215 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
7216 operands[1] = CONST_VECTOR_ELT (vec, 0);
7217 operands[2] = CONST_VECTOR_ELT (vec, 1);
7218 if (cst == cst2)
7219 return "li %0,%1\n\tevmergelo %0,%0,%0";
7220 else if (WORDS_BIG_ENDIAN)
7221 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
7222 else
7223 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
7226 /* Initialize the PAIRED vector TARGET to VALS. */
7228 void
7229 paired_expand_vector_init (rtx target, rtx vals)
7231 machine_mode mode = GET_MODE (target);
7232 int n_elts = GET_MODE_NUNITS (mode);
7233 int n_var = 0;
7234 rtx x, new_rtx, tmp, constant_op, op1, op2;
7235 int i;
7237 for (i = 0; i < n_elts; ++i)
7239 x = XVECEXP (vals, 0, i);
7240 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7241 ++n_var;
7243 if (n_var == 0)
7245 /* Load from constant pool. */
7246 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
7247 return;
7250 if (n_var == 2)
7252 /* The vector is initialized only with non-constants. */
7253 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
7254 XVECEXP (vals, 0, 1));
7256 emit_move_insn (target, new_rtx);
7257 return;
7260 /* One field is non-constant and the other one is a constant. Load the
7261 constant from the constant pool and use ps_merge instruction to
7262 construct the whole vector. */
7263 op1 = XVECEXP (vals, 0, 0);
7264 op2 = XVECEXP (vals, 0, 1);
7266 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
7268 tmp = gen_reg_rtx (GET_MODE (constant_op));
7269 emit_move_insn (tmp, constant_op);
7271 if (CONSTANT_P (op1))
7272 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
7273 else
7274 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
7276 emit_move_insn (target, new_rtx);
7279 void
7280 paired_expand_vector_move (rtx operands[])
7282 rtx op0 = operands[0], op1 = operands[1];
7284 emit_move_insn (op0, op1);
7287 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
7288 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
7289 operands for the relation operation COND. This is a recursive
7290 function. */
7292 static void
7293 paired_emit_vector_compare (enum rtx_code rcode,
7294 rtx dest, rtx op0, rtx op1,
7295 rtx cc_op0, rtx cc_op1)
7297 rtx tmp = gen_reg_rtx (V2SFmode);
7298 rtx tmp1, max, min;
7300 gcc_assert (TARGET_PAIRED_FLOAT);
7301 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
7303 switch (rcode)
7305 case LT:
7306 case LTU:
7307 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7308 return;
7309 case GE:
7310 case GEU:
7311 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7312 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
7313 return;
7314 case LE:
7315 case LEU:
7316 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
7317 return;
7318 case GT:
7319 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7320 return;
7321 case EQ:
7322 tmp1 = gen_reg_rtx (V2SFmode);
7323 max = gen_reg_rtx (V2SFmode);
7324 min = gen_reg_rtx (V2SFmode);
7327 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7328 emit_insn (gen_selv2sf4
7329 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7330 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
7331 emit_insn (gen_selv2sf4
7332 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7333 emit_insn (gen_subv2sf3 (tmp1, min, max));
7334 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
7335 return;
7336 case NE:
7337 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
7338 return;
7339 case UNLE:
7340 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7341 return;
7342 case UNLT:
7343 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
7344 return;
7345 case UNGE:
7346 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7347 return;
7348 case UNGT:
7349 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
7350 return;
7351 default:
7352 gcc_unreachable ();
7355 return;
7358 /* Emit vector conditional expression.
7359 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7360 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
7362 int
7363 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
7364 rtx cond, rtx cc_op0, rtx cc_op1)
7366 enum rtx_code rcode = GET_CODE (cond);
7368 if (!TARGET_PAIRED_FLOAT)
7369 return 0;
7371 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
7373 return 1;
7376 /* Initialize vector TARGET to VALS. */
7378 void
7379 rs6000_expand_vector_init (rtx target, rtx vals)
7381 machine_mode mode = GET_MODE (target);
7382 machine_mode inner_mode = GET_MODE_INNER (mode);
7383 int n_elts = GET_MODE_NUNITS (mode);
7384 int n_var = 0, one_var = -1;
7385 bool all_same = true, all_const_zero = true;
7386 rtx x, mem;
7387 int i;
7389 for (i = 0; i < n_elts; ++i)
7391 x = XVECEXP (vals, 0, i);
7392 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7393 ++n_var, one_var = i;
7394 else if (x != CONST0_RTX (inner_mode))
7395 all_const_zero = false;
7397 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7398 all_same = false;
7401 if (n_var == 0)
7403 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7404 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
7405 if ((int_vector_p || TARGET_VSX) && all_const_zero)
7407 /* Zero register. */
7408 emit_move_insn (target, CONST0_RTX (mode));
7409 return;
7411 else if (int_vector_p && easy_vector_constant (const_vec, mode))
7413 /* Splat immediate. */
7414 emit_insn (gen_rtx_SET (target, const_vec));
7415 return;
7417 else
7419 /* Load from constant pool. */
7420 emit_move_insn (target, const_vec);
7421 return;
7425 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7426 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7428 rtx op[2];
7429 size_t i;
7430 size_t num_elements = all_same ? 1 : 2;
7431 for (i = 0; i < num_elements; i++)
7433 op[i] = XVECEXP (vals, 0, i);
7434 /* Just in case there is a SUBREG with a smaller mode, do a
7435 conversion. */
7436 if (GET_MODE (op[i]) != inner_mode)
7438 rtx tmp = gen_reg_rtx (inner_mode);
7439 convert_move (tmp, op[i], 0);
7440 op[i] = tmp;
7442 /* Allow load with splat double word. */
7443 else if (MEM_P (op[i]))
7445 if (!all_same)
7446 op[i] = force_reg (inner_mode, op[i]);
7448 else if (!REG_P (op[i]))
7449 op[i] = force_reg (inner_mode, op[i]);
7452 if (all_same)
7454 if (mode == V2DFmode)
7455 emit_insn (gen_vsx_splat_v2df (target, op[0]));
7456 else
7457 emit_insn (gen_vsx_splat_v2di (target, op[0]));
7459 else
7461 if (mode == V2DFmode)
7462 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7463 else
7464 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7466 return;
7469 /* Special case initializing vector int if we are on 64-bit systems with
7470 direct move or we have the ISA 3.0 instructions. */
7471 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7472 && TARGET_DIRECT_MOVE_64BIT)
7474 if (all_same)
7476 rtx element0 = XVECEXP (vals, 0, 0);
7477 if (MEM_P (element0))
7478 element0 = rs6000_address_for_fpconvert (element0);
7479 else
7480 element0 = force_reg (SImode, element0);
7482 if (TARGET_P9_VECTOR)
7483 emit_insn (gen_vsx_splat_v4si (target, element0));
7484 else
7486 rtx tmp = gen_reg_rtx (DImode);
7487 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7488 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7490 return;
7492 else
7494 rtx elements[4];
7495 size_t i;
7497 for (i = 0; i < 4; i++)
7499 elements[i] = XVECEXP (vals, 0, i);
7500 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7501 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7504 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7505 elements[2], elements[3]));
7506 return;
7510 /* With single precision floating point on VSX, we know that internally
7511 single precision is actually represented as a double, so we either build
7512 two V2DF vectors and convert them to single precision, or do one
7513 conversion and splat the result to the other elements. */
7514 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7516 if (all_same)
7518 rtx element0 = XVECEXP (vals, 0, 0);
7520 if (TARGET_P9_VECTOR)
7522 if (MEM_P (element0))
7523 element0 = rs6000_address_for_fpconvert (element0);
7525 emit_insn (gen_vsx_splat_v4sf (target, element0));
7528 else
7530 rtx freg = gen_reg_rtx (V4SFmode);
7531 rtx sreg = force_reg (SFmode, element0);
7532 rtx cvt = (TARGET_XSCVDPSPN
7533 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7534 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7536 emit_insn (cvt);
7537 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7538 const0_rtx));
7541 else
7543 rtx dbl_even = gen_reg_rtx (V2DFmode);
7544 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7545 rtx flt_even = gen_reg_rtx (V4SFmode);
7546 rtx flt_odd = gen_reg_rtx (V4SFmode);
7547 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7548 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7549 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7550 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7552 /* Use VMRGEW if we can instead of doing a permute. */
7553 if (TARGET_P8_VECTOR)
7555 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7556 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7557 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7558 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7559 if (BYTES_BIG_ENDIAN)
7560 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7561 else
7562 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7564 else
7566 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7567 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7568 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7569 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7570 rs6000_expand_extract_even (target, flt_even, flt_odd);
7573 return;
7576 /* Special case splat initialization of vector short/char if we are on
7577 64-bit systems with direct move. */
7578 if (all_same && TARGET_DIRECT_MOVE_64BIT
7579 && (mode == V16QImode || mode == V8HImode))
7581 rtx op0 = XVECEXP (vals, 0, 0);
7582 rtx di_tmp = gen_reg_rtx (DImode);
7584 if (!REG_P (op0))
7585 op0 = force_reg (GET_MODE_INNER (mode), op0);
7587 if (mode == V16QImode)
7589 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7590 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7591 return;
7594 if (mode == V8HImode)
7596 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7597 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7598 return;
7602 /* Store value to stack temp. Load vector element. Splat. However, splat
7603 of 64-bit items is not supported on Altivec. */
7604 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7606 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7607 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7608 XVECEXP (vals, 0, 0));
7609 x = gen_rtx_UNSPEC (VOIDmode,
7610 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7611 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7612 gen_rtvec (2,
7613 gen_rtx_SET (target, mem),
7614 x)));
7615 x = gen_rtx_VEC_SELECT (inner_mode, target,
7616 gen_rtx_PARALLEL (VOIDmode,
7617 gen_rtvec (1, const0_rtx)));
7618 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7619 return;
7622 /* One field is non-constant. Load constant then overwrite
7623 varying field. */
7624 if (n_var == 1)
7626 rtx copy = copy_rtx (vals);
7628 /* Load constant part of vector, substitute neighboring value for
7629 varying element. */
7630 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7631 rs6000_expand_vector_init (target, copy);
7633 /* Insert variable. */
7634 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7635 return;
7638 /* Construct the vector in memory one field at a time
7639 and load the whole vector. */
7640 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7641 for (i = 0; i < n_elts; i++)
7642 emit_move_insn (adjust_address_nv (mem, inner_mode,
7643 i * GET_MODE_SIZE (inner_mode)),
7644 XVECEXP (vals, 0, i));
7645 emit_move_insn (target, mem);
7648 /* Set field ELT of TARGET to VAL. */
7650 void
7651 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7653 machine_mode mode = GET_MODE (target);
7654 machine_mode inner_mode = GET_MODE_INNER (mode);
7655 rtx reg = gen_reg_rtx (mode);
7656 rtx mask, mem, x;
7657 int width = GET_MODE_SIZE (inner_mode);
7658 int i;
7660 val = force_reg (GET_MODE (val), val);
7662 if (VECTOR_MEM_VSX_P (mode))
7664 rtx insn = NULL_RTX;
7665 rtx elt_rtx = GEN_INT (elt);
7667 if (mode == V2DFmode)
7668 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7670 else if (mode == V2DImode)
7671 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7673 else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
7674 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
7676 if (mode == V4SImode)
7677 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7678 else if (mode == V8HImode)
7679 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7680 else if (mode == V16QImode)
7681 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7684 if (insn)
7686 emit_insn (insn);
7687 return;
7691 /* Simplify setting single element vectors like V1TImode. */
7692 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7694 emit_move_insn (target, gen_lowpart (mode, val));
7695 return;
7698 /* Load single variable value. */
7699 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7700 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7701 x = gen_rtx_UNSPEC (VOIDmode,
7702 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7703 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7704 gen_rtvec (2,
7705 gen_rtx_SET (reg, mem),
7706 x)));
7708 /* Linear sequence. */
7709 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7710 for (i = 0; i < 16; ++i)
7711 XVECEXP (mask, 0, i) = GEN_INT (i);
7713 /* Set permute mask to insert element into target. */
7714 for (i = 0; i < width; ++i)
7715 XVECEXP (mask, 0, elt*width + i)
7716 = GEN_INT (i + 0x10);
7717 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7719 if (BYTES_BIG_ENDIAN)
7720 x = gen_rtx_UNSPEC (mode,
7721 gen_rtvec (3, target, reg,
7722 force_reg (V16QImode, x)),
7723 UNSPEC_VPERM);
7724 else
7726 if (TARGET_P9_VECTOR)
7727 x = gen_rtx_UNSPEC (mode,
7728 gen_rtvec (3, target, reg,
7729 force_reg (V16QImode, x)),
7730 UNSPEC_VPERMR);
7731 else
7733 /* Invert selector. We prefer to generate VNAND on P8 so
7734 that future fusion opportunities can kick in, but must
7735 generate VNOR elsewhere. */
7736 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7737 rtx iorx = (TARGET_P8_VECTOR
7738 ? gen_rtx_IOR (V16QImode, notx, notx)
7739 : gen_rtx_AND (V16QImode, notx, notx));
7740 rtx tmp = gen_reg_rtx (V16QImode);
7741 emit_insn (gen_rtx_SET (tmp, iorx));
7743 /* Permute with operands reversed and adjusted selector. */
7744 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7745 UNSPEC_VPERM);
7749 emit_insn (gen_rtx_SET (target, x));
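/* The permute selector built above takes bytes 0-15 from TARGET except
   for the bytes of the element being replaced, which come from REG.
   E.g., inserting element 1 of a V4SI yields the selector
   { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15 }.  */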
7752 /* Extract field ELT from VEC into TARGET. */
7754 void
7755 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7757 machine_mode mode = GET_MODE (vec);
7758 machine_mode inner_mode = GET_MODE_INNER (mode);
7759 rtx mem;
7761 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7763 switch (mode)
7765 default:
7766 break;
7767 case E_V1TImode:
7768 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7769 emit_move_insn (target, gen_lowpart (TImode, vec));
7770 break;
7771 case E_V2DFmode:
7772 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7773 return;
7774 case E_V2DImode:
7775 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7776 return;
7777 case E_V4SFmode:
7778 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7779 return;
7780 case E_V16QImode:
7781 if (TARGET_DIRECT_MOVE_64BIT)
7783 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7784 return;
7786 else
7787 break;
7788 case E_V8HImode:
7789 if (TARGET_DIRECT_MOVE_64BIT)
7791 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7792 return;
7794 else
7795 break;
7796 case E_V4SImode:
7797 if (TARGET_DIRECT_MOVE_64BIT)
7799 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7800 return;
7802 break;
7805 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7806 && TARGET_DIRECT_MOVE_64BIT)
7808 if (GET_MODE (elt) != DImode)
7810 rtx tmp = gen_reg_rtx (DImode);
7811 convert_move (tmp, elt, 0);
7812 elt = tmp;
7814 else if (!REG_P (elt))
7815 elt = force_reg (DImode, elt);
7817 switch (mode)
7819 case E_V2DFmode:
7820 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7821 return;
7823 case E_V2DImode:
7824 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7825 return;
7827 case E_V4SFmode:
7828 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7829 return;
7831 case E_V4SImode:
7832 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7833 return;
7835 case E_V8HImode:
7836 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7837 return;
7839 case E_V16QImode:
7840 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7841 return;
7843 default:
7844 gcc_unreachable ();
7848 gcc_assert (CONST_INT_P (elt));
7850 /* Allocate mode-sized buffer. */
7851 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7853 emit_move_insn (mem, vec);
7855 /* Add offset to field within buffer matching vector element. */
7856 mem = adjust_address_nv (mem, inner_mode,
7857 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7859 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7862 /* Helper function to return the register number of an RTX. */
7863 static inline int
7864 regno_or_subregno (rtx op)
7866 if (REG_P (op))
7867 return REGNO (op);
7868 else if (SUBREG_P (op))
7869 return subreg_regno (op);
7870 else
7871 gcc_unreachable ();
7874 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7875 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7876 temporary (BASE_TMP) to fix up the address. Return the new memory address
7877 that is valid for reads or writes to a given register (SCALAR_REG). */
7879 static rtx
7880 rs6000_adjust_vec_address (rtx scalar_reg,
7881 rtx mem,
7882 rtx element,
7883 rtx base_tmp,
7884 machine_mode scalar_mode)
7886 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7887 rtx addr = XEXP (mem, 0);
7888 rtx element_offset;
7889 rtx new_addr;
7890 bool valid_addr_p;
7892 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7893 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7895 /* Calculate what we need to add to the address to get the element
7896 address. */
7897 if (CONST_INT_P (element))
7898 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7899 else
7901 int byte_shift = exact_log2 (scalar_size);
7902 gcc_assert (byte_shift >= 0);
7904 if (byte_shift == 0)
7905 element_offset = element;
7907 else
7909 if (TARGET_POWERPC64)
7910 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7911 else
7912 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7914 element_offset = base_tmp;
7918 /* Create the new address pointing to the element within the vector. If we
7919 are adding 0, we don't have to change the address. */
7920 if (element_offset == const0_rtx)
7921 new_addr = addr;
7923 /* A simple indirect address can be converted into a reg + offset
7924 address. */
7925 else if (REG_P (addr) || SUBREG_P (addr))
7926 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7928 /* Optimize D-FORM addresses with constant offset with a constant element, to
7929 include the element offset in the address directly. */
7930 else if (GET_CODE (addr) == PLUS)
7932 rtx op0 = XEXP (addr, 0);
7933 rtx op1 = XEXP (addr, 1);
7934 rtx insn;
7936 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7937 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7939 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7940 rtx offset_rtx = GEN_INT (offset);
7942 if (IN_RANGE (offset, -32768, 32767)
7943 && (scalar_size < 8 || (offset & 0x3) == 0))
7944 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7945 else
7947 emit_move_insn (base_tmp, offset_rtx);
7948 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7951 else
7953 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7954 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7956 /* Note, ADDI requires the register being added to be a base
7957 register. If the register was R0, load it up into the temporary
7958 and do the add. */
7959 if (op1_reg_p
7960 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7962 insn = gen_add3_insn (base_tmp, op1, element_offset);
7963 gcc_assert (insn != NULL_RTX);
7964 emit_insn (insn);
7967 else if (ele_reg_p
7968 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7970 insn = gen_add3_insn (base_tmp, element_offset, op1);
7971 gcc_assert (insn != NULL_RTX);
7972 emit_insn (insn);
7975 else
7977 emit_move_insn (base_tmp, op1);
7978 emit_insn (gen_add2_insn (base_tmp, element_offset));
7981 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7985 else
7987 emit_move_insn (base_tmp, addr);
7988 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7991 /* If we have a PLUS, we need to see whether the particular register class
7992 allows for D-FORM or X-FORM addressing. */
7993 if (GET_CODE (new_addr) == PLUS)
7995 rtx op1 = XEXP (new_addr, 1);
7996 addr_mask_type addr_mask;
7997 int scalar_regno = regno_or_subregno (scalar_reg);
7999 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
8000 if (INT_REGNO_P (scalar_regno))
8001 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
8003 else if (FP_REGNO_P (scalar_regno))
8004 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
8006 else if (ALTIVEC_REGNO_P (scalar_regno))
8007 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
8009 else
8010 gcc_unreachable ();
8012 if (REG_P (op1) || SUBREG_P (op1))
8013 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
8014 else
8015 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
8018 else if (REG_P (new_addr) || SUBREG_P (new_addr))
8019 valid_addr_p = true;
8021 else
8022 valid_addr_p = false;
8024 if (!valid_addr_p)
8026 emit_move_insn (base_tmp, new_addr);
8027 new_addr = base_tmp;
8030 return change_address (mem, scalar_mode, new_addr);
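/* A typical case for the function above: extracting element 2 of a V4SI
   vector from (mem (plus r3 16)) folds the element offset into the
   D-form address, giving (mem:SI (plus r3 24)), since 16 + 2 * 4 is in
   range and a 4-byte scalar needs no extra alignment; the address is
   then checked against the register class's addressing masks.  */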
8033 /* Split a variable vec_extract operation into the component instructions. */
8035 void
8036 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
8037 rtx tmp_altivec)
8039 machine_mode mode = GET_MODE (src);
8040 machine_mode scalar_mode = GET_MODE (dest);
8041 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
8042 int byte_shift = exact_log2 (scalar_size);
8044 gcc_assert (byte_shift >= 0);
8046 /* If we are given a memory address, optimize to load just the element. We
8047 don't have to adjust the vector element number on little endian
8048 systems. */
8049 if (MEM_P (src))
8051 gcc_assert (REG_P (tmp_gpr));
8052 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
8053 tmp_gpr, scalar_mode));
8054 return;
8057 else if (REG_P (src) || SUBREG_P (src))
8059 int bit_shift = byte_shift + 3;
8060 rtx element2;
8061 int dest_regno = regno_or_subregno (dest);
8062 int src_regno = regno_or_subregno (src);
8063 int element_regno = regno_or_subregno (element);
8065 gcc_assert (REG_P (tmp_gpr));
8067 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
8068 a general purpose register. */
8069 if (TARGET_P9_VECTOR
8070 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
8071 && INT_REGNO_P (dest_regno)
8072 && ALTIVEC_REGNO_P (src_regno)
8073 && INT_REGNO_P (element_regno))
8075 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
8076 rtx element_si = gen_rtx_REG (SImode, element_regno);
8078 if (mode == V16QImode)
8079 emit_insn (VECTOR_ELT_ORDER_BIG
8080 ? gen_vextublx (dest_si, element_si, src)
8081 : gen_vextubrx (dest_si, element_si, src));
8083 else if (mode == V8HImode)
8085 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8086 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
8087 emit_insn (VECTOR_ELT_ORDER_BIG
8088 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
8089 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
8093 else
8095 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8096 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
8097 emit_insn (VECTOR_ELT_ORDER_BIG
8098 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
8099 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
8102 return;
8106 gcc_assert (REG_P (tmp_altivec));
8108 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
8109 an XOR, otherwise we need to subtract. The shift amount is so VSLO
8110 will shift the element into the upper position (adding 3 to convert a
8111 byte shift into a bit shift). */
8112 if (scalar_size == 8)
8114 if (!VECTOR_ELT_ORDER_BIG)
8116 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
8117 element2 = tmp_gpr;
8119 else
8120 element2 = element;
8122 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8123 bit. */
8124 emit_insn (gen_rtx_SET (tmp_gpr,
8125 gen_rtx_AND (DImode,
8126 gen_rtx_ASHIFT (DImode,
8127 element2,
8128 GEN_INT (6)),
8129 GEN_INT (64))));
8131 else
8133 if (!VECTOR_ELT_ORDER_BIG)
8135 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
8137 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8138 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8139 element2 = tmp_gpr;
8141 else
8142 element2 = element;
8144 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8147 /* Get the value into the lower byte of the Altivec register where VSLO
8148 expects it. */
8149 if (TARGET_P9_VECTOR)
8150 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8151 else if (can_create_pseudo_p ())
8152 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8153 else
8155 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8156 emit_move_insn (tmp_di, tmp_gpr);
8157 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8160 /* Do the VSLO to get the value into the final location. */
8161 switch (mode)
8163 case E_V2DFmode:
8164 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8165 return;
8167 case E_V2DImode:
8168 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8169 return;
8171 case E_V4SFmode:
8173 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8174 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8175 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8176 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8177 tmp_altivec));
8179 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8180 return;
8183 case E_V4SImode:
8184 case E_V8HImode:
8185 case E_V16QImode:
8187 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8188 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8189 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8190 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8191 tmp_altivec));
8192 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8193 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
8194 GEN_INT (64 - (8 * scalar_size))));
8195 return;
8198 default:
8199 gcc_unreachable ();
8202 return;
8204 else
8205 gcc_unreachable ();
8208 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
8209 two SImode values. */
8211 static void
8212 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
8214 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
8216 if (CONST_INT_P (si1) && CONST_INT_P (si2))
8218 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
8219 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
8221 emit_move_insn (dest, GEN_INT (const1 | const2));
8222 return;
8225 /* Put si1 into the upper 32 bits of dest. */
8226 if (CONST_INT_P (si1))
8227 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
8228 else
8230 /* Generate RLDIC. */
8231 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
8232 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
8233 rtx mask_rtx = GEN_INT (mask_32bit << 32);
8234 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
8235 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
8236 emit_insn (gen_rtx_SET (dest, and_rtx));
8239 /* Put si2 into the temporary. */
8240 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
8241 if (CONST_INT_P (si2))
8242 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
8243 else
8244 emit_insn (gen_zero_extendsidi2 (tmp, si2));
8246 /* Combine the two parts. */
8247 emit_insn (gen_iordi3 (dest, dest, tmp));
8248 return;
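/* With two constant inputs the helper above folds everything at compile
   time: SI1 == 1 and SI2 == 2 produce the single DImode constant
   0x0000000100000002.  */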
8251 /* Split a V4SI initialization. */
8253 void
8254 rs6000_split_v4si_init (rtx operands[])
8256 rtx dest = operands[0];
8258 /* Destination is a GPR, build up the two DImode parts in place. */
8259 if (REG_P (dest) || SUBREG_P (dest))
8261 int d_regno = regno_or_subregno (dest);
8262 rtx scalar1 = operands[1];
8263 rtx scalar2 = operands[2];
8264 rtx scalar3 = operands[3];
8265 rtx scalar4 = operands[4];
8266 rtx tmp1 = operands[5];
8267 rtx tmp2 = operands[6];
8269 /* Even though we only need one temporary (plus the destination, which
8270 has an early clobber constraint), try to use two temporaries, one for
8271 each double word created. That way the 2nd insn scheduling pass can
8272 rearrange things so the two parts are done in parallel. */
8273 if (BYTES_BIG_ENDIAN)
8275 rtx di_lo = gen_rtx_REG (DImode, d_regno);
8276 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
8277 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
8278 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
8280 else
8282 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
8283 rtx di_hi = gen_rtx_REG (DImode, d_regno);
8284 gcc_assert (!VECTOR_ELT_ORDER_BIG);
8285 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
8286 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
8288 return;
8291 else
8292 gcc_unreachable ();
8295 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
8297 bool
8298 invalid_e500_subreg (rtx op, machine_mode mode)
8300 if (TARGET_E500_DOUBLE)
8302 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
8303 subreg:TI and reg:TF. Decimal float modes are like integer
8304 modes (only low part of each register used) for this
8305 purpose. */
8306 if (GET_CODE (op) == SUBREG
8307 && (mode == SImode || mode == DImode || mode == TImode
8308 || mode == DDmode || mode == TDmode || mode == PTImode)
8309 && REG_P (SUBREG_REG (op))
8310 && (GET_MODE (SUBREG_REG (op)) == DFmode
8311 || GET_MODE (SUBREG_REG (op)) == TFmode
8312 || GET_MODE (SUBREG_REG (op)) == IFmode
8313 || GET_MODE (SUBREG_REG (op)) == KFmode))
8314 return true;
8316 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
8317 reg:TI. */
8318 if (GET_CODE (op) == SUBREG
8319 && (mode == DFmode || mode == TFmode || mode == IFmode
8320 || mode == KFmode)
8321 && REG_P (SUBREG_REG (op))
8322 && (GET_MODE (SUBREG_REG (op)) == DImode
8323 || GET_MODE (SUBREG_REG (op)) == TImode
8324 || GET_MODE (SUBREG_REG (op)) == PTImode
8325 || GET_MODE (SUBREG_REG (op)) == DDmode
8326 || GET_MODE (SUBREG_REG (op)) == TDmode))
8327 return true;
8330 if (TARGET_SPE
8331 && GET_CODE (op) == SUBREG
8332 && mode == SImode
8333 && REG_P (SUBREG_REG (op))
8334 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
8335 return true;
8337 return false;
8340 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8341 selects whether the alignment is ABI-mandated, optional, or
8342 both ABI-mandated and optional alignment. */
8344 unsigned int
8345 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8347 if (how != align_opt)
8349 if (TREE_CODE (type) == VECTOR_TYPE)
8351 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
8352 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
8354 if (align < 64)
8355 align = 64;
8357 else if (align < 128)
8358 align = 128;
8360 else if (TARGET_E500_DOUBLE
8361 && TREE_CODE (type) == REAL_TYPE
8362 && TYPE_MODE (type) == DFmode)
8364 if (align < 64)
8365 align = 64;
8369 if (how != align_abi)
8371 if (TREE_CODE (type) == ARRAY_TYPE
8372 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8374 if (align < BITS_PER_WORD)
8375 align = BITS_PER_WORD;
8379 return align;
8382 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8383 instructions simply ignore the low bits; SPE vector memory
8384 instructions trap on unaligned accesses; VSX memory instructions are
8385 aligned to 4 or 8 bytes. */
8387 static bool
8388 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8390 return (STRICT_ALIGNMENT
8391 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8392 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8393 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
8394 && (int) align < VECTOR_ALIGN (mode)))));
8397 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
8399 bool
8400 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
8402 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
8404 if (computed != 128)
8406 static bool warned;
8407 if (!warned && warn_psabi)
8409 warned = true;
8410 inform (input_location,
8411 "the layout of aggregates containing vectors with"
8412 " %d-byte alignment has changed in GCC 5",
8413 computed / BITS_PER_UNIT);
8416 /* In current GCC there is no special case. */
8417 return false;
8420 return false;
8423 /* AIX increases natural record alignment to doubleword if the first
8424 field is an FP double while the FP fields remain word aligned. */
8426 unsigned int
8427 rs6000_special_round_type_align (tree type, unsigned int computed,
8428 unsigned int specified)
8430 unsigned int align = MAX (computed, specified);
8431 tree field = TYPE_FIELDS (type);
8433 /* Skip all non-field decls. */
8434 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8435 field = DECL_CHAIN (field);
8437 if (field != NULL && field != type)
8439 type = TREE_TYPE (field);
8440 while (TREE_CODE (type) == ARRAY_TYPE)
8441 type = TREE_TYPE (type);
8443 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
8444 align = MAX (align, 64);
8447 return align;
8450 /* Darwin increases record alignment to the natural alignment of
8451 the first field. */
8453 unsigned int
8454 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8455 unsigned int specified)
8457 unsigned int align = MAX (computed, specified);
8459 if (TYPE_PACKED (type))
8460 return align;
8462 /* Find the first field, looking down into aggregates. */
8463 do {
8464 tree field = TYPE_FIELDS (type);
8465 /* Skip all non-field decls. */
8466 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8467 field = DECL_CHAIN (field);
8468 if (! field)
8469 break;
8470 /* A packed field does not contribute any extra alignment. */
8471 if (DECL_PACKED (field))
8472 return align;
8473 type = TREE_TYPE (field);
8474 while (TREE_CODE (type) == ARRAY_TYPE)
8475 type = TREE_TYPE (type);
8476 } while (AGGREGATE_TYPE_P (type));
8478 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8479 align = MAX (align, TYPE_ALIGN (type));
8481 return align;
8484 /* Return 1 for an operand in small memory on V.4/eabi. */
8486 int
8487 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8488 machine_mode mode ATTRIBUTE_UNUSED)
8490 #if TARGET_ELF
8491 rtx sym_ref;
8493 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8494 return 0;
8496 if (DEFAULT_ABI != ABI_V4)
8497 return 0;
8499 /* Vector and float memory instructions have a limited offset on the
8500 SPE, so using a vector or float variable directly as an operand is
8501 not useful. */
8502 if (TARGET_SPE
8503 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
8504 return 0;
8506 if (GET_CODE (op) == SYMBOL_REF)
8507 sym_ref = op;
8509 else if (GET_CODE (op) != CONST
8510 || GET_CODE (XEXP (op, 0)) != PLUS
8511 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8512 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8513 return 0;
8515 else
8517 rtx sum = XEXP (op, 0);
8518 HOST_WIDE_INT summand;
8520 /* We have to be careful here, because it is the referenced address
8521 that must be 32k from _SDA_BASE_, not just the symbol. */
8522 summand = INTVAL (XEXP (sum, 1));
8523 if (summand < 0 || summand > g_switch_value)
8524 return 0;
8526 sym_ref = XEXP (sum, 0);
8529 return SYMBOL_REF_SMALL_P (sym_ref);
8530 #else
8531 return 0;
8532 #endif
8535 /* Return true if either operand is a general purpose register. */
8537 bool
8538 gpr_or_gpr_p (rtx op0, rtx op1)
8540 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8541 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8544 /* Return true if this is a move direct operation between GPR registers and
8545 floating point/VSX registers. */
8547 bool
8548 direct_move_p (rtx op0, rtx op1)
8550 int regno0, regno1;
8552 if (!REG_P (op0) || !REG_P (op1))
8553 return false;
8555 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8556 return false;
8558 regno0 = REGNO (op0);
8559 regno1 = REGNO (op1);
8560 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8561 return false;
8563 if (INT_REGNO_P (regno0))
8564 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8566 else if (INT_REGNO_P (regno1))
8568 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8569 return true;
8571 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8572 return true;
8575 return false;
8578 /* Return true if the OFFSET is valid for the quad address instructions that
8579 use d-form (register + offset) addressing. */
8581 static inline bool
8582 quad_address_offset_p (HOST_WIDE_INT offset)
8584 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
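/* E.g., an offset of 32752 (0x7ff0) is acceptable, while 32760 is
   rejected because it is not a multiple of 16.  */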
8587 /* Return true if the ADDR is an acceptable address for a quad memory
8588 operation of mode MODE (either LQ/STQ for general purpose registers, or
8589 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
8590 address must be strictly valid, i.e. if reload has completed and only
8591 hard registers are acceptable as base registers. */
8593 bool
8594 quad_address_p (rtx addr, machine_mode mode, bool strict)
8596 rtx op0, op1;
8598 if (GET_MODE_SIZE (mode) != 16)
8599 return false;
8601 if (legitimate_indirect_address_p (addr, strict))
8602 return true;
8604 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8605 return false;
8607 if (GET_CODE (addr) != PLUS)
8608 return false;
8610 op0 = XEXP (addr, 0);
8611 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8612 return false;
8614 op1 = XEXP (addr, 1);
8615 if (!CONST_INT_P (op1))
8616 return false;
8618 return quad_address_offset_p (INTVAL (op1));
8621 /* Return true if this is a load or store quad operation. This function does
8622 not handle the atomic quad memory instructions. */
8624 bool
8625 quad_load_store_p (rtx op0, rtx op1)
8627 bool ret;
8629 if (!TARGET_QUAD_MEMORY)
8630 ret = false;
8632 else if (REG_P (op0) && MEM_P (op1))
8633 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8634 && quad_memory_operand (op1, GET_MODE (op1))
8635 && !reg_overlap_mentioned_p (op0, op1));
8637 else if (MEM_P (op0) && REG_P (op1))
8638 ret = (quad_memory_operand (op0, GET_MODE (op0))
8639 && quad_int_reg_operand (op1, GET_MODE (op1)));
8641 else
8642 ret = false;
8644 if (TARGET_DEBUG_ADDR)
8646 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8647 ret ? "true" : "false");
8648 debug_rtx (gen_rtx_SET (op0, op1));
8651 return ret;
8654 /* Given an address, return a constant offset term if one exists. */
8656 static rtx
8657 address_offset (rtx op)
8659 if (GET_CODE (op) == PRE_INC
8660 || GET_CODE (op) == PRE_DEC)
8661 op = XEXP (op, 0);
8662 else if (GET_CODE (op) == PRE_MODIFY
8663 || GET_CODE (op) == LO_SUM)
8664 op = XEXP (op, 1);
8666 if (GET_CODE (op) == CONST)
8667 op = XEXP (op, 0);
8669 if (GET_CODE (op) == PLUS)
8670 op = XEXP (op, 1);
8672 if (CONST_INT_P (op))
8673 return op;
8675 return NULL_RTX;
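/* For example, given (lo_sum (reg) (const (plus (symbol_ref) (const_int 4))))
   the function peels off the LO_SUM, CONST and PLUS wrappers and returns
   (const_int 4).  */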
8678 /* Return true if the MEM operand is a memory operand suitable for use
8679 with a (full width, possibly multiple) gpr load/store. On
8680 powerpc64 this means the offset must be divisible by 4.
8681 Implements 'Y' constraint.
8683 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8684 a constraint function we know the operand has satisfied a suitable
8685 memory predicate. Also accept some odd rtl generated by reload
8686 (see rs6000_legitimize_reload_address for various forms). It is
8687 important that reload rtl be accepted by appropriate constraints
8688 but not by the operand predicate.
8690 Offsetting a lo_sum should not be allowed, except where we know by
8691 alignment that a 32k boundary is not crossed, but see the ???
8692 comment in rs6000_legitimize_reload_address. Note that by
8693 "offsetting" here we mean a further offset to access parts of the
8694 MEM. It's fine to have a lo_sum where the inner address is offset
8695 from a sym, since the same sym+offset will appear in the high part
8696 of the address calculation. */
8698 bool
8699 mem_operand_gpr (rtx op, machine_mode mode)
8701 unsigned HOST_WIDE_INT offset;
8702 int extra;
8703 rtx addr = XEXP (op, 0);
8705 op = address_offset (addr);
8706 if (op == NULL_RTX)
8707 return true;
8709 offset = INTVAL (op);
8710 if (TARGET_POWERPC64 && (offset & 3) != 0)
8711 return false;
8713 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8714 if (extra < 0)
8715 extra = 0;
8717 if (GET_CODE (addr) == LO_SUM)
8718 /* For lo_sum addresses, we must allow any offset except one that
8719 causes a wrap, so test only the low 16 bits. */
8720 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8722 return offset + 0x8000 < 0x10000u - extra;
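/* To make EXTRA concrete: it accounts for the additional words of a
   multi-register access.  A 16-byte value on powerpc64 has EXTRA == 8,
   so an offset is accepted only if both OFFSET and OFFSET + 8 fit the
   signed 16-bit displacement field, i.e. OFFSET <= 32759.  */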
8725 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8726 enforce an offset divisible by 4 even for 32-bit. */
8728 bool
8729 mem_operand_ds_form (rtx op, machine_mode mode)
8731 unsigned HOST_WIDE_INT offset;
8732 int extra;
8733 rtx addr = XEXP (op, 0);
8735 if (!offsettable_address_p (false, mode, addr))
8736 return false;
8738 op = address_offset (addr);
8739 if (op == NULL_RTX)
8740 return true;
8742 offset = INTVAL (op);
8743 if ((offset & 3) != 0)
8744 return false;
8746 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8747 if (extra < 0)
8748 extra = 0;
8750 if (GET_CODE (addr) == LO_SUM)
8751 /* For lo_sum addresses, we must allow any offset except one that
8752 causes a wrap, so test only the low 16 bits. */
8753 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8755 return offset + 0x8000 < 0x10000u - extra;
8758 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8760 static bool
8761 reg_offset_addressing_ok_p (machine_mode mode)
8763 switch (mode)
8765 case E_V16QImode:
8766 case E_V8HImode:
8767 case E_V4SFmode:
8768 case E_V4SImode:
8769 case E_V2DFmode:
8770 case E_V2DImode:
8771 case E_V1TImode:
8772 case E_TImode:
8773 case E_TFmode:
8774 case E_KFmode:
8775 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8776 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8777 a vector mode, if we want to use the VSX registers to move it around,
8778 we need to restrict ourselves to reg+reg addressing. Similarly for
8779 IEEE 128-bit floating point that is passed in a single vector
8780 register. */
8781 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8782 return mode_supports_vsx_dform_quad (mode);
8783 break;
8785 case E_V4HImode:
8786 case E_V2SImode:
8787 case E_V1DImode:
8788 case E_V2SFmode:
8789 /* Paired vector modes. Only reg+reg addressing is valid. */
8790 if (TARGET_PAIRED_FLOAT)
8791 return false;
8792 break;
8794 case E_SDmode:
8795 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8796 addressing for the LFIWZX and STFIWX instructions. */
8797 if (TARGET_NO_SDMODE_STACK)
8798 return false;
8799 break;
8801 default:
8802 break;
8805 return true;
8808 static bool
8809 virtual_stack_registers_memory_p (rtx op)
8811 int regnum;
8813 if (GET_CODE (op) == REG)
8814 regnum = REGNO (op);
8816 else if (GET_CODE (op) == PLUS
8817 && GET_CODE (XEXP (op, 0)) == REG
8818 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8819 regnum = REGNO (XEXP (op, 0));
8821 else
8822 return false;
8824 return (regnum >= FIRST_VIRTUAL_REGISTER
8825 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8828 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8829 is known to not straddle a 32k boundary. This function is used
8830 to determine whether -mcmodel=medium code can use TOC pointer
8831 relative addressing for OP. This means the alignment of the TOC
8832 pointer must also be taken into account, and unfortunately that is
8833 only 8 bytes. */
8835 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8836 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8837 #endif
8839 static bool
8840 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8841 machine_mode mode)
8843 tree decl;
8844 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8846 if (GET_CODE (op) != SYMBOL_REF)
8847 return false;
8849 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8850 SYMBOL_REF. */
8851 if (mode_supports_vsx_dform_quad (mode))
8852 return false;
8854 dsize = GET_MODE_SIZE (mode);
8855 decl = SYMBOL_REF_DECL (op);
8856 if (!decl)
8858 if (dsize == 0)
8859 return false;
8861 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8862 replacing memory addresses with an anchor plus offset. We
8863 could find the decl by rummaging around in the block->objects
8864 VEC for the given offset but that seems like too much work. */
8865 dalign = BITS_PER_UNIT;
8866 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8867 && SYMBOL_REF_ANCHOR_P (op)
8868 && SYMBOL_REF_BLOCK (op) != NULL)
8870 struct object_block *block = SYMBOL_REF_BLOCK (op);
8872 dalign = block->alignment;
8873 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8875 else if (CONSTANT_POOL_ADDRESS_P (op))
8877 /* It would be nice to have get_pool_align()... */
8878 machine_mode cmode = get_pool_mode (op);
8880 dalign = GET_MODE_ALIGNMENT (cmode);
8883 else if (DECL_P (decl))
8885 dalign = DECL_ALIGN (decl);
8887 if (dsize == 0)
8889 /* Allow BLKmode when the entire object is known to not
8890 cross a 32k boundary. */
8891 if (!DECL_SIZE_UNIT (decl))
8892 return false;
8894 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8895 return false;
8897 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8898 if (dsize > 32768)
8899 return false;
8901 dalign /= BITS_PER_UNIT;
8902 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8903 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8904 return dalign >= dsize;
8907 else
8908 gcc_unreachable ();
8910 /* Find how many bits of the alignment we know for this access. */
8911 dalign /= BITS_PER_UNIT;
8912 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8913 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8914 mask = dalign - 1;
8915 lsb = offset & -offset;
8916 mask &= lsb - 1;
8917 dalign = mask + 1;
8919 return dalign >= dsize;
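/* The OFFSET & -OFFSET trick above isolates the lowest set bit of the
   offset: with a declared alignment of 8 bytes and OFFSET == 4, LSB is
   4, so the alignment known for this access drops to 4 bytes before the
   final DALIGN >= DSIZE test.  */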
8922 static bool
8923 constant_pool_expr_p (rtx op)
8925 rtx base, offset;
8927 split_const (op, &base, &offset);
8928 return (GET_CODE (base) == SYMBOL_REF
8929 && CONSTANT_POOL_ADDRESS_P (base)
8930 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8933 static const_rtx tocrel_base, tocrel_offset;
8935 /* Return true if OP is a toc pointer relative address (the output
8936 of create_TOC_reference). If STRICT, do not match non-split
8937 -mcmodel=large/medium toc pointer relative addresses. */
8939 bool
8940 toc_relative_expr_p (const_rtx op, bool strict)
8942 if (!TARGET_TOC)
8943 return false;
8945 if (TARGET_CMODEL != CMODEL_SMALL)
8947 /* When strict ensure we have everything tidy. */
8948 if (strict
8949 && !(GET_CODE (op) == LO_SUM
8950 && REG_P (XEXP (op, 0))
8951 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8952 return false;
8954 /* When not strict, allow non-split TOC addresses and also allow
8955 (lo_sum (high ..)) TOC addresses created during reload. */
8956 if (GET_CODE (op) == LO_SUM)
8957 op = XEXP (op, 1);
8960 tocrel_base = op;
8961 tocrel_offset = const0_rtx;
8962 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8964 tocrel_base = XEXP (op, 0);
8965 tocrel_offset = XEXP (op, 1);
8968 return (GET_CODE (tocrel_base) == UNSPEC
8969 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8972 /* Return true if X is a constant pool address, and also for cmodel=medium
8973 if X is a toc-relative address known to be offsettable within MODE. */
8975 bool
8976 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8977 bool strict)
8979 return (toc_relative_expr_p (x, strict)
8980 && (TARGET_CMODEL != CMODEL_MEDIUM
8981 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8982 || mode == QImode
8983 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8984 INTVAL (tocrel_offset), mode)));
8987 static bool
8988 legitimate_small_data_p (machine_mode mode, rtx x)
8990 return (DEFAULT_ABI == ABI_V4
8991 && !flag_pic && !TARGET_TOC
8992 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8993 && small_data_operand (x, mode));
8996 /* SPE offset addressing is limited to a 5-bit offset counted in double words. */
8997 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
8999 bool
9000 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
9001 bool strict, bool worst_case)
9003 unsigned HOST_WIDE_INT offset;
9004 unsigned int extra;
9006 if (GET_CODE (x) != PLUS)
9007 return false;
9008 if (!REG_P (XEXP (x, 0)))
9009 return false;
9010 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9011 return false;
9012 if (mode_supports_vsx_dform_quad (mode))
9013 return quad_address_p (x, mode, strict);
9014 if (!reg_offset_addressing_ok_p (mode))
9015 return virtual_stack_registers_memory_p (x);
9016 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
9017 return true;
9018 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
9019 return false;
9021 offset = INTVAL (XEXP (x, 1));
9022 extra = 0;
9023 switch (mode)
9025 case E_V4HImode:
9026 case E_V2SImode:
9027 case E_V1DImode:
9028 case E_V2SFmode:
9029 /* SPE vector modes. */
9030 return SPE_CONST_OFFSET_OK (offset);
9032 case E_DFmode:
9033 case E_DDmode:
9034 case E_DImode:
9035 /* On e500v2, we may have:
9037 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
9039 Which gets addressed with evldd instructions. */
9040 if (TARGET_E500_DOUBLE)
9041 return SPE_CONST_OFFSET_OK (offset);
9043 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
9044 addressing. */
9045 if (VECTOR_MEM_VSX_P (mode))
9046 return false;
9048 if (!worst_case)
9049 break;
9050 if (!TARGET_POWERPC64)
9051 extra = 4;
9052 else if (offset & 3)
9053 return false;
9054 break;
9056 case E_TFmode:
9057 case E_IFmode:
9058 case E_KFmode:
9059 case E_TDmode:
9060 case E_TImode:
9061 case E_PTImode:
9062 if (TARGET_E500_DOUBLE)
9063 return (SPE_CONST_OFFSET_OK (offset)
9064 && SPE_CONST_OFFSET_OK (offset + 8));
9066 extra = 8;
9067 if (!worst_case)
9068 break;
9069 if (!TARGET_POWERPC64)
9070 extra = 12;
9071 else if (offset & 3)
9072 return false;
9073 break;
9075 default:
9076 break;
9079 offset += 0x8000;
9080 return offset < 0x10000 - extra;
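/* The final test above is a branch-free signed range check: adding
   0x8000 biases OFFSET so that a single unsigned comparison accepts
   -32768 <= OFFSET <= 32767 - EXTRA.  With EXTRA == 12 (the 32-bit
   worst case for TFmode and friends), the largest accepted offset
   is 32755.  */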
9083 bool
9084 legitimate_indexed_address_p (rtx x, int strict)
9086 rtx op0, op1;
9088 if (GET_CODE (x) != PLUS)
9089 return false;
9091 op0 = XEXP (x, 0);
9092 op1 = XEXP (x, 1);
9094 /* Recognize the rtl generated by reload which we know will later be
9095 replaced with proper base and index regs. */
9096 if (!strict
9097 && reload_in_progress
9098 && (REG_P (op0) || GET_CODE (op0) == PLUS)
9099 && REG_P (op1))
9100 return true;
9102 return (REG_P (op0) && REG_P (op1)
9103 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9104 && INT_REG_OK_FOR_INDEX_P (op1, strict))
9105 || (INT_REG_OK_FOR_BASE_P (op1, strict)
9106 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9109 bool
9110 avoiding_indexed_address_p (machine_mode mode)
9112 /* Avoid indexed addressing for modes that have non-indexed
9113 load/store instruction forms. */
9114 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9117 bool
9118 legitimate_indirect_address_p (rtx x, int strict)
9120 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
9123 bool
9124 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9126 if (!TARGET_MACHO || !flag_pic
9127 || mode != SImode || GET_CODE (x) != MEM)
9128 return false;
9129 x = XEXP (x, 0);
9131 if (GET_CODE (x) != LO_SUM)
9132 return false;
9133 if (GET_CODE (XEXP (x, 0)) != REG)
9134 return false;
9135 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9136 return false;
9137 x = XEXP (x, 1);
9139 return CONSTANT_P (x);
9142 static bool
9143 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9145 if (GET_CODE (x) != LO_SUM)
9146 return false;
9147 if (GET_CODE (XEXP (x, 0)) != REG)
9148 return false;
9149 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9150 return false;
9151 /* Quad word addresses are restricted, and we can't use LO_SUM. */
9152 if (mode_supports_vsx_dform_quad (mode))
9153 return false;
9154 /* Restrict addressing for DI because of our SUBREG hackery. */
9155 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9156 return false;
9157 x = XEXP (x, 1);
9159 if (TARGET_ELF || TARGET_MACHO)
9161 bool large_toc_ok;
9163 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9164 return false;
9165 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as that macro usually
9166 calls push_reload, which belongs to the old reload pass.
9167 LEGITIMIZE_RELOAD_ADDRESS recognizes some LO_SUM addresses as
9168 valid although this function says the opposite. In most cases
9169 LRA can generate correct code for address reloads through its
9170 own transformations; only a few LO_SUM cases defeat it. So we
9171 need code here, analogous to that for LO_SUM in
9172 rs6000_legitimize_reload_address, saying those addresses are still valid. */
9173 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9174 && small_toc_ref (x, VOIDmode));
9175 if (TARGET_TOC && ! large_toc_ok)
9176 return false;
9177 if (GET_MODE_NUNITS (mode) != 1)
9178 return false;
9179 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9180 && !(/* ??? Assume floating point reg based on mode? */
9181 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
9182 && (mode == DFmode || mode == DDmode)))
9183 return false;
9185 return CONSTANT_P (x) || large_toc_ok;
9188 return false;
9192 /* Try machine-dependent ways of modifying an illegitimate address
9193 to be legitimate. If we find one, return the new, valid address.
9194 This is used from only one place: `memory_address' in explow.c.
9196 OLDX is the address as it was before break_out_memory_refs was
9197 called. In some cases it is useful to look at this to decide what
9198 needs to be done.
9200 It is always safe for this function to do nothing. It exists to
9201 recognize opportunities to optimize the output.
9203 On RS/6000, first check for the sum of a register with a constant
9204 integer that is out of range. If so, generate code to add the
9205 constant with the low-order 16 bits masked to the register and force
9206 this result into another register (this can be done with `cau').
9207 Then generate an address of REG+(CONST&0xffff), allowing for the
9208 possibility of bit 16 being a one.
9210 Then check for the sum of a register and something not constant, try to
9211 load the other things into a register and return the sum. */
9213 static rtx
9214 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9215 machine_mode mode)
9217 unsigned int extra;
9219 if (!reg_offset_addressing_ok_p (mode)
9220 || mode_supports_vsx_dform_quad (mode))
9222 if (virtual_stack_registers_memory_p (x))
9223 return x;
9225 /* In theory we should not be seeing addresses of the form reg+0,
9226 but just in case it is generated, optimize it away. */
9227 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9228 return force_reg (Pmode, XEXP (x, 0));
9230 /* For TImode with load/store quad, restrict addresses to just a single
9231 pointer, so it works with both GPRs and VSX registers. */
9232 /* Make sure both operands are registers. */
9233 else if (GET_CODE (x) == PLUS
9234 && (mode != TImode || !TARGET_VSX_TIMODE))
9235 return gen_rtx_PLUS (Pmode,
9236 force_reg (Pmode, XEXP (x, 0)),
9237 force_reg (Pmode, XEXP (x, 1)));
9238 else
9239 return force_reg (Pmode, x);
9241 if (GET_CODE (x) == SYMBOL_REF)
9243 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9244 if (model != 0)
9245 return rs6000_legitimize_tls_address (x, model);
9248 extra = 0;
9249 switch (mode)
9251 case E_TFmode:
9252 case E_TDmode:
9253 case E_TImode:
9254 case E_PTImode:
9255 case E_IFmode:
9256 case E_KFmode:
9257 /* As in legitimate_offset_address_p we do not assume
9258 worst-case. The mode here is just a hint as to the registers
9259 used. A TImode is usually in gprs, but may actually be in
9260 fprs. Leave worst-case scenario for reload to handle via
9261 insn constraints. PTImode is only GPRs. */
9262 extra = 8;
9263 break;
9264 default:
9265 break;
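/* Worked example of the split below: for (plus (reg) (const_int 0x12345))
   we get low_int = 0x2345 and high_int = 0x10000, so an addis-style add of
   the high part leaves a displacement that fits in 16 signed bits. */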
9268 if (GET_CODE (x) == PLUS
9269 && GET_CODE (XEXP (x, 0)) == REG
9270 && GET_CODE (XEXP (x, 1)) == CONST_INT
9271 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9272 >= 0x10000 - extra)
9273 && !(SPE_VECTOR_MODE (mode)
9274 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
9276 HOST_WIDE_INT high_int, low_int;
9277 rtx sum;
9278 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9279 if (low_int >= 0x8000 - extra)
9280 low_int = 0;
9281 high_int = INTVAL (XEXP (x, 1)) - low_int;
9282 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9283 GEN_INT (high_int)), 0);
9284 return plus_constant (Pmode, sum, low_int);
9286 else if (GET_CODE (x) == PLUS
9287 && GET_CODE (XEXP (x, 0)) == REG
9288 && GET_CODE (XEXP (x, 1)) != CONST_INT
9289 && GET_MODE_NUNITS (mode) == 1
9290 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9291 || (/* ??? Assume floating point reg based on mode? */
9292 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9293 && (mode == DFmode || mode == DDmode)))
9294 && !avoiding_indexed_address_p (mode))
9296 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9297 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9299 else if (SPE_VECTOR_MODE (mode)
9300 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
9302 if (mode == DImode)
9303 return x;
9304 /* We accept [reg + reg] and [reg + OFFSET]. */
9306 if (GET_CODE (x) == PLUS)
9308 rtx op1 = XEXP (x, 0);
9309 rtx op2 = XEXP (x, 1);
9310 rtx y;
9312 op1 = force_reg (Pmode, op1);
9314 if (GET_CODE (op2) != REG
9315 && (GET_CODE (op2) != CONST_INT
9316 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
9317 || (GET_MODE_SIZE (mode) > 8
9318 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
9319 op2 = force_reg (Pmode, op2);
9321 /* We can't always do [reg + reg] for these, because [reg +
9322 reg + offset] is not a legitimate addressing mode. */
9323 y = gen_rtx_PLUS (Pmode, op1, op2);
9325 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
9326 return force_reg (Pmode, y);
9327 else
9328 return y;
9331 return force_reg (Pmode, x);
9333 else if ((TARGET_ELF
9334 #if TARGET_MACHO
9335 || !MACHO_DYNAMIC_NO_PIC_P
9336 #endif
9338 && TARGET_32BIT
9339 && TARGET_NO_TOC
9340 && ! flag_pic
9341 && GET_CODE (x) != CONST_INT
9342 && GET_CODE (x) != CONST_WIDE_INT
9343 && GET_CODE (x) != CONST_DOUBLE
9344 && CONSTANT_P (x)
9345 && GET_MODE_NUNITS (mode) == 1
9346 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9347 || (/* ??? Assume floating point reg based on mode? */
9348 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9349 && (mode == DFmode || mode == DDmode))))
9351 rtx reg = gen_reg_rtx (Pmode);
9352 if (TARGET_ELF)
9353 emit_insn (gen_elf_high (reg, x));
9354 else
9355 emit_insn (gen_macho_high (reg, x));
9356 return gen_rtx_LO_SUM (Pmode, reg, x);
9358 else if (TARGET_TOC
9359 && GET_CODE (x) == SYMBOL_REF
9360 && constant_pool_expr_p (x)
9361 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9362 return create_TOC_reference (x, NULL_RTX);
9363 else
9364 return x;
9367 /* Debug version of rs6000_legitimize_address. */
9368 static rtx
9369 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9371 rtx ret;
9372 rtx_insn *insns;
9374 start_sequence ();
9375 ret = rs6000_legitimize_address (x, oldx, mode);
9376 insns = get_insns ();
9377 end_sequence ();
9379 if (ret != x)
9381 fprintf (stderr,
9382 "\nrs6000_legitimize_address: mode %s, old code %s, "
9383 "new code %s, modified\n",
9384 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9385 GET_RTX_NAME (GET_CODE (ret)));
9387 fprintf (stderr, "Original address:\n");
9388 debug_rtx (x);
9390 fprintf (stderr, "oldx:\n");
9391 debug_rtx (oldx);
9393 fprintf (stderr, "New address:\n");
9394 debug_rtx (ret);
9396 if (insns)
9398 fprintf (stderr, "Insns added:\n");
9399 debug_rtx_list (insns, 20);
9402 else
9404 fprintf (stderr,
9405 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9406 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9408 debug_rtx (x);
9411 if (insns)
9412 emit_insn (insns);
9414 return ret;
9417 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9418 We need to emit DTP-relative relocations. */
9420 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9421 static void
9422 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9424 switch (size)
9426 case 4:
9427 fputs ("\t.long\t", file);
9428 break;
9429 case 8:
9430 fputs (DOUBLE_INT_ASM_OP, file);
9431 break;
9432 default:
9433 gcc_unreachable ();
9435 output_addr_const (file, x);
9436 if (TARGET_ELF)
9437 fputs ("@dtprel+0x8000", file);
9438 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
9440 switch (SYMBOL_REF_TLS_MODEL (x))
9442 case 0:
9443 break;
9444 case TLS_MODEL_LOCAL_EXEC:
9445 fputs ("@le", file);
9446 break;
9447 case TLS_MODEL_INITIAL_EXEC:
9448 fputs ("@ie", file);
9449 break;
9450 case TLS_MODEL_GLOBAL_DYNAMIC:
9451 case TLS_MODEL_LOCAL_DYNAMIC:
9452 fputs ("@m", file);
9453 break;
9454 default:
9455 gcc_unreachable ();
9460 /* Return true if X is a symbol that refers to real (rather than emulated)
9461 TLS. */
9463 static bool
9464 rs6000_real_tls_symbol_ref_p (rtx x)
9466 return (GET_CODE (x) == SYMBOL_REF
9467 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9470 /* In the name of slightly smaller debug output, and to cater to
9471 general assembler lossage, recognize various UNSPEC sequences
9472 and turn them back into a direct symbol reference. */
9474 static rtx
9475 rs6000_delegitimize_address (rtx orig_x)
9477 rtx x, y, offset;
9479 orig_x = delegitimize_mem_from_attrs (orig_x);
9480 x = orig_x;
9481 if (MEM_P (x))
9482 x = XEXP (x, 0);
9484 y = x;
9485 if (TARGET_CMODEL != CMODEL_SMALL
9486 && GET_CODE (y) == LO_SUM)
9487 y = XEXP (y, 1);
9489 offset = NULL_RTX;
9490 if (GET_CODE (y) == PLUS
9491 && GET_MODE (y) == Pmode
9492 && CONST_INT_P (XEXP (y, 1)))
9494 offset = XEXP (y, 1);
9495 y = XEXP (y, 0);
9498 if (GET_CODE (y) == UNSPEC
9499 && XINT (y, 1) == UNSPEC_TOCREL)
9501 y = XVECEXP (y, 0, 0);
9503 #ifdef HAVE_AS_TLS
9504 /* Do not associate thread-local symbols with the original
9505 constant pool symbol. */
9506 if (TARGET_XCOFF
9507 && GET_CODE (y) == SYMBOL_REF
9508 && CONSTANT_POOL_ADDRESS_P (y)
9509 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9510 return orig_x;
9511 #endif
9513 if (offset != NULL_RTX)
9514 y = gen_rtx_PLUS (Pmode, y, offset);
9515 if (!MEM_P (orig_x))
9516 return y;
9517 else
9518 return replace_equiv_address_nv (orig_x, y);
9521 if (TARGET_MACHO
9522 && GET_CODE (orig_x) == LO_SUM
9523 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9525 y = XEXP (XEXP (orig_x, 1), 0);
9526 if (GET_CODE (y) == UNSPEC
9527 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9528 return XVECEXP (y, 0, 0);
9531 return orig_x;
9534 /* Return true if X shouldn't be emitted into the debug info.
9535 The linker doesn't like .toc section references from
9536 .debug_* sections, so reject .toc section symbols. */
9538 static bool
9539 rs6000_const_not_ok_for_debug_p (rtx x)
9541 if (GET_CODE (x) == SYMBOL_REF
9542 && CONSTANT_POOL_ADDRESS_P (x))
9544 rtx c = get_pool_constant (x);
9545 machine_mode cmode = get_pool_mode (x);
9546 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9547 return true;
9550 return false;
9554 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9556 static bool
9557 rs6000_legitimate_combined_insn (rtx_insn *insn)
9559 int icode = INSN_CODE (insn);
9561 /* Reject creating doloop insns. Combine should not be allowed
9562 to create these for a number of reasons:
9563 1) In a nested loop, if combine creates one of these in an
9564 outer loop and the register allocator happens to allocate ctr
9565 to the outer loop insn, then the inner loop can't use ctr.
9566 Inner loops ought to be more highly optimized.
9567 2) Combine often wants to create one of these from what was
9568 originally a three insn sequence, first combining the three
9569 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9570 allocated ctr, the splitter takes us back to the three insn
9571 sequence. It's better to stop combine at the two insn
9572 sequence.
9573 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9574 insns, the register allocator sometimes uses floating point
9575 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9576 jump insn and output reloads are not implemented for jumps,
9577 the ctrsi/ctrdi splitters need to handle all possible cases.
9578 That's a pain, and it gets to be seriously difficult when a
9579 splitter that runs after reload needs memory to transfer from
9580 a gpr to an fpr. See PR70098 and PR71763 which are not fixed
9581 for the difficult case. It's better to not create problems
9582 in the first place. */
9583 if (icode != CODE_FOR_nothing
9584 && (icode == CODE_FOR_ctrsi_internal1
9585 || icode == CODE_FOR_ctrdi_internal1
9586 || icode == CODE_FOR_ctrsi_internal2
9587 || icode == CODE_FOR_ctrdi_internal2
9588 || icode == CODE_FOR_ctrsi_internal3
9589 || icode == CODE_FOR_ctrdi_internal3
9590 || icode == CODE_FOR_ctrsi_internal4
9591 || icode == CODE_FOR_ctrdi_internal4))
9592 return false;
9594 return true;
9597 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9599 static GTY(()) rtx rs6000_tls_symbol;
9600 static rtx
9601 rs6000_tls_get_addr (void)
9603 if (!rs6000_tls_symbol)
9604 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9606 return rs6000_tls_symbol;
9609 /* Construct the SYMBOL_REF for TLS GOT references. */
9611 static GTY(()) rtx rs6000_got_symbol;
9612 static rtx
9613 rs6000_got_sym (void)
9615 if (!rs6000_got_symbol)
9617 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9618 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9619 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9622 return rs6000_got_symbol;
9625 /* AIX Thread-Local Address support. */
9627 static rtx
9628 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9630 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9631 const char *name;
9632 char *tlsname;
9634 name = XSTR (addr, 0);
9635 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
9636 or the symbol will be in the TLS private data section. */
9637 if (name[strlen (name) - 1] != ']'
9638 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9639 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9641 tlsname = XALLOCAVEC (char, strlen (name) + 4);
9642 strcpy (tlsname, name);
9643 strcat (tlsname,
9644 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
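/* E.g. a TLS symbol "foo" becomes "foo[TL]", or "foo[UL]" when it is a
   BSS-style (zero-initialized) thread-local symbol. */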
9645 tlsaddr = copy_rtx (addr);
9646 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9648 else
9649 tlsaddr = addr;
9651 /* Place addr into TOC constant pool. */
9652 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9654 /* Output the TOC entry and create the MEM referencing the value. */
9655 if (constant_pool_expr_p (XEXP (sym, 0))
9656 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9658 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9659 mem = gen_const_mem (Pmode, tocref);
9660 set_mem_alias_set (mem, get_TOC_alias_set ());
9662 else
9663 return sym;
9665 /* Use global-dynamic for local-dynamic. */
9666 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9667 || model == TLS_MODEL_LOCAL_DYNAMIC)
9669 /* Create new TOC reference for @m symbol. */
9670 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9671 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9672 strcpy (tlsname, "*LCM");
9673 strcat (tlsname, name + 3);
9674 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9675 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9676 tocref = create_TOC_reference (modaddr, NULL_RTX);
9677 rtx modmem = gen_const_mem (Pmode, tocref);
9678 set_mem_alias_set (modmem, get_TOC_alias_set ());
9680 rtx modreg = gen_reg_rtx (Pmode);
9681 emit_insn (gen_rtx_SET (modreg, modmem));
9683 tmpreg = gen_reg_rtx (Pmode);
9684 emit_insn (gen_rtx_SET (tmpreg, mem));
9686 dest = gen_reg_rtx (Pmode);
9687 if (TARGET_32BIT)
9688 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9689 else
9690 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9691 return dest;
9693 /* Obtain the TLS pointer: a 32-bit call, or GPR 13 on 64-bit. */
9694 else if (TARGET_32BIT)
9696 tlsreg = gen_reg_rtx (SImode);
9697 emit_insn (gen_tls_get_tpointer (tlsreg));
9699 else
9700 tlsreg = gen_rtx_REG (DImode, 13);
9702 /* Load the TOC value into temporary register. */
9703 tmpreg = gen_reg_rtx (Pmode);
9704 emit_insn (gen_rtx_SET (tmpreg, mem));
9705 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9706 gen_rtx_MINUS (Pmode, addr, tlsreg));
9708 /* Add TOC symbol value to TLS pointer. */
9709 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9711 return dest;
9714 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9715 this (thread-local) address. */
9717 static rtx
9718 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9720 rtx dest, insn;
9722 if (TARGET_XCOFF)
9723 return rs6000_legitimize_tls_address_aix (addr, model);
9725 dest = gen_reg_rtx (Pmode);
9726 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9728 rtx tlsreg;
9730 if (TARGET_64BIT)
9732 tlsreg = gen_rtx_REG (Pmode, 13);
9733 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9735 else
9737 tlsreg = gen_rtx_REG (Pmode, 2);
9738 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9740 emit_insn (insn);
9742 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9744 rtx tlsreg, tmp;
9746 tmp = gen_reg_rtx (Pmode);
9747 if (TARGET_64BIT)
9749 tlsreg = gen_rtx_REG (Pmode, 13);
9750 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9752 else
9754 tlsreg = gen_rtx_REG (Pmode, 2);
9755 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9757 emit_insn (insn);
9758 if (TARGET_64BIT)
9759 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9760 else
9761 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9762 emit_insn (insn);
9764 else
9766 rtx r3, got, tga, tmp1, tmp2, call_insn;
9768 /* We currently use relocations like @got@tlsgd for TLS, which
9769 means the linker will handle allocation of TLS entries, placing
9770 them in the .got section. So use a pointer to the .got section,
9771 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9772 or to secondary GOT sections used by 32-bit -fPIC. */
9773 if (TARGET_64BIT)
9774 got = gen_rtx_REG (Pmode, 2);
9775 else
9777 if (flag_pic == 1)
9778 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9779 else
9781 rtx gsym = rs6000_got_sym ();
9782 got = gen_reg_rtx (Pmode);
9783 if (flag_pic == 0)
9784 rs6000_emit_move (got, gsym, Pmode);
9785 else
9787 rtx mem, lab;
9789 tmp1 = gen_reg_rtx (Pmode);
9790 tmp2 = gen_reg_rtx (Pmode);
9791 mem = gen_const_mem (Pmode, tmp1);
9792 lab = gen_label_rtx ();
9793 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9794 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9795 if (TARGET_LINK_STACK)
9796 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9797 emit_move_insn (tmp2, mem);
9798 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9799 set_unique_reg_note (last, REG_EQUAL, gsym);
9804 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9806 tga = rs6000_tls_get_addr ();
9807 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9808 const0_rtx, Pmode);
9810 r3 = gen_rtx_REG (Pmode, 3);
9811 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9813 if (TARGET_64BIT)
9814 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9815 else
9816 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9818 else if (DEFAULT_ABI == ABI_V4)
9819 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9820 else
9821 gcc_unreachable ();
9822 call_insn = last_call_insn ();
9823 PATTERN (call_insn) = insn;
9824 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9825 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9826 pic_offset_table_rtx);
9828 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9830 tga = rs6000_tls_get_addr ();
9831 tmp1 = gen_reg_rtx (Pmode);
9832 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9833 const0_rtx, Pmode);
9835 r3 = gen_rtx_REG (Pmode, 3);
9836 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9838 if (TARGET_64BIT)
9839 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9840 else
9841 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9843 else if (DEFAULT_ABI == ABI_V4)
9844 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9845 else
9846 gcc_unreachable ();
9847 call_insn = last_call_insn ();
9848 PATTERN (call_insn) = insn;
9849 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9850 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9851 pic_offset_table_rtx);
9853 if (rs6000_tls_size == 16)
9855 if (TARGET_64BIT)
9856 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9857 else
9858 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9860 else if (rs6000_tls_size == 32)
9862 tmp2 = gen_reg_rtx (Pmode);
9863 if (TARGET_64BIT)
9864 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9865 else
9866 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9867 emit_insn (insn);
9868 if (TARGET_64BIT)
9869 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9870 else
9871 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9873 else
9875 tmp2 = gen_reg_rtx (Pmode);
9876 if (TARGET_64BIT)
9877 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9878 else
9879 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9880 emit_insn (insn);
9881 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9883 emit_insn (insn);
9885 else
9887 /* IE, or 64-bit offset LE. */
9888 tmp2 = gen_reg_rtx (Pmode);
9889 if (TARGET_64BIT)
9890 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9891 else
9892 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9893 emit_insn (insn);
9894 if (TARGET_64BIT)
9895 insn = gen_tls_tls_64 (dest, tmp2, addr);
9896 else
9897 insn = gen_tls_tls_32 (dest, tmp2, addr);
9898 emit_insn (insn);
9902 return dest;
9905 /* Only create the global variable for the stack protect guard if we are using
9906 the global flavor of that guard. */
9907 static tree
9908 rs6000_init_stack_protect_guard (void)
9910 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9911 return default_stack_protect_guard ();
9913 return NULL_TREE;
9916 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9918 static bool
9919 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9921 if (GET_CODE (x) == HIGH
9922 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9923 return true;
9925 /* A TLS symbol in the TOC cannot contain a sum. */
9926 if (GET_CODE (x) == CONST
9927 && GET_CODE (XEXP (x, 0)) == PLUS
9928 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9929 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9930 return true;
9932 /* Do not place an ELF TLS symbol in the constant pool. */
9933 return TARGET_ELF && tls_referenced_p (x);
9936 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9937 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9938 can be addressed relative to the toc pointer. */
9940 static bool
9941 use_toc_relative_ref (rtx sym, machine_mode mode)
9943 return ((constant_pool_expr_p (sym)
9944 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9945 get_pool_mode (sym)))
9946 || (TARGET_CMODEL == CMODEL_MEDIUM
9947 && SYMBOL_REF_LOCAL_P (sym)
9948 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9951 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9952 replace the input X, or the original X if no replacement is called for.
9953 The output parameter *WIN is 1 if the calling macro should goto WIN,
9954 0 if it should not.
9956 For RS/6000, we wish to handle large displacements off a base
9957 register by splitting the addend across an addi/addis and the mem insn.
9958 This cuts the number of extra insns needed from 3 to 1.
9960 On Darwin, we use this to generate code for floating point constants.
9961 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9962 The Darwin code is inside #if TARGET_MACHO because only then are the
9963 machopic_* functions defined. */
9964 static rtx
9965 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9966 int opnum, int type,
9967 int ind_levels ATTRIBUTE_UNUSED, int *win)
9969 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9970 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9972 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9973 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9974 if (reg_offset_p
9975 && opnum == 1
9976 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9977 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9978 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9979 && TARGET_P9_VECTOR)
9980 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9981 && TARGET_P9_VECTOR)))
9982 reg_offset_p = false;
9984 /* We must recognize output that we have already generated ourselves. */
9985 if (GET_CODE (x) == PLUS
9986 && GET_CODE (XEXP (x, 0)) == PLUS
9987 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9988 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9989 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9991 if (TARGET_DEBUG_ADDR)
9993 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9994 debug_rtx (x);
9996 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9997 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9998 opnum, (enum reload_type) type);
9999 *win = 1;
10000 return x;
10003 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
10004 if (GET_CODE (x) == LO_SUM
10005 && GET_CODE (XEXP (x, 0)) == HIGH)
10007 if (TARGET_DEBUG_ADDR)
10009 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
10010 debug_rtx (x);
10012 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10013 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10014 opnum, (enum reload_type) type);
10015 *win = 1;
10016 return x;
10019 #if TARGET_MACHO
10020 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
10021 && GET_CODE (x) == LO_SUM
10022 && GET_CODE (XEXP (x, 0)) == PLUS
10023 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
10024 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
10025 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
10026 && machopic_operand_p (XEXP (x, 1)))
10028 /* Result of previous invocation of this function on Darwin
10029 floating point constant. */
10030 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10031 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10032 opnum, (enum reload_type) type);
10033 *win = 1;
10034 return x;
10036 #endif
10038 if (TARGET_CMODEL != CMODEL_SMALL
10039 && reg_offset_p
10040 && !quad_offset_p
10041 && small_toc_ref (x, VOIDmode))
10043 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
10044 x = gen_rtx_LO_SUM (Pmode, hi, x);
10045 if (TARGET_DEBUG_ADDR)
10047 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
10048 debug_rtx (x);
10050 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10051 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10052 opnum, (enum reload_type) type);
10053 *win = 1;
10054 return x;
10057 if (GET_CODE (x) == PLUS
10058 && REG_P (XEXP (x, 0))
10059 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
10060 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
10061 && CONST_INT_P (XEXP (x, 1))
10062 && reg_offset_p
10063 && !SPE_VECTOR_MODE (mode)
10064 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10065 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
10067 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
10068 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
10069 HOST_WIDE_INT high
10070 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
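/* Sign-extending LOW keeps HIGH a multiple of 0x10000: e.g. VAL = 0x18000
   gives LOW = -0x8000 and HIGH = 0x20000, which still sum to VAL. */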
10072 /* Check for 32-bit overflow or quad addresses with one of the
10073 four least significant bits set. */
10074 if (high + low != val
10075 || (quad_offset_p && (low & 0xf)))
10077 *win = 0;
10078 return x;
10081 /* Reload the high part into a base reg; leave the low part
10082 in the mem directly. */
10084 x = gen_rtx_PLUS (GET_MODE (x),
10085 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
10086 GEN_INT (high)),
10087 GEN_INT (low));
10089 if (TARGET_DEBUG_ADDR)
10091 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
10092 debug_rtx (x);
10094 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10095 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
10096 opnum, (enum reload_type) type);
10097 *win = 1;
10098 return x;
10101 if (GET_CODE (x) == SYMBOL_REF
10102 && reg_offset_p
10103 && !quad_offset_p
10104 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
10105 && !SPE_VECTOR_MODE (mode)
10106 #if TARGET_MACHO
10107 && DEFAULT_ABI == ABI_DARWIN
10108 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
10109 && machopic_symbol_defined_p (x)
10110 #else
10111 && DEFAULT_ABI == ABI_V4
10112 && !flag_pic
10113 #endif
10114 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
10115 The same goes for DImode without 64-bit gprs and DFmode and DDmode
10116 without fprs.
10117 ??? Assume floating point reg based on mode? This assumption is
10118 violated by e.g. the powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
10119 where reload ends up doing a DFmode load of a constant from
10120 mem using two gprs. Unfortunately, at this point reload
10121 hasn't yet selected regs so poking around in reload data
10122 won't help and even if we could figure out the regs reliably,
10123 we'd still want to allow this transformation when the mem is
10124 naturally aligned. Since we say the address is good here, we
10125 can't disable offsets from LO_SUMs in mem_operand_gpr.
10126 FIXME: Allow offset from lo_sum for other modes too, when
10127 mem is sufficiently aligned.
10129 Also disallow this if the type can go in VMX/Altivec registers, since
10130 those registers do not have d-form (reg+offset) address modes. */
10131 && !reg_addr[mode].scalar_in_vmx_p
10132 && mode != TFmode
10133 && mode != TDmode
10134 && mode != IFmode
10135 && mode != KFmode
10136 && (mode != TImode || !TARGET_VSX_TIMODE)
10137 && mode != PTImode
10138 && (mode != DImode || TARGET_POWERPC64)
10139 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
10140 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
10142 #if TARGET_MACHO
10143 if (flag_pic)
10145 rtx offset = machopic_gen_offset (x);
10146 x = gen_rtx_LO_SUM (GET_MODE (x),
10147 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10148 gen_rtx_HIGH (Pmode, offset)), offset);
10150 else
10151 #endif
10152 x = gen_rtx_LO_SUM (GET_MODE (x),
10153 gen_rtx_HIGH (Pmode, x), x);
10155 if (TARGET_DEBUG_ADDR)
10157 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
10158 debug_rtx (x);
10160 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10161 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10162 opnum, (enum reload_type) type);
10163 *win = 1;
10164 return x;
10167 /* Reload an offset address wrapped by an AND that represents the
10168 masking of the lower bits. Strip the outer AND and let reload
10169 convert the offset address into an indirect address. For VSX,
10170 force reload to create the address with an AND in a separate
10171 register, because we can't guarantee an altivec register will
10172 be used. */
10173 if (VECTOR_MEM_ALTIVEC_P (mode)
10174 && GET_CODE (x) == AND
10175 && GET_CODE (XEXP (x, 0)) == PLUS
10176 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
10177 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
10178 && GET_CODE (XEXP (x, 1)) == CONST_INT
10179 && INTVAL (XEXP (x, 1)) == -16)
10181 x = XEXP (x, 0);
10182 *win = 1;
10183 return x;
10186 if (TARGET_TOC
10187 && reg_offset_p
10188 && !quad_offset_p
10189 && GET_CODE (x) == SYMBOL_REF
10190 && use_toc_relative_ref (x, mode))
10192 x = create_TOC_reference (x, NULL_RTX);
10193 if (TARGET_CMODEL != CMODEL_SMALL)
10195 if (TARGET_DEBUG_ADDR)
10197 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
10198 debug_rtx (x);
10200 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10201 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10202 opnum, (enum reload_type) type);
10204 *win = 1;
10205 return x;
10207 *win = 0;
10208 return x;
10211 /* Debug version of rs6000_legitimize_reload_address. */
10212 static rtx
10213 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
10214 int opnum, int type,
10215 int ind_levels, int *win)
10217 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
10218 ind_levels, win);
10219 fprintf (stderr,
10220 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
10221 "type = %d, ind_levels = %d, win = %d, original addr:\n",
10222 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
10223 debug_rtx (x);
10225 if (x == ret)
10226 fprintf (stderr, "Same address returned\n");
10227 else if (!ret)
10228 fprintf (stderr, "NULL returned\n");
10229 else
10231 fprintf (stderr, "New address:\n");
10232 debug_rtx (ret);
10235 return ret;
10238 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
10239 that is a valid memory address for an instruction.
10240 The MODE argument is the machine mode for the MEM expression
10241 that wants to use this address.
10243 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
10244 refers to a constant pool entry of an address (or the sum of it
10245 plus a constant), a short (16-bit signed) constant plus a register,
10246 the sum of two registers, or a register indirect, possibly with an
10247 auto-increment. For DFmode, DDmode and DImode with a constant plus
10248 register, we must ensure that both words are addressable, or on
10249 PowerPC64 that the offset is word-aligned.
10251 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
10252 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
10253 because adjacent memory cells are accessed by adding word-sized offsets
10254 during assembly output. */
10255 static bool
10256 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
10258 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
10259 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
10261 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
10262 if (VECTOR_MEM_ALTIVEC_P (mode)
10263 && GET_CODE (x) == AND
10264 && GET_CODE (XEXP (x, 1)) == CONST_INT
10265 && INTVAL (XEXP (x, 1)) == -16)
10266 x = XEXP (x, 0);
10268 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
10269 return 0;
10270 if (legitimate_indirect_address_p (x, reg_ok_strict))
10271 return 1;
10272 if (TARGET_UPDATE
10273 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
10274 && mode_supports_pre_incdec_p (mode)
10275 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
10276 return 1;
10277 /* Handle restricted vector d-form offsets in ISA 3.0. */
10278 if (quad_offset_p)
10280 if (quad_address_p (x, mode, reg_ok_strict))
10281 return 1;
10283 else if (virtual_stack_registers_memory_p (x))
10284 return 1;
10286 else if (reg_offset_p)
10288 if (legitimate_small_data_p (mode, x))
10289 return 1;
10290 if (legitimate_constant_pool_address_p (x, mode,
10291 reg_ok_strict || lra_in_progress))
10292 return 1;
10293 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
10294 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
10295 return 1;
10298 /* For TImode, if we have TImode in VSX registers, only allow register
10299 indirect addresses. This will allow the values to go in either GPRs
10300 or VSX registers without reloading. The vector types would tend to
10301 go into VSX registers, so we allow REG+REG, while TImode seems
10302 somewhat split, in that some uses are GPR based, and some VSX based. */
10303 /* FIXME: We could loosen this by changing the following to
10304 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
10305 but currently we cannot allow REG+REG addressing for TImode. See
10306 PR72827 for complete details on how this ends up hoodwinking DSE. */
10307 if (mode == TImode && TARGET_VSX_TIMODE)
10308 return 0;
10309 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
10310 if (! reg_ok_strict
10311 && reg_offset_p
10312 && GET_CODE (x) == PLUS
10313 && GET_CODE (XEXP (x, 0)) == REG
10314 && (XEXP (x, 0) == virtual_stack_vars_rtx
10315 || XEXP (x, 0) == arg_pointer_rtx)
10316 && GET_CODE (XEXP (x, 1)) == CONST_INT)
10317 return 1;
10318 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
10319 return 1;
10320 if (!FLOAT128_2REG_P (mode)
10321 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
10322 || TARGET_POWERPC64
10323 || (mode != DFmode && mode != DDmode)
10324 || (TARGET_E500_DOUBLE && mode != DDmode))
10325 && (TARGET_POWERPC64 || mode != DImode)
10326 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
10327 && mode != PTImode
10328 && !avoiding_indexed_address_p (mode)
10329 && legitimate_indexed_address_p (x, reg_ok_strict))
10330 return 1;
10331 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
10332 && mode_supports_pre_modify_p (mode)
10333 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
10334 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
10335 reg_ok_strict, false)
10336 || (!avoiding_indexed_address_p (mode)
10337 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
10338 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
10339 return 1;
10340 if (reg_offset_p && !quad_offset_p
10341 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
10342 return 1;
10343 return 0;
10346 /* Debug version of rs6000_legitimate_address_p. */
10347 static bool
10348 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
10349 bool reg_ok_strict)
10351 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
10352 fprintf (stderr,
10353 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10354 "strict = %d, reload = %s, code = %s\n",
10355 ret ? "true" : "false",
10356 GET_MODE_NAME (mode),
10357 reg_ok_strict,
10358 (reload_completed
10359 ? "after"
10360 : (reload_in_progress ? "progress" : "before")),
10361 GET_RTX_NAME (GET_CODE (x)));
10362 debug_rtx (x);
10364 return ret;
10367 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10369 static bool
10370 rs6000_mode_dependent_address_p (const_rtx addr,
10371 addr_space_t as ATTRIBUTE_UNUSED)
10373 return rs6000_mode_dependent_address_ptr (addr);
10376 /* Go to LABEL if ADDR (a legitimate address expression)
10377 has an effect that depends on the machine mode it is used for.
10379 On the RS/6000 this is true of all integral offsets (since AltiVec
10380 and VSX modes don't allow them) and of pre-increment or decrement addresses.
10382 ??? Except that due to conceptual problems in offsettable_address_p
10383 we can't really report the problems of integral offsets. So leave
10384 this assuming that the adjustable offset must be valid for the
10385 sub-words of a TFmode operand, which is what we had before. */
10387 static bool
10388 rs6000_mode_dependent_address (const_rtx addr)
10390 switch (GET_CODE (addr))
10392 case PLUS:
10393 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10394 is considered a legitimate address before reload, so there
10395 are no offset restrictions in that case. Note that this
10396 condition is safe in strict mode because any address involving
10397 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10398 been rejected as illegitimate. */
10399 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10400 && XEXP (addr, 0) != arg_pointer_rtx
10401 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
10403 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10404 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
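/* I.e. the offset is fine for the first word, but the last word of a
   16-byte operand (offset + 8, or + 12 with 32-bit regs) would fall
   outside the signed 16-bit displacement range. */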
10406 break;
10408 case LO_SUM:
10409 /* Anything in the constant pool is sufficiently aligned that
10410 all bytes have the same high part address. */
10411 return !legitimate_constant_pool_address_p (addr, QImode, false);
10413 /* Auto-increment cases are now treated generically in recog.c. */
10414 case PRE_MODIFY:
10415 return TARGET_UPDATE;
10417 /* AND is only allowed in Altivec loads. */
10418 case AND:
10419 return true;
10421 default:
10422 break;
10425 return false;
10428 /* Debug version of rs6000_mode_dependent_address. */
10429 static bool
10430 rs6000_debug_mode_dependent_address (const_rtx addr)
10432 bool ret = rs6000_mode_dependent_address (addr);
10434 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10435 ret ? "true" : "false");
10436 debug_rtx (addr);
10438 return ret;
10441 /* Implement FIND_BASE_TERM. */
10443 rtx
10444 rs6000_find_base_term (rtx op)
10446 rtx base;
10448 base = op;
10449 if (GET_CODE (base) == CONST)
10450 base = XEXP (base, 0);
10451 if (GET_CODE (base) == PLUS)
10452 base = XEXP (base, 0);
10453 if (GET_CODE (base) == UNSPEC)
10454 switch (XINT (base, 1))
10456 case UNSPEC_TOCREL:
10457 case UNSPEC_MACHOPIC_OFFSET:
10458 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10459 for aliasing purposes. */
10460 return XVECEXP (base, 0, 0);
10463 return op;
10466 /* More elaborate version of recog's offsettable_memref_p predicate
10467 that works around the ??? note of rs6000_mode_dependent_address.
10468 In particular it accepts
10470 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10472 in 32-bit mode, which the recog predicate rejects. */
10474 static bool
10475 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
10477 bool worst_case;
10479 if (!MEM_P (op))
10480 return false;
10482 /* First mimic offsettable_memref_p. */
10483 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
10484 return true;
10486 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10487 the latter predicate knows nothing about the mode of the memory
10488 reference and, therefore, assumes that it is the largest supported
10489 mode (TFmode). As a consequence, legitimate offsettable memory
10490 references are rejected. rs6000_legitimate_offset_address_p contains
10491 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10492 at least with a little bit of help here given that we know the
10493 actual registers used. */
10494 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10495 || GET_MODE_SIZE (reg_mode) == 4);
10496 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10497 true, worst_case);
10500 /* Determine the reassociation width to be used in reassociate_bb.
10501 This takes into account how many parallel operations we
10502 can actually do of a given type, and also the latency.
10504 int add/sub 6/cycle
10505 mul 2/cycle
10506 vect add/sub/mul 2/cycle
10507 fp add/sub/mul 2/cycle
10508 dfp 1/cycle
10509 */
10511 static int
10512 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10513 machine_mode mode)
10515 switch (rs6000_cpu)
10517 case PROCESSOR_POWER8:
10518 case PROCESSOR_POWER9:
10519 if (DECIMAL_FLOAT_MODE_P (mode))
10520 return 1;
10521 if (VECTOR_MODE_P (mode))
10522 return 4;
10523 if (INTEGRAL_MODE_P (mode))
10524 return opc == MULT_EXPR ? 4 : 6;
10525 if (FLOAT_MODE_P (mode))
10526 return 4;
10527 break;
10528 default:
10529 break;
10531 return 1;
10534 /* Change register usage conditional on target flags. */
10535 static void
10536 rs6000_conditional_register_usage (void)
10538 int i;
10540 if (TARGET_DEBUG_TARGET)
10541 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10543 /* Set MQ register fixed (already call_used) so that it will not be
10544 allocated. */
10545 fixed_regs[64] = 1;
10547 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10548 if (TARGET_64BIT)
10549 fixed_regs[13] = call_used_regs[13]
10550 = call_really_used_regs[13] = 1;
10552 /* Conditionally disable FPRs. */
10553 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
10554 for (i = 32; i < 64; i++)
10555 fixed_regs[i] = call_used_regs[i]
10556 = call_really_used_regs[i] = 1;
10558 /* The TOC register is not killed across calls in a way that is
10559 visible to the compiler. */
10560 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10561 call_really_used_regs[2] = 0;
10563 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10564 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10566 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10567 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10568 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10569 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10571 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10572 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10573 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10574 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10576 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10577 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10578 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10580 if (TARGET_SPE)
10582 global_regs[SPEFSCR_REGNO] = 1;
10583 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
10584 registers in prologues and epilogues. We no longer use r14
10585 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
10586 pool for link-compatibility with older versions of GCC. Once
10587 "old" code has died out, we can return r14 to the allocation
10588 pool. */
10589 fixed_regs[14]
10590 = call_used_regs[14]
10591 = call_really_used_regs[14] = 1;
10594 if (!TARGET_ALTIVEC && !TARGET_VSX)
10596 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10597 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10598 call_really_used_regs[VRSAVE_REGNO] = 1;
10601 if (TARGET_ALTIVEC || TARGET_VSX)
10602 global_regs[VSCR_REGNO] = 1;
10604 if (TARGET_ALTIVEC_ABI)
10606 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10607 call_used_regs[i] = call_really_used_regs[i] = 1;
10609 /* AIX reserves VR20:31 in non-extended ABI mode. */
10610 if (TARGET_XCOFF)
10611 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10612 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10617 /* Output insns to set DEST equal to the constant SOURCE as a series of
10618 lis, ori and shift instructions and return TRUE. */
10620 bool
10621 rs6000_emit_set_const (rtx dest, rtx source)
10623 machine_mode mode = GET_MODE (dest);
10624 rtx temp, set;
10625 rtx_insn *insn;
10626 HOST_WIDE_INT c;
10628 gcc_checking_assert (CONST_INT_P (source));
10629 c = INTVAL (source);
10630 switch (mode)
10632 case E_QImode:
10633 case E_HImode:
10634 emit_insn (gen_rtx_SET (dest, source));
10635 return true;
10637 case E_SImode:
10638 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10640 emit_insn (gen_rtx_SET (copy_rtx (temp),
10641 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10642 emit_insn (gen_rtx_SET (dest,
10643 gen_rtx_IOR (SImode, copy_rtx (temp),
10644 GEN_INT (c & 0xffff))));
10645 break;
10647 case E_DImode:
10648 if (!TARGET_POWERPC64)
10650 rtx hi, lo;
10652 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10653 DImode);
10654 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10655 DImode);
10656 emit_move_insn (hi, GEN_INT (c >> 32));
10657 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10658 emit_move_insn (lo, GEN_INT (c));
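/* The XOR/subtract above canonicalizes the low half as a signed 32-bit
   value: e.g. C = 0x1ffffffff stores HI = 1 and LO = -1. */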
10660 else
10661 rs6000_emit_set_long_const (dest, c);
10662 break;
10664 default:
10665 gcc_unreachable ();
10668 insn = get_last_insn ();
10669 set = single_set (insn);
10670 if (! CONSTANT_P (SET_SRC (set)))
10671 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10673 return true;
10676 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10677 Output insns to set DEST equal to the constant C as a series of
10678 lis, ori and shift instructions. */
10680 static void
10681 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10683 rtx temp;
10684 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10686 ud1 = c & 0xffff;
10687 c = c >> 16;
10688 ud2 = c & 0xffff;
10689 c = c >> 16;
10690 ud3 = c & 0xffff;
10691 c = c >> 16;
10692 ud4 = c & 0xffff;
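/* UD1..UD4 are the four 16-bit pieces of C from least to most significant:
   for C = 0x1234567890abcdef, UD4 = 0x1234, UD3 = 0x5678, UD2 = 0x90ab
   and UD1 = 0xcdef. */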
10694 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10695 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10696 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10698 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10699 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
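/* The two-insn case: e.g. C = 0x12345678 becomes a move of 0x12340000
   (in effect a lis) IORed with 0x5678 (in effect an ori). */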
10701 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10703 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10704 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10705 if (ud1 != 0)
10706 emit_move_insn (dest,
10707 gen_rtx_IOR (DImode, copy_rtx (temp),
10708 GEN_INT (ud1)));
10710 else if (ud3 == 0 && ud4 == 0)
10712 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10714 gcc_assert (ud2 & 0x8000);
10715 emit_move_insn (copy_rtx (temp),
10716 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10717 if (ud1 != 0)
10718 emit_move_insn (copy_rtx (temp),
10719 gen_rtx_IOR (DImode, copy_rtx (temp),
10720 GEN_INT (ud1)));
10721 emit_move_insn (dest,
10722 gen_rtx_ZERO_EXTEND (DImode,
10723 gen_lowpart (SImode,
10724 copy_rtx (temp))));
10726 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10727 || (ud4 == 0 && ! (ud3 & 0x8000)))
10729 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10731 emit_move_insn (copy_rtx (temp),
10732 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10733 if (ud2 != 0)
10734 emit_move_insn (copy_rtx (temp),
10735 gen_rtx_IOR (DImode, copy_rtx (temp),
10736 GEN_INT (ud2)));
10737 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10738 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10739 GEN_INT (16)));
10740 if (ud1 != 0)
10741 emit_move_insn (dest,
10742 gen_rtx_IOR (DImode, copy_rtx (temp),
10743 GEN_INT (ud1)));
10745 else
10747 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10749 emit_move_insn (copy_rtx (temp),
10750 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10751 if (ud3 != 0)
10752 emit_move_insn (copy_rtx (temp),
10753 gen_rtx_IOR (DImode, copy_rtx (temp),
10754 GEN_INT (ud3)));
10756 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10757 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10758 GEN_INT (32)));
10759 if (ud2 != 0)
10760 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10761 gen_rtx_IOR (DImode, copy_rtx (temp),
10762 GEN_INT (ud2 << 16)));
10763 if (ud1 != 0)
10764 emit_move_insn (dest,
10765 gen_rtx_IOR (DImode, copy_rtx (temp),
10766 GEN_INT (ud1)));
10770 /* Helper for the following. Get rid of [r+r] memory refs
10771 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10773 static void
10774 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10776 if (reload_in_progress)
10777 return;
10779 if (GET_CODE (operands[0]) == MEM
10780 && GET_CODE (XEXP (operands[0], 0)) != REG
10781 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10782 GET_MODE (operands[0]), false))
10783 operands[0]
10784 = replace_equiv_address (operands[0],
10785 copy_addr_to_reg (XEXP (operands[0], 0)));
10787 if (GET_CODE (operands[1]) == MEM
10788 && GET_CODE (XEXP (operands[1], 0)) != REG
10789 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10790 GET_MODE (operands[1]), false))
10791 operands[1]
10792 = replace_equiv_address (operands[1],
10793 copy_addr_to_reg (XEXP (operands[1], 0)));
10796 /* Generate a vector of constants to permute MODE for a little-endian
10797 storage operation by swapping the two halves of a vector. */
10798 static rtvec
10799 rs6000_const_vec (machine_mode mode)
10801 int i, subparts;
10802 rtvec v;
10804 switch (mode)
10806 case E_V1TImode:
10807 subparts = 1;
10808 break;
10809 case E_V2DFmode:
10810 case E_V2DImode:
10811 subparts = 2;
10812 break;
10813 case E_V4SFmode:
10814 case E_V4SImode:
10815 subparts = 4;
10816 break;
10817 case E_V8HImode:
10818 subparts = 8;
10819 break;
10820 case E_V16QImode:
10821 subparts = 16;
10822 break;
10823 default:
10824 gcc_unreachable ();
10827 v = rtvec_alloc (subparts);
10829 for (i = 0; i < subparts / 2; ++i)
10830 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10831 for (i = subparts / 2; i < subparts; ++i)
10832 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
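/* For V4SImode, for instance, this builds { 2, 3, 0, 1 }: each element
   index is rotated by half the vector length. */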
10834 return v;
10837 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10838 for a VSX load or store operation. */
10839 rtx
10840 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10842 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10843 128-bit integers if they are allowed in VSX registers. */
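/* Rotating a 128-bit value by 64 bits swaps its two doublewords; the
   VEC_SELECT arm below expresses the same half-swap element-wise. */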
10844 if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
10845 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10846 else
10848 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10849 return gen_rtx_VEC_SELECT (mode, source, par);
10853 /* Emit a little-endian load from vector memory location SOURCE to VSX
10854 register DEST in mode MODE. The load is done with two permuting
10855 insns that represent an lxvd2x and an xxpermdi. */
10856 void
10857 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10859 rtx tmp, permute_mem, permute_reg;
10861 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10862 V1TImode). */
10863 if (mode == TImode || mode == V1TImode)
10865 mode = V2DImode;
10866 dest = gen_lowpart (V2DImode, dest);
10867 source = adjust_address (source, V2DImode, 0);
10870 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10871 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10872 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10873 emit_insn (gen_rtx_SET (tmp, permute_mem));
10874 emit_insn (gen_rtx_SET (dest, permute_reg));
10877 /* Emit a little-endian store to vector memory location DEST from VSX
10878 register SOURCE in mode MODE. The store is done with two permuting
10879 insns that represent an xxpermdi and an stxvd2x. */
10880 void
10881 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10883 rtx tmp, permute_src, permute_tmp;
10885 /* This should never be called during or after reload, because it does
10886 not re-permute the source register. It is intended only for use
10887 during expand. */
10888 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10890 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10891 V1TImode). */
10892 if (mode == TImode || mode == V1TImode)
10894 mode = V2DImode;
10895 dest = adjust_address (dest, V2DImode, 0);
10896 source = gen_lowpart (V2DImode, source);
10899 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10900 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10901 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10902 emit_insn (gen_rtx_SET (tmp, permute_src));
10903 emit_insn (gen_rtx_SET (dest, permute_tmp));
10906 /* Emit a sequence representing a little-endian VSX load or store,
10907 moving data from SOURCE to DEST in mode MODE. This is done
10908 separately from rs6000_emit_move to ensure it is called only
10909 during expand. LE VSX loads and stores introduced later are
10910 handled with a split. The expand-time RTL generation allows
10911 us to optimize away redundant pairs of register-permutes. */
10912 void
10913 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10915 gcc_assert (!BYTES_BIG_ENDIAN
10916 && VECTOR_MEM_VSX_P (mode)
10917 && !TARGET_P9_VECTOR
10918 && !gpr_or_gpr_p (dest, source)
10919 && (MEM_P (source) ^ MEM_P (dest)));
10921 if (MEM_P (source))
10923 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10924 rs6000_emit_le_vsx_load (dest, source, mode);
10926 else
10928 if (!REG_P (source))
10929 source = force_reg (mode, source);
10930 rs6000_emit_le_vsx_store (dest, source, mode);
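/* A sketch of how a mov<mode> expander would use this helper; the exact
   condition lives in the machine description, so treat this as
   illustrative rather than authoritative:

     if (!BYTES_BIG_ENDIAN
         && VECTOR_MEM_VSX_P (<MODE>mode)
         && !TARGET_P9_VECTOR
         && !gpr_or_gpr_p (operands[0], operands[1])
         && (MEM_P (operands[0]) ^ MEM_P (operands[1])))
       {
         rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
         DONE;
       }  */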
10934 /* Return whether an SFmode or SImode move can be done without converting one
10935 mode to another. This arises when we have:
10937 (SUBREG:SF (REG:SI ...))
10938 (SUBREG:SI (REG:SF ...))
10940 and one of the values is in a floating point/vector register, where SFmode
10941 scalars are stored in DFmode format. */
10943 bool
10944 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10946 if (TARGET_ALLOW_SF_SUBREG)
10947 return true;
10949 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10950 return true;
10952 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10953 return true;
10955 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10956 if (SUBREG_P (dest))
10958 rtx dest_subreg = SUBREG_REG (dest);
10959 rtx src_subreg = SUBREG_REG (src);
10960 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10963 return false;
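/* For example, (set (reg:SF f1) (subreg:SF (reg:SI r2) 0)) is rejected
   here (the SF value would need format conversion first), whereas
   (set (subreg:SI (reg:SF f1) 0) (subreg:SI (reg:SF f2) 0)) is allowed
   because both subregs wrap the same inner mode.  */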
10967 /* Helper function to change moves with:
10969 (SUBREG:SF (REG:SI)) and
10970 (SUBREG:SI (REG:SF))
10972 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10973 values are stored as DFmode values in the VSX registers. We need to convert
10974 the bits before we can use a direct move or operate on the bits in the
10975 vector register as an integer type.
10977 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10979 static bool
10980 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10982 if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
10983 && !lra_in_progress
10984 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10985 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10987 rtx inner_source = SUBREG_REG (source);
10988 machine_mode inner_mode = GET_MODE (inner_source);
10990 if (mode == SImode && inner_mode == SFmode)
10992 emit_insn (gen_movsi_from_sf (dest, inner_source));
10993 return true;
10996 if (mode == SFmode && inner_mode == SImode)
10998 emit_insn (gen_movsf_from_si (dest, inner_source));
10999 return true;
11003 return false;
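/* E.g. (set (reg:SI r3) (subreg:SI (reg:SF f1) 0)) is rewritten here as
   gen_movsi_from_sf (reg:SI r3, reg:SF f1), whose pattern converts the
   DFmode image kept in the VSX register back to SFmode format before
   the bits are moved, as described above.  */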
11006 /* Emit a move from SOURCE to DEST in mode MODE. */
11007 void
11008 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
11010 rtx operands[2];
11011 operands[0] = dest;
11012 operands[1] = source;
11014 if (TARGET_DEBUG_ADDR)
11016 fprintf (stderr,
11017 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
11018 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
11019 GET_MODE_NAME (mode),
11020 reload_in_progress,
11021 reload_completed,
11022 can_create_pseudo_p ());
11023 debug_rtx (dest);
11024 fprintf (stderr, "source:\n");
11025 debug_rtx (source);
11028 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
11029 if (CONST_WIDE_INT_P (operands[1])
11030 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
11032 /* This should be fixed with the introduction of CONST_WIDE_INT. */
11033 gcc_unreachable ();
11036 /* See if we need to special case SImode/SFmode SUBREG moves. */
11037 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
11038 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
11039 return;
11041 /* Check if GCC is setting up a block move that will end up using FP
11042 registers as temporaries. We must make sure this is acceptable. */
11043 if (GET_CODE (operands[0]) == MEM
11044 && GET_CODE (operands[1]) == MEM
11045 && mode == DImode
11046 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
11047 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
11048 && ! (rs6000_slow_unaligned_access (SImode,
11049 (MEM_ALIGN (operands[0]) > 32
11050 ? 32 : MEM_ALIGN (operands[0])))
11051 || rs6000_slow_unaligned_access (SImode,
11052 (MEM_ALIGN (operands[1]) > 32
11053 ? 32 : MEM_ALIGN (operands[1]))))
11054 && ! MEM_VOLATILE_P (operands [0])
11055 && ! MEM_VOLATILE_P (operands [1]))
11057 emit_move_insn (adjust_address (operands[0], SImode, 0),
11058 adjust_address (operands[1], SImode, 0));
11059 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
11060 adjust_address (copy_rtx (operands[1]), SImode, 4));
11061 return;
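/* The split above turns one slow unaligned DImode copy,
     (set (mem:DI A) (mem:DI B)),
   into two naturally aligned word copies,
     (set (mem:SI A) (mem:SI B)) and (set (mem:SI A+4) (mem:SI B+4)),
   avoiding the FP temporaries a DImode block move could otherwise use.  */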
11064 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
11065 && !gpc_reg_operand (operands[1], mode))
11066 operands[1] = force_reg (mode, operands[1]);
11068 /* Recognize the case where operand[1] is a reference to thread-local
11069 data and load its address to a register. */
11070 if (tls_referenced_p (operands[1]))
11072 enum tls_model model;
11073 rtx tmp = operands[1];
11074 rtx addend = NULL;
11076 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
11078 addend = XEXP (XEXP (tmp, 0), 1);
11079 tmp = XEXP (XEXP (tmp, 0), 0);
11082 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
11083 model = SYMBOL_REF_TLS_MODEL (tmp);
11084 gcc_assert (model != 0);
11086 tmp = rs6000_legitimize_tls_address (tmp, model);
11087 if (addend)
11089 tmp = gen_rtx_PLUS (mode, tmp, addend);
11090 tmp = force_operand (tmp, operands[0]);
11092 operands[1] = tmp;
11095 /* Handle the case where reload calls us with an invalid address. */
11096 if (reload_in_progress && mode == Pmode
11097 && (! general_operand (operands[1], mode)
11098 || ! nonimmediate_operand (operands[0], mode)))
11099 goto emit_set;
11101 /* 128-bit constant floating-point values on Darwin should really be loaded
11102 as two parts. However, this premature splitting is a problem when DFmode
11103 values can go into Altivec registers. */
11104 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
11105 && GET_CODE (operands[1]) == CONST_DOUBLE)
11107 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
11108 simplify_gen_subreg (DFmode, operands[1], mode, 0),
11109 DFmode);
11110 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
11111 GET_MODE_SIZE (DFmode)),
11112 simplify_gen_subreg (DFmode, operands[1], mode,
11113 GET_MODE_SIZE (DFmode)),
11114 DFmode);
11115 return;
11118 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
11119 cfun->machine->sdmode_stack_slot =
11120 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
11123 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11124 p1:SD) if p1 is not of floating-point class and p0 is spilled, as
11125 we have no analogous movsd_store for this case. */
11126 if (lra_in_progress && mode == DDmode
11127 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11128 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11129 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
11130 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
11132 enum reg_class cl;
11133 int regno = REGNO (SUBREG_REG (operands[1]));
11135 if (regno >= FIRST_PSEUDO_REGISTER)
11137 cl = reg_preferred_class (regno);
11138 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
11140 if (regno >= 0 && ! FP_REGNO_P (regno))
11142 mode = SDmode;
11143 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
11144 operands[1] = SUBREG_REG (operands[1]);
11147 if (lra_in_progress
11148 && mode == SDmode
11149 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11150 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11151 && (REG_P (operands[1])
11152 || (GET_CODE (operands[1]) == SUBREG
11153 && REG_P (SUBREG_REG (operands[1])))))
11155 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
11156 ? SUBREG_REG (operands[1]) : operands[1]);
11157 enum reg_class cl;
11159 if (regno >= FIRST_PSEUDO_REGISTER)
11161 cl = reg_preferred_class (regno);
11162 gcc_assert (cl != NO_REGS);
11163 regno = ira_class_hard_regs[cl][0];
11165 if (FP_REGNO_P (regno))
11167 if (GET_MODE (operands[0]) != DDmode)
11168 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
11169 emit_insn (gen_movsd_store (operands[0], operands[1]));
11171 else if (INT_REGNO_P (regno))
11172 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11173 else
11174 gcc_unreachable();
11175 return;
11177 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11178 p1:DD)) if p0 is not of floating-point class and p1 is spilled, as
11179 we have no analogous movsd_load for this case. */
11180 if (lra_in_progress && mode == DDmode
11181 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
11182 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
11183 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11184 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11186 enum reg_class cl;
11187 int regno = REGNO (SUBREG_REG (operands[0]));
11189 if (regno >= FIRST_PSEUDO_REGISTER)
11191 cl = reg_preferred_class (regno);
11192 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
11194 if (regno >= 0 && ! FP_REGNO_P (regno))
11196 mode = SDmode;
11197 operands[0] = SUBREG_REG (operands[0]);
11198 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
11201 if (lra_in_progress
11202 && mode == SDmode
11203 && (REG_P (operands[0])
11204 || (GET_CODE (operands[0]) == SUBREG
11205 && REG_P (SUBREG_REG (operands[0]))))
11206 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11207 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11209 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
11210 ? SUBREG_REG (operands[0]) : operands[0]);
11211 enum reg_class cl;
11213 if (regno >= FIRST_PSEUDO_REGISTER)
11215 cl = reg_preferred_class (regno);
11216 gcc_assert (cl != NO_REGS);
11217 regno = ira_class_hard_regs[cl][0];
11219 if (FP_REGNO_P (regno))
11221 if (GET_MODE (operands[1]) != DDmode)
11222 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
11223 emit_insn (gen_movsd_load (operands[0], operands[1]));
11225 else if (INT_REGNO_P (regno))
11226 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11227 else
11228 gcc_unreachable();
11229 return;
11232 if (reload_in_progress
11233 && mode == SDmode
11234 && cfun->machine->sdmode_stack_slot != NULL_RTX
11235 && MEM_P (operands[0])
11236 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
11237 && REG_P (operands[1]))
11239 if (FP_REGNO_P (REGNO (operands[1])))
11241 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
11242 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11243 emit_insn (gen_movsd_store (mem, operands[1]));
11245 else if (INT_REGNO_P (REGNO (operands[1])))
11247 rtx mem = operands[0];
11248 if (BYTES_BIG_ENDIAN)
11249 mem = adjust_address_nv (mem, mode, 4);
11250 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11251 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
11253 else
11254 gcc_unreachable();
11255 return;
11257 if (reload_in_progress
11258 && mode == SDmode
11259 && REG_P (operands[0])
11260 && MEM_P (operands[1])
11261 && cfun->machine->sdmode_stack_slot != NULL_RTX
11262 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
11264 if (FP_REGNO_P (REGNO (operands[0])))
11266 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
11267 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11268 emit_insn (gen_movsd_load (operands[0], mem));
11270 else if (INT_REGNO_P (REGNO (operands[0])))
11272 rtx mem = operands[1];
11273 if (BYTES_BIG_ENDIAN)
11274 mem = adjust_address_nv (mem, mode, 4);
11275 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11276 emit_insn (gen_movsd_hardfloat (operands[0], mem));
11278 else
11279 gcc_unreachable();
11280 return;
11283 /* FIXME: In the long term, this switch statement should go away
11284 and be replaced by a sequence of tests based on things like
11285 mode == Pmode. */
11286 switch (mode)
11288 case E_HImode:
11289 case E_QImode:
11290 if (CONSTANT_P (operands[1])
11291 && GET_CODE (operands[1]) != CONST_INT)
11292 operands[1] = force_const_mem (mode, operands[1]);
11293 break;
11295 case E_TFmode:
11296 case E_TDmode:
11297 case E_IFmode:
11298 case E_KFmode:
11299 if (FLOAT128_2REG_P (mode))
11300 rs6000_eliminate_indexed_memrefs (operands);
11301 /* fall through */
11303 case E_DFmode:
11304 case E_DDmode:
11305 case E_SFmode:
11306 case E_SDmode:
11307 if (CONSTANT_P (operands[1])
11308 && ! easy_fp_constant (operands[1], mode))
11309 operands[1] = force_const_mem (mode, operands[1]);
11310 break;
11312 case E_V16QImode:
11313 case E_V8HImode:
11314 case E_V4SFmode:
11315 case E_V4SImode:
11316 case E_V4HImode:
11317 case E_V2SFmode:
11318 case E_V2SImode:
11319 case E_V1DImode:
11320 case E_V2DFmode:
11321 case E_V2DImode:
11322 case E_V1TImode:
11323 if (CONSTANT_P (operands[1])
11324 && !easy_vector_constant (operands[1], mode))
11325 operands[1] = force_const_mem (mode, operands[1]);
11326 break;
11328 case E_SImode:
11329 case E_DImode:
11330 /* Use the default pattern for the address of ELF small data. */
11331 if (TARGET_ELF
11332 && mode == Pmode
11333 && DEFAULT_ABI == ABI_V4
11334 && (GET_CODE (operands[1]) == SYMBOL_REF
11335 || GET_CODE (operands[1]) == CONST)
11336 && small_data_operand (operands[1], mode))
11338 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11339 return;
11342 if (DEFAULT_ABI == ABI_V4
11343 && mode == Pmode && mode == SImode
11344 && flag_pic == 1 && got_operand (operands[1], mode))
11346 emit_insn (gen_movsi_got (operands[0], operands[1]));
11347 return;
11350 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11351 && TARGET_NO_TOC
11352 && ! flag_pic
11353 && mode == Pmode
11354 && CONSTANT_P (operands[1])
11355 && GET_CODE (operands[1]) != HIGH
11356 && GET_CODE (operands[1]) != CONST_INT)
11358 rtx target = (!can_create_pseudo_p ()
11359 ? operands[0]
11360 : gen_reg_rtx (mode));
11362 /* If this is a function address on -mcall-aixdesc,
11363 convert it to the address of the descriptor. */
11364 if (DEFAULT_ABI == ABI_AIX
11365 && GET_CODE (operands[1]) == SYMBOL_REF
11366 && XSTR (operands[1], 0)[0] == '.')
11368 const char *name = XSTR (operands[1], 0);
11369 rtx new_ref;
11370 while (*name == '.')
11371 name++;
11372 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11373 CONSTANT_POOL_ADDRESS_P (new_ref)
11374 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11375 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11376 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11377 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11378 operands[1] = new_ref;
11381 if (DEFAULT_ABI == ABI_DARWIN)
11383 #if TARGET_MACHO
11384 if (MACHO_DYNAMIC_NO_PIC_P)
11386 /* Take care of any required data indirection. */
11387 operands[1] = rs6000_machopic_legitimize_pic_address (
11388 operands[1], mode, operands[0]);
11389 if (operands[0] != operands[1])
11390 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11391 return;
11393 #endif
11394 emit_insn (gen_macho_high (target, operands[1]));
11395 emit_insn (gen_macho_low (operands[0], target, operands[1]));
11396 return;
11399 emit_insn (gen_elf_high (target, operands[1]));
11400 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11401 return;
11404 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11405 and we have put it in the TOC, we just need to make a TOC-relative
11406 reference to it. */
11407 if (TARGET_TOC
11408 && GET_CODE (operands[1]) == SYMBOL_REF
11409 && use_toc_relative_ref (operands[1], mode))
11410 operands[1] = create_TOC_reference (operands[1], operands[0]);
11411 else if (mode == Pmode
11412 && CONSTANT_P (operands[1])
11413 && GET_CODE (operands[1]) != HIGH
11414 && ((GET_CODE (operands[1]) != CONST_INT
11415 && ! easy_fp_constant (operands[1], mode))
11416 || (GET_CODE (operands[1]) == CONST_INT
11417 && (num_insns_constant (operands[1], mode)
11418 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11419 || (GET_CODE (operands[0]) == REG
11420 && FP_REGNO_P (REGNO (operands[0]))))
11421 && !toc_relative_expr_p (operands[1], false)
11422 && (TARGET_CMODEL == CMODEL_SMALL
11423 || can_create_pseudo_p ()
11424 || (REG_P (operands[0])
11425 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11428 #if TARGET_MACHO
11429 /* Darwin uses a special PIC legitimizer. */
11430 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11432 operands[1] =
11433 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11434 operands[0]);
11435 if (operands[0] != operands[1])
11436 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11437 return;
11439 #endif
11441 /* If we are to limit the number of things we put in the TOC and
11442 this is a symbol plus a constant we can add in one insn,
11443 just put the symbol in the TOC and add the constant. Don't do
11444 this if reload is in progress. */
11445 if (GET_CODE (operands[1]) == CONST
11446 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
11447 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11448 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11449 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11450 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
11451 && ! side_effects_p (operands[0]))
11453 rtx sym =
11454 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11455 rtx other = XEXP (XEXP (operands[1], 0), 1);
11457 sym = force_reg (mode, sym);
11458 emit_insn (gen_add3_insn (operands[0], sym, other));
11459 return;
11462 operands[1] = force_const_mem (mode, operands[1]);
11464 if (TARGET_TOC
11465 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11466 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11468 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11469 operands[0]);
11470 operands[1] = gen_const_mem (mode, tocref);
11471 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11474 break;
11476 case E_TImode:
11477 if (!VECTOR_MEM_VSX_P (TImode))
11478 rs6000_eliminate_indexed_memrefs (operands);
11479 break;
11481 case E_PTImode:
11482 rs6000_eliminate_indexed_memrefs (operands);
11483 break;
11485 default:
11486 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11489 /* Above, we may have called force_const_mem which may have returned
11490 an invalid address. If we can, fix this up; otherwise, reload will
11491 have to deal with it. */
11492 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
11493 operands[1] = validize_mem (operands[1]);
11495 emit_set:
11496 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11499 /* Return true if a structure, union or array containing FIELD should be
11500 accessed using `BLKmode'.
11502 For the SPE, simd types are V2SI, and gcc can be tempted to put the
11503 entire thing in a DI and use subregs to access the internals.
11504 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
11505 back-end. Because a single GPR can hold a V2SI, but not a DI, the
11506 best thing to do is set structs to BLKmode and avoid Severe Tire
11507 Damage.
11509 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
11510 fit into one GPR, whereas DI still needs two. */
11512 static bool
11513 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
11515 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
11516 || (TARGET_E500_DOUBLE && mode == DFmode));
11519 /* Nonzero if we can use a floating-point register to pass this arg. */
11520 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11521 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11522 && (CUM)->fregno <= FP_ARG_MAX_REG \
11523 && TARGET_HARD_FLOAT && TARGET_FPRS)
11525 /* Nonzero if we can use an AltiVec register to pass this arg. */
11526 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11527 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11528 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11529 && TARGET_ALTIVEC_ABI \
11530 && (NAMED))
11532 /* Walk down the type tree of TYPE counting consecutive base elements.
11533 If *MODEP is VOIDmode, then set it to the first valid floating point
11534 or vector type. If a non-floating point or vector type is found, or
11535 if a floating point or vector type that doesn't match a non-VOIDmode
11536 *MODEP is found, then return -1, otherwise return the count in the
11537 sub-tree. */
11539 static int
11540 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
11542 machine_mode mode;
11543 HOST_WIDE_INT size;
11545 switch (TREE_CODE (type))
11547 case REAL_TYPE:
11548 mode = TYPE_MODE (type);
11549 if (!SCALAR_FLOAT_MODE_P (mode))
11550 return -1;
11552 if (*modep == VOIDmode)
11553 *modep = mode;
11555 if (*modep == mode)
11556 return 1;
11558 break;
11560 case COMPLEX_TYPE:
11561 mode = TYPE_MODE (TREE_TYPE (type));
11562 if (!SCALAR_FLOAT_MODE_P (mode))
11563 return -1;
11565 if (*modep == VOIDmode)
11566 *modep = mode;
11568 if (*modep == mode)
11569 return 2;
11571 break;
11573 case VECTOR_TYPE:
11574 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
11575 return -1;
11577 /* Use V4SImode as representative of all 128-bit vector types. */
11578 size = int_size_in_bytes (type);
11579 switch (size)
11581 case 16:
11582 mode = V4SImode;
11583 break;
11584 default:
11585 return -1;
11588 if (*modep == VOIDmode)
11589 *modep = mode;
11591 /* Vector modes are considered to be opaque: two vectors are
11592 equivalent for the purposes of being homogeneous aggregates
11593 if they are the same size. */
11594 if (*modep == mode)
11595 return 1;
11597 break;
11599 case ARRAY_TYPE:
11601 int count;
11602 tree index = TYPE_DOMAIN (type);
11604 /* Can't handle incomplete types nor sizes that are not
11605 fixed. */
11606 if (!COMPLETE_TYPE_P (type)
11607 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11608 return -1;
11610 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11611 if (count == -1
11612 || !index
11613 || !TYPE_MAX_VALUE (index)
11614 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11615 || !TYPE_MIN_VALUE (index)
11616 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11617 || count < 0)
11618 return -1;
11620 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11621 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11623 /* There must be no padding. */
11624 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11625 return -1;
11627 return count;
11630 case RECORD_TYPE:
11632 int count = 0;
11633 int sub_count;
11634 tree field;
11636 /* Can't handle incomplete types nor sizes that are not
11637 fixed. */
11638 if (!COMPLETE_TYPE_P (type)
11639 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11640 return -1;
11642 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11644 if (TREE_CODE (field) != FIELD_DECL)
11645 continue;
11647 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11648 if (sub_count < 0)
11649 return -1;
11650 count += sub_count;
11653 /* There must be no padding. */
11654 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11655 return -1;
11657 return count;
11660 case UNION_TYPE:
11661 case QUAL_UNION_TYPE:
11663 /* These aren't very interesting except in a degenerate case. */
11664 int count = 0;
11665 int sub_count;
11666 tree field;
11668 /* Can't handle incomplete types nor sizes that are not
11669 fixed. */
11670 if (!COMPLETE_TYPE_P (type)
11671 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11672 return -1;
11674 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11676 if (TREE_CODE (field) != FIELD_DECL)
11677 continue;
11679 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11680 if (sub_count < 0)
11681 return -1;
11682 count = count > sub_count ? count : sub_count;
11685 /* There must be no padding. */
11686 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11687 return -1;
11689 return count;
11692 default:
11693 break;
11696 return -1;
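/* Worked examples: struct { double x, y; } gives *MODEP = DFmode and a
   count of 2; double v[3] gives DFmode and 3; _Complex double counts as
   two DFmode elements; struct { float f; int i; } returns -1 because
   the int member is neither floating point nor vector.  */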
11699 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11700 float or vector aggregate that shall be passed in FP/vector registers
11701 according to the ELFv2 ABI, return the homogeneous element mode in
11702 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11704 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11706 static bool
11707 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11708 machine_mode *elt_mode,
11709 int *n_elts)
11711 /* Note that we do not accept complex types at the top level as
11712 homogeneous aggregates; these types are handled via the
11713 targetm.calls.split_complex_arg mechanism. Complex types
11714 can be elements of homogeneous aggregates, however. */
11715 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11717 machine_mode field_mode = VOIDmode;
11718 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11720 if (field_count > 0)
11722 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11723 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11725 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11726 up to AGGR_ARG_NUM_REG registers. */
11727 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11729 if (elt_mode)
11730 *elt_mode = field_mode;
11731 if (n_elts)
11732 *n_elts = field_count;
11733 return true;
11738 if (elt_mode)
11739 *elt_mode = mode;
11740 if (n_elts)
11741 *n_elts = 1;
11742 return false;
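/* Under ELFv2, e.g. struct { double a, b, c; } comes back with
   *ELT_MODE = DFmode and *N_ELTS = 3, so it can be passed in three
   consecutive FPRs; a struct of vector members is treated likewise in
   VRs.  Anything needing more than AGGR_ARG_NUM_REG registers falls
   back to the ordinary aggregate-passing rules.  */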
11745 /* Return a nonzero value to say to return the function value in
11746 memory, just as large structures are always returned. TYPE will be
11747 the data type of the value, and FNTYPE will be the type of the
11748 function doing the returning, or @code{NULL} for libcalls.
11750 The AIX ABI for the RS/6000 specifies that all structures are
11751 returned in memory. The Darwin ABI does the same.
11753 For the Darwin 64 Bit ABI, a function result can be returned in
11754 registers or in memory, depending on the size of the return data
11755 type. If it is returned in registers, the value occupies the same
11756 registers as it would if it were the first and only function
11757 argument. Otherwise, the function places its result in memory at
11758 the location pointed to by GPR3.
11760 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11761 but a draft put them in memory, and GCC used to implement the draft
11762 instead of the final standard. Therefore, aix_struct_return
11763 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11764 compatibility can change DRAFT_V4_STRUCT_RET to override the
11765 default, and -m switches get the final word. See
11766 rs6000_option_override_internal for more details.
11768 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11769 long double support is enabled. These values are returned in memory.
11771 int_size_in_bytes returns -1 for variable size objects, which go in
11772 memory always. The cast to unsigned makes -1 > 8. */
11774 static bool
11775 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11777 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11778 if (TARGET_MACHO
11779 && rs6000_darwin64_abi
11780 && TREE_CODE (type) == RECORD_TYPE
11781 && int_size_in_bytes (type) > 0)
11783 CUMULATIVE_ARGS valcum;
11784 rtx valret;
11786 valcum.words = 0;
11787 valcum.fregno = FP_ARG_MIN_REG;
11788 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11789 /* Do a trial code generation as if this were going to be passed
11790 as an argument; if any part goes in memory, we return NULL. */
11791 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11792 if (valret)
11793 return false;
11794 /* Otherwise fall through to more conventional ABI rules. */
11797 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11798 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11799 NULL, NULL))
11800 return false;
11802 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11803 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11804 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11805 return false;
11807 if (AGGREGATE_TYPE_P (type)
11808 && (aix_struct_return
11809 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11810 return true;
11812 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11813 modes only exist for GCC vector types if -maltivec. */
11814 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11815 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11816 return false;
11818 /* Return synthetic vectors in memory. */
11819 if (TREE_CODE (type) == VECTOR_TYPE
11820 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11822 static bool warned_for_return_big_vectors = false;
11823 if (!warned_for_return_big_vectors)
11825 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11826 "non-standard ABI extension with no compatibility guarantee");
11827 warned_for_return_big_vectors = true;
11829 return true;
11832 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11833 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11834 return true;
11836 return false;
11839 /* Specify whether values returned in registers should be at the most
11840 significant end of a register. We want aggregates returned by
11841 value to match the way aggregates are passed to functions. */
11843 static bool
11844 rs6000_return_in_msb (const_tree valtype)
11846 return (DEFAULT_ABI == ABI_ELFv2
11847 && BYTES_BIG_ENDIAN
11848 && AGGREGATE_TYPE_P (valtype)
11849 && rs6000_function_arg_padding (TYPE_MODE (valtype),
11850 valtype) == PAD_UPWARD);
11853 #ifdef HAVE_AS_GNU_ATTRIBUTE
11854 /* Return TRUE if a call to function FNDECL may be one that
11855 potentially affects the function calling ABI of the object file. */
11857 static bool
11858 call_ABI_of_interest (tree fndecl)
11860 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11862 struct cgraph_node *c_node;
11864 /* Libcalls are always interesting. */
11865 if (fndecl == NULL_TREE)
11866 return true;
11868 /* Any call to an external function is interesting. */
11869 if (DECL_EXTERNAL (fndecl))
11870 return true;
11872 /* Interesting functions that we are emitting in this object file. */
11873 c_node = cgraph_node::get (fndecl);
11874 c_node = c_node->ultimate_alias_target ();
11875 return !c_node->only_called_directly_p ();
11877 return false;
11879 #endif
11881 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11882 for a call to a function whose data type is FNTYPE.
11883 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
11885 For incoming args we set the number of arguments in the prototype large
11886 so we never return a PARALLEL. */
11888 void
11889 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11890 rtx libname ATTRIBUTE_UNUSED, int incoming,
11891 int libcall, int n_named_args,
11892 tree fndecl ATTRIBUTE_UNUSED,
11893 machine_mode return_mode ATTRIBUTE_UNUSED)
11895 static CUMULATIVE_ARGS zero_cumulative;
11897 *cum = zero_cumulative;
11898 cum->words = 0;
11899 cum->fregno = FP_ARG_MIN_REG;
11900 cum->vregno = ALTIVEC_ARG_MIN_REG;
11901 cum->prototype = (fntype && prototype_p (fntype));
11902 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11903 ? CALL_LIBCALL : CALL_NORMAL);
11904 cum->sysv_gregno = GP_ARG_MIN_REG;
11905 cum->stdarg = stdarg_p (fntype);
11906 cum->libcall = libcall;
11908 cum->nargs_prototype = 0;
11909 if (incoming || cum->prototype)
11910 cum->nargs_prototype = n_named_args;
11912 /* Check for a longcall attribute. */
11913 if ((!fntype && rs6000_default_long_calls)
11914 || (fntype
11915 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11916 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11917 cum->call_cookie |= CALL_LONG;
11919 if (TARGET_DEBUG_ARG)
11921 fprintf (stderr, "\ninit_cumulative_args:");
11922 if (fntype)
11924 tree ret_type = TREE_TYPE (fntype);
11925 fprintf (stderr, " ret code = %s,",
11926 get_tree_code_name (TREE_CODE (ret_type)));
11929 if (cum->call_cookie & CALL_LONG)
11930 fprintf (stderr, " longcall,");
11932 fprintf (stderr, " proto = %d, nargs = %d\n",
11933 cum->prototype, cum->nargs_prototype);
11936 #ifdef HAVE_AS_GNU_ATTRIBUTE
11937 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11939 cum->escapes = call_ABI_of_interest (fndecl);
11940 if (cum->escapes)
11942 tree return_type;
11944 if (fntype)
11946 return_type = TREE_TYPE (fntype);
11947 return_mode = TYPE_MODE (return_type);
11949 else
11950 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11952 if (return_type != NULL)
11954 if (TREE_CODE (return_type) == RECORD_TYPE
11955 && TYPE_TRANSPARENT_AGGR (return_type))
11957 return_type = TREE_TYPE (first_field (return_type));
11958 return_mode = TYPE_MODE (return_type);
11960 if (AGGREGATE_TYPE_P (return_type)
11961 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11962 <= 8))
11963 rs6000_returns_struct = true;
11965 if (SCALAR_FLOAT_MODE_P (return_mode))
11967 rs6000_passes_float = true;
11968 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11969 && (FLOAT128_IBM_P (return_mode)
11970 || FLOAT128_IEEE_P (return_mode)
11971 || (return_type != NULL
11972 && (TYPE_MAIN_VARIANT (return_type)
11973 == long_double_type_node))))
11974 rs6000_passes_long_double = true;
11976 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11977 || SPE_VECTOR_MODE (return_mode))
11978 rs6000_passes_vector = true;
11981 #endif
11983 if (fntype
11984 && !TARGET_ALTIVEC
11985 && TARGET_ALTIVEC_ABI
11986 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11988 error ("cannot return value in vector register because"
11989 " altivec instructions are disabled, use -maltivec"
11990 " to enable them");
11994 /* The mode the ABI uses for a word. This is not the same as word_mode
11995 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11997 static scalar_int_mode
11998 rs6000_abi_word_mode (void)
12000 return TARGET_32BIT ? SImode : DImode;
12003 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
12004 static char *
12005 rs6000_offload_options (void)
12007 if (TARGET_64BIT)
12008 return xstrdup ("-foffload-abi=lp64");
12009 else
12010 return xstrdup ("-foffload-abi=ilp32");
12013 /* On rs6000, function arguments are promoted, as are function return
12014 values. */
12016 static machine_mode
12017 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
12018 machine_mode mode,
12019 int *punsignedp ATTRIBUTE_UNUSED,
12020 const_tree, int)
12022 PROMOTE_MODE (mode, *punsignedp, type);
12024 return mode;
12027 /* Return true if TYPE must be passed on the stack and not in registers. */
12029 static bool
12030 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
12032 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
12033 return must_pass_in_stack_var_size (mode, type);
12034 else
12035 return must_pass_in_stack_var_size_or_pad (mode, type);
12038 static inline bool
12039 is_complex_IBM_long_double (machine_mode mode)
12041 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
12044 /* Whether ABI_V4 passes MODE args to a function in floating point
12045 registers. */
12047 static bool
12048 abi_v4_pass_in_fpr (machine_mode mode)
12050 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
12051 return false;
12052 if (TARGET_SINGLE_FLOAT && mode == SFmode)
12053 return true;
12054 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
12055 return true;
12056 /* ABI_V4 passes complex IBM long double in 8 gprs.
12057 Stupid, but we can't change the ABI now. */
12058 if (is_complex_IBM_long_double (mode))
12059 return false;
12060 if (FLOAT128_2REG_P (mode))
12061 return true;
12062 if (DECIMAL_FLOAT_MODE_P (mode))
12063 return true;
12064 return false;
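/* So under ABI_V4 a float reaches an FPR only with -mhard-float
   -msingle-float, a double only with -mdouble-float; decimal floats
   and IBM 128-bit long double also use FPRs, while complex IBM long
   double stays on the GPR path as noted above.  */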
12067 /* Implement TARGET_FUNCTION_ARG_PADDING
12069 For the AIX ABI structs are always stored left shifted in their
12070 argument slot. */
12072 static pad_direction
12073 rs6000_function_arg_padding (machine_mode mode, const_tree type)
12075 #ifndef AGGREGATE_PADDING_FIXED
12076 #define AGGREGATE_PADDING_FIXED 0
12077 #endif
12078 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
12079 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
12080 #endif
12082 if (!AGGREGATE_PADDING_FIXED)
12084 /* GCC used to pass structures of the same size as integer types as
12085 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
12086 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
12087 passed padded downward, except that -mstrict-align further
12088 muddied the water in that multi-component structures of 2 and 4
12089 bytes in size were passed padded upward.
12091 The following arranges for best compatibility with previous
12092 versions of gcc, but removes the -mstrict-align dependency. */
12093 if (BYTES_BIG_ENDIAN)
12095 HOST_WIDE_INT size = 0;
12097 if (mode == BLKmode)
12099 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
12100 size = int_size_in_bytes (type);
12102 else
12103 size = GET_MODE_SIZE (mode);
12105 if (size == 1 || size == 2 || size == 4)
12106 return PAD_DOWNWARD;
12108 return PAD_UPWARD;
12111 if (AGGREGATES_PAD_UPWARD_ALWAYS)
12113 if (type != 0 && AGGREGATE_TYPE_P (type))
12114 return PAD_UPWARD;
12117 /* Fall back to the default. */
12118 return default_function_arg_padding (mode, type);
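/* For example, on a big-endian target a 2-byte struct is padded
   downward (it occupies the least significant end of its slot, as the
   old integer-style passing did), while a 3-byte or 16-byte struct is
   padded upward.  */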
12121 /* If defined, a C expression that gives the alignment boundary, in bits,
12122 of an argument with the specified mode and type. If it is not defined,
12123 PARM_BOUNDARY is used for all arguments.
12125 V.4 wants long longs and doubles to be double word aligned. Just
12126 testing the mode size is a boneheaded way to do this as it means
12127 that other types such as complex int are also double word aligned.
12128 However, we're stuck with this because changing the ABI might break
12129 existing library interfaces.
12131 Doubleword align SPE vectors.
12132 Quadword align Altivec/VSX vectors.
12133 Quadword align large synthetic vector types. */
12135 static unsigned int
12136 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
12138 machine_mode elt_mode;
12139 int n_elts;
12141 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12143 if (DEFAULT_ABI == ABI_V4
12144 && (GET_MODE_SIZE (mode) == 8
12145 || (TARGET_HARD_FLOAT
12146 && TARGET_FPRS
12147 && !is_complex_IBM_long_double (mode)
12148 && FLOAT128_2REG_P (mode))))
12149 return 64;
12150 else if (FLOAT128_VECTOR_P (mode))
12151 return 128;
12152 else if (SPE_VECTOR_MODE (mode)
12153 || (type && TREE_CODE (type) == VECTOR_TYPE
12154 && int_size_in_bytes (type) >= 8
12155 && int_size_in_bytes (type) < 16))
12156 return 64;
12157 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12158 || (type && TREE_CODE (type) == VECTOR_TYPE
12159 && int_size_in_bytes (type) >= 16))
12160 return 128;
12162 /* Aggregate types that need > 8 byte alignment are quadword-aligned
12163 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
12164 -mcompat-align-parm is used. */
12165 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
12166 || DEFAULT_ABI == ABI_ELFv2)
12167 && type && TYPE_ALIGN (type) > 64)
12169 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
12170 or homogeneous float/vector aggregates here. We already handled
12171 vector aggregates above, but still need to check for float here. */
12172 bool aggregate_p = (AGGREGATE_TYPE_P (type)
12173 && !SCALAR_FLOAT_MODE_P (elt_mode));
12175 /* We used to check for BLKmode instead of the above aggregate type
12176 check. Warn when this results in any difference to the ABI. */
12177 if (aggregate_p != (mode == BLKmode))
12179 static bool warned;
12180 if (!warned && warn_psabi)
12182 warned = true;
12183 inform (input_location,
12184 "the ABI of passing aggregates with %d-byte alignment"
12185 " has changed in GCC 5",
12186 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
12190 if (aggregate_p)
12191 return 128;
12194 /* Similar for the Darwin64 ABI. Note that for historical reasons we
12195 implement the "aggregate type" check as a BLKmode check here; this
12196 means certain aggregate types are in fact not aligned. */
12197 if (TARGET_MACHO && rs6000_darwin64_abi
12198 && mode == BLKmode
12199 && type && TYPE_ALIGN (type) > 64)
12200 return 128;
12202 return PARM_BOUNDARY;
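/* Examples: under ABI_V4 a long long or double argument gets 64-bit
   alignment in the parameter area; Altivec/VSX vectors and IEEE
   128-bit floats get 128-bit alignment; and under ELFv2 an aggregate
   whose type alignment exceeds 64 bits is quadword aligned as well.  */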
12205 /* The offset in words to the start of the parameter save area. */
12207 static unsigned int
12208 rs6000_parm_offset (void)
12210 return (DEFAULT_ABI == ABI_V4 ? 2
12211 : DEFAULT_ABI == ABI_ELFv2 ? 4
12212 : 6);
12215 /* For a function parm of MODE and TYPE, return the starting word in
12216 the parameter area. NWORDS of the parameter area are already used. */
12218 static unsigned int
12219 rs6000_parm_start (machine_mode mode, const_tree type,
12220 unsigned int nwords)
12222 unsigned int align;
12224 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
12225 return nwords + (-(rs6000_parm_offset () + nwords) & align);
12228 /* Compute the size (in words) of a function argument. */
12230 static unsigned long
12231 rs6000_arg_size (machine_mode mode, const_tree type)
12233 unsigned long size;
12235 if (mode != BLKmode)
12236 size = GET_MODE_SIZE (mode);
12237 else
12238 size = int_size_in_bytes (type);
12240 if (TARGET_32BIT)
12241 return (size + 3) >> 2;
12242 else
12243 return (size + 7) >> 3;
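/* Worked example: a 13-byte BLKmode struct occupies (13 + 3) >> 2 = 4
   words under -m32 and (13 + 7) >> 3 = 2 words under -m64.  For a
   16-byte-aligned argument under ELFv2 (parameter offset 4 words) with
   3 words already used, rs6000_parm_start returns 3 + (-(4 + 3) & 1)
   = 4, so the argument begins on an even doubleword.  */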
12246 /* Use this to flush pending int fields. */
12248 static void
12249 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
12250 HOST_WIDE_INT bitpos, int final)
12252 unsigned int startbit, endbit;
12253 int intregs, intoffset;
12255 /* Handle the situations where a float is taking up the first half
12256 of the GPR, and the other half is empty (typically due to
12257 alignment restrictions). We can detect this by an 8-byte-aligned
12258 int field, or by seeing that this is the final flush for this
12259 argument. Count the word and continue on. */
12260 if (cum->floats_in_gpr == 1
12261 && (cum->intoffset % 64 == 0
12262 || (cum->intoffset == -1 && final)))
12264 cum->words++;
12265 cum->floats_in_gpr = 0;
12268 if (cum->intoffset == -1)
12269 return;
12271 intoffset = cum->intoffset;
12272 cum->intoffset = -1;
12273 cum->floats_in_gpr = 0;
12275 if (intoffset % BITS_PER_WORD != 0)
12277 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
12278 if (!int_mode_for_size (bits, 0).exists ())
12280 /* We couldn't find an appropriate mode, which happens,
12281 e.g., in packed structs when there are 3 bytes to load.
12282 Back intoffset back to the beginning of the word in this
12283 case. */
12284 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12288 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12289 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12290 intregs = (endbit - startbit) / BITS_PER_WORD;
12291 cum->words += intregs;
12292 /* words should be unsigned. */
12293 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
12295 int pad = (endbit/BITS_PER_WORD) - cum->words;
12296 cum->words += pad;
12300 /* The darwin64 ABI calls for us to recurse down through structs,
12301 looking for elements passed in registers. Unfortunately, we have
12302 to track int register count here also because of misalignments
12303 in powerpc alignment mode. */
12305 static void
12306 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
12307 const_tree type,
12308 HOST_WIDE_INT startbitpos)
12310 tree f;
12312 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12313 if (TREE_CODE (f) == FIELD_DECL)
12315 HOST_WIDE_INT bitpos = startbitpos;
12316 tree ftype = TREE_TYPE (f);
12317 machine_mode mode;
12318 if (ftype == error_mark_node)
12319 continue;
12320 mode = TYPE_MODE (ftype);
12322 if (DECL_SIZE (f) != 0
12323 && tree_fits_uhwi_p (bit_position (f)))
12324 bitpos += int_bit_position (f);
12326 /* ??? FIXME: else assume zero offset. */
12328 if (TREE_CODE (ftype) == RECORD_TYPE)
12329 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
12330 else if (USE_FP_FOR_ARG_P (cum, mode))
12332 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
12333 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12334 cum->fregno += n_fpregs;
12335 /* Single-precision floats present a special problem for
12336 us, because they are smaller than an 8-byte GPR, and so
12337 the structure-packing rules combined with the standard
12338 varargs behavior mean that we want to pack float/float
12339 and float/int combinations into a single register's
12340 space. This is complicated by the arg advance flushing,
12341 which works on arbitrarily large groups of int-type
12342 fields. */
12343 if (mode == SFmode)
12345 if (cum->floats_in_gpr == 1)
12347 /* Two floats in a word; count the word and reset
12348 the float count. */
12349 cum->words++;
12350 cum->floats_in_gpr = 0;
12352 else if (bitpos % 64 == 0)
12354 /* A float at the beginning of an 8-byte word;
12355 count it and put off adjusting cum->words until
12356 we see if an arg advance flush is going to do it
12357 for us. */
12358 cum->floats_in_gpr++;
12360 else
12362 /* The float is at the end of a word, preceded
12363 by integer fields, so the arg advance flush
12364 just above has already set cum->words and
12365 everything is taken care of. */
12368 else
12369 cum->words += n_fpregs;
12371 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12373 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12374 cum->vregno++;
12375 cum->words += 2;
12377 else if (cum->intoffset == -1)
12378 cum->intoffset = bitpos;
12382 /* Check for an item that needs to be considered specially under the Darwin
12383 64-bit ABI. These are record types where the mode is BLK or the structure is
12384 8 bytes in size. */
12385 static int
12386 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
12388 return rs6000_darwin64_abi
12389 && ((mode == BLKmode
12390 && TREE_CODE (type) == RECORD_TYPE
12391 && int_size_in_bytes (type) > 0)
12392 || (type && TREE_CODE (type) == RECORD_TYPE
12393 && int_size_in_bytes (type) == 8)) ? 1 : 0;
12396 /* Update the data in CUM to advance over an argument
12397 of mode MODE and data type TYPE.
12398 (TYPE is null for libcalls where that information may not be available.)
12400 Note that for args passed by reference, function_arg will be called
12401 with MODE and TYPE set to that of the pointer to the arg, not the arg
12402 itself. */
12404 static void
12405 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
12406 const_tree type, bool named, int depth)
12408 machine_mode elt_mode;
12409 int n_elts;
12411 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12413 /* Only tick off an argument if we're not recursing. */
12414 if (depth == 0)
12415 cum->nargs_prototype--;
12417 #ifdef HAVE_AS_GNU_ATTRIBUTE
12418 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
12419 && cum->escapes)
12421 if (SCALAR_FLOAT_MODE_P (mode))
12423 rs6000_passes_float = true;
12424 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
12425 && (FLOAT128_IBM_P (mode)
12426 || FLOAT128_IEEE_P (mode)
12427 || (type != NULL
12428 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
12429 rs6000_passes_long_double = true;
12431 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
12432 || (SPE_VECTOR_MODE (mode)
12433 && !cum->stdarg
12434 && cum->sysv_gregno <= GP_ARG_MAX_REG))
12435 rs6000_passes_vector = true;
12437 #endif
12439 if (TARGET_ALTIVEC_ABI
12440 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12441 || (type && TREE_CODE (type) == VECTOR_TYPE
12442 && int_size_in_bytes (type) == 16)))
12444 bool stack = false;
12446 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12448 cum->vregno += n_elts;
12450 if (!TARGET_ALTIVEC)
12451 error ("cannot pass argument in vector register because"
12452 " altivec instructions are disabled, use -maltivec"
12453 " to enable them");
12455 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
12456 even if it is going to be passed in a vector register.
12457 Darwin does the same for variable-argument functions. */
12458 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12459 && TARGET_64BIT)
12460 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
12461 stack = true;
12463 else
12464 stack = true;
12466 if (stack)
12468 int align;
12470 /* Vector parameters must be 16-byte aligned. In 32-bit
12471 mode this means we need to take into account the offset
12472 to the parameter save area. In 64-bit mode, they just
12473 have to start on an even word, since the parameter save
12474 area is 16-byte aligned. */
12475 if (TARGET_32BIT)
12476 align = -(rs6000_parm_offset () + cum->words) & 3;
12477 else
12478 align = cum->words & 1;
12479 cum->words += align + rs6000_arg_size (mode, type);
12481 if (TARGET_DEBUG_ARG)
12483 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
12484 cum->words, align);
12485 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
12486 cum->nargs_prototype, cum->prototype,
12487 GET_MODE_NAME (mode));
12491 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
12492 && !cum->stdarg
12493 && cum->sysv_gregno <= GP_ARG_MAX_REG)
12494 cum->sysv_gregno++;
12496 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12498 int size = int_size_in_bytes (type);
12499 /* Variable sized types have size == -1 and are
12500 treated as if consisting entirely of ints.
12501 Pad to 16 byte boundary if needed. */
12502 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12503 && (cum->words % 2) != 0)
12504 cum->words++;
12505 /* For varargs, we can just go up by the size of the struct. */
12506 if (!named)
12507 cum->words += (size + 7) / 8;
12508 else
12510 /* It is tempting to say int register count just goes up by
12511 sizeof(type)/8, but this is wrong in a case such as
12512 { int; double; int; } [powerpc alignment]. We have to
12513 grovel through the fields for these too. */
12514 cum->intoffset = 0;
12515 cum->floats_in_gpr = 0;
12516 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
12517 rs6000_darwin64_record_arg_advance_flush (cum,
12518 size * BITS_PER_UNIT, 1);
12520 if (TARGET_DEBUG_ARG)
12522 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
12523 cum->words, TYPE_ALIGN (type), size);
12524 fprintf (stderr,
12525 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12526 cum->nargs_prototype, cum->prototype,
12527 GET_MODE_NAME (mode));
12530 else if (DEFAULT_ABI == ABI_V4)
12532 if (abi_v4_pass_in_fpr (mode))
12534 /* _Decimal128 must use an even/odd register pair. This assumes
12535 that the register number is odd when fregno is odd. */
12536 if (mode == TDmode && (cum->fregno % 2) == 1)
12537 cum->fregno++;
12539 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12540 <= FP_ARG_V4_MAX_REG)
12541 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
12542 else
12544 cum->fregno = FP_ARG_V4_MAX_REG + 1;
12545 if (mode == DFmode || FLOAT128_IBM_P (mode)
12546 || mode == DDmode || mode == TDmode)
12547 cum->words += cum->words & 1;
12548 cum->words += rs6000_arg_size (mode, type);
12551 else
12553 int n_words = rs6000_arg_size (mode, type);
12554 int gregno = cum->sysv_gregno;
12556 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12557 (r7,r8) or (r9,r10). As does any other 2 word item such
12558 as complex int due to a historical mistake. */
12559 if (n_words == 2)
12560 gregno += (1 - gregno) & 1;
12562 /* Multi-reg args are not split between registers and stack. */
12563 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12565 /* Long long and SPE vectors are aligned on the stack.
12566 So are other 2 word items such as complex int due to
12567 a historical mistake. */
12568 if (n_words == 2)
12569 cum->words += cum->words & 1;
12570 cum->words += n_words;
12573 /* Note: we continue to accumulate gregno even after we have started
12574 spilling to the stack; expand_builtin_saveregs uses this to detect
12575 that spilling has begun. */
12576 cum->sysv_gregno = gregno + n_words;
12579 if (TARGET_DEBUG_ARG)
12581 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12582 cum->words, cum->fregno);
12583 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
12584 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
12585 fprintf (stderr, "mode = %4s, named = %d\n",
12586 GET_MODE_NAME (mode), named);
12589 else
12591 int n_words = rs6000_arg_size (mode, type);
12592 int start_words = cum->words;
12593 int align_words = rs6000_parm_start (mode, type, start_words);
12595 cum->words = align_words + n_words;
12597 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
12599 /* _Decimal128 must be passed in an even/odd float register pair.
12600 This assumes that the register number is odd when fregno is
12601 odd. */
12602 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12603 cum->fregno++;
12604 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
12607 if (TARGET_DEBUG_ARG)
12609 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12610 cum->words, cum->fregno);
12611 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
12612 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
12613 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
12614 named, align_words - start_words, depth);
12619 static void
12620 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
12621 const_tree type, bool named)
12623 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12627 static rtx
12628 spe_build_register_parallel (machine_mode mode, int gregno)
12630 rtx r1, r3, r5, r7;
12632 switch (mode)
12634 case E_DFmode:
12635 r1 = gen_rtx_REG (DImode, gregno);
12636 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12637 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
12639 case E_DCmode:
12640 case E_TFmode:
12641 r1 = gen_rtx_REG (DImode, gregno);
12642 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12643 r3 = gen_rtx_REG (DImode, gregno + 2);
12644 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12645 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
12647 case E_TCmode:
12648 r1 = gen_rtx_REG (DImode, gregno);
12649 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12650 r3 = gen_rtx_REG (DImode, gregno + 2);
12651 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12652 r5 = gen_rtx_REG (DImode, gregno + 4);
12653 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
12654 r7 = gen_rtx_REG (DImode, gregno + 6);
12655 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
12656 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
12658 default:
12659 gcc_unreachable ();
12663 /* Determine where to put a SIMD argument on the SPE. */
12664 static rtx
12665 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
12666 const_tree type)
12668 int gregno = cum->sysv_gregno;
12670 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
12671 are passed and returned in a pair of GPRs for ABI compatibility. */
12672 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
12673 || mode == DCmode || mode == TCmode))
12675 int n_words = rs6000_arg_size (mode, type);
12677 /* Doubles go in an odd/even register pair (r5/r6, etc). */
12678 if (mode == DFmode)
12679 gregno += (1 - gregno) & 1;
12681 /* Multi-reg args are not split between registers and stack. */
12682 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12683 return NULL_RTX;
12685 return spe_build_register_parallel (mode, gregno);
12687 if (cum->stdarg)
12689 int n_words = rs6000_arg_size (mode, type);
12691 /* SPE vectors are put in odd registers. */
12692 if (n_words == 2 && (gregno & 1) == 0)
12693 gregno += 1;
12695 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
12697 rtx r1, r2;
12698 machine_mode m = SImode;
12700 r1 = gen_rtx_REG (m, gregno);
12701 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
12702 r2 = gen_rtx_REG (m, gregno + 1);
12703 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
12704 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
12706 else
12707 return NULL_RTX;
12709 else
12711 if (gregno <= GP_ARG_MAX_REG)
12712 return gen_rtx_REG (mode, gregno);
12713 else
12714 return NULL_RTX;
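/* For instance, a named E500 DFmode argument starting at r4 is bumped
   to the odd register r5 and comes back as
     (parallel:DF [(expr_list (reg:DI 5) (const_int 0))])
   i.e. one 64-bit GPR pair, while TFmode uses two such DImode pieces
   at byte offsets 0 and 8.  */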
12718 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12719 structure between cum->intoffset and bitpos to integer registers. */
12721 static void
12722 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12723 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12725 machine_mode mode;
12726 unsigned int regno;
12727 unsigned int startbit, endbit;
12728 int this_regno, intregs, intoffset;
12729 rtx reg;
12731 if (cum->intoffset == -1)
12732 return;
12734 intoffset = cum->intoffset;
12735 cum->intoffset = -1;
12737 /* If this is the trailing part of a word, try to only load that
12738 much into the register. Otherwise load the whole register. Note
12739 that in the latter case we may pick up unwanted bits. It's not a
12740 problem at the moment, but we may wish to revisit this. */
12742 if (intoffset % BITS_PER_WORD != 0)
12744 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
12745 if (!int_mode_for_size (bits, 0).exists (&mode))
12747 /* We couldn't find an appropriate mode, which happens,
12748 e.g., in packed structs when there are 3 bytes to load.
12749 Move intoffset back to the beginning of the word in this
12750 case. */
12751 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12752 mode = word_mode;
12755 else
12756 mode = word_mode;
12758 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12759 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12760 intregs = (endbit - startbit) / BITS_PER_WORD;
12761 this_regno = cum->words + intoffset / BITS_PER_WORD;
12763 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12764 cum->use_stack = 1;
12766 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12767 if (intregs <= 0)
12768 return;
12770 intoffset /= BITS_PER_UNIT;
12773 regno = GP_ARG_MIN_REG + this_regno;
12774 reg = gen_rtx_REG (mode, regno);
12775 rvec[(*k)++] =
12776 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12778 this_regno += 1;
12779 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12780 mode = word_mode;
12781 intregs -= 1;
12783 while (intregs > 0);
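/* Worked example (sketch, assuming BITS_PER_WORD == 64): flushing
   integer fields from intoffset == 48 up to bitpos == 192 gives
   bits == 16, so the first register is loaded in HImode from byte
   offset 6; startbit == 0, endbit == 192, and intregs == 3 GPRs are
   assigned in total.  */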
12786 /* Recursive workhorse for the following. */
12788 static void
12789 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12790 HOST_WIDE_INT startbitpos, rtx rvec[],
12791 int *k)
12793 tree f;
12795 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12796 if (TREE_CODE (f) == FIELD_DECL)
12798 HOST_WIDE_INT bitpos = startbitpos;
12799 tree ftype = TREE_TYPE (f);
12800 machine_mode mode;
12801 if (ftype == error_mark_node)
12802 continue;
12803 mode = TYPE_MODE (ftype);
12805 if (DECL_SIZE (f) != 0
12806 && tree_fits_uhwi_p (bit_position (f)))
12807 bitpos += int_bit_position (f);
12809 /* ??? FIXME: else assume zero offset. */
12811 if (TREE_CODE (ftype) == RECORD_TYPE)
12812 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12813 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12815 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12816 #if 0
12817 switch (mode)
12819 case E_SCmode: mode = SFmode; break;
12820 case E_DCmode: mode = DFmode; break;
12821 case E_TCmode: mode = TFmode; break;
12822 default: break;
12824 #endif
12825 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12826 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12828 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12829 && (mode == TFmode || mode == TDmode));
12830 /* Long double or _Decimal128 split over regs and memory. */
12831 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12832 cum->use_stack = 1;
12834 rvec[(*k)++]
12835 = gen_rtx_EXPR_LIST (VOIDmode,
12836 gen_rtx_REG (mode, cum->fregno++),
12837 GEN_INT (bitpos / BITS_PER_UNIT));
12838 if (FLOAT128_2REG_P (mode))
12839 cum->fregno++;
12841 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12843 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12844 rvec[(*k)++]
12845 = gen_rtx_EXPR_LIST (VOIDmode,
12846 gen_rtx_REG (mode, cum->vregno++),
12847 GEN_INT (bitpos / BITS_PER_UNIT));
12849 else if (cum->intoffset == -1)
12850 cum->intoffset = bitpos;
12854 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12855 the register(s) to be used for each field and subfield of a struct
12856 being passed by value, along with the offset of where the
12857 register's value may be found in the block. FP fields go in FP
12858 registers, vector fields go in vector registers, and everything
12859 else goes in int registers, packed as in memory.
12861 This code is also used for function return values. RETVAL indicates
12862 whether this is the case.
12864 Much of this is taken from the SPARC V9 port, which has a similar
12865 calling convention. */
12867 static rtx
12868 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12869 bool named, bool retval)
12871 rtx rvec[FIRST_PSEUDO_REGISTER];
12872 int k = 1, kbase = 1;
12873 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12874 /* This is a copy; modifications are not visible to our caller. */
12875 CUMULATIVE_ARGS copy_cum = *orig_cum;
12876 CUMULATIVE_ARGS *cum = &copy_cum;
12878 /* Pad to 16 byte boundary if needed. */
12879 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12880 && (cum->words % 2) != 0)
12881 cum->words++;
12883 cum->intoffset = 0;
12884 cum->use_stack = 0;
12885 cum->named = named;
12887 /* Put entries into rvec[] for individual FP and vector fields, and
12888 for the chunks of memory that go in int regs. Note we start at
12889 element 1; 0 is reserved for an indication of using memory, and
12890 may or may not be filled in below. */
12891 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12892 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12894 /* If any part of the struct went on the stack put all of it there.
12895 This hack is because the generic code for
12896 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12897 parts of the struct are not at the beginning. */
12898 if (cum->use_stack)
12900 if (retval)
12901 return NULL_RTX; /* doesn't go in registers at all */
12902 kbase = 0;
12903 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12905 if (k > 1 || cum->use_stack)
12906 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12907 else
12908 return NULL_RTX;
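/* Illustrative example (sketch): under the darwin64 ABI a by-value

     struct s { double d; int i; };

   yields a PARALLEL that places D in the next free FPR and I in a GPR
   chunk at its in-memory byte offset; rvec[0] is reserved for the
   NULL_RTX memory marker used when part of the struct spills.  */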
12911 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12913 static rtx
12914 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12915 int align_words)
12917 int n_units;
12918 int i, k;
12919 rtx rvec[GP_ARG_NUM_REG + 1];
12921 if (align_words >= GP_ARG_NUM_REG)
12922 return NULL_RTX;
12924 n_units = rs6000_arg_size (mode, type);
12926 /* Optimize the simple case where the arg fits in one gpr, except in
12927 the case of BLKmode due to assign_parms assuming that registers are
12928 BITS_PER_WORD wide. */
12929 if (n_units == 0
12930 || (n_units == 1 && mode != BLKmode))
12931 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12933 k = 0;
12934 if (align_words + n_units > GP_ARG_NUM_REG)
12935 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12936 using a magic NULL_RTX component.
12937 This is not strictly correct. Only some of the arg belongs in
12938 memory, not all of it. However, the normal scheme using
12939 function_arg_partial_nregs can result in unusual subregs, eg.
12940 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12941 store the whole arg to memory is often more efficient than code
12942 to store pieces, and we know that space is available in the right
12943 place for the whole arg. */
12944 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12946 i = 0;
12949 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12950 rtx off = GEN_INT (i++ * 4);
12951 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12953 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12955 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
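/* Worked example (sketch, GP_ARG_NUM_REG == 8): a 12-byte BLKmode
   argument starting at align_words == 6 does not fit entirely, so the
   PARALLEL holds a NULL_RTX memory marker followed by (reg:SI r9) at
   byte 0 and (reg:SI r10) at byte 4; the final 4 bytes live only in
   memory.  */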
12958 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12959 but must also be copied into the parameter save area starting at
12960 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12961 to the GPRs and/or memory. Return the number of elements used. */
12963 static int
12964 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12965 int align_words, rtx *rvec)
12967 int k = 0;
12969 if (align_words < GP_ARG_NUM_REG)
12971 int n_words = rs6000_arg_size (mode, type);
12973 if (align_words + n_words > GP_ARG_NUM_REG
12974 || mode == BLKmode
12975 || (TARGET_32BIT && TARGET_POWERPC64))
12977 /* If this is partially on the stack, then we only
12978 include the portion actually in registers here. */
12979 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12980 int i = 0;
12982 if (align_words + n_words > GP_ARG_NUM_REG)
12984 /* Not all of the arg fits in gprs. Say that it goes in memory
12985 too, using a magic NULL_RTX component. Also see comment in
12986 rs6000_mixed_function_arg for why the normal
12987 function_arg_partial_nregs scheme doesn't work in this case. */
12988 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12993 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12994 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12995 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12997 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12999 else
13001 /* The whole arg fits in gprs. */
13002 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
13003 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
13006 else
13008 /* It's entirely in memory. */
13009 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
13012 return k;
13015 /* RVEC is a vector of K components of an argument of mode MODE.
13016 Construct the final function_arg return value from it. */
13018 static rtx
13019 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
13021 gcc_assert (k >= 1);
13023 /* Avoid returning a PARALLEL in the trivial cases. */
13024 if (k == 1)
13026 if (XEXP (rvec[0], 0) == NULL_RTX)
13027 return NULL_RTX;
13029 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
13030 return XEXP (rvec[0], 0);
13033 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
13036 /* Determine where to put an argument to a function.
13037 Value is zero to push the argument on the stack,
13038 or a hard register in which to store the argument.
13040 MODE is the argument's machine mode.
13041 TYPE is the data type of the argument (as a tree).
13042 This is null for libcalls where that information may
13043 not be available.
13044 CUM is a variable of type CUMULATIVE_ARGS which gives info about
13045 the preceding args and about the function being called. It is
13046 not modified in this routine.
13047 NAMED is nonzero if this argument is a named parameter
13048 (otherwise it is an extra parameter matching an ellipsis).
13050 On RS/6000 the first eight words of non-FP are normally in registers
13051 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
13052 Under V.4, the first 8 FP args are in registers.
13054 If this is floating-point and no prototype is specified, we use
13055 both an FP and integer register (or possibly FP reg and stack). Library
13056 functions (when CALL_LIBCALL is set) always have the proper types for args,
13057 so we can pass the FP value just in one register. emit_library_function
13058 doesn't support PARALLEL anyway.
13060 Note that for args passed by reference, function_arg will be called
13061 with MODE and TYPE set to that of the pointer to the arg, not the arg
13062 itself. */
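/* Example (sketch): under the 64-bit AIX ABI,

     void f (int a, double b, int c);

   passes A in r3 and C in r5, while B goes in f1; the GPR r4 that
   shadows B's doubleword in the parameter save area is left unused
   for prototyped calls.  */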
13064 static rtx
13065 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
13066 const_tree type, bool named)
13068 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13069 enum rs6000_abi abi = DEFAULT_ABI;
13070 machine_mode elt_mode;
13071 int n_elts;
13073 /* Return a marker to indicate whether CR1 needs to set or clear the
13074 bit that V.4 uses to say fp args were passed in registers.
13075 Assume that we don't need the marker for software floating point,
13076 or compiler generated library calls. */
13077 if (mode == VOIDmode)
13079 if (abi == ABI_V4
13080 && (cum->call_cookie & CALL_LIBCALL) == 0
13081 && (cum->stdarg
13082 || (cum->nargs_prototype < 0
13083 && (cum->prototype || TARGET_NO_PROTOTYPE))))
13085 /* For the SPE, we need to crxor CR6 always. */
13086 if (TARGET_SPE_ABI)
13087 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
13088 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
13089 return GEN_INT (cum->call_cookie
13090 | ((cum->fregno == FP_ARG_MIN_REG)
13091 ? CALL_V4_SET_FP_ARGS
13092 : CALL_V4_CLEAR_FP_ARGS));
13095 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
13098 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
13100 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
13102 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
13103 if (rslt != NULL_RTX)
13104 return rslt;
13105 /* Else fall through to usual handling. */
13108 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
13110 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
13111 rtx r, off;
13112 int i, k = 0;
13114 /* Do we also need to pass this argument in the parameter save area?
13115 Library support functions for IEEE 128-bit are assumed to not need the
13116 value passed both in GPRs and in vector registers. */
13117 if (TARGET_64BIT && !cum->prototype
13118 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
13120 int align_words = ROUND_UP (cum->words, 2);
13121 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
13124 /* Describe where this argument goes in the vector registers. */
13125 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
13127 r = gen_rtx_REG (elt_mode, cum->vregno + i);
13128 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
13129 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13132 return rs6000_finish_function_arg (mode, rvec, k);
13134 else if (TARGET_ALTIVEC_ABI
13135 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
13136 || (type && TREE_CODE (type) == VECTOR_TYPE
13137 && int_size_in_bytes (type) == 16)))
13139 if (named || abi == ABI_V4)
13140 return NULL_RTX;
13141 else
13143 /* Vector parameters to varargs functions under AIX or Darwin
13144 get passed in memory and possibly also in GPRs. */
13145 int align, align_words, n_words;
13146 machine_mode part_mode;
13148 /* Vector parameters must be 16-byte aligned. In 32-bit
13149 mode this means we need to take into account the offset
13150 to the parameter save area. In 64-bit mode, they just
13151 have to start on an even word, since the parameter save
13152 area is 16-byte aligned. */
13153 if (TARGET_32BIT)
13154 align = -(rs6000_parm_offset () + cum->words) & 3;
13155 else
13156 align = cum->words & 1;
13157 align_words = cum->words + align;
13159 /* Out of registers? Memory, then. */
13160 if (align_words >= GP_ARG_NUM_REG)
13161 return NULL_RTX;
13163 if (TARGET_32BIT && TARGET_POWERPC64)
13164 return rs6000_mixed_function_arg (mode, type, align_words);
13166 /* The vector value goes in GPRs. Only the part of the
13167 value in GPRs is reported here. */
13168 part_mode = mode;
13169 n_words = rs6000_arg_size (mode, type);
13170 if (align_words + n_words > GP_ARG_NUM_REG)
13171 /* Fortunately, there are only two possibilities, the value
13172 is either wholly in GPRs or half in GPRs and half not. */
13173 part_mode = DImode;
13175 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
13178 else if (TARGET_SPE_ABI && TARGET_SPE
13179 && (SPE_VECTOR_MODE (mode)
13180 || (TARGET_E500_DOUBLE && (mode == DFmode
13181 || mode == DCmode
13182 || mode == TFmode
13183 || mode == TCmode))))
13184 return rs6000_spe_function_arg (cum, mode, type);
13186 else if (abi == ABI_V4)
13188 if (abi_v4_pass_in_fpr (mode))
13190 /* _Decimal128 must use an even/odd register pair. This assumes
13191 that the register number is odd when fregno is odd. */
13192 if (mode == TDmode && (cum->fregno % 2) == 1)
13193 cum->fregno++;
13195 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
13196 <= FP_ARG_V4_MAX_REG)
13197 return gen_rtx_REG (mode, cum->fregno);
13198 else
13199 return NULL_RTX;
13201 else
13203 int n_words = rs6000_arg_size (mode, type);
13204 int gregno = cum->sysv_gregno;
13206 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
13207 (r7,r8) or (r9,r10). So is any other 2-word item such
13208 as complex int, due to a historical mistake. */
13209 if (n_words == 2)
13210 gregno += (1 - gregno) & 1;
13212 /* Multi-reg args are not split between registers and stack. */
13213 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
13214 return NULL_RTX;
13216 if (TARGET_32BIT && TARGET_POWERPC64)
13217 return rs6000_mixed_function_arg (mode, type,
13218 gregno - GP_ARG_MIN_REG);
13219 return gen_rtx_REG (mode, gregno);
13222 else
13224 int align_words = rs6000_parm_start (mode, type, cum->words);
13226 /* _Decimal128 must be passed in an even/odd float register pair.
13227 This assumes that the register number is odd when fregno is odd. */
13228 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
13229 cum->fregno++;
13231 if (USE_FP_FOR_ARG_P (cum, elt_mode))
13233 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
13234 rtx r, off;
13235 int i, k = 0;
13236 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13237 int fpr_words;
13239 /* Do we also need to pass this argument in the parameter
13240 save area? */
13241 if (type && (cum->nargs_prototype <= 0
13242 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13243 && TARGET_XL_COMPAT
13244 && align_words >= GP_ARG_NUM_REG)))
13245 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
13247 /* Describe where this argument goes in the fprs. */
13248 for (i = 0; i < n_elts
13249 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
13251 /* Check if the argument is split over registers and memory.
13252 This can only ever happen for long double or _Decimal128;
13253 complex types are handled via split_complex_arg. */
13254 machine_mode fmode = elt_mode;
13255 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
13257 gcc_assert (FLOAT128_2REG_P (fmode));
13258 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
13261 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
13262 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
13263 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13266 /* If there were not enough FPRs to hold the argument, the rest
13267 usually goes into memory. However, if the current position
13268 is still within the register parameter area, a portion may
13269 actually have to go into GPRs.
13271 Note that it may happen that the portion of the argument
13272 passed in the first "half" of the first GPR was already
13273 passed in the last FPR as well.
13275 For unnamed arguments, we already set up GPRs to cover the
13276 whole argument in rs6000_psave_function_arg, so there is
13277 nothing further to do at this point. */
13278 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
13279 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
13280 && cum->nargs_prototype > 0)
13282 static bool warned;
13284 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
13285 int n_words = rs6000_arg_size (mode, type);
13287 align_words += fpr_words;
13288 n_words -= fpr_words;
13292 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
13293 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
13294 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13296 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
13298 if (!warned && warn_psabi)
13300 warned = true;
13301 inform (input_location,
13302 "the ABI of passing homogeneous float aggregates"
13303 " has changed in GCC 5");
13307 return rs6000_finish_function_arg (mode, rvec, k);
13309 else if (align_words < GP_ARG_NUM_REG)
13311 if (TARGET_32BIT && TARGET_POWERPC64)
13312 return rs6000_mixed_function_arg (mode, type, align_words);
13314 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
13316 else
13317 return NULL_RTX;
13321 /* For an arg passed partly in registers and partly in memory, this is
13322 the number of bytes passed in registers. For args passed entirely in
13323 registers or entirely in memory, zero. When an arg is described by a
13324 PARALLEL, perhaps using more than one register type, this function
13325 returns the number of bytes used by the first element of the PARALLEL. */
13327 static int
13328 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
13329 tree type, bool named)
13331 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13332 bool passed_in_gprs = true;
13333 int ret = 0;
13334 int align_words;
13335 machine_mode elt_mode;
13336 int n_elts;
13338 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
13340 if (DEFAULT_ABI == ABI_V4)
13341 return 0;
13343 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
13345 /* If we are passing this arg in the fixed parameter save area (gprs or
13346 memory) as well as VRs, we do not use the partial bytes mechanism;
13347 instead, rs6000_function_arg will return a PARALLEL including a memory
13348 element as necessary. Library support functions for IEEE 128-bit are
13349 assumed to not need the value passed both in GPRs and in vector
13350 registers. */
13351 if (TARGET_64BIT && !cum->prototype
13352 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
13353 return 0;
13355 /* Otherwise, we pass in VRs only. Check for partial copies. */
13356 passed_in_gprs = false;
13357 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
13358 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
13361 /* In this complicated case we just disable the partial_nregs code. */
13362 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
13363 return 0;
13365 align_words = rs6000_parm_start (mode, type, cum->words);
13367 if (USE_FP_FOR_ARG_P (cum, elt_mode))
13369 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13371 /* If we are passing this arg in the fixed parameter save area
13372 (gprs or memory) as well as FPRs, we do not use the partial
13373 bytes mechanism; instead, rs6000_function_arg will return a
13374 PARALLEL including a memory element as necessary. */
13375 if (type
13376 && (cum->nargs_prototype <= 0
13377 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13378 && TARGET_XL_COMPAT
13379 && align_words >= GP_ARG_NUM_REG)))
13380 return 0;
13382 /* Otherwise, we pass in FPRs only. Check for partial copies. */
13383 passed_in_gprs = false;
13384 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
13386 /* Compute number of bytes / words passed in FPRs. If there
13387 is still space available in the register parameter area
13388 *after* that amount, a part of the argument will be passed
13389 in GPRs. In that case, the total amount passed in any
13390 registers is equal to the amount that would have been passed
13391 in GPRs if everything were passed there, so we fall back to
13392 the GPR code below to compute the appropriate value. */
13393 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
13394 * MIN (8, GET_MODE_SIZE (elt_mode)));
13395 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
13397 if (align_words + fpr_words < GP_ARG_NUM_REG)
13398 passed_in_gprs = true;
13399 else
13400 ret = fpr;
13404 if (passed_in_gprs
13405 && align_words < GP_ARG_NUM_REG
13406 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
13407 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
13409 if (ret != 0 && TARGET_DEBUG_ARG)
13410 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
13412 return ret;
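/* Worked example (sketch, 64-bit): a 24-byte aggregate passed in GPRs
   starting at align_words == 6 has only r9 and r10 available, so this
   function returns (8 - 6) * 8 == 16 partial bytes; the remaining
   8 bytes go on the stack.  */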
13415 /* A C expression that indicates when an argument must be passed by
13416 reference. If nonzero for an argument, a copy of that argument is
13417 made in memory and a pointer to the argument is passed instead of
13418 the argument itself. The pointer is passed in whatever way is
13419 appropriate for passing a pointer to that type.
13421 Under V.4, aggregates and long double are passed by reference.
13423 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
13424 reference unless the AltiVec vector extension ABI is in force.
13426 As an extension to all ABIs, variable sized types are passed by
13427 reference. */
13429 static bool
13430 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
13431 machine_mode mode, const_tree type,
13432 bool named ATTRIBUTE_UNUSED)
13434 if (!type)
13435 return 0;
13437 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
13438 && FLOAT128_IEEE_P (TYPE_MODE (type)))
13440 if (TARGET_DEBUG_ARG)
13441 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
13442 return 1;
13445 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
13447 if (TARGET_DEBUG_ARG)
13448 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
13449 return 1;
13452 if (int_size_in_bytes (type) < 0)
13454 if (TARGET_DEBUG_ARG)
13455 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
13456 return 1;
13459 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
13460 modes only exist for GCC vector types if -maltivec. */
13461 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13463 if (TARGET_DEBUG_ARG)
13464 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
13465 return 1;
13468 /* Pass synthetic vectors in memory. */
13469 if (TREE_CODE (type) == VECTOR_TYPE
13470 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
13472 static bool warned_for_pass_big_vectors = false;
13473 if (TARGET_DEBUG_ARG)
13474 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
13475 if (!warned_for_pass_big_vectors)
13477 warning (OPT_Wpsabi, "GCC vector passed by reference: "
13478 "non-standard ABI extension with no compatibility guarantee");
13479 warned_for_pass_big_vectors = true;
13481 return 1;
13484 return 0;
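/* Example (sketch): under V.4,

     struct s { int x; };                        // aggregate: by reference
     __attribute__((vector_size (32))) int v;    // synthetic vector: by
                                                 // reference, with a
                                                 // -Wpsabi warning

   whereas scalar ints and doubles are passed by value.  */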
13487 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
13488 already processed. Return true if the parameter must be passed
13489 (fully or partially) on the stack. */
13491 static bool
13492 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
13494 machine_mode mode;
13495 int unsignedp;
13496 rtx entry_parm;
13498 /* Catch errors. */
13499 if (type == NULL || type == error_mark_node)
13500 return true;
13502 /* Handle types with no storage requirement. */
13503 if (TYPE_MODE (type) == VOIDmode)
13504 return false;
13506 /* Handle complex types. */
13507 if (TREE_CODE (type) == COMPLEX_TYPE)
13508 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
13509 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
13511 /* Handle transparent aggregates. */
13512 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
13513 && TYPE_TRANSPARENT_AGGR (type))
13514 type = TREE_TYPE (first_field (type));
13516 /* See if this arg was passed by invisible reference. */
13517 if (pass_by_reference (get_cumulative_args (args_so_far),
13518 TYPE_MODE (type), type, true))
13519 type = build_pointer_type (type);
13521 /* Find mode as it is passed by the ABI. */
13522 unsignedp = TYPE_UNSIGNED (type);
13523 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
13525 /* If we must pass in stack, we need a stack. */
13526 if (rs6000_must_pass_in_stack (mode, type))
13527 return true;
13529 /* If there is no incoming register, we need a stack. */
13530 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
13531 if (entry_parm == NULL)
13532 return true;
13534 /* Likewise if we need to pass both in registers and on the stack. */
13535 if (GET_CODE (entry_parm) == PARALLEL
13536 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
13537 return true;
13539 /* Also true if we're partially in registers and partially not. */
13540 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
13541 return true;
13543 /* Update info on where next arg arrives in registers. */
13544 rs6000_function_arg_advance (args_so_far, mode, type, true);
13545 return false;
13548 /* Return true if FUN has no prototype, has a variable argument
13549 list, or passes any parameter in memory. */
13551 static bool
13552 rs6000_function_parms_need_stack (tree fun, bool incoming)
13554 tree fntype, result;
13555 CUMULATIVE_ARGS args_so_far_v;
13556 cumulative_args_t args_so_far;
13558 if (!fun)
13559 /* Must be a libcall, all of which only use reg parms. */
13560 return false;
13562 fntype = fun;
13563 if (!TYPE_P (fun))
13564 fntype = TREE_TYPE (fun);
13566 /* Varargs functions need the parameter save area. */
13567 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
13568 return true;
13570 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
13571 args_so_far = pack_cumulative_args (&args_so_far_v);
13573 /* When incoming, we will have been passed the function decl.
13574 It is necessary to use the decl to handle K&R style functions,
13575 where TYPE_ARG_TYPES may not be available. */
13576 if (incoming)
13578 gcc_assert (DECL_P (fun));
13579 result = DECL_RESULT (fun);
13581 else
13582 result = TREE_TYPE (fntype);
13584 if (result && aggregate_value_p (result, fntype))
13586 if (!TYPE_P (result))
13587 result = TREE_TYPE (result);
13588 result = build_pointer_type (result);
13589 rs6000_parm_needs_stack (args_so_far, result);
13592 if (incoming)
13594 tree parm;
13596 for (parm = DECL_ARGUMENTS (fun);
13597 parm && parm != void_list_node;
13598 parm = TREE_CHAIN (parm))
13599 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
13600 return true;
13602 else
13604 function_args_iterator args_iter;
13605 tree arg_type;
13607 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
13608 if (rs6000_parm_needs_stack (args_so_far, arg_type))
13609 return true;
13612 return false;
13615 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
13616 usually a constant depending on the ABI. However, in the ELFv2 ABI
13617 the register parameter area is optional when calling a function that
13618 has a prototype in scope, has no variable argument list, and passes
13619 all parameters in registers. */
13621 static int
13622 rs6000_reg_parm_stack_space (tree fun, bool incoming)
13624 int reg_parm_stack_space;
13626 switch (DEFAULT_ABI)
13628 default:
13629 reg_parm_stack_space = 0;
13630 break;
13632 case ABI_AIX:
13633 case ABI_DARWIN:
13634 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13635 break;
13637 case ABI_ELFv2:
13638 /* ??? Recomputing this every time is a bit expensive. Is there
13639 a place to cache this information? */
13640 if (rs6000_function_parms_need_stack (fun, incoming))
13641 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13642 else
13643 reg_parm_stack_space = 0;
13644 break;
13647 return reg_parm_stack_space;
13650 static void
13651 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
13653 int i;
13654 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
13656 if (nregs == 0)
13657 return;
13659 for (i = 0; i < nregs; i++)
13661 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
13662 if (reload_completed)
13664 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
13665 tem = NULL_RTX;
13666 else
13667 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
13668 i * GET_MODE_SIZE (reg_mode));
13670 else
13671 tem = replace_equiv_address (tem, XEXP (tem, 0));
13673 gcc_assert (tem);
13675 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
13679 /* Perform any actions needed for a function that is receiving a
13680 variable number of arguments.
13682 CUM is as above.
13684 MODE and TYPE are the mode and type of the current parameter.
13686 PRETEND_SIZE is a variable that should be set to the amount of stack
13687 that must be pushed by the prolog to pretend that our caller pushed
13688 it.
13690 Normally, this macro will push all remaining incoming registers on the
13691 stack and set PRETEND_SIZE to the length of the registers pushed. */
13693 static void
13694 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
13695 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13696 int no_rtl)
13698 CUMULATIVE_ARGS next_cum;
13699 int reg_size = TARGET_32BIT ? 4 : 8;
13700 rtx save_area = NULL_RTX, mem;
13701 int first_reg_offset;
13702 alias_set_type set;
13704 /* Skip the last named argument. */
13705 next_cum = *get_cumulative_args (cum);
13706 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
13708 if (DEFAULT_ABI == ABI_V4)
13710 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
13712 if (! no_rtl)
13714 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
13715 HOST_WIDE_INT offset = 0;
13717 /* Try to optimize the size of the varargs save area.
13718 The ABI requires that ap.reg_save_area is doubleword
13719 aligned, but we don't need to allocate space for all
13720 the bytes, only those to which we actually will save
13721 anything. */
13722 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
13723 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
13724 if (TARGET_HARD_FLOAT && TARGET_FPRS
13725 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13726 && cfun->va_list_fpr_size)
13728 if (gpr_reg_num)
13729 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13730 * UNITS_PER_FP_WORD;
13731 if (cfun->va_list_fpr_size
13732 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13733 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13734 else
13735 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13736 * UNITS_PER_FP_WORD;
13738 if (gpr_reg_num)
13740 offset = -((first_reg_offset * reg_size) & ~7);
13741 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13743 gpr_reg_num = cfun->va_list_gpr_size;
13744 if (reg_size == 4 && (first_reg_offset & 1))
13745 gpr_reg_num++;
13747 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13749 else if (fpr_size)
13750 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13751 * UNITS_PER_FP_WORD
13752 - (int) (GP_ARG_NUM_REG * reg_size);
13754 if (gpr_size + fpr_size)
13756 rtx reg_save_area
13757 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13758 gcc_assert (GET_CODE (reg_save_area) == MEM);
13759 reg_save_area = XEXP (reg_save_area, 0);
13760 if (GET_CODE (reg_save_area) == PLUS)
13762 gcc_assert (XEXP (reg_save_area, 0)
13763 == virtual_stack_vars_rtx);
13764 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13765 offset += INTVAL (XEXP (reg_save_area, 1));
13767 else
13768 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13771 cfun->machine->varargs_save_offset = offset;
13772 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13775 else
13777 first_reg_offset = next_cum.words;
13778 save_area = crtl->args.internal_arg_pointer;
13780 if (targetm.calls.must_pass_in_stack (mode, type))
13781 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13784 set = get_varargs_alias_set ();
13785 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13786 && cfun->va_list_gpr_size)
13788 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13790 if (va_list_gpr_counter_field)
13791 /* V4 va_list_gpr_size counts number of registers needed. */
13792 n_gpr = cfun->va_list_gpr_size;
13793 else
13794 /* char * va_list instead counts number of bytes needed. */
13795 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13797 if (nregs > n_gpr)
13798 nregs = n_gpr;
13800 mem = gen_rtx_MEM (BLKmode,
13801 plus_constant (Pmode, save_area,
13802 first_reg_offset * reg_size));
13803 MEM_NOTRAP_P (mem) = 1;
13804 set_mem_alias_set (mem, set);
13805 set_mem_align (mem, BITS_PER_WORD);
13807 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13808 nregs);
13811 /* Save FP registers if needed. */
13812 if (DEFAULT_ABI == ABI_V4
13813 && TARGET_HARD_FLOAT && TARGET_FPRS
13814 && ! no_rtl
13815 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13816 && cfun->va_list_fpr_size)
13818 int fregno = next_cum.fregno, nregs;
13819 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13820 rtx lab = gen_label_rtx ();
13821 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13822 * UNITS_PER_FP_WORD);
13824 emit_jump_insn
13825 (gen_rtx_SET (pc_rtx,
13826 gen_rtx_IF_THEN_ELSE (VOIDmode,
13827 gen_rtx_NE (VOIDmode, cr1,
13828 const0_rtx),
13829 gen_rtx_LABEL_REF (VOIDmode, lab),
13830 pc_rtx)));
13832 for (nregs = 0;
13833 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13834 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13836 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13837 ? DFmode : SFmode,
13838 plus_constant (Pmode, save_area, off));
13839 MEM_NOTRAP_P (mem) = 1;
13840 set_mem_alias_set (mem, set);
13841 set_mem_align (mem, GET_MODE_ALIGNMENT (
13842 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13843 ? DFmode : SFmode));
13844 emit_move_insn (mem, gen_rtx_REG (
13845 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13846 ? DFmode : SFmode, fregno));
13849 emit_label (lab);
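/* Resulting V.4 reg_save_area layout (illustrative, assuming
   reg_size == 4 and UNITS_PER_FP_WORD == 8):

     bytes  0..31   r3..r10   incoming GPR words
     bytes 32..95   f1..f8    incoming FPR doublewords (hard float only)

   which matches the sav_ofs / sav_scale values used by va_arg below.  */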
13853 /* Create the va_list data type. */
13855 static tree
13856 rs6000_build_builtin_va_list (void)
13858 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13860 /* For AIX, prefer 'char *' because that's what the system
13861 header files like. */
13862 if (DEFAULT_ABI != ABI_V4)
13863 return build_pointer_type (char_type_node);
13865 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13866 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13867 get_identifier ("__va_list_tag"), record);
13869 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13870 unsigned_char_type_node);
13871 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13872 unsigned_char_type_node);
13873 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13874 every user file. */
13875 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13876 get_identifier ("reserved"), short_unsigned_type_node);
13877 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13878 get_identifier ("overflow_arg_area"),
13879 ptr_type_node);
13880 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13881 get_identifier ("reg_save_area"),
13882 ptr_type_node);
13884 va_list_gpr_counter_field = f_gpr;
13885 va_list_fpr_counter_field = f_fpr;
13887 DECL_FIELD_CONTEXT (f_gpr) = record;
13888 DECL_FIELD_CONTEXT (f_fpr) = record;
13889 DECL_FIELD_CONTEXT (f_res) = record;
13890 DECL_FIELD_CONTEXT (f_ovf) = record;
13891 DECL_FIELD_CONTEXT (f_sav) = record;
13893 TYPE_STUB_DECL (record) = type_decl;
13894 TYPE_NAME (record) = type_decl;
13895 TYPE_FIELDS (record) = f_gpr;
13896 DECL_CHAIN (f_gpr) = f_fpr;
13897 DECL_CHAIN (f_fpr) = f_res;
13898 DECL_CHAIN (f_res) = f_ovf;
13899 DECL_CHAIN (f_ovf) = f_sav;
13901 layout_type (record);
13903 /* The correct type is an array type of one element. */
13904 return build_array_type (record, build_index_type (size_zero_node));
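/* The record built above corresponds to this user-visible SVR4 type
   (an illustrative C equivalent, not authoritative):

     typedef struct __va_list_tag {
       unsigned char gpr;         // index of next GPR, 0..8
       unsigned char fpr;         // index of next FPR, 0..8
       unsigned short reserved;   // the named padding
       void *overflow_arg_area;   // next argument on the stack
       void *reg_save_area;       // block saved by the prologue
     } __builtin_va_list[1];  */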
13907 /* Implement va_start. */
13909 static void
13910 rs6000_va_start (tree valist, rtx nextarg)
13912 HOST_WIDE_INT words, n_gpr, n_fpr;
13913 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13914 tree gpr, fpr, ovf, sav, t;
13916 /* Only SVR4 needs something special. */
13917 if (DEFAULT_ABI != ABI_V4)
13919 std_expand_builtin_va_start (valist, nextarg);
13920 return;
13923 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13924 f_fpr = DECL_CHAIN (f_gpr);
13925 f_res = DECL_CHAIN (f_fpr);
13926 f_ovf = DECL_CHAIN (f_res);
13927 f_sav = DECL_CHAIN (f_ovf);
13929 valist = build_simple_mem_ref (valist);
13930 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13931 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13932 f_fpr, NULL_TREE);
13933 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13934 f_ovf, NULL_TREE);
13935 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13936 f_sav, NULL_TREE);
13938 /* Count number of gp and fp argument registers used. */
13939 words = crtl->args.info.words;
13940 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13941 GP_ARG_NUM_REG);
13942 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13943 FP_ARG_NUM_REG);
13945 if (TARGET_DEBUG_ARG)
13946 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13947 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13948 words, n_gpr, n_fpr);
13950 if (cfun->va_list_gpr_size)
13952 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13953 build_int_cst (NULL_TREE, n_gpr));
13954 TREE_SIDE_EFFECTS (t) = 1;
13955 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13958 if (cfun->va_list_fpr_size)
13960 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13961 build_int_cst (NULL_TREE, n_fpr));
13962 TREE_SIDE_EFFECTS (t) = 1;
13963 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13965 #ifdef HAVE_AS_GNU_ATTRIBUTE
13966 if (call_ABI_of_interest (cfun->decl))
13967 rs6000_passes_float = true;
13968 #endif
13971 /* Find the overflow area. */
13972 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13973 if (words != 0)
13974 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13975 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13976 TREE_SIDE_EFFECTS (t) = 1;
13977 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13979 /* If there were no va_arg invocations, don't set up the register
13980 save area. */
13981 if (!cfun->va_list_gpr_size
13982 && !cfun->va_list_fpr_size
13983 && n_gpr < GP_ARG_NUM_REG
13984 && n_fpr < FP_ARG_V4_MAX_REG)
13985 return;
13987 /* Find the register save area. */
13988 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13989 if (cfun->machine->varargs_save_offset)
13990 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13991 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13992 TREE_SIDE_EFFECTS (t) = 1;
13993 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13996 /* Implement va_arg. */
13998 static tree
13999 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
14000 gimple_seq *post_p)
14002 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
14003 tree gpr, fpr, ovf, sav, reg, t, u;
14004 int size, rsize, n_reg, sav_ofs, sav_scale;
14005 tree lab_false, lab_over, addr;
14006 int align;
14007 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
14008 int regalign = 0;
14009 gimple *stmt;
14011 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
14013 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
14014 return build_va_arg_indirect_ref (t);
14017 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
14018 earlier version of gcc, with the property that it always applied alignment
14019 adjustments to the va-args (even for zero-sized types). The cheapest way
14020 to deal with this is to replicate the effect of the part of
14021 std_gimplify_va_arg_expr that carries out the align adjust, for the
14022 relevant case.
14023 We don't need to check for pass-by-reference because of the test above.
14024 We can return a simplified answer, since we know there's no offset to add. */
14026 if (((TARGET_MACHO
14027 && rs6000_darwin64_abi)
14028 || DEFAULT_ABI == ABI_ELFv2
14029 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
14030 && integer_zerop (TYPE_SIZE (type)))
14032 unsigned HOST_WIDE_INT align, boundary;
14033 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
14034 align = PARM_BOUNDARY / BITS_PER_UNIT;
14035 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
14036 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
14037 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
14038 boundary /= BITS_PER_UNIT;
14039 if (boundary > align)
14041 tree t;
14042 /* This updates arg ptr by the amount that would be necessary
14043 to align the zero-sized (but not zero-alignment) item. */
14044 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
14045 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
14046 gimplify_and_add (t, pre_p);
14048 t = fold_convert (sizetype, valist_tmp);
14049 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
14050 fold_convert (TREE_TYPE (valist),
14051 fold_build2 (BIT_AND_EXPR, sizetype, t,
14052 size_int (-boundary))));
14053 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
14054 gimplify_and_add (t, pre_p);
14056 /* Since it is zero-sized there's no increment for the item itself. */
14057 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
14058 return build_va_arg_indirect_ref (valist_tmp);
14061 if (DEFAULT_ABI != ABI_V4)
14063 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
14065 tree elem_type = TREE_TYPE (type);
14066 machine_mode elem_mode = TYPE_MODE (elem_type);
14067 int elem_size = GET_MODE_SIZE (elem_mode);
14069 if (elem_size < UNITS_PER_WORD)
14071 tree real_part, imag_part;
14072 gimple_seq post = NULL;
14074 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
14075 &post);
14076 /* Copy the value into a temporary, lest the formal temporary
14077 be reused out from under us. */
14078 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
14079 gimple_seq_add_seq (pre_p, post);
14081 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
14082 post_p);
14084 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
14088 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
14091 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
14092 f_fpr = DECL_CHAIN (f_gpr);
14093 f_res = DECL_CHAIN (f_fpr);
14094 f_ovf = DECL_CHAIN (f_res);
14095 f_sav = DECL_CHAIN (f_ovf);
14097 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
14098 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
14099 f_fpr, NULL_TREE);
14100 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
14101 f_ovf, NULL_TREE);
14102 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
14103 f_sav, NULL_TREE);
14105 size = int_size_in_bytes (type);
14106 rsize = (size + 3) / 4;
14107 int pad = 4 * rsize - size;
14108 align = 1;
14110 machine_mode mode = TYPE_MODE (type);
14111 if (abi_v4_pass_in_fpr (mode))
14113 /* FP args go in FP registers, if present. */
14114 reg = fpr;
14115 n_reg = (size + 7) / 8;
14116 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
14117 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
14118 if (mode != SFmode && mode != SDmode)
14119 align = 8;
14121 else
14123 /* Otherwise into GP registers. */
14124 reg = gpr;
14125 n_reg = rsize;
14126 sav_ofs = 0;
14127 sav_scale = 4;
14128 if (n_reg == 2)
14129 align = 8;
14132 /* Pull the value out of the saved registers.... */
14134 lab_over = NULL;
14135 addr = create_tmp_var (ptr_type_node, "addr");
14137 /* AltiVec vectors never go in registers when -mabi=altivec. */
14138 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
14139 align = 16;
14140 else
14142 lab_false = create_artificial_label (input_location);
14143 lab_over = create_artificial_label (input_location);
14145 /* Long long and SPE vectors are aligned in the registers.
14146 So is any other 2-GPR item such as complex int, due to a
14147 historical mistake. */
14148 u = reg;
14149 if (n_reg == 2 && reg == gpr)
14151 regalign = 1;
14152 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14153 build_int_cst (TREE_TYPE (reg), n_reg - 1));
14154 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
14155 unshare_expr (reg), u);
14157 /* _Decimal128 is passed in even/odd fpr pairs; the stored
14158 reg number is 0 for f1, so we want to make it odd. */
14159 else if (reg == fpr && mode == TDmode)
14161 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14162 build_int_cst (TREE_TYPE (reg), 1));
14163 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
14166 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
14167 t = build2 (GE_EXPR, boolean_type_node, u, t);
14168 u = build1 (GOTO_EXPR, void_type_node, lab_false);
14169 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
14170 gimplify_and_add (t, pre_p);
14172 t = sav;
14173 if (sav_ofs)
14174 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
14176 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14177 build_int_cst (TREE_TYPE (reg), n_reg));
14178 u = fold_convert (sizetype, u);
14179 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
14180 t = fold_build_pointer_plus (t, u);
14182 /* _Decimal32 varargs are located in the second word of the 64-bit
14183 FP register for 32-bit binaries. */
14184 if (TARGET_32BIT
14185 && TARGET_HARD_FLOAT && TARGET_FPRS
14186 && mode == SDmode)
14187 t = fold_build_pointer_plus_hwi (t, size);
14189 /* Args are passed right-aligned. */
14190 if (BYTES_BIG_ENDIAN)
14191 t = fold_build_pointer_plus_hwi (t, pad);
14193 gimplify_assign (addr, t, pre_p);
14195 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
14197 stmt = gimple_build_label (lab_false);
14198 gimple_seq_add_stmt (pre_p, stmt);
14200 if ((n_reg == 2 && !regalign) || n_reg > 2)
14202 /* Ensure that we don't find any more args in regs.
14203 Alignment has been taken care of for the special cases. */
14204 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
14208 /* ... otherwise out of the overflow area. */
14210 /* Care for on-stack alignment if needed. */
14211 t = ovf;
14212 if (align != 1)
14214 t = fold_build_pointer_plus_hwi (t, align - 1);
14215 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
14216 build_int_cst (TREE_TYPE (t), -align));
14219 /* Args are passed right-aligned. */
14220 if (BYTES_BIG_ENDIAN)
14221 t = fold_build_pointer_plus_hwi (t, pad);
14223 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
14225 gimplify_assign (unshare_expr (addr), t, pre_p);
14227 t = fold_build_pointer_plus_hwi (t, size);
14228 gimplify_assign (unshare_expr (ovf), t, pre_p);
14230 if (lab_over)
14232 stmt = gimple_build_label (lab_over);
14233 gimple_seq_add_stmt (pre_p, stmt);
14236 if (STRICT_ALIGNMENT
14237 && (TYPE_ALIGN (type)
14238 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
14240 /* The value (of type complex double, for example) may not be
14241 aligned in memory in the saved registers, so copy via a
14242 temporary. (This is the same code as used for SPARC.) */
14243 tree tmp = create_tmp_var (type, "va_arg_tmp");
14244 tree dest_addr = build_fold_addr_expr (tmp);
14246 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
14247 3, dest_addr, addr, size_int (rsize * 4));
14249 gimplify_and_add (copy, pre_p);
14250 addr = dest_addr;
14253 addr = fold_convert (ptrtype, addr);
14254 return build_va_arg_indirect_ref (addr);
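/* Worked example (sketch): fetching a double with va_arg under V.4
   hard float uses reg == fpr, n_reg == 1, sav_ofs == 32 (skipping the
   eight 4-byte GPR slots) and sav_scale == 8, so the value is read
   from reg_save_area + 32 + fpr * 8 while fpr < 8, and from the
   8-byte-aligned overflow area once the FPR slots are exhausted.  */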
14257 /* Builtins. */
14259 static void
14260 def_builtin (const char *name, tree type, enum rs6000_builtins code)
14262 tree t;
14263 unsigned classify = rs6000_builtin_info[(int)code].attr;
14264 const char *attr_string = "";
14266 gcc_assert (name != NULL);
14267 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
14269 if (rs6000_builtin_decls[(int)code])
14270 fatal_error (input_location,
14271 "internal error: builtin function %s already processed", name);
14273 rs6000_builtin_decls[(int)code] = t =
14274 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
14276 /* Set any special attributes. */
14277 if ((classify & RS6000_BTC_CONST) != 0)
14279 /* const function, function only depends on the inputs. */
14280 TREE_READONLY (t) = 1;
14281 TREE_NOTHROW (t) = 1;
14282 attr_string = ", const";
14284 else if ((classify & RS6000_BTC_PURE) != 0)
14286 /* pure function, function can read global memory, but does not set any
14287 external state. */
14288 DECL_PURE_P (t) = 1;
14289 TREE_NOTHROW (t) = 1;
14290 attr_string = ", pure";
14292 else if ((classify & RS6000_BTC_FP) != 0)
14294 /* Function is a math function. If rounding mode is on, then treat the
14295 function as not reading global memory, but it can have arbitrary side
14296 effects. If it is off, then assume the function is a const function.
14297 This mimics the ATTR_MATHFN_FPROUNDING attribute in
14298 builtin-attribute.def that is used for the math functions. */
14299 TREE_NOTHROW (t) = 1;
14300 if (flag_rounding_math)
14302 DECL_PURE_P (t) = 1;
14303 DECL_IS_NOVOPS (t) = 1;
14304 attr_string = ", fp, pure";
14306 else
14308 TREE_READONLY (t) = 1;
14309 attr_string = ", fp, const";
14312 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
14313 gcc_unreachable ();
14315 if (TARGET_DEBUG_BUILTIN)
14316 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
14317 (int)code, name, attr_string);
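/* Example (sketch): a builtin registered with RS6000_BTC_CONST ends up
   equivalent to a declaration like

     double my_builtin (double) __attribute__ ((const, nothrow));

   while RS6000_BTC_FP selects between "const" and "pure + novops"
   depending on -frounding-math, as implemented above.  */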
14320 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
14322 #undef RS6000_BUILTIN_0
14323 #undef RS6000_BUILTIN_1
14324 #undef RS6000_BUILTIN_2
14325 #undef RS6000_BUILTIN_3
14326 #undef RS6000_BUILTIN_A
14327 #undef RS6000_BUILTIN_D
14328 #undef RS6000_BUILTIN_E
14329 #undef RS6000_BUILTIN_H
14330 #undef RS6000_BUILTIN_P
14331 #undef RS6000_BUILTIN_Q
14332 #undef RS6000_BUILTIN_S
14333 #undef RS6000_BUILTIN_X
14335 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14336 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14337 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14338 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
14339 { MASK, ICODE, NAME, ENUM },
14341 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14342 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14343 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14344 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14345 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14346 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14347 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14348 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14350 static const struct builtin_description bdesc_3arg[] =
14352 #include "powerpcspe-builtin.def"
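/* Illustrative expansion (sketch, with a hypothetical .def entry): a line

     RS6000_BUILTIN_3 (MY_BUILTIN, "__builtin_my_op", MY_MASK,
                       RS6000_BTC_CONST, CODE_FOR_my_op)

   becomes, under the definitions above, the bdesc_3arg element

     { MY_MASK, CODE_FOR_my_op, "__builtin_my_op", MY_BUILTIN },

   while every other RS6000_BUILTIN_* line in the .def file expands to
   nothing.  The same redefine-and-include pattern builds each of the
   remaining tables below.  */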
14355 /* DST operations: void foo (void *, const int, const char). */
14357 #undef RS6000_BUILTIN_0
14358 #undef RS6000_BUILTIN_1
14359 #undef RS6000_BUILTIN_2
14360 #undef RS6000_BUILTIN_3
14361 #undef RS6000_BUILTIN_A
14362 #undef RS6000_BUILTIN_D
14363 #undef RS6000_BUILTIN_E
14364 #undef RS6000_BUILTIN_H
14365 #undef RS6000_BUILTIN_P
14366 #undef RS6000_BUILTIN_Q
14367 #undef RS6000_BUILTIN_S
14368 #undef RS6000_BUILTIN_X
14370 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14371 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14372 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14373 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14374 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14375 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
14376 { MASK, ICODE, NAME, ENUM },
14378 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14379 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14380 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14381 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14382 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14383 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14385 static const struct builtin_description bdesc_dst[] =
14387 #include "powerpcspe-builtin.def"
14390 /* Simple binary operations: VECc = foo (VECa, VECb). */
14392 #undef RS6000_BUILTIN_0
14393 #undef RS6000_BUILTIN_1
14394 #undef RS6000_BUILTIN_2
14395 #undef RS6000_BUILTIN_3
14396 #undef RS6000_BUILTIN_A
14397 #undef RS6000_BUILTIN_D
14398 #undef RS6000_BUILTIN_E
14399 #undef RS6000_BUILTIN_H
14400 #undef RS6000_BUILTIN_P
14401 #undef RS6000_BUILTIN_Q
14402 #undef RS6000_BUILTIN_S
14403 #undef RS6000_BUILTIN_X
14405 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14406 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14407 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
14408 { MASK, ICODE, NAME, ENUM },
14410 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14411 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14412 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14413 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14414 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14415 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14416 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14417 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14418 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14420 static const struct builtin_description bdesc_2arg[] =
14422 #include "powerpcspe-builtin.def"
14425 #undef RS6000_BUILTIN_0
14426 #undef RS6000_BUILTIN_1
14427 #undef RS6000_BUILTIN_2
14428 #undef RS6000_BUILTIN_3
14429 #undef RS6000_BUILTIN_A
14430 #undef RS6000_BUILTIN_D
14431 #undef RS6000_BUILTIN_E
14432 #undef RS6000_BUILTIN_H
14433 #undef RS6000_BUILTIN_P
14434 #undef RS6000_BUILTIN_Q
14435 #undef RS6000_BUILTIN_S
14436 #undef RS6000_BUILTIN_X
14438 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14439 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14440 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14441 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14442 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14443 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14444 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14445 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14446 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
14447 { MASK, ICODE, NAME, ENUM },
14449 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14450 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14451 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14453 /* AltiVec predicates. */
14455 static const struct builtin_description bdesc_altivec_preds[] =
14457 #include "powerpcspe-builtin.def"
14460 /* SPE predicates. */
14461 #undef RS6000_BUILTIN_0
14462 #undef RS6000_BUILTIN_1
14463 #undef RS6000_BUILTIN_2
14464 #undef RS6000_BUILTIN_3
14465 #undef RS6000_BUILTIN_A
14466 #undef RS6000_BUILTIN_D
14467 #undef RS6000_BUILTIN_E
14468 #undef RS6000_BUILTIN_H
14469 #undef RS6000_BUILTIN_P
14470 #undef RS6000_BUILTIN_Q
14471 #undef RS6000_BUILTIN_S
14472 #undef RS6000_BUILTIN_X
14474 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14475 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14476 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14477 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14478 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14479 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14480 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14481 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14482 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14483 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14484 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
14485 { MASK, ICODE, NAME, ENUM },
14487 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14489 static const struct builtin_description bdesc_spe_predicates[] =
14491 #include "powerpcspe-builtin.def"
14494 /* SPE evsel predicates. */
14495 #undef RS6000_BUILTIN_0
14496 #undef RS6000_BUILTIN_1
14497 #undef RS6000_BUILTIN_2
14498 #undef RS6000_BUILTIN_3
14499 #undef RS6000_BUILTIN_A
14500 #undef RS6000_BUILTIN_D
14501 #undef RS6000_BUILTIN_E
14502 #undef RS6000_BUILTIN_H
14503 #undef RS6000_BUILTIN_P
14504 #undef RS6000_BUILTIN_Q
14505 #undef RS6000_BUILTIN_S
14506 #undef RS6000_BUILTIN_X
14508 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14509 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14510 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14511 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14512 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14513 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14514 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
14515 { MASK, ICODE, NAME, ENUM },
14517 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14518 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14519 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14520 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14521 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14523 static const struct builtin_description bdesc_spe_evsel[] =
14525 #include "powerpcspe-builtin.def"
14528 /* PAIRED predicates. */
14529 #undef RS6000_BUILTIN_0
14530 #undef RS6000_BUILTIN_1
14531 #undef RS6000_BUILTIN_2
14532 #undef RS6000_BUILTIN_3
14533 #undef RS6000_BUILTIN_A
14534 #undef RS6000_BUILTIN_D
14535 #undef RS6000_BUILTIN_E
14536 #undef RS6000_BUILTIN_H
14537 #undef RS6000_BUILTIN_P
14538 #undef RS6000_BUILTIN_Q
14539 #undef RS6000_BUILTIN_S
14540 #undef RS6000_BUILTIN_X
14542 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14543 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14544 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14545 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14546 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14547 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14548 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14549 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14550 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14551 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14552 { MASK, ICODE, NAME, ENUM },
14554 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14555 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14557 static const struct builtin_description bdesc_paired_preds[] =
14559 #include "powerpcspe-builtin.def"
14562 /* ABS* operations. */
14564 #undef RS6000_BUILTIN_0
14565 #undef RS6000_BUILTIN_1
14566 #undef RS6000_BUILTIN_2
14567 #undef RS6000_BUILTIN_3
14568 #undef RS6000_BUILTIN_A
14569 #undef RS6000_BUILTIN_D
14570 #undef RS6000_BUILTIN_E
14571 #undef RS6000_BUILTIN_H
14572 #undef RS6000_BUILTIN_P
14573 #undef RS6000_BUILTIN_Q
14574 #undef RS6000_BUILTIN_S
14575 #undef RS6000_BUILTIN_X
14577 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14578 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14579 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14580 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14581 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14582 { MASK, ICODE, NAME, ENUM },
14584 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14585 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14586 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14587 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14588 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14589 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14590 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14592 static const struct builtin_description bdesc_abs[] =
14594 #include "powerpcspe-builtin.def"
14597 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14598 foo (VECa). */
14600 #undef RS6000_BUILTIN_0
14601 #undef RS6000_BUILTIN_1
14602 #undef RS6000_BUILTIN_2
14603 #undef RS6000_BUILTIN_3
14604 #undef RS6000_BUILTIN_A
14605 #undef RS6000_BUILTIN_D
14606 #undef RS6000_BUILTIN_E
14607 #undef RS6000_BUILTIN_H
14608 #undef RS6000_BUILTIN_P
14609 #undef RS6000_BUILTIN_Q
14610 #undef RS6000_BUILTIN_S
14611 #undef RS6000_BUILTIN_X
14613 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14614 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14615 { MASK, ICODE, NAME, ENUM },
14617 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14618 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14619 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14620 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14621 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14622 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14623 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14624 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14625 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14626 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14628 static const struct builtin_description bdesc_1arg[] =
14630 #include "powerpcspe-builtin.def"
14633 /* Simple no-argument operations: result = __builtin_darn_32 ().  */
14635 #undef RS6000_BUILTIN_0
14636 #undef RS6000_BUILTIN_1
14637 #undef RS6000_BUILTIN_2
14638 #undef RS6000_BUILTIN_3
14639 #undef RS6000_BUILTIN_A
14640 #undef RS6000_BUILTIN_D
14641 #undef RS6000_BUILTIN_E
14642 #undef RS6000_BUILTIN_H
14643 #undef RS6000_BUILTIN_P
14644 #undef RS6000_BUILTIN_Q
14645 #undef RS6000_BUILTIN_S
14646 #undef RS6000_BUILTIN_X
14648 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14649 { MASK, ICODE, NAME, ENUM },
14651 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14652 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14653 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14654 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14655 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14656 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14657 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14658 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14659 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14660 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14661 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14663 static const struct builtin_description bdesc_0arg[] =
14665 #include "powerpcspe-builtin.def"
14668 /* HTM builtins. */
14669 #undef RS6000_BUILTIN_0
14670 #undef RS6000_BUILTIN_1
14671 #undef RS6000_BUILTIN_2
14672 #undef RS6000_BUILTIN_3
14673 #undef RS6000_BUILTIN_A
14674 #undef RS6000_BUILTIN_D
14675 #undef RS6000_BUILTIN_E
14676 #undef RS6000_BUILTIN_H
14677 #undef RS6000_BUILTIN_P
14678 #undef RS6000_BUILTIN_Q
14679 #undef RS6000_BUILTIN_S
14680 #undef RS6000_BUILTIN_X
14682 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14683 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14684 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14685 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14686 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14687 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14688 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14689 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14690 { MASK, ICODE, NAME, ENUM },
14692 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14693 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14694 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14695 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14697 static const struct builtin_description bdesc_htm[] =
14699 #include "powerpcspe-builtin.def"
14702 #undef RS6000_BUILTIN_0
14703 #undef RS6000_BUILTIN_1
14704 #undef RS6000_BUILTIN_2
14705 #undef RS6000_BUILTIN_3
14706 #undef RS6000_BUILTIN_A
14707 #undef RS6000_BUILTIN_D
14708 #undef RS6000_BUILTIN_E
14709 #undef RS6000_BUILTIN_H
14710 #undef RS6000_BUILTIN_P
14711 #undef RS6000_BUILTIN_Q
14712 #undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
14714 /* Return true if a builtin function is overloaded. */
14715 bool
14716 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
14718 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
14721 const char *
14722 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
14724 return rs6000_builtin_info[(int)fncode].name;
14727 /* Expand an expression EXP that calls a builtin without arguments. */
14728 static rtx
14729 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
14731 rtx pat;
14732 machine_mode tmode = insn_data[icode].operand[0].mode;
14734 if (icode == CODE_FOR_nothing)
14735 /* Builtin not supported on this processor. */
14736 return 0;
14738 if (target == 0
14739 || GET_MODE (target) != tmode
14740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14741 target = gen_reg_rtx (tmode);
14743 pat = GEN_FCN (icode) (target);
14744 if (! pat)
14745 return 0;
14746 emit_insn (pat);
14748 return target;
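/* The TARGET handling above is the idiom shared by all the expanders
   that follow: reuse TARGET when it already has the right mode and
   satisfies the destination predicate, otherwise allocate a fresh
   pseudo.  In outline (a restatement of the code above, not new
   logic):

     if (target == 0
	 || GET_MODE (target) != tmode
	 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
       target = gen_reg_rtx (tmode);

   Returning 0 when no pattern could be generated signals the caller
   that nothing was expanded.  */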
14752 static rtx
14753 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
14755 rtx pat;
14756 tree arg0 = CALL_EXPR_ARG (exp, 0);
14757 tree arg1 = CALL_EXPR_ARG (exp, 1);
14758 rtx op0 = expand_normal (arg0);
14759 rtx op1 = expand_normal (arg1);
14760 machine_mode mode0 = insn_data[icode].operand[0].mode;
14761 machine_mode mode1 = insn_data[icode].operand[1].mode;
14763 if (icode == CODE_FOR_nothing)
14764 /* Builtin not supported on this processor. */
14765 return 0;
14767 /* If we got invalid arguments, bail out before generating bad rtl. */
14768 if (arg0 == error_mark_node || arg1 == error_mark_node)
14769 return const0_rtx;
14771 if (GET_CODE (op0) != CONST_INT
14772 || INTVAL (op0) > 255
14773 || INTVAL (op0) < 0)
14775 error ("argument 1 must be an 8-bit field value");
14776 return const0_rtx;
14779 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14780 op0 = copy_to_mode_reg (mode0, op0);
14782 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14783 op1 = copy_to_mode_reg (mode1, op1);
14785 pat = GEN_FCN (icode) (op0, op1);
14786 if (! pat)
14787 return const0_rtx;
14788 emit_insn (pat);
14790 return NULL_RTX;
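/* For example (illustrative source-level calls; __builtin_mtfsf takes
   an 8-bit field mask and a double):

     __builtin_mtfsf (0xff, d);    accepted: the literal fits in 0..255
     __builtin_mtfsf (x, d);       rejected: the mask must be a
				   CONST_INT in range, per the check
				   above

   The INTVAL range test is what enforces this at expand time.  */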
14793 static rtx
14794 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14796 rtx pat;
14797 tree arg0 = CALL_EXPR_ARG (exp, 0);
14798 rtx op0 = expand_normal (arg0);
14799 machine_mode tmode = insn_data[icode].operand[0].mode;
14800 machine_mode mode0 = insn_data[icode].operand[1].mode;
14802 if (icode == CODE_FOR_nothing)
14803 /* Builtin not supported on this processor. */
14804 return 0;
14806 /* If we got invalid arguments, bail out before generating bad rtl. */
14807 if (arg0 == error_mark_node)
14808 return const0_rtx;
14810 if (icode == CODE_FOR_altivec_vspltisb
14811 || icode == CODE_FOR_altivec_vspltish
14812 || icode == CODE_FOR_altivec_vspltisw
14813 || icode == CODE_FOR_spe_evsplatfi
14814 || icode == CODE_FOR_spe_evsplati)
14816 /* Only allow 5-bit *signed* literals. */
14817 if (GET_CODE (op0) != CONST_INT
14818 || INTVAL (op0) > 15
14819 || INTVAL (op0) < -16)
14821 error ("argument 1 must be a 5-bit signed literal");
14822 return CONST0_RTX (tmode);
14826 if (target == 0
14827 || GET_MODE (target) != tmode
14828 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14829 target = gen_reg_rtx (tmode);
14831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14832 op0 = copy_to_mode_reg (mode0, op0);
14834 pat = GEN_FCN (icode) (target, op0);
14835 if (! pat)
14836 return 0;
14837 emit_insn (pat);
14839 return target;
14842 static rtx
14843 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14845 rtx pat, scratch1, scratch2;
14846 tree arg0 = CALL_EXPR_ARG (exp, 0);
14847 rtx op0 = expand_normal (arg0);
14848 machine_mode tmode = insn_data[icode].operand[0].mode;
14849 machine_mode mode0 = insn_data[icode].operand[1].mode;
14851 /* If we have invalid arguments, bail out before generating bad rtl. */
14852 if (arg0 == error_mark_node)
14853 return const0_rtx;
14855 if (target == 0
14856 || GET_MODE (target) != tmode
14857 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14858 target = gen_reg_rtx (tmode);
14860 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14861 op0 = copy_to_mode_reg (mode0, op0);
14863 scratch1 = gen_reg_rtx (mode0);
14864 scratch2 = gen_reg_rtx (mode0);
14866 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14867 if (! pat)
14868 return 0;
14869 emit_insn (pat);
14871 return target;
14874 static rtx
14875 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14877 rtx pat;
14878 tree arg0 = CALL_EXPR_ARG (exp, 0);
14879 tree arg1 = CALL_EXPR_ARG (exp, 1);
14880 rtx op0 = expand_normal (arg0);
14881 rtx op1 = expand_normal (arg1);
14882 machine_mode tmode = insn_data[icode].operand[0].mode;
14883 machine_mode mode0 = insn_data[icode].operand[1].mode;
14884 machine_mode mode1 = insn_data[icode].operand[2].mode;
14886 if (icode == CODE_FOR_nothing)
14887 /* Builtin not supported on this processor. */
14888 return 0;
14890 /* If we got invalid arguments, bail out before generating bad rtl. */
14891 if (arg0 == error_mark_node || arg1 == error_mark_node)
14892 return const0_rtx;
14894 if (icode == CODE_FOR_altivec_vcfux
14895 || icode == CODE_FOR_altivec_vcfsx
14896 || icode == CODE_FOR_altivec_vctsxs
14897 || icode == CODE_FOR_altivec_vctuxs
14898 || icode == CODE_FOR_altivec_vspltb
14899 || icode == CODE_FOR_altivec_vsplth
14900 || icode == CODE_FOR_altivec_vspltw
14901 || icode == CODE_FOR_spe_evaddiw
14902 || icode == CODE_FOR_spe_evldd
14903 || icode == CODE_FOR_spe_evldh
14904 || icode == CODE_FOR_spe_evldw
14905 || icode == CODE_FOR_spe_evlhhesplat
14906 || icode == CODE_FOR_spe_evlhhossplat
14907 || icode == CODE_FOR_spe_evlhhousplat
14908 || icode == CODE_FOR_spe_evlwhe
14909 || icode == CODE_FOR_spe_evlwhos
14910 || icode == CODE_FOR_spe_evlwhou
14911 || icode == CODE_FOR_spe_evlwhsplat
14912 || icode == CODE_FOR_spe_evlwwsplat
14913 || icode == CODE_FOR_spe_evrlwi
14914 || icode == CODE_FOR_spe_evslwi
14915 || icode == CODE_FOR_spe_evsrwis
14916 || icode == CODE_FOR_spe_evsubifw
14917 || icode == CODE_FOR_spe_evsrwiu)
14919 /* Only allow 5-bit unsigned literals. */
14920 STRIP_NOPS (arg1);
14921 if (TREE_CODE (arg1) != INTEGER_CST
14922 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14924 error ("argument 2 must be a 5-bit unsigned literal");
14925 return CONST0_RTX (tmode);
14928 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14929 || icode == CODE_FOR_dfptstsfi_lt_dd
14930 || icode == CODE_FOR_dfptstsfi_gt_dd
14931 || icode == CODE_FOR_dfptstsfi_unordered_dd
14932 || icode == CODE_FOR_dfptstsfi_eq_td
14933 || icode == CODE_FOR_dfptstsfi_lt_td
14934 || icode == CODE_FOR_dfptstsfi_gt_td
14935 || icode == CODE_FOR_dfptstsfi_unordered_td)
14937 /* Only allow 6-bit unsigned literals. */
14938 STRIP_NOPS (arg0);
14939 if (TREE_CODE (arg0) != INTEGER_CST
14940 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14942 error ("argument 1 must be a 6-bit unsigned literal");
14943 return CONST0_RTX (tmode);
14946 else if (icode == CODE_FOR_xststdcdp
14947 || icode == CODE_FOR_xststdcsp
14948 || icode == CODE_FOR_xvtstdcdp
14949 || icode == CODE_FOR_xvtstdcsp)
14951 /* Only allow 7-bit unsigned literals. */
14952 STRIP_NOPS (arg1);
14953 if (TREE_CODE (arg1) != INTEGER_CST
14954 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14956 error ("argument 2 must be a 7-bit unsigned literal");
14957 return CONST0_RTX (tmode);
14961 if (target == 0
14962 || GET_MODE (target) != tmode
14963 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14964 target = gen_reg_rtx (tmode);
14966 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14967 op0 = copy_to_mode_reg (mode0, op0);
14968 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14969 op1 = copy_to_mode_reg (mode1, op1);
14971 pat = GEN_FCN (icode) (target, op0, op1);
14972 if (! pat)
14973 return 0;
14974 emit_insn (pat);
14976 return target;
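/* As a concrete instance of the 5-bit literal check above
   (illustrative user-level code; vec_splat maps onto the vsplt*
   icodes listed):

     vector signed int v, w;
     w = vec_splat (v, 3);    accepted: the selector is a literal
     w = vec_splat (v, n);    rejected: "argument 2 must be a 5-bit
			      unsigned literal"  */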
14979 static rtx
14980 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14982 rtx pat, scratch;
14983 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14984 tree arg0 = CALL_EXPR_ARG (exp, 1);
14985 tree arg1 = CALL_EXPR_ARG (exp, 2);
14986 rtx op0 = expand_normal (arg0);
14987 rtx op1 = expand_normal (arg1);
14988 machine_mode tmode = SImode;
14989 machine_mode mode0 = insn_data[icode].operand[1].mode;
14990 machine_mode mode1 = insn_data[icode].operand[2].mode;
14991 int cr6_form_int;
14993 if (TREE_CODE (cr6_form) != INTEGER_CST)
14995 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14996 return const0_rtx;
14998 else
14999 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
15001 gcc_assert (mode0 == mode1);
15003 /* If we have invalid arguments, bail out before generating bad rtl. */
15004 if (arg0 == error_mark_node || arg1 == error_mark_node)
15005 return const0_rtx;
15007 if (target == 0
15008 || GET_MODE (target) != tmode
15009 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15010 target = gen_reg_rtx (tmode);
15012 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15013 op0 = copy_to_mode_reg (mode0, op0);
15014 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15015 op1 = copy_to_mode_reg (mode1, op1);
15017 /* Note that for many of the relevant operations (e.g. cmpne or
15018 cmpeq) with float or double operands, it would make more sense
15019 for the allocated scratch register to have a vector-of-integer
15020 mode. But the choice to copy the mode of operand 0 was made
15021 long ago and there are no plans to change it. */
15022 scratch = gen_reg_rtx (mode0);
15024 pat = GEN_FCN (icode) (scratch, op0, op1);
15025 if (! pat)
15026 return 0;
15027 emit_insn (pat);
15029 /* The vec_any* and vec_all* predicates use the same opcodes for two
15030 different operations, but the bits in CR6 will be different
15031 depending on what information we want. So we have to play tricks
15032 with CR6 to get the right bits out.
15034 If you think this is disgusting, look at the specs for the
15035 AltiVec predicates. */
15037 switch (cr6_form_int)
15039 case 0:
15040 emit_insn (gen_cr6_test_for_zero (target));
15041 break;
15042 case 1:
15043 emit_insn (gen_cr6_test_for_zero_reverse (target));
15044 break;
15045 case 2:
15046 emit_insn (gen_cr6_test_for_lt (target));
15047 break;
15048 case 3:
15049 emit_insn (gen_cr6_test_for_lt_reverse (target));
15050 break;
15051 default:
15052 error ("argument 1 of __builtin_altivec_predicate is out of range");
15053 break;
15056 return target;
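/* Concretely, <altivec.h> encodes the CR6 form in the first argument
   of the predicate builtins, e.g. (as defined by the __CR6_* macros;
   shown for illustration):

     vec_all_eq (a, b)  ->  __builtin_vec_vcmpeq_p (__CR6_LT, a, b)
     vec_any_ne (a, b)  ->  __builtin_vec_vcmpeq_p (__CR6_LT_REV, a, b)

   which is why the switch above selects between the cr6_test_for_zero
   and cr6_test_for_lt patterns and their reverses.  */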
15059 static rtx
15060 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
15062 rtx pat, addr;
15063 tree arg0 = CALL_EXPR_ARG (exp, 0);
15064 tree arg1 = CALL_EXPR_ARG (exp, 1);
15065 machine_mode tmode = insn_data[icode].operand[0].mode;
15066 machine_mode mode0 = Pmode;
15067 machine_mode mode1 = Pmode;
15068 rtx op0 = expand_normal (arg0);
15069 rtx op1 = expand_normal (arg1);
15071 if (icode == CODE_FOR_nothing)
15072 /* Builtin not supported on this processor. */
15073 return 0;
15075 /* If we got invalid arguments, bail out before generating bad rtl. */
15076 if (arg0 == error_mark_node || arg1 == error_mark_node)
15077 return const0_rtx;
15079 if (target == 0
15080 || GET_MODE (target) != tmode
15081 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15082 target = gen_reg_rtx (tmode);
15084 op1 = copy_to_mode_reg (mode1, op1);
15086 if (op0 == const0_rtx)
15088 addr = gen_rtx_MEM (tmode, op1);
15090 else
15092 op0 = copy_to_mode_reg (mode0, op0);
15093 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
15096 pat = GEN_FCN (icode) (target, addr);
15098 if (! pat)
15099 return 0;
15100 emit_insn (pat);
15102 return target;
15105 /* Return a constant vector for use as a little-endian permute control vector
15106 to reverse the order of elements of the given vector mode. */
15107 static rtx
15108 swap_selector_for_mode (machine_mode mode)
15110 /* These are little endian vectors, so their elements are reversed
15111 from what you would normally expect for a permute control vector. */
15112 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
15113 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
15114 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
15115 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
15116 unsigned int *swaparray, i;
15117 rtx perm[16];
15119 switch (mode)
15121 case E_V2DFmode:
15122 case E_V2DImode:
15123 swaparray = swap2;
15124 break;
15125 case E_V4SFmode:
15126 case E_V4SImode:
15127 swaparray = swap4;
15128 break;
15129 case E_V8HImode:
15130 swaparray = swap8;
15131 break;
15132 case E_V16QImode:
15133 swaparray = swap16;
15134 break;
15135 default:
15136 gcc_unreachable ();
15139 for (i = 0; i < 16; ++i)
15140 perm[i] = GEN_INT (swaparray[i]);
15142 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
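/* For instance, for V4SImode the selector built here is

     { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }

   i.e. the bytes of each 4-byte element appear in reversed order, so
   a vperm of a vector with itself under this selector reverses the
   order of the four 32-bit elements.  */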
15145 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
15146 with -maltivec=be specified. Issue the load followed by an element-
15147 reversing permute. */
15148 void
15149 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15151 rtx tmp = gen_reg_rtx (mode);
15152 rtx load = gen_rtx_SET (tmp, op1);
15153 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15154 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
15155 rtx sel = swap_selector_for_mode (mode);
15156 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
15158 gcc_assert (REG_P (op0));
15159 emit_insn (par);
15160 emit_insn (gen_rtx_SET (op0, vperm));
15163 /* Generate code for a "stvxl" built-in for a little endian target with
15164 -maltivec=be specified. Issue the store preceded by an element-reversing
15165 permute. */
15166 void
15167 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15169 rtx tmp = gen_reg_rtx (mode);
15170 rtx store = gen_rtx_SET (op0, tmp);
15171 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15172 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
15173 rtx sel = swap_selector_for_mode (mode);
15174 rtx vperm;
15176 gcc_assert (REG_P (op1));
15177 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15178 emit_insn (gen_rtx_SET (tmp, vperm));
15179 emit_insn (par);
15182 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
15183 specified. Issue the store preceded by an element-reversing permute. */
15184 void
15185 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15187 machine_mode inner_mode = GET_MODE_INNER (mode);
15188 rtx tmp = gen_reg_rtx (mode);
15189 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
15190 rtx sel = swap_selector_for_mode (mode);
15191 rtx vperm;
15193 gcc_assert (REG_P (op1));
15194 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15195 emit_insn (gen_rtx_SET (tmp, vperm));
15196 emit_insn (gen_rtx_SET (op0, stvx));
15199 static rtx
15200 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
15202 rtx pat, addr;
15203 tree arg0 = CALL_EXPR_ARG (exp, 0);
15204 tree arg1 = CALL_EXPR_ARG (exp, 1);
15205 machine_mode tmode = insn_data[icode].operand[0].mode;
15206 machine_mode mode0 = Pmode;
15207 machine_mode mode1 = Pmode;
15208 rtx op0 = expand_normal (arg0);
15209 rtx op1 = expand_normal (arg1);
15211 if (icode == CODE_FOR_nothing)
15212 /* Builtin not supported on this processor. */
15213 return 0;
15215 /* If we got invalid arguments, bail out before generating bad rtl. */
15216 if (arg0 == error_mark_node || arg1 == error_mark_node)
15217 return const0_rtx;
15219 if (target == 0
15220 || GET_MODE (target) != tmode
15221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15222 target = gen_reg_rtx (tmode);
15224 op1 = copy_to_mode_reg (mode1, op1);
15226 /* For LVX, express the RTL accurately by ANDing the address with -16.
15227 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
15228 so the raw address is fine. */
15229 if (icode == CODE_FOR_altivec_lvx_v2df_2op
15230 || icode == CODE_FOR_altivec_lvx_v2di_2op
15231 || icode == CODE_FOR_altivec_lvx_v4sf_2op
15232 || icode == CODE_FOR_altivec_lvx_v4si_2op
15233 || icode == CODE_FOR_altivec_lvx_v8hi_2op
15234 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
15236 rtx rawaddr;
15237 if (op0 == const0_rtx)
15238 rawaddr = op1;
15239 else
15241 op0 = copy_to_mode_reg (mode0, op0);
15242 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
15244 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15245 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
15247 /* For -maltivec=be, emit the load and follow it up with a
15248 permute to swap the elements. */
15249 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15251 rtx temp = gen_reg_rtx (tmode);
15252 emit_insn (gen_rtx_SET (temp, addr));
15254 rtx sel = swap_selector_for_mode (tmode);
15255 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
15256 UNSPEC_VPERM);
15257 emit_insn (gen_rtx_SET (target, vperm));
15259 else
15260 emit_insn (gen_rtx_SET (target, addr));
15262 else
15264 if (op0 == const0_rtx)
15265 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
15266 else
15268 op0 = copy_to_mode_reg (mode0, op0);
15269 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
15270 gen_rtx_PLUS (Pmode, op1, op0));
15273 pat = GEN_FCN (icode) (target, addr);
15274 if (! pat)
15275 return 0;
15276 emit_insn (pat);
15279 return target;
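/* A worked example of the AND-with-(-16) masking above: lvx ignores
   the low four address bits, so for a raw address of, say, 0x10007
   the generated

     (and (reg) (const_int -16))

   yields 0x10000, the 16-byte-aligned address the hardware actually
   loads from.  Modelling this explicitly, instead of hiding it in an
   UNSPEC, lets the RTL optimizers reason about the access.  */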
15282 static rtx
15283 spe_expand_stv_builtin (enum insn_code icode, tree exp)
15285 tree arg0 = CALL_EXPR_ARG (exp, 0);
15286 tree arg1 = CALL_EXPR_ARG (exp, 1);
15287 tree arg2 = CALL_EXPR_ARG (exp, 2);
15288 rtx op0 = expand_normal (arg0);
15289 rtx op1 = expand_normal (arg1);
15290 rtx op2 = expand_normal (arg2);
15291 rtx pat;
15292 machine_mode mode0 = insn_data[icode].operand[0].mode;
15293 machine_mode mode1 = insn_data[icode].operand[1].mode;
15294 machine_mode mode2 = insn_data[icode].operand[2].mode;
15296 /* Invalid arguments. Bail before doing anything stoopid! */
15297 if (arg0 == error_mark_node
15298 || arg1 == error_mark_node
15299 || arg2 == error_mark_node)
15300 return const0_rtx;
15302 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
15303 op0 = copy_to_mode_reg (mode2, op0);
15304 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
15305 op1 = copy_to_mode_reg (mode0, op1);
15306 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
15307 op2 = copy_to_mode_reg (mode1, op2);
15309 pat = GEN_FCN (icode) (op1, op2, op0);
15310 if (pat)
15311 emit_insn (pat);
15312 return NULL_RTX;
15315 static rtx
15316 paired_expand_stv_builtin (enum insn_code icode, tree exp)
15318 tree arg0 = CALL_EXPR_ARG (exp, 0);
15319 tree arg1 = CALL_EXPR_ARG (exp, 1);
15320 tree arg2 = CALL_EXPR_ARG (exp, 2);
15321 rtx op0 = expand_normal (arg0);
15322 rtx op1 = expand_normal (arg1);
15323 rtx op2 = expand_normal (arg2);
15324 rtx pat, addr;
15325 machine_mode tmode = insn_data[icode].operand[0].mode;
15326 machine_mode mode1 = Pmode;
15327 machine_mode mode2 = Pmode;
15329 /* Invalid arguments. Bail before doing anything stoopid! */
15330 if (arg0 == error_mark_node
15331 || arg1 == error_mark_node
15332 || arg2 == error_mark_node)
15333 return const0_rtx;
15335 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
15336 op0 = copy_to_mode_reg (tmode, op0);
15338 op2 = copy_to_mode_reg (mode2, op2);
15340 if (op1 == const0_rtx)
15342 addr = gen_rtx_MEM (tmode, op2);
15344 else
15346 op1 = copy_to_mode_reg (mode1, op1);
15347 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
15350 pat = GEN_FCN (icode) (addr, op0);
15351 if (pat)
15352 emit_insn (pat);
15353 return NULL_RTX;
15356 static rtx
15357 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
15359 rtx pat;
15360 tree arg0 = CALL_EXPR_ARG (exp, 0);
15361 tree arg1 = CALL_EXPR_ARG (exp, 1);
15362 tree arg2 = CALL_EXPR_ARG (exp, 2);
15363 rtx op0 = expand_normal (arg0);
15364 rtx op1 = expand_normal (arg1);
15365 rtx op2 = expand_normal (arg2);
15366 machine_mode mode0 = insn_data[icode].operand[0].mode;
15367 machine_mode mode1 = insn_data[icode].operand[1].mode;
15368 machine_mode mode2 = insn_data[icode].operand[2].mode;
15370 if (icode == CODE_FOR_nothing)
15371 /* Builtin not supported on this processor. */
15372 return NULL_RTX;
15374 /* If we got invalid arguments, bail out before generating bad rtl. */
15375 if (arg0 == error_mark_node
15376 || arg1 == error_mark_node
15377 || arg2 == error_mark_node)
15378 return NULL_RTX;
15380 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15381 op0 = copy_to_mode_reg (mode0, op0);
15382 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15383 op1 = copy_to_mode_reg (mode1, op1);
15384 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15385 op2 = copy_to_mode_reg (mode2, op2);
15387 pat = GEN_FCN (icode) (op0, op1, op2);
15388 if (pat)
15389 emit_insn (pat);
15391 return NULL_RTX;
15394 static rtx
15395 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
15397 tree arg0 = CALL_EXPR_ARG (exp, 0);
15398 tree arg1 = CALL_EXPR_ARG (exp, 1);
15399 tree arg2 = CALL_EXPR_ARG (exp, 2);
15400 rtx op0 = expand_normal (arg0);
15401 rtx op1 = expand_normal (arg1);
15402 rtx op2 = expand_normal (arg2);
15403 rtx pat, addr, rawaddr;
15404 machine_mode tmode = insn_data[icode].operand[0].mode;
15405 machine_mode smode = insn_data[icode].operand[1].mode;
15406 machine_mode mode1 = Pmode;
15407 machine_mode mode2 = Pmode;
15409 /* Invalid arguments. Bail before doing anything stoopid! */
15410 if (arg0 == error_mark_node
15411 || arg1 == error_mark_node
15412 || arg2 == error_mark_node)
15413 return const0_rtx;
15415 op2 = copy_to_mode_reg (mode2, op2);
15417 /* For STVX, express the RTL accurately by ANDing the address with -16.
15418 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
15419 so the raw address is fine. */
15420 if (icode == CODE_FOR_altivec_stvx_v2df_2op
15421 || icode == CODE_FOR_altivec_stvx_v2di_2op
15422 || icode == CODE_FOR_altivec_stvx_v4sf_2op
15423 || icode == CODE_FOR_altivec_stvx_v4si_2op
15424 || icode == CODE_FOR_altivec_stvx_v8hi_2op
15425 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
15427 if (op1 == const0_rtx)
15428 rawaddr = op2;
15429 else
15431 op1 = copy_to_mode_reg (mode1, op1);
15432 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
15435 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15436 addr = gen_rtx_MEM (tmode, addr);
15438 op0 = copy_to_mode_reg (tmode, op0);
15440 /* For -maltivec=be, emit a permute to swap the elements, followed
15441 by the store. */
15442 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15444 rtx temp = gen_reg_rtx (tmode);
15445 rtx sel = swap_selector_for_mode (tmode);
15446 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
15447 UNSPEC_VPERM);
15448 emit_insn (gen_rtx_SET (temp, vperm));
15449 emit_insn (gen_rtx_SET (addr, temp));
15451 else
15452 emit_insn (gen_rtx_SET (addr, op0));
15454 else
15456 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
15457 op0 = copy_to_mode_reg (smode, op0);
15459 if (op1 == const0_rtx)
15460 addr = gen_rtx_MEM (tmode, op2);
15461 else
15463 op1 = copy_to_mode_reg (mode1, op1);
15464 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
15467 pat = GEN_FCN (icode) (addr, op0);
15468 if (pat)
15469 emit_insn (pat);
15472 return NULL_RTX;
15475 /* Return the appropriate SPR number associated with the given builtin. */
15476 static inline HOST_WIDE_INT
15477 htm_spr_num (enum rs6000_builtins code)
15479 if (code == HTM_BUILTIN_GET_TFHAR
15480 || code == HTM_BUILTIN_SET_TFHAR)
15481 return TFHAR_SPR;
15482 else if (code == HTM_BUILTIN_GET_TFIAR
15483 || code == HTM_BUILTIN_SET_TFIAR)
15484 return TFIAR_SPR;
15485 else if (code == HTM_BUILTIN_GET_TEXASR
15486 || code == HTM_BUILTIN_SET_TEXASR)
15487 return TEXASR_SPR;
15488 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
15489 || code == HTM_BUILTIN_SET_TEXASRU);
15490 return TEXASRU_SPR;
15493 /* Return the appropriate SPR regno associated with the given builtin. */
15494 static inline HOST_WIDE_INT
15495 htm_spr_regno (enum rs6000_builtins code)
15497 if (code == HTM_BUILTIN_GET_TFHAR
15498 || code == HTM_BUILTIN_SET_TFHAR)
15499 return TFHAR_REGNO;
15500 else if (code == HTM_BUILTIN_GET_TFIAR
15501 || code == HTM_BUILTIN_SET_TFIAR)
15502 return TFIAR_REGNO;
15503 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
15504 || code == HTM_BUILTIN_SET_TEXASR
15505 || code == HTM_BUILTIN_GET_TEXASRU
15506 || code == HTM_BUILTIN_SET_TEXASRU);
15507 return TEXASR_REGNO;
15510 /* Return the correct ICODE value depending on whether we are
15511 setting or reading the HTM SPRs. */
15512 static inline enum insn_code
15513 rs6000_htm_spr_icode (bool nonvoid)
15515 if (nonvoid)
15516 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
15517 else
15518 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
15521 /* Expand the HTM builtin in EXP and store the result in TARGET.
15522 Store true in *EXPANDEDP if we found a builtin to expand. */
15523 static rtx
15524 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
15526 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15527 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
15528 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15529 const struct builtin_description *d;
15530 size_t i;
15532 *expandedp = true;
15534 if (!TARGET_POWERPC64
15535 && (fcode == HTM_BUILTIN_TABORTDC
15536 || fcode == HTM_BUILTIN_TABORTDCI))
15538 size_t uns_fcode = (size_t)fcode;
15539 const char *name = rs6000_builtin_info[uns_fcode].name;
15540 error ("builtin %s is only valid in 64-bit mode", name);
15541 return const0_rtx;
15544 /* Expand the HTM builtins. */
15545 d = bdesc_htm;
15546 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15547 if (d->code == fcode)
15549 rtx op[MAX_HTM_OPERANDS], pat;
15550 int nopnds = 0;
15551 tree arg;
15552 call_expr_arg_iterator iter;
15553 unsigned attr = rs6000_builtin_info[fcode].attr;
15554 enum insn_code icode = d->icode;
15555 const struct insn_operand_data *insn_op;
15556 bool uses_spr = (attr & RS6000_BTC_SPR);
15557 rtx cr = NULL_RTX;
15559 if (uses_spr)
15560 icode = rs6000_htm_spr_icode (nonvoid);
15561 insn_op = &insn_data[icode].operand[0];
15563 if (nonvoid)
15565 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
15566 if (!target
15567 || GET_MODE (target) != tmode
15568 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
15569 target = gen_reg_rtx (tmode);
15570 if (uses_spr)
15571 op[nopnds++] = target;
15574 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
15576 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
15577 return const0_rtx;
15579 insn_op = &insn_data[icode].operand[nopnds];
15581 op[nopnds] = expand_normal (arg);
15583 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
15585 if (!strcmp (insn_op->constraint, "n"))
15587 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
15588 if (!CONST_INT_P (op[nopnds]))
15589 error ("argument %d must be an unsigned literal", arg_num);
15590 else
15591 error ("argument %d is an unsigned literal that is "
15592 "out of range", arg_num);
15593 return const0_rtx;
15595 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
15598 nopnds++;
15601 /* Handle the builtins for extended mnemonics. These accept
15602 no arguments, but map to builtins that take arguments. */
15603 switch (fcode)
15605 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
15606 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
15607 op[nopnds++] = GEN_INT (1);
15608 if (flag_checking)
15609 attr |= RS6000_BTC_UNARY;
15610 break;
15611 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
15612 op[nopnds++] = GEN_INT (0);
15613 if (flag_checking)
15614 attr |= RS6000_BTC_UNARY;
15615 break;
15616 default:
15617 break;
15620 /* If this builtin accesses SPRs, then pass in the appropriate
15621 SPR number and SPR regno as the last two operands. */
15622 if (uses_spr)
15624 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
15625 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
15626 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
15628 /* If this builtin accesses a CR, then pass in a scratch
15629 CR as the last operand. */
15630 else if (attr & RS6000_BTC_CR)
15631 { cr = gen_reg_rtx (CCmode);
15632 op[nopnds++] = cr;
15635 if (flag_checking)
15637 int expected_nopnds = 0;
15638 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
15639 expected_nopnds = 1;
15640 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
15641 expected_nopnds = 2;
15642 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
15643 expected_nopnds = 3;
15644 if (!(attr & RS6000_BTC_VOID))
15645 expected_nopnds += 1;
15646 if (uses_spr)
15647 expected_nopnds += 2;
15649 gcc_assert (nopnds == expected_nopnds
15650 && nopnds <= MAX_HTM_OPERANDS);
15653 switch (nopnds)
15655 case 1:
15656 pat = GEN_FCN (icode) (op[0]);
15657 break;
15658 case 2:
15659 pat = GEN_FCN (icode) (op[0], op[1]);
15660 break;
15661 case 3:
15662 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15663 break;
15664 case 4:
15665 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15666 break;
15667 default:
15668 gcc_unreachable ();
15670 if (!pat)
15671 return NULL_RTX;
15672 emit_insn (pat);
15674 if (attr & RS6000_BTC_CR)
15676 if (fcode == HTM_BUILTIN_TBEGIN)
15678 /* Emit code to set TARGET to true or false depending on
15679 whether the tbegin. instruction succeeded or failed
15680 to start a transaction. We do this by placing the 1's
15681 complement of CR's EQ bit into TARGET. */
15682 rtx scratch = gen_reg_rtx (SImode);
15683 emit_insn (gen_rtx_SET (scratch,
15684 gen_rtx_EQ (SImode, cr,
15685 const0_rtx)));
15686 emit_insn (gen_rtx_SET (target,
15687 gen_rtx_XOR (SImode, scratch,
15688 GEN_INT (1))));
15690 else
15692 /* Emit code to copy the 4-bit condition register field
15693 CR into the least significant end of register TARGET. */
15694 rtx scratch1 = gen_reg_rtx (SImode);
15695 rtx scratch2 = gen_reg_rtx (SImode);
15696 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
15697 emit_insn (gen_movcc (subreg, cr));
15698 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
15699 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
15703 if (nonvoid)
15704 return target;
15705 return const0_rtx;
15708 *expandedp = false;
15709 return NULL_RTX;
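/* Usage sketch for the CR handling above (illustrative user code;
   __builtin_tbegin and __builtin_tend are the documented HTM
   builtins):

     if (__builtin_tbegin (0))
       {
	 ... transactional code ...
	 __builtin_tend (0);
       }

   tbegin. sets the CR field's EQ bit when it fails to start a
   transaction, so the EQ-compare/XOR pair above hands the user a
   plain C truth value that is nonzero on success.  */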
15712 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15714 static rtx
15715 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
15716 rtx target)
15718 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15719 if (fcode == RS6000_BUILTIN_CPU_INIT)
15720 return const0_rtx;
15722 if (target == 0 || GET_MODE (target) != SImode)
15723 target = gen_reg_rtx (SImode);
15725 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15726 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
15727 if (TREE_CODE (arg) != STRING_CST)
15729 error ("builtin %s only accepts a string argument",
15730 rs6000_builtin_info[(size_t) fcode].name);
15731 return const0_rtx;
15734 if (fcode == RS6000_BUILTIN_CPU_IS)
15736 const char *cpu = TREE_STRING_POINTER (arg);
15737 rtx cpuid = NULL_RTX;
15738 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
15739 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
15741 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15742 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
15743 break;
15745 if (cpuid == NULL_RTX)
15747 /* Invalid CPU argument. */
15748 error ("cpu %s is an invalid argument to builtin %s",
15749 cpu, rs6000_builtin_info[(size_t) fcode].name);
15750 return const0_rtx;
15753 rtx platform = gen_reg_rtx (SImode);
15754 rtx tcbmem = gen_const_mem (SImode,
15755 gen_rtx_PLUS (Pmode,
15756 gen_rtx_REG (Pmode, TLS_REGNUM),
15757 GEN_INT (TCB_PLATFORM_OFFSET)));
15758 emit_move_insn (platform, tcbmem);
15759 emit_insn (gen_eqsi3 (target, platform, cpuid));
15761 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
15763 const char *hwcap = TREE_STRING_POINTER (arg);
15764 rtx mask = NULL_RTX;
15765 int hwcap_offset;
15766 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
15767 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
15769 mask = GEN_INT (cpu_supports_info[i].mask);
15770 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
15771 break;
15773 if (mask == NULL_RTX)
15775 /* Invalid HWCAP argument. */
15776 error ("hwcap %s is an invalid argument to builtin %s",
15777 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15778 return const0_rtx;
15781 rtx tcb_hwcap = gen_reg_rtx (SImode);
15782 rtx tcbmem = gen_const_mem (SImode,
15783 gen_rtx_PLUS (Pmode,
15784 gen_rtx_REG (Pmode, TLS_REGNUM),
15785 GEN_INT (hwcap_offset)));
15786 emit_move_insn (tcb_hwcap, tcbmem);
15787 rtx scratch1 = gen_reg_rtx (SImode);
15788 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15789 rtx scratch2 = gen_reg_rtx (SImode);
15790 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15791 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15794 /* Record that we have expanded a CPU builtin, so that we can later
15795 emit a reference to the special symbol exported by LIBC to ensure we
15796 do not link against an old LIBC that doesn't support this feature. */
15797 cpu_builtin_p = true;
15799 #else
15800 /* For old LIBCs, always return FALSE. */
15801 emit_move_insn (target, GEN_INT (0));
15802 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15804 return target;
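/* Usage sketch (these are the documented PowerPC CPU builtins;
   "power9" and "vcrypto" are examples of valid argument strings):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("power9"))
       ... processor-specific path ...
     if (__builtin_cpu_supports ("vcrypto"))
       ... hwcap-gated path ...

   With a new enough glibc both tests compile down to a load from the
   TCB plus a compare or mask, as generated above, with no runtime
   library call.  */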
15807 static rtx
15808 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15810 rtx pat;
15811 tree arg0 = CALL_EXPR_ARG (exp, 0);
15812 tree arg1 = CALL_EXPR_ARG (exp, 1);
15813 tree arg2 = CALL_EXPR_ARG (exp, 2);
15814 rtx op0 = expand_normal (arg0);
15815 rtx op1 = expand_normal (arg1);
15816 rtx op2 = expand_normal (arg2);
15817 machine_mode tmode = insn_data[icode].operand[0].mode;
15818 machine_mode mode0 = insn_data[icode].operand[1].mode;
15819 machine_mode mode1 = insn_data[icode].operand[2].mode;
15820 machine_mode mode2 = insn_data[icode].operand[3].mode;
15822 if (icode == CODE_FOR_nothing)
15823 /* Builtin not supported on this processor. */
15824 return 0;
15826 /* If we got invalid arguments, bail out before generating bad rtl. */
15827 if (arg0 == error_mark_node
15828 || arg1 == error_mark_node
15829 || arg2 == error_mark_node)
15830 return const0_rtx;
15832 /* Check and prepare the argument depending on the instruction code.
15834 Note that a switch statement instead of this sequence of tests
15835 would be incorrect: many of the CODE_FOR values could be
15836 CODE_FOR_nothing, and that would yield multiple case alternatives
15837 with identical values. (We would never reach here at runtime
15838 in that case anyway.) */
15839 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15840 || icode == CODE_FOR_altivec_vsldoi_v2df
15841 || icode == CODE_FOR_altivec_vsldoi_v4si
15842 || icode == CODE_FOR_altivec_vsldoi_v8hi
15843 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15845 /* Only allow 4-bit unsigned literals. */
15846 STRIP_NOPS (arg2);
15847 if (TREE_CODE (arg2) != INTEGER_CST
15848 || TREE_INT_CST_LOW (arg2) & ~0xf)
15850 error ("argument 3 must be a 4-bit unsigned literal");
15851 return CONST0_RTX (tmode);
15854 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15855 || icode == CODE_FOR_vsx_xxpermdi_v2di
15856 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15857 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15858 || icode == CODE_FOR_vsx_xxpermdi_v1ti
15859 || icode == CODE_FOR_vsx_xxpermdi_v4sf
15860 || icode == CODE_FOR_vsx_xxpermdi_v4si
15861 || icode == CODE_FOR_vsx_xxpermdi_v8hi
15862 || icode == CODE_FOR_vsx_xxpermdi_v16qi
15863 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15864 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15865 || icode == CODE_FOR_vsx_xxsldwi_v4si
15866 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15867 || icode == CODE_FOR_vsx_xxsldwi_v2di
15868 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15870 /* Only allow 2-bit unsigned literals. */
15871 STRIP_NOPS (arg2);
15872 if (TREE_CODE (arg2) != INTEGER_CST
15873 || TREE_INT_CST_LOW (arg2) & ~0x3)
15875 error ("argument 3 must be a 2-bit unsigned literal");
15876 return CONST0_RTX (tmode);
15879 else if (icode == CODE_FOR_vsx_set_v2df
15880 || icode == CODE_FOR_vsx_set_v2di
15881 || icode == CODE_FOR_bcdadd
15882 || icode == CODE_FOR_bcdadd_lt
15883 || icode == CODE_FOR_bcdadd_eq
15884 || icode == CODE_FOR_bcdadd_gt
15885 || icode == CODE_FOR_bcdsub
15886 || icode == CODE_FOR_bcdsub_lt
15887 || icode == CODE_FOR_bcdsub_eq
15888 || icode == CODE_FOR_bcdsub_gt)
15890 /* Only allow 1-bit unsigned literals. */
15891 STRIP_NOPS (arg2);
15892 if (TREE_CODE (arg2) != INTEGER_CST
15893 || TREE_INT_CST_LOW (arg2) & ~0x1)
15895 error ("argument 3 must be a 1-bit unsigned literal");
15896 return CONST0_RTX (tmode);
15899 else if (icode == CODE_FOR_dfp_ddedpd_dd
15900 || icode == CODE_FOR_dfp_ddedpd_td)
15902 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15903 STRIP_NOPS (arg0);
15904 if (TREE_CODE (arg0) != INTEGER_CST
15905 || TREE_INT_CST_LOW (arg0) & ~0x3)
15907 error ("argument 1 must be 0 or 2");
15908 return CONST0_RTX (tmode);
15911 else if (icode == CODE_FOR_dfp_denbcd_dd
15912 || icode == CODE_FOR_dfp_denbcd_td)
15914 /* Only allow 1-bit unsigned literals. */
15915 STRIP_NOPS (arg0);
15916 if (TREE_CODE (arg0) != INTEGER_CST
15917 || TREE_INT_CST_LOW (arg0) & ~0x1)
15919 error ("argument 1 must be a 1-bit unsigned literal");
15920 return CONST0_RTX (tmode);
15923 else if (icode == CODE_FOR_dfp_dscli_dd
15924 || icode == CODE_FOR_dfp_dscli_td
15925 || icode == CODE_FOR_dfp_dscri_dd
15926 || icode == CODE_FOR_dfp_dscri_td)
15928 /* Only allow 6-bit unsigned literals. */
15929 STRIP_NOPS (arg1);
15930 if (TREE_CODE (arg1) != INTEGER_CST
15931 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15933 error ("argument 2 must be a 6-bit unsigned literal");
15934 return CONST0_RTX (tmode);
15937 else if (icode == CODE_FOR_crypto_vshasigmaw
15938 || icode == CODE_FOR_crypto_vshasigmad)
15940 /* Check whether the 2nd and 3rd arguments are integer constants and in
15941 range and prepare arguments. */
15942 STRIP_NOPS (arg1);
15943 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15945 error ("argument 2 must be 0 or 1");
15946 return CONST0_RTX (tmode);
15949 STRIP_NOPS (arg2);
15950 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15952 error ("argument 3 must be in the range 0..15");
15953 return CONST0_RTX (tmode);
15957 if (target == 0
15958 || GET_MODE (target) != tmode
15959 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15960 target = gen_reg_rtx (tmode);
15962 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15963 op0 = copy_to_mode_reg (mode0, op0);
15964 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15965 op1 = copy_to_mode_reg (mode1, op1);
15966 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15967 op2 = copy_to_mode_reg (mode2, op2);
15969 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15970 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15971 else
15972 pat = GEN_FCN (icode) (target, op0, op1, op2);
15973 if (! pat)
15974 return 0;
15975 emit_insn (pat);
15977 return target;
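/* For example, the 4-bit check on the vsldoi icodes above corresponds
   to user-level code such as (illustrative):

     vector signed int a, b, c;
     c = vec_sld (a, b, 3);   accepted: the shift count is a literal
     c = vec_sld (a, b, n);   rejected: "argument 3 must be a 4-bit
			      unsigned literal"  */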
15980 /* Expand the lvx builtins. */
15981 static rtx
15982 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15984 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15985 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15986 tree arg0;
15987 machine_mode tmode, mode0;
15988 rtx pat, op0;
15989 enum insn_code icode;
15991 switch (fcode)
15993 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15994 icode = CODE_FOR_vector_altivec_load_v16qi;
15995 break;
15996 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15997 icode = CODE_FOR_vector_altivec_load_v8hi;
15998 break;
15999 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
16000 icode = CODE_FOR_vector_altivec_load_v4si;
16001 break;
16002 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
16003 icode = CODE_FOR_vector_altivec_load_v4sf;
16004 break;
16005 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
16006 icode = CODE_FOR_vector_altivec_load_v2df;
16007 break;
16008 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
16009 icode = CODE_FOR_vector_altivec_load_v2di;
16010 break;
16011 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
16012 icode = CODE_FOR_vector_altivec_load_v1ti;
16013 break;
16014 default:
16015 *expandedp = false;
16016 return NULL_RTX;
16019 *expandedp = true;
16021 arg0 = CALL_EXPR_ARG (exp, 0);
16022 op0 = expand_normal (arg0);
16023 tmode = insn_data[icode].operand[0].mode;
16024 mode0 = insn_data[icode].operand[1].mode;
16026 if (target == 0
16027 || GET_MODE (target) != tmode
16028 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16029 target = gen_reg_rtx (tmode);
16031 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16032 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16034 pat = GEN_FCN (icode) (target, op0);
16035 if (! pat)
16036 return 0;
16037 emit_insn (pat);
16038 return target;
16041 /* Expand the stvx builtins. */
16042 static rtx
16043 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
16044 bool *expandedp)
16046 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16047 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16048 tree arg0, arg1;
16049 machine_mode mode0, mode1;
16050 rtx pat, op0, op1;
16051 enum insn_code icode;
16053 switch (fcode)
16055 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
16056 icode = CODE_FOR_vector_altivec_store_v16qi;
16057 break;
16058 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
16059 icode = CODE_FOR_vector_altivec_store_v8hi;
16060 break;
16061 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
16062 icode = CODE_FOR_vector_altivec_store_v4si;
16063 break;
16064 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
16065 icode = CODE_FOR_vector_altivec_store_v4sf;
16066 break;
16067 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
16068 icode = CODE_FOR_vector_altivec_store_v2df;
16069 break;
16070 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
16071 icode = CODE_FOR_vector_altivec_store_v2di;
16072 break;
16073 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
16074 icode = CODE_FOR_vector_altivec_store_v1ti;
16075 break;
16076 default:
16077 *expandedp = false;
16078 return NULL_RTX;
16081 arg0 = CALL_EXPR_ARG (exp, 0);
16082 arg1 = CALL_EXPR_ARG (exp, 1);
16083 op0 = expand_normal (arg0);
16084 op1 = expand_normal (arg1);
16085 mode0 = insn_data[icode].operand[0].mode;
16086 mode1 = insn_data[icode].operand[1].mode;
16088 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16089 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16090 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16091 op1 = copy_to_mode_reg (mode1, op1);
16093 pat = GEN_FCN (icode) (op0, op1);
16094 if (pat)
16095 emit_insn (pat);
16097 *expandedp = true;
16098 return NULL_RTX;
16101 /* Expand the dst builtins. */
16102 static rtx
16103 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
16104 bool *expandedp)
16106 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16107 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16108 tree arg0, arg1, arg2;
16109 machine_mode mode0, mode1;
16110 rtx pat, op0, op1, op2;
16111 const struct builtin_description *d;
16112 size_t i;
16114 *expandedp = false;
16116 /* Handle DST variants. */
16117 d = bdesc_dst;
16118 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16119 if (d->code == fcode)
16121 arg0 = CALL_EXPR_ARG (exp, 0);
16122 arg1 = CALL_EXPR_ARG (exp, 1);
16123 arg2 = CALL_EXPR_ARG (exp, 2);
16124 op0 = expand_normal (arg0);
16125 op1 = expand_normal (arg1);
16126 op2 = expand_normal (arg2);
16127 mode0 = insn_data[d->icode].operand[0].mode;
16128 mode1 = insn_data[d->icode].operand[1].mode;
16130 /* Invalid arguments; bail out before generating bad rtl. */
16131 if (arg0 == error_mark_node
16132 || arg1 == error_mark_node
16133 || arg2 == error_mark_node)
16134 return const0_rtx;
16136 *expandedp = true;
16137 STRIP_NOPS (arg2);
16138 if (TREE_CODE (arg2) != INTEGER_CST
16139 || TREE_INT_CST_LOW (arg2) & ~0x3)
16141 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
16142 return const0_rtx;
16145 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16146 op0 = copy_to_mode_reg (Pmode, op0);
16147 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16148 op1 = copy_to_mode_reg (mode1, op1);
16150 pat = GEN_FCN (d->icode) (op0, op1, op2);
16151 if (pat != 0)
16152 emit_insn (pat);
16154 return NULL_RTX;
16157 return NULL_RTX;
16160 /* Expand vec_init builtin. */
16161 static rtx
16162 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
16164 machine_mode tmode = TYPE_MODE (type);
16165 machine_mode inner_mode = GET_MODE_INNER (tmode);
16166 int i, n_elt = GET_MODE_NUNITS (tmode);
16168 gcc_assert (VECTOR_MODE_P (tmode));
16169 gcc_assert (n_elt == call_expr_nargs (exp));
16171 if (!target || !register_operand (target, tmode))
16172 target = gen_reg_rtx (tmode);
16174 /* If we have a vector comprised of a single element, such as V1TImode, do
16175 the initialization directly. */
16176 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
16178 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
16179 emit_move_insn (target, gen_lowpart (tmode, x));
16181 else
16183 rtvec v = rtvec_alloc (n_elt);
16185 for (i = 0; i < n_elt; ++i)
16187 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
16188 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16191 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
16194 return target;
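/* Conceptually (an illustrative view of the internal vec_init
   builtins this expands): initializing a V4SImode vector from four
   scalars

     { a, b, c, d }

   arrives here as a call with four arguments; each is narrowed to the
   element mode and the whole set is handed to
   rs6000_expand_vector_init as a PARALLEL.  */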
16197 /* Return the integer constant in ARG. Constrain it to be in the range
16198 of the subparts of VEC_TYPE; issue an error if not. */
16200 static int
16201 get_element_number (tree vec_type, tree arg)
16203 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16205 if (!tree_fits_uhwi_p (arg)
16206 || (elt = tree_to_uhwi (arg), elt > max))
16208 error ("selector must be an integer constant in the range 0..%wi", max);
16209 return 0;
16212 return elt;
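/* E.g. for a vector of four ints TYPE_VECTOR_SUBPARTS is 4, so MAX is
   3, and an out-of-range constant selector such as (illustrative)

     vec_extract (v, 4)

   is diagnosed here, while non-constant selectors are handled by the
   expanders themselves (see altivec_expand_vec_ext_builtin below).  */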
16215 /* Expand vec_set builtin. */
16216 static rtx
16217 altivec_expand_vec_set_builtin (tree exp)
16219 machine_mode tmode, mode1;
16220 tree arg0, arg1, arg2;
16221 int elt;
16222 rtx op0, op1;
16224 arg0 = CALL_EXPR_ARG (exp, 0);
16225 arg1 = CALL_EXPR_ARG (exp, 1);
16226 arg2 = CALL_EXPR_ARG (exp, 2);
16228 tmode = TYPE_MODE (TREE_TYPE (arg0));
16229 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16230 gcc_assert (VECTOR_MODE_P (tmode));
16232 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
16233 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
16234 elt = get_element_number (TREE_TYPE (arg0), arg2);
16236 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16237 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16239 op0 = force_reg (tmode, op0);
16240 op1 = force_reg (mode1, op1);
16242 rs6000_expand_vector_set (op0, op1, elt);
16244 return op0;
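/* An illustrative GNU C equivalent of the vec_set expansion, assuming
   a V4SI-like type; ELT has already been range-checked by
   get_element_number above. */
typedef int v4si_set_sketch __attribute__ ((vector_size (16)));

static v4si_set_sketch
vec_set_sketch (v4si_set_sketch v, int x, int elt)
{
  /* This store into one lane is what rs6000_expand_vector_set
     arranges in RTL.  */
  v[elt] = x;
  return v;
}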
16247 /* Expand vec_ext builtin. */
16248 static rtx
16249 altivec_expand_vec_ext_builtin (tree exp, rtx target)
16251 machine_mode tmode, mode0;
16252 tree arg0, arg1;
16253 rtx op0;
16254 rtx op1;
16256 arg0 = CALL_EXPR_ARG (exp, 0);
16257 arg1 = CALL_EXPR_ARG (exp, 1);
16259 op0 = expand_normal (arg0);
16260 op1 = expand_normal (arg1);
16262 /* Call get_element_number to validate arg1 if it is a constant. */
16263 if (TREE_CODE (arg1) == INTEGER_CST)
16264 (void) get_element_number (TREE_TYPE (arg0), arg1);
16266 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16267 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16268 gcc_assert (VECTOR_MODE_P (mode0));
16270 op0 = force_reg (mode0, op0);
16272 if (optimize || !target || !register_operand (target, tmode))
16273 target = gen_reg_rtx (tmode);
16275 rs6000_expand_vector_extract (target, op0, op1);
16277 return target;
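/* An illustrative GNU C equivalent of the vec_ext expansion for a
   V4SI-like type; note that unlike vec_set, a non-constant ELT is
   accepted here, which is why OP1 stays an rtx above. */
typedef int v4si_ext_sketch __attribute__ ((vector_size (16)));

static int
vec_ext_sketch (v4si_ext_sketch v, int elt)
{
  /* What rs6000_expand_vector_extract arranges in RTL.  */
  return v[elt];
}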
16280 /* Expand the builtin in EXP and store the result in TARGET. Store
16281 true in *EXPANDEDP if we found a builtin to expand. */
16282 static rtx
16283 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
16285 const struct builtin_description *d;
16286 size_t i;
16287 enum insn_code icode;
16288 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16289 tree arg0, arg1, arg2;
16290 rtx op0, pat;
16291 machine_mode tmode, mode0;
16292 enum rs6000_builtins fcode
16293 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16295 if (rs6000_overloaded_builtin_p (fcode))
16297 *expandedp = true;
16298 error ("unresolved overload for Altivec builtin %qF", fndecl);
16300 /* Given it is invalid, just generate a normal call. */
16301 return expand_call (exp, target, false);
16304 target = altivec_expand_ld_builtin (exp, target, expandedp);
16305 if (*expandedp)
16306 return target;
16308 target = altivec_expand_st_builtin (exp, target, expandedp);
16309 if (*expandedp)
16310 return target;
16312 target = altivec_expand_dst_builtin (exp, target, expandedp);
16313 if (*expandedp)
16314 return target;
16316 *expandedp = true;
16318 switch (fcode)
16320 case ALTIVEC_BUILTIN_STVX_V2DF:
16321 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
16322 case ALTIVEC_BUILTIN_STVX_V2DI:
16323 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
16324 case ALTIVEC_BUILTIN_STVX_V4SF:
16325 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
16326 case ALTIVEC_BUILTIN_STVX:
16327 case ALTIVEC_BUILTIN_STVX_V4SI:
16328 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
16329 case ALTIVEC_BUILTIN_STVX_V8HI:
16330 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
16331 case ALTIVEC_BUILTIN_STVX_V16QI:
16332 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
16333 case ALTIVEC_BUILTIN_STVEBX:
16334 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
16335 case ALTIVEC_BUILTIN_STVEHX:
16336 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
16337 case ALTIVEC_BUILTIN_STVEWX:
16338 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
16339 case ALTIVEC_BUILTIN_STVXL_V2DF:
16340 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
16341 case ALTIVEC_BUILTIN_STVXL_V2DI:
16342 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
16343 case ALTIVEC_BUILTIN_STVXL_V4SF:
16344 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
16345 case ALTIVEC_BUILTIN_STVXL:
16346 case ALTIVEC_BUILTIN_STVXL_V4SI:
16347 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
16348 case ALTIVEC_BUILTIN_STVXL_V8HI:
16349 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
16350 case ALTIVEC_BUILTIN_STVXL_V16QI:
16351 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
16353 case ALTIVEC_BUILTIN_STVLX:
16354 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
16355 case ALTIVEC_BUILTIN_STVLXL:
16356 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
16357 case ALTIVEC_BUILTIN_STVRX:
16358 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
16359 case ALTIVEC_BUILTIN_STVRXL:
16360 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
16362 case P9V_BUILTIN_STXVL:
16363 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
16365 case VSX_BUILTIN_STXVD2X_V1TI:
16366 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
16367 case VSX_BUILTIN_STXVD2X_V2DF:
16368 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
16369 case VSX_BUILTIN_STXVD2X_V2DI:
16370 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
16371 case VSX_BUILTIN_STXVW4X_V4SF:
16372 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
16373 case VSX_BUILTIN_STXVW4X_V4SI:
16374 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
16375 case VSX_BUILTIN_STXVW4X_V8HI:
16376 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
16377 case VSX_BUILTIN_STXVW4X_V16QI:
16378 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
16380 /* For the following on big endian, it's ok to use any appropriate
16381 unaligned-supporting store, so use a generic expander. For
16382 little-endian, the exact element-reversing instruction must
16383 be used. */
16384 case VSX_BUILTIN_ST_ELEMREV_V2DF:
16386 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
16387 : CODE_FOR_vsx_st_elemrev_v2df);
16388 return altivec_expand_stv_builtin (code, exp);
16390 case VSX_BUILTIN_ST_ELEMREV_V2DI:
16392 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
16393 : CODE_FOR_vsx_st_elemrev_v2di);
16394 return altivec_expand_stv_builtin (code, exp);
16396 case VSX_BUILTIN_ST_ELEMREV_V4SF:
16398 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
16399 : CODE_FOR_vsx_st_elemrev_v4sf);
16400 return altivec_expand_stv_builtin (code, exp);
16402 case VSX_BUILTIN_ST_ELEMREV_V4SI:
16404 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
16405 : CODE_FOR_vsx_st_elemrev_v4si);
16406 return altivec_expand_stv_builtin (code, exp);
16408 case VSX_BUILTIN_ST_ELEMREV_V8HI:
16410 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
16411 : CODE_FOR_vsx_st_elemrev_v8hi);
16412 return altivec_expand_stv_builtin (code, exp);
16414 case VSX_BUILTIN_ST_ELEMREV_V16QI:
16416 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
16417 : CODE_FOR_vsx_st_elemrev_v16qi);
16418 return altivec_expand_stv_builtin (code, exp);
16421 case ALTIVEC_BUILTIN_MFVSCR:
16422 icode = CODE_FOR_altivec_mfvscr;
16423 tmode = insn_data[icode].operand[0].mode;
16425 if (target == 0
16426 || GET_MODE (target) != tmode
16427 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16428 target = gen_reg_rtx (tmode);
16430 pat = GEN_FCN (icode) (target);
16431 if (! pat)
16432 return 0;
16433 emit_insn (pat);
16434 return target;
16436 case ALTIVEC_BUILTIN_MTVSCR:
16437 icode = CODE_FOR_altivec_mtvscr;
16438 arg0 = CALL_EXPR_ARG (exp, 0);
16439 op0 = expand_normal (arg0);
16440 mode0 = insn_data[icode].operand[0].mode;
16442 /* If we got invalid arguments, bail out before generating bad rtl. */
16443 if (arg0 == error_mark_node)
16444 return const0_rtx;
16446 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16447 op0 = copy_to_mode_reg (mode0, op0);
16449 pat = GEN_FCN (icode) (op0);
16450 if (pat)
16451 emit_insn (pat);
16452 return NULL_RTX;
16454 case ALTIVEC_BUILTIN_DSSALL:
16455 emit_insn (gen_altivec_dssall ());
16456 return NULL_RTX;
16458 case ALTIVEC_BUILTIN_DSS:
16459 icode = CODE_FOR_altivec_dss;
16460 arg0 = CALL_EXPR_ARG (exp, 0);
16461 STRIP_NOPS (arg0);
16462 op0 = expand_normal (arg0);
16463 mode0 = insn_data[icode].operand[0].mode;
16465 /* If we got invalid arguments, bail out before generating bad rtl. */
16466 if (arg0 == error_mark_node)
16467 return const0_rtx;
16469 if (TREE_CODE (arg0) != INTEGER_CST
16470 || TREE_INT_CST_LOW (arg0) & ~0x3)
16472 error ("argument to dss must be a 2-bit unsigned literal");
16473 return const0_rtx;
16476 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16477 op0 = copy_to_mode_reg (mode0, op0);
16479 emit_insn (gen_altivec_dss (op0));
16480 return NULL_RTX;
16482 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
16483 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
16484 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
16485 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
16486 case VSX_BUILTIN_VEC_INIT_V2DF:
16487 case VSX_BUILTIN_VEC_INIT_V2DI:
16488 case VSX_BUILTIN_VEC_INIT_V1TI:
16489 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
16491 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
16492 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
16493 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
16494 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
16495 case VSX_BUILTIN_VEC_SET_V2DF:
16496 case VSX_BUILTIN_VEC_SET_V2DI:
16497 case VSX_BUILTIN_VEC_SET_V1TI:
16498 return altivec_expand_vec_set_builtin (exp);
16500 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
16501 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
16502 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
16503 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
16504 case VSX_BUILTIN_VEC_EXT_V2DF:
16505 case VSX_BUILTIN_VEC_EXT_V2DI:
16506 case VSX_BUILTIN_VEC_EXT_V1TI:
16507 return altivec_expand_vec_ext_builtin (exp, target);
16509 case P9V_BUILTIN_VEXTRACT4B:
16510 case P9V_BUILTIN_VEC_VEXTRACT4B:
16511 arg1 = CALL_EXPR_ARG (exp, 1);
16512 STRIP_NOPS (arg1);
16514 /* Generate a normal call if it is invalid. */
16515 if (arg1 == error_mark_node)
16516 return expand_call (exp, target, false);
16518 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
16520 error ("second argument to vec_vextract4b must be 0..12");
16521 return expand_call (exp, target, false);
16523 break;
16525 case P9V_BUILTIN_VINSERT4B:
16526 case P9V_BUILTIN_VINSERT4B_DI:
16527 case P9V_BUILTIN_VEC_VINSERT4B:
16528 arg2 = CALL_EXPR_ARG (exp, 2);
16529 STRIP_NOPS (arg2);
16531 /* Generate a normal call if it is invalid. */
16532 if (arg2 == error_mark_node)
16533 return expand_call (exp, target, false);
16535 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
16537 error ("third argument to vec_vinsert4b must be 0..12");
16538 return expand_call (exp, target, false);
16540 break;
16542 default:
16543 break;
16547 /* Expand abs* operations. */
16548 d = bdesc_abs;
16549 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16550 if (d->code == fcode)
16551 return altivec_expand_abs_builtin (d->icode, exp, target);
16553 /* Expand the AltiVec predicates. */
16554 d = bdesc_altivec_preds;
16555 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16556 if (d->code == fcode)
16557 return altivec_expand_predicate_builtin (d->icode, exp, target);
16559 /* LV* are funky. We initialized them differently. */
16560 switch (fcode)
16562 case ALTIVEC_BUILTIN_LVSL:
16563 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
16564 exp, target, false);
16565 case ALTIVEC_BUILTIN_LVSR:
16566 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
16567 exp, target, false);
16568 case ALTIVEC_BUILTIN_LVEBX:
16569 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
16570 exp, target, false);
16571 case ALTIVEC_BUILTIN_LVEHX:
16572 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
16573 exp, target, false);
16574 case ALTIVEC_BUILTIN_LVEWX:
16575 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
16576 exp, target, false);
16577 case ALTIVEC_BUILTIN_LVXL_V2DF:
16578 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
16579 exp, target, false);
16580 case ALTIVEC_BUILTIN_LVXL_V2DI:
16581 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
16582 exp, target, false);
16583 case ALTIVEC_BUILTIN_LVXL_V4SF:
16584 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
16585 exp, target, false);
16586 case ALTIVEC_BUILTIN_LVXL:
16587 case ALTIVEC_BUILTIN_LVXL_V4SI:
16588 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
16589 exp, target, false);
16590 case ALTIVEC_BUILTIN_LVXL_V8HI:
16591 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
16592 exp, target, false);
16593 case ALTIVEC_BUILTIN_LVXL_V16QI:
16594 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
16595 exp, target, false);
16596 case ALTIVEC_BUILTIN_LVX_V2DF:
16597 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
16598 exp, target, false);
16599 case ALTIVEC_BUILTIN_LVX_V2DI:
16600 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
16601 exp, target, false);
16602 case ALTIVEC_BUILTIN_LVX_V4SF:
16603 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
16604 exp, target, false);
16605 case ALTIVEC_BUILTIN_LVX:
16606 case ALTIVEC_BUILTIN_LVX_V4SI:
16607 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
16608 exp, target, false);
16609 case ALTIVEC_BUILTIN_LVX_V8HI:
16610 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
16611 exp, target, false);
16612 case ALTIVEC_BUILTIN_LVX_V16QI:
16613 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
16614 exp, target, false);
16615 case ALTIVEC_BUILTIN_LVLX:
16616 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
16617 exp, target, true);
16618 case ALTIVEC_BUILTIN_LVLXL:
16619 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
16620 exp, target, true);
16621 case ALTIVEC_BUILTIN_LVRX:
16622 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
16623 exp, target, true);
16624 case ALTIVEC_BUILTIN_LVRXL:
16625 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
16626 exp, target, true);
16627 case VSX_BUILTIN_LXVD2X_V1TI:
16628 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
16629 exp, target, false);
16630 case VSX_BUILTIN_LXVD2X_V2DF:
16631 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
16632 exp, target, false);
16633 case VSX_BUILTIN_LXVD2X_V2DI:
16634 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
16635 exp, target, false);
16636 case VSX_BUILTIN_LXVW4X_V4SF:
16637 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
16638 exp, target, false);
16639 case VSX_BUILTIN_LXVW4X_V4SI:
16640 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
16641 exp, target, false);
16642 case VSX_BUILTIN_LXVW4X_V8HI:
16643 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
16644 exp, target, false);
16645 case VSX_BUILTIN_LXVW4X_V16QI:
16646 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
16647 exp, target, false);
16648 /* For the following on big endian, it's ok to use any appropriate
16649 unaligned-supporting load, so use a generic expander. For
16650 little-endian, the exact element-reversing instruction must
16651 be used. */
16652 case VSX_BUILTIN_LD_ELEMREV_V2DF:
16654 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
16655 : CODE_FOR_vsx_ld_elemrev_v2df);
16656 return altivec_expand_lv_builtin (code, exp, target, false);
16658 case VSX_BUILTIN_LD_ELEMREV_V2DI:
16660 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
16661 : CODE_FOR_vsx_ld_elemrev_v2di);
16662 return altivec_expand_lv_builtin (code, exp, target, false);
16664 case VSX_BUILTIN_LD_ELEMREV_V4SF:
16666 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
16667 : CODE_FOR_vsx_ld_elemrev_v4sf);
16668 return altivec_expand_lv_builtin (code, exp, target, false);
16670 case VSX_BUILTIN_LD_ELEMREV_V4SI:
16672 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
16673 : CODE_FOR_vsx_ld_elemrev_v4si);
16674 return altivec_expand_lv_builtin (code, exp, target, false);
16676 case VSX_BUILTIN_LD_ELEMREV_V8HI:
16678 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
16679 : CODE_FOR_vsx_ld_elemrev_v8hi);
16680 return altivec_expand_lv_builtin (code, exp, target, false);
16682 case VSX_BUILTIN_LD_ELEMREV_V16QI:
16684 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
16685 : CODE_FOR_vsx_ld_elemrev_v16qi);
16686 return altivec_expand_lv_builtin (code, exp, target, false);
16688 break;
16689 default:
16690 break;
16694 *expandedp = false;
16695 return NULL_RTX;
16698 /* Expand the builtin in EXP and store the result in TARGET. Store
16699 true in *EXPANDEDP if we found a builtin to expand. */
16700 static rtx
16701 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
16703 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16704 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16705 const struct builtin_description *d;
16706 size_t i;
16708 *expandedp = true;
16710 switch (fcode)
16712 case PAIRED_BUILTIN_STX:
16713 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
16714 case PAIRED_BUILTIN_LX:
16715 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
16716 default:
16717 break;
16721 /* Expand the paired predicates. */
16722 d = bdesc_paired_preds;
16723 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
16724 if (d->code == fcode)
16725 return paired_expand_predicate_builtin (d->icode, exp, target);
16727 *expandedp = false;
16728 return NULL_RTX;
16731 /* Binops that need to be initialized manually, but can be expanded
16732 automagically by rs6000_expand_binop_builtin. */
16733 static const struct builtin_description bdesc_2arg_spe[] =
16735 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
16736 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
16737 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
16738 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
16739 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
16740 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
16741 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
16742 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
16743 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
16744 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
16745 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
16746 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
16747 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
16748 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
16749 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
16750 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
16751 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
16752 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
16753 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
16754 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
16755 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
16756 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
16759 /* Expand the builtin in EXP and store the result in TARGET. Store
16760 true in *EXPANDEDP if we found a builtin to expand.
16762 This expands the SPE builtins that are not simple unary and binary
16763 operations. */
16764 static rtx
16765 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
16767 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16768 tree arg1, arg0;
16769 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16770 enum insn_code icode;
16771 machine_mode tmode, mode0;
16772 rtx pat, op0;
16773 const struct builtin_description *d;
16774 size_t i;
16776 *expandedp = true;
16778 /* Syntax check for a 5-bit unsigned immediate. */
16779 switch (fcode)
16781 case SPE_BUILTIN_EVSTDD:
16782 case SPE_BUILTIN_EVSTDH:
16783 case SPE_BUILTIN_EVSTDW:
16784 case SPE_BUILTIN_EVSTWHE:
16785 case SPE_BUILTIN_EVSTWHO:
16786 case SPE_BUILTIN_EVSTWWE:
16787 case SPE_BUILTIN_EVSTWWO:
16788 arg1 = CALL_EXPR_ARG (exp, 2);
16789 if (TREE_CODE (arg1) != INTEGER_CST
16790 || TREE_INT_CST_LOW (arg1) & ~0x1f)
16792 error ("argument 2 must be a 5-bit unsigned literal");
16793 return const0_rtx;
16795 break;
16796 default:
16797 break;
16800 /* The evsplat*i instructions are not quite generic. */
16801 switch (fcode)
16803 case SPE_BUILTIN_EVSPLATFI:
16804 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
16805 exp, target);
16806 case SPE_BUILTIN_EVSPLATI:
16807 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
16808 exp, target);
16809 default:
16810 break;
16813 d = bdesc_2arg_spe;
16814 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
16815 if (d->code == fcode)
16816 return rs6000_expand_binop_builtin (d->icode, exp, target);
16818 d = bdesc_spe_predicates;
16819 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
16820 if (d->code == fcode)
16821 return spe_expand_predicate_builtin (d->icode, exp, target);
16823 d = bdesc_spe_evsel;
16824 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
16825 if (d->code == fcode)
16826 return spe_expand_evsel_builtin (d->icode, exp, target);
16828 switch (fcode)
16830 case SPE_BUILTIN_EVSTDDX:
16831 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16832 case SPE_BUILTIN_EVSTDHX:
16833 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16834 case SPE_BUILTIN_EVSTDWX:
16835 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16836 case SPE_BUILTIN_EVSTWHEX:
16837 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16838 case SPE_BUILTIN_EVSTWHOX:
16839 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16840 case SPE_BUILTIN_EVSTWWEX:
16841 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16842 case SPE_BUILTIN_EVSTWWOX:
16843 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16844 case SPE_BUILTIN_EVSTDD:
16845 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16846 case SPE_BUILTIN_EVSTDH:
16847 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16848 case SPE_BUILTIN_EVSTDW:
16849 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16850 case SPE_BUILTIN_EVSTWHE:
16851 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16852 case SPE_BUILTIN_EVSTWHO:
16853 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16854 case SPE_BUILTIN_EVSTWWE:
16855 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16856 case SPE_BUILTIN_EVSTWWO:
16857 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16858 case SPE_BUILTIN_MFSPEFSCR:
16859 icode = CODE_FOR_spe_mfspefscr;
16860 tmode = insn_data[icode].operand[0].mode;
16862 if (target == 0
16863 || GET_MODE (target) != tmode
16864 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16865 target = gen_reg_rtx (tmode);
16867 pat = GEN_FCN (icode) (target);
16868 if (! pat)
16869 return 0;
16870 emit_insn (pat);
16871 return target;
16872 case SPE_BUILTIN_MTSPEFSCR:
16873 icode = CODE_FOR_spe_mtspefscr;
16874 arg0 = CALL_EXPR_ARG (exp, 0);
16875 op0 = expand_normal (arg0);
16876 mode0 = insn_data[icode].operand[0].mode;
16878 if (arg0 == error_mark_node)
16879 return const0_rtx;
16881 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16882 op0 = copy_to_mode_reg (mode0, op0);
16884 pat = GEN_FCN (icode) (op0);
16885 if (pat)
16886 emit_insn (pat);
16887 return NULL_RTX;
16888 default:
16889 break;
16892 *expandedp = false;
16893 return NULL_RTX;
16896 static rtx
16897 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16899 rtx pat, scratch, tmp;
16900 tree form = CALL_EXPR_ARG (exp, 0);
16901 tree arg0 = CALL_EXPR_ARG (exp, 1);
16902 tree arg1 = CALL_EXPR_ARG (exp, 2);
16903 rtx op0 = expand_normal (arg0);
16904 rtx op1 = expand_normal (arg1);
16905 machine_mode mode0 = insn_data[icode].operand[1].mode;
16906 machine_mode mode1 = insn_data[icode].operand[2].mode;
16907 int form_int;
16908 enum rtx_code code;
16910 if (TREE_CODE (form) != INTEGER_CST)
16912 error ("argument 1 of __builtin_paired_predicate must be a constant");
16913 return const0_rtx;
16915 else
16916 form_int = TREE_INT_CST_LOW (form);
16918 gcc_assert (mode0 == mode1);
16920 if (arg0 == error_mark_node || arg1 == error_mark_node)
16921 return const0_rtx;
16923 if (target == 0
16924 || GET_MODE (target) != SImode
16925 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16926 target = gen_reg_rtx (SImode);
16927 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16928 op0 = copy_to_mode_reg (mode0, op0);
16929 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16930 op1 = copy_to_mode_reg (mode1, op1);
16932 scratch = gen_reg_rtx (CCFPmode);
16934 pat = GEN_FCN (icode) (scratch, op0, op1);
16935 if (!pat)
16936 return const0_rtx;
16938 emit_insn (pat);
16940 switch (form_int)
16942 /* LT bit. */
16943 case 0:
16944 code = LT;
16945 break;
16946 /* GT bit. */
16947 case 1:
16948 code = GT;
16949 break;
16950 /* EQ bit. */
16951 case 2:
16952 code = EQ;
16953 break;
16954 /* UN bit. */
16955 case 3:
16956 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16957 return target;
16958 default:
16959 error ("argument 1 of __builtin_paired_predicate is out of range");
16960 return const0_rtx;
16963 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16964 emit_move_insn (target, tmp);
16965 return target;
16968 static rtx
16969 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16971 rtx pat, scratch, tmp;
16972 tree form = CALL_EXPR_ARG (exp, 0);
16973 tree arg0 = CALL_EXPR_ARG (exp, 1);
16974 tree arg1 = CALL_EXPR_ARG (exp, 2);
16975 rtx op0 = expand_normal (arg0);
16976 rtx op1 = expand_normal (arg1);
16977 machine_mode mode0 = insn_data[icode].operand[1].mode;
16978 machine_mode mode1 = insn_data[icode].operand[2].mode;
16979 int form_int;
16980 enum rtx_code code;
16982 if (TREE_CODE (form) != INTEGER_CST)
16984 error ("argument 1 of __builtin_spe_predicate must be a constant");
16985 return const0_rtx;
16987 else
16988 form_int = TREE_INT_CST_LOW (form);
16990 gcc_assert (mode0 == mode1);
16992 if (arg0 == error_mark_node || arg1 == error_mark_node)
16993 return const0_rtx;
16995 if (target == 0
16996 || GET_MODE (target) != SImode
16997 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
16998 target = gen_reg_rtx (SImode);
17000 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17001 op0 = copy_to_mode_reg (mode0, op0);
17002 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17003 op1 = copy_to_mode_reg (mode1, op1);
17005 scratch = gen_reg_rtx (CCmode);
17007 pat = GEN_FCN (icode) (scratch, op0, op1);
17008 if (! pat)
17009 return const0_rtx;
17010 emit_insn (pat);
17012 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
17013 _lower_. We use one compare, but look in different bits of the
17014 CR for each variant.
17016 There are 2 elements in each SPE simd type (upper/lower). The CR
17017 bits are set as follows:
17019 BIT0 | BIT 1 | BIT 2 | BIT 3
17020 U | L | (U | L) | (U & L)
17022 So, for an "all" relationship, BIT 3 would be set.
17023 For an "any" relationship, BIT 2 would be set. Etc.
17025 Following traditional nomenclature, these bits map to:
17027 BIT0 | BIT 1 | BIT 2 | BIT 3
17028 LT | GT | EQ | OV
17030 Later, we will generate rtl to look in the OV/EQ/LT/GT bit for the given variant (see the worked example after this function).
17033 switch (form_int)
17035 /* All variant. OV bit. */
17036 case 0:
17037 /* We need to get to the OV bit, which is the ORDERED bit. We
17038 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
17039 that's ugly and will make validate_condition_mode die.
17040 So let's just use another pattern. */
17041 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
17042 return target;
17043 /* Any variant. EQ bit. */
17044 case 1:
17045 code = EQ;
17046 break;
17047 /* Upper variant. LT bit. */
17048 case 2:
17049 code = LT;
17050 break;
17051 /* Lower variant. GT bit. */
17052 case 3:
17053 code = GT;
17054 break;
17055 default:
17056 error ("argument 1 of __builtin_spe_predicate is out of range");
17057 return const0_rtx;
17060 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
17061 emit_move_insn (target, tmp);
17063 return target;
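/* A standalone worked example of the bit mapping described in the
   comment above: given the two per-element comparison results U and L,
   the four CR field bits are U, L, U|L and U&L (illustrative only). */
#include <stdio.h>

int
main (void)
{
  int U = 1, L = 0;	/* upper element compares true, lower false */
  int bit0 = U;		/* LT position: upper variant */
  int bit1 = L;		/* GT position: lower variant */
  int bit2 = U | L;	/* EQ position: "any" variant */
  int bit3 = U & L;	/* OV position: "all" variant */
  /* Prints "upper=1 lower=0 any=1 all=0".  */
  printf ("upper=%d lower=%d any=%d all=%d\n", bit0, bit1, bit2, bit3);
  return 0;
}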
17066 /* The evsel builtins look like this:
17068 e = __builtin_spe_evsel_OP (a, b, c, d);
17070 and work like this:
17072 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
17073 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
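/* A runnable scalar model of the evsel semantics above for a
   two-element vector, using GNU C vector extensions; ">" stands in
   for *OP* here purely for illustration. */
typedef int v2si_evsel_sketch __attribute__ ((vector_size (8)));

static v2si_evsel_sketch
evsel_sketch (v2si_evsel_sketch a, v2si_evsel_sketch b,
	      v2si_evsel_sketch c, v2si_evsel_sketch d)
{
  v2si_evsel_sketch e;
  e[0] = a[0] > b[0] ? c[0] : d[0];	/* upper element */
  e[1] = a[1] > b[1] ? c[1] : d[1];	/* lower element */
  return e;
}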
17076 static rtx
17077 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
17079 rtx pat, scratch;
17080 tree arg0 = CALL_EXPR_ARG (exp, 0);
17081 tree arg1 = CALL_EXPR_ARG (exp, 1);
17082 tree arg2 = CALL_EXPR_ARG (exp, 2);
17083 tree arg3 = CALL_EXPR_ARG (exp, 3);
17084 rtx op0 = expand_normal (arg0);
17085 rtx op1 = expand_normal (arg1);
17086 rtx op2 = expand_normal (arg2);
17087 rtx op3 = expand_normal (arg3);
17088 machine_mode mode0 = insn_data[icode].operand[1].mode;
17089 machine_mode mode1 = insn_data[icode].operand[2].mode;
17091 gcc_assert (mode0 == mode1);
17093 if (arg0 == error_mark_node || arg1 == error_mark_node
17094 || arg2 == error_mark_node || arg3 == error_mark_node)
17095 return const0_rtx;
17097 if (target == 0
17098 || GET_MODE (target) != mode0
17099 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
17100 target = gen_reg_rtx (mode0);
17102 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17103 op0 = copy_to_mode_reg (mode0, op0);
17104 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17105 op1 = copy_to_mode_reg (mode0, op1);
17106 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
17107 op2 = copy_to_mode_reg (mode0, op2);
17108 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
17109 op3 = copy_to_mode_reg (mode0, op3);
17111 /* Generate the compare. */
17112 scratch = gen_reg_rtx (CCmode);
17113 pat = GEN_FCN (icode) (scratch, op0, op1);
17114 if (! pat)
17115 return const0_rtx;
17116 emit_insn (pat);
17118 if (mode0 == V2SImode)
17119 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
17120 else
17121 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
17123 return target;
17126 /* Raise an error message for a builtin function that is called without the
17127 appropriate target options being set. */
17129 static void
17130 rs6000_invalid_builtin (enum rs6000_builtins fncode)
17132 size_t uns_fncode = (size_t)fncode;
17133 const char *name = rs6000_builtin_info[uns_fncode].name;
17134 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
17136 gcc_assert (name != NULL);
17137 if ((fnmask & RS6000_BTM_CELL) != 0)
17138 error ("Builtin function %s is only valid for the cell processor", name);
17139 else if ((fnmask & RS6000_BTM_VSX) != 0)
17140 error ("Builtin function %s requires the -mvsx option", name);
17141 else if ((fnmask & RS6000_BTM_HTM) != 0)
17142 error ("Builtin function %s requires the -mhtm option", name);
17143 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
17144 error ("Builtin function %s requires the -maltivec option", name);
17145 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
17146 error ("Builtin function %s requires the -mpaired option", name);
17147 else if ((fnmask & RS6000_BTM_SPE) != 0)
17148 error ("Builtin function %s requires the -mspe option", name);
17149 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
17150 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
17151 error ("Builtin function %s requires the -mhard-dfp and"
17152 " -mpower8-vector options", name);
17153 else if ((fnmask & RS6000_BTM_DFP) != 0)
17154 error ("Builtin function %s requires the -mhard-dfp option", name);
17155 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
17156 error ("Builtin function %s requires the -mpower8-vector option", name);
17157 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
17158 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
17159 error ("Builtin function %s requires the -mcpu=power9 and"
17160 " -m64 options", name);
17161 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
17162 error ("Builtin function %s requires the -mcpu=power9 option", name);
17163 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
17164 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
17165 error ("Builtin function %s requires the -mcpu=power9 and"
17166 " -m64 options", name);
17167 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
17168 error ("Builtin function %s requires the -mcpu=power9 option", name);
17169 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
17170 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
17171 error ("Builtin function %s requires the -mhard-float and"
17172 " -mlong-double-128 options", name);
17173 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
17174 error ("Builtin function %s requires the -mhard-float option", name);
17175 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
17176 error ("Builtin function %s requires the -mfloat128 option", name);
17177 else
17178 error ("Builtin function %s is not supported with the current options",
17179 name);
17182 /* Target hook for early folding of built-ins, shamelessly stolen
17183 from ia64.c. */
17185 static tree
17186 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
17187 tree *args, bool ignore ATTRIBUTE_UNUSED)
17189 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17191 enum rs6000_builtins fn_code
17192 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
17193 switch (fn_code)
17195 case RS6000_BUILTIN_NANQ:
17196 case RS6000_BUILTIN_NANSQ:
17198 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17199 const char *str = c_getstr (*args);
17200 int quiet = fn_code == RS6000_BUILTIN_NANQ;
17201 REAL_VALUE_TYPE real;
17203 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17204 return build_real (type, real);
17205 return NULL_TREE;
17207 case RS6000_BUILTIN_INFQ:
17208 case RS6000_BUILTIN_HUGE_VALQ:
17210 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17211 REAL_VALUE_TYPE inf;
17212 real_inf (&inf);
17213 return build_real (type, inf);
17215 default:
17216 break;
17219 #ifdef SUBTARGET_FOLD_BUILTIN
17220 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17221 #else
17222 return NULL_TREE;
17223 #endif
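/* For instance (illustrative, assuming the __float128 keyword is
   enabled), a call with a constant payload string such as

     __float128 q = __builtin_nanq ("2");

   folds here into a REAL_CST NaN whose significand encodes the
   string, just like the generic __builtin_nan family; a non-constant
   argument returns NULL_TREE and remains a call. */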
17226 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
17227 a constant, use rs6000_fold_builtin.) */
17229 bool
17230 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17232 gimple *stmt = gsi_stmt (*gsi);
17233 tree fndecl = gimple_call_fndecl (stmt);
17234 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
17235 enum rs6000_builtins fn_code
17236 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
17237 tree arg0, arg1, lhs;
17239 switch (fn_code)
17241 /* Flavors of vec_add. We deliberately don't expand
17242 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
17243 TImode, resulting in much poorer code generation. */
17244 case ALTIVEC_BUILTIN_VADDUBM:
17245 case ALTIVEC_BUILTIN_VADDUHM:
17246 case ALTIVEC_BUILTIN_VADDUWM:
17247 case P8V_BUILTIN_VADDUDM:
17248 case ALTIVEC_BUILTIN_VADDFP:
17249 case VSX_BUILTIN_XVADDDP:
17251 arg0 = gimple_call_arg (stmt, 0);
17252 arg1 = gimple_call_arg (stmt, 1);
17253 lhs = gimple_call_lhs (stmt);
17254 gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
17255 gimple_set_location (g, gimple_location (stmt));
17256 gsi_replace (gsi, g, true);
17257 return true;
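/* Illustrative before/after of the replacement above in source terms:

     lhs = __builtin_altivec_vadduwm (a, b);

   becomes the generic GIMPLE statement

     lhs = a + b;

   so later passes can optimize the vector add like any addition. */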
17259 /* Flavors of vec_sub. We deliberately don't expand
17260 P8V_BUILTIN_VSUBUQM. */
17261 case ALTIVEC_BUILTIN_VSUBUBM:
17262 case ALTIVEC_BUILTIN_VSUBUHM:
17263 case ALTIVEC_BUILTIN_VSUBUWM:
17264 case P8V_BUILTIN_VSUBUDM:
17265 case ALTIVEC_BUILTIN_VSUBFP:
17266 case VSX_BUILTIN_XVSUBDP:
17268 arg0 = gimple_call_arg (stmt, 0);
17269 arg1 = gimple_call_arg (stmt, 1);
17270 lhs = gimple_call_lhs (stmt);
17271 gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
17272 gimple_set_location (g, gimple_location (stmt));
17273 gsi_replace (gsi, g, true);
17274 return true;
17276 case VSX_BUILTIN_XVMULSP:
17277 case VSX_BUILTIN_XVMULDP:
17279 arg0 = gimple_call_arg (stmt, 0);
17280 arg1 = gimple_call_arg (stmt, 1);
17281 lhs = gimple_call_lhs (stmt);
17282 gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
17283 gimple_set_location (g, gimple_location (stmt));
17284 gsi_replace (gsi, g, true);
17285 return true;
17287 /* Even element flavors of vec_mul (signed). */
17288 case ALTIVEC_BUILTIN_VMULESB:
17289 case ALTIVEC_BUILTIN_VMULESH:
17290 /* Even element flavors of vec_mul (unsigned). */
17291 case ALTIVEC_BUILTIN_VMULEUB:
17292 case ALTIVEC_BUILTIN_VMULEUH:
17294 arg0 = gimple_call_arg (stmt, 0);
17295 arg1 = gimple_call_arg (stmt, 1);
17296 lhs = gimple_call_lhs (stmt);
17297 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
17298 gimple_set_location (g, gimple_location (stmt));
17299 gsi_replace (gsi, g, true);
17300 return true;
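/* A scalar model of the even-element widening multiply (illustrative;
   "even" refers to GIMPLE's element numbering), using GNU C vector
   extensions: */
typedef short v8hi_mule_sketch __attribute__ ((vector_size (16)));
typedef int v4si_mule_sketch __attribute__ ((vector_size (16)));

static v4si_mule_sketch
mule_sketch (v8hi_mule_sketch a, v8hi_mule_sketch b)
{
  v4si_mule_sketch r;
  for (int i = 0; i < 4; i++)
    r[i] = (int) a[2 * i] * (int) b[2 * i];	/* widen, then multiply */
  return r;
}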
17302 /* Odd element flavors of vec_mul (signed). */
17303 case ALTIVEC_BUILTIN_VMULOSB:
17304 case ALTIVEC_BUILTIN_VMULOSH:
17305 /* Odd element flavors of vec_mul (unsigned). */
17306 case ALTIVEC_BUILTIN_VMULOUB:
17307 case ALTIVEC_BUILTIN_VMULOUH:
17309 arg0 = gimple_call_arg (stmt, 0);
17310 arg1 = gimple_call_arg (stmt, 1);
17311 lhs = gimple_call_lhs (stmt);
17312 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
17313 gimple_set_location (g, gimple_location (stmt));
17314 gsi_replace (gsi, g, true);
17315 return true;
17317 /* Flavors of vec_div (Integer). */
17318 case VSX_BUILTIN_DIV_V2DI:
17319 case VSX_BUILTIN_UDIV_V2DI:
17321 arg0 = gimple_call_arg (stmt, 0);
17322 arg1 = gimple_call_arg (stmt, 1);
17323 lhs = gimple_call_lhs (stmt);
17324 gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
17325 gimple_set_location (g, gimple_location (stmt));
17326 gsi_replace (gsi, g, true);
17327 return true;
17329 /* Flavors of vec_div (Float). */
17330 case VSX_BUILTIN_XVDIVSP:
17331 case VSX_BUILTIN_XVDIVDP:
17333 arg0 = gimple_call_arg (stmt, 0);
17334 arg1 = gimple_call_arg (stmt, 1);
17335 lhs = gimple_call_lhs (stmt);
17336 gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
17337 gimple_set_location (g, gimple_location (stmt));
17338 gsi_replace (gsi, g, true);
17339 return true;
17341 /* Flavors of vec_and. */
17342 case ALTIVEC_BUILTIN_VAND:
17344 arg0 = gimple_call_arg (stmt, 0);
17345 arg1 = gimple_call_arg (stmt, 1);
17346 lhs = gimple_call_lhs (stmt);
17347 gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
17348 gimple_set_location (g, gimple_location (stmt));
17349 gsi_replace (gsi, g, true);
17350 return true;
17352 /* Flavors of vec_andc. */
17353 case ALTIVEC_BUILTIN_VANDC:
17355 arg0 = gimple_call_arg (stmt, 0);
17356 arg1 = gimple_call_arg (stmt, 1);
17357 lhs = gimple_call_lhs (stmt);
17358 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17359 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
17360 gimple_set_location (g, gimple_location (stmt));
17361 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17362 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
17363 gimple_set_location (g, gimple_location (stmt));
17364 gsi_replace (gsi, g, true);
17365 return true;
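/* A scalar model of the two statements built above (illustrative,
   GNU C vector extensions): */
typedef int v4si_andc_sketch __attribute__ ((vector_size (16)));

static v4si_andc_sketch
andc_sketch (v4si_andc_sketch a, v4si_andc_sketch b)
{
  v4si_andc_sketch temp = ~b;	/* the inserted BIT_NOT_EXPR statement */
  return a & temp;		/* the BIT_AND_EXPR that replaces the call */
}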
17367 /* Flavors of vec_nand. */
17368 case P8V_BUILTIN_VEC_NAND:
17369 case P8V_BUILTIN_NAND_V16QI:
17370 case P8V_BUILTIN_NAND_V8HI:
17371 case P8V_BUILTIN_NAND_V4SI:
17372 case P8V_BUILTIN_NAND_V4SF:
17373 case P8V_BUILTIN_NAND_V2DF:
17374 case P8V_BUILTIN_NAND_V2DI:
17376 arg0 = gimple_call_arg (stmt, 0);
17377 arg1 = gimple_call_arg (stmt, 1);
17378 lhs = gimple_call_lhs (stmt);
17379 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17380 gimple *g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
17381 gimple_set_location (g, gimple_location (stmt));
17382 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17383 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
17384 gimple_set_location (g, gimple_location (stmt));
17385 gsi_replace (gsi, g, true);
17386 return true;
17388 /* Flavors of vec_or. */
17389 case ALTIVEC_BUILTIN_VOR:
17391 arg0 = gimple_call_arg (stmt, 0);
17392 arg1 = gimple_call_arg (stmt, 1);
17393 lhs = gimple_call_lhs (stmt);
17394 gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
17395 gimple_set_location (g, gimple_location (stmt));
17396 gsi_replace (gsi, g, true);
17397 return true;
17399 /* Flavors of vec_orc. */
17400 case P8V_BUILTIN_ORC_V16QI:
17401 case P8V_BUILTIN_ORC_V8HI:
17402 case P8V_BUILTIN_ORC_V4SI:
17403 case P8V_BUILTIN_ORC_V4SF:
17404 case P8V_BUILTIN_ORC_V2DF:
17405 case P8V_BUILTIN_ORC_V2DI:
17407 arg0 = gimple_call_arg (stmt, 0);
17408 arg1 = gimple_call_arg (stmt, 1);
17409 lhs = gimple_call_lhs (stmt);
17410 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17411 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
17412 gimple_set_location (g, gimple_location (stmt));
17413 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17414 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
17415 gimple_set_location (g, gimple_location (stmt));
17416 gsi_replace (gsi, g, true);
17417 return true;
17419 /* Flavors of vec_xor. */
17420 case ALTIVEC_BUILTIN_VXOR:
17422 arg0 = gimple_call_arg (stmt, 0);
17423 arg1 = gimple_call_arg (stmt, 1);
17424 lhs = gimple_call_lhs (stmt);
17425 gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
17426 gimple_set_location (g, gimple_location (stmt));
17427 gsi_replace (gsi, g, true);
17428 return true;
17430 /* Flavors of vec_nor. */
17431 case ALTIVEC_BUILTIN_VNOR:
17433 arg0 = gimple_call_arg (stmt, 0);
17434 arg1 = gimple_call_arg (stmt, 1);
17435 lhs = gimple_call_lhs (stmt);
17436 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17437 gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
17438 gimple_set_location (g, gimple_location (stmt));
17439 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17440 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
17441 gimple_set_location (g, gimple_location (stmt));
17442 gsi_replace (gsi, g, true);
17443 return true;
17445 default:
17446 break;
17449 return false;
17452 /* Expand an expression EXP that calls a built-in function,
17453 with result going to TARGET if that's convenient
17454 (and in mode MODE if that's convenient).
17455 SUBTARGET may be used as the target for computing one of EXP's operands.
17456 IGNORE is nonzero if the value is to be ignored. */
17458 static rtx
17459 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17460 machine_mode mode ATTRIBUTE_UNUSED,
17461 int ignore ATTRIBUTE_UNUSED)
17463 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17464 enum rs6000_builtins fcode
17465 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
17466 size_t uns_fcode = (size_t)fcode;
17467 const struct builtin_description *d;
17468 size_t i;
17469 rtx ret;
17470 bool success;
17471 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
17472 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
17474 if (TARGET_DEBUG_BUILTIN)
17476 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
17477 const char *name1 = rs6000_builtin_info[uns_fcode].name;
17478 const char *name2 = ((icode != CODE_FOR_nothing)
17479 ? get_insn_name ((int)icode)
17480 : "nothing");
17481 const char *name3;
17483 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
17485 default: name3 = "unknown"; break;
17486 case RS6000_BTC_SPECIAL: name3 = "special"; break;
17487 case RS6000_BTC_UNARY: name3 = "unary"; break;
17488 case RS6000_BTC_BINARY: name3 = "binary"; break;
17489 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
17490 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
17491 case RS6000_BTC_ABS: name3 = "abs"; break;
17492 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
17493 case RS6000_BTC_DST: name3 = "dst"; break;
17497 fprintf (stderr,
17498 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
17499 (name1) ? name1 : "---", fcode,
17500 (name2) ? name2 : "---", (int)icode,
17501 name3,
17502 func_valid_p ? "" : ", not valid");
17505 if (!func_valid_p)
17507 rs6000_invalid_builtin (fcode);
17509 /* Given it is invalid, just generate a normal call. */
17510 return expand_call (exp, target, ignore);
17513 switch (fcode)
17515 case RS6000_BUILTIN_RECIP:
17516 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
17518 case RS6000_BUILTIN_RECIPF:
17519 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
17521 case RS6000_BUILTIN_RSQRTF:
17522 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
17524 case RS6000_BUILTIN_RSQRT:
17525 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
17527 case POWER7_BUILTIN_BPERMD:
17528 return rs6000_expand_binop_builtin (((TARGET_64BIT)
17529 ? CODE_FOR_bpermd_di
17530 : CODE_FOR_bpermd_si), exp, target);
17532 case RS6000_BUILTIN_GET_TB:
17533 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
17534 target);
17536 case RS6000_BUILTIN_MFTB:
17537 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
17538 ? CODE_FOR_rs6000_mftb_di
17539 : CODE_FOR_rs6000_mftb_si),
17540 target);
17542 case RS6000_BUILTIN_MFFS:
17543 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
17545 case RS6000_BUILTIN_MTFSF:
17546 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
17548 case RS6000_BUILTIN_CPU_INIT:
17549 case RS6000_BUILTIN_CPU_IS:
17550 case RS6000_BUILTIN_CPU_SUPPORTS:
17551 return cpu_expand_builtin (fcode, exp, target);
17553 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
17554 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
17556 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
17557 : (int) CODE_FOR_altivec_lvsl_direct);
17558 machine_mode tmode = insn_data[icode].operand[0].mode;
17559 machine_mode mode = insn_data[icode].operand[1].mode;
17560 tree arg;
17561 rtx op, addr, pat;
17563 gcc_assert (TARGET_ALTIVEC);
17565 arg = CALL_EXPR_ARG (exp, 0);
17566 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
17567 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
17568 addr = memory_address (mode, op);
17569 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
17570 op = addr;
17571 else
17573 /* For the load case, we need to negate the address. */
17574 op = gen_reg_rtx (GET_MODE (addr));
17575 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
17577 op = gen_rtx_MEM (mode, op);
17579 if (target == 0
17580 || GET_MODE (target) != tmode
17581 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17582 target = gen_reg_rtx (tmode);
17584 pat = GEN_FCN (icode) (target, op);
17585 if (!pat)
17586 return 0;
17587 emit_insn (pat);
17589 return target;
17592 case ALTIVEC_BUILTIN_VCFUX:
17593 case ALTIVEC_BUILTIN_VCFSX:
17594 case ALTIVEC_BUILTIN_VCTUXS:
17595 case ALTIVEC_BUILTIN_VCTSXS:
17596 /* FIXME: There's got to be a nicer way to handle this case than
17597 constructing a new CALL_EXPR. */
17598 if (call_expr_nargs (exp) == 1)
17600 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
17601 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
17603 break;
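/* Illustrative: a one-argument call such as

     r = __builtin_altivec_vcfsx (v);

   is rebuilt here as if the user had written

     r = __builtin_altivec_vcfsx (v, 0);

   i.e. with the scale-factor operand defaulted to zero. */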
17605 default:
17606 break;
17609 if (TARGET_ALTIVEC)
17611 ret = altivec_expand_builtin (exp, target, &success);
17613 if (success)
17614 return ret;
17616 if (TARGET_SPE)
17618 ret = spe_expand_builtin (exp, target, &success);
17620 if (success)
17621 return ret;
17623 if (TARGET_PAIRED_FLOAT)
17625 ret = paired_expand_builtin (exp, target, &success);
17627 if (success)
17628 return ret;
17630 if (TARGET_HTM)
17632 ret = htm_expand_builtin (exp, target, &success);
17634 if (success)
17635 return ret;
17638 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
17639 /* RS6000_BTC_SPECIAL represents no-operand operators. */
17640 gcc_assert (attr == RS6000_BTC_UNARY
17641 || attr == RS6000_BTC_BINARY
17642 || attr == RS6000_BTC_TERNARY
17643 || attr == RS6000_BTC_SPECIAL);
17645 /* Handle simple unary operations. */
17646 d = bdesc_1arg;
17647 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17648 if (d->code == fcode)
17649 return rs6000_expand_unop_builtin (d->icode, exp, target);
17651 /* Handle simple binary operations. */
17652 d = bdesc_2arg;
17653 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17654 if (d->code == fcode)
17655 return rs6000_expand_binop_builtin (d->icode, exp, target);
17657 /* Handle simple ternary operations. */
17658 d = bdesc_3arg;
17659 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17660 if (d->code == fcode)
17661 return rs6000_expand_ternop_builtin (d->icode, exp, target);
17663 /* Handle simple no-argument operations. */
17664 d = bdesc_0arg;
17665 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17666 if (d->code == fcode)
17667 return rs6000_expand_zeroop_builtin (d->icode, target);
17669 gcc_unreachable ();
17672 /* Create a builtin vector type with a name, taking care not to give
17673 the canonical type a name. */
17675 static tree
17676 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
17678 tree result = build_vector_type (elt_type, num_elts);
17680 /* Copy so we don't give the canonical type a name. */
17681 result = build_variant_type_copy (result);
17683 add_builtin_type (name, result);
17685 return result;
17688 static void
17689 rs6000_init_builtins (void)
17691 tree tdecl;
17692 tree ftype;
17693 machine_mode mode;
17695 if (TARGET_DEBUG_BUILTIN)
17696 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
17697 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
17698 (TARGET_SPE) ? ", spe" : "",
17699 (TARGET_ALTIVEC) ? ", altivec" : "",
17700 (TARGET_VSX) ? ", vsx" : "");
17702 V2SI_type_node = build_vector_type (intSI_type_node, 2);
17703 V2SF_type_node = build_vector_type (float_type_node, 2);
17704 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
17705 : "__vector long long",
17706 intDI_type_node, 2);
17707 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
17708 V4HI_type_node = build_vector_type (intHI_type_node, 4);
17709 V4SI_type_node = rs6000_vector_type ("__vector signed int",
17710 intSI_type_node, 4);
17711 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
17712 V8HI_type_node = rs6000_vector_type ("__vector signed short",
17713 intHI_type_node, 8);
17714 V16QI_type_node = rs6000_vector_type ("__vector signed char",
17715 intQI_type_node, 16);
17717 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
17718 unsigned_intQI_type_node, 16);
17719 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
17720 unsigned_intHI_type_node, 8);
17721 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
17722 unsigned_intSI_type_node, 4);
17723 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17724 ? "__vector unsigned long"
17725 : "__vector unsigned long long",
17726 unsigned_intDI_type_node, 2);
17728 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
17729 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
17730 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
17731 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
17733 const_str_type_node
17734 = build_pointer_type (build_qualified_type (char_type_node,
17735 TYPE_QUAL_CONST));
17737 /* We use V1TI mode as a special container to hold __int128_t items that
17738 must live in VSX registers. */
17739 if (intTI_type_node)
17741 V1TI_type_node = rs6000_vector_type ("__vector __int128",
17742 intTI_type_node, 1);
17743 unsigned_V1TI_type_node
17744 = rs6000_vector_type ("__vector unsigned __int128",
17745 unsigned_intTI_type_node, 1);
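/* Illustrative user-level declarations this enables (VSX only):

     __vector __int128 x;
     __vector unsigned __int128 y;

   each a single 128-bit element kept in a VSX register rather than a
   GPR pair. */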
17748 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
17749 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
17750 'vector unsigned short'. */
17752 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
17753 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17754 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
17755 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
17756 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17758 long_integer_type_internal_node = long_integer_type_node;
17759 long_unsigned_type_internal_node = long_unsigned_type_node;
17760 long_long_integer_type_internal_node = long_long_integer_type_node;
17761 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
17762 intQI_type_internal_node = intQI_type_node;
17763 uintQI_type_internal_node = unsigned_intQI_type_node;
17764 intHI_type_internal_node = intHI_type_node;
17765 uintHI_type_internal_node = unsigned_intHI_type_node;
17766 intSI_type_internal_node = intSI_type_node;
17767 uintSI_type_internal_node = unsigned_intSI_type_node;
17768 intDI_type_internal_node = intDI_type_node;
17769 uintDI_type_internal_node = unsigned_intDI_type_node;
17770 intTI_type_internal_node = intTI_type_node;
17771 uintTI_type_internal_node = unsigned_intTI_type_node;
17772 float_type_internal_node = float_type_node;
17773 double_type_internal_node = double_type_node;
17774 long_double_type_internal_node = long_double_type_node;
17775 dfloat64_type_internal_node = dfloat64_type_node;
17776 dfloat128_type_internal_node = dfloat128_type_node;
17777 void_type_internal_node = void_type_node;
17779 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
17780 IFmode is the IBM extended 128-bit format that is a pair of doubles.
17781 TFmode will be either IEEE 128-bit floating point or the IBM double-double
17782 format that uses a pair of doubles, depending on the switches and
17783 defaults.
17785 We do not enable the actual __float128 keyword unless the user explicitly
17786 asks for it, because the library support is not yet complete.
17788 If we don't support either 128-bit IBM double double or IEEE 128-bit
17789 floating point, we need to make sure the type is non-zero or else the
17790 self-test fails during bootstrap.
17792 We don't register a built-in type for __ibm128 if the type is the same as
17793 long double. Instead, rs6000_cpu_cpp_builtins adds a #define mapping
17794 __ibm128 to long double. */
17795 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
17797 ibm128_float_type_node = make_node (REAL_TYPE);
17798 TYPE_PRECISION (ibm128_float_type_node) = 128;
17799 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
17800 layout_type (ibm128_float_type_node);
17802 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
17803 "__ibm128");
17805 else
17806 ibm128_float_type_node = long_double_type_node;
17808 if (TARGET_FLOAT128_KEYWORD)
17810 ieee128_float_type_node = float128_type_node;
17811 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17812 "__float128");
17815 else if (TARGET_FLOAT128_TYPE)
17817 ieee128_float_type_node = make_node (REAL_TYPE);
17818 TYPE_PRECISION (ieee128_float_type_node) = 128;
17819 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
17820 layout_type (ieee128_float_type_node);
17822 /* If we are not exporting the __float128/_Float128 keywords, we need a
17823 keyword to get the types created. Use __ieee128 as the dummy
17824 keyword. */
17825 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17826 "__ieee128");
17829 else
17830 ieee128_float_type_node = long_double_type_node;
17832 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17833 tree type node. */
17834 builtin_mode_to_type[QImode][0] = integer_type_node;
17835 builtin_mode_to_type[HImode][0] = integer_type_node;
17836 builtin_mode_to_type[SImode][0] = intSI_type_node;
17837 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
17838 builtin_mode_to_type[DImode][0] = intDI_type_node;
17839 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
17840 builtin_mode_to_type[TImode][0] = intTI_type_node;
17841 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
17842 builtin_mode_to_type[SFmode][0] = float_type_node;
17843 builtin_mode_to_type[DFmode][0] = double_type_node;
17844 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
17845 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
17846 builtin_mode_to_type[TFmode][0] = long_double_type_node;
17847 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
17848 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
17849 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
17850 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
17851 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
17852 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
17853 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
17854 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
17855 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
17856 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
17857 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
17858 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
17859 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
17860 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
17861 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
17862 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
17863 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
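 /* A minimal sketch of how this table is consulted later (see
    builtin_function_type below): the [1] column holds the unsigned
    variant when one exists, and lookups fall back to the signed
    column, e.g.

        tree t = builtin_mode_to_type[V4SImode][1];
        if (!t)
          t = builtin_mode_to_type[V4SImode][0];

    Entries left NULL simply mean no builtin needs that
    mode/signedness pair.  */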
17865 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
17866 TYPE_NAME (bool_char_type_node) = tdecl;
17868 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
17869 TYPE_NAME (bool_short_type_node) = tdecl;
17871 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
17872 TYPE_NAME (bool_int_type_node) = tdecl;
17874 tdecl = add_builtin_type ("__pixel", pixel_type_node);
17875 TYPE_NAME (pixel_type_node) = tdecl;
17877 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
17878 bool_char_type_node, 16);
17879 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
17880 bool_short_type_node, 8);
17881 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
17882 bool_int_type_node, 4);
17883 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17884 ? "__vector __bool long"
17885 : "__vector __bool long long",
17886 bool_long_type_node, 2);
17887 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
17888 pixel_type_node, 8);
17890 /* Paired and SPE builtins are only available if you build a compiler with
17891 the appropriate options, so only create those builtins with the
17892 appropriate compiler option. Create Altivec and VSX builtins on machines
17893 with at least the general purpose extensions (970 and newer) to allow the
17894 use of the target attribute. */
17895 if (TARGET_PAIRED_FLOAT)
17896 paired_init_builtins ();
17897 if (TARGET_SPE)
17898 spe_init_builtins ();
17899 if (TARGET_EXTRA_BUILTINS)
17900 altivec_init_builtins ();
17901 if (TARGET_HTM)
17902 htm_init_builtins ();
17904 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
17905 rs6000_common_init_builtins ();
17907 ftype = build_function_type_list (ieee128_float_type_node,
17908 const_str_type_node, NULL_TREE);
17909 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
17910 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
17912 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
17913 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
17914 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
17916 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
17917 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
17918 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
17920 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
17921 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
17922 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
17924 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
17925 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
17926 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
17928 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
17929 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
17930 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
17932 mode = (TARGET_64BIT) ? DImode : SImode;
17933 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
17934 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
17935 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
17937 ftype = build_function_type_list (unsigned_intDI_type_node,
17938 NULL_TREE);
17939 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
17941 if (TARGET_64BIT)
17942 ftype = build_function_type_list (unsigned_intDI_type_node,
17943 NULL_TREE);
17944 else
17945 ftype = build_function_type_list (unsigned_intSI_type_node,
17946 NULL_TREE);
17947 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
17949 ftype = build_function_type_list (double_type_node, NULL_TREE);
17950 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17952 ftype = build_function_type_list (void_type_node,
17953 intSI_type_node, double_type_node,
17954 NULL_TREE);
17955 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17957 ftype = build_function_type_list (void_type_node, NULL_TREE);
17958 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17960 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17961 NULL_TREE);
17962 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17963 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
17965 /* AIX libm provides clog as __clog. */
17966 if (TARGET_XCOFF
17967 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17968 set_user_assembler_name (tdecl, "__clog");
17970 #ifdef SUBTARGET_INIT_BUILTINS
17971 SUBTARGET_INIT_BUILTINS;
17972 #endif
17975 /* Returns the rs6000 builtin decl for CODE. */
17977 static tree
17978 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17980 HOST_WIDE_INT fnmask;
17982 if (code >= RS6000_BUILTIN_COUNT)
17983 return error_mark_node;
17985 fnmask = rs6000_builtin_info[code].mask;
17986 if ((fnmask & rs6000_builtin_mask) != fnmask)
17988 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17989 return error_mark_node;
17992 return rs6000_builtin_decls[code];
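/* A sketch of the contract above: callers treat error_mark_node as
   "builtin not available with the current options", e.g.

       tree decl = rs6000_builtin_decl (ALTIVEC_BUILTIN_VADDUBM, true);
       if (decl == error_mark_node)
         ... builtin masked out by -mcpu/-maltivec ...

   (ALTIVEC_BUILTIN_VADDUBM is just an example code here.)  */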
17995 static void
17996 spe_init_builtins (void)
17998 tree puint_type_node = build_pointer_type (unsigned_type_node);
17999 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
18000 const struct builtin_description *d;
18001 size_t i;
18002 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18004 tree v2si_ftype_4_v2si
18005 = build_function_type_list (opaque_V2SI_type_node,
18006 opaque_V2SI_type_node,
18007 opaque_V2SI_type_node,
18008 opaque_V2SI_type_node,
18009 opaque_V2SI_type_node,
18010 NULL_TREE);
18012 tree v2sf_ftype_4_v2sf
18013 = build_function_type_list (opaque_V2SF_type_node,
18014 opaque_V2SF_type_node,
18015 opaque_V2SF_type_node,
18016 opaque_V2SF_type_node,
18017 opaque_V2SF_type_node,
18018 NULL_TREE);
18020 tree int_ftype_int_v2si_v2si
18021 = build_function_type_list (integer_type_node,
18022 integer_type_node,
18023 opaque_V2SI_type_node,
18024 opaque_V2SI_type_node,
18025 NULL_TREE);
18027 tree int_ftype_int_v2sf_v2sf
18028 = build_function_type_list (integer_type_node,
18029 integer_type_node,
18030 opaque_V2SF_type_node,
18031 opaque_V2SF_type_node,
18032 NULL_TREE);
18034 tree void_ftype_v2si_puint_int
18035 = build_function_type_list (void_type_node,
18036 opaque_V2SI_type_node,
18037 puint_type_node,
18038 integer_type_node,
18039 NULL_TREE);
18041 tree void_ftype_v2si_puint_char
18042 = build_function_type_list (void_type_node,
18043 opaque_V2SI_type_node,
18044 puint_type_node,
18045 char_type_node,
18046 NULL_TREE);
18048 tree void_ftype_v2si_pv2si_int
18049 = build_function_type_list (void_type_node,
18050 opaque_V2SI_type_node,
18051 opaque_p_V2SI_type_node,
18052 integer_type_node,
18053 NULL_TREE);
18055 tree void_ftype_v2si_pv2si_char
18056 = build_function_type_list (void_type_node,
18057 opaque_V2SI_type_node,
18058 opaque_p_V2SI_type_node,
18059 char_type_node,
18060 NULL_TREE);
18062 tree void_ftype_int
18063 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
18065 tree int_ftype_void
18066 = build_function_type_list (integer_type_node, NULL_TREE);
18068 tree v2si_ftype_pv2si_int
18069 = build_function_type_list (opaque_V2SI_type_node,
18070 opaque_p_V2SI_type_node,
18071 integer_type_node,
18072 NULL_TREE);
18074 tree v2si_ftype_puint_int
18075 = build_function_type_list (opaque_V2SI_type_node,
18076 puint_type_node,
18077 integer_type_node,
18078 NULL_TREE);
18080 tree v2si_ftype_pushort_int
18081 = build_function_type_list (opaque_V2SI_type_node,
18082 pushort_type_node,
18083 integer_type_node,
18084 NULL_TREE);
18086 tree v2si_ftype_signed_char
18087 = build_function_type_list (opaque_V2SI_type_node,
18088 signed_char_type_node,
18089 NULL_TREE);
18091 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
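 /* User-level sketch: the SPE intrinsics defined below traffic in this
    opaque type, e.g. (buf is assumed to point to suitably aligned
    storage):

        __ev64_opaque__ v = __builtin_spe_evsplati (5);
        __builtin_spe_evstdd (v, buf, 0);  */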
18093 /* Initialize irregular SPE builtins. */
18095 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
18096 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
18097 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
18098 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
18099 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
18100 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
18101 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
18102 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
18103 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
18104 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
18105 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
18106 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
18107 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
18108 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
18109 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
18110 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
18111 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
18112 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
18114 /* Loads. */
18115 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
18116 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
18117 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
18118 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
18119 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
18120 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
18121 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
18122 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
18123 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
18124 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
18125 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
18126 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
18127 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
18128 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
18129 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
18130 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
18131 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
18132 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
18133 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
18134 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
18135 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
18136 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
18138 /* Predicates. */
18139 d = bdesc_spe_predicates;
18140 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
18142 tree type;
18143 HOST_WIDE_INT mask = d->mask;
18145 if ((mask & builtin_mask) != mask)
18147 if (TARGET_DEBUG_BUILTIN)
18148 fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
18149 d->name);
18150 continue;
18153 /* Cannot define builtin if the instruction is disabled. */
18154 gcc_assert (d->icode != CODE_FOR_nothing);
18155 switch (insn_data[d->icode].operand[1].mode)
18157 case E_V2SImode:
18158 type = int_ftype_int_v2si_v2si;
18159 break;
18160 case E_V2SFmode:
18161 type = int_ftype_int_v2sf_v2sf;
18162 break;
18163 default:
18164 gcc_unreachable ();
18167 def_builtin (d->name, type, d->code);
18170 /* Evsel predicates. */
18171 d = bdesc_spe_evsel;
18172 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
18174 tree type;
18175 HOST_WIDE_INT mask = d->mask;
18177 if ((mask & builtin_mask) != mask)
18179 if (TARGET_DEBUG_BUILTIN)
18180 fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
18181 d->name);
18182 continue;
18185 /* Cannot define builtin if the instruction is disabled. */
18186 gcc_assert (d->icode != CODE_FOR_nothing);
18187 switch (insn_data[d->icode].operand[1].mode)
18189 case E_V2SImode:
18190 type = v2si_ftype_4_v2si;
18191 break;
18192 case E_V2SFmode:
18193 type = v2sf_ftype_4_v2sf;
18194 break;
18195 default:
18196 gcc_unreachable ();
18199 def_builtin (d->name, type, d->code);
18203 static void
18204 paired_init_builtins (void)
18206 const struct builtin_description *d;
18207 size_t i;
18208 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18210 tree int_ftype_int_v2sf_v2sf
18211 = build_function_type_list (integer_type_node,
18212 integer_type_node,
18213 V2SF_type_node,
18214 V2SF_type_node,
18215 NULL_TREE);
18216 tree pcfloat_type_node =
18217 build_pointer_type (build_qualified_type
18218 (float_type_node, TYPE_QUAL_CONST));
18220 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
18221 long_integer_type_node,
18222 pcfloat_type_node,
18223 NULL_TREE);
18224 tree void_ftype_v2sf_long_pcfloat =
18225 build_function_type_list (void_type_node,
18226 V2SF_type_node,
18227 long_integer_type_node,
18228 pcfloat_type_node,
18229 NULL_TREE);
18232 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
18233 PAIRED_BUILTIN_LX);
18236 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
18237 PAIRED_BUILTIN_STX);
18239 /* Predicates. */
18240 d = bdesc_paired_preds;
18241 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
18243 tree type;
18244 HOST_WIDE_INT mask = d->mask;
18246 if ((mask & builtin_mask) != mask)
18248 if (TARGET_DEBUG_BUILTIN)
18249 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
18250 d->name);
18251 continue;
18254 /* Cannot define builtin if the instruction is disabled. */
18255 gcc_assert (d->icode != CODE_FOR_nothing);
18257 if (TARGET_DEBUG_BUILTIN)
18258 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
18259 (int)i, get_insn_name (d->icode), (int)d->icode,
18260 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
18262 switch (insn_data[d->icode].operand[1].mode)
18264 case E_V2SFmode:
18265 type = int_ftype_int_v2sf_v2sf;
18266 break;
18267 default:
18268 gcc_unreachable ();
18271 def_builtin (d->name, type, d->code);
18275 static void
18276 altivec_init_builtins (void)
18278 const struct builtin_description *d;
18279 size_t i;
18280 tree ftype;
18281 tree decl;
18282 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18284 tree pvoid_type_node = build_pointer_type (void_type_node);
18286 tree pcvoid_type_node
18287 = build_pointer_type (build_qualified_type (void_type_node,
18288 TYPE_QUAL_CONST));
18290 tree int_ftype_opaque
18291 = build_function_type_list (integer_type_node,
18292 opaque_V4SI_type_node, NULL_TREE);
18293 tree opaque_ftype_opaque
18294 = build_function_type_list (integer_type_node, NULL_TREE);
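 /* Note: vec_splats/vec_promote are overloaded and, on the working
    assumption reflected here, resolved entirely in the front end, so
    this placeholder signature (int (void), despite the name) is not
    used for argument checking.  */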
18295 tree opaque_ftype_opaque_int
18296 = build_function_type_list (opaque_V4SI_type_node,
18297 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
18298 tree opaque_ftype_opaque_opaque_int
18299 = build_function_type_list (opaque_V4SI_type_node,
18300 opaque_V4SI_type_node, opaque_V4SI_type_node,
18301 integer_type_node, NULL_TREE);
18302 tree opaque_ftype_opaque_opaque_opaque
18303 = build_function_type_list (opaque_V4SI_type_node,
18304 opaque_V4SI_type_node, opaque_V4SI_type_node,
18305 opaque_V4SI_type_node, NULL_TREE);
18306 tree opaque_ftype_opaque_opaque
18307 = build_function_type_list (opaque_V4SI_type_node,
18308 opaque_V4SI_type_node, opaque_V4SI_type_node,
18309 NULL_TREE);
18310 tree int_ftype_int_opaque_opaque
18311 = build_function_type_list (integer_type_node,
18312 integer_type_node, opaque_V4SI_type_node,
18313 opaque_V4SI_type_node, NULL_TREE);
18314 tree int_ftype_int_v4si_v4si
18315 = build_function_type_list (integer_type_node,
18316 integer_type_node, V4SI_type_node,
18317 V4SI_type_node, NULL_TREE);
18318 tree int_ftype_int_v2di_v2di
18319 = build_function_type_list (integer_type_node,
18320 integer_type_node, V2DI_type_node,
18321 V2DI_type_node, NULL_TREE);
18322 tree void_ftype_v4si
18323 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
18324 tree v8hi_ftype_void
18325 = build_function_type_list (V8HI_type_node, NULL_TREE);
18326 tree void_ftype_void
18327 = build_function_type_list (void_type_node, NULL_TREE);
18328 tree void_ftype_int
18329 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
18331 tree opaque_ftype_long_pcvoid
18332 = build_function_type_list (opaque_V4SI_type_node,
18333 long_integer_type_node, pcvoid_type_node,
18334 NULL_TREE);
18335 tree v16qi_ftype_long_pcvoid
18336 = build_function_type_list (V16QI_type_node,
18337 long_integer_type_node, pcvoid_type_node,
18338 NULL_TREE);
18339 tree v8hi_ftype_long_pcvoid
18340 = build_function_type_list (V8HI_type_node,
18341 long_integer_type_node, pcvoid_type_node,
18342 NULL_TREE);
18343 tree v4si_ftype_long_pcvoid
18344 = build_function_type_list (V4SI_type_node,
18345 long_integer_type_node, pcvoid_type_node,
18346 NULL_TREE);
18347 tree v4sf_ftype_long_pcvoid
18348 = build_function_type_list (V4SF_type_node,
18349 long_integer_type_node, pcvoid_type_node,
18350 NULL_TREE);
18351 tree v2df_ftype_long_pcvoid
18352 = build_function_type_list (V2DF_type_node,
18353 long_integer_type_node, pcvoid_type_node,
18354 NULL_TREE);
18355 tree v2di_ftype_long_pcvoid
18356 = build_function_type_list (V2DI_type_node,
18357 long_integer_type_node, pcvoid_type_node,
18358 NULL_TREE);
18360 tree void_ftype_opaque_long_pvoid
18361 = build_function_type_list (void_type_node,
18362 opaque_V4SI_type_node, long_integer_type_node,
18363 pvoid_type_node, NULL_TREE);
18364 tree void_ftype_v4si_long_pvoid
18365 = build_function_type_list (void_type_node,
18366 V4SI_type_node, long_integer_type_node,
18367 pvoid_type_node, NULL_TREE);
18368 tree void_ftype_v16qi_long_pvoid
18369 = build_function_type_list (void_type_node,
18370 V16QI_type_node, long_integer_type_node,
18371 pvoid_type_node, NULL_TREE);
18373 tree void_ftype_v16qi_pvoid_long
18374 = build_function_type_list (void_type_node,
18375 V16QI_type_node, pvoid_type_node,
18376 long_integer_type_node, NULL_TREE);
18378 tree void_ftype_v8hi_long_pvoid
18379 = build_function_type_list (void_type_node,
18380 V8HI_type_node, long_integer_type_node,
18381 pvoid_type_node, NULL_TREE);
18382 tree void_ftype_v4sf_long_pvoid
18383 = build_function_type_list (void_type_node,
18384 V4SF_type_node, long_integer_type_node,
18385 pvoid_type_node, NULL_TREE);
18386 tree void_ftype_v2df_long_pvoid
18387 = build_function_type_list (void_type_node,
18388 V2DF_type_node, long_integer_type_node,
18389 pvoid_type_node, NULL_TREE);
18390 tree void_ftype_v2di_long_pvoid
18391 = build_function_type_list (void_type_node,
18392 V2DI_type_node, long_integer_type_node,
18393 pvoid_type_node, NULL_TREE);
18394 tree int_ftype_int_v8hi_v8hi
18395 = build_function_type_list (integer_type_node,
18396 integer_type_node, V8HI_type_node,
18397 V8HI_type_node, NULL_TREE);
18398 tree int_ftype_int_v16qi_v16qi
18399 = build_function_type_list (integer_type_node,
18400 integer_type_node, V16QI_type_node,
18401 V16QI_type_node, NULL_TREE);
18402 tree int_ftype_int_v4sf_v4sf
18403 = build_function_type_list (integer_type_node,
18404 integer_type_node, V4SF_type_node,
18405 V4SF_type_node, NULL_TREE);
18406 tree int_ftype_int_v2df_v2df
18407 = build_function_type_list (integer_type_node,
18408 integer_type_node, V2DF_type_node,
18409 V2DF_type_node, NULL_TREE);
18410 tree v2di_ftype_v2di
18411 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18412 tree v4si_ftype_v4si
18413 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18414 tree v8hi_ftype_v8hi
18415 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18416 tree v16qi_ftype_v16qi
18417 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18418 tree v4sf_ftype_v4sf
18419 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18420 tree v2df_ftype_v2df
18421 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18422 tree void_ftype_pcvoid_int_int
18423 = build_function_type_list (void_type_node,
18424 pcvoid_type_node, integer_type_node,
18425 integer_type_node, NULL_TREE);
18427 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
18428 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
18429 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
18430 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
18431 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
18432 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
18433 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
18434 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
18435 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
18436 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
18437 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
18438 ALTIVEC_BUILTIN_LVXL_V2DF);
18439 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
18440 ALTIVEC_BUILTIN_LVXL_V2DI);
18441 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
18442 ALTIVEC_BUILTIN_LVXL_V4SF);
18443 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
18444 ALTIVEC_BUILTIN_LVXL_V4SI);
18445 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
18446 ALTIVEC_BUILTIN_LVXL_V8HI);
18447 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
18448 ALTIVEC_BUILTIN_LVXL_V16QI);
18449 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
18450 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
18451 ALTIVEC_BUILTIN_LVX_V2DF);
18452 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
18453 ALTIVEC_BUILTIN_LVX_V2DI);
18454 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
18455 ALTIVEC_BUILTIN_LVX_V4SF);
18456 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
18457 ALTIVEC_BUILTIN_LVX_V4SI);
18458 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
18459 ALTIVEC_BUILTIN_LVX_V8HI);
18460 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
18461 ALTIVEC_BUILTIN_LVX_V16QI);
18462 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
18463 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
18464 ALTIVEC_BUILTIN_STVX_V2DF);
18465 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
18466 ALTIVEC_BUILTIN_STVX_V2DI);
18467 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
18468 ALTIVEC_BUILTIN_STVX_V4SF);
18469 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
18470 ALTIVEC_BUILTIN_STVX_V4SI);
18471 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
18472 ALTIVEC_BUILTIN_STVX_V8HI);
18473 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
18474 ALTIVEC_BUILTIN_STVX_V16QI);
18475 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
18476 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
18477 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
18478 ALTIVEC_BUILTIN_STVXL_V2DF);
18479 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
18480 ALTIVEC_BUILTIN_STVXL_V2DI);
18481 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
18482 ALTIVEC_BUILTIN_STVXL_V4SF);
18483 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
18484 ALTIVEC_BUILTIN_STVXL_V4SI);
18485 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
18486 ALTIVEC_BUILTIN_STVXL_V8HI);
18487 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
18488 ALTIVEC_BUILTIN_STVXL_V16QI);
18489 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
18490 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
18491 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
18492 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
18493 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
18494 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
18495 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
18496 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
18497 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
18498 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
18499 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
18500 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
18501 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
18502 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
18503 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
18504 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
18506 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
18507 VSX_BUILTIN_LXVD2X_V2DF);
18508 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
18509 VSX_BUILTIN_LXVD2X_V2DI);
18510 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
18511 VSX_BUILTIN_LXVW4X_V4SF);
18512 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
18513 VSX_BUILTIN_LXVW4X_V4SI);
18514 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
18515 VSX_BUILTIN_LXVW4X_V8HI);
18516 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
18517 VSX_BUILTIN_LXVW4X_V16QI);
18518 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
18519 VSX_BUILTIN_STXVD2X_V2DF);
18520 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
18521 VSX_BUILTIN_STXVD2X_V2DI);
18522 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
18523 VSX_BUILTIN_STXVW4X_V4SF);
18524 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
18525 VSX_BUILTIN_STXVW4X_V4SI);
18526 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
18527 VSX_BUILTIN_STXVW4X_V8HI);
18528 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
18529 VSX_BUILTIN_STXVW4X_V16QI);
18531 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
18532 VSX_BUILTIN_LD_ELEMREV_V2DF);
18533 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
18534 VSX_BUILTIN_LD_ELEMREV_V2DI);
18535 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
18536 VSX_BUILTIN_LD_ELEMREV_V4SF);
18537 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
18538 VSX_BUILTIN_LD_ELEMREV_V4SI);
18539 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
18540 VSX_BUILTIN_ST_ELEMREV_V2DF);
18541 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
18542 VSX_BUILTIN_ST_ELEMREV_V2DI);
18543 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
18544 VSX_BUILTIN_ST_ELEMREV_V4SF);
18545 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
18546 VSX_BUILTIN_ST_ELEMREV_V4SI);
18548 if (TARGET_P9_VECTOR)
18550 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
18551 VSX_BUILTIN_LD_ELEMREV_V8HI);
18552 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
18553 VSX_BUILTIN_LD_ELEMREV_V16QI);
18554 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
18555 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
18556 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
18557 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
18559 else
18561 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI]
18562 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI];
18563 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI]
18564 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI];
18565 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI]
18566 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI];
18567 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI]
18568 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI];
18571 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
18572 VSX_BUILTIN_VEC_LD);
18573 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
18574 VSX_BUILTIN_VEC_ST);
18575 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
18576 VSX_BUILTIN_VEC_XL);
18577 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
18578 VSX_BUILTIN_VEC_XST);
18580 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
18581 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
18582 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
18584 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
18585 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
18586 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
18587 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
18588 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
18589 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
18590 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
18591 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
18592 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
18593 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
18594 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
18595 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
18597 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
18598 ALTIVEC_BUILTIN_VEC_ADDE);
18599 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
18600 ALTIVEC_BUILTIN_VEC_ADDEC);
18601 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
18602 ALTIVEC_BUILTIN_VEC_CMPNE);
18603 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
18604 ALTIVEC_BUILTIN_VEC_MUL);
18606 /* Cell builtins. */
18607 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
18608 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
18609 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
18610 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
18612 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
18613 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
18614 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
18615 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
18617 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
18618 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
18619 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
18620 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
18622 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
18623 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
18624 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
18625 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
18627 if (TARGET_P9_VECTOR)
18628 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
18629 P9V_BUILTIN_STXVL);
18631 /* Add the DST variants. */
18632 d = bdesc_dst;
18633 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
18635 HOST_WIDE_INT mask = d->mask;
18637 /* It is expected that these dst built-in functions may have
18638 d->icode equal to CODE_FOR_nothing. */
18639 if ((mask & builtin_mask) != mask)
18641 if (TARGET_DEBUG_BUILTIN)
18642 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
18643 d->name);
18644 continue;
18646 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
18649 /* Initialize the predicates. */
18650 d = bdesc_altivec_preds;
18651 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
18653 machine_mode mode1;
18654 tree type;
18655 HOST_WIDE_INT mask = d->mask;
18657 if ((mask & builtin_mask) != mask)
18659 if (TARGET_DEBUG_BUILTIN)
18660 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
18661 d->name);
18662 continue;
18665 if (rs6000_overloaded_builtin_p (d->code))
18666 mode1 = VOIDmode;
18667 else
18669 /* Cannot define builtin if the instruction is disabled. */
18670 gcc_assert (d->icode != CODE_FOR_nothing);
18671 mode1 = insn_data[d->icode].operand[1].mode;
18674 switch (mode1)
18676 case E_VOIDmode:
18677 type = int_ftype_int_opaque_opaque;
18678 break;
18679 case E_V2DImode:
18680 type = int_ftype_int_v2di_v2di;
18681 break;
18682 case E_V4SImode:
18683 type = int_ftype_int_v4si_v4si;
18684 break;
18685 case E_V8HImode:
18686 type = int_ftype_int_v8hi_v8hi;
18687 break;
18688 case E_V16QImode:
18689 type = int_ftype_int_v16qi_v16qi;
18690 break;
18691 case E_V4SFmode:
18692 type = int_ftype_int_v4sf_v4sf;
18693 break;
18694 case E_V2DFmode:
18695 type = int_ftype_int_v2df_v2df;
18696 break;
18697 default:
18698 gcc_unreachable ();
18701 def_builtin (d->name, type, d->code);
18704 /* Initialize the abs* operators. */
18705 d = bdesc_abs;
18706 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
18708 machine_mode mode0;
18709 tree type;
18710 HOST_WIDE_INT mask = d->mask;
18712 if ((mask & builtin_mask) != mask)
18714 if (TARGET_DEBUG_BUILTIN)
18715 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
18716 d->name);
18717 continue;
18720 /* Cannot define builtin if the instruction is disabled. */
18721 gcc_assert (d->icode != CODE_FOR_nothing);
18722 mode0 = insn_data[d->icode].operand[0].mode;
18724 switch (mode0)
18726 case E_V2DImode:
18727 type = v2di_ftype_v2di;
18728 break;
18729 case E_V4SImode:
18730 type = v4si_ftype_v4si;
18731 break;
18732 case E_V8HImode:
18733 type = v8hi_ftype_v8hi;
18734 break;
18735 case E_V16QImode:
18736 type = v16qi_ftype_v16qi;
18737 break;
18738 case E_V4SFmode:
18739 type = v4sf_ftype_v4sf;
18740 break;
18741 case E_V2DFmode:
18742 type = v2df_ftype_v2df;
18743 break;
18744 default:
18745 gcc_unreachable ();
18748 def_builtin (d->name, type, d->code);
18751 /* Initialize target builtin that implements
18752 targetm.vectorize.builtin_mask_for_load. */
18754 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
18755 v16qi_ftype_long_pcvoid,
18756 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
18757 BUILT_IN_MD, NULL, NULL_TREE);
18758 TREE_READONLY (decl) = 1;
18759 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
18760 altivec_builtin_mask_for_load = decl;
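 /* Sketch of the classic AltiVec realignment idiom this builtin
    supports (the vectorizer's actual expansion differs in detail):

        mask = __builtin_altivec_mask_for_load (addr);   (lvsl)
        lo   = vec_ld (0, addr);
        hi   = vec_ld (15, addr);
        val  = vec_perm (lo, hi, mask);

    TREE_READONLY above lets the mask computation be CSEd across loads
    from the same base address.  */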
18762 /* Access to the vec_init patterns. */
18763 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
18764 integer_type_node, integer_type_node,
18765 integer_type_node, NULL_TREE);
18766 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
18768 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
18769 short_integer_type_node,
18770 short_integer_type_node,
18771 short_integer_type_node,
18772 short_integer_type_node,
18773 short_integer_type_node,
18774 short_integer_type_node,
18775 short_integer_type_node, NULL_TREE);
18776 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
18778 ftype = build_function_type_list (V16QI_type_node, char_type_node,
18779 char_type_node, char_type_node,
18780 char_type_node, char_type_node,
18781 char_type_node, char_type_node,
18782 char_type_node, char_type_node,
18783 char_type_node, char_type_node,
18784 char_type_node, char_type_node,
18785 char_type_node, char_type_node,
18786 char_type_node, NULL_TREE);
18787 def_builtin ("__builtin_vec_init_v16qi", ftype,
18788 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
18790 ftype = build_function_type_list (V4SF_type_node, float_type_node,
18791 float_type_node, float_type_node,
18792 float_type_node, NULL_TREE);
18793 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
18795 /* VSX builtins. */
18796 ftype = build_function_type_list (V2DF_type_node, double_type_node,
18797 double_type_node, NULL_TREE);
18798 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
18800 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
18801 intDI_type_node, NULL_TREE);
18802 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
18804 /* Access to the vec_set patterns. */
18805 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18806 intSI_type_node,
18807 integer_type_node, NULL_TREE);
18808 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
18810 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18811 intHI_type_node,
18812 integer_type_node, NULL_TREE);
18813 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
18815 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18816 intQI_type_node,
18817 integer_type_node, NULL_TREE);
18818 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
18820 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18821 float_type_node,
18822 integer_type_node, NULL_TREE);
18823 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
18825 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
18826 double_type_node,
18827 integer_type_node, NULL_TREE);
18828 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
18830 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18831 intDI_type_node,
18832 integer_type_node, NULL_TREE);
18833 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
18835 /* Access to the vec_extract patterns. */
18836 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18837 integer_type_node, NULL_TREE);
18838 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
18840 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18841 integer_type_node, NULL_TREE);
18842 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
18844 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18845 integer_type_node, NULL_TREE);
18846 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
18848 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18849 integer_type_node, NULL_TREE);
18850 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
18852 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18853 integer_type_node, NULL_TREE);
18854 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
18856 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
18857 integer_type_node, NULL_TREE);
18858 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
18861 if (V1TI_type_node)
18863 tree v1ti_ftype_long_pcvoid
18864 = build_function_type_list (V1TI_type_node,
18865 long_integer_type_node, pcvoid_type_node,
18866 NULL_TREE);
18867 tree void_ftype_v1ti_long_pvoid
18868 = build_function_type_list (void_type_node,
18869 V1TI_type_node, long_integer_type_node,
18870 pvoid_type_node, NULL_TREE);
18871 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
18872 VSX_BUILTIN_LXVD2X_V1TI);
18873 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
18874 VSX_BUILTIN_STXVD2X_V1TI);
18875 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
18876 NULL_TREE);
18877 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
18878 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
18879 intTI_type_node,
18880 integer_type_node, NULL_TREE);
18881 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
18882 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
18883 integer_type_node, NULL_TREE);
18884 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
18889 static void
18890 htm_init_builtins (void)
18892 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18893 const struct builtin_description *d;
18894 size_t i;
18896 d = bdesc_htm;
18897 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
18899 tree op[MAX_HTM_OPERANDS], type;
18900 HOST_WIDE_INT mask = d->mask;
18901 unsigned attr = rs6000_builtin_info[d->code].attr;
18902 bool void_func = (attr & RS6000_BTC_VOID);
18903 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
18904 int nopnds = 0;
18905 tree gpr_type_node;
18906 tree rettype;
18907 tree argtype;
18909 /* It is expected that these htm built-in functions may have
18910 d->icode equal to CODE_FOR_nothing. */
18912 if (TARGET_32BIT && TARGET_POWERPC64)
18913 gpr_type_node = long_long_unsigned_type_node;
18914 else
18915 gpr_type_node = long_unsigned_type_node;
18917 if (attr & RS6000_BTC_SPR)
18919 rettype = gpr_type_node;
18920 argtype = gpr_type_node;
18922 else if (d->code == HTM_BUILTIN_TABORTDC
18923 || d->code == HTM_BUILTIN_TABORTDCI)
18925 rettype = unsigned_type_node;
18926 argtype = gpr_type_node;
18928 else
18930 rettype = unsigned_type_node;
18931 argtype = unsigned_type_node;
18934 if ((mask & builtin_mask) != mask)
18936 if (TARGET_DEBUG_BUILTIN)
18937 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
18938 continue;
18941 if (d->name == 0)
18943 if (TARGET_DEBUG_BUILTIN)
18944 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
18945 (long unsigned) i);
18946 continue;
18949 op[nopnds++] = (void_func) ? void_type_node : rettype;
18951 if (attr_args == RS6000_BTC_UNARY)
18952 op[nopnds++] = argtype;
18953 else if (attr_args == RS6000_BTC_BINARY)
18955 op[nopnds++] = argtype;
18956 op[nopnds++] = argtype;
18958 else if (attr_args == RS6000_BTC_TERNARY)
18960 op[nopnds++] = argtype;
18961 op[nopnds++] = argtype;
18962 op[nopnds++] = argtype;
18965 switch (nopnds)
18967 case 1:
18968 type = build_function_type_list (op[0], NULL_TREE);
18969 break;
18970 case 2:
18971 type = build_function_type_list (op[0], op[1], NULL_TREE);
18972 break;
18973 case 3:
18974 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
18975 break;
18976 case 4:
18977 type = build_function_type_list (op[0], op[1], op[2], op[3],
18978 NULL_TREE);
18979 break;
18980 default:
18981 gcc_unreachable ();
18984 def_builtin (d->name, type, d->code);
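/* User-level sketch of the HTM builtins this loop registers (assuming
   -mhtm; see the GCC manual for the full set):

       if (__builtin_tbegin (0))
         {
           ... transactional body ...
           __builtin_tend (0);
         }
       else
         ... fallback path ...  */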
18988 /* Hash function for builtin functions with up to 3 arguments and a return
18989 type. */
18990 hashval_t
18991 builtin_hasher::hash (builtin_hash_struct *bh)
18993 unsigned ret = 0;
18994 int i;
18996 for (i = 0; i < 4; i++)
18998 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
18999 ret = (ret * 2) + bh->uns_p[i];
19002 return ret;
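/* Worked example of the mixing above: with M = MAX_MACHINE_MODE the
   loop computes the mixed-radix expansion

       (((mode[0]*2 + uns[0])*M + mode[1])*2 + uns[1]) ...

   so distinct (mode, signedness) tuples map to distinct values until
   hashval_t wraps around.  */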
19005 /* Compare builtin hash entries H1 and H2 for equivalence. */
19006 bool
19007 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
19009 return ((p1->mode[0] == p2->mode[0])
19010 && (p1->mode[1] == p2->mode[1])
19011 && (p1->mode[2] == p2->mode[2])
19012 && (p1->mode[3] == p2->mode[3])
19013 && (p1->uns_p[0] == p2->uns_p[0])
19014 && (p1->uns_p[1] == p2->uns_p[1])
19015 && (p1->uns_p[2] == p2->uns_p[2])
19016 && (p1->uns_p[3] == p2->uns_p[3]));
19019 /* Map types for builtin functions with an explicit return type and up to 3
19020 arguments. Functions with fewer than 3 arguments pass VOIDmode as the mode
19021 of the unused arguments.
19022 static tree
19023 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
19024 machine_mode mode_arg1, machine_mode mode_arg2,
19025 enum rs6000_builtins builtin, const char *name)
19027 struct builtin_hash_struct h;
19028 struct builtin_hash_struct *h2;
19029 int num_args = 3;
19030 int i;
19031 tree ret_type = NULL_TREE;
19032 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
19034 /* Create builtin_hash_table. */
19035 if (builtin_hash_table == NULL)
19036 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
19038 h.type = NULL_TREE;
19039 h.mode[0] = mode_ret;
19040 h.mode[1] = mode_arg0;
19041 h.mode[2] = mode_arg1;
19042 h.mode[3] = mode_arg2;
19043 h.uns_p[0] = 0;
19044 h.uns_p[1] = 0;
19045 h.uns_p[2] = 0;
19046 h.uns_p[3] = 0;
19048 /* If the builtin produces unsigned results or takes unsigned
19049 arguments, and it is returned as a decl for the vectorizer (such as
19050 widening multiplies, permute), make sure the arguments and return value
19051 are type correct. */
19052 switch (builtin)
19054 /* unsigned 1 argument functions. */
19055 case CRYPTO_BUILTIN_VSBOX:
19056 case P8V_BUILTIN_VGBBD:
19057 case MISC_BUILTIN_CDTBCD:
19058 case MISC_BUILTIN_CBCDTD:
19059 h.uns_p[0] = 1;
19060 h.uns_p[1] = 1;
19061 break;
19063 /* unsigned 2 argument functions. */
19064 case ALTIVEC_BUILTIN_VMULEUB:
19065 case ALTIVEC_BUILTIN_VMULEUH:
19066 case ALTIVEC_BUILTIN_VMULOUB:
19067 case ALTIVEC_BUILTIN_VMULOUH:
19068 case CRYPTO_BUILTIN_VCIPHER:
19069 case CRYPTO_BUILTIN_VCIPHERLAST:
19070 case CRYPTO_BUILTIN_VNCIPHER:
19071 case CRYPTO_BUILTIN_VNCIPHERLAST:
19072 case CRYPTO_BUILTIN_VPMSUMB:
19073 case CRYPTO_BUILTIN_VPMSUMH:
19074 case CRYPTO_BUILTIN_VPMSUMW:
19075 case CRYPTO_BUILTIN_VPMSUMD:
19076 case CRYPTO_BUILTIN_VPMSUM:
19077 case MISC_BUILTIN_ADDG6S:
19078 case MISC_BUILTIN_DIVWEU:
19079 case MISC_BUILTIN_DIVWEUO:
19080 case MISC_BUILTIN_DIVDEU:
19081 case MISC_BUILTIN_DIVDEUO:
19082 case VSX_BUILTIN_UDIV_V2DI:
19083 h.uns_p[0] = 1;
19084 h.uns_p[1] = 1;
19085 h.uns_p[2] = 1;
19086 break;
19088 /* unsigned 3 argument functions. */
19089 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
19090 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
19091 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
19092 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
19093 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
19094 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
19095 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
19096 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
19097 case VSX_BUILTIN_VPERM_16QI_UNS:
19098 case VSX_BUILTIN_VPERM_8HI_UNS:
19099 case VSX_BUILTIN_VPERM_4SI_UNS:
19100 case VSX_BUILTIN_VPERM_2DI_UNS:
19101 case VSX_BUILTIN_XXSEL_16QI_UNS:
19102 case VSX_BUILTIN_XXSEL_8HI_UNS:
19103 case VSX_BUILTIN_XXSEL_4SI_UNS:
19104 case VSX_BUILTIN_XXSEL_2DI_UNS:
19105 case CRYPTO_BUILTIN_VPERMXOR:
19106 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
19107 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
19108 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
19109 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
19110 case CRYPTO_BUILTIN_VSHASIGMAW:
19111 case CRYPTO_BUILTIN_VSHASIGMAD:
19112 case CRYPTO_BUILTIN_VSHASIGMA:
19113 h.uns_p[0] = 1;
19114 h.uns_p[1] = 1;
19115 h.uns_p[2] = 1;
19116 h.uns_p[3] = 1;
19117 break;
19119 /* signed permute functions with unsigned char mask. */
19120 case ALTIVEC_BUILTIN_VPERM_16QI:
19121 case ALTIVEC_BUILTIN_VPERM_8HI:
19122 case ALTIVEC_BUILTIN_VPERM_4SI:
19123 case ALTIVEC_BUILTIN_VPERM_4SF:
19124 case ALTIVEC_BUILTIN_VPERM_2DI:
19125 case ALTIVEC_BUILTIN_VPERM_2DF:
19126 case VSX_BUILTIN_VPERM_16QI:
19127 case VSX_BUILTIN_VPERM_8HI:
19128 case VSX_BUILTIN_VPERM_4SI:
19129 case VSX_BUILTIN_VPERM_4SF:
19130 case VSX_BUILTIN_VPERM_2DI:
19131 case VSX_BUILTIN_VPERM_2DF:
19132 h.uns_p[3] = 1;
19133 break;
19135 /* unsigned args, signed return. */
19136 case VSX_BUILTIN_XVCVUXDSP:
19137 case VSX_BUILTIN_XVCVUXDDP_UNS:
19138 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
19139 h.uns_p[1] = 1;
19140 break;
19142 /* signed args, unsigned return. */
19143 case VSX_BUILTIN_XVCVDPUXDS_UNS:
19144 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
19145 case MISC_BUILTIN_UNPACK_TD:
19146 case MISC_BUILTIN_UNPACK_V1TI:
19147 h.uns_p[0] = 1;
19148 break;
19150 /* unsigned arguments for 128-bit pack instructions. */
19151 case MISC_BUILTIN_PACK_TD:
19152 case MISC_BUILTIN_PACK_V1TI:
19153 h.uns_p[1] = 1;
19154 h.uns_p[2] = 1;
19155 break;
19157 default:
19158 break;
19161 /* Figure out how many args are present. */
19162 while (num_args > 0 && h.mode[num_args] == VOIDmode)
19163 num_args--;
19165 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
19166 if (!ret_type && h.uns_p[0])
19167 ret_type = builtin_mode_to_type[h.mode[0]][0];
19169 if (!ret_type)
19170 fatal_error (input_location,
19171 "internal error: builtin function %s had an unexpected "
19172 "return type %s", name, GET_MODE_NAME (h.mode[0]));
19174 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
19175 arg_type[i] = NULL_TREE;
19177 for (i = 0; i < num_args; i++)
19179 int m = (int) h.mode[i+1];
19180 int uns_p = h.uns_p[i+1];
19182 arg_type[i] = builtin_mode_to_type[m][uns_p];
19183 if (!arg_type[i] && uns_p)
19184 arg_type[i] = builtin_mode_to_type[m][0];
19186 if (!arg_type[i])
19187 fatal_error (input_location,
19188 "internal error: builtin function %s, argument %d "
19189 "had unexpected argument type %s", name, i,
19190 GET_MODE_NAME (m));
19193 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
19194 if (*found == NULL)
19196 h2 = ggc_alloc<builtin_hash_struct> ();
19197 *h2 = h;
19198 *found = h2;
19200 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
19201 arg_type[2], NULL_TREE);
19204 return (*found)->type;
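/* For instance, the call near the top of rs6000_init_builtins,

       builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
                              RS6000_BUILTIN_RECIP, "__builtin_recipdiv");

   resolves through builtin_mode_to_type to the C type
   "double (double, double)", and the hash table ensures the tree is
   shared by every builtin with the same shape.  */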
19207 static void
19208 rs6000_common_init_builtins (void)
19210 const struct builtin_description *d;
19211 size_t i;
19213 tree opaque_ftype_opaque = NULL_TREE;
19214 tree opaque_ftype_opaque_opaque = NULL_TREE;
19215 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
19216 tree v2si_ftype = NULL_TREE;
19217 tree v2si_ftype_qi = NULL_TREE;
19218 tree v2si_ftype_v2si_qi = NULL_TREE;
19219 tree v2si_ftype_int_qi = NULL_TREE;
19220 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
19222 if (!TARGET_PAIRED_FLOAT)
19224 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
19225 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
19228 /* Paired and SPE builtins are only available if you build a compiler with
19229 the appropriate options, so only create those builtins with the
19230 appropriate compiler option. Create Altivec and VSX builtins on machines
19231 with at least the general purpose extensions (970 and newer) to allow the
19232 use of the target attribute. */
19234 if (TARGET_EXTRA_BUILTINS)
19235 builtin_mask |= RS6000_BTM_COMMON;
19237 /* Add the ternary operators. */
19238 d = bdesc_3arg;
19239 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
19241 tree type;
19242 HOST_WIDE_INT mask = d->mask;
19244 if ((mask & builtin_mask) != mask)
19246 if (TARGET_DEBUG_BUILTIN)
19247 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
19248 continue;
19251 if (rs6000_overloaded_builtin_p (d->code))
19253 if (! (type = opaque_ftype_opaque_opaque_opaque))
19254 type = opaque_ftype_opaque_opaque_opaque
19255 = build_function_type_list (opaque_V4SI_type_node,
19256 opaque_V4SI_type_node,
19257 opaque_V4SI_type_node,
19258 opaque_V4SI_type_node,
19259 NULL_TREE);
19261 else
19263 enum insn_code icode = d->icode;
19264 if (d->name == 0)
19266 if (TARGET_DEBUG_BUILTIN)
19267 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
19268 (long unsigned)i);
19270 continue;
19273 if (icode == CODE_FOR_nothing)
19275 if (TARGET_DEBUG_BUILTIN)
19276 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
19277 d->name);
19279 continue;
19282 type = builtin_function_type (insn_data[icode].operand[0].mode,
19283 insn_data[icode].operand[1].mode,
19284 insn_data[icode].operand[2].mode,
19285 insn_data[icode].operand[3].mode,
19286 d->code, d->name);
19289 def_builtin (d->name, type, d->code);
19292 /* Add the binary operators. */
19293 d = bdesc_2arg;
19294 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19296 machine_mode mode0, mode1, mode2;
19297 tree type;
19298 HOST_WIDE_INT mask = d->mask;
19300 if ((mask & builtin_mask) != mask)
19302 if (TARGET_DEBUG_BUILTIN)
19303 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
19304 continue;
19307 if (rs6000_overloaded_builtin_p (d->code))
19309 if (! (type = opaque_ftype_opaque_opaque))
19310 type = opaque_ftype_opaque_opaque
19311 = build_function_type_list (opaque_V4SI_type_node,
19312 opaque_V4SI_type_node,
19313 opaque_V4SI_type_node,
19314 NULL_TREE);
19316 else
19318 enum insn_code icode = d->icode;
19319 if (d->name == 0)
19321 if (TARGET_DEBUG_BUILTIN)
19322 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
19323 (long unsigned)i);
19325 continue;
19328 if (icode == CODE_FOR_nothing)
19330 if (TARGET_DEBUG_BUILTIN)
19331 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
19332 d->name);
19334 continue;
19337 mode0 = insn_data[icode].operand[0].mode;
19338 mode1 = insn_data[icode].operand[1].mode;
19339 mode2 = insn_data[icode].operand[2].mode;
19341 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
19343 if (! (type = v2si_ftype_v2si_qi))
19344 type = v2si_ftype_v2si_qi
19345 = build_function_type_list (opaque_V2SI_type_node,
19346 opaque_V2SI_type_node,
19347 char_type_node,
19348 NULL_TREE);
19351 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
19352 && mode2 == QImode)
19354 if (! (type = v2si_ftype_int_qi))
19355 type = v2si_ftype_int_qi
19356 = build_function_type_list (opaque_V2SI_type_node,
19357 integer_type_node,
19358 char_type_node,
19359 NULL_TREE);
19362 else
19363 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
19364 d->code, d->name);
19367 def_builtin (d->name, type, d->code);
19370 /* Add the simple unary operators. */
19371 d = bdesc_1arg;
19372 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19374 machine_mode mode0, mode1;
19375 tree type;
19376 HOST_WIDE_INT mask = d->mask;
19378 if ((mask & builtin_mask) != mask)
19380 if (TARGET_DEBUG_BUILTIN)
19381 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
19382 continue;
19385 if (rs6000_overloaded_builtin_p (d->code))
19387 if (! (type = opaque_ftype_opaque))
19388 type = opaque_ftype_opaque
19389 = build_function_type_list (opaque_V4SI_type_node,
19390 opaque_V4SI_type_node,
19391 NULL_TREE);
19393 else
19395 enum insn_code icode = d->icode;
19396 if (d->name == 0)
19398 if (TARGET_DEBUG_BUILTIN)
19399 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
19400 (long unsigned)i);
19402 continue;
19405 if (icode == CODE_FOR_nothing)
19407 if (TARGET_DEBUG_BUILTIN)
19408 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
19409 d->name);
19411 continue;
19414 mode0 = insn_data[icode].operand[0].mode;
19415 mode1 = insn_data[icode].operand[1].mode;
19417 if (mode0 == V2SImode && mode1 == QImode)
19419 if (! (type = v2si_ftype_qi))
19420 type = v2si_ftype_qi
19421 = build_function_type_list (opaque_V2SI_type_node,
19422 char_type_node,
19423 NULL_TREE);
19426 else
19427 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
19428 d->code, d->name);
19431 def_builtin (d->name, type, d->code);
19434 /* Add the simple no-argument operators. */
19435 d = bdesc_0arg;
19436 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
19438 machine_mode mode0;
19439 tree type;
19440 HOST_WIDE_INT mask = d->mask;
19442 if ((mask & builtin_mask) != mask)
19444 if (TARGET_DEBUG_BUILTIN)
19445 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
19446 continue;
19448 if (rs6000_overloaded_builtin_p (d->code))
19450 if (!opaque_ftype_opaque)
19451 opaque_ftype_opaque
19452 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
19453 type = opaque_ftype_opaque;
19455 else
19457 enum insn_code icode = d->icode;
19458 if (d->name == 0)
19460 if (TARGET_DEBUG_BUILTIN)
19461 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
19462 (long unsigned) i);
19463 continue;
19465 if (icode == CODE_FOR_nothing)
19467 if (TARGET_DEBUG_BUILTIN)
19468 fprintf (stderr,
19469 "rs6000_builtin, skip no-argument %s (no code)\n",
19470 d->name);
19471 continue;
19473 mode0 = insn_data[icode].operand[0].mode;
19474 if (mode0 == V2SImode)
19476 /* Code for SPE. */
19477 if (! (type = v2si_ftype))
19479 v2si_ftype
19480 = build_function_type_list (opaque_V2SI_type_node,
19481 NULL_TREE);
19482 type = v2si_ftype;
19485 else
19486 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
19487 d->code, d->name);
19489 def_builtin (d->name, type, d->code);
19493 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
19494 static void
19495 init_float128_ibm (machine_mode mode)
19497 if (!TARGET_XL_COMPAT)
19499 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
19500 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
19501 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
19502 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
19504 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
19506 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
19507 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
19508 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
19509 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
19510 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
19511 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
19512 set_optab_libfunc (le_optab, mode, "__gcc_qle");
19514 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
19515 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
19516 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
19517 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
19518 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
19519 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
19520 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
19521 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
19524 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
19525 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
19527 else
19529 set_optab_libfunc (add_optab, mode, "_xlqadd");
19530 set_optab_libfunc (sub_optab, mode, "_xlqsub");
19531 set_optab_libfunc (smul_optab, mode, "_xlqmul");
19532 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
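/* For example, on a target using IBM extended-precision TFmode, a
   'long double' addition is emitted as a call to __gcc_qadd, or to
   _xlqadd when -mxl-compat requests the IBM XL-compatible entry
   points; the __gcc_q* comparison routines above are only registered
   when no full hard-float path is available. */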
19535 /* Add various conversions for IFmode to use the traditional TFmode
19536 names. */
19537 if (mode == IFmode)
19539 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
19540 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
19541 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
19542 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
19543 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
19544 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
19546 if (TARGET_POWERPC64)
19548 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
19549 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
19550 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
19551 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
19556 /* Set up IEEE 128-bit floating point routines. Use different names if the
19557 arguments can be passed in a vector register. The historical PowerPC
19558 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
19559 continue to use that if we aren't using vector registers to pass IEEE
19560 128-bit floating point. */
19562 static void
19563 init_float128_ieee (machine_mode mode)
19565 if (FLOAT128_VECTOR_P (mode))
19567 set_optab_libfunc (add_optab, mode, "__addkf3");
19568 set_optab_libfunc (sub_optab, mode, "__subkf3");
19569 set_optab_libfunc (neg_optab, mode, "__negkf2");
19570 set_optab_libfunc (smul_optab, mode, "__mulkf3");
19571 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
19572 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
19573 set_optab_libfunc (abs_optab, mode, "__abskf2");
19575 set_optab_libfunc (eq_optab, mode, "__eqkf2");
19576 set_optab_libfunc (ne_optab, mode, "__nekf2");
19577 set_optab_libfunc (gt_optab, mode, "__gtkf2");
19578 set_optab_libfunc (ge_optab, mode, "__gekf2");
19579 set_optab_libfunc (lt_optab, mode, "__ltkf2");
19580 set_optab_libfunc (le_optab, mode, "__lekf2");
19581 set_optab_libfunc (unord_optab, mode, "__unordkf2");
19583 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
19584 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
19585 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
19586 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
19588 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
19589 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19590 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
19592 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
19593 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19594 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
19596 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
19597 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
19598 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
19599 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
19600 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
19601 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
19603 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
19604 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
19605 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
19606 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
19608 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
19609 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
19610 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
19611 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
19613 if (TARGET_POWERPC64)
19615 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
19616 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
19617 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
19618 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
19622 else
19624 set_optab_libfunc (add_optab, mode, "_q_add");
19625 set_optab_libfunc (sub_optab, mode, "_q_sub");
19626 set_optab_libfunc (neg_optab, mode, "_q_neg");
19627 set_optab_libfunc (smul_optab, mode, "_q_mul");
19628 set_optab_libfunc (sdiv_optab, mode, "_q_div");
19629 if (TARGET_PPC_GPOPT)
19630 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
19632 set_optab_libfunc (eq_optab, mode, "_q_feq");
19633 set_optab_libfunc (ne_optab, mode, "_q_fne");
19634 set_optab_libfunc (gt_optab, mode, "_q_fgt");
19635 set_optab_libfunc (ge_optab, mode, "_q_fge");
19636 set_optab_libfunc (lt_optab, mode, "_q_flt");
19637 set_optab_libfunc (le_optab, mode, "_q_fle");
19639 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
19640 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
19641 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
19642 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
19643 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
19644 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
19645 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
19646 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
19650 static void
19651 rs6000_init_libfuncs (void)
19653 /* __float128 support. */
19654 if (TARGET_FLOAT128_TYPE)
19656 init_float128_ibm (IFmode);
19657 init_float128_ieee (KFmode);
19660 /* AIX/Darwin/64-bit Linux quad floating point routines. */
19661 if (TARGET_LONG_DOUBLE_128)
19663 if (!TARGET_IEEEQUAD)
19664 init_float128_ibm (TFmode);
19666 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
19667 else
19668 init_float128_ieee (TFmode);
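/* Note the mode mapping used above: KFmode is the explicit IEEE
   128-bit type (__float128), IFmode the explicit IBM double-double
   type (__ibm128), and TFmode is 'long double', which gets whichever
   set of routines TARGET_IEEEQUAD selects. */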
19673 /* Expand a block clear operation, and return 1 if successful. Return 0
19674 if we should let the compiler generate normal code.
19676 operands[0] is the destination
19677 operands[1] is the length
19678 operands[3] is the alignment */
19680 int
19681 expand_block_clear (rtx operands[])
19683 rtx orig_dest = operands[0];
19684 rtx bytes_rtx = operands[1];
19685 rtx align_rtx = operands[3];
19686 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
19687 HOST_WIDE_INT align;
19688 HOST_WIDE_INT bytes;
19689 int offset;
19690 int clear_bytes;
19691 int clear_step;
19693 /* If this is not a fixed size clear, just fall back to a memset call. */
19694 if (! constp)
19695 return 0;
19697 /* This must be a fixed size alignment. */
19698 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19699 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19701 /* Anything to clear? */
19702 bytes = INTVAL (bytes_rtx);
19703 if (bytes <= 0)
19704 return 1;
19706 /* Use the builtin memset after a point, to avoid huge code bloat.
19707 When optimize_size, avoid any significant code bloat; calling
19708 memset is about 4 instructions, so allow for one instruction to
19709 load zero and three to do clearing. */
19710 if (TARGET_ALTIVEC && align >= 128)
19711 clear_step = 16;
19712 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
19713 clear_step = 8;
19714 else if (TARGET_SPE && align >= 64)
19715 clear_step = 8;
19716 else
19717 clear_step = 4;
19719 if (optimize_size && bytes > 3 * clear_step)
19720 return 0;
19721 if (! optimize_size && bytes > 8 * clear_step)
19722 return 0;
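/* Worked example of these limits: with clear_step of 4, -Os allows
   at most 12 bytes to be cleared inline; without -Os, up to 32 bytes
   of SImode stores, 64 bytes of DImode stores on 64-bit, or 128
   bytes of Altivec stores are expanded before deferring to memset. */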
19724 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
19726 machine_mode mode = BLKmode;
19727 rtx dest;
19729 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
19731 clear_bytes = 16;
19732 mode = V4SImode;
19734 else if (bytes >= 8 && TARGET_SPE && align >= 64)
19736 clear_bytes = 8;
19737 mode = V2SImode;
19739 else if (bytes >= 8 && TARGET_POWERPC64
19740 && (align >= 64 || !STRICT_ALIGNMENT))
19742 clear_bytes = 8;
19743 mode = DImode;
19744 if (offset == 0 && align < 64)
19746 rtx addr;
19748 /* If the address form is reg+offset with offset not a
19749 multiple of four, reload into reg indirect form here
19750 rather than waiting for reload. This way we get one
19751 reload, not one per store. */
19752 addr = XEXP (orig_dest, 0);
19753 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19754 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19755 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19757 addr = copy_addr_to_reg (addr);
19758 orig_dest = replace_equiv_address (orig_dest, addr);
19762 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19763 { /* move 4 bytes */
19764 clear_bytes = 4;
19765 mode = SImode;
19767 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19768 { /* move 2 bytes */
19769 clear_bytes = 2;
19770 mode = HImode;
19772 else /* move 1 byte at a time */
19774 clear_bytes = 1;
19775 mode = QImode;
19778 dest = adjust_address (orig_dest, mode, offset);
19780 emit_move_insn (dest, CONST0_RTX (mode));
19783 return 1;
19786 /* Emit a potentially record-form instruction, setting DST from SRC.
19787 If DOT is 0, that is all; otherwise, set CCREG to the result of the
19788 signed comparison of DST with zero. If DOT is 1, the generated RTL
19789 doesn't care about the DST result; if DOT is 2, it does. If CCREG
19790 is CR0, do a single dot insn (as a PARALLEL); otherwise, do a SET and
19791 a separate COMPARE. */
19793 static void
19794 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
19796 if (dot == 0)
19798 emit_move_insn (dst, src);
19799 return;
19802 if (cc_reg_not_cr0_operand (ccreg, CCmode))
19804 emit_move_insn (dst, src);
19805 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
19806 return;
19809 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
19810 if (dot == 1)
19812 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
19813 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
19815 else
19817 rtx set = gen_rtx_SET (dst, src);
19818 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
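/* Illustrative RTL shapes (a sketch): DOT == 1 emits
   (parallel [(set ccreg (compare src 0)) (clobber dst)])
   while DOT == 2 emits
   (parallel [(set ccreg (compare src 0)) (set dst src)]),
   both of which match record-form ("dot") insn patterns when CCREG
   is CR0. */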
19822 /* Figure out the correct instructions to generate to load data for
19823 block compare. MODE is used for the read from memory, and
19824 data is zero extended if REG is wider than MODE. If LE code
19825 is being generated, bswap loads are used.
19827 REG is the destination register to move the data into.
19828 MEM is the memory block being read.
19829 MODE is the mode of memory to use for the read. */
19830 static void
19831 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
19833 switch (GET_MODE (reg))
19835 case E_DImode:
19836 switch (mode)
19838 case E_QImode:
19839 emit_insn (gen_zero_extendqidi2 (reg, mem));
19840 break;
19841 case E_HImode:
19843 rtx src = mem;
19844 if (!BYTES_BIG_ENDIAN)
19846 src = gen_reg_rtx (HImode);
19847 emit_insn (gen_bswaphi2 (src, mem));
19849 emit_insn (gen_zero_extendhidi2 (reg, src));
19850 break;
19852 case E_SImode:
19854 rtx src = mem;
19855 if (!BYTES_BIG_ENDIAN)
19857 src = gen_reg_rtx (SImode);
19858 emit_insn (gen_bswapsi2 (src, mem));
19860 emit_insn (gen_zero_extendsidi2 (reg, src));
19862 break;
19863 case E_DImode:
19864 if (!BYTES_BIG_ENDIAN)
19865 emit_insn (gen_bswapdi2 (reg, mem));
19866 else
19867 emit_insn (gen_movdi (reg, mem));
19868 break;
19869 default:
19870 gcc_unreachable ();
19872 break;
19874 case E_SImode:
19875 switch (mode)
19877 case E_QImode:
19878 emit_insn (gen_zero_extendqisi2 (reg, mem));
19879 break;
19880 case E_HImode:
19882 rtx src = mem;
19883 if (!BYTES_BIG_ENDIAN)
19885 src = gen_reg_rtx (HImode);
19886 emit_insn (gen_bswaphi2 (src, mem));
19888 emit_insn (gen_zero_extendhisi2 (reg, src));
19889 break;
19891 case E_SImode:
19892 if (!BYTES_BIG_ENDIAN)
19893 emit_insn (gen_bswapsi2 (reg, mem));
19894 else
19895 emit_insn (gen_movsi (reg, mem));
19896 break;
19897 case E_DImode:
19898 /* DImode is larger than the destination reg so is not expected. */
19899 gcc_unreachable ();
19900 break;
19901 default:
19902 gcc_unreachable ();
19904 break;
19905 default:
19906 gcc_unreachable ();
19907 break;
19911 /* Select the mode to be used for reading the next chunk of bytes
19912 in the compare.
19914 OFFSET is the current read offset from the beginning of the block.
19915 BYTES is the number of bytes remaining to be read.
19916 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
19917 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
19918 the largest allowable mode. */
19919 static machine_mode
19920 select_block_compare_mode (unsigned HOST_WIDE_INT offset,
19921 unsigned HOST_WIDE_INT bytes,
19922 unsigned HOST_WIDE_INT align, bool word_mode_ok)
19924 /* First see if we can do a whole load unit
19925 as that will be more efficient than a larger load + shift. */
19927 /* If big, use biggest chunk.
19928 If exactly chunk size, use that size.
19929 If remainder can be done in one piece with shifting, do that.
19930 Do largest chunk possible without violating alignment rules. */
19932 /* The most we can read without potential page crossing. */
19933 unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
19935 if (word_mode_ok && bytes >= UNITS_PER_WORD)
19936 return word_mode;
19937 else if (bytes == GET_MODE_SIZE (SImode))
19938 return SImode;
19939 else if (bytes == GET_MODE_SIZE (HImode))
19940 return HImode;
19941 else if (bytes == GET_MODE_SIZE (QImode))
19942 return QImode;
19943 else if (bytes < GET_MODE_SIZE (SImode)
19944 && offset >= GET_MODE_SIZE (SImode) - bytes)
19945 /* This matches the case where we have SImode and 3 bytes
19946 and offset >= 1 and permits us to move back one and overlap
19947 with the previous read, thus avoiding having to shift
19948 unwanted bytes off of the input. */
19949 return SImode;
19950 else if (word_mode_ok && bytes < UNITS_PER_WORD
19951 && offset >= UNITS_PER_WORD-bytes)
19952 /* Similarly, if we can use DImode it will get matched here and
19953 can do an overlapping read that ends at the end of the block. */
19954 return word_mode;
19955 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
19956 /* It is safe to do all remaining in one load of largest size,
19957 possibly with a shift to get rid of unwanted bytes. */
19958 return word_mode;
19959 else if (maxread >= GET_MODE_SIZE (SImode))
19960 /* It is safe to do all remaining in one SImode load,
19961 possibly with a shift to get rid of unwanted bytes. */
19962 return SImode;
19963 else if (bytes > GET_MODE_SIZE (SImode))
19964 return SImode;
19965 else if (bytes > GET_MODE_SIZE (HImode))
19966 return HImode;
19968 /* Final fallback: do one byte at a time. */
19969 return QImode;
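/* Worked example, assuming 64-bit word_mode: for bytes = 18 this
   returns DImode at offset 0, DImode at offset 8, and HImode for the
   final 2 bytes at offset 16 -- the shape of the 18-byte example in
   expand_block_compare below. */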
19972 /* Compute the alignment of pointer+OFFSET where the original alignment
19973 of pointer was BASE_ALIGN. */
19974 static unsigned HOST_WIDE_INT
19975 compute_current_alignment (unsigned HOST_WIDE_INT base_align,
19976 unsigned HOST_WIDE_INT offset)
19978 if (offset == 0)
19979 return base_align;
19980 return min (base_align, offset & -offset);
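/* offset & -offset isolates the lowest set bit, i.e. the largest
   power of two dividing OFFSET. E.g. base_align = 8 and offset = 12
   give min (8, 4) = 4: pointer+12 is only guaranteed to be 4-byte
   aligned. */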
19983 /* Expand a block compare operation, and return true if successful.
19984 Return false if we should let the compiler generate normal code,
19985 probably a memcmp call.
19987 OPERANDS[0] is the target (result).
19988 OPERANDS[1] is the first source.
19989 OPERANDS[2] is the second source.
19990 OPERANDS[3] is the length.
19991 OPERANDS[4] is the alignment. */
19992 bool
19993 expand_block_compare (rtx operands[])
19995 rtx target = operands[0];
19996 rtx orig_src1 = operands[1];
19997 rtx orig_src2 = operands[2];
19998 rtx bytes_rtx = operands[3];
19999 rtx align_rtx = operands[4];
20000 HOST_WIDE_INT cmp_bytes = 0;
20001 rtx src1 = orig_src1;
20002 rtx src2 = orig_src2;
20004 /* This case is complicated to handle because the subtract
20005 with carry instructions do not generate the 64-bit
20006 carry and so we must emit code to calculate it ourselves.
20007 We choose not to implement this yet. */
20008 if (TARGET_32BIT && TARGET_POWERPC64)
20009 return false;
20011 /* If this is not a fixed size compare, just call memcmp. */
20012 if (!CONST_INT_P (bytes_rtx))
20013 return false;
20015 /* This must be a fixed size alignment. */
20016 if (!CONST_INT_P (align_rtx))
20017 return false;
20019 unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;
20021 /* rs6000_slow_unaligned_access -- don't do unaligned stuff. */
20022 if (rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
20023 || rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2)))
20024 return false;
20026 gcc_assert (GET_MODE (target) == SImode);
20028 /* Anything to move? */
20029 unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
20030 if (bytes == 0)
20031 return true;
20033 /* The code generated for p7 and older is not faster than glibc
20034 memcmp if alignment is small and length is not short, so bail
20035 out to avoid those conditions. */
20036 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
20037 && ((base_align == 1 && bytes > 16)
20038 || (base_align == 2 && bytes > 32)))
20039 return false;
20041 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20042 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20043 /* P7/P8 code uses cond for the record-form subfc., but P9 uses
20044 it for cmpld, which needs CCUNSmode. */
20045 rtx cond;
20046 if (TARGET_P9_MISC)
20047 cond = gen_reg_rtx (CCUNSmode);
20048 else
20049 cond = gen_reg_rtx (CCmode);
20051 /* If we have an LE target without ldbrx and word_mode is DImode,
20052 then we must avoid using word_mode. */
20053 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20054 && word_mode == DImode);
20056 /* Strategy phase. How many ops will this take and should we expand it? */
20058 unsigned HOST_WIDE_INT offset = 0;
20059 machine_mode load_mode =
20060 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20061 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20063 /* We don't want to generate too much code. */
20064 unsigned HOST_WIDE_INT max_bytes =
20065 load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit;
20066 if (!IN_RANGE (bytes, 1, max_bytes))
20067 return false;
20069 bool generate_6432_conversion = false;
20070 rtx convert_label = NULL;
20071 rtx final_label = NULL;
20073 /* Example of generated code for 18 bytes with 1-byte alignment.
20074 Compiled with -fno-reorder-blocks for clarity.
20075 ldbrx 10,31,8
20076 ldbrx 9,7,8
20077 subfc. 9,9,10
20078 bne 0,.L6487
20079 addi 9,12,8
20080 addi 5,11,8
20081 ldbrx 10,0,9
20082 ldbrx 9,0,5
20083 subfc. 9,9,10
20084 bne 0,.L6487
20085 addi 9,12,16
20086 lhbrx 10,0,9
20087 addi 9,11,16
20088 lhbrx 9,0,9
20089 subf 9,9,10
20090 b .L6488
20091 .p2align 4,,15
20092 .L6487: #convert_label
20093 popcntd 9,9
20094 subfe 10,10,10
20095 or 9,9,10
20096 .L6488: #final_label
20097 extsw 10,9
20099 We start off with DImode for two blocks that jump to the DI->SI conversion
20100 if the difference is found there, then a final block of HImode that skips
20101 the DI->SI conversion. */
20103 while (bytes > 0)
20105 unsigned int align = compute_current_alignment (base_align, offset);
20106 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20107 load_mode = select_block_compare_mode (offset, bytes, align,
20108 word_mode_ok);
20109 else
20110 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
20111 load_mode_size = GET_MODE_SIZE (load_mode);
20112 if (bytes >= load_mode_size)
20113 cmp_bytes = load_mode_size;
20114 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20116 /* Move this load back so it doesn't go past the end.
20117 P8/P9 can do this efficiently. */
20118 unsigned int extra_bytes = load_mode_size - bytes;
20119 cmp_bytes = bytes;
20120 if (extra_bytes < offset)
20122 offset -= extra_bytes;
20123 cmp_bytes = load_mode_size;
20124 bytes = cmp_bytes;
20127 else
20128 /* P7 and earlier can't do the overlapping load trick fast,
20129 so this forces a non-overlapping load and a shift to get
20130 rid of the extra bytes. */
20131 cmp_bytes = bytes;
20133 src1 = adjust_address (orig_src1, load_mode, offset);
20134 src2 = adjust_address (orig_src2, load_mode, offset);
20136 if (!REG_P (XEXP (src1, 0)))
20138 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20139 src1 = replace_equiv_address (src1, src1_reg);
20141 set_mem_size (src1, cmp_bytes);
20143 if (!REG_P (XEXP (src2, 0)))
20145 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20146 src2 = replace_equiv_address (src2, src2_reg);
20148 set_mem_size (src2, cmp_bytes);
20150 do_load_for_compare (tmp_reg_src1, src1, load_mode);
20151 do_load_for_compare (tmp_reg_src2, src2, load_mode);
20153 if (cmp_bytes < load_mode_size)
20155 /* Shift unneeded bytes off. */
20156 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
20157 if (word_mode == DImode)
20159 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
20160 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
20162 else
20164 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20165 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20169 int remain = bytes - cmp_bytes;
20170 if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode))
20172 /* Target is larger than load size so we don't need to
20173 reduce result size. */
20175 /* We previously did a block that needed 64->32 conversion but
20176 the current block does not, so a label is needed to jump
20177 to the end. */
20178 if (generate_6432_conversion && !final_label)
20179 final_label = gen_label_rtx ();
20181 if (remain > 0)
20183 /* This is not the last block, branch to the end if the result
20184 of this subtract is not zero. */
20185 if (!final_label)
20186 final_label = gen_label_rtx ();
20187 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20188 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20189 rtx cr = gen_reg_rtx (CCmode);
20190 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr);
20191 emit_insn (gen_movsi (target,
20192 gen_lowpart (SImode, tmp_reg_src2)));
20193 rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
20194 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20195 fin_ref, pc_rtx);
20196 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20197 JUMP_LABEL (j) = final_label;
20198 LABEL_NUSES (final_label) += 1;
20200 else
20202 if (word_mode == DImode)
20204 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
20205 tmp_reg_src2));
20206 emit_insn (gen_movsi (target,
20207 gen_lowpart (SImode, tmp_reg_src2)));
20209 else
20210 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
20212 if (final_label)
20214 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20215 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20216 JUMP_LABEL (j) = final_label;
20217 LABEL_NUSES (final_label) += 1;
20218 emit_barrier ();
20222 else
20224 /* Do we need a 64->32 conversion block? We need the 64->32
20225 conversion even if target size == load_mode size because
20226 the subtract generates one extra bit. */
20227 generate_6432_conversion = true;
20229 if (remain > 0)
20231 if (!convert_label)
20232 convert_label = gen_label_rtx ();
20234 /* Compare to zero and branch to convert_label if not zero. */
20235 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
20236 if (TARGET_P9_MISC)
20238 /* Generate a compare, and convert with a setb later. */
20239 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20240 tmp_reg_src2);
20241 emit_insn (gen_rtx_SET (cond, cmp));
20243 else
20244 /* Generate a subfc. and use the longer
20245 sequence for conversion. */
20246 if (TARGET_64BIT)
20247 emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20248 tmp_reg_src1, cond));
20249 else
20250 emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20251 tmp_reg_src1, cond));
20252 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20253 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20254 cvt_ref, pc_rtx);
20255 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20256 JUMP_LABEL (j) = convert_label;
20257 LABEL_NUSES (convert_label) += 1;
20259 else
20261 /* Just do the subtract/compare. Since this is the last block,
20262 the convert code will be generated immediately following. */
20263 if (TARGET_P9_MISC)
20265 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20266 tmp_reg_src2);
20267 emit_insn (gen_rtx_SET (cond, cmp));
20269 else
20270 if (TARGET_64BIT)
20271 emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2,
20272 tmp_reg_src1));
20273 else
20274 emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2,
20275 tmp_reg_src1));
20279 offset += cmp_bytes;
20280 bytes -= cmp_bytes;
20283 if (generate_6432_conversion)
20285 if (convert_label)
20286 emit_label (convert_label);
20288 /* We need to produce a DI result from the subtract, then convert to target SI
20289 while maintaining <0 / ==0 / >0 properties. This sequence works:
20290 subfc L,A,B
20291 subfe H,H,H
20292 popcntd L,L
20293 rldimi L,H,6,0
20295 This is an alternate one Segher cooked up if somebody
20296 wants to expand this for something that doesn't have popcntd:
20297 subfc L,a,b
20298 subfe H,x,x
20299 addic t,L,-1
20300 subfe v,t,L
20301 or z,v,H
20303 And finally, p9 can just do this:
20304 cmpld A,B
20305 setb r */
20307 if (TARGET_P9_MISC)
20309 emit_insn (gen_setb_unsigned (target, cond));
20311 else
20313 if (TARGET_64BIT)
20315 rtx tmp_reg_ca = gen_reg_rtx (DImode);
20316 emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
20317 emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2));
20318 emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca));
20319 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
20321 else
20323 rtx tmp_reg_ca = gen_reg_rtx (SImode);
20324 emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
20325 emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2));
20326 emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca));
20331 if (final_label)
20332 emit_label (final_label);
20334 gcc_assert (bytes == 0);
20335 return true;
20338 /* Generate alignment check and branch code to set up for
20339 strncmp when we don't have DI alignment.
20340 STRNCMP_LABEL is the label to branch to if there is a page crossing.
20341 SRC is the string pointer to be examined.
20342 BYTES is the max number of bytes to compare. */
20343 static void
20344 expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
20346 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
20347 rtx src_check = copy_addr_to_reg (XEXP (src, 0));
20348 if (GET_MODE (src_check) == SImode)
20349 emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
20350 else
20351 emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
20352 rtx cond = gen_reg_rtx (CCmode);
20353 emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
20354 GEN_INT (4096 - bytes)));
20356 rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);
20358 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20359 pc_rtx, lab_ref);
20360 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20361 JUMP_LABEL (j) = strncmp_label;
20362 LABEL_NUSES (strncmp_label) += 1;
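/* For example, with BYTES = 16 the branch to STRNCMP_LABEL is taken
   when (addr & 0xfff) >= 4080, i.e. whenever a 16-byte read starting
   at ADDR might run onto the next 4K page; this mirrors the glibc
   power8 strncmp check quoted below. */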
20365 /* Expand a string compare operation with length, and return
20366 true if successful. Return false if we should let the
20367 compiler generate normal code, probably a strncmp call.
20369 OPERANDS[0] is the target (result).
20370 OPERANDS[1] is the first source.
20371 OPERANDS[2] is the second source.
20372 If NO_LENGTH is zero, then:
20373 OPERANDS[3] is the length.
20374 OPERANDS[4] is the alignment in bytes.
20375 If NO_LENGTH is nonzero, then:
20376 OPERANDS[3] is the alignment in bytes. */
20377 bool
20378 expand_strn_compare (rtx operands[], int no_length)
20380 rtx target = operands[0];
20381 rtx orig_src1 = operands[1];
20382 rtx orig_src2 = operands[2];
20383 rtx bytes_rtx, align_rtx;
20384 if (no_length)
20386 bytes_rtx = NULL;
20387 align_rtx = operands[3];
20389 else
20391 bytes_rtx = operands[3];
20392 align_rtx = operands[4];
20394 unsigned HOST_WIDE_INT cmp_bytes = 0;
20395 rtx src1 = orig_src1;
20396 rtx src2 = orig_src2;
20398 /* If we have a length, it must be constant. This simplifies things
20399 a bit as we don't have to generate code to check if we've exceeded
20400 the length. Later this could be expanded to handle variable lengths. */
20401 if (!no_length && !CONST_INT_P (bytes_rtx))
20402 return false;
20404 /* This must be a fixed size alignment. */
20405 if (!CONST_INT_P (align_rtx))
20406 return false;
20408 unsigned int base_align = UINTVAL (align_rtx);
20409 int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
20410 int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
20412 /* rs6000_slow_unaligned_access -- don't do unaligned stuff. */
20413 if (rs6000_slow_unaligned_access (word_mode, align1)
20414 || rs6000_slow_unaligned_access (word_mode, align2))
20415 return false;
20417 gcc_assert (GET_MODE (target) == SImode);
20419 /* If we have an LE target without ldbrx and word_mode is DImode,
20420 then we must avoid using word_mode. */
20421 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20422 && word_mode == DImode);
20424 unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
20426 unsigned HOST_WIDE_INT offset = 0;
20427 unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
20428 unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
20429 if (no_length)
20430 /* Use this as a stand-in to determine the mode to use. */
20431 bytes = rs6000_string_compare_inline_limit * word_mode_size;
20432 else
20433 bytes = UINTVAL (bytes_rtx);
20435 machine_mode load_mode =
20436 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20437 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20438 compare_length = rs6000_string_compare_inline_limit * load_mode_size;
20440 /* If we have equality at the end of the last compare and we have not
20441 found the end of the string, we need to call strcmp/strncmp to
20442 compare the remainder. */
20443 bool equality_compare_rest = false;
20445 if (no_length)
20447 bytes = compare_length;
20448 equality_compare_rest = true;
20450 else
20452 if (bytes <= compare_length)
20453 compare_length = bytes;
20454 else
20455 equality_compare_rest = true;
20458 rtx result_reg = gen_reg_rtx (word_mode);
20459 rtx final_move_label = gen_label_rtx ();
20460 rtx final_label = gen_label_rtx ();
20461 rtx begin_compare_label = NULL;
20463 if (base_align < 8)
20465 /* Generate code that checks distance to 4k boundary for this case. */
20466 begin_compare_label = gen_label_rtx ();
20467 rtx strncmp_label = gen_label_rtx ();
20468 rtx jmp;
20470 /* Strncmp for power8 in glibc does this:
20471 rldicl r8,r3,0,52
20472 cmpldi cr7,r8,4096-16
20473 bgt cr7,L(pagecross) */
20475 /* Make sure that the length we use for the alignment test and
20476 the subsequent code generation are in agreement so we do not
20477 go past the length we tested for a 4k boundary crossing. */
20478 unsigned HOST_WIDE_INT align_test = compare_length;
20479 if (align_test < 8)
20481 align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
20482 base_align = align_test;
20484 else
20486 align_test = ROUND_UP (align_test, 8);
20487 base_align = 8;
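/* E.g. compare_length = 3 gives align_test = 4 and base_align = 4,
   while compare_length = 20 gives align_test = 24 (rounded up to a
   multiple of 8) and base_align = 8, so the page-crossing test and
   the load sequence agree on how many bytes may be read. */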
20490 if (align1 < 8)
20491 expand_strncmp_align_check (strncmp_label, src1, align_test);
20492 if (align2 < 8)
20493 expand_strncmp_align_check (strncmp_label, src2, align_test);
20495 /* Now generate the following sequence:
20496 - branch to begin_compare
20497 - strncmp_label
20498 - call to strncmp
20499 - branch to final_label
20500 - begin_compare_label */
20502 rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
20503 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
20504 JUMP_LABEL (jmp) = begin_compare_label;
20505 LABEL_NUSES (begin_compare_label) += 1;
20506 emit_barrier ();
20508 emit_label (strncmp_label);
20510 if (!REG_P (XEXP (src1, 0)))
20512 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20513 src1 = replace_equiv_address (src1, src1_reg);
20516 if (!REG_P (XEXP (src2, 0)))
20518 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20519 src2 = replace_equiv_address (src2, src2_reg);
20522 if (no_length)
20524 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20525 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20526 target, LCT_NORMAL, GET_MODE (target),
20527 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20528 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20530 else
20532 /* -m32 -mpowerpc64 results in word_mode being DImode even
20533 though otherwise it is 32-bit. The length arg to strncmp
20534 is a size_t which will be the same size as pointers. */
20535 rtx len_rtx;
20536 if (TARGET_64BIT)
20537 len_rtx = gen_reg_rtx (DImode);
20538 else
20539 len_rtx = gen_reg_rtx (SImode);
20541 emit_move_insn (len_rtx, bytes_rtx);
20543 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20544 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20545 target, LCT_NORMAL, GET_MODE (target),
20546 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20547 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20548 len_rtx, GET_MODE (len_rtx));
20551 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20552 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20553 JUMP_LABEL (jmp) = final_label;
20554 LABEL_NUSES (final_label) += 1;
20555 emit_barrier ();
20556 emit_label (begin_compare_label);
20559 rtx cleanup_label = NULL;
20560 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20561 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20563 /* Generate sequence of ld/ldbrx, cmpb to compare out
20564 to the length specified. */
20565 unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
20566 while (bytes_to_compare > 0)
20568 /* Compare sequence:
20569 check each 8B with: ld/ld cmpd bne
20570 If equal, use rldicr/cmpb to check for zero byte.
20571 cleanup code at end:
20572 cmpb get byte that differs
20573 cmpb look for zero byte
20574 orc combine
20575 cntlzd get bit of first zero/diff byte
20576 subfic convert for rldcl use
20577 rldcl rldcl extract diff/zero byte (one per input)
20578 subf subtract for final result
20580 The last compare can branch around the cleanup code if the
20581 result is zero because the strings are exactly equal. */
20582 unsigned int align = compute_current_alignment (base_align, offset);
20583 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20584 load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
20585 word_mode_ok);
20586 else
20587 load_mode = select_block_compare_mode (0, bytes_to_compare, align,
20588 word_mode_ok);
20589 load_mode_size = GET_MODE_SIZE (load_mode);
20590 if (bytes_to_compare >= load_mode_size)
20591 cmp_bytes = load_mode_size;
20592 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20594 /* Move this load back so it doesn't go past the end.
20595 P8/P9 can do this efficiently. */
20596 unsigned int extra_bytes = load_mode_size - bytes_to_compare;
20597 cmp_bytes = bytes_to_compare;
20598 if (extra_bytes < offset)
20600 offset -= extra_bytes;
20601 cmp_bytes = load_mode_size;
20602 bytes_to_compare = cmp_bytes;
20605 else
20606 /* P7 and earlier can't do the overlapping load trick fast,
20607 so this forces a non-overlapping load and a shift to get
20608 rid of the extra bytes. */
20609 cmp_bytes = bytes_to_compare;
20611 src1 = adjust_address (orig_src1, load_mode, offset);
20612 src2 = adjust_address (orig_src2, load_mode, offset);
20614 if (!REG_P (XEXP (src1, 0)))
20616 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20617 src1 = replace_equiv_address (src1, src1_reg);
20619 set_mem_size (src1, cmp_bytes);
20621 if (!REG_P (XEXP (src2, 0)))
20623 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20624 src2 = replace_equiv_address (src2, src2_reg);
20626 set_mem_size (src2, cmp_bytes);
20628 do_load_for_compare (tmp_reg_src1, src1, load_mode);
20629 do_load_for_compare (tmp_reg_src2, src2, load_mode);
20631 /* We must always left-align the data we read, and
20632 clear any bytes to the right that are beyond the string.
20633 Otherwise the cmpb sequence won't produce the correct
20634 results. The beginning of the compare will be done
20635 with word_mode, so it will not need any extra shifts or
20636 right-clearing. */
20638 if (load_mode_size < word_mode_size)
20640 /* Rotate left first. */
20641 rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
20642 if (word_mode == DImode)
20644 emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
20645 emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
20647 else
20649 emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20650 emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20654 if (cmp_bytes < word_mode_size)
20656 /* Now clear right. This plus the rotate can be
20657 turned into a rldicr instruction. */
20658 HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20659 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20660 if (word_mode == DImode)
20662 emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20663 emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20665 else
20667 emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20668 emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
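/* Worked example: an SImode load into a DImode register is rotated
   left by 32 so the data sits in the high half; if only cmp_bytes = 3
   of it are valid, mb = 40 and the mask ~0 << 40 keeps just the top
   three bytes, clearing everything to their right. */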
20672 /* Cases to handle. A and B are chunks of the two strings.
20673 1: Not end of comparison:
20674 A != B: branch to cleanup code to compute result.
20675 A == B: check for 0 byte, next block if not found.
20676 2: End of the inline comparison:
20677 A != B: branch to cleanup code to compute result.
20678 A == B: check for 0 byte, call strcmp/strncmp
20679 3: Compared requested N bytes:
20680 A == B: branch to result 0.
20681 A != B: cleanup code to compute result. */
20683 unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
20685 rtx dst_label;
20686 if (remain > 0 || equality_compare_rest)
20688 /* Branch to cleanup code, otherwise fall through to do
20689 more compares. */
20690 if (!cleanup_label)
20691 cleanup_label = gen_label_rtx ();
20692 dst_label = cleanup_label;
20694 else
20695 /* Branch to end and produce result of 0. */
20696 dst_label = final_move_label;
20698 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
20699 rtx cond = gen_reg_rtx (CCmode);
20701 /* Always produce the 0 result; it is needed if
20702 cmpb finds a 0 byte in this chunk. */
20703 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20704 rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
20706 rtx cmp_rtx;
20707 if (remain == 0 && !equality_compare_rest)
20708 cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
20709 else
20710 cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20712 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20713 lab_ref, pc_rtx);
20714 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20715 JUMP_LABEL (j) = dst_label;
20716 LABEL_NUSES (dst_label) += 1;
20718 if (remain > 0 || equality_compare_rest)
20720 /* Generate a cmpb to test for a 0 byte and branch
20721 to final result if found. */
20722 rtx cmpb_zero = gen_reg_rtx (word_mode);
20723 rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
20724 rtx condz = gen_reg_rtx (CCmode);
20725 rtx zero_reg = gen_reg_rtx (word_mode);
20726 if (word_mode == SImode)
20728 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20729 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20730 if (cmp_bytes < word_mode_size)
20732 /* Don't want to look at zero bytes past end. */
20733 HOST_WIDE_INT mb =
20734 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20735 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20736 emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
20739 else
20741 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20742 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20743 if (cmp_bytes < word_mode_size)
20745 /* Don't want to look at zero bytes past end. */
20746 HOST_WIDE_INT mb =
20747 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20748 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20749 emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
20753 emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
20754 rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
20755 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
20756 lab_ref_fin, pc_rtx);
20757 rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20758 JUMP_LABEL (j2) = final_move_label;
20759 LABEL_NUSES (final_move_label) += 1;
20763 offset += cmp_bytes;
20764 bytes_to_compare -= cmp_bytes;
20767 if (equality_compare_rest)
20769 /* Update pointers past what has been compared already. */
20770 src1 = adjust_address (orig_src1, load_mode, offset);
20771 src2 = adjust_address (orig_src2, load_mode, offset);
20773 if (!REG_P (XEXP (src1, 0)))
20775 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20776 src1 = replace_equiv_address (src1, src1_reg);
20778 set_mem_size (src1, cmp_bytes);
20780 if (!REG_P (XEXP (src2, 0)))
20782 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20783 src2 = replace_equiv_address (src2, src2_reg);
20785 set_mem_size (src2, cmp_bytes);
20787 /* Construct call to strcmp/strncmp to compare the rest of the string. */
20788 if (no_length)
20790 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20791 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20792 target, LCT_NORMAL, GET_MODE (target),
20793 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20794 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20796 else
20798 rtx len_rtx;
20799 if (TARGET_64BIT)
20800 len_rtx = gen_reg_rtx (DImode);
20801 else
20802 len_rtx = gen_reg_rtx (SImode);
20804 emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
20805 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20806 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20807 target, LCT_NORMAL, GET_MODE (target),
20808 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20809 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20810 len_rtx, GET_MODE (len_rtx));
20813 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20814 rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20815 JUMP_LABEL (jmp) = final_label;
20816 LABEL_NUSES (final_label) += 1;
20817 emit_barrier ();
20820 if (cleanup_label)
20821 emit_label (cleanup_label);
20823 /* Generate the final sequence that identifies the differing
20824 byte and generates the final result, taking into account
20825 zero bytes:
20827 cmpb cmpb_result1, src1, src2
20828 cmpb cmpb_result2, src1, zero
20829 orc cmpb_result1, cmpb_result1, cmpb_result2
20830 cntlzd get bit of first zero/diff byte
20831 addi convert for rldcl use
20832 rldcl rldcl extract diff/zero byte (one per input)
20833 subf subtract for final result
20836 rtx cmpb_diff = gen_reg_rtx (word_mode);
20837 rtx cmpb_zero = gen_reg_rtx (word_mode);
20838 rtx rot_amt = gen_reg_rtx (word_mode);
20839 rtx zero_reg = gen_reg_rtx (word_mode);
20841 rtx rot1_1 = gen_reg_rtx (word_mode);
20842 rtx rot1_2 = gen_reg_rtx (word_mode);
20843 rtx rot2_1 = gen_reg_rtx (word_mode);
20844 rtx rot2_2 = gen_reg_rtx (word_mode);
20846 if (word_mode == SImode)
20848 emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20849 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20850 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20851 emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
20852 emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20853 emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
20854 emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
20855 emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
20856 gen_lowpart (SImode, rot_amt)));
20857 emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20858 emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
20859 gen_lowpart (SImode, rot_amt)));
20860 emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20861 emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
20863 else
20865 emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20866 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20867 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20868 emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
20869 emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20870 emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
20871 emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
20872 emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
20873 gen_lowpart (SImode, rot_amt)));
20874 emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20875 emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
20876 gen_lowpart (SImode, rot_amt)));
20877 emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20878 emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
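/* Worked example of the DImode sequence: if the first difference is
   in byte 2 (counting from the most significant byte), cmpb_diff
   after the complement has 0xff there, cntlzd yields 16, rot_amt
   becomes 24, and rotating both inputs left by 24 bits leaves the
   differing (or zero) bytes in the low 8 bits; the final subtract
   then has the sign strcmp requires. */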
20881 emit_label (final_move_label);
20882 emit_insn (gen_movsi (target,
20883 gen_lowpart (SImode, result_reg)));
20884 emit_label (final_label);
20885 return true;
20888 /* Expand a block move operation, and return 1 if successful. Return 0
20889 if we should let the compiler generate normal code.
20891 operands[0] is the destination
20892 operands[1] is the source
20893 operands[2] is the length
20894 operands[3] is the alignment */
20896 #define MAX_MOVE_REG 4
20898 int
20899 expand_block_move (rtx operands[])
20901 rtx orig_dest = operands[0];
20902 rtx orig_src = operands[1];
20903 rtx bytes_rtx = operands[2];
20904 rtx align_rtx = operands[3];
20905 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
20906 int align;
20907 int bytes;
20908 int offset;
20909 int move_bytes;
20910 rtx stores[MAX_MOVE_REG];
20911 int num_reg = 0;
20913 /* If this is not a fixed size move, just call memcpy. */
20914 if (! constp)
20915 return 0;
20917 /* This must be a fixed size alignment. */
20918 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
20919 align = INTVAL (align_rtx) * BITS_PER_UNIT;
20921 /* Anything to move? */
20922 bytes = INTVAL (bytes_rtx);
20923 if (bytes <= 0)
20924 return 1;
20926 if (bytes > rs6000_block_move_inline_limit)
20927 return 0;
20929 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
20931 union {
20932 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
20933 rtx (*mov) (rtx, rtx);
20934 } gen_func;
20935 machine_mode mode = BLKmode;
20936 rtx src, dest;
20938 /* Altivec first, since it will be faster than a string move
20939 when it applies, and usually not significantly larger. */
20940 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
20942 move_bytes = 16;
20943 mode = V4SImode;
20944 gen_func.mov = gen_movv4si;
20946 else if (TARGET_SPE && bytes >= 8 && align >= 64)
20948 move_bytes = 8;
20949 mode = V2SImode;
20950 gen_func.mov = gen_movv2si;
20952 else if (TARGET_STRING
20953 && bytes > 24 /* move up to 32 bytes at a time */
20954 && ! fixed_regs[5]
20955 && ! fixed_regs[6]
20956 && ! fixed_regs[7]
20957 && ! fixed_regs[8]
20958 && ! fixed_regs[9]
20959 && ! fixed_regs[10]
20960 && ! fixed_regs[11]
20961 && ! fixed_regs[12])
20963 move_bytes = (bytes > 32) ? 32 : bytes;
20964 gen_func.movmemsi = gen_movmemsi_8reg;
20966 else if (TARGET_STRING
20967 && bytes > 16 /* move up to 24 bytes at a time */
20968 && ! fixed_regs[5]
20969 && ! fixed_regs[6]
20970 && ! fixed_regs[7]
20971 && ! fixed_regs[8]
20972 && ! fixed_regs[9]
20973 && ! fixed_regs[10])
20975 move_bytes = (bytes > 24) ? 24 : bytes;
20976 gen_func.movmemsi = gen_movmemsi_6reg;
20978 else if (TARGET_STRING
20979 && bytes > 8 /* move up to 16 bytes at a time */
20980 && ! fixed_regs[5]
20981 && ! fixed_regs[6]
20982 && ! fixed_regs[7]
20983 && ! fixed_regs[8])
20985 move_bytes = (bytes > 16) ? 16 : bytes;
20986 gen_func.movmemsi = gen_movmemsi_4reg;
20988 else if (bytes >= 8 && TARGET_POWERPC64
20989 && (align >= 64 || !STRICT_ALIGNMENT))
20991 move_bytes = 8;
20992 mode = DImode;
20993 gen_func.mov = gen_movdi;
20994 if (offset == 0 && align < 64)
20996 rtx addr;
20998 /* If the address form is reg+offset with offset not a
20999 multiple of four, reload into reg indirect form here
21000 rather than waiting for reload. This way we get one
21001 reload, not one per load and/or store. */
21002 addr = XEXP (orig_dest, 0);
21003 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
21004 && GET_CODE (XEXP (addr, 1)) == CONST_INT
21005 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
21007 addr = copy_addr_to_reg (addr);
21008 orig_dest = replace_equiv_address (orig_dest, addr);
21010 addr = XEXP (orig_src, 0);
21011 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
21012 && GET_CODE (XEXP (addr, 1)) == CONST_INT
21013 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
21015 addr = copy_addr_to_reg (addr);
21016 orig_src = replace_equiv_address (orig_src, addr);
21020 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
21021 { /* move up to 8 bytes at a time */
21022 move_bytes = (bytes > 8) ? 8 : bytes;
21023 gen_func.movmemsi = gen_movmemsi_2reg;
21025 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
21026 { /* move 4 bytes */
21027 move_bytes = 4;
21028 mode = SImode;
21029 gen_func.mov = gen_movsi;
21031 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
21032 { /* move 2 bytes */
21033 move_bytes = 2;
21034 mode = HImode;
21035 gen_func.mov = gen_movhi;
21037 else if (TARGET_STRING && bytes > 1)
21038 { /* move up to 4 bytes at a time */
21039 move_bytes = (bytes > 4) ? 4 : bytes;
21040 gen_func.movmemsi = gen_movmemsi_1reg;
21042 else /* move 1 byte at a time */
21044 move_bytes = 1;
21045 mode = QImode;
21046 gen_func.mov = gen_movqi;
21049 src = adjust_address (orig_src, mode, offset);
21050 dest = adjust_address (orig_dest, mode, offset);
21052 if (mode != BLKmode)
21054 rtx tmp_reg = gen_reg_rtx (mode);
21056 emit_insn ((*gen_func.mov) (tmp_reg, src));
21057 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
21060 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
21062 int i;
21063 for (i = 0; i < num_reg; i++)
21064 emit_insn (stores[i]);
21065 num_reg = 0;
21068 if (mode == BLKmode)
21070 /* Move the address into scratch registers. The movmemsi
21071 patterns require zero offset. */
21072 if (!REG_P (XEXP (src, 0)))
21074 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
21075 src = replace_equiv_address (src, src_reg);
21077 set_mem_size (src, move_bytes);
21079 if (!REG_P (XEXP (dest, 0)))
21081 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
21082 dest = replace_equiv_address (dest, dest_reg);
21084 set_mem_size (dest, move_bytes);
21086 emit_insn ((*gen_func.movmemsi) (dest, src,
21087 GEN_INT (move_bytes & 31),
21088 align_rtx));
21092 return 1;
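/* Strategy example (illustrative): a 32-byte copy with only 4-byte
   alignment on a TARGET_STRING machine becomes a single
   movmemsi_8reg (lswi/stswi through r5..r12), while the same copy on
   a 64-bit non-string target with 8-byte alignment becomes four
   DImode load/store pairs, with stores batched MAX_MOVE_REG at a
   time. */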
21096 /* Return a string to perform a load_multiple operation.
21097 operands[0] is the vector.
21098 operands[1] is the source address.
21099 operands[2] is the first destination register. */
21101 const char *
21102 rs6000_output_load_multiple (rtx operands[3])
21104 /* We have to handle the case where the pseudo used to contain the address
21105 is assigned to one of the output registers. */
21106 int i, j;
21107 int words = XVECLEN (operands[0], 0);
21108 rtx xop[10];
21110 if (XVECLEN (operands[0], 0) == 1)
21111 return "lwz %2,0(%1)";
21113 for (i = 0; i < words; i++)
21114 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
21116 if (i == words-1)
21118 xop[0] = GEN_INT (4 * (words-1));
21119 xop[1] = operands[1];
21120 xop[2] = operands[2];
21121 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
21122 return "";
21124 else if (i == 0)
21126 xop[0] = GEN_INT (4 * (words-1));
21127 xop[1] = operands[1];
21128 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
21129 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
21130 return "";
21132 else
21134 for (j = 0; j < words; j++)
21135 if (j != i)
21137 xop[0] = GEN_INT (j * 4);
21138 xop[1] = operands[1];
21139 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
21140 output_asm_insn ("lwz %2,%0(%1)", xop);
21142 xop[0] = GEN_INT (i * 4);
21143 xop[1] = operands[1];
21144 output_asm_insn ("lwz %1,%0(%1)", xop);
21145 return "";
21149 return "lswi %2,%1,%N0";
21153 /* A validation routine: say whether CODE, a condition code, and MODE
21154 match. The other alternatives either don't make sense or should
21155 never be generated. */
21157 void
21158 validate_condition_mode (enum rtx_code code, machine_mode mode)
21160 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
21161 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
21162 && GET_MODE_CLASS (mode) == MODE_CC);
21164 /* These don't make sense. */
21165 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
21166 || mode != CCUNSmode);
21168 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
21169 || mode == CCUNSmode);
21171 gcc_assert (mode == CCFPmode
21172 || (code != ORDERED && code != UNORDERED
21173 && code != UNEQ && code != LTGT
21174 && code != UNGT && code != UNLT
21175 && code != UNGE && code != UNLE));
21177 /* These should never be generated except for
21178 flag_finite_math_only. */
21179 gcc_assert (mode != CCFPmode
21180 || flag_finite_math_only
21181 || (code != LE && code != GE
21182 && code != UNEQ && code != LTGT
21183 && code != UNGT && code != UNLT));
21185 /* These are invalid; the information is not there. */
21186 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
21190 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
21191 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
21192 non-null, store there the bit offset (counted from the right) where
21193 the single stretch of 1 bits begins; and similarly for B, the bit
21194 offset where it ends. */
21196 bool
21197 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
21199 unsigned HOST_WIDE_INT val = INTVAL (mask);
21200 unsigned HOST_WIDE_INT bit;
21201 int nb, ne;
21202 int n = GET_MODE_PRECISION (mode);
21204 if (mode != DImode && mode != SImode)
21205 return false;
21207 if (INTVAL (mask) >= 0)
21209 bit = val & -val;
21210 ne = exact_log2 (bit);
21211 nb = exact_log2 (val + bit);
21213 else if (val + 1 == 0)
21215 nb = n;
21216 ne = 0;
21218 else if (val & 1)
21220 val = ~val;
21221 bit = val & -val;
21222 nb = exact_log2 (bit);
21223 ne = exact_log2 (val + bit);
21225 else
21227 bit = val & -val;
21228 ne = exact_log2 (bit);
21229 if (val + bit == 0)
21230 nb = n;
21231 else
21232 nb = 0;
21235 nb--;
21237 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
21238 return false;
21240 if (b)
21241 *b = nb;
21242 if (e)
21243 *e = ne;
21245 return true;
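/* Two worked examples with hypothetical SImode masks: 0x0000ff00 is a
   single run of ones, giving *e = 8 (its lowest set bit) and *b = 15
   (its highest).  0xff0000ff wraps around; its complement 0x00ffff00 is
   analyzed instead, giving *e = 24 and *b = 7, and *e > *b is how
   callers recognize the wrap-around form.  */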
21248 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
21249 or rldicr instruction, to implement an AND with it in mode MODE. */
21251 bool
21252 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
21254 int nb, ne;
21256 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21257 return false;
21259 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
21260 does not wrap. */
21261 if (mode == DImode)
21262 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
21264 /* For SImode, rlwinm can do everything. */
21265 if (mode == SImode)
21266 return (nb < 32 && ne < 32);
21268 return false;
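/* Hypothetical DImode examples: 0xffffffff00000000 has nb = 63, so a
   single rldicr suffices; 0x00000000ffff0000 has nb = 31 and ne = 16,
   which rlwinm handles; 0x00ffffffff000000 (nb = 55, ne = 24) clears
   neither end and does not fit in the low 32 bits, so it is rejected.  */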
21271 /* Return the instruction template for an AND with mask in mode MODE, with
21272 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21274 const char *
21275 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
21277 int nb, ne;
21279 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
21280 gcc_unreachable ();
21282 if (mode == DImode && ne == 0)
21284 operands[3] = GEN_INT (63 - nb);
21285 if (dot)
21286 return "rldicl. %0,%1,0,%3";
21287 return "rldicl %0,%1,0,%3";
21290 if (mode == DImode && nb == 63)
21292 operands[3] = GEN_INT (63 - ne);
21293 if (dot)
21294 return "rldicr. %0,%1,0,%3";
21295 return "rldicr %0,%1,0,%3";
21298 if (nb < 32 && ne < 32)
21300 operands[3] = GEN_INT (31 - nb);
21301 operands[4] = GEN_INT (31 - ne);
21302 if (dot)
21303 return "rlwinm. %0,%1,0,%3,%4";
21304 return "rlwinm %0,%1,0,%3,%4";
21307 gcc_unreachable ();
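/* Continuing the hypothetical SImode example: mask 0x0000ff00 has
   nb = 15 and ne = 8, so operands[3] = 31 - 15 = 16 and operands[4] =
   31 - 8 = 23, and the template returned is "rlwinm %0,%1,0,16,23".  */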
21310 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
21311 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
21312 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
21314 bool
21315 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
21317 int nb, ne;
21319 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21320 return false;
21322 int n = GET_MODE_PRECISION (mode);
21323 int sh = -1;
21325 if (CONST_INT_P (XEXP (shift, 1)))
21327 sh = INTVAL (XEXP (shift, 1));
21328 if (sh < 0 || sh >= n)
21329 return false;
21332 rtx_code code = GET_CODE (shift);
21334 /* Convert any shift by 0 to a rotate, to simplify the code below. */
21335 if (sh == 0)
21336 code = ROTATE;
21338 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21339 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21340 code = ASHIFT;
21341 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21343 code = LSHIFTRT;
21344 sh = n - sh;
21347 /* DImode rotates need rld*. */
21348 if (mode == DImode && code == ROTATE)
21349 return (nb == 63 || ne == 0 || ne == sh);
21351 /* SImode rotates need rlw*. */
21352 if (mode == SImode && code == ROTATE)
21353 return (nb < 32 && ne < 32 && sh < 32);
21355 /* Wrap-around masks are only okay for rotates. */
21356 if (ne > nb)
21357 return false;
21359 /* Variable shifts are only okay for rotates. */
21360 if (sh < 0)
21361 return false;
21363 /* Don't allow ASHIFT if the mask is wrong for that. */
21364 if (code == ASHIFT && ne < sh)
21365 return false;
21367 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
21368 if the mask is wrong for that. */
21369 if (nb < 32 && ne < 32 && sh < 32
21370 && !(code == LSHIFTRT && nb >= 32 - sh))
21371 return true;
21373 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
21374 if the mask is wrong for that. */
21375 if (code == LSHIFTRT)
21376 sh = 64 - sh;
21377 if (nb == 63 || ne == 0 || ne == sh)
21378 return !(code == LSHIFTRT && nb >= sh);
21380 return false;
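/* A hypothetical example: an SImode (ashift x 8) under mask 0xffffff00
   has nb = 31, ne = 8 and sh = 8.  Since ne == sh, the shift discards
   no bits that the mask would keep, so the combination is accepted and
   becomes a single rlwinm.  */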
21383 /* Return the instruction template for a shift with mask in mode MODE, with
21384 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21386 const char *
21387 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
21389 int nb, ne;
21391 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21392 gcc_unreachable ();
21394 if (mode == DImode && ne == 0)
21396 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21397 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
21398 operands[3] = GEN_INT (63 - nb);
21399 if (dot)
21400 return "rld%I2cl. %0,%1,%2,%3";
21401 return "rld%I2cl %0,%1,%2,%3";
21404 if (mode == DImode && nb == 63)
21406 operands[3] = GEN_INT (63 - ne);
21407 if (dot)
21408 return "rld%I2cr. %0,%1,%2,%3";
21409 return "rld%I2cr %0,%1,%2,%3";
21412 if (mode == DImode
21413 && GET_CODE (operands[4]) != LSHIFTRT
21414 && CONST_INT_P (operands[2])
21415 && ne == INTVAL (operands[2]))
21417 operands[3] = GEN_INT (63 - nb);
21418 if (dot)
21419 return "rld%I2c. %0,%1,%2,%3";
21420 return "rld%I2c %0,%1,%2,%3";
21423 if (nb < 32 && ne < 32)
21425 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21426 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21427 operands[3] = GEN_INT (31 - nb);
21428 operands[4] = GEN_INT (31 - ne);
21429 /* This insn can also be a 64-bit rotate with mask that really makes
21430 it just a shift right (with mask); the %h below are to adjust for
21431 that situation (shift count is >= 32 in that case). */
21432 if (dot)
21433 return "rlw%I2nm. %0,%1,%h2,%3,%4";
21434 return "rlw%I2nm %0,%1,%h2,%3,%4";
21437 gcc_unreachable ();
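/* For the hypothetical example above (mask 0xffffff00, shift count 8),
   nb = 31 and ne = 8 give operands[3] = 0 and operands[4] = 23, so the
   template expands to "rlwinm %0,%1,8,0,23".  */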
21440 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
21441 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
21442 ASHIFT, or LSHIFTRT) in mode MODE. */
21444 bool
21445 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
21447 int nb, ne;
21449 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21450 return false;
21452 int n = GET_MODE_PRECISION (mode);
21454 int sh = INTVAL (XEXP (shift, 1));
21455 if (sh < 0 || sh >= n)
21456 return false;
21458 rtx_code code = GET_CODE (shift);
21460 /* Convert any shift by 0 to a rotate, to simplify the code below. */
21461 if (sh == 0)
21462 code = ROTATE;
21464 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21465 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21466 code = ASHIFT;
21467 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21469 code = LSHIFTRT;
21470 sh = n - sh;
21473 /* DImode rotates need rldimi. */
21474 if (mode == DImode && code == ROTATE)
21475 return (ne == sh);
21477 /* SImode rotates need rlwimi. */
21478 if (mode == SImode && code == ROTATE)
21479 return (nb < 32 && ne < 32 && sh < 32);
21481 /* Wrap-around masks are only okay for rotates. */
21482 if (ne > nb)
21483 return false;
21485 /* Don't allow ASHIFT if the mask is wrong for that. */
21486 if (code == ASHIFT && ne < sh)
21487 return false;
21489 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
21490 if the mask is wrong for that. */
21491 if (nb < 32 && ne < 32 && sh < 32
21492 && !(code == LSHIFTRT && nb >= 32 - sh))
21493 return true;
21495 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
21496 if the mask is wrong for that. */
21497 if (code == LSHIFTRT)
21498 sh = 64 - sh;
21499 if (ne == sh)
21500 return !(code == LSHIFTRT && nb >= sh);
21502 return false;
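/* A hypothetical example: inserting a byte via (rotate x 16) under mask
   0x00ff0000 in SImode gives nb = 23, ne = 16 and sh = 16; since
   ne == sh the rotated bits line up exactly with the mask, so a single
   rlwimi can perform the insert.  */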
21505 /* Return the instruction template for an insert with mask in mode MODE, with
21506 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21508 const char *
21509 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
21511 int nb, ne;
21513 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21514 gcc_unreachable ();
21516 /* Prefer rldimi because rlwimi is cracked. */
21517 if (TARGET_POWERPC64
21518 && (!dot || mode == DImode)
21519 && GET_CODE (operands[4]) != LSHIFTRT
21520 && ne == INTVAL (operands[2]))
21522 operands[3] = GEN_INT (63 - nb);
21523 if (dot)
21524 return "rldimi. %0,%1,%2,%3";
21525 return "rldimi %0,%1,%2,%3";
21528 if (nb < 32 && ne < 32)
21530 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21531 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21532 operands[3] = GEN_INT (31 - nb);
21533 operands[4] = GEN_INT (31 - ne);
21534 if (dot)
21535 return "rlwimi. %0,%1,%2,%3,%4";
21536 return "rlwimi %0,%1,%2,%3,%4";
21539 gcc_unreachable ();
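/* Continuing the hypothetical example: nb = 23 and ne = 16 give
   operands[3] = 8 and operands[4] = 15, producing
   "rlwimi %0,%1,16,8,15".  */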
21542 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
21543 using two machine instructions. */
21545 bool
21546 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
21548 /* There are two kinds of AND we can handle with two insns:
21549 1) those we can do with two rl* insns;
21550 2) ori[s];xori[s].
21552 We do not handle that last case yet. */
21554 /* If there is just one stretch of ones, we can do it. */
21555 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
21556 return true;
21558 /* Otherwise, fill in the lowest "hole"; if we can do the result with
21559 one insn, we can do the whole thing with two. */
21560 unsigned HOST_WIDE_INT val = INTVAL (c);
21561 unsigned HOST_WIDE_INT bit1 = val & -val;
21562 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21563 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21564 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21565 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
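/* A worked example with the hypothetical value 0x00ff00ff: bit1 = 0x1,
   bit2 = 0x100 (the low end of the hole), bit3 = 0x10000 (just above
   it), so the value tested is 0x00ff00ff + 0x10000 - 0x100 = 0x00ffffff,
   a single stretch of ones; the AND can therefore be done in two
   insns.  */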
21568 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
21569 If EXPAND is true, split rotate-and-mask instructions we generate to
21570 their constituent parts as well (this is used during expand); if DOT
21571 is 1, make the last insn a record-form instruction clobbering the
21572 destination GPR and setting the CC reg (from operands[3]); if 2, set
21573 that GPR as well as the CC reg. */
21575 void
21576 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
21578 gcc_assert (!(expand && dot));
21580 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
21582 /* If it is one stretch of ones, it is DImode; shift left, mask, then
21583 shift right. This generates better code than doing the masks without
21584 shifts, or shifting first right and then left. */
21585 int nb, ne;
21586 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
21588 gcc_assert (mode == DImode);
21590 int shift = 63 - nb;
21591 if (expand)
21593 rtx tmp1 = gen_reg_rtx (DImode);
21594 rtx tmp2 = gen_reg_rtx (DImode);
21595 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
21596 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
21597 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
21599 else
21601 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
21602 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
21603 emit_move_insn (operands[0], tmp);
21604 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
21605 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21607 return;
21610 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
21611 that does the rest. */
21612 unsigned HOST_WIDE_INT bit1 = val & -val;
21613 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21614 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21615 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21617 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
21618 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
21620 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
21622 /* Two "no-rotate"-and-mask instructions, for SImode. */
21623 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
21625 gcc_assert (mode == SImode);
21627 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21628 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
21629 emit_move_insn (reg, tmp);
21630 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21631 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21632 return;
21635 gcc_assert (mode == DImode);
21637 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
21638 insns; we have to do the first in SImode, because it wraps. */
21639 if (mask2 <= 0xffffffff
21640 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
21642 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21643 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
21644 GEN_INT (mask1));
21645 rtx reg_low = gen_lowpart (SImode, reg);
21646 emit_move_insn (reg_low, tmp);
21647 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21648 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21649 return;
21652 /* Two rld* insns: rotate, clear the hole in the middle (which now is
21653 at the top end), rotate back and clear the other hole. */
21654 int right = exact_log2 (bit3);
21655 int left = 64 - right;
21657 /* Rotate the mask too. */
21658 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
21660 if (expand)
21662 rtx tmp1 = gen_reg_rtx (DImode);
21663 rtx tmp2 = gen_reg_rtx (DImode);
21664 rtx tmp3 = gen_reg_rtx (DImode);
21665 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
21666 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
21667 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
21668 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
21670 else
21672 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
21673 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
21674 emit_move_insn (operands[0], tmp);
21675 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
21676 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
21677 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
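/* Continuing the hypothetical 0x00ff00ff example: mask2 = 0x00ffffff and
   mask1 = ~0xff00.  mask1 wraps around in DImode but is a valid SImode
   rlwinm mask, so the SImode low-part branch above applies: one rlwinm
   for mask1, then one rl* insn for mask2.  */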
21681 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
21682 for lfq and stfq insns iff the registers are hard registers. */
21684 int
21685 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
21687 /* We might have been passed a SUBREG. */
21688 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
21689 return 0;
21691 /* We might have been passed non-floating-point registers. */
21692 if (!FP_REGNO_P (REGNO (reg1))
21693 || !FP_REGNO_P (REGNO (reg2)))
21694 return 0;
21696 return (REGNO (reg1) == REGNO (reg2) - 1);
21699 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insns.
21700 addr1 and addr2 must be in consecutive memory locations
21701 (addr2 == addr1 + 8). */
21703 int
21704 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
21706 rtx addr1, addr2;
21707 unsigned int reg1, reg2;
21708 int offset1, offset2;
21710 /* The mems cannot be volatile. */
21711 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
21712 return 0;
21714 addr1 = XEXP (mem1, 0);
21715 addr2 = XEXP (mem2, 0);
21717 /* Extract an offset (if used) from the first addr. */
21718 if (GET_CODE (addr1) == PLUS)
21720 /* If not a REG, return zero. */
21721 if (GET_CODE (XEXP (addr1, 0)) != REG)
21722 return 0;
21723 else
21725 reg1 = REGNO (XEXP (addr1, 0));
21726 /* The offset must be constant! */
21727 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
21728 return 0;
21729 offset1 = INTVAL (XEXP (addr1, 1));
21732 else if (GET_CODE (addr1) != REG)
21733 return 0;
21734 else
21736 reg1 = REGNO (addr1);
21737 /* This was a simple (mem (reg)) expression. Offset is 0. */
21738 offset1 = 0;
21741 /* And now for the second addr. */
21742 if (GET_CODE (addr2) == PLUS)
21744 /* If not a REG, return zero. */
21745 if (GET_CODE (XEXP (addr2, 0)) != REG)
21746 return 0;
21747 else
21749 reg2 = REGNO (XEXP (addr2, 0));
21750 /* The offset must be constant. */
21751 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
21752 return 0;
21753 offset2 = INTVAL (XEXP (addr2, 1));
21756 else if (GET_CODE (addr2) != REG)
21757 return 0;
21758 else
21760 reg2 = REGNO (addr2);
21761 /* This was a simple (mem (reg)) expression. Offset is 0. */
21762 offset2 = 0;
21765 /* Both of these must have the same base register. */
21766 if (reg1 != reg2)
21767 return 0;
21769 /* The offset for the second addr must be 8 more than the first addr. */
21770 if (offset2 != offset1 + 8)
21771 return 0;
21773 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
21774 instructions. */
21775 return 1;
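/* For example (hypothetical rtl): (mem (plus r3 8)) and
   (mem (plus r3 16)) share the base register r3 and differ by exactly
   8 bytes, so the pair qualifies; (mem r3) paired with
   (mem (plus r4 8)) fails the common-base-register test.  */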
21779 rtx
21780 rs6000_secondary_memory_needed_rtx (machine_mode mode)
21782 static bool eliminated = false;
21783 rtx ret;
21785 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
21786 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21787 else
21789 rtx mem = cfun->machine->sdmode_stack_slot;
21790 gcc_assert (mem != NULL_RTX);
21792 if (!eliminated)
21794 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
21795 cfun->machine->sdmode_stack_slot = mem;
21796 eliminated = true;
21798 ret = mem;
21801 if (TARGET_DEBUG_ADDR)
21803 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
21804 GET_MODE_NAME (mode));
21805 if (!ret)
21806 fprintf (stderr, "\tNULL_RTX\n");
21807 else
21808 debug_rtx (ret);
21811 return ret;
21814 /* Return the mode to be used for memory when a secondary memory
21815 location is needed. For SDmode values we need to use DDmode; in
21816 all other cases we can use the same mode. */
21817 machine_mode
21818 rs6000_secondary_memory_needed_mode (machine_mode mode)
21820 if (lra_in_progress && mode == SDmode)
21821 return DDmode;
21822 return mode;
21825 static tree
21826 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
21828 /* Don't walk into types. */
21829 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
21831 *walk_subtrees = 0;
21832 return NULL_TREE;
21835 switch (TREE_CODE (*tp))
21837 case VAR_DECL:
21838 case PARM_DECL:
21839 case FIELD_DECL:
21840 case RESULT_DECL:
21841 case SSA_NAME:
21842 case REAL_CST:
21843 case MEM_REF:
21844 case VIEW_CONVERT_EXPR:
21845 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
21846 return *tp;
21847 break;
21848 default:
21849 break;
21852 return NULL_TREE;
21855 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
21856 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
21857 only work on the traditional altivec registers, note if an altivec register
21858 was chosen. */
21860 static enum rs6000_reg_type
21861 register_to_reg_type (rtx reg, bool *is_altivec)
21863 HOST_WIDE_INT regno;
21864 enum reg_class rclass;
21866 if (GET_CODE (reg) == SUBREG)
21867 reg = SUBREG_REG (reg);
21869 if (!REG_P (reg))
21870 return NO_REG_TYPE;
21872 regno = REGNO (reg);
21873 if (regno >= FIRST_PSEUDO_REGISTER)
21875 if (!lra_in_progress && !reload_in_progress && !reload_completed)
21876 return PSEUDO_REG_TYPE;
21878 regno = true_regnum (reg);
21879 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
21880 return PSEUDO_REG_TYPE;
21883 gcc_assert (regno >= 0);
21885 if (is_altivec && ALTIVEC_REGNO_P (regno))
21886 *is_altivec = true;
21888 rclass = rs6000_regno_regclass[regno];
21889 return reg_class_to_reg_type[(int)rclass];
21892 /* Helper function to return the cost of adding a TOC entry address. */
21894 static inline int
21895 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
21897 int ret;
21899 if (TARGET_CMODEL != CMODEL_SMALL)
21900 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
21902 else
21903 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
21905 return ret;
21908 /* Helper function for rs6000_secondary_reload to determine whether the memory
21909 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
21910 needs reloading. Return negative if the memory is not handled by the
21911 memory helper functions (so a different reload method should be tried),
21912 0 if no additional instructions are needed, and positive to give the
21913 extra cost for the memory. */
21915 static int
21916 rs6000_secondary_reload_memory (rtx addr,
21917 enum reg_class rclass,
21918 machine_mode mode)
21920 int extra_cost = 0;
21921 rtx reg, and_arg, plus_arg0, plus_arg1;
21922 addr_mask_type addr_mask;
21923 const char *type = NULL;
21924 const char *fail_msg = NULL;
21926 if (GPR_REG_CLASS_P (rclass))
21927 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21929 else if (rclass == FLOAT_REGS)
21930 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21932 else if (rclass == ALTIVEC_REGS)
21933 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21935 /* For the combined VSX_REGS, turn off Altivec AND -16. */
21936 else if (rclass == VSX_REGS)
21937 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
21938 & ~RELOAD_REG_AND_M16);
21940 /* If the register allocator hasn't made up its mind yet on the register
21941 class to use, fall back on defaults. */
21942 else if (rclass == NO_REGS)
21944 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
21945 & ~RELOAD_REG_AND_M16);
21947 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
21948 addr_mask &= ~(RELOAD_REG_INDEXED
21949 | RELOAD_REG_PRE_INCDEC
21950 | RELOAD_REG_PRE_MODIFY);
21953 else
21954 addr_mask = 0;
21956 /* If the mode isn't valid in this register class, just return now. */
21957 if ((addr_mask & RELOAD_REG_VALID) == 0)
21959 if (TARGET_DEBUG_ADDR)
21961 fprintf (stderr,
21962 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21963 "not valid in class\n",
21964 GET_MODE_NAME (mode), reg_class_names[rclass]);
21965 debug_rtx (addr);
21968 return -1;
21971 switch (GET_CODE (addr))
21973 /* Does the register class support auto update forms for this mode? We
21974 don't need a scratch register, since the powerpc only supports
21975 PRE_INC, PRE_DEC, and PRE_MODIFY. */
21976 case PRE_INC:
21977 case PRE_DEC:
21978 reg = XEXP (addr, 0);
21979 if (!base_reg_operand (addr, GET_MODE (reg)))
21981 fail_msg = "no base register #1";
21982 extra_cost = -1;
21985 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21987 extra_cost = 1;
21988 type = "update";
21990 break;
21992 case PRE_MODIFY:
21993 reg = XEXP (addr, 0);
21994 plus_arg1 = XEXP (addr, 1);
21995 if (!base_reg_operand (reg, GET_MODE (reg))
21996 || GET_CODE (plus_arg1) != PLUS
21997 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
21999 fail_msg = "bad PRE_MODIFY";
22000 extra_cost = -1;
22003 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22005 extra_cost = 1;
22006 type = "update";
22008 break;
22010 /* Do we need to simulate AND -16 to clear the bottom address bits used
22011 in VMX load/stores? Only allow the AND for vector sizes. */
22012 case AND:
22013 and_arg = XEXP (addr, 0);
22014 if (GET_MODE_SIZE (mode) != 16
22015 || GET_CODE (XEXP (addr, 1)) != CONST_INT
22016 || INTVAL (XEXP (addr, 1)) != -16)
22018 fail_msg = "bad Altivec AND #1";
22019 extra_cost = -1;
22022 if (rclass != ALTIVEC_REGS)
22024 if (legitimate_indirect_address_p (and_arg, false))
22025 extra_cost = 1;
22027 else if (legitimate_indexed_address_p (and_arg, false))
22028 extra_cost = 2;
22030 else
22032 fail_msg = "bad Altivec AND #2";
22033 extra_cost = -1;
22036 type = "and";
22038 break;
22040 /* If this is an indirect address, make sure it is a base register. */
22041 case REG:
22042 case SUBREG:
22043 if (!legitimate_indirect_address_p (addr, false))
22045 extra_cost = 1;
22046 type = "move";
22048 break;
22050 /* If this is an indexed address, make sure the register class can handle
22051 indexed addresses for this mode. */
22052 case PLUS:
22053 plus_arg0 = XEXP (addr, 0);
22054 plus_arg1 = XEXP (addr, 1);
22056 /* (plus (plus (reg) (constant)) (constant)) is generated during
22057 push_reload processing, so handle it now. */
22058 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
22060 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22062 extra_cost = 1;
22063 type = "offset";
22067 /* (plus (plus (reg) (constant)) (reg)) is also generated during
22068 push_reload processing, so handle it now. */
22069 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
22071 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22073 extra_cost = 1;
22074 type = "indexed #2";
22078 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
22080 fail_msg = "no base register #2";
22081 extra_cost = -1;
22084 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
22086 if ((addr_mask & RELOAD_REG_INDEXED) == 0
22087 || !legitimate_indexed_address_p (addr, false))
22089 extra_cost = 1;
22090 type = "indexed";
22094 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
22095 && CONST_INT_P (plus_arg1))
22097 if (!quad_address_offset_p (INTVAL (plus_arg1)))
22099 extra_cost = 1;
22100 type = "vector d-form offset";
22104 /* Make sure the register class can handle offset addresses. */
22105 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22107 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22109 extra_cost = 1;
22110 type = "offset #2";
22114 else
22116 fail_msg = "bad PLUS";
22117 extra_cost = -1;
22120 break;
22122 case LO_SUM:
22123 /* Quad offsets are restricted and can't handle normal addresses. */
22124 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22126 extra_cost = -1;
22127 type = "vector d-form lo_sum";
22130 else if (!legitimate_lo_sum_address_p (mode, addr, false))
22132 fail_msg = "bad LO_SUM";
22133 extra_cost = -1;
22136 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22138 extra_cost = 1;
22139 type = "lo_sum";
22141 break;
22143 /* Static addresses need to create a TOC entry. */
22144 case CONST:
22145 case SYMBOL_REF:
22146 case LABEL_REF:
22147 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22149 extra_cost = -1;
22150 type = "vector d-form lo_sum #2";
22153 else
22155 type = "address";
22156 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
22158 break;
22160 /* TOC references look like offsettable memory. */
22161 case UNSPEC:
22162 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
22164 fail_msg = "bad UNSPEC";
22165 extra_cost = -1;
22168 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22170 extra_cost = -1;
22171 type = "vector d-form lo_sum #3";
22174 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22176 extra_cost = 1;
22177 type = "toc reference";
22179 break;
22181 default:
22183 fail_msg = "bad address";
22184 extra_cost = -1;
22188 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
22190 if (extra_cost < 0)
22191 fprintf (stderr,
22192 "rs6000_secondary_reload_memory error: mode = %s, "
22193 "class = %s, addr_mask = '%s', %s\n",
22194 GET_MODE_NAME (mode),
22195 reg_class_names[rclass],
22196 rs6000_debug_addr_mask (addr_mask, false),
22197 (fail_msg != NULL) ? fail_msg : "<bad address>");
22199 else
22200 fprintf (stderr,
22201 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
22202 "addr_mask = '%s', extra cost = %d, %s\n",
22203 GET_MODE_NAME (mode),
22204 reg_class_names[rclass],
22205 rs6000_debug_addr_mask (addr_mask, false),
22206 extra_cost,
22207 (type) ? type : "<none>");
22209 debug_rtx (addr);
22212 return extra_cost;
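/* A hypothetical illustration: for a V4SI mem whose address is
   (and (plus r3 r4) -16), ALTIVEC_REGS needs no extra work because the
   VMX load/store itself ignores the low address bits, while other
   classes must first compute the indexed sum and then simulate the AND,
   giving extra_cost = 2.  */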
22215 /* Helper function for rs6000_secondary_reload to return true if a move to a
22216 different register class is really a simple move. */
22218 static bool
22219 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
22220 enum rs6000_reg_type from_type,
22221 machine_mode mode)
22223 int size = GET_MODE_SIZE (mode);
22225 /* Handle the various direct moves that are available. In this function, we
22226 only look at cases where we don't need any extra registers, and one or more
22227 simple move insns are issued. Historically, small integers were not allowed
22228 in FPR/VSX registers. Single precision binary floating point is not a simple
22229 move because we need to convert to the single precision memory layout.
22230 The 4-byte SDmode can be moved. TDmode values are disallowed since they
22231 need special direct move handling, which we do not support yet. */
22232 if (TARGET_DIRECT_MOVE
22233 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22234 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
22236 if (TARGET_POWERPC64)
22238 /* ISA 2.07: MTVSRD or MFVSRD. */
22239 if (size == 8)
22240 return true;
22242 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
22243 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
22244 return true;
22247 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22248 if (TARGET_VSX_SMALL_INTEGER)
22250 if (mode == SImode)
22251 return true;
22253 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
22254 return true;
22257 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22258 if (mode == SDmode)
22259 return true;
22262 /* Power6+: MFTGPR or MFFGPR. */
22263 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
22264 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
22265 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22266 return true;
22268 /* Move to/from SPR. */
22269 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
22270 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
22271 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22272 return true;
22274 return false;
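/* For instance (hypothetically), a DImode move between a GPR and a VSX
   register on a 64-bit ISA 2.07 target is a single mtvsrd or mfvsrd and
   so counts as simple; a TDmode move never does, and is left to the
   direct-move or memory paths instead.  */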
22277 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
22278 special direct moves that involve allocating an extra register. If such a
22279 move exists, record its insn code and extra cost in SRI and return true;
22280 otherwise return false. */
22282 static bool
22283 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
22284 enum rs6000_reg_type from_type,
22285 machine_mode mode,
22286 secondary_reload_info *sri,
22287 bool altivec_p)
22289 bool ret = false;
22290 enum insn_code icode = CODE_FOR_nothing;
22291 int cost = 0;
22292 int size = GET_MODE_SIZE (mode);
22294 if (TARGET_POWERPC64 && size == 16)
22296 /* Handle moving 128-bit values from GPRs to VSX registers on
22297 ISA 2.07 (power8, power9) when running in 64-bit mode using
22298 XXPERMDI to glue the two 64-bit values back together. */
22299 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22301 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
22302 icode = reg_addr[mode].reload_vsx_gpr;
22305 /* Handle moving 128-bit values from VSX registers to GPRs on
22306 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
22307 bottom 64-bit value. */
22308 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22310 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
22311 icode = reg_addr[mode].reload_gpr_vsx;
22315 else if (TARGET_POWERPC64 && mode == SFmode)
22317 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22319 cost = 3; /* xscvdpspn, mfvsrd, and. */
22320 icode = reg_addr[mode].reload_gpr_vsx;
22323 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22325 cost = 2; /* mtvsrwz, xscvspdpn. */
22326 icode = reg_addr[mode].reload_vsx_gpr;
22330 else if (!TARGET_POWERPC64 && size == 8)
22332 /* Handle moving 64-bit values from GPRs to floating point registers on
22333 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
22334 32-bit values back together. Altivec register classes must be handled
22335 specially since a different instruction is used, and the secondary
22336 reload support requires a single instruction class in the scratch
22337 register constraint. However, right now TFmode is not allowed in
22338 Altivec registers, so the pattern will never match. */
22339 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
22341 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
22342 icode = reg_addr[mode].reload_fpr_gpr;
22346 if (icode != CODE_FOR_nothing)
22348 ret = true;
22349 if (sri)
22351 sri->icode = icode;
22352 sri->extra_cost = cost;
22356 return ret;
22359 /* Return whether a move between two register classes can be done either
22360 directly (simple move) or via a pattern that uses a single extra temporary
22361 (using ISA 2.07's direct move in this case). */
22363 static bool
22364 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
22365 enum rs6000_reg_type from_type,
22366 machine_mode mode,
22367 secondary_reload_info *sri,
22368 bool altivec_p)
22370 /* Fall back to load/store reloads if either type is not a register. */
22371 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
22372 return false;
22374 /* If we haven't allocated registers yet, assume the move can be done for the
22375 standard register types. */
22376 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
22377 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
22378 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
22379 return true;
22381 /* A move within the same set of registers is a simple move for
22382 non-specialized registers. */
22383 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
22384 return true;
22386 /* Check whether a simple move can be done directly. */
22387 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
22389 if (sri)
22391 sri->icode = CODE_FOR_nothing;
22392 sri->extra_cost = 0;
22394 return true;
22397 /* Now check if we can do it in a few steps. */
22398 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
22399 altivec_p);
22402 /* Inform reload about cases where moving X with a mode MODE to a register in
22403 RCLASS requires an extra scratch or immediate register. Return the class
22404 needed for the immediate register.
22406 For VSX and Altivec, we may need a register to convert sp+offset into
22407 reg+sp.
22409 For misaligned 64-bit gpr loads and stores we need a register to
22410 convert an offset address to indirect. */
22412 static reg_class_t
22413 rs6000_secondary_reload (bool in_p,
22414 rtx x,
22415 reg_class_t rclass_i,
22416 machine_mode mode,
22417 secondary_reload_info *sri)
22419 enum reg_class rclass = (enum reg_class) rclass_i;
22420 reg_class_t ret = ALL_REGS;
22421 enum insn_code icode;
22422 bool default_p = false;
22423 bool done_p = false;
22425 /* Allow subreg of memory before/during reload. */
22426 bool memory_p = (MEM_P (x)
22427 || (!reload_completed && GET_CODE (x) == SUBREG
22428 && MEM_P (SUBREG_REG (x))));
22430 sri->icode = CODE_FOR_nothing;
22431 sri->t_icode = CODE_FOR_nothing;
22432 sri->extra_cost = 0;
22433 icode = ((in_p)
22434 ? reg_addr[mode].reload_load
22435 : reg_addr[mode].reload_store);
22437 if (REG_P (x) || register_operand (x, mode))
22439 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
22440 bool altivec_p = (rclass == ALTIVEC_REGS);
22441 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
22443 if (!in_p)
22444 std::swap (to_type, from_type);
22446 /* Can we do a direct move of some sort? */
22447 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
22448 altivec_p))
22450 icode = (enum insn_code)sri->icode;
22451 default_p = false;
22452 done_p = true;
22453 ret = NO_REGS;
22457 /* Make sure 0.0 is not reloaded or forced into memory. */
22458 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
22460 ret = NO_REGS;
22461 default_p = false;
22462 done_p = true;
22465 /* If this is a scalar floating point value and we want to load it into the
22466 traditional Altivec registers, do it via a move via a traditional floating
22467 point register, unless we have D-form addressing. Also make sure that
22468 non-zero constants use an FPR. */
22469 if (!done_p && reg_addr[mode].scalar_in_vmx_p
22470 && !mode_supports_vmx_dform (mode)
22471 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
22472 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
22474 ret = FLOAT_REGS;
22475 default_p = false;
22476 done_p = true;
22479 /* Handle reload of load/stores if we have reload helper functions. */
22480 if (!done_p && icode != CODE_FOR_nothing && memory_p)
22482 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
22483 mode);
22485 if (extra_cost >= 0)
22487 done_p = true;
22488 ret = NO_REGS;
22489 if (extra_cost > 0)
22491 sri->extra_cost = extra_cost;
22492 sri->icode = icode;
22497 /* Handle unaligned loads and stores of integer registers. */
22498 if (!done_p && TARGET_POWERPC64
22499 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22500 && memory_p
22501 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
22503 rtx addr = XEXP (x, 0);
22504 rtx off = address_offset (addr);
22506 if (off != NULL_RTX)
22508 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22509 unsigned HOST_WIDE_INT offset = INTVAL (off);
22511 /* We need a secondary reload when our legitimate_address_p
22512 says the address is good (as otherwise the entire address
22513 will be reloaded), and the offset is not a multiple of
22514 four or we have an address wrap. Address wrap will only
22515 occur for LO_SUMs since legitimate_offset_address_p
22516 rejects addresses for 16-byte mems that will wrap. */
22517 if (GET_CODE (addr) == LO_SUM
22518 ? (1 /* legitimate_address_p allows any offset for lo_sum */
22519 && ((offset & 3) != 0
22520 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
22521 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
22522 && (offset & 3) != 0))
22524 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
22525 if (in_p)
22526 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
22527 : CODE_FOR_reload_di_load);
22528 else
22529 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
22530 : CODE_FOR_reload_di_store);
22531 sri->extra_cost = 2;
22532 ret = NO_REGS;
22533 done_p = true;
22535 else
22536 default_p = true;
22538 else
22539 default_p = true;
22542 if (!done_p && !TARGET_POWERPC64
22543 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22544 && memory_p
22545 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
22547 rtx addr = XEXP (x, 0);
22548 rtx off = address_offset (addr);
22550 if (off != NULL_RTX)
22552 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22553 unsigned HOST_WIDE_INT offset = INTVAL (off);
22555 /* We need a secondary reload when our legitimate_address_p
22556 says the address is good (as otherwise the entire address
22557 will be reloaded), and we have a wrap.
22559 legitimate_lo_sum_address_p allows LO_SUM addresses to
22560 have any offset so test for wrap in the low 16 bits.
22562 legitimate_offset_address_p checks for the range
22563 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
22564 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
22565 [0x7ff4,0x7fff] respectively, so test for the
22566 intersection of these ranges, [0x7ffc,0x7fff] and
22567 [0x7ff4,0x7ff7] respectively.
22569 Note that the address we see here may have been
22570 manipulated by legitimize_reload_address. */
22571 if (GET_CODE (addr) == LO_SUM
22572 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
22573 : offset - (0x8000 - extra) < UNITS_PER_WORD)
22575 if (in_p)
22576 sri->icode = CODE_FOR_reload_si_load;
22577 else
22578 sri->icode = CODE_FOR_reload_si_store;
22579 sri->extra_cost = 2;
22580 ret = NO_REGS;
22581 done_p = true;
22583 else
22584 default_p = true;
22586 else
22587 default_p = true;
22590 if (!done_p)
22591 default_p = true;
22593 if (default_p)
22594 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
22596 gcc_assert (ret != ALL_REGS);
22598 if (TARGET_DEBUG_ADDR)
22600 fprintf (stderr,
22601 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
22602 "mode = %s",
22603 reg_class_names[ret],
22604 in_p ? "true" : "false",
22605 reg_class_names[rclass],
22606 GET_MODE_NAME (mode));
22608 if (reload_completed)
22609 fputs (", after reload", stderr);
22611 if (!done_p)
22612 fputs (", done_p not set", stderr);
22614 if (default_p)
22615 fputs (", default secondary reload", stderr);
22617 if (sri->icode != CODE_FOR_nothing)
22618 fprintf (stderr, ", reload func = %s, extra cost = %d",
22619 insn_data[sri->icode].name, sri->extra_cost);
22621 else if (sri->extra_cost > 0)
22622 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
22624 fputs ("\n", stderr);
22625 debug_rtx (x);
22628 return ret;
22631 /* Better tracing for rs6000_secondary_reload_inner. */
22633 static void
22634 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
22635 bool store_p)
22637 rtx set, clobber;
22639 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
22641 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
22642 store_p ? "store" : "load");
22644 if (store_p)
22645 set = gen_rtx_SET (mem, reg);
22646 else
22647 set = gen_rtx_SET (reg, mem);
22649 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
22650 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
22653 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
22654 ATTRIBUTE_NORETURN;
22656 static void
22657 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
22658 bool store_p)
22660 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
22661 gcc_unreachable ();
22664 /* Fix up reload addresses for values in GPR, FPR, and VMX registers that have
22665 reload helper functions. These were identified in
22666 rs6000_secondary_reload_memory, and if reload decided to use the secondary
22667 reload, it calls the insns:
22668 reload_<RELOAD:mode>_<P:mptrsize>_store
22669 reload_<RELOAD:mode>_<P:mptrsize>_load
22671 which in turn calls this function, to do whatever is necessary to create
22672 valid addresses. */
22674 void
22675 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
22677 int regno = true_regnum (reg);
22678 machine_mode mode = GET_MODE (reg);
22679 addr_mask_type addr_mask;
22680 rtx addr;
22681 rtx new_addr;
22682 rtx op_reg, op0, op1;
22683 rtx and_op;
22684 rtx cc_clobber;
22685 rtvec rv;
22687 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
22688 || !base_reg_operand (scratch, GET_MODE (scratch)))
22689 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22691 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
22692 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
22694 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
22695 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
22697 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
22698 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
22700 else
22701 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22703 /* Make sure the mode is valid in this register class. */
22704 if ((addr_mask & RELOAD_REG_VALID) == 0)
22705 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22707 if (TARGET_DEBUG_ADDR)
22708 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
22710 new_addr = addr = XEXP (mem, 0);
22711 switch (GET_CODE (addr))
22713 /* Does the register class support auto update forms for this mode? If
22714 not, do the update now. We don't need a scratch register, since the
22715 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
22716 case PRE_INC:
22717 case PRE_DEC:
22718 op_reg = XEXP (addr, 0);
22719 if (!base_reg_operand (op_reg, Pmode))
22720 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22722 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
22724 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
22725 new_addr = op_reg;
22727 break;
22729 case PRE_MODIFY:
22730 op0 = XEXP (addr, 0);
22731 op1 = XEXP (addr, 1);
22732 if (!base_reg_operand (op0, Pmode)
22733 || GET_CODE (op1) != PLUS
22734 || !rtx_equal_p (op0, XEXP (op1, 0)))
22735 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22737 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22739 emit_insn (gen_rtx_SET (op0, op1));
22740 new_addr = reg;
22742 break;
22744 /* Do we need to simulate AND -16 to clear the bottom address bits used
22745 in VMX load/stores? */
22746 case AND:
22747 op0 = XEXP (addr, 0);
22748 op1 = XEXP (addr, 1);
22749 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
22751 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
22752 op_reg = op0;
22754 else if (GET_CODE (op1) == PLUS)
22756 emit_insn (gen_rtx_SET (scratch, op1));
22757 op_reg = scratch;
22760 else
22761 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22763 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
22764 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
22765 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
22766 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
22767 new_addr = scratch;
22769 break;
22771 /* If this is an indirect address, make sure it is a base register. */
22772 case REG:
22773 case SUBREG:
22774 if (!base_reg_operand (addr, GET_MODE (addr)))
22776 emit_insn (gen_rtx_SET (scratch, addr));
22777 new_addr = scratch;
22779 break;
22781 /* If this is an indexed address, make sure the register class can handle
22782 indexed addresses for this mode. */
22783 case PLUS:
22784 op0 = XEXP (addr, 0);
22785 op1 = XEXP (addr, 1);
22786 if (!base_reg_operand (op0, Pmode))
22787 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22789 else if (int_reg_operand (op1, Pmode))
22791 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22793 emit_insn (gen_rtx_SET (scratch, addr));
22794 new_addr = scratch;
22798 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
22800 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
22801 || !quad_address_p (addr, mode, false))
22803 emit_insn (gen_rtx_SET (scratch, addr));
22804 new_addr = scratch;
22808 /* Make sure the register class can handle offset addresses. */
22809 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22811 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22813 emit_insn (gen_rtx_SET (scratch, addr));
22814 new_addr = scratch;
22818 else
22819 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22821 break;
22823 case LO_SUM:
22824 op0 = XEXP (addr, 0);
22825 op1 = XEXP (addr, 1);
22826 if (!base_reg_operand (op0, Pmode))
22827 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22829 else if (int_reg_operand (op1, Pmode))
22831 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22833 emit_insn (gen_rtx_SET (scratch, addr));
22834 new_addr = scratch;
22838 /* Quad offsets are restricted and can't handle normal addresses. */
22839 else if (mode_supports_vsx_dform_quad (mode))
22841 emit_insn (gen_rtx_SET (scratch, addr));
22842 new_addr = scratch;
22845 /* Make sure the register class can handle offset addresses. */
22846 else if (legitimate_lo_sum_address_p (mode, addr, false))
22848 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22850 emit_insn (gen_rtx_SET (scratch, addr));
22851 new_addr = scratch;
22855 else
22856 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22858 break;
22860 case SYMBOL_REF:
22861 case CONST:
22862 case LABEL_REF:
22863 rs6000_emit_move (scratch, addr, Pmode);
22864 new_addr = scratch;
22865 break;
22867 default:
22868 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22871 /* Adjust the address if it changed. */
22872 if (addr != new_addr)
22874 mem = replace_equiv_address_nv (mem, new_addr);
22875 if (TARGET_DEBUG_ADDR)
22876 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
22879 /* Now create the move. */
22880 if (store_p)
22881 emit_insn (gen_rtx_SET (mem, reg));
22882 else
22883 emit_insn (gen_rtx_SET (reg, mem));
22885 return;
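/* A hypothetical example: reloading a vector store through the address
   (plus r1 32) for a register class without offset addressing falls
   into the PLUS case above; the whole sum is copied into the scratch
   register and the mem is rewritten to (mem scratch) before the move is
   emitted.  */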
22888 /* Convert reloads involving 64-bit gprs and misaligned offset
22889 addressing, or multiple 32-bit gprs and offsets that are too large,
22890 to use indirect addressing. */
22892 void
22893 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
22895 int regno = true_regnum (reg);
22896 enum reg_class rclass;
22897 rtx addr;
22898 rtx scratch_or_premodify = scratch;
22900 if (TARGET_DEBUG_ADDR)
22902 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
22903 store_p ? "store" : "load");
22904 fprintf (stderr, "reg:\n");
22905 debug_rtx (reg);
22906 fprintf (stderr, "mem:\n");
22907 debug_rtx (mem);
22908 fprintf (stderr, "scratch:\n");
22909 debug_rtx (scratch);
22912 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
22913 gcc_assert (GET_CODE (mem) == MEM);
22914 rclass = REGNO_REG_CLASS (regno);
22915 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
22916 addr = XEXP (mem, 0);
22918 if (GET_CODE (addr) == PRE_MODIFY)
22920 gcc_assert (REG_P (XEXP (addr, 0))
22921 && GET_CODE (XEXP (addr, 1)) == PLUS
22922 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
22923 scratch_or_premodify = XEXP (addr, 0);
22924 if (!HARD_REGISTER_P (scratch_or_premodify))
22925 /* If we have a pseudo here then reload will have arranged
22926 to have it replaced, but only in the original insn.
22927 Use the replacement here too. */
22928 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
22930 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
22931 expressions from the original insn, without unsharing them.
22932 Any RTL that points into the original insn will of course
22933 have register replacements applied. That is why we don't
22934 need to look for replacements under the PLUS. */
22935 addr = XEXP (addr, 1);
22937 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
22939 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
22941 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
22943 /* Now create the move. */
22944 if (store_p)
22945 emit_insn (gen_rtx_SET (mem, reg));
22946 else
22947 emit_insn (gen_rtx_SET (reg, mem));
22949 return;
22952 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
22953 this function has any SDmode references. If we are on a power7 or later, we
22954 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
22955 can load/store the value. */
22957 static void
22958 rs6000_alloc_sdmode_stack_slot (void)
22960 tree t;
22961 basic_block bb;
22962 gimple_stmt_iterator gsi;
22964 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
22965 /* We use a different approach for dealing with the secondary
22966 memory in LRA. */
22967 if (ira_use_lra_p)
22968 return;
22970 if (TARGET_NO_SDMODE_STACK)
22971 return;
22973 FOR_EACH_BB_FN (bb, cfun)
22974 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
22976 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
22977 if (ret)
22979 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22980 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22981 SDmode, 0);
22982 return;
22986 /* Check for any SDmode parameters of the function. */
22987 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
22989 if (TREE_TYPE (t) == error_mark_node)
22990 continue;
22992 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
22993 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
22995 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22996 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22997 SDmode, 0);
22998 return;
23003 static void
23004 rs6000_instantiate_decls (void)
23006 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
23007 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
23010 /* Given an rtx X being reloaded into a reg required to be
23011 in class CLASS, return the class of reg to actually use.
23012 In general this is just CLASS; but on some machines
23013 in some cases it is preferable to use a more restrictive class.
23015 On the RS/6000, we have to return NO_REGS when we want to reload a
23016 floating-point CONST_DOUBLE to force it to be copied to memory.
23018 We also don't want to reload integer values into floating-point
23019 registers if we can at all help it. In fact, this can
23020 cause reload to die if it tries to generate a reload of CTR
23021 into an FP register and discovers it doesn't have the memory location
23022 required.
23024 ??? Would it be a good idea to have reload do the converse, that is
23025 try to reload floating modes into FP registers if possible?
23028 static enum reg_class
23029 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
23031 machine_mode mode = GET_MODE (x);
23032 bool is_constant = CONSTANT_P (x);
23034 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
23035 reload class for it. */
23036 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
23037 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
23038 return NO_REGS;
23040 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
23041 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
23042 return NO_REGS;
23044 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
23045 the reloading of address expressions using PLUS into floating point
23046 registers. */
23047 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
23049 if (is_constant)
23051 /* Zero is always allowed in all VSX registers. */
23052 if (x == CONST0_RTX (mode))
23053 return rclass;
23055 /* If this is a vector constant that can be formed with a few Altivec
23056 instructions, we want altivec registers. */
23057 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
23058 return ALTIVEC_REGS;
23060 /* If this is an integer constant that can easily be loaded into
23061 vector registers, allow it. */
23062 if (CONST_INT_P (x))
23064 HOST_WIDE_INT value = INTVAL (x);
23066 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
23067 2.06 can generate it in the Altivec registers with
23068 VSPLTI<x>. */
23069 if (value == -1)
23071 if (TARGET_P8_VECTOR)
23072 return rclass;
23073 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
23074 return ALTIVEC_REGS;
23075 else
23076 return NO_REGS;
23079 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
23080 a sign extend in the Altivec registers. */
23081 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
23082 && TARGET_VSX_SMALL_INTEGER
23083 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
23084 return ALTIVEC_REGS;
23087 /* Force constant to memory. */
23088 return NO_REGS;
23091 /* D-form addressing can easily reload the value. */
23092 if (mode_supports_vmx_dform (mode)
23093 || mode_supports_vsx_dform_quad (mode))
23094 return rclass;
23096 /* If this is a scalar floating point value and we don't have D-form
23097 addressing, prefer the traditional floating point registers so that we
23098 can use D-form (register+offset) addressing. */
23099 if (rclass == VSX_REGS
23100 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
23101 return FLOAT_REGS;
23103 /* Prefer the Altivec registers if Altivec is handling the vector
23104 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
23105 loads. */
23106 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
23107 || mode == V1TImode)
23108 return ALTIVEC_REGS;
23110 return rclass;
23113 if (is_constant || GET_CODE (x) == PLUS)
23115 if (reg_class_subset_p (GENERAL_REGS, rclass))
23116 return GENERAL_REGS;
23117 if (reg_class_subset_p (BASE_REGS, rclass))
23118 return BASE_REGS;
23119 return NO_REGS;
23122 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
23123 return GENERAL_REGS;
23125 return rclass;
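/* For illustration: the vector constant 0 keeps a VSX class, since any
   VSX register can materialize it; the integer constant -1 keeps it
   only when ISA 2.07's XXLORC is available and otherwise narrows to
   ALTIVEC_REGS; most other constants come back as NO_REGS, forcing them
   to memory.  */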
23128 /* Debug version of rs6000_preferred_reload_class. */
23129 static enum reg_class
23130 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
23132 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
23134 fprintf (stderr,
23135 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
23136 "mode = %s, x:\n",
23137 reg_class_names[ret], reg_class_names[rclass],
23138 GET_MODE_NAME (GET_MODE (x)));
23139 debug_rtx (x);
23141 return ret;
23144 /* If we are copying between FP or AltiVec registers and anything else, we need
23145 a memory location. The exception is when we are targeting ppc64 and
23146 direct moves between the FPRs and GPRs are available. Also, under VSX, you
23147 can copy vector registers from the FP register set to the Altivec register
23148 set and vice versa. */
23150 static bool
23151 rs6000_secondary_memory_needed (enum reg_class from_class,
23152 enum reg_class to_class,
23153 machine_mode mode)
23155 enum rs6000_reg_type from_type, to_type;
23156 bool altivec_p = ((from_class == ALTIVEC_REGS)
23157 || (to_class == ALTIVEC_REGS));
23159 /* If a simple/direct move is available, we don't need secondary memory. */
23160 from_type = reg_class_to_reg_type[(int)from_class];
23161 to_type = reg_class_to_reg_type[(int)to_class];
23163 if (rs6000_secondary_reload_move (to_type, from_type, mode,
23164 (secondary_reload_info *)0, altivec_p))
23165 return false;
23167 /* If we have a floating point or vector register class, we need to use
23168 memory to transfer the data. */
23169 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
23170 return true;
23172 return false;
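/* Illustrative sketch (editor's addition): on a 64-bit power8, copying
   a DImode value from a GPR to an FPR needs no secondary memory because
   rs6000_secondary_reload_move accepts the direct move, e.g.

       mtvsrd 0,3          # fr0 <- r3 in one instruction (assumed regs)

   Without TARGET_DIRECT_MOVE the same copy bounces through a stack slot
   ("std 3,off(1)" then "lfd 0,off(1)"), which is what returning true
   from this hook arranges.  */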
23175 /* Debug version of rs6000_secondary_memory_needed. */
23176 static bool
23177 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
23178 enum reg_class to_class,
23179 machine_mode mode)
23181 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
23183 fprintf (stderr,
23184 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
23185 "to_class = %s, mode = %s\n",
23186 ret ? "true" : "false",
23187 reg_class_names[from_class],
23188 reg_class_names[to_class],
23189 GET_MODE_NAME (mode));
23191 return ret;
23194 /* Return the register class of a scratch register needed to copy IN into
23195 or out of a register in RCLASS in MODE. If it can be done directly,
23196 NO_REGS is returned. */
23198 static enum reg_class
23199 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
23200 rtx in)
23202 int regno;
23204 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
23205 #if TARGET_MACHO
23206 && MACHOPIC_INDIRECT
23207 #endif
23210 /* We cannot copy a symbolic operand directly into anything
23211 other than BASE_REGS for TARGET_ELF. So indicate that a
23212 register from BASE_REGS is needed as an intermediate
23213 register.
23215 On Darwin, pic addresses require a load from memory, which
23216 needs a base register. */
23217 if (rclass != BASE_REGS
23218 && (GET_CODE (in) == SYMBOL_REF
23219 || GET_CODE (in) == HIGH
23220 || GET_CODE (in) == LABEL_REF
23221 || GET_CODE (in) == CONST))
23222 return BASE_REGS;
23225 if (GET_CODE (in) == REG)
23227 regno = REGNO (in);
23228 if (regno >= FIRST_PSEUDO_REGISTER)
23230 regno = true_regnum (in);
23231 if (regno >= FIRST_PSEUDO_REGISTER)
23232 regno = -1;
23235 else if (GET_CODE (in) == SUBREG)
23237 regno = true_regnum (in);
23238 if (regno >= FIRST_PSEUDO_REGISTER)
23239 regno = -1;
23241 else
23242 regno = -1;
23244 /* If we have VSX register moves, prefer moving scalar values between
23245 Altivec registers and GPR by going via an FPR (and then via memory)
23246 instead of reloading the secondary memory address for Altivec moves. */
23247 if (TARGET_VSX
23248 && GET_MODE_SIZE (mode) < 16
23249 && !mode_supports_vmx_dform (mode)
23250 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
23251 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
23252 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
23253 && (regno >= 0 && INT_REGNO_P (regno)))))
23254 return FLOAT_REGS;
23256 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
23257 into anything. */
23258 if (rclass == GENERAL_REGS || rclass == BASE_REGS
23259 || (regno >= 0 && INT_REGNO_P (regno)))
23260 return NO_REGS;
23262 /* Constants, memory, and VSX registers can go into VSX registers (both the
23263 traditional floating point and the altivec registers). */
23264 if (rclass == VSX_REGS
23265 && (regno == -1 || VSX_REGNO_P (regno)))
23266 return NO_REGS;
23268 /* Constants, memory, and FP registers can go into FP registers. */
23269 if ((regno == -1 || FP_REGNO_P (regno))
23270 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
23271 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
23273 /* Memory, and AltiVec registers can go into AltiVec registers. */
23274 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
23275 && rclass == ALTIVEC_REGS)
23276 return NO_REGS;
23278 /* We can copy among the CR registers. */
23279 if ((rclass == CR_REGS || rclass == CR0_REGS)
23280 && regno >= 0 && CR_REGNO_P (regno))
23281 return NO_REGS;
23283 /* Otherwise, we need GENERAL_REGS. */
23284 return GENERAL_REGS;
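/* Illustrative sketch (editor's addition): under TARGET_ELF, moving the
   address of a symbol into a floating point register cannot be done
   directly, so the code above returns BASE_REGS.  Reload then builds
   the address in a base GPR first, for example with an assumed
   medium-model sequence "addis 9,2,sym@toc@ha; addi 9,9,sym@toc@l",
   and only afterwards transfers the value to the FPR.  */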
23287 /* Debug version of rs6000_secondary_reload_class. */
23288 static enum reg_class
23289 rs6000_debug_secondary_reload_class (enum reg_class rclass,
23290 machine_mode mode, rtx in)
23292 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
23293 fprintf (stderr,
23294 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
23295 "mode = %s, input rtx:\n",
23296 reg_class_names[ret], reg_class_names[rclass],
23297 GET_MODE_NAME (mode));
23298 debug_rtx (in);
23300 return ret;
23303 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
23305 static bool
23306 rs6000_cannot_change_mode_class (machine_mode from,
23307 machine_mode to,
23308 enum reg_class rclass)
23310 unsigned from_size = GET_MODE_SIZE (from);
23311 unsigned to_size = GET_MODE_SIZE (to);
23313 if (from_size != to_size)
23315 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
23317 if (reg_classes_intersect_p (xclass, rclass))
23319 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
23320 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
23321 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
23322 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
23324 /* Don't allow 64-bit types to overlap with 128-bit types that take a
23325 single register under VSX because the scalar part of the register
23326 is in the upper 64-bits, and not the lower 64-bits. Types like
23327 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
23328 IEEE floating point can't overlap, and neither can small
23329 values. */
23331 if (to_float128_vector_p && from_float128_vector_p)
23332 return false;
23334 else if (to_float128_vector_p || from_float128_vector_p)
23335 return true;
23337 /* TDmode in floating-mode registers must always go into a register
23338 pair with the most significant word in the even-numbered register
23339 to match ISA requirements. In little-endian mode, this does not
23340 match subreg numbering, so we cannot allow subregs. */
23341 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
23342 return true;
23344 if (from_size < 8 || to_size < 8)
23345 return true;
23347 if (from_size == 8 && (8 * to_nregs) != to_size)
23348 return true;
23350 if (to_size == 8 && (8 * from_nregs) != from_size)
23351 return true;
23353 return false;
23355 else
23356 return false;
23359 if (TARGET_E500_DOUBLE
23360 && ((((to) == DFmode) + ((from) == DFmode)) == 1
23361 || (((to) == TFmode) + ((from) == TFmode)) == 1
23362 || (((to) == IFmode) + ((from) == IFmode)) == 1
23363 || (((to) == KFmode) + ((from) == KFmode)) == 1
23364 || (((to) == DDmode) + ((from) == DDmode)) == 1
23365 || (((to) == TDmode) + ((from) == TDmode)) == 1
23366 || (((to) == DImode) + ((from) == DImode)) == 1))
23367 return true;
23369 /* Since the VSX register set includes traditional floating point registers
23370 and altivec registers, just check for the size being different instead of
23371 trying to check whether the modes are vector modes. Otherwise it won't
23372 allow say DF and DI to change classes. For types like TFmode and TDmode
23373 that take 2 64-bit registers, rather than a single 128-bit register, don't
23374 allow subregs of those types to other 128 bit types. */
23375 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
23377 unsigned num_regs = (from_size + 15) / 16;
23378 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
23379 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
23380 return true;
23382 return (from_size != 8 && from_size != 16);
23385 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
23386 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
23387 return true;
23389 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
23390 && reg_classes_intersect_p (GENERAL_REGS, rclass))
23391 return true;
23393 return false;
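/* Illustrative sketch (editor's addition): the float128 checks above
   are what reject (subreg:DI (reg:KF vs0) 0).  The 64-bit scalar part
   of a KFmode value occupies the upper half of the 128-bit VSX
   register, so reinterpreting it as DImode would read the wrong half;
   answering true forces the access through an explicit conversion or
   memory instead of a subreg.  */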
23396 /* Debug version of rs6000_cannot_change_mode_class. */
23397 static bool
23398 rs6000_debug_cannot_change_mode_class (machine_mode from,
23399 machine_mode to,
23400 enum reg_class rclass)
23402 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
23404 fprintf (stderr,
23405 "rs6000_cannot_change_mode_class, return %s, from = %s, "
23406 "to = %s, rclass = %s\n",
23407 ret ? "true" : "false",
23408 GET_MODE_NAME (from), GET_MODE_NAME (to),
23409 reg_class_names[rclass]);
23411 return ret;
23414 /* Return a string to do a move operation of 128 bits of data. */
23416 const char *
23417 rs6000_output_move_128bit (rtx operands[])
23419 rtx dest = operands[0];
23420 rtx src = operands[1];
23421 machine_mode mode = GET_MODE (dest);
23422 int dest_regno;
23423 int src_regno;
23424 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
23425 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
23427 if (REG_P (dest))
23429 dest_regno = REGNO (dest);
23430 dest_gpr_p = INT_REGNO_P (dest_regno);
23431 dest_fp_p = FP_REGNO_P (dest_regno);
23432 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
23433 dest_vsx_p = dest_fp_p | dest_vmx_p;
23435 else
23437 dest_regno = -1;
23438 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
23441 if (REG_P (src))
23443 src_regno = REGNO (src);
23444 src_gpr_p = INT_REGNO_P (src_regno);
23445 src_fp_p = FP_REGNO_P (src_regno);
23446 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
23447 src_vsx_p = src_fp_p | src_vmx_p;
23449 else
23451 src_regno = -1;
23452 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
23455 /* Register moves. */
23456 if (dest_regno >= 0 && src_regno >= 0)
23458 if (dest_gpr_p)
23460 if (src_gpr_p)
23461 return "#";
23463 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
23464 return (WORDS_BIG_ENDIAN
23465 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
23466 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
23468 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
23469 return "#";
23472 else if (TARGET_VSX && dest_vsx_p)
23474 if (src_vsx_p)
23475 return "xxlor %x0,%x1,%x1";
23477 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
23478 return (WORDS_BIG_ENDIAN
23479 ? "mtvsrdd %x0,%1,%L1"
23480 : "mtvsrdd %x0,%L1,%1");
23482 else if (TARGET_DIRECT_MOVE && src_gpr_p)
23483 return "#";
23486 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
23487 return "vor %0,%1,%1";
23489 else if (dest_fp_p && src_fp_p)
23490 return "#";
23493 /* Loads. */
23494 else if (dest_regno >= 0 && MEM_P (src))
23496 if (dest_gpr_p)
23498 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23499 return "lq %0,%1";
23500 else
23501 return "#";
23504 else if (TARGET_ALTIVEC && dest_vmx_p
23505 && altivec_indexed_or_indirect_operand (src, mode))
23506 return "lvx %0,%y1";
23508 else if (TARGET_VSX && dest_vsx_p)
23510 if (mode_supports_vsx_dform_quad (mode)
23511 && quad_address_p (XEXP (src, 0), mode, true))
23512 return "lxv %x0,%1";
23514 else if (TARGET_P9_VECTOR)
23515 return "lxvx %x0,%y1";
23517 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23518 return "lxvw4x %x0,%y1";
23520 else
23521 return "lxvd2x %x0,%y1";
23524 else if (TARGET_ALTIVEC && dest_vmx_p)
23525 return "lvx %0,%y1";
23527 else if (dest_fp_p)
23528 return "#";
23531 /* Stores. */
23532 else if (src_regno >= 0 && MEM_P (dest))
23534 if (src_gpr_p)
23536 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23537 return "stq %1,%0";
23538 else
23539 return "#";
23542 else if (TARGET_ALTIVEC && src_vmx_p
23543 && altivec_indexed_or_indirect_operand (dest, mode))
23544 return "stvx %1,%y0";
23546 else if (TARGET_VSX && src_vsx_p)
23548 if (mode_supports_vsx_dform_quad (mode)
23549 && quad_address_p (XEXP (dest, 0), mode, true))
23550 return "stxv %x1,%0";
23552 else if (TARGET_P9_VECTOR)
23553 return "stxvx %x1,%y0";
23555 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23556 return "stxvw4x %x1,%y0";
23558 else
23559 return "stxvd2x %x1,%y0";
23562 else if (TARGET_ALTIVEC && src_vmx_p)
23563 return "stvx %1,%y0";
23565 else if (src_fp_p)
23566 return "#";
23569 /* Constants. */
23570 else if (dest_regno >= 0
23571 && (GET_CODE (src) == CONST_INT
23572 || GET_CODE (src) == CONST_WIDE_INT
23573 || GET_CODE (src) == CONST_DOUBLE
23574 || GET_CODE (src) == CONST_VECTOR))
23576 if (dest_gpr_p)
23577 return "#";
23579 else if ((dest_vmx_p && TARGET_ALTIVEC)
23580 || (dest_vsx_p && TARGET_VSX))
23581 return output_vec_const_move (operands);
23584 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
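/* Illustrative sketch (editor's addition): a few template selections
   this function makes, assuming a little-endian power9 and the listed
   register assignments:

       V2DImode  vs34 <- vs35    "xxlor %x0,%x1,%x1"  ->  xxlor 34,35,35
       TImode    GPR  <- VSX     direct-move pair     ->  mfvsrd + mfvsrld
       V4SImode  VSX  <- mem     quad D-form address  ->  lxv 32,0(9)

   GPR-to-GPR TImode moves return "#" so the post-reload splitter breaks
   them into two doubleword moves.  */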
23587 /* Validate a 128-bit move. */
23588 bool
23589 rs6000_move_128bit_ok_p (rtx operands[])
23591 machine_mode mode = GET_MODE (operands[0]);
23592 return (gpc_reg_operand (operands[0], mode)
23593 || gpc_reg_operand (operands[1], mode));
23596 /* Return true if a 128-bit move needs to be split. */
23597 bool
23598 rs6000_split_128bit_ok_p (rtx operands[])
23600 if (!reload_completed)
23601 return false;
23603 if (!gpr_or_gpr_p (operands[0], operands[1]))
23604 return false;
23606 if (quad_load_store_p (operands[0], operands[1]))
23607 return false;
23609 return true;
23613 /* Given a comparison operation, return the bit number in CCR to test. We
23614 know this is a valid comparison.
23616 SCC_P is 1 if this is for an scc. That means that %D will have been
23617 used instead of %C, so the bits will be in different places.
23619 Return -1 if OP isn't a valid comparison for some reason. */
23621 int
23622 ccr_bit (rtx op, int scc_p)
23624 enum rtx_code code = GET_CODE (op);
23625 machine_mode cc_mode;
23626 int cc_regnum;
23627 int base_bit;
23628 rtx reg;
23630 if (!COMPARISON_P (op))
23631 return -1;
23633 reg = XEXP (op, 0);
23635 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
23637 cc_mode = GET_MODE (reg);
23638 cc_regnum = REGNO (reg);
23639 base_bit = 4 * (cc_regnum - CR0_REGNO);
23641 validate_condition_mode (code, cc_mode);
23643 /* When generating a sCOND operation, only positive conditions are
23644 allowed. */
23645 gcc_assert (!scc_p
23646 || code == EQ || code == GT || code == LT || code == UNORDERED
23647 || code == GTU || code == LTU);
23649 switch (code)
23651 case NE:
23652 return scc_p ? base_bit + 3 : base_bit + 2;
23653 case EQ:
23654 return base_bit + 2;
23655 case GT: case GTU: case UNLE:
23656 return base_bit + 1;
23657 case LT: case LTU: case UNGE:
23658 return base_bit;
23659 case ORDERED: case UNORDERED:
23660 return base_bit + 3;
23662 case GE: case GEU:
23663 /* If scc, we will have done a cror to put the bit in the
23664 unordered position. So test that bit. For integer, this is ! LT
23665 unless this is an scc insn. */
23666 return scc_p ? base_bit + 3 : base_bit;
23668 case LE: case LEU:
23669 return scc_p ? base_bit + 3 : base_bit + 1;
23671 default:
23672 gcc_unreachable ();
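/* Illustrative sketch (editor's addition): assuming CR0_REGNO is 68 (CR
   fields occupy consecutive hard registers on this port), a GT
   comparison held in CR2 (hard register 70) gives
   base_bit = 4 * (70 - 68) = 8, and ccr_bit returns 8 + 1 = 9, the
   condition-register bit a subsequent conditional branch tests.  */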
23676 /* Return the GOT register. */
23678 rtx
23679 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
23681 /* The second flow pass currently (June 1999) can't update
23682 regs_ever_live without disturbing other parts of the compiler, so
23683 update it here to make the prolog/epilogue code happy. */
23684 if (!can_create_pseudo_p ()
23685 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
23686 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
23688 crtl->uses_pic_offset_table = 1;
23690 return pic_offset_table_rtx;
23693 static rs6000_stack_t stack_info;
23695 /* Function to init struct machine_function.
23696 This will be called, via a pointer variable,
23697 from push_function_context. */
23699 static struct machine_function *
23700 rs6000_init_machine_status (void)
23702 stack_info.reload_completed = 0;
23703 return ggc_cleared_alloc<machine_function> ();
23706 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
23708 /* Write out a function code label. */
23710 void
23711 rs6000_output_function_entry (FILE *file, const char *fname)
23713 if (fname[0] != '.')
23715 switch (DEFAULT_ABI)
23717 default:
23718 gcc_unreachable ();
23720 case ABI_AIX:
23721 if (DOT_SYMBOLS)
23722 putc ('.', file);
23723 else
23724 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
23725 break;
23727 case ABI_ELFv2:
23728 case ABI_V4:
23729 case ABI_DARWIN:
23730 break;
23734 RS6000_OUTPUT_BASENAME (file, fname);
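/* Illustrative sketch (editor's addition): under the AIX ABI with
   DOT_SYMBOLS, rs6000_output_function_entry (file, "foo") writes
   ".foo", the code entry point, as distinct from "foo", the function
   descriptor; the ELFv2, V4 and Darwin ABIs write plain "foo".  */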
23737 /* Print an operand. Recognize special options, documented below. */
23739 #if TARGET_ELF
23740 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
23741 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
23742 #else
23743 #define SMALL_DATA_RELOC "sda21"
23744 #define SMALL_DATA_REG 0
23745 #endif
23747 void
23748 print_operand (FILE *file, rtx x, int code)
23750 int i;
23751 unsigned HOST_WIDE_INT uval;
23753 switch (code)
23755 /* %a is output_address. */
23757 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
23758 output_operand. */
23760 case 'D':
23761 /* Like 'J' but get to the GT bit only. */
23762 gcc_assert (REG_P (x));
23764 /* Bit 1 is GT bit. */
23765 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
23767 /* Add one for shift count in rlinm for scc. */
23768 fprintf (file, "%d", i + 1);
23769 return;
23771 case 'e':
23772 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
23773 if (! INT_P (x))
23775 output_operand_lossage ("invalid %%e value");
23776 return;
23779 uval = INTVAL (x);
23780 if ((uval & 0xffff) == 0 && uval != 0)
23781 putc ('s', file);
23782 return;
23784 case 'E':
23785 /* X is a CR register. Print the number of the EQ bit of the CR */
23786 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23787 output_operand_lossage ("invalid %%E value");
23788 else
23789 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
23790 return;
23792 case 'f':
23793 /* X is a CR register. Print the shift count needed to move it
23794 to the high-order four bits. */
23795 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23796 output_operand_lossage ("invalid %%f value");
23797 else
23798 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
23799 return;
23801 case 'F':
23802 /* Similar, but print the count for the rotate in the opposite
23803 direction. */
23804 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23805 output_operand_lossage ("invalid %%F value");
23806 else
23807 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
23808 return;
23810 case 'G':
23811 /* X is a constant integer. If it is negative, print "m",
23812 otherwise print "z". This is to make an aze or ame insn. */
23813 if (GET_CODE (x) != CONST_INT)
23814 output_operand_lossage ("invalid %%G value");
23815 else if (INTVAL (x) >= 0)
23816 putc ('z', file);
23817 else
23818 putc ('m', file);
23819 return;
23821 case 'h':
23822 /* If constant, output low-order five bits. Otherwise, write
23823 normally. */
23824 if (INT_P (x))
23825 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
23826 else
23827 print_operand (file, x, 0);
23828 return;
23830 case 'H':
23831 /* If constant, output low-order six bits. Otherwise, write
23832 normally. */
23833 if (INT_P (x))
23834 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
23835 else
23836 print_operand (file, x, 0);
23837 return;
23839 case 'I':
23840 /* Print `i' if this is a constant, else nothing. */
23841 if (INT_P (x))
23842 putc ('i', file);
23843 return;
23845 case 'j':
23846 /* Write the bit number in CCR for jump. */
23847 i = ccr_bit (x, 0);
23848 if (i == -1)
23849 output_operand_lossage ("invalid %%j code");
23850 else
23851 fprintf (file, "%d", i);
23852 return;
23854 case 'J':
23855 /* Similar, but add one for shift count in rlinm for scc and pass
23856 scc flag to `ccr_bit'. */
23857 i = ccr_bit (x, 1);
23858 if (i == -1)
23859 output_operand_lossage ("invalid %%J code");
23860 else
23861 /* If we want bit 31, write a shift count of zero, not 32. */
23862 fprintf (file, "%d", i == 31 ? 0 : i + 1);
23863 return;
23865 case 'k':
23866 /* X must be a constant. Write the 1's complement of the
23867 constant. */
23868 if (! INT_P (x))
23869 output_operand_lossage ("invalid %%k value");
23870 else
23871 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
23872 return;
23874 case 'K':
23875 /* X must be a symbolic constant on ELF. Write an
23876 expression suitable for an 'addi' that adds in the low 16
23877 bits of the MEM. */
23878 if (GET_CODE (x) == CONST)
23880 if (GET_CODE (XEXP (x, 0)) != PLUS
23881 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
23882 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
23883 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
23884 output_operand_lossage ("invalid %%K value");
23886 print_operand_address (file, x);
23887 fputs ("@l", file);
23888 return;
23890 /* %l is output_asm_label. */
23892 case 'L':
23893 /* Write second word of DImode or DFmode reference. Works on register
23894 or non-indexed memory only. */
23895 if (REG_P (x))
23896 fputs (reg_names[REGNO (x) + 1], file);
23897 else if (MEM_P (x))
23899 machine_mode mode = GET_MODE (x);
23900 /* Handle possible auto-increment. Since it is pre-increment and
23901 we have already done it, we can just use an offset of word. */
23902 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23903 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23904 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23905 UNITS_PER_WORD));
23906 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23907 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23908 UNITS_PER_WORD));
23909 else
23910 output_address (mode, XEXP (adjust_address_nv (x, SImode,
23911 UNITS_PER_WORD),
23912 0));
23914 if (small_data_operand (x, GET_MODE (x)))
23915 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23916 reg_names[SMALL_DATA_REG]);
23918 return;
23920 case 'N':
23921 /* Write the number of elements in the vector times 4. */
23922 if (GET_CODE (x) != PARALLEL)
23923 output_operand_lossage ("invalid %%N value");
23924 else
23925 fprintf (file, "%d", XVECLEN (x, 0) * 4);
23926 return;
23928 case 'O':
23929 /* Similar, but subtract 1 first. */
23930 if (GET_CODE (x) != PARALLEL)
23931 output_operand_lossage ("invalid %%O value");
23932 else
23933 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
23934 return;
23936 case 'p':
23937 /* X is a CONST_INT that is a power of two. Output the logarithm. */
23938 if (! INT_P (x)
23939 || INTVAL (x) < 0
23940 || (i = exact_log2 (INTVAL (x))) < 0)
23941 output_operand_lossage ("invalid %%p value");
23942 else
23943 fprintf (file, "%d", i);
23944 return;
23946 case 'P':
23947 /* The operand must be an indirect memory reference. The result
23948 is the register name. */
23949 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
23950 || REGNO (XEXP (x, 0)) >= 32)
23951 output_operand_lossage ("invalid %%P value");
23952 else
23953 fputs (reg_names[REGNO (XEXP (x, 0))], file);
23954 return;
23956 case 'q':
23957 /* This outputs the logical code corresponding to a boolean
23958 expression. The expression may have one or both operands
23959 negated (if one, only the first one). For condition register
23960 logical operations, it will also treat the negated
23961 CR codes as NOTs, but not handle NOTs of them. */
23963 const char *const *t = 0;
23964 const char *s;
23965 enum rtx_code code = GET_CODE (x);
23966 static const char * const tbl[3][3] = {
23967 { "and", "andc", "nor" },
23968 { "or", "orc", "nand" },
23969 { "xor", "eqv", "xor" } };
23971 if (code == AND)
23972 t = tbl[0];
23973 else if (code == IOR)
23974 t = tbl[1];
23975 else if (code == XOR)
23976 t = tbl[2];
23977 else
23978 output_operand_lossage ("invalid %%q value");
23980 if (GET_CODE (XEXP (x, 0)) != NOT)
23981 s = t[0];
23982 else
23984 if (GET_CODE (XEXP (x, 1)) == NOT)
23985 s = t[2];
23986 else
23987 s = t[1];
23990 fputs (s, file);
23992 return;
23994 case 'Q':
23995 if (! TARGET_MFCRF)
23996 return;
23997 fputc (',', file);
23998 /* FALLTHRU */
24000 case 'R':
24001 /* X is a CR register. Print the mask for `mtcrf'. */
24002 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
24003 output_operand_lossage ("invalid %%R value");
24004 else
24005 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
24006 return;
24008 case 's':
24009 /* Low 5 bits of 32 - value */
24010 if (! INT_P (x))
24011 output_operand_lossage ("invalid %%s value");
24012 else
24013 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
24014 return;
24016 case 't':
24017 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
24018 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
24020 /* Bit 3 is OV bit. */
24021 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
24023 /* If we want bit 31, write a shift count of zero, not 32. */
24024 fprintf (file, "%d", i == 31 ? 0 : i + 1);
24025 return;
24027 case 'T':
24028 /* Print the symbolic name of a branch target register. */
24029 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
24030 && REGNO (x) != CTR_REGNO))
24031 output_operand_lossage ("invalid %%T value");
24032 else if (REGNO (x) == LR_REGNO)
24033 fputs ("lr", file);
24034 else
24035 fputs ("ctr", file);
24036 return;
24038 case 'u':
24039 /* High-order or low-order 16 bits of constant, whichever is non-zero,
24040 for use in unsigned operand. */
24041 if (! INT_P (x))
24043 output_operand_lossage ("invalid %%u value");
24044 return;
24047 uval = INTVAL (x);
24048 if ((uval & 0xffff) == 0)
24049 uval >>= 16;
24051 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
24052 return;
24054 case 'v':
24055 /* High-order 16 bits of constant for use in signed operand. */
24056 if (! INT_P (x))
24057 output_operand_lossage ("invalid %%v value");
24058 else
24059 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
24060 (INTVAL (x) >> 16) & 0xffff);
24061 return;
24063 case 'U':
24064 /* Print `u' if this has an auto-increment or auto-decrement. */
24065 if (MEM_P (x)
24066 && (GET_CODE (XEXP (x, 0)) == PRE_INC
24067 || GET_CODE (XEXP (x, 0)) == PRE_DEC
24068 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
24069 putc ('u', file);
24070 return;
24072 case 'V':
24073 /* Print the trap code for this operand. */
24074 switch (GET_CODE (x))
24076 case EQ:
24077 fputs ("eq", file); /* 4 */
24078 break;
24079 case NE:
24080 fputs ("ne", file); /* 24 */
24081 break;
24082 case LT:
24083 fputs ("lt", file); /* 16 */
24084 break;
24085 case LE:
24086 fputs ("le", file); /* 20 */
24087 break;
24088 case GT:
24089 fputs ("gt", file); /* 8 */
24090 break;
24091 case GE:
24092 fputs ("ge", file); /* 12 */
24093 break;
24094 case LTU:
24095 fputs ("llt", file); /* 2 */
24096 break;
24097 case LEU:
24098 fputs ("lle", file); /* 6 */
24099 break;
24100 case GTU:
24101 fputs ("lgt", file); /* 1 */
24102 break;
24103 case GEU:
24104 fputs ("lge", file); /* 5 */
24105 break;
24106 default:
24107 gcc_unreachable ();
24109 break;
24111 case 'w':
24112 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
24113 normally. */
24114 if (INT_P (x))
24115 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
24116 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
24117 else
24118 print_operand (file, x, 0);
24119 return;
24121 case 'x':
24122 /* X is a FPR or Altivec register used in a VSX context. */
24123 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
24124 output_operand_lossage ("invalid %%x value");
24125 else
24127 int reg = REGNO (x);
24128 int vsx_reg = (FP_REGNO_P (reg)
24129 ? reg - 32
24130 : reg - FIRST_ALTIVEC_REGNO + 32);
24132 #ifdef TARGET_REGNAMES
24133 if (TARGET_REGNAMES)
24134 fprintf (file, "%%vs%d", vsx_reg);
24135 else
24136 #endif
24137 fprintf (file, "%d", vsx_reg);
24139 return;
24141 case 'X':
24142 if (MEM_P (x)
24143 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
24144 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
24145 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
24146 putc ('x', file);
24147 return;
24149 case 'Y':
24150 /* Like 'L', for third word of TImode/PTImode */
24151 if (REG_P (x))
24152 fputs (reg_names[REGNO (x) + 2], file);
24153 else if (MEM_P (x))
24155 machine_mode mode = GET_MODE (x);
24156 if (GET_CODE (XEXP (x, 0)) == PRE_INC
24157 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24158 output_address (mode, plus_constant (Pmode,
24159 XEXP (XEXP (x, 0), 0), 8));
24160 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24161 output_address (mode, plus_constant (Pmode,
24162 XEXP (XEXP (x, 0), 0), 8));
24163 else
24164 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
24165 if (small_data_operand (x, GET_MODE (x)))
24166 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24167 reg_names[SMALL_DATA_REG]);
24169 return;
24171 case 'z':
24172 /* X is a SYMBOL_REF. Write out the name preceded by a
24173 period and without any trailing data in brackets. Used for function
24174 names. If we are configured for System V (or the embedded ABI) on
24175 the PowerPC, do not emit the period, since those systems do not use
24176 TOCs and the like. */
24177 gcc_assert (GET_CODE (x) == SYMBOL_REF);
24179 /* For macho, check to see if we need a stub. */
24180 if (TARGET_MACHO)
24182 const char *name = XSTR (x, 0);
24183 #if TARGET_MACHO
24184 if (darwin_emit_branch_islands
24185 && MACHOPIC_INDIRECT
24186 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
24187 name = machopic_indirection_name (x, /*stub_p=*/true);
24188 #endif
24189 assemble_name (file, name);
24191 else if (!DOT_SYMBOLS)
24192 assemble_name (file, XSTR (x, 0));
24193 else
24194 rs6000_output_function_entry (file, XSTR (x, 0));
24195 return;
24197 case 'Z':
24198 /* Like 'L', for last word of TImode/PTImode. */
24199 if (REG_P (x))
24200 fputs (reg_names[REGNO (x) + 3], file);
24201 else if (MEM_P (x))
24203 machine_mode mode = GET_MODE (x);
24204 if (GET_CODE (XEXP (x, 0)) == PRE_INC
24205 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24206 output_address (mode, plus_constant (Pmode,
24207 XEXP (XEXP (x, 0), 0), 12));
24208 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24209 output_address (mode, plus_constant (Pmode,
24210 XEXP (XEXP (x, 0), 0), 12));
24211 else
24212 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
24213 if (small_data_operand (x, GET_MODE (x)))
24214 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24215 reg_names[SMALL_DATA_REG]);
24217 return;
24219 /* Print AltiVec or SPE memory operand. */
24220 case 'y':
24222 rtx tmp;
24224 gcc_assert (MEM_P (x));
24226 tmp = XEXP (x, 0);
24228 /* Ugly hack because %y is overloaded. */
24229 if ((TARGET_SPE || TARGET_E500_DOUBLE)
24230 && (GET_MODE_SIZE (GET_MODE (x)) == 8
24231 || FLOAT128_2REG_P (GET_MODE (x))
24232 || GET_MODE (x) == TImode
24233 || GET_MODE (x) == PTImode))
24235 /* Handle [reg]. */
24236 if (REG_P (tmp))
24238 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
24239 break;
24241 /* Handle [reg+UIMM]. */
24242 else if (GET_CODE (tmp) == PLUS &&
24243 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
24245 int x;
24247 gcc_assert (REG_P (XEXP (tmp, 0)));
24249 x = INTVAL (XEXP (tmp, 1));
24250 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
24251 break;
24254 /* Fall through. Must be [reg+reg]. */
24256 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
24257 && GET_CODE (tmp) == AND
24258 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
24259 && INTVAL (XEXP (tmp, 1)) == -16)
24260 tmp = XEXP (tmp, 0);
24261 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
24262 && GET_CODE (tmp) == PRE_MODIFY)
24263 tmp = XEXP (tmp, 1);
24264 if (REG_P (tmp))
24265 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
24266 else
24268 if (GET_CODE (tmp) != PLUS
24269 || !REG_P (XEXP (tmp, 0))
24270 || !REG_P (XEXP (tmp, 1)))
24272 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
24273 break;
24276 if (REGNO (XEXP (tmp, 0)) == 0)
24277 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
24278 reg_names[ REGNO (XEXP (tmp, 0)) ]);
24279 else
24280 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
24281 reg_names[ REGNO (XEXP (tmp, 1)) ]);
24283 break;
24286 case 0:
24287 if (REG_P (x))
24288 fprintf (file, "%s", reg_names[REGNO (x)]);
24289 else if (MEM_P (x))
24291 /* We need to handle PRE_INC and PRE_DEC here, since we need to
24292 know the width from the mode. */
24293 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
24294 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
24295 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24296 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
24297 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
24298 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24299 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24300 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
24301 else
24302 output_address (GET_MODE (x), XEXP (x, 0));
24304 else
24306 if (toc_relative_expr_p (x, false))
24307 /* This hack along with a corresponding hack in
24308 rs6000_output_addr_const_extra arranges to output addends
24309 where the assembler expects to find them. eg.
24310 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
24311 without this hack would be output as "x@toc+4". We
24312 want "x+4@toc". */
24313 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24314 else
24315 output_addr_const (file, x);
24317 return;
24319 case '&':
24320 if (const char *name = get_some_local_dynamic_name ())
24321 assemble_name (file, name);
24322 else
24323 output_operand_lossage ("'%%&' used without any "
24324 "local dynamic TLS references");
24325 return;
24327 default:
24328 output_operand_lossage ("invalid %%xn code");
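/* Illustrative sketch (editor's addition): a few print_operand results,
   assuming X is (reg:CC 69), i.e. CR1, for the CR codes and
   (const_int 12) for the integer ones:

       %E -> "6"      EQ bit of CR1: 4 * (69 - 68) + 2
       %R -> "64"     mtcrf mask for CR1: 128 >> 1
       %h -> "12"     low-order five bits of the constant
       %k -> "-13"    one's complement of the constant  */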
24332 /* Print the address of an operand. */
24334 void
24335 print_operand_address (FILE *file, rtx x)
24337 if (REG_P (x))
24338 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
24339 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
24340 || GET_CODE (x) == LABEL_REF)
24342 output_addr_const (file, x);
24343 if (small_data_operand (x, GET_MODE (x)))
24344 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24345 reg_names[SMALL_DATA_REG]);
24346 else
24347 gcc_assert (!TARGET_TOC);
24349 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24350 && REG_P (XEXP (x, 1)))
24352 if (REGNO (XEXP (x, 0)) == 0)
24353 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
24354 reg_names[ REGNO (XEXP (x, 0)) ]);
24355 else
24356 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
24357 reg_names[ REGNO (XEXP (x, 1)) ]);
24359 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24360 && GET_CODE (XEXP (x, 1)) == CONST_INT)
24361 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
24362 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
24363 #if TARGET_MACHO
24364 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24365 && CONSTANT_P (XEXP (x, 1)))
24367 fprintf (file, "lo16(");
24368 output_addr_const (file, XEXP (x, 1));
24369 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24371 #endif
24372 #if TARGET_ELF
24373 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24374 && CONSTANT_P (XEXP (x, 1)))
24376 output_addr_const (file, XEXP (x, 1));
24377 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24379 #endif
24380 else if (toc_relative_expr_p (x, false))
24382 /* This hack along with a corresponding hack in
24383 rs6000_output_addr_const_extra arranges to output addends
24384 where the assembler expects to find them. eg.
24385 (lo_sum (reg 9)
24386 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
24387 without this hack would be output as "x@toc+8@l(9)". We
24388 want "x+8@toc@l(9)". */
24389 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24390 if (GET_CODE (x) == LO_SUM)
24391 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
24392 else
24393 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
24395 else
24396 gcc_unreachable ();
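/* Illustrative sketch (editor's addition): address forms this routine
   prints, with assumed register numbering:

       (reg 9)                       ->  "0(9)"
       (plus (reg 9) (reg 10))       ->  "9,10"
       (plus (reg 0) (reg 10))       ->  "10,0"   r0 cannot be the base
       (plus (reg 9) (const_int 8))  ->  "8(9)"  */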
24399 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
24401 static bool
24402 rs6000_output_addr_const_extra (FILE *file, rtx x)
24404 if (GET_CODE (x) == UNSPEC)
24405 switch (XINT (x, 1))
24407 case UNSPEC_TOCREL:
24408 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
24409 && REG_P (XVECEXP (x, 0, 1))
24410 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
24411 output_addr_const (file, XVECEXP (x, 0, 0));
24412 if (x == tocrel_base && tocrel_offset != const0_rtx)
24414 if (INTVAL (tocrel_offset) >= 0)
24415 fprintf (file, "+");
24416 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
24418 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
24420 putc ('-', file);
24421 assemble_name (file, toc_label_name);
24422 need_toc_init = 1;
24424 else if (TARGET_ELF)
24425 fputs ("@toc", file);
24426 return true;
24428 #if TARGET_MACHO
24429 case UNSPEC_MACHOPIC_OFFSET:
24430 output_addr_const (file, XVECEXP (x, 0, 0));
24431 putc ('-', file);
24432 machopic_output_function_base_name (file);
24433 return true;
24434 #endif
24436 return false;
24439 /* Target hook for assembling integer objects. The PowerPC version has
24440 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
24441 is defined. It also needs to handle DI-mode objects on 64-bit
24442 targets. */
24444 static bool
24445 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
24447 #ifdef RELOCATABLE_NEEDS_FIXUP
24448 /* Special handling for SI values. */
24449 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
24451 static int recurse = 0;
24453 /* For -mrelocatable, we mark all addresses that need to be fixed up in
24454 the .fixup section. Since the TOC section is already relocated, we
24455 don't need to mark it here. We used to skip the text section, but it
24456 should never be valid for relocated addresses to be placed in the text
24457 section. */
24458 if (DEFAULT_ABI == ABI_V4
24459 && (TARGET_RELOCATABLE || flag_pic > 1)
24460 && in_section != toc_section
24461 && !recurse
24462 && !CONST_SCALAR_INT_P (x)
24463 && CONSTANT_P (x))
24465 char buf[256];
24467 recurse = 1;
24468 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
24469 fixuplabelno++;
24470 ASM_OUTPUT_LABEL (asm_out_file, buf);
24471 fprintf (asm_out_file, "\t.long\t(");
24472 output_addr_const (asm_out_file, x);
24473 fprintf (asm_out_file, ")@fixup\n");
24474 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
24475 ASM_OUTPUT_ALIGN (asm_out_file, 2);
24476 fprintf (asm_out_file, "\t.long\t");
24477 assemble_name (asm_out_file, buf);
24478 fprintf (asm_out_file, "\n\t.previous\n");
24479 recurse = 0;
24480 return true;
24482 /* Remove initial .'s to turn a -mcall-aixdesc function
24483 address into the address of the descriptor, not the function
24484 itself. */
24485 else if (GET_CODE (x) == SYMBOL_REF
24486 && XSTR (x, 0)[0] == '.'
24487 && DEFAULT_ABI == ABI_AIX)
24489 const char *name = XSTR (x, 0);
24490 while (*name == '.')
24491 name++;
24493 fprintf (asm_out_file, "\t.long\t%s\n", name);
24494 return true;
24497 #endif /* RELOCATABLE_NEEDS_FIXUP */
24498 return default_assemble_integer (x, size, aligned_p);
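/* Illustrative sketch (editor's addition): with -mrelocatable on the V4
   ABI, assembling the 4-byte address constant "sym" emits roughly

       .LCP0:
               .long   (sym)@fixup
               .section ".fixup","aw"
               .align 2
               .long   .LCP0
               .previous

   so startup code can relocate the stored pointer at run time (label
   name and spacing assumed).  */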
24501 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
24502 /* Emit an assembler directive to set symbol visibility for DECL to
24503 VISIBILITY_TYPE. */
24505 static void
24506 rs6000_assemble_visibility (tree decl, int vis)
24508 if (TARGET_XCOFF)
24509 return;
24511 /* Functions need to have their entry point symbol visibility set as
24512 well as their descriptor symbol visibility. */
24513 if (DEFAULT_ABI == ABI_AIX
24514 && DOT_SYMBOLS
24515 && TREE_CODE (decl) == FUNCTION_DECL)
24517 static const char * const visibility_types[] = {
24518 NULL, "protected", "hidden", "internal"
24521 const char *name, *type;
24523 name = ((* targetm.strip_name_encoding)
24524 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
24525 type = visibility_types[vis];
24527 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
24528 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
24530 else
24531 default_assemble_visibility (decl, vis);
24533 #endif
24535 enum rtx_code
24536 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
24538 /* Reversal of FP compares takes care -- an ordered compare
24539 becomes an unordered compare and vice versa. */
24540 if (mode == CCFPmode
24541 && (!flag_finite_math_only
24542 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
24543 || code == UNEQ || code == LTGT))
24544 return reverse_condition_maybe_unordered (code);
24545 else
24546 return reverse_condition (code);
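/* Illustrative sketch (editor's addition): reversing GE in CCFPmode
   without -ffinite-math-only yields UNLT via
   reverse_condition_maybe_unordered, because a NaN operand makes plain
   LT an incorrect inverse; with finite math, ordinary reverse_condition
   gives LT.  */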
24549 /* Generate a compare for CODE. Return a brand-new rtx that
24550 represents the result of the compare. */
24552 static rtx
24553 rs6000_generate_compare (rtx cmp, machine_mode mode)
24555 machine_mode comp_mode;
24556 rtx compare_result;
24557 enum rtx_code code = GET_CODE (cmp);
24558 rtx op0 = XEXP (cmp, 0);
24559 rtx op1 = XEXP (cmp, 1);
24561 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24562 comp_mode = CCmode;
24563 else if (FLOAT_MODE_P (mode))
24564 comp_mode = CCFPmode;
24565 else if (code == GTU || code == LTU
24566 || code == GEU || code == LEU)
24567 comp_mode = CCUNSmode;
24568 else if ((code == EQ || code == NE)
24569 && unsigned_reg_p (op0)
24570 && (unsigned_reg_p (op1)
24571 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
24572 /* These are unsigned values, perhaps there will be a later
24573 ordering compare that can be shared with this one. */
24574 comp_mode = CCUNSmode;
24575 else
24576 comp_mode = CCmode;
24578 /* If we have an unsigned compare, make sure we don't have a signed value as
24579 an immediate. */
24580 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
24581 && INTVAL (op1) < 0)
24583 op0 = copy_rtx_if_shared (op0);
24584 op1 = force_reg (GET_MODE (op0), op1);
24585 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
24588 /* First, the compare. */
24589 compare_result = gen_reg_rtx (comp_mode);
24591 /* E500 FP compare instructions on the GPRs. Yuck! */
24592 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
24593 && FLOAT_MODE_P (mode))
24595 rtx cmp, or_result, compare_result2;
24596 machine_mode op_mode = GET_MODE (op0);
24597 bool reverse_p;
24599 if (op_mode == VOIDmode)
24600 op_mode = GET_MODE (op1);
24602 /* First reverse the condition codes that aren't directly supported. */
24603 switch (code)
24605 case NE:
24606 case UNLT:
24607 case UNLE:
24608 case UNGT:
24609 case UNGE:
24610 code = reverse_condition_maybe_unordered (code);
24611 reverse_p = true;
24612 break;
24614 case EQ:
24615 case LT:
24616 case LE:
24617 case GT:
24618 case GE:
24619 reverse_p = false;
24620 break;
24622 default:
24623 gcc_unreachable ();
24626 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
24627 This explains the following mess. */
24629 switch (code)
24631 case EQ:
24632 switch (op_mode)
24634 case E_SFmode:
24635 cmp = (flag_finite_math_only && !flag_trapping_math)
24636 ? gen_tstsfeq_gpr (compare_result, op0, op1)
24637 : gen_cmpsfeq_gpr (compare_result, op0, op1);
24638 break;
24640 case E_DFmode:
24641 cmp = (flag_finite_math_only && !flag_trapping_math)
24642 ? gen_tstdfeq_gpr (compare_result, op0, op1)
24643 : gen_cmpdfeq_gpr (compare_result, op0, op1);
24644 break;
24646 case E_TFmode:
24647 case E_IFmode:
24648 case E_KFmode:
24649 cmp = (flag_finite_math_only && !flag_trapping_math)
24650 ? gen_tsttfeq_gpr (compare_result, op0, op1)
24651 : gen_cmptfeq_gpr (compare_result, op0, op1);
24652 break;
24654 default:
24655 gcc_unreachable ();
24657 break;
24659 case GT:
24660 case GE:
24661 switch (op_mode)
24663 case E_SFmode:
24664 cmp = (flag_finite_math_only && !flag_trapping_math)
24665 ? gen_tstsfgt_gpr (compare_result, op0, op1)
24666 : gen_cmpsfgt_gpr (compare_result, op0, op1);
24667 break;
24669 case E_DFmode:
24670 cmp = (flag_finite_math_only && !flag_trapping_math)
24671 ? gen_tstdfgt_gpr (compare_result, op0, op1)
24672 : gen_cmpdfgt_gpr (compare_result, op0, op1);
24673 break;
24675 case E_TFmode:
24676 case E_IFmode:
24677 case E_KFmode:
24678 cmp = (flag_finite_math_only && !flag_trapping_math)
24679 ? gen_tsttfgt_gpr (compare_result, op0, op1)
24680 : gen_cmptfgt_gpr (compare_result, op0, op1);
24681 break;
24683 default:
24684 gcc_unreachable ();
24686 break;
24688 case LT:
24689 case LE:
24690 switch (op_mode)
24692 case E_SFmode:
24693 cmp = (flag_finite_math_only && !flag_trapping_math)
24694 ? gen_tstsflt_gpr (compare_result, op0, op1)
24695 : gen_cmpsflt_gpr (compare_result, op0, op1);
24696 break;
24698 case E_DFmode:
24699 cmp = (flag_finite_math_only && !flag_trapping_math)
24700 ? gen_tstdflt_gpr (compare_result, op0, op1)
24701 : gen_cmpdflt_gpr (compare_result, op0, op1);
24702 break;
24704 case E_TFmode:
24705 case E_IFmode:
24706 case E_KFmode:
24707 cmp = (flag_finite_math_only && !flag_trapping_math)
24708 ? gen_tsttflt_gpr (compare_result, op0, op1)
24709 : gen_cmptflt_gpr (compare_result, op0, op1);
24710 break;
24712 default:
24713 gcc_unreachable ();
24715 break;
24717 default:
24718 gcc_unreachable ();
24721 /* Synthesize LE and GE from LT/GT || EQ. */
24722 if (code == LE || code == GE)
24724 emit_insn (cmp);
24726 compare_result2 = gen_reg_rtx (CCFPmode);
24728 /* Do the EQ. */
24729 switch (op_mode)
24731 case E_SFmode:
24732 cmp = (flag_finite_math_only && !flag_trapping_math)
24733 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
24734 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
24735 break;
24737 case E_DFmode:
24738 cmp = (flag_finite_math_only && !flag_trapping_math)
24739 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
24740 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
24741 break;
24743 case E_TFmode:
24744 case E_IFmode:
24745 case E_KFmode:
24746 cmp = (flag_finite_math_only && !flag_trapping_math)
24747 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
24748 : gen_cmptfeq_gpr (compare_result2, op0, op1);
24749 break;
24751 default:
24752 gcc_unreachable ();
24755 emit_insn (cmp);
24757 /* OR them together. */
24758 or_result = gen_reg_rtx (CCFPmode);
24759 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
24760 compare_result2);
24761 compare_result = or_result;
24764 code = reverse_p ? NE : EQ;
24766 emit_insn (cmp);
24769 /* IEEE 128-bit support in VSX registers when we do not have hardware
24770 support. */
24771 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24773 rtx libfunc = NULL_RTX;
24774 bool check_nan = false;
24775 rtx dest;
24777 switch (code)
24779 case EQ:
24780 case NE:
24781 libfunc = optab_libfunc (eq_optab, mode);
24782 break;
24784 case GT:
24785 case GE:
24786 libfunc = optab_libfunc (ge_optab, mode);
24787 break;
24789 case LT:
24790 case LE:
24791 libfunc = optab_libfunc (le_optab, mode);
24792 break;
24794 case UNORDERED:
24795 case ORDERED:
24796 libfunc = optab_libfunc (unord_optab, mode);
24797 code = (code == UNORDERED) ? NE : EQ;
24798 break;
24800 case UNGE:
24801 case UNGT:
24802 check_nan = true;
24803 libfunc = optab_libfunc (ge_optab, mode);
24804 code = (code == UNGE) ? GE : GT;
24805 break;
24807 case UNLE:
24808 case UNLT:
24809 check_nan = true;
24810 libfunc = optab_libfunc (le_optab, mode);
24811 code = (code == UNLE) ? LE : LT;
24812 break;
24814 case UNEQ:
24815 case LTGT:
24816 check_nan = true;
24817 libfunc = optab_libfunc (eq_optab, mode);
24818 code = (code == UNEQ) ? EQ : NE;
24819 break;
24821 default:
24822 gcc_unreachable ();
24825 gcc_assert (libfunc);
24827 if (!check_nan)
24828 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24829 SImode, op0, mode, op1, mode);
24831 /* The library signals an exception for signalling NaNs, so we need to
24832 handle isgreater, etc. by first checking isordered. */
24833 else
24835 rtx ne_rtx, normal_dest, unord_dest;
24836 rtx unord_func = optab_libfunc (unord_optab, mode);
24837 rtx join_label = gen_label_rtx ();
24838 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
24839 rtx unord_cmp = gen_reg_rtx (comp_mode);
24842 /* Test for either value being a NaN. */
24843 gcc_assert (unord_func);
24844 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
24845 SImode, op0, mode, op1, mode);
24847 /* Set the value to 1 if either input is a NaN, and jump to the join
24848 label. */
24849 dest = gen_reg_rtx (SImode);
24850 emit_move_insn (dest, const1_rtx);
24851 emit_insn (gen_rtx_SET (unord_cmp,
24852 gen_rtx_COMPARE (comp_mode, unord_dest,
24853 const0_rtx)));
24855 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
24856 emit_jump_insn (gen_rtx_SET (pc_rtx,
24857 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
24858 join_ref,
24859 pc_rtx)));
24861 /* Do the normal comparison, knowing that the values are not
24862 NaNs. */
24863 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24864 SImode, op0, mode, op1, mode);
24866 emit_insn (gen_cstoresi4 (dest,
24867 gen_rtx_fmt_ee (code, SImode, normal_dest,
24868 const0_rtx),
24869 normal_dest, const0_rtx));
24871 /* Join the NaN and non-NaN paths. Compare dest against 0. */
24872 emit_label (join_label);
24873 code = NE;
24876 emit_insn (gen_rtx_SET (compare_result,
24877 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
24880 else
24882 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
24883 CLOBBERs to match cmptf_internal2 pattern. */
24884 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
24885 && FLOAT128_IBM_P (GET_MODE (op0))
24886 && TARGET_HARD_FLOAT && TARGET_FPRS)
24887 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24888 gen_rtvec (10,
24889 gen_rtx_SET (compare_result,
24890 gen_rtx_COMPARE (comp_mode, op0, op1)),
24891 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24892 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24893 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24894 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24895 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24896 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24897 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24898 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24899 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
24900 else if (GET_CODE (op1) == UNSPEC
24901 && XINT (op1, 1) == UNSPEC_SP_TEST)
24903 rtx op1b = XVECEXP (op1, 0, 0);
24904 comp_mode = CCEQmode;
24905 compare_result = gen_reg_rtx (CCEQmode);
24906 if (TARGET_64BIT)
24907 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
24908 else
24909 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
24911 else
24912 emit_insn (gen_rtx_SET (compare_result,
24913 gen_rtx_COMPARE (comp_mode, op0, op1)));
24916 /* Some kinds of FP comparisons need an OR operation;
24917 under flag_finite_math_only we don't bother. */
24918 if (FLOAT_MODE_P (mode)
24919 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
24920 && !flag_finite_math_only
24921 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
24922 && (code == LE || code == GE
24923 || code == UNEQ || code == LTGT
24924 || code == UNGT || code == UNLT))
24926 enum rtx_code or1, or2;
24927 rtx or1_rtx, or2_rtx, compare2_rtx;
24928 rtx or_result = gen_reg_rtx (CCEQmode);
24930 switch (code)
24932 case LE: or1 = LT; or2 = EQ; break;
24933 case GE: or1 = GT; or2 = EQ; break;
24934 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
24935 case LTGT: or1 = LT; or2 = GT; break;
24936 case UNGT: or1 = UNORDERED; or2 = GT; break;
24937 case UNLT: or1 = UNORDERED; or2 = LT; break;
24938 default: gcc_unreachable ();
24940 validate_condition_mode (or1, comp_mode);
24941 validate_condition_mode (or2, comp_mode);
24942 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
24943 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
24944 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
24945 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
24946 const_true_rtx);
24947 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
24949 compare_result = or_result;
24950 code = EQ;
24953 validate_condition_mode (code, GET_MODE (compare_result));
24955 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
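/* Illustrative sketch (editor's addition): for a DFmode LE on a server
   CPU, the OR block above follows the "fcmpu cr0,f1,f2" compare with a
   CR combination along the lines of

       cror 2,0,2          # EQ bit := LT | EQ (assumed CR0 allocation)

   so the final branch only has to test a single bit, which is why CODE
   is rewritten to EQ against the CCEQmode result.  */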
24959 /* Return the diagnostic message string if the binary operation OP is
24960 not permitted on TYPE1 and TYPE2, NULL otherwise. */
24962 static const char*
24963 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
24964 const_tree type1,
24965 const_tree type2)
24967 machine_mode mode1 = TYPE_MODE (type1);
24968 machine_mode mode2 = TYPE_MODE (type2);
24970 /* For complex modes, use the inner type. */
24971 if (COMPLEX_MODE_P (mode1))
24972 mode1 = GET_MODE_INNER (mode1);
24974 if (COMPLEX_MODE_P (mode2))
24975 mode2 = GET_MODE_INNER (mode2);
24977 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
24978 double to intermix unless -mfloat128-convert. */
24979 if (mode1 == mode2)
24980 return NULL;
24982 if (!TARGET_FLOAT128_CVT)
24984 if ((mode1 == KFmode && mode2 == IFmode)
24985 || (mode1 == IFmode && mode2 == KFmode))
24986 return N_("__float128 and __ibm128 cannot be used in the same "
24987 "expression");
24989 if (TARGET_IEEEQUAD
24990 && ((mode1 == IFmode && mode2 == TFmode)
24991 || (mode1 == TFmode && mode2 == IFmode)))
24992 return N_("__ibm128 and long double cannot be used in the same "
24993 "expression");
24995 if (!TARGET_IEEEQUAD
24996 && ((mode1 == KFmode && mode2 == TFmode)
24997 || (mode1 == TFmode && mode2 == KFmode)))
24998 return N_("__float128 and long double cannot be used in the same "
24999 "expression");
25002 return NULL;
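/* Illustrative sketch (editor's addition): without -mfloat128-convert,

       __float128 a;
       __ibm128 b;
       ... a + b ...   // rejected by this hook

   is diagnosed as "__float128 and __ibm128 cannot be used in the same
   expression", since KFmode and IFmode encode 128-bit values
   incompatibly.  */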
25006 /* Expand floating point conversion to/from __float128 and __ibm128. */
25008 void
25009 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
25011 machine_mode dest_mode = GET_MODE (dest);
25012 machine_mode src_mode = GET_MODE (src);
25013 convert_optab cvt = unknown_optab;
25014 bool do_move = false;
25015 rtx libfunc = NULL_RTX;
25016 rtx dest2;
25017 typedef rtx (*rtx_2func_t) (rtx, rtx);
25018 rtx_2func_t hw_convert = (rtx_2func_t)0;
25019 size_t kf_or_tf;
25021 struct hw_conv_t {
25022 rtx_2func_t from_df;
25023 rtx_2func_t from_sf;
25024 rtx_2func_t from_si_sign;
25025 rtx_2func_t from_si_uns;
25026 rtx_2func_t from_di_sign;
25027 rtx_2func_t from_di_uns;
25028 rtx_2func_t to_df;
25029 rtx_2func_t to_sf;
25030 rtx_2func_t to_si_sign;
25031 rtx_2func_t to_si_uns;
25032 rtx_2func_t to_di_sign;
25033 rtx_2func_t to_di_uns;
25034 } hw_conversions[2] = {
25035 /* conversions to/from KFmode */
25037 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
25038 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
25039 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
25040 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
25041 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
25042 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
25043 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
25044 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
25045 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
25046 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
25047 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
25048 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
25051 /* conversions to/from TFmode */
25053 gen_extenddftf2_hw, /* TFmode <- DFmode. */
25054 gen_extendsftf2_hw, /* TFmode <- SFmode. */
25055 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
25056 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
25057 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
25058 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
25059 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
25060 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
25061 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
25062 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
25063 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
25064 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
25068 if (dest_mode == src_mode)
25069 gcc_unreachable ();
25071 /* Eliminate memory operations. */
25072 if (MEM_P (src))
25073 src = force_reg (src_mode, src);
25075 if (MEM_P (dest))
25077 rtx tmp = gen_reg_rtx (dest_mode);
25078 rs6000_expand_float128_convert (tmp, src, unsigned_p);
25079 rs6000_emit_move (dest, tmp, dest_mode);
25080 return;
25083 /* Convert to IEEE 128-bit floating point. */
25084 if (FLOAT128_IEEE_P (dest_mode))
25086 if (dest_mode == KFmode)
25087 kf_or_tf = 0;
25088 else if (dest_mode == TFmode)
25089 kf_or_tf = 1;
25090 else
25091 gcc_unreachable ();
25093 switch (src_mode)
25095 case E_DFmode:
25096 cvt = sext_optab;
25097 hw_convert = hw_conversions[kf_or_tf].from_df;
25098 break;
25100 case E_SFmode:
25101 cvt = sext_optab;
25102 hw_convert = hw_conversions[kf_or_tf].from_sf;
25103 break;
25105 case E_KFmode:
25106 case E_IFmode:
25107 case E_TFmode:
25108 if (FLOAT128_IBM_P (src_mode))
25109 cvt = sext_optab;
25110 else
25111 do_move = true;
25112 break;
25114 case E_SImode:
25115 if (unsigned_p)
25117 cvt = ufloat_optab;
25118 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
25120 else
25122 cvt = sfloat_optab;
25123 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
25125 break;
25127 case E_DImode:
25128 if (unsigned_p)
25130 cvt = ufloat_optab;
25131 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
25133 else
25135 cvt = sfloat_optab;
25136 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
25138 break;
25140 default:
25141 gcc_unreachable ();
25145 /* Convert from IEEE 128-bit floating point. */
25146 else if (FLOAT128_IEEE_P (src_mode))
25148 if (src_mode == KFmode)
25149 kf_or_tf = 0;
25150 else if (src_mode == TFmode)
25151 kf_or_tf = 1;
25152 else
25153 gcc_unreachable ();
25155 switch (dest_mode)
25157 case E_DFmode:
25158 cvt = trunc_optab;
25159 hw_convert = hw_conversions[kf_or_tf].to_df;
25160 break;
25162 case E_SFmode:
25163 cvt = trunc_optab;
25164 hw_convert = hw_conversions[kf_or_tf].to_sf;
25165 break;
25167 case E_KFmode:
25168 case E_IFmode:
25169 case E_TFmode:
25170 if (FLOAT128_IBM_P (dest_mode))
25171 cvt = trunc_optab;
25172 else
25173 do_move = true;
25174 break;
25176 case E_SImode:
25177 if (unsigned_p)
25179 cvt = ufix_optab;
25180 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
25182 else
25184 cvt = sfix_optab;
25185 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
25187 break;
25189 case E_DImode:
25190 if (unsigned_p)
25192 cvt = ufix_optab;
25193 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
25195 else
25197 cvt = sfix_optab;
25198 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
25200 break;
25202 default:
25203 gcc_unreachable ();
25207 /* Both IBM format. */
25208 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
25209 do_move = true;
25211 else
25212 gcc_unreachable ();
25214 /* Handle conversion between TFmode/KFmode. */
25215 if (do_move)
25216 emit_move_insn (dest, gen_lowpart (dest_mode, src));
25218 /* Handle conversion if we have hardware support. */
25219 else if (TARGET_FLOAT128_HW && hw_convert)
25220 emit_insn ((hw_convert) (dest, src));
25222 /* Call an external function to do the conversion. */
25223 else if (cvt != unknown_optab)
25225 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
25226 gcc_assert (libfunc != NULL_RTX);
25228 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
25229 src, src_mode);
25231 gcc_assert (dest2 != NULL_RTX);
25232 if (!rtx_equal_p (dest, dest2))
25233 emit_move_insn (dest, dest2);
25236 else
25237 gcc_unreachable ();
25239 return;
25243 /* Emit the RTL for an sISEL pattern. */
25245 void
25246 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
25248 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
25251 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
25252 can be used as that dest register. Return the dest register. */
25255 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
25257 if (op2 == const0_rtx)
25258 return op1;
25260 if (GET_CODE (scratch) == SCRATCH)
25261 scratch = gen_reg_rtx (mode);
25263 if (logical_operand (op2, mode))
25264 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
25265 else
25266 emit_insn (gen_rtx_SET (scratch,
25267 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
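/* Illustrative example (register numbers are arbitrary): comparing r3
   against 5 satisfies logical_operand and emits "xori r5,r3,5", while a
   negative constant such as -5 takes the PLUS/negate path and emits
   "addi r5,r3,5"; either way r5 is zero exactly when the two operands
   are equal.  */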
25269 return scratch;
25272 void
25273 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
25275 rtx condition_rtx;
25276 machine_mode op_mode;
25277 enum rtx_code cond_code;
25278 rtx result = operands[0];
25280 condition_rtx = rs6000_generate_compare (operands[1], mode);
25281 cond_code = GET_CODE (condition_rtx);
25283 if (FLOAT_MODE_P (mode)
25284 && !TARGET_FPRS && TARGET_HARD_FLOAT)
25286 rtx t;
25288 PUT_MODE (condition_rtx, SImode);
25289 t = XEXP (condition_rtx, 0);
25291 gcc_assert (cond_code == NE || cond_code == EQ);
25293 if (cond_code == NE)
25294 emit_insn (gen_e500_flip_gt_bit (t, t));
25296 emit_insn (gen_move_from_CR_gt_bit (result, t));
25297 return;
25300 if (cond_code == NE
25301 || cond_code == GE || cond_code == LE
25302 || cond_code == GEU || cond_code == LEU
25303 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
25305 rtx not_result = gen_reg_rtx (CCEQmode);
25306 rtx not_op, rev_cond_rtx;
25307 machine_mode cc_mode;
25309 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
25311 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
25312 SImode, XEXP (condition_rtx, 0), const0_rtx);
25313 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
25314 emit_insn (gen_rtx_SET (not_result, not_op));
25315 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
25318 op_mode = GET_MODE (XEXP (operands[1], 0));
25319 if (op_mode == VOIDmode)
25320 op_mode = GET_MODE (XEXP (operands[1], 1));
25322 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
25324 PUT_MODE (condition_rtx, DImode);
25325 convert_move (result, condition_rtx, 0);
25327 else
25329 PUT_MODE (condition_rtx, SImode);
25330 emit_insn (gen_rtx_SET (result, condition_rtx));
25334 /* Emit a conditional branch to the label in operands[3], using the
      comparison described by operands[0].  */
25336 void
25337 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
25339 rtx condition_rtx, loc_ref;
25341 condition_rtx = rs6000_generate_compare (operands[0], mode);
25342 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
25343 emit_jump_insn (gen_rtx_SET (pc_rtx,
25344 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
25345 loc_ref, pc_rtx)));
25348 /* Return the string to output a conditional branch to LABEL, which is
25349 the operand template of the label, or NULL if the branch is really a
25350 conditional return.
25352 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
25353 condition code register and its mode specifies what kind of
25354 comparison we made.
25356 REVERSED is nonzero if we should reverse the sense of the comparison.
25358 INSN is the insn. */
25360 char *
25361 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
25363 static char string[64];
25364 enum rtx_code code = GET_CODE (op);
25365 rtx cc_reg = XEXP (op, 0);
25366 machine_mode mode = GET_MODE (cc_reg);
25367 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
25368 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
25369 int really_reversed = reversed ^ need_longbranch;
25370 char *s = string;
25371 const char *ccode;
25372 const char *pred;
25373 rtx note;
25375 validate_condition_mode (code, mode);
25377 /* Work out which way this really branches. We could use
25378 reverse_condition_maybe_unordered here always but this
25379 makes the resulting assembler clearer. */
25380 if (really_reversed)
25382 /* Reversal of FP compares takes care -- an ordered compare
25383 becomes an unordered compare and vice versa. */
25384 if (mode == CCFPmode)
25385 code = reverse_condition_maybe_unordered (code);
25386 else
25387 code = reverse_condition (code);
25390 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
25392 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
25393 to the GT bit. */
25394 switch (code)
25396 case EQ:
25397 /* Opposite of GT. */
25398 code = GT;
25399 break;
25401 case NE:
25402 code = UNLE;
25403 break;
25405 default:
25406 gcc_unreachable ();
25410 switch (code)
25412 /* Not all of these are actually distinct opcodes, but
25413 we distinguish them for clarity of the resulting assembler. */
25414 case NE: case LTGT:
25415 ccode = "ne"; break;
25416 case EQ: case UNEQ:
25417 ccode = "eq"; break;
25418 case GE: case GEU:
25419 ccode = "ge"; break;
25420 case GT: case GTU: case UNGT:
25421 ccode = "gt"; break;
25422 case LE: case LEU:
25423 ccode = "le"; break;
25424 case LT: case LTU: case UNLT:
25425 ccode = "lt"; break;
25426 case UNORDERED: ccode = "un"; break;
25427 case ORDERED: ccode = "nu"; break;
25428 case UNGE: ccode = "nl"; break;
25429 case UNLE: ccode = "ng"; break;
25430 default:
25431 gcc_unreachable ();
25434 /* Maybe we have a guess as to how likely the branch is. */
25435 pred = "";
25436 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
25437 if (note != NULL_RTX)
25439 /* PROB is the difference from 50%. */
25440 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
25441 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
25443 /* Only hint for highly probable/improbable branches on newer cpus when
25444 we have real profile data, as static prediction overrides processor
25445 dynamic prediction. For older cpus we may as well always hint, but
25446 assume not taken for branches that are very close to 50% as a
25447 mispredicted taken branch is more expensive than a
25448 mispredicted not-taken branch. */
25449 if (rs6000_always_hint
25450 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
25451 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
25452 && br_prob_note_reliable_p (note)))
25454 if (abs (prob) > REG_BR_PROB_BASE / 20
25455 && ((prob > 0) ^ need_longbranch))
25456 pred = "+";
25457 else
25458 pred = "-";
25462 if (label == NULL)
25463 s += sprintf (s, "b%slr%s ", ccode, pred);
25464 else
25465 s += sprintf (s, "b%s%s ", ccode, pred);
25467 /* We need to escape any '%' characters in the reg_names string.
25468 Assume they'd only be the first character.... */
25469 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
25470 *s++ = '%';
25471 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
25473 if (label != NULL)
25475 /* If the branch distance was too far, we may have to use an
25476 unconditional branch to go the distance. */
25477 if (need_longbranch)
25478 s += sprintf (s, ",$+8\n\tb %s", label);
25479 else
25480 s += sprintf (s, ",%s", label);
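/* Example outputs (illustrative; the exact text depends on the condition,
   the hint and the CR register): a short branch might be "beq+ 0,.L25",
   a conditional return "beqlr 0", and an out-of-range EQ branch becomes
   "bne 0,$+8" followed by "b .L25".  */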
25483 return string;
25486 /* Return the string to flip the GT bit on a CR. */
25487 char *
25488 output_e500_flip_gt_bit (rtx dst, rtx src)
25490 static char string[64];
25491 int a, b;
25493 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
25494 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
25496 /* GT bit. */
25497 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
25498 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
25500 sprintf (string, "crnot %d,%d", a, b);
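/* E.g. flipping the GT bit of CR0 into CR6 gives "crnot 25,1",
   since 4*6 + 1 = 25 and 4*0 + 1 = 1.  */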
25501 return string;
25504 /* Return insn for VSX or Altivec comparisons. */
25506 static rtx
25507 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
25509 rtx mask;
25510 machine_mode mode = GET_MODE (op0);
25512 switch (code)
25514 default:
25515 break;
25517 case GE:
25518 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
25519 return NULL_RTX;
25520 /* FALLTHRU */
25522 case EQ:
25523 case GT:
25524 case GTU:
25525 case ORDERED:
25526 case UNORDERED:
25527 case UNEQ:
25528 case LTGT:
25529 mask = gen_reg_rtx (mode);
25530 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
25531 return mask;
25534 return NULL_RTX;
25537 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
25538 DMODE is expected destination mode. This is a recursive function. */
25540 static rtx
25541 rs6000_emit_vector_compare (enum rtx_code rcode,
25542 rtx op0, rtx op1,
25543 machine_mode dmode)
25545 rtx mask;
25546 bool swap_operands = false;
25547 bool try_again = false;
25549 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
25550 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
25552 /* See if the comparison works as is. */
25553 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25554 if (mask)
25555 return mask;
25557 switch (rcode)
25559 case LT:
25560 rcode = GT;
25561 swap_operands = true;
25562 try_again = true;
25563 break;
25564 case LTU:
25565 rcode = GTU;
25566 swap_operands = true;
25567 try_again = true;
25568 break;
25569 case NE:
25570 case UNLE:
25571 case UNLT:
25572 case UNGE:
25573 case UNGT:
25574 /* Invert condition and try again.
25575 e.g., A != B becomes ~(A==B). */
25577 enum rtx_code rev_code;
25578 enum insn_code nor_code;
25579 rtx mask2;
25581 rev_code = reverse_condition_maybe_unordered (rcode);
25582 if (rev_code == UNKNOWN)
25583 return NULL_RTX;
25585 nor_code = optab_handler (one_cmpl_optab, dmode);
25586 if (nor_code == CODE_FOR_nothing)
25587 return NULL_RTX;
25589 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
25590 if (!mask2)
25591 return NULL_RTX;
25593 mask = gen_reg_rtx (dmode);
25594 emit_insn (GEN_FCN (nor_code) (mask, mask2));
25595 return mask;
25597 break;
25598 case GE:
25599 case GEU:
25600 case LE:
25601 case LEU:
25602 /* Try GT/GTU/LT/LTU OR EQ */
25604 rtx c_rtx, eq_rtx;
25605 enum insn_code ior_code;
25606 enum rtx_code new_code;
25608 switch (rcode)
25610 case GE:
25611 new_code = GT;
25612 break;
25614 case GEU:
25615 new_code = GTU;
25616 break;
25618 case LE:
25619 new_code = LT;
25620 break;
25622 case LEU:
25623 new_code = LTU;
25624 break;
25626 default:
25627 gcc_unreachable ();
25630 ior_code = optab_handler (ior_optab, dmode);
25631 if (ior_code == CODE_FOR_nothing)
25632 return NULL_RTX;
25634 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
25635 if (!c_rtx)
25636 return NULL_RTX;
25638 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
25639 if (!eq_rtx)
25640 return NULL_RTX;
25642 mask = gen_reg_rtx (dmode);
25643 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
25644 return mask;
25646 break;
25647 default:
25648 return NULL_RTX;
25651 if (try_again)
25653 if (swap_operands)
25654 std::swap (op0, op1);
25656 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25657 if (mask)
25658 return mask;
25661 /* You only get two chances. */
25662 return NULL_RTX;
25665 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
25666 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
25667 operands for the relation operation COND. */
25670 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
25671 rtx cond, rtx cc_op0, rtx cc_op1)
25673 machine_mode dest_mode = GET_MODE (dest);
25674 machine_mode mask_mode = GET_MODE (cc_op0);
25675 enum rtx_code rcode = GET_CODE (cond);
25676 machine_mode cc_mode = CCmode;
25677 rtx mask;
25678 rtx cond2;
25679 bool invert_move = false;
25681 if (VECTOR_UNIT_NONE_P (dest_mode))
25682 return 0;
25684 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
25685 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
25687 switch (rcode)
25689 /* Swap operands if we can, and fall back to doing the operation as
25690 specified, and doing a NOR to invert the test. */
25691 case NE:
25692 case UNLE:
25693 case UNLT:
25694 case UNGE:
25695 case UNGT:
25696 /* Invert condition and try again.
25697 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
25698 invert_move = true;
25699 rcode = reverse_condition_maybe_unordered (rcode);
25700 if (rcode == UNKNOWN)
25701 return 0;
25702 break;
25704 case GE:
25705 case LE:
25706 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
25708 /* Invert condition to avoid compound test. */
25709 invert_move = true;
25710 rcode = reverse_condition (rcode);
25712 break;
25714 case GTU:
25715 case GEU:
25716 case LTU:
25717 case LEU:
25718 /* Mark unsigned tests with CCUNSmode. */
25719 cc_mode = CCUNSmode;
25721 /* Invert condition to avoid compound test if necessary. */
25722 if (rcode == GEU || rcode == LEU)
25724 invert_move = true;
25725 rcode = reverse_condition (rcode);
25727 break;
25729 default:
25730 break;
25733 /* Get the vector mask for the given relational operations. */
25734 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
25736 if (!mask)
25737 return 0;
25739 if (invert_move)
25740 std::swap (op_true, op_false);
25742 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
25743 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
25744 && (GET_CODE (op_true) == CONST_VECTOR
25745 || GET_CODE (op_false) == CONST_VECTOR))
25747 rtx constant_0 = CONST0_RTX (dest_mode);
25748 rtx constant_m1 = CONSTM1_RTX (dest_mode);
25750 if (op_true == constant_m1 && op_false == constant_0)
25752 emit_move_insn (dest, mask);
25753 return 1;
25756 else if (op_true == constant_0 && op_false == constant_m1)
25758 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
25759 return 1;
25762 /* If we can't use the vector comparison directly, perhaps we can use
25763 the mask for the true or false fields, instead of loading up a
25764 constant. */
25765 if (op_true == constant_m1)
25766 op_true = mask;
25768 if (op_false == constant_0)
25769 op_false = mask;
25772 if (!REG_P (op_true) && !SUBREG_P (op_true))
25773 op_true = force_reg (dest_mode, op_true);
25775 if (!REG_P (op_false) && !SUBREG_P (op_false))
25776 op_false = force_reg (dest_mode, op_false);
25778 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
25779 CONST0_RTX (dest_mode));
25780 emit_insn (gen_rtx_SET (dest,
25781 gen_rtx_IF_THEN_ELSE (dest_mode,
25782 cond2,
25783 op_true,
25784 op_false)));
25785 return 1;
25788 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
25789 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
25790 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
25791 hardware has no such operation. */
25793 static int
25794 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25796 enum rtx_code code = GET_CODE (op);
25797 rtx op0 = XEXP (op, 0);
25798 rtx op1 = XEXP (op, 1);
25799 machine_mode compare_mode = GET_MODE (op0);
25800 machine_mode result_mode = GET_MODE (dest);
25801 bool max_p = false;
25803 if (result_mode != compare_mode)
25804 return 0;
25806 if (code == GE || code == GT)
25807 max_p = true;
25808 else if (code == LE || code == LT)
25809 max_p = false;
25810 else
25811 return 0;
25813 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
25816 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
25817 max_p = !max_p;
25819 else
25820 return 0;
25822 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
25823 return 1;
25826 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
25827 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
25828 operands of the last comparison is nonzero/true, FALSE_COND if it is
25829 zero/false. Return 0 if the hardware has no such operation. */
25831 static int
25832 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25834 enum rtx_code code = GET_CODE (op);
25835 rtx op0 = XEXP (op, 0);
25836 rtx op1 = XEXP (op, 1);
25837 machine_mode result_mode = GET_MODE (dest);
25838 rtx compare_rtx;
25839 rtx cmove_rtx;
25840 rtx clobber_rtx;
25842 if (!can_create_pseudo_p ())
25843 return 0;
25845 switch (code)
25847 case EQ:
25848 case GE:
25849 case GT:
25850 break;
25852 case NE:
25853 case LT:
25854 case LE:
25855 code = swap_condition (code);
25856 std::swap (op0, op1);
25857 break;
25859 default:
25860 return 0;
25863 /* Generate: [(parallel [(set (dest)
25864 (if_then_else (op (cmp1) (cmp2))
25865 (true)
25866 (false)))
25867 (clobber (scratch))])]. */
25869 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
25870 cmove_rtx = gen_rtx_SET (dest,
25871 gen_rtx_IF_THEN_ELSE (result_mode,
25872 compare_rtx,
25873 true_cond,
25874 false_cond));
25876 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
25877 emit_insn (gen_rtx_PARALLEL (VOIDmode,
25878 gen_rtvec (2, cmove_rtx, clobber_rtx)));
25880 return 1;
25883 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
25884 operands of the last comparison is nonzero/true, FALSE_COND if it
25885 is zero/false. Return 0 if the hardware has no such operation. */
25888 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25890 enum rtx_code code = GET_CODE (op);
25891 rtx op0 = XEXP (op, 0);
25892 rtx op1 = XEXP (op, 1);
25893 machine_mode compare_mode = GET_MODE (op0);
25894 machine_mode result_mode = GET_MODE (dest);
25895 rtx temp;
25896 bool is_against_zero;
25898 /* These modes should always match. */
25899 if (GET_MODE (op1) != compare_mode
25900 /* In the isel case however, we can use a compare immediate, so
25901 op1 may be a small constant. */
25902 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
25903 return 0;
25904 if (GET_MODE (true_cond) != result_mode)
25905 return 0;
25906 if (GET_MODE (false_cond) != result_mode)
25907 return 0;
25909 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
25910 if (TARGET_P9_MINMAX
25911 && (compare_mode == SFmode || compare_mode == DFmode)
25912 && (result_mode == SFmode || result_mode == DFmode))
25914 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
25915 return 1;
25917 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
25918 return 1;
25921 /* Don't allow using floating point comparisons for integer results for
25922 now. */
25923 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
25924 return 0;
25926 /* First, work out if the hardware can do this at all, or
25927 if it's too slow.... */
25928 if (!FLOAT_MODE_P (compare_mode))
25930 if (TARGET_ISEL)
25931 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
25932 return 0;
25934 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
25935 && SCALAR_FLOAT_MODE_P (compare_mode))
25936 return 0;
25938 is_against_zero = op1 == CONST0_RTX (compare_mode);
25940 /* A floating-point subtract might overflow, underflow, or produce
25941 an inexact result, thus changing the floating-point flags, so it
25942 can't be generated if we care about that. It's safe if one side
25943 of the construct is zero, since then no subtract will be
25944 generated. */
25945 if (SCALAR_FLOAT_MODE_P (compare_mode)
25946 && flag_trapping_math && ! is_against_zero)
25947 return 0;
25949 /* Eliminate half of the comparisons by switching operands, this
25950 makes the remaining code simpler. */
25951 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
25952 || code == LTGT || code == LT || code == UNLE)
25954 code = reverse_condition_maybe_unordered (code);
25955 temp = true_cond;
25956 true_cond = false_cond;
25957 false_cond = temp;
25960 /* UNEQ and LTGT take four instructions for a comparison with zero,
25961 it'll probably be faster to use a branch here too. */
25962 if (code == UNEQ && HONOR_NANS (compare_mode))
25963 return 0;
25965 /* We're going to try to implement comparisons by performing
25966 a subtract, then comparing against zero. Unfortunately,
25967 Inf - Inf is NaN which is not zero, and so if we don't
25968 know that the operand is finite and the comparison
25969    would treat EQ differently from UNORDERED, we can't do it.  */
25970 if (HONOR_INFINITIES (compare_mode)
25971 && code != GT && code != UNGE
25972 && (GET_CODE (op1) != CONST_DOUBLE
25973 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
25974 /* Constructs of the form (a OP b ? a : b) are safe. */
25975 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
25976 || (! rtx_equal_p (op0, true_cond)
25977 && ! rtx_equal_p (op1, true_cond))))
25978 return 0;
25980 /* At this point we know we can use fsel. */
25982 /* Reduce the comparison to a comparison against zero. */
25983 if (! is_against_zero)
25985 temp = gen_reg_rtx (compare_mode);
25986 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
25987 op0 = temp;
25988 op1 = CONST0_RTX (compare_mode);
25991 /* If we don't care about NaNs we can reduce some of the comparisons
25992 down to faster ones. */
25993 if (! HONOR_NANS (compare_mode))
25994 switch (code)
25996 case GT:
25997 code = LE;
25998 temp = true_cond;
25999 true_cond = false_cond;
26000 false_cond = temp;
26001 break;
26002 case UNGE:
26003 code = GE;
26004 break;
26005 case UNEQ:
26006 code = EQ;
26007 break;
26008 default:
26009 break;
26012 /* Now, reduce everything down to a GE. */
26013 switch (code)
26015 case GE:
26016 break;
26018 case LE:
26019 temp = gen_reg_rtx (compare_mode);
26020 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26021 op0 = temp;
26022 break;
26024 case ORDERED:
26025 temp = gen_reg_rtx (compare_mode);
26026 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
26027 op0 = temp;
26028 break;
26030 case EQ:
26031 temp = gen_reg_rtx (compare_mode);
26032 emit_insn (gen_rtx_SET (temp,
26033 gen_rtx_NEG (compare_mode,
26034 gen_rtx_ABS (compare_mode, op0))));
26035 op0 = temp;
26036 break;
26038 case UNGE:
26039 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
26040 temp = gen_reg_rtx (result_mode);
26041 emit_insn (gen_rtx_SET (temp,
26042 gen_rtx_IF_THEN_ELSE (result_mode,
26043 gen_rtx_GE (VOIDmode,
26044 op0, op1),
26045 true_cond, false_cond)));
26046 false_cond = true_cond;
26047 true_cond = temp;
26049 temp = gen_reg_rtx (compare_mode);
26050 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26051 op0 = temp;
26052 break;
26054 case GT:
26055 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
26056 temp = gen_reg_rtx (result_mode);
26057 emit_insn (gen_rtx_SET (temp,
26058 gen_rtx_IF_THEN_ELSE (result_mode,
26059 gen_rtx_GE (VOIDmode,
26060 op0, op1),
26061 true_cond, false_cond)));
26062 true_cond = false_cond;
26063 false_cond = temp;
26065 temp = gen_reg_rtx (compare_mode);
26066 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26067 op0 = temp;
26068 break;
26070 default:
26071 gcc_unreachable ();
26074 emit_insn (gen_rtx_SET (dest,
26075 gen_rtx_IF_THEN_ELSE (result_mode,
26076 gen_rtx_GE (VOIDmode,
26077 op0, op1),
26078 true_cond, false_cond)));
26079 return 1;
26082 /* Same as above, but for ints (isel). */
26084 static int
26085 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
26087 rtx condition_rtx, cr;
26088 machine_mode mode = GET_MODE (dest);
26089 enum rtx_code cond_code;
26090 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
26091 bool signedp;
26093 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
26094 return 0;
26096 /* We still have to do the compare, because isel doesn't do a
26097 compare, it just looks at the CRx bits set by a previous compare
26098 instruction. */
26099 condition_rtx = rs6000_generate_compare (op, mode);
26100 cond_code = GET_CODE (condition_rtx);
26101 cr = XEXP (condition_rtx, 0);
26102 signedp = GET_MODE (cr) == CCmode;
26104 isel_func = (mode == SImode
26105 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
26106 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
26108 switch (cond_code)
26110 case LT: case GT: case LTU: case GTU: case EQ:
26111 /* isel handles these directly. */
26112 break;
26114 default:
26115 /* We need to swap the sense of the comparison. */
26117 std::swap (false_cond, true_cond);
26118 PUT_CODE (condition_rtx, reverse_condition (cond_code));
26120 break;
26123 false_cond = force_reg (mode, false_cond);
26124 if (true_cond != const0_rtx)
26125 true_cond = force_reg (mode, true_cond);
26127 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
26129 return 1;
26132 const char *
26133 output_isel (rtx *operands)
26135 enum rtx_code code;
26137 code = GET_CODE (operands[1]);
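/* isel tests a single CR bit, so the conditions handled below are printed
   by reversing the sense of the comparison and swapping the two source
   operands; %j is assumed here to print the CR bit number for the
   (possibly reversed) condition.  */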
26139 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
26141 gcc_assert (GET_CODE (operands[2]) == REG
26142 && GET_CODE (operands[3]) == REG);
26143 PUT_CODE (operands[1], reverse_condition (code));
26144 return "isel %0,%3,%2,%j1";
26147 return "isel %0,%2,%3,%j1";
26150 void
26151 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
26153 machine_mode mode = GET_MODE (op0);
26154 enum rtx_code c;
26155 rtx target;
26157 /* VSX/altivec have direct min/max insns. */
26158 if ((code == SMAX || code == SMIN)
26159 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
26160 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
26162 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
26163 return;
26166 if (code == SMAX || code == SMIN)
26167 c = GE;
26168 else
26169 c = GEU;
26171 if (code == SMAX || code == UMAX)
26172 target = emit_conditional_move (dest, c, op0, op1, mode,
26173 op0, op1, mode, 0);
26174 else
26175 target = emit_conditional_move (dest, c, op0, op1, mode,
26176 op1, op0, mode, 0);
26177 gcc_assert (target);
26178 if (target != dest)
26179 emit_move_insn (dest, target);
26182 /* Split a signbit operation on 64-bit machines with direct move. Also allow
26183 for the value to come from memory or if it is already loaded into a GPR. */
26185 void
26186 rs6000_split_signbit (rtx dest, rtx src)
26188 machine_mode d_mode = GET_MODE (dest);
26189 machine_mode s_mode = GET_MODE (src);
26190 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
26191 rtx shift_reg = dest_di;
26193 gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
26195 if (MEM_P (src))
26197 rtx mem = (WORDS_BIG_ENDIAN
26198 ? adjust_address (src, DImode, 0)
26199 : adjust_address (src, DImode, 8));
26200 emit_insn (gen_rtx_SET (dest_di, mem));
26203 else
26205 unsigned int r = reg_or_subregno (src);
26207 if (INT_REGNO_P (r))
26208 shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
26210 else
26212 /* Generate the special mfvsrd instruction to get it in a GPR. */
26213 gcc_assert (VSX_REGNO_P (r));
26214 if (s_mode == KFmode)
26215 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
26216 else
26217 emit_insn (gen_signbittf2_dm2 (dest_di, src));
26221 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
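/* The doubleword selected above holds the IEEE 128-bit sign in its most
   significant bit, so the single shift right by 63 leaves just 0 or 1
   in DEST.  */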
26222 return;
26225 /* A subroutine of the atomic operation splitters. Jump to LABEL if
26226 COND is true. Mark the jump as unlikely to be taken. */
26228 static void
26229 emit_unlikely_jump (rtx cond, rtx label)
26231 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
26232 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
26233 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
26236 /* A subroutine of the atomic operation splitters. Emit a load-locked
26237    instruction in MODE.  For QI/HImode, possibly use a pattern that includes
26238 the zero_extend operation. */
26240 static void
26241 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
26243 rtx (*fn) (rtx, rtx) = NULL;
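/* These patterns correspond to the larx family of instructions:
   lbarx/lharx (power8 and later, per the comments elsewhere in this
   file), lwarx, ldarx and lqarx -- a summary of the usual mapping, not
   taken from the patterns themselves.  */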
26245 switch (mode)
26247 case E_QImode:
26248 fn = gen_load_lockedqi;
26249 break;
26250 case E_HImode:
26251 fn = gen_load_lockedhi;
26252 break;
26253 case E_SImode:
26254 if (GET_MODE (mem) == QImode)
26255 fn = gen_load_lockedqi_si;
26256 else if (GET_MODE (mem) == HImode)
26257 fn = gen_load_lockedhi_si;
26258 else
26259 fn = gen_load_lockedsi;
26260 break;
26261 case E_DImode:
26262 fn = gen_load_lockeddi;
26263 break;
26264 case E_TImode:
26265 fn = gen_load_lockedti;
26266 break;
26267 default:
26268 gcc_unreachable ();
26270 emit_insn (fn (reg, mem));
26273 /* A subroutine of the atomic operation splitters. Emit a store-conditional
26274 instruction in MODE. */
26276 static void
26277 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
26279 rtx (*fn) (rtx, rtx, rtx) = NULL;
26281 switch (mode)
26283 case E_QImode:
26284 fn = gen_store_conditionalqi;
26285 break;
26286 case E_HImode:
26287 fn = gen_store_conditionalhi;
26288 break;
26289 case E_SImode:
26290 fn = gen_store_conditionalsi;
26291 break;
26292 case E_DImode:
26293 fn = gen_store_conditionaldi;
26294 break;
26295 case E_TImode:
26296 fn = gen_store_conditionalti;
26297 break;
26298 default:
26299 gcc_unreachable ();
26302 /* Emit sync before stwcx. to address PPC405 Erratum. */
26303 if (PPC405_ERRATUM77)
26304 emit_insn (gen_hwsync ());
26306 emit_insn (fn (res, mem, val));
26309 /* Expand barriers before and after a load_locked/store_cond sequence. */
26311 static rtx
26312 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
26314 rtx addr = XEXP (mem, 0);
26315 int strict_p = (reload_in_progress || reload_completed);
26317 if (!legitimate_indirect_address_p (addr, strict_p)
26318 && !legitimate_indexed_address_p (addr, strict_p))
26320 addr = force_reg (Pmode, addr);
26321 mem = replace_equiv_address_nv (mem, addr);
26324 switch (model)
26326 case MEMMODEL_RELAXED:
26327 case MEMMODEL_CONSUME:
26328 case MEMMODEL_ACQUIRE:
26329 break;
26330 case MEMMODEL_RELEASE:
26331 case MEMMODEL_ACQ_REL:
26332 emit_insn (gen_lwsync ());
26333 break;
26334 case MEMMODEL_SEQ_CST:
26335 emit_insn (gen_hwsync ());
26336 break;
26337 default:
26338 gcc_unreachable ();
26340 return mem;
26343 static void
26344 rs6000_post_atomic_barrier (enum memmodel model)
26346 switch (model)
26348 case MEMMODEL_RELAXED:
26349 case MEMMODEL_CONSUME:
26350 case MEMMODEL_RELEASE:
26351 break;
26352 case MEMMODEL_ACQUIRE:
26353 case MEMMODEL_ACQ_REL:
26354 case MEMMODEL_SEQ_CST:
26355 emit_insn (gen_isync ());
26356 break;
26357 default:
26358 gcc_unreachable ();
26362 /* A subroutine of the various atomic expanders. For sub-word operations,
26363 we must adjust things to operate on SImode. Given the original MEM,
26364 return a new aligned memory. Also build and return the quantities by
26365 which to shift and mask. */
26367 static rtx
26368 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
26370 rtx addr, align, shift, mask, mem;
26371 HOST_WIDE_INT shift_mask;
26372 machine_mode mode = GET_MODE (orig_mem);
26374 /* For smaller modes, we have to implement this via SImode. */
26375 shift_mask = (mode == QImode ? 0x18 : 0x10);
26377 addr = XEXP (orig_mem, 0);
26378 addr = force_reg (GET_MODE (addr), addr);
26380 /* Aligned memory containing subword. Generate a new memory. We
26381 do not want any of the existing MEM_ATTR data, as we're now
26382 accessing memory outside the original object. */
26383 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
26384 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26385 mem = gen_rtx_MEM (SImode, align);
26386 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
26387 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
26388 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
26390 /* Shift amount for subword relative to aligned word. */
26391 shift = gen_reg_rtx (SImode);
26392 addr = gen_lowpart (SImode, addr);
26393 rtx tmp = gen_reg_rtx (SImode);
26394 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
26395 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
26396 if (BYTES_BIG_ENDIAN)
26397 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
26398 shift, 1, OPTAB_LIB_WIDEN);
26399 *pshift = shift;
26401 /* Mask for insertion. */
26402 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
26403 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
26404 *pmask = mask;
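/* Worked example (little-endian, illustrative): a QImode access at an
   address A with (A & 3) == 2 yields MEM = *(A & -4), *PSHIFT = 16 and
   *PMASK = 0xff << 16; on big-endian the XOR above turns the 16
   into 8.  */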
26406 return mem;
26409 /* A subroutine of the various atomic expanders. For sub-word operands,
26410    combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
26412 static rtx
26413 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
26415 rtx x;
26417 x = gen_reg_rtx (SImode);
26418 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
26419 gen_rtx_NOT (SImode, mask),
26420 oldval)));
26422 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
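/* I.e. the result is (OLDVAL & ~MASK) | NEWVAL, with NEWVAL assumed to
   have been shifted into position by the caller.  */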
26424 return x;
26427 /* A subroutine of the various atomic expanders. For sub-word operands,
26428 extract WIDE to NARROW via SHIFT. */
26430 static void
26431 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
26433 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
26434 wide, 1, OPTAB_LIB_WIDEN);
26435 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
26438 /* Expand an atomic compare and swap operation. */
26440 void
26441 rs6000_expand_atomic_compare_and_swap (rtx operands[])
26443 rtx boolval, retval, mem, oldval, newval, cond;
26444 rtx label1, label2, x, mask, shift;
26445 machine_mode mode, orig_mode;
26446 enum memmodel mod_s, mod_f;
26447 bool is_weak;
26449 boolval = operands[0];
26450 retval = operands[1];
26451 mem = operands[2];
26452 oldval = operands[3];
26453 newval = operands[4];
26454 is_weak = (INTVAL (operands[5]) != 0);
26455 mod_s = memmodel_base (INTVAL (operands[6]));
26456 mod_f = memmodel_base (INTVAL (operands[7]));
26457 orig_mode = mode = GET_MODE (mem);
26459 mask = shift = NULL_RTX;
26460 if (mode == QImode || mode == HImode)
26462 /* Before power8, we didn't have access to lbarx/lharx, so generate a
26463 lwarx and shift/mask operations. With power8, we need to do the
26464 comparison in SImode, but the store is still done in QI/HImode. */
26465 oldval = convert_modes (SImode, mode, oldval, 1);
26467 if (!TARGET_SYNC_HI_QI)
26469 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
25471	  /* Shift and mask OLDVAL into position within the word.  */
26472 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
26473 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26475 /* Shift and mask NEWVAL into position within the word. */
26476 newval = convert_modes (SImode, mode, newval, 1);
26477 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
26478 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26481 /* Prepare to adjust the return value. */
26482 retval = gen_reg_rtx (SImode);
26483 mode = SImode;
26485 else if (reg_overlap_mentioned_p (retval, oldval))
26486 oldval = copy_to_reg (oldval);
26488 if (mode != TImode && !reg_or_short_operand (oldval, mode))
26489 oldval = copy_to_mode_reg (mode, oldval);
26491 if (reg_overlap_mentioned_p (retval, newval))
26492 newval = copy_to_reg (newval);
26494 mem = rs6000_pre_atomic_barrier (mem, mod_s);
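/* The emitted sequence for a strong, word-sized, seq-cst CAS is roughly
   (a sketch only; the exact code depends on mode, weakness and memory
   model):
     sync; 1: lwarx ret,0,mem; cmpw ret,old; bne- 2f;
     stwcx. new,0,mem; bne- 1b; 2: isync
   A weak CAS omits the backward branch around the reservation loop.  */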
26496 label1 = NULL_RTX;
26497 if (!is_weak)
26499 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26500 emit_label (XEXP (label1, 0));
26502 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26504 emit_load_locked (mode, retval, mem);
26506 x = retval;
26507 if (mask)
26508 x = expand_simple_binop (SImode, AND, retval, mask,
26509 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26511 cond = gen_reg_rtx (CCmode);
26512 /* If we have TImode, synthesize a comparison. */
26513 if (mode != TImode)
26514 x = gen_rtx_COMPARE (CCmode, x, oldval);
26515 else
26517 rtx xor1_result = gen_reg_rtx (DImode);
26518 rtx xor2_result = gen_reg_rtx (DImode);
26519 rtx or_result = gen_reg_rtx (DImode);
26520 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
26521 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
26522 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
26523 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
26525 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
26526 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
26527 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
26528 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
26531 emit_insn (gen_rtx_SET (cond, x));
26533 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26534 emit_unlikely_jump (x, label2);
26536 x = newval;
26537 if (mask)
26538 x = rs6000_mask_atomic_subword (retval, newval, mask);
26540 emit_store_conditional (orig_mode, cond, mem, x);
26542 if (!is_weak)
26544 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26545 emit_unlikely_jump (x, label1);
26548 if (!is_mm_relaxed (mod_f))
26549 emit_label (XEXP (label2, 0));
26551 rs6000_post_atomic_barrier (mod_s);
26553 if (is_mm_relaxed (mod_f))
26554 emit_label (XEXP (label2, 0));
26556 if (shift)
26557 rs6000_finish_atomic_subword (operands[1], retval, shift);
26558 else if (mode != GET_MODE (operands[1]))
26559 convert_move (operands[1], retval, 1);
26561 /* In all cases, CR0 contains EQ on success, and NE on failure. */
26562 x = gen_rtx_EQ (SImode, cond, const0_rtx);
26563 emit_insn (gen_rtx_SET (boolval, x));
26566 /* Expand an atomic exchange operation. */
26568 void
26569 rs6000_expand_atomic_exchange (rtx operands[])
26571 rtx retval, mem, val, cond;
26572 machine_mode mode;
26573 enum memmodel model;
26574 rtx label, x, mask, shift;
26576 retval = operands[0];
26577 mem = operands[1];
26578 val = operands[2];
26579 model = memmodel_base (INTVAL (operands[3]));
26580 mode = GET_MODE (mem);
26582 mask = shift = NULL_RTX;
26583 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
26585 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26587       /* Shift and mask VAL into position within the word.  */
26588 val = convert_modes (SImode, mode, val, 1);
26589 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26590 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26592 /* Prepare to adjust the return value. */
26593 retval = gen_reg_rtx (SImode);
26594 mode = SImode;
26597 mem = rs6000_pre_atomic_barrier (mem, model);
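/* The core loop is roughly "1: lwarx ret,0,mem; stwcx. val,0,mem;
   bne- 1b", bracketed by whatever barriers MODEL requires (a sketch
   only).  */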
26599 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26600 emit_label (XEXP (label, 0));
26602 emit_load_locked (mode, retval, mem);
26604 x = val;
26605 if (mask)
26606 x = rs6000_mask_atomic_subword (retval, val, mask);
26608 cond = gen_reg_rtx (CCmode);
26609 emit_store_conditional (mode, cond, mem, x);
26611 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26612 emit_unlikely_jump (x, label);
26614 rs6000_post_atomic_barrier (model);
26616 if (shift)
26617 rs6000_finish_atomic_subword (operands[0], retval, shift);
26620 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
26621 to perform. MEM is the memory on which to operate. VAL is the second
26622 operand of the binary operator. BEFORE and AFTER are optional locations to
26623    return the value of MEM either before or after the operation.  MODEL_RTX
26624 is a CONST_INT containing the memory model to use. */
26626 void
26627 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
26628 rtx orig_before, rtx orig_after, rtx model_rtx)
26630 enum memmodel model = memmodel_base (INTVAL (model_rtx));
26631 machine_mode mode = GET_MODE (mem);
26632 machine_mode store_mode = mode;
26633 rtx label, x, cond, mask, shift;
26634 rtx before = orig_before, after = orig_after;
26636 mask = shift = NULL_RTX;
26637 /* On power8, we want to use SImode for the operation. On previous systems,
26638 use the operation in a subword and shift/mask to get the proper byte or
26639 halfword. */
26640 if (mode == QImode || mode == HImode)
26642 if (TARGET_SYNC_HI_QI)
26644 val = convert_modes (SImode, mode, val, 1);
26646 /* Prepare to adjust the return value. */
26647 before = gen_reg_rtx (SImode);
26648 if (after)
26649 after = gen_reg_rtx (SImode);
26650 mode = SImode;
26652 else
26654 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26656	  /* Shift and mask VAL into position within the word.  */
26657 val = convert_modes (SImode, mode, val, 1);
26658 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26659 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26661 switch (code)
26663 case IOR:
26664 case XOR:
26665 /* We've already zero-extended VAL. That is sufficient to
26666 make certain that it does not affect other bits. */
26667 mask = NULL;
26668 break;
26670 case AND:
26671 /* If we make certain that all of the other bits in VAL are
26672 set, that will be sufficient to not affect other bits. */
26673 x = gen_rtx_NOT (SImode, mask);
26674 x = gen_rtx_IOR (SImode, x, val);
26675 emit_insn (gen_rtx_SET (val, x));
26676 mask = NULL;
26677 break;
26679 case NOT:
26680 case PLUS:
26681 case MINUS:
26682 /* These will all affect bits outside the field and need
26683 adjustment via MASK within the loop. */
26684 break;
26686 default:
26687 gcc_unreachable ();
26690 /* Prepare to adjust the return value. */
26691 before = gen_reg_rtx (SImode);
26692 if (after)
26693 after = gen_reg_rtx (SImode);
26694 store_mode = mode = SImode;
26698 mem = rs6000_pre_atomic_barrier (mem, model);
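/* For example, a word-sized atomic add is emitted roughly as
   "1: lwarx before,0,mem; add after,before,val; stwcx. after,0,mem;
   bne- 1b" (a sketch; the subword cases also use the shift/mask code
   above).  */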
26700 label = gen_label_rtx ();
26701 emit_label (label);
26702 label = gen_rtx_LABEL_REF (VOIDmode, label);
26704 if (before == NULL_RTX)
26705 before = gen_reg_rtx (mode);
26707 emit_load_locked (mode, before, mem);
26709 if (code == NOT)
26711 x = expand_simple_binop (mode, AND, before, val,
26712 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26713 after = expand_simple_unop (mode, NOT, x, after, 1);
26715 else
26717 after = expand_simple_binop (mode, code, before, val,
26718 after, 1, OPTAB_LIB_WIDEN);
26721 x = after;
26722 if (mask)
26724 x = expand_simple_binop (SImode, AND, after, mask,
26725 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26726 x = rs6000_mask_atomic_subword (before, x, mask);
26728 else if (store_mode != mode)
26729 x = convert_modes (store_mode, mode, x, 1);
26731 cond = gen_reg_rtx (CCmode);
26732 emit_store_conditional (store_mode, cond, mem, x);
26734 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26735 emit_unlikely_jump (x, label);
26737 rs6000_post_atomic_barrier (model);
26739 if (shift)
26741 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
26742	 then do the calculations in a SImode register.  */
26743 if (orig_before)
26744 rs6000_finish_atomic_subword (orig_before, before, shift);
26745 if (orig_after)
26746 rs6000_finish_atomic_subword (orig_after, after, shift);
26748 else if (store_mode != mode)
26750 /* QImode/HImode on machines with lbarx/lharx where we do the native
26751	 operation and then do the calculations in a SImode register.  */
26752 if (orig_before)
26753 convert_move (orig_before, before, 1);
26754 if (orig_after)
26755 convert_move (orig_after, after, 1);
26757 else if (orig_after && after != orig_after)
26758 emit_move_insn (orig_after, after);
26761 /* Emit instructions to move SRC to DST. Called by splitters for
26762 multi-register moves. It will emit at most one instruction for
26763 each register that is accessed; that is, it won't emit li/lis pairs
26764 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26765 register. */
26767 void
26768 rs6000_split_multireg_move (rtx dst, rtx src)
26770 /* The register number of the first register being moved. */
26771 int reg;
26772 /* The mode that is to be moved. */
26773 machine_mode mode;
26774 /* The mode that the move is being done in, and its size. */
26775 machine_mode reg_mode;
26776 int reg_mode_size;
26777 /* The number of registers that will be moved. */
26778 int nregs;
26780 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26781 mode = GET_MODE (dst);
26782 nregs = hard_regno_nregs (reg, mode);
26783 if (FP_REGNO_P (reg))
26784 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26785 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
26786 else if (ALTIVEC_REGNO_P (reg))
26787 reg_mode = V16QImode;
26788 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
26789 reg_mode = DFmode;
26790 else
26791 reg_mode = word_mode;
26792 reg_mode_size = GET_MODE_SIZE (reg_mode);
26794 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26796 /* TDmode residing in FP registers is special, since the ISA requires that
26797 the lower-numbered word of a register pair is always the most significant
26798 word, even in little-endian mode. This does not match the usual subreg
26799    semantics, so we cannot use simplify_gen_subreg in those cases.  Access
26800 the appropriate constituent registers "by hand" in little-endian mode.
26802 Note we do not need to check for destructive overlap here since TDmode
26803 can only reside in even/odd register pairs. */
26804 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26806 rtx p_src, p_dst;
26807 int i;
26809 for (i = 0; i < nregs; i++)
26811 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
26812 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
26813 else
26814 p_src = simplify_gen_subreg (reg_mode, src, mode,
26815 i * reg_mode_size);
26817 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26818 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26819 else
26820 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26821 i * reg_mode_size);
26823 emit_insn (gen_rtx_SET (p_dst, p_src));
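/* E.g. a TDmode copy from f4:f5 to f8:f9 on little-endian emits
   f9 = f5 and then f8 = f4, so the most significant word stays in the
   lower-numbered register of each pair.  */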
26826 return;
26829 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
26831 /* Move register range backwards, if we might have destructive
26832 overlap. */
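/* E.g. a two-word move from r3:r4 to r4:r5 must copy r4 -> r5 before
   r3 -> r4, which the backwards loop below guarantees.  */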
26833 int i;
26834 for (i = nregs - 1; i >= 0; i--)
26835 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26836 i * reg_mode_size),
26837 simplify_gen_subreg (reg_mode, src, mode,
26838 i * reg_mode_size)));
26840 else
26842 int i;
26843 int j = -1;
26844 bool used_update = false;
26845 rtx restore_basereg = NULL_RTX;
26847 if (MEM_P (src) && INT_REGNO_P (reg))
26849 rtx breg;
26851 if (GET_CODE (XEXP (src, 0)) == PRE_INC
26852 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
26854 rtx delta_rtx;
26855 breg = XEXP (XEXP (src, 0), 0);
26856 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
26857 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
26858 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
26859 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26860 src = replace_equiv_address (src, breg);
26862 else if (! rs6000_offsettable_memref_p (src, reg_mode))
26864 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
26866 rtx basereg = XEXP (XEXP (src, 0), 0);
26867 if (TARGET_UPDATE)
26869 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
26870 emit_insn (gen_rtx_SET (ndst,
26871 gen_rtx_MEM (reg_mode,
26872 XEXP (src, 0))));
26873 used_update = true;
26875 else
26876 emit_insn (gen_rtx_SET (basereg,
26877 XEXP (XEXP (src, 0), 1)));
26878 src = replace_equiv_address (src, basereg);
26880 else
26882 rtx basereg = gen_rtx_REG (Pmode, reg);
26883 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
26884 src = replace_equiv_address (src, basereg);
26888 breg = XEXP (src, 0);
26889 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
26890 breg = XEXP (breg, 0);
26892 /* If the base register we are using to address memory is
26893 also a destination reg, then change that register last. */
26894 if (REG_P (breg)
26895 && REGNO (breg) >= REGNO (dst)
26896 && REGNO (breg) < REGNO (dst) + nregs)
26897 j = REGNO (breg) - REGNO (dst);
26899 else if (MEM_P (dst) && INT_REGNO_P (reg))
26901 rtx breg;
26903 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
26904 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
26906 rtx delta_rtx;
26907 breg = XEXP (XEXP (dst, 0), 0);
26908 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
26909 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
26910 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
26912 /* We have to update the breg before doing the store.
26913 Use store with update, if available. */
26915 if (TARGET_UPDATE)
26917 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26918 emit_insn (TARGET_32BIT
26919 ? (TARGET_POWERPC64
26920 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
26921 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
26922 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
26923 used_update = true;
26925 else
26926 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26927 dst = replace_equiv_address (dst, breg);
26929 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
26930 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
26932 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
26934 rtx basereg = XEXP (XEXP (dst, 0), 0);
26935 if (TARGET_UPDATE)
26937 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26938 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
26939 XEXP (dst, 0)),
26940 nsrc));
26941 used_update = true;
26943 else
26944 emit_insn (gen_rtx_SET (basereg,
26945 XEXP (XEXP (dst, 0), 1)));
26946 dst = replace_equiv_address (dst, basereg);
26948 else
26950 rtx basereg = XEXP (XEXP (dst, 0), 0);
26951 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
26952 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
26953 && REG_P (basereg)
26954 && REG_P (offsetreg)
26955 && REGNO (basereg) != REGNO (offsetreg));
26956 if (REGNO (basereg) == 0)
26958 rtx tmp = offsetreg;
26959 offsetreg = basereg;
26960 basereg = tmp;
26962 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
26963 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
26964 dst = replace_equiv_address (dst, basereg);
26967 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
26968 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
26971 for (i = 0; i < nregs; i++)
26973 /* Calculate index to next subword. */
26974 ++j;
26975 if (j == nregs)
26976 j = 0;
26978 /* If compiler already emitted move of first word by
26979 store with update, no need to do anything. */
26980 if (j == 0 && used_update)
26981 continue;
26983 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26984 j * reg_mode_size),
26985 simplify_gen_subreg (reg_mode, src, mode,
26986 j * reg_mode_size)));
26988 if (restore_basereg != NULL_RTX)
26989 emit_insn (restore_basereg);
26994 /* This page contains routines that are used to determine what the
26995 function prologue and epilogue code will do and write them out. */
26997 static inline bool
26998 save_reg_p (int r)
27000 return !call_used_regs[r] && df_regs_ever_live_p (r);
27003 /* Determine whether the gp REG is really used. */
27005 static bool
27006 rs6000_reg_live_or_pic_offset_p (int reg)
27008 /* We need to mark the PIC offset register live for the same conditions
27009 as it is set up, or otherwise it won't be saved before we clobber it. */
27011 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
27013 if (TARGET_TOC && TARGET_MINIMAL_TOC
27014 && (crtl->calls_eh_return
27015 || df_regs_ever_live_p (reg)
27016 || !constant_pool_empty_p ()))
27017 return true;
27019 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
27020 && flag_pic)
27021 return true;
27024 /* If the function calls eh_return, claim used all the registers that would
27025 be checked for liveness otherwise. */
27027 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
27028 && !call_used_regs[reg]);
27031 /* Return the first fixed-point register that is required to be
27032 saved. 32 if none. */
27035 first_reg_to_save (void)
27037 int first_reg;
27039 /* Find lowest numbered live register. */
27040 for (first_reg = 13; first_reg <= 31; first_reg++)
27041 if (save_reg_p (first_reg))
27042 break;
27044 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
27045 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
27046 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
27047 || (TARGET_TOC && TARGET_MINIMAL_TOC))
27048 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
27049 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
27051 #if TARGET_MACHO
27052 if (flag_pic
27053 && crtl->uses_pic_offset_table
27054 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
27055 return RS6000_PIC_OFFSET_TABLE_REGNUM;
27056 #endif
27058 return first_reg;
27061 /* Similar, for FP regs. */
27064 first_fp_reg_to_save (void)
27066 int first_reg;
27068 /* Find lowest numbered live register. */
27069 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
27070 if (save_reg_p (first_reg))
27071 break;
27073 return first_reg;
27076 /* Similar, for AltiVec regs. */
27078 static int
27079 first_altivec_reg_to_save (void)
27081 int i;
27083 /* Stack frame remains as is unless we are in AltiVec ABI. */
27084 if (! TARGET_ALTIVEC_ABI)
27085 return LAST_ALTIVEC_REGNO + 1;
27087 /* On Darwin, the unwind routines are compiled without
27088 TARGET_ALTIVEC, and use save_world to save/restore the
27089 altivec registers when necessary. */
27090 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
27091 && ! TARGET_ALTIVEC)
27092 return FIRST_ALTIVEC_REGNO + 20;
27094 /* Find lowest numbered live register. */
27095 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
27096 if (save_reg_p (i))
27097 break;
27099 return i;
27102 /* Return a 32-bit mask of the AltiVec registers we need to set in
27103    VRSAVE.  Bit n of the return value is 1 if Vn is live; V0 occupies
27104    the MSB of the 32-bit word.  */
27106 static unsigned int
27107 compute_vrsave_mask (void)
27109 unsigned int i, mask = 0;
27111 /* On Darwin, the unwind routines are compiled without
27112 TARGET_ALTIVEC, and use save_world to save/restore the
27113 call-saved altivec registers when necessary. */
27114 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
27115 && ! TARGET_ALTIVEC)
27116 mask |= 0xFFF;
27118 /* First, find out if we use _any_ altivec registers. */
27119 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
27120 if (df_regs_ever_live_p (i))
27121 mask |= ALTIVEC_REG_BIT (i);
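/* ALTIVEC_REG_BIT places V0 at the most significant bit, so e.g. a
   live V20 contributes 0x800 to the mask.  */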
27123 if (mask == 0)
27124 return mask;
27126 /* Next, remove the argument registers from the set. These must
27127 be in the VRSAVE mask set by the caller, so we don't need to add
27128 them in again. More importantly, the mask we compute here is
27129 used to generate CLOBBERs in the set_vrsave insn, and we do not
27130 wish the argument registers to die. */
27131 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
27132 mask &= ~ALTIVEC_REG_BIT (i);
27134 /* Similarly, remove the return value from the set. */
27136 bool yes = false;
27137 diddle_return_value (is_altivec_return_reg, &yes);
27138 if (yes)
27139 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
27142 return mask;
27145 /* For a very restricted set of circumstances, we can cut down the
27146 size of prologues/epilogues by calling our own save/restore-the-world
27147 routines. */
27149 static void
27150 compute_save_world_info (rs6000_stack_t *info)
27152 info->world_save_p = 1;
27153 info->world_save_p
27154 = (WORLD_SAVE_P (info)
27155 && DEFAULT_ABI == ABI_DARWIN
27156 && !cfun->has_nonlocal_label
27157 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
27158 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
27159 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
27160 && info->cr_save_p);
27162 /* This will not work in conjunction with sibcalls. Make sure there
27163 are none. (This check is expensive, but seldom executed.) */
27164 if (WORLD_SAVE_P (info))
27166 rtx_insn *insn;
27167 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
27168 if (CALL_P (insn) && SIBLING_CALL_P (insn))
27170 info->world_save_p = 0;
27171 break;
27175 if (WORLD_SAVE_P (info))
27177 /* Even if we're not touching VRsave, make sure there's room on the
27178 stack for it, if it looks like we're calling SAVE_WORLD, which
27179 will attempt to save it. */
27180 info->vrsave_size = 4;
27182 /* If we are going to save the world, we need to save the link register too. */
27183 info->lr_save_p = 1;
27185 /* "Save" the VRsave register too if we're saving the world. */
27186 if (info->vrsave_mask == 0)
27187 info->vrsave_mask = compute_vrsave_mask ();
27189 /* Because the Darwin register save/restore routines only handle
27190 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
27191 check. */
27192 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
27193 && (info->first_altivec_reg_save
27194 >= FIRST_SAVED_ALTIVEC_REGNO));
27197 return;
27201 static void
27202 is_altivec_return_reg (rtx reg, void *xyes)
27204 bool *yes = (bool *) xyes;
27205 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
27206 *yes = true;
27210 /* Return whether REG is a global user reg or has been specified by
27211 -ffixed-REG. We should not restore these, and so cannot use
27212 lmw or out-of-line restore functions if there are any. We also
27213 can't save them (well, emit frame notes for them), because frame
27214 unwinding during exception handling will restore saved registers. */
27216 static bool
27217 fixed_reg_p (int reg)
27219 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
27220 backend sets it, overriding anything the user might have given. */
27221 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
27222 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
27223 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
27224 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
27225 return false;
27227 return fixed_regs[reg];
27230 /* Determine the strategy for saving/restoring registers. */
27232 enum {
27233 SAVE_MULTIPLE = 0x1,
27234 SAVE_INLINE_GPRS = 0x2,
27235 SAVE_INLINE_FPRS = 0x4,
27236 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
27237 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
27238 SAVE_INLINE_VRS = 0x20,
27239 REST_MULTIPLE = 0x100,
27240 REST_INLINE_GPRS = 0x200,
27241 REST_INLINE_FPRS = 0x400,
27242 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
27243 REST_INLINE_VRS = 0x1000
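/* A minimal illustrative sketch, not part of the build: the strategy
   word built by rs6000_savres_strategy below is a plain bitmask, so
   callers test it with bitwise AND. For example, "are GPR restores
   emitted inline, rather than via lmw or an out-of-line routine?"
   mirrors the test used further down: */
static int
rest_gprs_inline_sketch (int strategy)
{
  /* REST_MULTIPLE may be set alongside REST_INLINE_GPRS, so compare
     against the masked value instead of testing one bit. */
  return (strategy & (REST_INLINE_GPRS | REST_MULTIPLE))
	 == REST_INLINE_GPRS;
}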
27246 static int
27247 rs6000_savres_strategy (rs6000_stack_t *info,
27248 bool using_static_chain_p)
27250 int strategy = 0;
27252 /* Select between in-line and out-of-line save and restore of regs.
27253 First, all the obvious cases where we don't use out-of-line. */
27254 if (crtl->calls_eh_return
27255 || cfun->machine->ra_need_lr)
27256 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
27257 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
27258 | SAVE_INLINE_VRS | REST_INLINE_VRS);
27260 if (info->first_gp_reg_save == 32)
27261 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27263 if (info->first_fp_reg_save == 64
27264 /* The out-of-line FP routines use double-precision stores;
27265 we can't use those routines if we don't have such stores. */
27266 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
27267 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27269 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
27270 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27272 /* Define the cutoff for using out-of-line functions to save registers. */
27273 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
27275 if (!optimize_size)
27277 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27278 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27279 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27281 else
27283 /* Prefer out-of-line restore if it will exit. */
27284 if (info->first_fp_reg_save > 61)
27285 strategy |= SAVE_INLINE_FPRS;
27286 if (info->first_gp_reg_save > 29)
27288 if (info->first_fp_reg_save == 64)
27289 strategy |= SAVE_INLINE_GPRS;
27290 else
27291 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27293 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
27294 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27297 else if (DEFAULT_ABI == ABI_DARWIN)
27299 if (info->first_fp_reg_save > 60)
27300 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27301 if (info->first_gp_reg_save > 29)
27302 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27303 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27305 else
27307 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27308 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
27309 || info->first_fp_reg_save > 61)
27310 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27311 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27312 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27315 /* Don't bother to try to save things out-of-line if r11 is occupied
27316 by the static chain. It would require too much fiddling and the
27317 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
27318 pointer on Darwin, and AIX uses r1 or r12. */
27319 if (using_static_chain_p
27320 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
27321 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
27322 | SAVE_INLINE_GPRS
27323 | SAVE_INLINE_VRS);
27325 /* Saving CR interferes with the exit routines used on the SPE, so
27326 just punt here. */
27327 if (TARGET_SPE_ABI
27328 && info->spe_64bit_regs_used
27329 && info->cr_save_p)
27330 strategy |= REST_INLINE_GPRS;
27332 /* We can only use the out-of-line routines to restore fprs if we've
27333 saved all the registers from first_fp_reg_save in the prologue.
27334 Otherwise, we risk loading garbage. Of course, if we have saved
27335 out-of-line then we know we haven't skipped any fprs. */
27336 if ((strategy & SAVE_INLINE_FPRS)
27337 && !(strategy & REST_INLINE_FPRS))
27339 int i;
27341 for (i = info->first_fp_reg_save; i < 64; i++)
27342 if (fixed_regs[i] || !save_reg_p (i))
27344 strategy |= REST_INLINE_FPRS;
27345 break;
27349 /* Similarly, for altivec regs. */
27350 if ((strategy & SAVE_INLINE_VRS)
27351 && !(strategy & REST_INLINE_VRS))
27353 int i;
27355 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
27356 if (fixed_regs[i] || !save_reg_p (i))
27358 strategy |= REST_INLINE_VRS;
27359 break;
27363 /* info->lr_save_p isn't yet set if the only reason lr needs to be
27364 saved is an out-of-line save or restore. Set up the value for
27365 the next test (excluding out-of-line gprs). */
27366 bool lr_save_p = (info->lr_save_p
27367 || !(strategy & SAVE_INLINE_FPRS)
27368 || !(strategy & SAVE_INLINE_VRS)
27369 || !(strategy & REST_INLINE_FPRS)
27370 || !(strategy & REST_INLINE_VRS));
27372 if (TARGET_MULTIPLE
27373 && !TARGET_POWERPC64
27374 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
27375 && info->first_gp_reg_save < 31
27376 && !(flag_shrink_wrap
27377 && flag_shrink_wrap_separate
27378 && optimize_function_for_speed_p (cfun)))
27380 /* Prefer store multiple for saves over out-of-line routines,
27381 since the store-multiple instruction will always be smaller. */
27382 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
27384 /* The situation is more complicated with load multiple. We'd
27385 prefer to use the out-of-line routines for restores, since the
27386 "exit" out-of-line routines can handle the restore of LR and the
27387 frame teardown. However, it doesn't make sense to use the
27388 out-of-line routine if that is the only reason we'd need to save
27389 LR, and we can't use the "exit" out-of-line gpr restore if we
27390 have saved some fprs; in those cases it is advantageous to use
27391 load multiple when available. */
27392 if (info->first_fp_reg_save != 64 || !lr_save_p)
27393 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
27396 /* Using the "exit" out-of-line routine does not improve code size
27397 if it would require lr to be saved and we are only saving one
27398 or two gprs. */
27399 else if (!lr_save_p && info->first_gp_reg_save > 29)
27400 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27402 /* We can only use load multiple or the out-of-line routines to
27403 restore gprs if we've saved all the registers from
27404 first_gp_reg_save. Otherwise, we risk loading garbage.
27405 Of course, if we have saved out-of-line or used stmw then we know
27406 we haven't skipped any gprs. */
27407 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
27408 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
27410 int i;
27412 for (i = info->first_gp_reg_save; i < 32; i++)
27413 if (fixed_reg_p (i) || !save_reg_p (i))
27415 strategy |= REST_INLINE_GPRS;
27416 strategy &= ~REST_MULTIPLE;
27417 break;
27421 if (TARGET_ELF && TARGET_64BIT)
27423 if (!(strategy & SAVE_INLINE_FPRS))
27424 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27425 else if (!(strategy & SAVE_INLINE_GPRS)
27426 && info->first_fp_reg_save == 64)
27427 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
27429 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
27430 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
27432 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
27433 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27435 return strategy;
27438 /* Calculate the stack information for the current function. This is
27439 complicated by having two separate calling sequences, the AIX calling
27440 sequence and the V.4 calling sequence.
27442 AIX (and Darwin/Mac OS X) stack frames look like:
27443 32-bit 64-bit
27444 SP----> +---------------------------------------+
27445 | back chain to caller | 0 0
27446 +---------------------------------------+
27447 | saved CR | 4 8 (8-11)
27448 +---------------------------------------+
27449 | saved LR | 8 16
27450 +---------------------------------------+
27451 | reserved for compilers | 12 24
27452 +---------------------------------------+
27453 | reserved for binders | 16 32
27454 +---------------------------------------+
27455 | saved TOC pointer | 20 40
27456 +---------------------------------------+
27457 | Parameter save area (+padding*) (P) | 24 48
27458 +---------------------------------------+
27459 | Alloca space (A) | 24+P etc.
27460 +---------------------------------------+
27461 | Local variable space (L) | 24+P+A
27462 +---------------------------------------+
27463 | Float/int conversion temporary (X) | 24+P+A+L
27464 +---------------------------------------+
27465 | Save area for AltiVec registers (W) | 24+P+A+L+X
27466 +---------------------------------------+
27467 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
27468 +---------------------------------------+
27469 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
27470 +---------------------------------------+
27471 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
27472 +---------------------------------------+
27473 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
27474 +---------------------------------------+
27475 old SP->| back chain to caller's caller |
27476 +---------------------------------------+
27478 * If the alloca area is present, the parameter save area is
27479 padded so that the alloca area starts 16-byte aligned.
27481 The required alignment for AIX configurations is two words (i.e., 8
27482 or 16 bytes).
27484 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
27486 SP----> +---------------------------------------+
27487 | Back chain to caller | 0
27488 +---------------------------------------+
27489 | Save area for CR | 8
27490 +---------------------------------------+
27491 | Saved LR | 16
27492 +---------------------------------------+
27493 | Saved TOC pointer | 24
27494 +---------------------------------------+
27495 | Parameter save area (+padding*) (P) | 32
27496 +---------------------------------------+
27497 | Alloca space (A) | 32+P
27498 +---------------------------------------+
27499 | Local variable space (L) | 32+P+A
27500 +---------------------------------------+
27501 | Save area for AltiVec registers (W) | 32+P+A+L
27502 +---------------------------------------+
27503 | AltiVec alignment padding (Y) | 32+P+A+L+W
27504 +---------------------------------------+
27505 | Save area for GP registers (G) | 32+P+A+L+W+Y
27506 +---------------------------------------+
27507 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
27508 +---------------------------------------+
27509 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
27510 +---------------------------------------+
27512 * If the alloca area is present, the parameter save area is
27513 padded so that the alloca area starts 16-byte aligned.
27515 V.4 stack frames look like:
27517 SP----> +---------------------------------------+
27518 | back chain to caller | 0
27519 +---------------------------------------+
27520 | caller's saved LR | 4
27521 +---------------------------------------+
27522 | Parameter save area (+padding*) (P) | 8
27523 +---------------------------------------+
27524 | Alloca space (A) | 8+P
27525 +---------------------------------------+
27526 | Varargs save area (V) | 8+P+A
27527 +---------------------------------------+
27528 | Local variable space (L) | 8+P+A+V
27529 +---------------------------------------+
27530 | Float/int conversion temporary (X) | 8+P+A+V+L
27531 +---------------------------------------+
27532 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
27533 +---------------------------------------+
27534 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
27535 +---------------------------------------+
27536 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
27537 +---------------------------------------+
27538 | SPE: area for 64-bit GP registers |
27539 +---------------------------------------+
27540 | SPE alignment padding |
27541 +---------------------------------------+
27542 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
27543 +---------------------------------------+
27544 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
27545 +---------------------------------------+
27546 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
27547 +---------------------------------------+
27548 old SP->| back chain to caller's caller |
27549 +---------------------------------------+
27551 * If the alloca area is present and the required alignment is
27552 16 bytes, the parameter save area is padded so that the
27553 alloca area starts 16-byte aligned.
27555 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
27556 given. (But note below and in sysv4.h that we require only 8 and
27557 may round up the size of our stack frame anyway. The historical
27558 reason is early versions of powerpc-linux which didn't properly
27559 align the stack at program startup. A happy side-effect is that
27560 -mno-eabi libraries can be used with -meabi programs.)
27562 The EABI configuration defaults to the V.4 layout. However,
27563 the stack alignment requirements may differ. If -mno-eabi is not
27564 given, the required stack alignment is 8 bytes; if -mno-eabi is
27565 given, the required alignment is 16 bytes. (But see V.4 comment
27566 above.) */
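/* A minimal illustrative sketch, not part of the build: reading the
   AIX-style diagram above, each save area's offset from the new SP is
   the running sum of everything above it, with the 32-bit fixed
   header occupying the first 24 bytes. The parameters P, A, L, X, W,
   Y, Z, G are the byte sizes labelled in the diagram; the helper
   itself is hypothetical. */
static long
aix_fp_save_area_offset_sketch (long p, long a, long l, long x,
				long w, long y, long z, long g)
{
  /* Offset of the FP save area (F) in the 32-bit column. */
  return 24 + p + a + l + x + w + y + z + g;
}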
27568 #ifndef ABI_STACK_BOUNDARY
27569 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
27570 #endif
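/* A minimal illustrative sketch, not part of the build:
   rs6000_stack_info below repeatedly rounds sizes up to a
   power-of-two boundary via RS6000_ALIGN. Assuming ALIGN is a power
   of two, that rounding is the usual add-then-mask trick: */
static long
align_up_sketch (long n, long align)
{
  /* Adding ALIGN-1 then clearing the low bits rounds N up;
     -align == ~(align - 1) in two's complement. */
  return (n + align - 1) & -align;
}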
27572 static rs6000_stack_t *
27573 rs6000_stack_info (void)
27575 /* We should never be called for thunks; we are not set up for that. */
27576 gcc_assert (!cfun->is_thunk);
27578 rs6000_stack_t *info = &stack_info;
27579 int reg_size = TARGET_32BIT ? 4 : 8;
27580 int ehrd_size;
27581 int ehcr_size;
27582 int save_align;
27583 int first_gp;
27584 HOST_WIDE_INT non_fixed_size;
27585 bool using_static_chain_p;
27587 if (reload_completed && info->reload_completed)
27588 return info;
27590 memset (info, 0, sizeof (*info));
27591 info->reload_completed = reload_completed;
27593 if (TARGET_SPE)
27595 /* Cache the value so we don't rescan the instruction chain over and over. */
27596 if (cfun->machine->spe_insn_chain_scanned_p == 0)
27597 cfun->machine->spe_insn_chain_scanned_p
27598 = spe_func_has_64bit_regs_p () + 1;
27599 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
27602 /* Select which calling sequence. */
27603 info->abi = DEFAULT_ABI;
27605 /* Calculate which registers need to be saved and the save area size. */
27606 info->first_gp_reg_save = first_reg_to_save ();
27607 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
27608 even if it currently looks like we won't. Reload may need it to
27609 get at a constant; if so, it will have already created a constant
27610 pool entry for it. */
27611 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
27612 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27613 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27614 && crtl->uses_const_pool
27615 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
27616 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
27617 else
27618 first_gp = info->first_gp_reg_save;
27620 info->gp_size = reg_size * (32 - first_gp);
27622 /* For the SPE, we have an additional upper 32 bits on each GPR.
27623 Ideally we should save the entire 64 bits only when the upper
27624 half is used in SIMD instructions. Since we only record
27625 registers live (not the size they are used in), this proves
27626 difficult because we'd have to traverse the instruction chain at
27627 the right time, taking reload into account. This is a real pain,
27628 so we opt to always save the GPRs in 64 bits if even one register
27629 is used in 64 bits. Otherwise, all the registers in the frame
27630 are saved in 32 bits.
27632 So, since we save all GPRs (except the SP) in 64 bits, the
27633 traditional GP save area will be empty.
27634 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27635 info->gp_size = 0;
27637 info->first_fp_reg_save = first_fp_reg_to_save ();
27638 info->fp_size = 8 * (64 - info->first_fp_reg_save);
27640 info->first_altivec_reg_save = first_altivec_reg_to_save ();
27641 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
27642 - info->first_altivec_reg_save);
27644 /* Does this function call anything? */
27645 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
27647 /* Determine if we need to save the condition code registers. */
27648 if (save_reg_p (CR2_REGNO)
27649 || save_reg_p (CR3_REGNO)
27650 || save_reg_p (CR4_REGNO))
27652 info->cr_save_p = 1;
27653 if (DEFAULT_ABI == ABI_V4)
27654 info->cr_size = reg_size;
27657 /* If the current function calls __builtin_eh_return, then we need
27658 to allocate stack space for registers that will hold data for
27659 the exception handler. */
27660 if (crtl->calls_eh_return)
27662 unsigned int i;
27663 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
27664 continue;
27666 /* SPE saves EH registers in 64-bits. */
27667 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
27668 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
27670 else
27671 ehrd_size = 0;
27673 /* In the ELFv2 ABI, we also need to allocate space for separate
27674 CR field save areas if the function calls __builtin_eh_return. */
27675 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27677 /* This hard-codes that we have three call-saved CR fields. */
27678 ehcr_size = 3 * reg_size;
27679 /* We do *not* use the regular CR save mechanism. */
27680 info->cr_save_p = 0;
27682 else
27683 ehcr_size = 0;
27685 /* Determine various sizes. */
27686 info->reg_size = reg_size;
27687 info->fixed_size = RS6000_SAVE_AREA;
27688 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
27689 if (cfun->calls_alloca)
27690 info->parm_size =
27691 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
27692 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
27693 else
27694 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
27695 TARGET_ALTIVEC ? 16 : 8);
27696 if (FRAME_GROWS_DOWNWARD)
27697 info->vars_size
27698 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
27699 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
27700 - (info->fixed_size + info->vars_size + info->parm_size);
27702 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27703 info->spe_gp_size = 8 * (32 - first_gp);
27705 if (TARGET_ALTIVEC_ABI)
27706 info->vrsave_mask = compute_vrsave_mask ();
27708 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
27709 info->vrsave_size = 4;
27711 compute_save_world_info (info);
27713 /* Calculate the offsets. */
27714 switch (DEFAULT_ABI)
27716 case ABI_NONE:
27717 default:
27718 gcc_unreachable ();
27720 case ABI_AIX:
27721 case ABI_ELFv2:
27722 case ABI_DARWIN:
27723 info->fp_save_offset = -info->fp_size;
27724 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27726 if (TARGET_ALTIVEC_ABI)
27728 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
27730 /* Align stack so vector save area is on a quadword boundary.
27731 The padding goes above the vectors. */
27732 if (info->altivec_size != 0)
27733 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
27735 info->altivec_save_offset = info->vrsave_save_offset
27736 - info->altivec_padding_size
27737 - info->altivec_size;
27738 gcc_assert (info->altivec_size == 0
27739 || info->altivec_save_offset % 16 == 0);
27741 /* Adjust for AltiVec case. */
27742 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
27744 else
27745 info->ehrd_offset = info->gp_save_offset - ehrd_size;
27747 info->ehcr_offset = info->ehrd_offset - ehcr_size;
27748 info->cr_save_offset = reg_size; /* first word when 64-bit. */
27749 info->lr_save_offset = 2*reg_size;
27750 break;
27752 case ABI_V4:
27753 info->fp_save_offset = -info->fp_size;
27754 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27755 info->cr_save_offset = info->gp_save_offset - info->cr_size;
27757 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27759 /* Align stack so SPE GPR save area is aligned on a
27760 double-word boundary. */
27761 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
27762 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
27763 else
27764 info->spe_padding_size = 0;
27766 info->spe_gp_save_offset = info->cr_save_offset
27767 - info->spe_padding_size
27768 - info->spe_gp_size;
27770 /* Adjust for SPE case. */
27771 info->ehrd_offset = info->spe_gp_save_offset;
27773 else if (TARGET_ALTIVEC_ABI)
27775 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
27777 /* Align stack so vector save area is on a quadword boundary. */
27778 if (info->altivec_size != 0)
27779 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
27781 info->altivec_save_offset = info->vrsave_save_offset
27782 - info->altivec_padding_size
27783 - info->altivec_size;
27785 /* Adjust for AltiVec case. */
27786 info->ehrd_offset = info->altivec_save_offset;
27788 else
27789 info->ehrd_offset = info->cr_save_offset;
27791 info->ehrd_offset -= ehrd_size;
27792 info->lr_save_offset = reg_size;
27795 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
27796 info->save_size = RS6000_ALIGN (info->fp_size
27797 + info->gp_size
27798 + info->altivec_size
27799 + info->altivec_padding_size
27800 + info->spe_gp_size
27801 + info->spe_padding_size
27802 + ehrd_size
27803 + ehcr_size
27804 + info->cr_size
27805 + info->vrsave_size,
27806 save_align);
27808 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
27810 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
27811 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
27813 /* Determine if we need to save the link register. */
27814 if (info->calls_p
27815 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27816 && crtl->profile
27817 && !TARGET_PROFILE_KERNEL)
27818 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
27819 #ifdef TARGET_RELOCATABLE
27820 || (DEFAULT_ABI == ABI_V4
27821 && (TARGET_RELOCATABLE || flag_pic > 1)
27822 && !constant_pool_empty_p ())
27823 #endif
27824 || rs6000_ra_ever_killed ())
27825 info->lr_save_p = 1;
27827 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27828 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27829 && call_used_regs[STATIC_CHAIN_REGNUM]);
27830 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
27832 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
27833 || !(info->savres_strategy & SAVE_INLINE_FPRS)
27834 || !(info->savres_strategy & SAVE_INLINE_VRS)
27835 || !(info->savres_strategy & REST_INLINE_GPRS)
27836 || !(info->savres_strategy & REST_INLINE_FPRS)
27837 || !(info->savres_strategy & REST_INLINE_VRS))
27838 info->lr_save_p = 1;
27840 if (info->lr_save_p)
27841 df_set_regs_ever_live (LR_REGNO, true);
27843 /* Determine if we need to allocate any stack frame:
27845 For AIX we need to push the stack if a frame pointer is needed
27846 (because the stack might be dynamically adjusted), if we are
27847 debugging, if we make calls, or if the sum of fp_save, gp_save,
27848 and local variables are more than the space needed to save all
27849 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
27850 + 18*8 = 288 (GPR13 reserved).
27852 For V.4 we don't have the stack cushion that AIX uses, but assume
27853 that the debugger can handle stackless frames. */
27855 if (info->calls_p)
27856 info->push_p = 1;
27858 else if (DEFAULT_ABI == ABI_V4)
27859 info->push_p = non_fixed_size != 0;
27861 else if (frame_pointer_needed)
27862 info->push_p = 1;
27864 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
27865 info->push_p = 1;
27867 else
27868 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
27870 return info;
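/* A minimal illustrative sketch, not part of the build: the AIX
   "cushion" used in the push_p test above is just the size of the
   full non-volatile save area, as the comment says:
   32-bit: 18 FPRs * 8 + 19 GPRs * 4 = 220 bytes;
   64-bit: 18 FPRs * 8 + 18 GPRs * 8 = 288 bytes (GPR13 is reserved,
   hence 18 rather than 19 GPRs). */
static int
aix_frame_cushion_sketch (int is_32bit)
{
  return is_32bit ? 18 * 8 + 19 * 4 : 18 * 8 + 18 * 8;
}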
27873 /* Return true if the current function uses any GPRs in 64-bit SIMD
27874 mode. */
27876 static bool
27877 spe_func_has_64bit_regs_p (void)
27879 rtx_insn *insns, *insn;
27881 /* Functions that save and restore all the call-saved registers will
27882 need to save/restore the registers in 64-bits. */
27883 if (crtl->calls_eh_return
27884 || cfun->calls_setjmp
27885 || crtl->has_nonlocal_goto)
27886 return true;
27888 insns = get_insns ();
27890 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
27892 if (INSN_P (insn))
27894 rtx i;
27896 /* FIXME: This should be implemented with attributes...
27898 (set_attr "spe64" "true") ... then,
27899 if (get_spe64(insn)) return true;
27901 It's the only reliable way to do the stuff below. */
27903 i = PATTERN (insn);
27904 if (GET_CODE (i) == SET)
27906 machine_mode mode = GET_MODE (SET_SRC (i));
27908 if (SPE_VECTOR_MODE (mode))
27909 return true;
27910 if (TARGET_E500_DOUBLE
27911 && (mode == DFmode || FLOAT128_2REG_P (mode)))
27912 return true;
27917 return false;
27920 static void
27921 debug_stack_info (rs6000_stack_t *info)
27923 const char *abi_string;
27925 if (! info)
27926 info = rs6000_stack_info ();
27928 fprintf (stderr, "\nStack information for function %s:\n",
27929 ((current_function_decl && DECL_NAME (current_function_decl))
27930 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
27931 : "<unknown>"));
27933 switch (info->abi)
27935 default: abi_string = "Unknown"; break;
27936 case ABI_NONE: abi_string = "NONE"; break;
27937 case ABI_AIX: abi_string = "AIX"; break;
27938 case ABI_ELFv2: abi_string = "ELFv2"; break;
27939 case ABI_DARWIN: abi_string = "Darwin"; break;
27940 case ABI_V4: abi_string = "V.4"; break;
27943 fprintf (stderr, "\tABI = %5s\n", abi_string);
27945 if (TARGET_ALTIVEC_ABI)
27946 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
27948 if (TARGET_SPE_ABI)
27949 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
27951 if (info->first_gp_reg_save != 32)
27952 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
27954 if (info->first_fp_reg_save != 64)
27955 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
27957 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
27958 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
27959 info->first_altivec_reg_save);
27961 if (info->lr_save_p)
27962 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
27964 if (info->cr_save_p)
27965 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
27967 if (info->vrsave_mask)
27968 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
27970 if (info->push_p)
27971 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
27973 if (info->calls_p)
27974 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
27976 if (info->gp_size)
27977 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
27979 if (info->fp_size)
27980 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
27982 if (info->altivec_size)
27983 fprintf (stderr, "\taltivec_save_offset = %5d\n",
27984 info->altivec_save_offset);
27986 if (info->spe_gp_size)
27987 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
27988 info->spe_gp_save_offset);
27990 if (info->vrsave_size)
27991 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
27992 info->vrsave_save_offset);
27994 if (info->lr_save_p)
27995 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
27997 if (info->cr_save_p)
27998 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
28000 if (info->varargs_save_offset)
28001 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
28003 if (info->total_size)
28004 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
28005 info->total_size);
28007 if (info->vars_size)
28008 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
28009 info->vars_size);
28011 if (info->parm_size)
28012 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
28014 if (info->fixed_size)
28015 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
28017 if (info->gp_size)
28018 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
28020 if (info->spe_gp_size)
28021 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
28023 if (info->fp_size)
28024 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
28026 if (info->altivec_size)
28027 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
28029 if (info->vrsave_size)
28030 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
28032 if (info->altivec_padding_size)
28033 fprintf (stderr, "\taltivec_padding_size= %5d\n",
28034 info->altivec_padding_size);
28036 if (info->spe_padding_size)
28037 fprintf (stderr, "\tspe_padding_size = %5d\n",
28038 info->spe_padding_size);
28040 if (info->cr_size)
28041 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
28043 if (info->save_size)
28044 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
28046 if (info->reg_size != 4)
28047 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
28049 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
28051 fprintf (stderr, "\n");
28055 rs6000_return_addr (int count, rtx frame)
28057 /* Currently we don't optimize very well between prologue and body
28058 code, and for PIC the generated code can be quite bad, so
28059 don't try to be too clever here. */
28060 if (count != 0
28061 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
28063 cfun->machine->ra_needs_full_frame = 1;
28065 return
28066 gen_rtx_MEM
28067 (Pmode,
28068 memory_address
28069 (Pmode,
28070 plus_constant (Pmode,
28071 copy_to_reg
28072 (gen_rtx_MEM (Pmode,
28073 memory_address (Pmode, frame))),
28074 RETURN_ADDRESS_OFFSET)));
28077 cfun->machine->ra_need_lr = 1;
28078 return get_hard_reg_initial_val (Pmode, LR_REGNO);
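/* A minimal illustrative sketch, not part of the build: for
   COUNT != 0 the RTL built above follows the back chain once and
   then loads the saved-LR slot, roughly the following in C terms
   (the helper and its parameters are hypothetical): */
static void *
return_addr_sketch (void **frame, long return_address_offset)
{
  void *callers_frame = *frame;		/* back chain to caller */
  return *(void **) ((char *) callers_frame + return_address_offset);
}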
28081 /* Say whether a function is a candidate for sibcall handling or not. */
28083 static bool
28084 rs6000_function_ok_for_sibcall (tree decl, tree exp)
28086 tree fntype;
28088 if (decl)
28089 fntype = TREE_TYPE (decl);
28090 else
28091 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
28093 /* We can't do it if the called function has more vector parameters
28094 than the current function; there's nowhere to put the VRsave code. */
28095 if (TARGET_ALTIVEC_ABI
28096 && TARGET_ALTIVEC_VRSAVE
28097 && !(decl && decl == current_function_decl))
28099 function_args_iterator args_iter;
28100 tree type;
28101 int nvreg = 0;
28103 /* Functions with vector parameters are required to have a
28104 prototype, so the argument type info must be available
28105 here. */
28106 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
28107 if (TREE_CODE (type) == VECTOR_TYPE
28108 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28109 nvreg++;
28111 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
28112 if (TREE_CODE (type) == VECTOR_TYPE
28113 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28114 nvreg--;
28116 if (nvreg > 0)
28117 return false;
28120 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
28121 functions, because the callee may have a different TOC pointer from
28122 the caller's, and there's no way to ensure we restore the TOC when
28123 we return. With the secure-plt SYSV ABI we can't make non-local
28124 calls when -fpic/PIC because the plt call stubs use r30. */
28125 if (DEFAULT_ABI == ABI_DARWIN
28126 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28127 && decl
28128 && !DECL_EXTERNAL (decl)
28129 && !DECL_WEAK (decl)
28130 && (*targetm.binds_local_p) (decl))
28131 || (DEFAULT_ABI == ABI_V4
28132 && (!TARGET_SECURE_PLT
28133 || !flag_pic
28134 || (decl
28135 && (*targetm.binds_local_p) (decl)))))
28137 tree attr_list = TYPE_ATTRIBUTES (fntype);
28139 if (!lookup_attribute ("longcall", attr_list)
28140 || lookup_attribute ("shortcall", attr_list))
28141 return true;
28144 return false;
28147 static int
28148 rs6000_ra_ever_killed (void)
28150 rtx_insn *top;
28151 rtx reg;
28152 rtx_insn *insn;
28154 if (cfun->is_thunk)
28155 return 0;
28157 if (cfun->machine->lr_save_state)
28158 return cfun->machine->lr_save_state - 1;
28160 /* regs_ever_live has LR marked as used if any sibcalls are present,
28161 but this should not force saving and restoring in the
28162 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
28163 clobbers LR, so that is inappropriate. */
28165 /* Also, the prologue can generate a store into LR that
28166 doesn't really count, like this:
28168 move LR->R0
28169 bcl to set PIC register
28170 move LR->R31
28171 move R0->LR
28173 When we're called from the epilogue, we need to avoid counting
28174 this as a store. */
28176 push_topmost_sequence ();
28177 top = get_insns ();
28178 pop_topmost_sequence ();
28179 reg = gen_rtx_REG (Pmode, LR_REGNO);
28181 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
28183 if (INSN_P (insn))
28185 if (CALL_P (insn))
28187 if (!SIBLING_CALL_P (insn))
28188 return 1;
28190 else if (find_regno_note (insn, REG_INC, LR_REGNO))
28191 return 1;
28192 else if (set_of (reg, insn) != NULL_RTX
28193 && !prologue_epilogue_contains (insn))
28194 return 1;
28197 return 0;
28200 /* Emit instructions needed to load the TOC register.
28201 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
28202 and there is a constant pool, or for SVR4 -fpic. */
28204 void
28205 rs6000_emit_load_toc_table (int fromprolog)
28207 rtx dest;
28208 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
28210 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
28212 char buf[30];
28213 rtx lab, tmp1, tmp2, got;
28215 lab = gen_label_rtx ();
28216 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
28217 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28218 if (flag_pic == 2)
28220 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28221 need_toc_init = 1;
28223 else
28224 got = rs6000_got_sym ();
28225 tmp1 = tmp2 = dest;
28226 if (!fromprolog)
28228 tmp1 = gen_reg_rtx (Pmode);
28229 tmp2 = gen_reg_rtx (Pmode);
28231 emit_insn (gen_load_toc_v4_PIC_1 (lab));
28232 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
28233 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
28234 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
28236 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
28238 emit_insn (gen_load_toc_v4_pic_si ());
28239 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28241 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
28243 char buf[30];
28244 rtx temp0 = (fromprolog
28245 ? gen_rtx_REG (Pmode, 0)
28246 : gen_reg_rtx (Pmode));
28248 if (fromprolog)
28250 rtx symF, symL;
28252 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28253 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28255 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
28256 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28258 emit_insn (gen_load_toc_v4_PIC_1 (symF));
28259 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28260 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
28262 else
28264 rtx tocsym, lab;
28266 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28267 need_toc_init = 1;
28268 lab = gen_label_rtx ();
28269 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
28270 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28271 if (TARGET_LINK_STACK)
28272 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
28273 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
28275 emit_insn (gen_addsi3 (dest, temp0, dest));
28277 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
28279 /* This is for AIX code running in non-PIC ELF32. */
28280 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28282 need_toc_init = 1;
28283 emit_insn (gen_elf_high (dest, realsym));
28284 emit_insn (gen_elf_low (dest, dest, realsym));
28286 else
28288 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28290 if (TARGET_32BIT)
28291 emit_insn (gen_load_toc_aix_si (dest));
28292 else
28293 emit_insn (gen_load_toc_aix_di (dest));
28297 /* Emit instructions to restore the link register after determining where
28298 its value has been stored. */
28300 void
28301 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
28303 rs6000_stack_t *info = rs6000_stack_info ();
28304 rtx operands[2];
28306 operands[0] = source;
28307 operands[1] = scratch;
28309 if (info->lr_save_p)
28311 rtx frame_rtx = stack_pointer_rtx;
28312 HOST_WIDE_INT sp_offset = 0;
28313 rtx tmp;
28315 if (frame_pointer_needed
28316 || cfun->calls_alloca
28317 || info->total_size > 32767)
28319 tmp = gen_frame_mem (Pmode, frame_rtx);
28320 emit_move_insn (operands[1], tmp);
28321 frame_rtx = operands[1];
28323 else if (info->push_p)
28324 sp_offset = info->total_size;
28326 tmp = plus_constant (Pmode, frame_rtx,
28327 info->lr_save_offset + sp_offset);
28328 tmp = gen_frame_mem (Pmode, tmp);
28329 emit_move_insn (tmp, operands[0]);
28331 else
28332 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
28334 /* Freeze lr_save_p. We've just emitted rtl that depends on the
28335 state of lr_save_p so any change from here on would be a bug. In
28336 particular, stop rs6000_ra_ever_killed from considering the SET
28337 of lr we may have added just above. */
28338 cfun->machine->lr_save_state = info->lr_save_p + 1;
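/* A minimal illustrative sketch, not part of the build:
   lr_save_state packs a tri-state into one int. Zero means "not yet
   frozen"; a frozen boolean B is stored as B + 1 and recovered as
   state - 1, exactly as rs6000_ra_ever_killed reads it above. */
static int
lr_save_state_decode_sketch (int state, int not_frozen_fallback)
{
  return state ? state - 1 : not_frozen_fallback;
}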
28341 static GTY(()) alias_set_type set = -1;
28343 alias_set_type
28344 get_TOC_alias_set (void)
28346 if (set == -1)
28347 set = new_alias_set ();
28348 return set;
28351 /* This returns nonzero if the current function uses the TOC. This is
28352 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
28353 is generated by the ABI_V4 load_toc_* patterns. */
28354 #if TARGET_ELF
28355 static int
28356 uses_TOC (void)
28358 rtx_insn *insn;
28360 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
28361 if (INSN_P (insn))
28363 rtx pat = PATTERN (insn);
28364 int i;
28366 if (GET_CODE (pat) == PARALLEL)
28367 for (i = 0; i < XVECLEN (pat, 0); i++)
28369 rtx sub = XVECEXP (pat, 0, i);
28370 if (GET_CODE (sub) == USE)
28372 sub = XEXP (sub, 0);
28373 if (GET_CODE (sub) == UNSPEC
28374 && XINT (sub, 1) == UNSPEC_TOC)
28375 return 1;
28379 return 0;
28381 #endif
28384 create_TOC_reference (rtx symbol, rtx largetoc_reg)
28386 rtx tocrel, tocreg, hi;
28388 if (TARGET_DEBUG_ADDR)
28390 if (GET_CODE (symbol) == SYMBOL_REF)
28391 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
28392 XSTR (symbol, 0));
28393 else
28395 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
28396 GET_RTX_NAME (GET_CODE (symbol)));
28397 debug_rtx (symbol);
28401 if (!can_create_pseudo_p ())
28402 df_set_regs_ever_live (TOC_REGISTER, true);
28404 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
28405 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
28406 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
28407 return tocrel;
28409 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
28410 if (largetoc_reg != NULL)
28412 emit_move_insn (largetoc_reg, hi);
28413 hi = largetoc_reg;
28415 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
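/* A minimal illustrative sketch, not part of the build: the
   HIGH/LO_SUM pair above is the standard @ha/@l split of an offset
   into a shifted upper part and a signed 16-bit lower part: */
static long
hi_lo_split_sketch (long offset)
{
  long hi = (offset + 0x8000) & ~0xffffL; /* @ha: rounded upper part */
  long lo = offset - hi;		  /* @l: in [-0x8000, 0x7fff] */
  return hi + lo;			  /* recombines to OFFSET */
}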
28418 /* Issue assembly directives that create a reference to the given DWARF
28419 FRAME_TABLE_LABEL from the current function section. */
28420 void
28421 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
28423 fprintf (asm_out_file, "\t.ref %s\n",
28424 (* targetm.strip_name_encoding) (frame_table_label));
28427 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
28428 and the change to the stack pointer. */
28430 static void
28431 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
28433 rtvec p;
28434 int i;
28435 rtx regs[3];
28437 i = 0;
28438 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28439 if (hard_frame_needed)
28440 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
28441 if (!(REGNO (fp) == STACK_POINTER_REGNUM
28442 || (hard_frame_needed
28443 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
28444 regs[i++] = fp;
28446 p = rtvec_alloc (i);
28447 while (--i >= 0)
28449 rtx mem = gen_frame_mem (BLKmode, regs[i]);
28450 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
28453 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
28456 /* Emit the correct code for allocating stack space, as insns.
28457 If COPY_REG, make sure a copy of the old frame is left in it.
28458 The generated code may use hard register 0 as a temporary. */
28460 static rtx_insn *
28461 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
28463 rtx_insn *insn;
28464 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28465 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
28466 rtx todec = gen_int_mode (-size, Pmode);
28467 rtx par, set, mem;
28469 if (INTVAL (todec) != -size)
28471 warning (0, "stack frame too large");
28472 emit_insn (gen_trap ());
28473 return 0;
28476 if (crtl->limit_stack)
28478 if (REG_P (stack_limit_rtx)
28479 && REGNO (stack_limit_rtx) > 1
28480 && REGNO (stack_limit_rtx) <= 31)
28482 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
28483 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28484 const0_rtx));
28486 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
28487 && TARGET_32BIT
28488 && DEFAULT_ABI == ABI_V4
28489 && !flag_pic)
28491 rtx toload = gen_rtx_CONST (VOIDmode,
28492 gen_rtx_PLUS (Pmode,
28493 stack_limit_rtx,
28494 GEN_INT (size)));
28496 emit_insn (gen_elf_high (tmp_reg, toload));
28497 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
28498 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28499 const0_rtx));
28501 else
28502 warning (0, "stack limit expression is not supported");
28505 if (copy_reg)
28507 if (copy_off != 0)
28508 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
28509 else
28510 emit_move_insn (copy_reg, stack_reg);
28513 if (size > 32767)
28515 /* Need a note here so that try_split doesn't get confused. */
28516 if (get_last_insn () == NULL_RTX)
28517 emit_note (NOTE_INSN_DELETED);
28518 insn = emit_move_insn (tmp_reg, todec);
28519 try_split (PATTERN (insn), insn, 0);
28520 todec = tmp_reg;
28523 insn = emit_insn (TARGET_32BIT
28524 ? gen_movsi_update_stack (stack_reg, stack_reg,
28525 todec, stack_reg)
28526 : gen_movdi_di_update_stack (stack_reg, stack_reg,
28527 todec, stack_reg));
28528 /* Since we didn't use gen_frame_mem to generate the MEM, grab
28529 it now and set the alias set/attributes. The above gen_*_update
28530 calls will generate a PARALLEL with the MEM set being the first
28531 operation. */
28532 par = PATTERN (insn);
28533 gcc_assert (GET_CODE (par) == PARALLEL);
28534 set = XVECEXP (par, 0, 0);
28535 gcc_assert (GET_CODE (set) == SET);
28536 mem = SET_DEST (set);
28537 gcc_assert (MEM_P (mem));
28538 MEM_NOTRAP_P (mem) = 1;
28539 set_mem_alias_set (mem, get_frame_alias_set ());
28541 RTX_FRAME_RELATED_P (insn) = 1;
28542 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28543 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
28544 GEN_INT (-size))));
28545 return insn;
28548 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
28550 #if PROBE_INTERVAL > 32768
28551 #error Cannot use indexed addressing mode for stack probing
28552 #endif
28554 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
28555 inclusive. These are offsets from the current stack pointer. */
28557 static void
28558 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
28560 /* See if we have a constant small number of probes to generate. If so,
28561 that's the easy case. */
28562 if (first + size <= 32768)
28564 HOST_WIDE_INT i;
28566 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
28567 it exceeds SIZE. If only one probe is needed, this will not
28568 generate any code. Then probe at FIRST + SIZE. */
28569 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
28570 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28571 -(first + i)));
28573 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28574 -(first + size)));
28577 /* Otherwise, do the same as above, but in a loop. Note that we must be
28578 extra careful with variables wrapping around because we might be at
28579 the very top (or the very bottom) of the address space and we have
28580 to be able to handle this case properly; in particular, we use an
28581 equality test for the loop condition. */
28582 else
28584 HOST_WIDE_INT rounded_size;
28585 rtx r12 = gen_rtx_REG (Pmode, 12);
28586 rtx r0 = gen_rtx_REG (Pmode, 0);
28588 /* Sanity check for the addressing mode we're going to use. */
28589 gcc_assert (first <= 32768);
28591 /* Step 1: round SIZE to the previous multiple of the interval. */
28593 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
28596 /* Step 2: compute initial and final value of the loop counter. */
28598 /* TEST_ADDR = SP + FIRST. */
28599 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
28600 -first)));
28602 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
28603 if (rounded_size > 32768)
28605 emit_move_insn (r0, GEN_INT (-rounded_size));
28606 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
28608 else
28609 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
28610 -rounded_size)));
28613 /* Step 3: the loop
28617 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
28618 probe at TEST_ADDR
28620 while (TEST_ADDR != LAST_ADDR)
28622 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
28623 until it is equal to ROUNDED_SIZE. */
28625 if (TARGET_64BIT)
28626 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
28627 else
28628 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
28631 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
28632 that SIZE is equal to ROUNDED_SIZE. */
28634 if (size != rounded_size)
28635 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
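/* A minimal illustrative sketch, not part of the build: the probe
   offsets emitted above, collected in plain C. PROBE_AT is a
   hypothetical stand-in for emit_stack_probe; offsets are negative
   because the stack grows downward. */
static void
probe_offsets_sketch (long first, long size, long interval,
		      void (*probe_at) (long sp_relative_offset))
{
  long i;
  /* Probe every INTERVAL bytes past FIRST... */
  for (i = interval; i < size; i += interval)
    probe_at (-(first + i));
  /* ...and always probe the bottom of the region. */
  probe_at (-(first + size));
}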
28639 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
28640 absolute addresses. */
28642 const char *
28643 output_probe_stack_range (rtx reg1, rtx reg2)
28645 static int labelno = 0;
28646 char loop_lab[32];
28647 rtx xops[2];
28649 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
28651 /* Loop. */
28652 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
28654 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
28655 xops[0] = reg1;
28656 xops[1] = GEN_INT (-PROBE_INTERVAL);
28657 output_asm_insn ("addi %0,%0,%1", xops);
28659 /* Probe at TEST_ADDR. */
28660 xops[1] = gen_rtx_REG (Pmode, 0);
28661 output_asm_insn ("stw %1,0(%0)", xops);
28663 /* Test if TEST_ADDR == LAST_ADDR. */
28664 xops[1] = reg2;
28665 if (TARGET_64BIT)
28666 output_asm_insn ("cmpd 0,%0,%1", xops);
28667 else
28668 output_asm_insn ("cmpw 0,%0,%1", xops);
28670 /* Branch. */
28671 fputs ("\tbne 0,", asm_out_file);
28672 assemble_name_raw (asm_out_file, loop_lab);
28673 fputc ('\n', asm_out_file);
28675 return "";
28678 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
28679 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
28680 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
28681 deduce these equivalences by itself so it wasn't necessary to hold
28682 its hand so much. Don't be tempted to always supply d2_f_d_e with
28683 the actual cfa register, i.e. r31 when we are using a hard frame
28684 pointer. That fails when saving regs off r1, and sched moves the
28685 r31 setup past the reg saves. */
28687 static rtx_insn *
28688 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
28689 rtx reg2, rtx repl2)
28691 rtx repl;
28693 if (REGNO (reg) == STACK_POINTER_REGNUM)
28695 gcc_checking_assert (val == 0);
28696 repl = NULL_RTX;
28698 else
28699 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28700 GEN_INT (val));
28702 rtx pat = PATTERN (insn);
28703 if (!repl && !reg2)
28705 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
28706 if (GET_CODE (pat) == PARALLEL)
28707 for (int i = 0; i < XVECLEN (pat, 0); i++)
28708 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28710 rtx set = XVECEXP (pat, 0, i);
28712 /* If this PARALLEL has been emitted for out-of-line
28713 register save functions, or store multiple, then omit
28714 eh_frame info for any user-defined global regs. If
28715 eh_frame info is supplied, frame unwinding will
28716 restore a user reg. */
28717 if (!REG_P (SET_SRC (set))
28718 || !fixed_reg_p (REGNO (SET_SRC (set))))
28719 RTX_FRAME_RELATED_P (set) = 1;
28721 RTX_FRAME_RELATED_P (insn) = 1;
28722 return insn;
28725 /* We expect that 'pat' is either a SET or a PARALLEL containing
28726 SETs (and possibly other stuff). In a PARALLEL, all the SETs
28727 are important so they all have to be marked RTX_FRAME_RELATED_P.
28728 Call simplify_replace_rtx on the SETs rather than the whole insn
28729 so as to leave the other stuff alone (for example USE of r12). */
28731 set_used_flags (pat);
28732 if (GET_CODE (pat) == SET)
28734 if (repl)
28735 pat = simplify_replace_rtx (pat, reg, repl);
28736 if (reg2)
28737 pat = simplify_replace_rtx (pat, reg2, repl2);
28739 else if (GET_CODE (pat) == PARALLEL)
28741 pat = shallow_copy_rtx (pat);
28742 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
28744 for (int i = 0; i < XVECLEN (pat, 0); i++)
28745 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28747 rtx set = XVECEXP (pat, 0, i);
28749 if (repl)
28750 set = simplify_replace_rtx (set, reg, repl);
28751 if (reg2)
28752 set = simplify_replace_rtx (set, reg2, repl2);
28753 XVECEXP (pat, 0, i) = set;
28755 /* Omit eh_frame info for any user-defined global regs. */
28756 if (!REG_P (SET_SRC (set))
28757 || !fixed_reg_p (REGNO (SET_SRC (set))))
28758 RTX_FRAME_RELATED_P (set) = 1;
28761 else
28762 gcc_unreachable ();
28764 RTX_FRAME_RELATED_P (insn) = 1;
28765 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
28767 return insn;
28770 /* Returns an insn that has a vrsave set operation with the
28771 appropriate CLOBBERs. */
28773 static rtx
28774 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
28776 int nclobs, i;
28777 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
28778 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28780 clobs[0]
28781 = gen_rtx_SET (vrsave,
28782 gen_rtx_UNSPEC_VOLATILE (SImode,
28783 gen_rtvec (2, reg, vrsave),
28784 UNSPECV_SET_VRSAVE));
28786 nclobs = 1;
28788 /* We need to clobber the registers in the mask so the scheduler
28789 does not move sets to VRSAVE before sets of AltiVec registers.
28791 However, if the function receives nonlocal gotos, reload will set
28792 all call saved registers live. We will end up with:
28794 (set (reg 999) (mem))
28795 (parallel [ (set (reg vrsave) (unspec blah))
28796 (clobber (reg 999))])
28798 The clobber will cause the store into reg 999 to be dead, and
28799 flow will attempt to delete an epilogue insn. In this case, we
28800 need an unspec use/set of the register. */
28802 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
28803 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28805 if (!epiloguep || call_used_regs [i])
28806 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
28807 gen_rtx_REG (V4SImode, i));
28808 else
28810 rtx reg = gen_rtx_REG (V4SImode, i);
28812 clobs[nclobs++]
28813 = gen_rtx_SET (reg,
28814 gen_rtx_UNSPEC (V4SImode,
28815 gen_rtvec (1, reg), 27));
28819 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
28821 for (i = 0; i < nclobs; ++i)
28822 XVECEXP (insn, 0, i) = clobs[i];
28824 return insn;
28827 static rtx
28828 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
28830 rtx addr, mem;
28832 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
28833 mem = gen_frame_mem (GET_MODE (reg), addr);
28834 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
28837 static rtx
28838 gen_frame_load (rtx reg, rtx frame_reg, int offset)
28840 return gen_frame_set (reg, frame_reg, offset, false);
28843 static rtx
28844 gen_frame_store (rtx reg, rtx frame_reg, int offset)
28846 return gen_frame_set (reg, frame_reg, offset, true);
28849 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
28850 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
28852 static rtx_insn *
28853 emit_frame_save (rtx frame_reg, machine_mode mode,
28854 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
28856 rtx reg;
28858 /* Some cases that need register indexed addressing. */
28859 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
28860 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
28861 || (TARGET_E500_DOUBLE && mode == DFmode)
28862 || (TARGET_SPE_ABI
28863 && SPE_VECTOR_MODE (mode)
28864 && !SPE_CONST_OFFSET_OK (offset))));
28866 reg = gen_rtx_REG (mode, regno);
28867 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
28868 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
28869 NULL_RTX, NULL_RTX);
28872 /* Emit an offset memory reference suitable for a frame store, while
28873 converting to a valid addressing mode. */
28875 static rtx
28876 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
28878 rtx int_rtx, offset_rtx;
28880 int_rtx = GEN_INT (offset);
28882 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
28883 || (TARGET_E500_DOUBLE && mode == DFmode))
28885 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
28886 emit_move_insn (offset_rtx, int_rtx);
28888 else
28889 offset_rtx = int_rtx;
28891 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
28894 #ifndef TARGET_FIX_AND_CONTINUE
28895 #define TARGET_FIX_AND_CONTINUE 0
28896 #endif
28898 /* The lowest out-of-line-saved register is really GPR 13 or 14,
28898 FPR 14 and VR 20; we need the smallest of these. */
28899 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
28900 #define LAST_SAVRES_REGISTER 31
28901 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
28903 enum {
28904 SAVRES_LR = 0x1,
28905 SAVRES_SAVE = 0x2,
28906 SAVRES_REG = 0x0c,
28907 SAVRES_GPR = 0,
28908 SAVRES_FPR = 4,
28909 SAVRES_VR = 8
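/* A minimal illustrative sketch, not part of the build: SEL is a
   small bitfield -- bit 0 selects the LR variant, bit 1 save vs.
   restore, and bits 2-3 the register class -- so class tests mask
   with SAVRES_REG and compare, e.g.: */
static int
savres_sel_is_fpr_sketch (int sel)
{
  /* SAVRES_REG masks the two-bit class field. */
  return (sel & SAVRES_REG) == SAVRES_FPR;
}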
28912 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
28914 /* Temporary holding space for an out-of-line register save/restore
28915 routine name. */
28916 static char savres_routine_name[30];
28918 /* Return the name for an out-of-line register save/restore routine.
28919 The SEL bits choose the register class and the save/restore/LR variant. */
28921 static char *
28922 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
28924 const char *prefix = "";
28925 const char *suffix = "";
28927 /* Different targets are supposed to define
28928 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
28929 routine name could be defined with:
28931 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
28933 This is a nice idea in theory, but in reality, things are
28934 complicated in several ways:
28936 - ELF targets have save/restore routines for GPRs.
28938 - SPE targets use different prefixes for 32/64-bit registers, and
28939 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
28941 - PPC64 ELF targets have routines for save/restore of GPRs that
28942 differ in what they do with the link register, so having a set
28943 prefix doesn't work. (We only use one of the save routines at
28944 the moment, though.)
28946 - PPC32 elf targets have "exit" versions of the restore routines
28947 that restore the link register and can save some extra space.
28948 These require an extra suffix. (There are also "tail" versions
28949 of the restore routines and "GOT" versions of the save routines,
28950 but we don't generate those at present. Same problems apply,
28951 though.)
28953 We deal with all this by synthesizing our own prefix/suffix and
28954 using that for the simple sprintf call shown above. */
28955 if (TARGET_SPE)
28957 /* No floating point saves on the SPE. */
28958 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
28960 if ((sel & SAVRES_SAVE))
28961 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
28962 else
28963 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
28965 if ((sel & SAVRES_LR))
28966 suffix = "_x";
28968 else if (DEFAULT_ABI == ABI_V4)
28970 if (TARGET_64BIT)
28971 goto aix_names;
28973 if ((sel & SAVRES_REG) == SAVRES_GPR)
28974 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
28975 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28976 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
28977 else if ((sel & SAVRES_REG) == SAVRES_VR)
28978 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28979 else
28980 abort ();
28982 if ((sel & SAVRES_LR))
28983 suffix = "_x";
28985 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28987 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
28988 /* No out-of-line save/restore routines for GPRs on AIX. */
28989 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
28990 #endif
28992 aix_names:
28993 if ((sel & SAVRES_REG) == SAVRES_GPR)
28994 prefix = ((sel & SAVRES_SAVE)
28995 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
28996 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
28997 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28999 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29000 if ((sel & SAVRES_LR))
29001 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
29002 else
29003 #endif
29005 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
29006 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
29009 else if ((sel & SAVRES_REG) == SAVRES_VR)
29010 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
29011 else
29012 abort ();
29015 if (DEFAULT_ABI == ABI_DARWIN)
29017 /* The Darwin approach is (slightly) different, in order to be
29018 compatible with code generated by the system toolchain. There is a
29019 single symbol for the start of the save sequence, and the code here
29020 embeds an offset into that code on the basis of the first register
29021 to be saved. */
29022 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
29023 if ((sel & SAVRES_REG) == SAVRES_GPR)
29024 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
29025 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
29026 (regno - 13) * 4, prefix, regno);
29027 else if ((sel & SAVRES_REG) == SAVRES_FPR)
29028 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
29029 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
29030 else if ((sel & SAVRES_REG) == SAVRES_VR)
29031 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
29032 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
29033 else
29034 abort ();
29036 else
29037 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
29039 return savres_routine_name;
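/* Concrete examples of the synthesized names (illustrative, derived
   from the logic above): a 32-bit ELF/V.4 out-of-line save of r28..r31
   is "_savegpr_28", its "exit" restore variant "_restgpr_28_x"; the
   PPC64 equivalents are "_savegpr1_28" (no LR handling) and
   "_savegpr0_28"/"_restgpr0_28" (the LR variants).  */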
29042 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
29043 SEL encodes save vs. restore, the register class, and the LR variant. */
29045 static rtx
29046 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
29048 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
29049 ? info->first_gp_reg_save
29050 : (sel & SAVRES_REG) == SAVRES_FPR
29051 ? info->first_fp_reg_save - 32
29052 : (sel & SAVRES_REG) == SAVRES_VR
29053 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
29054 : -1);
29055 rtx sym;
29056 int select = sel;
29058 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
29059 versions of the gpr routines. */
29060 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
29061 && info->spe_64bit_regs_used)
29062 select ^= SAVRES_FPR ^ SAVRES_GPR;
29064 /* Don't generate bogus routine names. */
29065 gcc_assert (FIRST_SAVRES_REGISTER <= regno
29066 && regno <= LAST_SAVRES_REGISTER
29067 && select >= 0 && select <= 12);
29069 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
29071 if (sym == NULL)
29073 char *name;
29075 name = rs6000_savres_routine_name (info, regno, sel);
29077 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
29078 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
29079 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
29082 return sym;
29085 /* Emit a sequence of insns, including a stack tie if needed, for
29086 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
29087 reset the stack pointer, but move the base of the frame into
29088 reg UPDT_REGNO for use by out-of-line register restore routines. */
29090 static rtx
29091 rs6000_emit_stack_reset (rs6000_stack_t *info,
29092 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
29093 unsigned updt_regno)
29095 /* If there is nothing to do, don't do anything. */
29096 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
29097 return NULL_RTX;
29099 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
29101 /* This blockage is needed so that sched doesn't decide to move
29102 the sp change before the register restores. */
29103 if (DEFAULT_ABI == ABI_V4
29104 || (TARGET_SPE_ABI
29105 && info->spe_64bit_regs_used != 0
29106 && info->first_gp_reg_save != 32))
29107 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
29108 GEN_INT (frame_off)));
29110 /* If we are restoring registers out-of-line, we will be using the
29111 "exit" variants of the restore routines, which will reset the
29112 stack for us. But we do need to point updt_reg into the
29113 right place for those routines. */
29114 if (frame_off != 0)
29115 return emit_insn (gen_add3_insn (updt_reg_rtx,
29116 frame_reg_rtx, GEN_INT (frame_off)));
29117 else
29118 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
29120 return NULL_RTX;
29123 /* Return the register number used as a pointer by out-of-line
29124 save/restore functions. */
29126 static inline unsigned
29127 ptr_regno_for_savres (int sel)
29129 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29130 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
29131 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
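/* Summarizing the mapping above (derived, not normative): AIX and ELFv2
   use r1 for FPR routines and for any LR variant, and r12 otherwise;
   Darwin uses r1 for FPR routines; every remaining case, including
   V.4, uses r11.  */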
29134 /* Construct a parallel rtx describing the effect of a call to an
29135 out-of-line register save/restore routine, and emit the insn
29136 or jump_insn as appropriate. */
29138 static rtx_insn *
29139 rs6000_emit_savres_rtx (rs6000_stack_t *info,
29140 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
29141 machine_mode reg_mode, int sel)
29143 int i;
29144 int offset, start_reg, end_reg, n_regs, use_reg;
29145 int reg_size = GET_MODE_SIZE (reg_mode);
29146 rtx sym;
29147 rtvec p;
29148 rtx par;
29149 rtx_insn *insn;
29151 offset = 0;
29152 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29153 ? info->first_gp_reg_save
29154 : (sel & SAVRES_REG) == SAVRES_FPR
29155 ? info->first_fp_reg_save
29156 : (sel & SAVRES_REG) == SAVRES_VR
29157 ? info->first_altivec_reg_save
29158 : -1);
29159 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29160 ? 32
29161 : (sel & SAVRES_REG) == SAVRES_FPR
29162 ? 64
29163 : (sel & SAVRES_REG) == SAVRES_VR
29164 ? LAST_ALTIVEC_REGNO + 1
29165 : -1);
29166 n_regs = end_reg - start_reg;
29167 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
29168 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
29169 + n_regs);
29171 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29172 RTVEC_ELT (p, offset++) = ret_rtx;
29174 RTVEC_ELT (p, offset++)
29175 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
29177 sym = rs6000_savres_routine_sym (info, sel);
29178 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
29180 use_reg = ptr_regno_for_savres (sel);
29181 if ((sel & SAVRES_REG) == SAVRES_VR)
29183 /* Vector regs are saved/restored using [reg+reg] addressing. */
29184 RTVEC_ELT (p, offset++)
29185 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29186 RTVEC_ELT (p, offset++)
29187 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
29189 else
29190 RTVEC_ELT (p, offset++)
29191 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29193 for (i = 0; i < end_reg - start_reg; i++)
29194 RTVEC_ELT (p, i + offset)
29195 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
29196 frame_reg_rtx, save_area_offset + reg_size * i,
29197 (sel & SAVRES_SAVE) != 0);
29199 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29200 RTVEC_ELT (p, i + offset)
29201 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
29203 par = gen_rtx_PARALLEL (VOIDmode, p);
29205 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29207 insn = emit_jump_insn (par);
29208 JUMP_LABEL (insn) = ret_rtx;
29210 else
29211 insn = emit_insn (par);
29212 return insn;
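/* Shape of the PARALLEL built above, for an illustrative V.4 GPR "exit"
   restore (SAVRES_LR set, SAVRES_SAVE clear, first_gp_reg_save == 29):
       (parallel [(return)
                  (clobber (reg LR_REGNO))
                  (use (symbol_ref "_restgpr_29_x"))
                  (use (reg 11))
                  (set (reg 29) (mem ...))
                  (set (reg 30) (mem ...))
                  (set (reg 31) (mem ...))])
   which is emitted as a jump_insn since it ends the function.  */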
29215 /* Emit code to store CR fields that need to be saved into REG. */
29217 static void
29218 rs6000_emit_move_from_cr (rtx reg)
29220 /* Only the ELFv2 ABI allows saving a subset of the CR fields. */
29221 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
29223 int i, cr_reg[8], count = 0;
29225 /* Collect CR fields that must be saved. */
29226 for (i = 0; i < 8; i++)
29227 if (save_reg_p (CR0_REGNO + i))
29228 cr_reg[count++] = i;
29230 /* If it's just a single one, use mfcrf. */
29231 if (count == 1)
29233 rtvec p = rtvec_alloc (1);
29234 rtvec r = rtvec_alloc (2);
29235 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
29236 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
29237 RTVEC_ELT (p, 0)
29238 = gen_rtx_SET (reg,
29239 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
29241 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29242 return;
29245 /* ??? It might be better to handle count == 2 / 3 cases here
29246 as well, using logical operations to combine the values. */
29249 emit_insn (gen_movesi_from_cr (reg));
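/* Worked example (illustrative): if CR2 is the only field needing a
   save, cr_reg[0] == 2 and the mask is 1 << (7 - 2) == 0x20, i.e. the
   FXM field selecting just CR2 in the generated mfcrf.  */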
29252 /* Return whether the split-stack arg pointer (r12) is used. */
29254 static bool
29255 split_stack_arg_pointer_used_p (void)
29257 /* If the pseudo holding the arg pointer is no longer a pseudo,
29258 then the arg pointer is used. */
29259 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
29260 && (!REG_P (cfun->machine->split_stack_arg_pointer)
29261 || (REGNO (cfun->machine->split_stack_arg_pointer)
29262 < FIRST_PSEUDO_REGISTER)))
29263 return true;
29265 /* Unfortunately we also need to do some code scanning, since
29266 r12 may have been substituted for the pseudo. */
29267 rtx_insn *insn;
29268 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
29269 FOR_BB_INSNS (bb, insn)
29270 if (NONDEBUG_INSN_P (insn))
29272 /* A call destroys r12. */
29273 if (CALL_P (insn))
29274 return false;
29276 df_ref use;
29277 FOR_EACH_INSN_USE (use, insn)
29279 rtx x = DF_REF_REG (use);
29280 if (REG_P (x) && REGNO (x) == 12)
29281 return true;
29283 df_ref def;
29284 FOR_EACH_INSN_DEF (def, insn)
29286 rtx x = DF_REF_REG (def);
29287 if (REG_P (x) && REGNO (x) == 12)
29288 return false;
29291 return bitmap_bit_p (DF_LR_OUT (bb), 12);
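/* Note: if the scan above reaches the end of the first block having
   seen neither a use nor a def of r12, r12 counts as used exactly when
   it is live on exit from that block (the DF_LR_OUT test).  */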
29294 /* Return whether we need to emit an ELFv2 global entry point prologue. */
29296 static bool
29297 rs6000_global_entry_point_needed_p (void)
29299 /* Only needed for the ELFv2 ABI. */
29300 if (DEFAULT_ABI != ABI_ELFv2)
29301 return false;
29303 /* With -msingle-pic-base, we assume the whole program shares the same
29304 TOC, so no global entry point prologues are needed anywhere. */
29305 if (TARGET_SINGLE_PIC_BASE)
29306 return false;
29308 /* Ensure we have a global entry point for thunks. ??? We could
29309 avoid that if the target routine doesn't need a global entry point,
29310 but we do not know whether this is the case at this point. */
29311 if (cfun->is_thunk)
29312 return true;
29314 /* For regular functions, rs6000_emit_prologue sets this flag if the
29315 routine ever uses the TOC pointer. */
29316 return cfun->machine->r2_setup_needed;
29319 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
29320 static sbitmap
29321 rs6000_get_separate_components (void)
29323 rs6000_stack_t *info = rs6000_stack_info ();
29325 if (WORLD_SAVE_P (info))
29326 return NULL;
29328 if (TARGET_SPE_ABI)
29329 return NULL;
29331 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
29332 && !(info->savres_strategy & REST_MULTIPLE));
29334 /* Component 0 is the save/restore of LR (done via GPR0).
29335 Components 13..31 are the save/restore of GPR13..GPR31.
29336 Components 46..63 are the save/restore of FPR14..FPR31. */
29338 cfun->machine->n_components = 64;
29340 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29341 bitmap_clear (components);
29343 int reg_size = TARGET_32BIT ? 4 : 8;
29344 int fp_reg_size = 8;
29346 /* The GPRs we need saved to the frame. */
29347 if ((info->savres_strategy & SAVE_INLINE_GPRS)
29348 && (info->savres_strategy & REST_INLINE_GPRS))
29350 int offset = info->gp_save_offset;
29351 if (info->push_p)
29352 offset += info->total_size;
29354 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29356 if (IN_RANGE (offset, -0x8000, 0x7fff)
29357 && rs6000_reg_live_or_pic_offset_p (regno))
29358 bitmap_set_bit (components, regno);
29360 offset += reg_size;
29364 /* Don't mess with the hard frame pointer. */
29365 if (frame_pointer_needed)
29366 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
29368 /* Don't mess with the fixed TOC register. */
29369 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
29370 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
29371 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
29372 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
29374 /* The FPRs we need saved to the frame. */
29375 if ((info->savres_strategy & SAVE_INLINE_FPRS)
29376 && (info->savres_strategy & REST_INLINE_FPRS))
29378 int offset = info->fp_save_offset;
29379 if (info->push_p)
29380 offset += info->total_size;
29382 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29384 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
29385 bitmap_set_bit (components, regno);
29387 offset += fp_reg_size;
29391 /* Optimize LR save and restore if we can. This is component 0. Any
29392 out-of-line register save/restore routines need LR. */
29393 if (info->lr_save_p
29394 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
29395 && (info->savres_strategy & SAVE_INLINE_GPRS)
29396 && (info->savres_strategy & REST_INLINE_GPRS)
29397 && (info->savres_strategy & SAVE_INLINE_FPRS)
29398 && (info->savres_strategy & REST_INLINE_FPRS)
29399 && (info->savres_strategy & SAVE_INLINE_VRS)
29400 && (info->savres_strategy & REST_INLINE_VRS))
29402 int offset = info->lr_save_offset;
29403 if (info->push_p)
29404 offset += info->total_size;
29405 if (IN_RANGE (offset, -0x8000, 0x7fff))
29406 bitmap_set_bit (components, 0);
29409 return components;
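/* The IN_RANGE (offset, -0x8000, 0x7fff) tests above limit separately
   wrapped components to slots reachable with a single D-form access
   (16-bit signed displacement); a slot further away would need an extra
   address setup insn, defeating the point of a one-insn component.  */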
29412 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
29413 static sbitmap
29414 rs6000_components_for_bb (basic_block bb)
29416 rs6000_stack_t *info = rs6000_stack_info ();
29418 bitmap in = DF_LIVE_IN (bb);
29419 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
29420 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
29422 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29423 bitmap_clear (components);
29425 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
29427 /* GPRs. */
29428 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29429 if (bitmap_bit_p (in, regno)
29430 || bitmap_bit_p (gen, regno)
29431 || bitmap_bit_p (kill, regno))
29432 bitmap_set_bit (components, regno);
29434 /* FPRs. */
29435 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29436 if (bitmap_bit_p (in, regno)
29437 || bitmap_bit_p (gen, regno)
29438 || bitmap_bit_p (kill, regno))
29439 bitmap_set_bit (components, regno);
29441 /* The link register. */
29442 if (bitmap_bit_p (in, LR_REGNO)
29443 || bitmap_bit_p (gen, LR_REGNO)
29444 || bitmap_bit_p (kill, LR_REGNO))
29445 bitmap_set_bit (components, 0);
29447 return components;
29450 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
29451 static void
29452 rs6000_disqualify_components (sbitmap components, edge e,
29453 sbitmap edge_components, bool /*is_prologue*/)
29455 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
29456 live where we want to place that code. */
29457 if (bitmap_bit_p (edge_components, 0)
29458 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
29460 if (dump_file)
29461 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
29462 "on entry to bb %d\n", e->dest->index);
29463 bitmap_clear_bit (components, 0);
29467 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
29468 static void
29469 rs6000_emit_prologue_components (sbitmap components)
29471 rs6000_stack_t *info = rs6000_stack_info ();
29472 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29473 ? HARD_FRAME_POINTER_REGNUM
29474 : STACK_POINTER_REGNUM);
29476 machine_mode reg_mode = Pmode;
29477 int reg_size = TARGET_32BIT ? 4 : 8;
29478 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29479 ? DFmode : SFmode;
29480 int fp_reg_size = 8;
29482 /* Prologue for LR. */
29483 if (bitmap_bit_p (components, 0))
29485 rtx reg = gen_rtx_REG (reg_mode, 0);
29486 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
29487 RTX_FRAME_RELATED_P (insn) = 1;
29488 add_reg_note (insn, REG_CFA_REGISTER, NULL);
29490 int offset = info->lr_save_offset;
29491 if (info->push_p)
29492 offset += info->total_size;
29494 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29495 RTX_FRAME_RELATED_P (insn) = 1;
29496 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
29497 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
29498 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
29501 /* Prologue for the GPRs. */
29502 int offset = info->gp_save_offset;
29503 if (info->push_p)
29504 offset += info->total_size;
29506 for (int i = info->first_gp_reg_save; i < 32; i++)
29508 if (bitmap_bit_p (components, i))
29510 rtx reg = gen_rtx_REG (reg_mode, i);
29511 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29512 RTX_FRAME_RELATED_P (insn) = 1;
29513 rtx set = copy_rtx (single_set (insn));
29514 add_reg_note (insn, REG_CFA_OFFSET, set);
29517 offset += reg_size;
29520 /* Prologue for the FPRs. */
29521 offset = info->fp_save_offset;
29522 if (info->push_p)
29523 offset += info->total_size;
29525 for (int i = info->first_fp_reg_save; i < 64; i++)
29527 if (bitmap_bit_p (components, i))
29529 rtx reg = gen_rtx_REG (fp_reg_mode, i);
29530 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29531 RTX_FRAME_RELATED_P (insn) = 1;
29532 rtx set = copy_rtx (single_set (insn));
29533 add_reg_note (insn, REG_CFA_OFFSET, set);
29536 offset += fp_reg_size;
29540 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29541 static void
29542 rs6000_emit_epilogue_components (sbitmap components)
29544 rs6000_stack_t *info = rs6000_stack_info ();
29545 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29546 ? HARD_FRAME_POINTER_REGNUM
29547 : STACK_POINTER_REGNUM);
29549 machine_mode reg_mode = Pmode;
29550 int reg_size = TARGET_32BIT ? 4 : 8;
29552 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29553 ? DFmode : SFmode;
29554 int fp_reg_size = 8;
29556 /* Epilogue for the FPRs. */
29557 int offset = info->fp_save_offset;
29558 if (info->push_p)
29559 offset += info->total_size;
29561 for (int i = info->first_fp_reg_save; i < 64; i++)
29563 if (bitmap_bit_p (components, i))
29565 rtx reg = gen_rtx_REG (fp_reg_mode, i);
29566 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29567 RTX_FRAME_RELATED_P (insn) = 1;
29568 add_reg_note (insn, REG_CFA_RESTORE, reg);
29571 offset += fp_reg_size;
29574 /* Epilogue for the GPRs. */
29575 offset = info->gp_save_offset;
29576 if (info->push_p)
29577 offset += info->total_size;
29579 for (int i = info->first_gp_reg_save; i < 32; i++)
29581 if (bitmap_bit_p (components, i))
29583 rtx reg = gen_rtx_REG (reg_mode, i);
29584 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29585 RTX_FRAME_RELATED_P (insn) = 1;
29586 add_reg_note (insn, REG_CFA_RESTORE, reg);
29589 offset += reg_size;
29592 /* Epilogue for LR. */
29593 if (bitmap_bit_p (components, 0))
29595 int offset = info->lr_save_offset;
29596 if (info->push_p)
29597 offset += info->total_size;
29599 rtx reg = gen_rtx_REG (reg_mode, 0);
29600 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29602 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29603 insn = emit_move_insn (lr, reg);
29604 RTX_FRAME_RELATED_P (insn) = 1;
29605 add_reg_note (insn, REG_CFA_RESTORE, lr);
29609 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29610 static void
29611 rs6000_set_handled_components (sbitmap components)
29613 rs6000_stack_t *info = rs6000_stack_info ();
29615 for (int i = info->first_gp_reg_save; i < 32; i++)
29616 if (bitmap_bit_p (components, i))
29617 cfun->machine->gpr_is_wrapped_separately[i] = true;
29619 for (int i = info->first_fp_reg_save; i < 64; i++)
29620 if (bitmap_bit_p (components, i))
29621 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
29623 if (bitmap_bit_p (components, 0))
29624 cfun->machine->lr_is_wrapped_separately = true;
29627 /* Emit function prologue as insns. */
29629 void
29630 rs6000_emit_prologue (void)
29632 rs6000_stack_t *info = rs6000_stack_info ();
29633 machine_mode reg_mode = Pmode;
29634 int reg_size = TARGET_32BIT ? 4 : 8;
29635 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29636 ? DFmode : SFmode;
29637 int fp_reg_size = 8;
29638 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29639 rtx frame_reg_rtx = sp_reg_rtx;
29640 unsigned int cr_save_regno;
29641 rtx cr_save_rtx = NULL_RTX;
29642 rtx_insn *insn;
29643 int strategy;
29644 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
29645 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
29646 && call_used_regs[STATIC_CHAIN_REGNUM]);
29647 int using_split_stack = (flag_split_stack
29648 && (lookup_attribute ("no_split_stack",
29649 DECL_ATTRIBUTES (cfun->decl))
29650 == NULL));
29652 /* Offset to top of frame for frame_reg and sp respectively. */
29653 HOST_WIDE_INT frame_off = 0;
29654 HOST_WIDE_INT sp_off = 0;
29655 /* sp_adjust is the stack adjusting instruction, tracked so that the
29656 insn setting up the split-stack arg pointer can be emitted just
29657 prior to it, when r12 is not used here for other purposes. */
29658 rtx_insn *sp_adjust = 0;
29660 #if CHECKING_P
29661 /* Track and check usage of r0, r11, r12. */
29662 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
29663 #define START_USE(R) do \
29665 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29666 reg_inuse |= 1 << (R); \
29667 } while (0)
29668 #define END_USE(R) do \
29670 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
29671 reg_inuse &= ~(1 << (R)); \
29672 } while (0)
29673 #define NOT_INUSE(R) do \
29675 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29676 } while (0)
29677 #else
29678 #define START_USE(R) do {} while (0)
29679 #define END_USE(R) do {} while (0)
29680 #define NOT_INUSE(R) do {} while (0)
29681 #endif
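/* Usage sketch for the checking macros above (illustrative):
       START_USE (0);
       ... emit insns that use r0 ...
       END_USE (0);
   NOT_INUSE (11) merely asserts that r11 is currently free; with
   !CHECKING_P all three expand to nothing.  */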
29683 if (DEFAULT_ABI == ABI_ELFv2
29684 && !TARGET_SINGLE_PIC_BASE)
29686 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
29688 /* With -mminimal-toc we may generate an extra use of r2 below. */
29689 if (TARGET_TOC && TARGET_MINIMAL_TOC
29690 && !constant_pool_empty_p ())
29691 cfun->machine->r2_setup_needed = true;
29695 if (flag_stack_usage_info)
29696 current_function_static_stack_size = info->total_size;
29698 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
29700 HOST_WIDE_INT size = info->total_size;
29702 if (crtl->is_leaf && !cfun->calls_alloca)
29704 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
29705 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
29706 size - STACK_CHECK_PROTECT);
29708 else if (size > 0)
29709 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
29712 if (TARGET_FIX_AND_CONTINUE)
29714 /* gdb on darwin arranges to forward a function from the old
29715 address by modifying the first 5 instructions of the function
29716 to branch to the overriding function. This is necessary to
29717 permit function pointers that point to the old function to
29718 actually forward to the new function. */
29719 emit_insn (gen_nop ());
29720 emit_insn (gen_nop ());
29721 emit_insn (gen_nop ());
29722 emit_insn (gen_nop ());
29723 emit_insn (gen_nop ());
29726 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29728 reg_mode = V2SImode;
29729 reg_size = 8;
29732 /* Handle world saves specially here. */
29733 if (WORLD_SAVE_P (info))
29735 int i, j, sz;
29736 rtx treg;
29737 rtvec p;
29738 rtx reg0;
29740 /* save_world expects lr in r0. */
29741 reg0 = gen_rtx_REG (Pmode, 0);
29742 if (info->lr_save_p)
29744 insn = emit_move_insn (reg0,
29745 gen_rtx_REG (Pmode, LR_REGNO));
29746 RTX_FRAME_RELATED_P (insn) = 1;
29749 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
29750 assumptions about the offsets of various bits of the stack
29751 frame. */
29752 gcc_assert (info->gp_save_offset == -220
29753 && info->fp_save_offset == -144
29754 && info->lr_save_offset == 8
29755 && info->cr_save_offset == 4
29756 && info->push_p
29757 && info->lr_save_p
29758 && (!crtl->calls_eh_return
29759 || info->ehrd_offset == -432)
29760 && info->vrsave_save_offset == -224
29761 && info->altivec_save_offset == -416);
29763 treg = gen_rtx_REG (SImode, 11);
29764 emit_move_insn (treg, GEN_INT (-info->total_size));
29766 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
29767 in R11. It also clobbers R12, so beware! */
29769 /* Preserve CR2 for save_world prologues. */
29770 sz = 5;
29771 sz += 32 - info->first_gp_reg_save;
29772 sz += 64 - info->first_fp_reg_save;
29773 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
29774 p = rtvec_alloc (sz);
29775 j = 0;
29776 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
29777 gen_rtx_REG (SImode,
29778 LR_REGNO));
29779 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
29780 gen_rtx_SYMBOL_REF (Pmode,
29781 "*save_world"));
29782 /* We do floats first so that the instruction pattern matches
29783 properly. */
29784 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29785 RTVEC_ELT (p, j++)
29786 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29787 ? DFmode : SFmode,
29788 info->first_fp_reg_save + i),
29789 frame_reg_rtx,
29790 info->fp_save_offset + frame_off + 8 * i);
29791 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29792 RTVEC_ELT (p, j++)
29793 = gen_frame_store (gen_rtx_REG (V4SImode,
29794 info->first_altivec_reg_save + i),
29795 frame_reg_rtx,
29796 info->altivec_save_offset + frame_off + 16 * i);
29797 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29798 RTVEC_ELT (p, j++)
29799 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29800 frame_reg_rtx,
29801 info->gp_save_offset + frame_off + reg_size * i);
29803 /* CR register traditionally saved as CR2. */
29804 RTVEC_ELT (p, j++)
29805 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
29806 frame_reg_rtx, info->cr_save_offset + frame_off);
29807 /* Describe the store of R0, which holds the caller's LR. */
29808 if (info->lr_save_p)
29809 RTVEC_ELT (p, j++)
29810 = gen_frame_store (reg0,
29811 frame_reg_rtx, info->lr_save_offset + frame_off);
29812 /* Explain what happens to the stack pointer. */
29814 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
29815 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
29818 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29819 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29820 treg, GEN_INT (-info->total_size));
29821 sp_off = frame_off = info->total_size;
29824 strategy = info->savres_strategy;
29826 /* For V.4, update stack before we do any saving and set back pointer. */
29827 if (! WORLD_SAVE_P (info)
29828 && info->push_p
29829 && (DEFAULT_ABI == ABI_V4
29830 || crtl->calls_eh_return))
29832 bool need_r11 = (TARGET_SPE
29833 ? (!(strategy & SAVE_INLINE_GPRS)
29834 && info->spe_64bit_regs_used == 0)
29835 : (!(strategy & SAVE_INLINE_FPRS)
29836 || !(strategy & SAVE_INLINE_GPRS)
29837 || !(strategy & SAVE_INLINE_VRS)));
29838 int ptr_regno = -1;
29839 rtx ptr_reg = NULL_RTX;
29840 int ptr_off = 0;
29842 if (info->total_size < 32767)
29843 frame_off = info->total_size;
29844 else if (need_r11)
29845 ptr_regno = 11;
29846 else if (info->cr_save_p
29847 || info->lr_save_p
29848 || info->first_fp_reg_save < 64
29849 || info->first_gp_reg_save < 32
29850 || info->altivec_size != 0
29851 || info->vrsave_size != 0
29852 || crtl->calls_eh_return)
29853 ptr_regno = 12;
29854 else
29856 /* The prologue won't be saving any regs so there is no need
29857 to set up a frame register to access any frame save area.
29858 We also won't be using frame_off anywhere below, but set
29859 the correct value anyway to protect against future
29860 changes to this function. */
29861 frame_off = info->total_size;
29863 if (ptr_regno != -1)
29865 /* Set up the frame offset to that needed by the first
29866 out-of-line save function. */
29867 START_USE (ptr_regno);
29868 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29869 frame_reg_rtx = ptr_reg;
29870 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
29871 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
29872 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
29873 ptr_off = info->gp_save_offset + info->gp_size;
29874 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
29875 ptr_off = info->altivec_save_offset + info->altivec_size;
29876 frame_off = -ptr_off;
29878 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
29879 ptr_reg, ptr_off);
29880 if (REGNO (frame_reg_rtx) == 12)
29881 sp_adjust = 0;
29882 sp_off = info->total_size;
29883 if (frame_reg_rtx != sp_reg_rtx)
29884 rs6000_emit_stack_tie (frame_reg_rtx, false);
29887 /* If we use the link register, get it into r0. */
29888 if (!WORLD_SAVE_P (info) && info->lr_save_p
29889 && !cfun->machine->lr_is_wrapped_separately)
29891 rtx addr, reg, mem;
29893 reg = gen_rtx_REG (Pmode, 0);
29894 START_USE (0);
29895 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
29896 RTX_FRAME_RELATED_P (insn) = 1;
29898 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
29899 | SAVE_NOINLINE_FPRS_SAVES_LR)))
29901 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
29902 GEN_INT (info->lr_save_offset + frame_off));
29903 mem = gen_rtx_MEM (Pmode, addr);
29904 /* This should not be of rs6000_sr_alias_set, because of
29905 __builtin_return_address. */
29907 insn = emit_move_insn (mem, reg);
29908 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29909 NULL_RTX, NULL_RTX);
29910 END_USE (0);
29914 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
29915 r12 will be needed by the out-of-line gpr save. */
29916 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29917 && !(strategy & (SAVE_INLINE_GPRS
29918 | SAVE_NOINLINE_GPRS_SAVES_LR))
29919 ? 11 : 12);
29920 if (!WORLD_SAVE_P (info)
29921 && info->cr_save_p
29922 && REGNO (frame_reg_rtx) != cr_save_regno
29923 && !(using_static_chain_p && cr_save_regno == 11)
29924 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
29926 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
29927 START_USE (cr_save_regno);
29928 rs6000_emit_move_from_cr (cr_save_rtx);
29931 /* Do any required saving of fpr's. If only one or two to save, do
29932 it ourselves. Otherwise, call an out-of-line save routine. */
29933 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
29935 int offset = info->fp_save_offset + frame_off;
29936 for (int i = info->first_fp_reg_save; i < 64; i++)
29938 if (save_reg_p (i)
29939 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
29940 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
29941 sp_off - frame_off);
29943 offset += fp_reg_size;
29946 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
29948 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
29949 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
29950 unsigned ptr_regno = ptr_regno_for_savres (sel);
29951 rtx ptr_reg = frame_reg_rtx;
29953 if (REGNO (frame_reg_rtx) == ptr_regno)
29954 gcc_checking_assert (frame_off == 0);
29955 else
29957 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29958 NOT_INUSE (ptr_regno);
29959 emit_insn (gen_add3_insn (ptr_reg,
29960 frame_reg_rtx, GEN_INT (frame_off)));
29962 insn = rs6000_emit_savres_rtx (info, ptr_reg,
29963 info->fp_save_offset,
29964 info->lr_save_offset,
29965 DFmode, sel);
29966 rs6000_frame_related (insn, ptr_reg, sp_off,
29967 NULL_RTX, NULL_RTX);
29968 if (lr)
29969 END_USE (0);
29972 /* Save GPRs. This is done as a PARALLEL if we are using
29973 the store-multiple instructions. */
29974 if (!WORLD_SAVE_P (info)
29975 && TARGET_SPE_ABI
29976 && info->spe_64bit_regs_used != 0
29977 && info->first_gp_reg_save != 32)
29979 int i;
29980 rtx spe_save_area_ptr;
29981 HOST_WIDE_INT save_off;
29982 int ool_adjust = 0;
29984 /* Determine whether we can address all of the registers that need
29985 to be saved with an offset from frame_reg_rtx that fits in
29986 the small const field for SPE memory instructions. */
29987 int spe_regs_addressable
29988 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29989 + reg_size * (32 - info->first_gp_reg_save - 1))
29990 && (strategy & SAVE_INLINE_GPRS));
29992 if (spe_regs_addressable)
29994 spe_save_area_ptr = frame_reg_rtx;
29995 save_off = frame_off;
29997 else
29999 /* Make r11 point to the start of the SPE save area. We need
30000 to be careful here if r11 is holding the static chain. If
30001 it is, then temporarily save it in r0. */
30002 HOST_WIDE_INT offset;
30004 if (!(strategy & SAVE_INLINE_GPRS))
30005 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
30006 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
30007 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
30008 save_off = frame_off - offset;
30010 if (using_static_chain_p)
30012 rtx r0 = gen_rtx_REG (Pmode, 0);
30014 START_USE (0);
30015 gcc_assert (info->first_gp_reg_save > 11);
30017 emit_move_insn (r0, spe_save_area_ptr);
30019 else if (REGNO (frame_reg_rtx) != 11)
30020 START_USE (11);
30022 emit_insn (gen_addsi3 (spe_save_area_ptr,
30023 frame_reg_rtx, GEN_INT (offset)));
30024 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
30025 frame_off = -info->spe_gp_save_offset + ool_adjust;
30028 if ((strategy & SAVE_INLINE_GPRS))
30030 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30031 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
30032 emit_frame_save (spe_save_area_ptr, reg_mode,
30033 info->first_gp_reg_save + i,
30034 (info->spe_gp_save_offset + save_off
30035 + reg_size * i),
30036 sp_off - save_off);
30038 else
30040 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
30041 info->spe_gp_save_offset + save_off,
30042 0, reg_mode,
30043 SAVRES_SAVE | SAVRES_GPR);
30045 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
30046 NULL_RTX, NULL_RTX);
30049 /* Move the static chain pointer back. */
30050 if (!spe_regs_addressable)
30052 if (using_static_chain_p)
30054 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
30055 END_USE (0);
30057 else if (REGNO (frame_reg_rtx) != 11)
30058 END_USE (11);
30061 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
30063 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
30064 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
30065 unsigned ptr_regno = ptr_regno_for_savres (sel);
30066 rtx ptr_reg = frame_reg_rtx;
30067 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
30068 int end_save = info->gp_save_offset + info->gp_size;
30069 int ptr_off;
30071 if (ptr_regno == 12)
30072 sp_adjust = 0;
30073 if (!ptr_set_up)
30074 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30076 /* Need to adjust r11 (r12) if we saved any FPRs. */
30077 if (end_save + frame_off != 0)
30079 rtx offset = GEN_INT (end_save + frame_off);
30081 if (ptr_set_up)
30082 frame_off = -end_save;
30083 else
30084 NOT_INUSE (ptr_regno);
30085 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30087 else if (!ptr_set_up)
30089 NOT_INUSE (ptr_regno);
30090 emit_move_insn (ptr_reg, frame_reg_rtx);
30092 ptr_off = -end_save;
30093 insn = rs6000_emit_savres_rtx (info, ptr_reg,
30094 info->gp_save_offset + ptr_off,
30095 info->lr_save_offset + ptr_off,
30096 reg_mode, sel);
30097 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
30098 NULL_RTX, NULL_RTX);
30099 if (lr)
30100 END_USE (0);
30102 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
30104 rtvec p;
30105 int i;
30106 p = rtvec_alloc (32 - info->first_gp_reg_save);
30107 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30108 RTVEC_ELT (p, i)
30109 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
30110 frame_reg_rtx,
30111 info->gp_save_offset + frame_off + reg_size * i);
30112 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30113 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30114 NULL_RTX, NULL_RTX);
30116 else if (!WORLD_SAVE_P (info))
30118 int offset = info->gp_save_offset + frame_off;
30119 for (int i = info->first_gp_reg_save; i < 32; i++)
30121 if (rs6000_reg_live_or_pic_offset_p (i)
30122 && !cfun->machine->gpr_is_wrapped_separately[i])
30123 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
30124 sp_off - frame_off);
30126 offset += reg_size;
30130 if (crtl->calls_eh_return)
30132 unsigned int i;
30133 rtvec p;
30135 for (i = 0; ; ++i)
30137 unsigned int regno = EH_RETURN_DATA_REGNO (i);
30138 if (regno == INVALID_REGNUM)
30139 break;
30142 p = rtvec_alloc (i);
30144 for (i = 0; ; ++i)
30146 unsigned int regno = EH_RETURN_DATA_REGNO (i);
30147 if (regno == INVALID_REGNUM)
30148 break;
30150 rtx set
30151 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
30152 sp_reg_rtx,
30153 info->ehrd_offset + sp_off + reg_size * (int) i);
30154 RTVEC_ELT (p, i) = set;
30155 RTX_FRAME_RELATED_P (set) = 1;
30158 insn = emit_insn (gen_blockage ());
30159 RTX_FRAME_RELATED_P (insn) = 1;
30160 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
30163 /* In the AIX ABI we need to make sure r2 is really saved. */
30164 if (TARGET_AIX && crtl->calls_eh_return)
30166 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
30167 rtx join_insn, note;
30168 rtx_insn *save_insn;
30169 long toc_restore_insn;
30171 tmp_reg = gen_rtx_REG (Pmode, 11);
30172 tmp_reg_si = gen_rtx_REG (SImode, 11);
30173 if (using_static_chain_p)
30175 START_USE (0);
30176 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
30178 else
30179 START_USE (11);
30180 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
30181 /* Peek at the instruction to which this function returns. If it's
30182 restoring r2, then we know we've already saved r2. We can't
30183 unconditionally save r2 because the value we have will already
30184 be updated if we arrived at this function via a plt call or
30185 toc adjusting stub. */
30186 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
30187 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
30188 + RS6000_TOC_SAVE_SLOT);
30189 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
30190 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
30191 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
30192 validate_condition_mode (EQ, CCUNSmode);
30193 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
30194 emit_insn (gen_rtx_SET (compare_result,
30195 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
30196 toc_save_done = gen_label_rtx ();
30197 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30198 gen_rtx_EQ (VOIDmode, compare_result,
30199 const0_rtx),
30200 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
30201 pc_rtx);
30202 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30203 JUMP_LABEL (jump) = toc_save_done;
30204 LABEL_NUSES (toc_save_done) += 1;
30206 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
30207 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
30208 sp_off - frame_off);
30210 emit_label (toc_save_done);
30212 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
30213 have a CFG that has different saves along different paths.
30214 Move the note to a dummy blockage insn, which describes that
30215 R2 is unconditionally saved after the label. */
30216 /* ??? An alternate representation might be a special insn pattern
30217 containing both the branch and the store. That might give the
30218 code that minimizes the number of DW_CFA_advance opcodes more
30219 freedom in placing the annotations. */
30220 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
30221 if (note)
30222 remove_note (save_insn, note);
30223 else
30224 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
30225 copy_rtx (PATTERN (save_insn)), NULL_RTX);
30226 RTX_FRAME_RELATED_P (save_insn) = 0;
30228 join_insn = emit_insn (gen_blockage ());
30229 REG_NOTES (join_insn) = note;
30230 RTX_FRAME_RELATED_P (join_insn) = 1;
30232 if (using_static_chain_p)
30234 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
30235 END_USE (0);
30237 else
30238 END_USE (11);
30241 /* Save CR if we use any fields that must be preserved. */
30242 if (!WORLD_SAVE_P (info) && info->cr_save_p)
30244 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
30245 GEN_INT (info->cr_save_offset + frame_off));
30246 rtx mem = gen_frame_mem (SImode, addr);
30248 /* If we didn't copy cr before, do so now using r0. */
30249 if (cr_save_rtx == NULL_RTX)
30251 START_USE (0);
30252 cr_save_rtx = gen_rtx_REG (SImode, 0);
30253 rs6000_emit_move_from_cr (cr_save_rtx);
30256 /* Saving CR requires a two-instruction sequence: one instruction
30257 to move the CR to a general-purpose register, and a second
30258 instruction that stores the GPR to memory.
30260 We do not emit any DWARF CFI records for the first of these,
30261 because we cannot properly represent the fact that CR is saved in
30262 a register. One reason is that we cannot express that multiple
30263 CR fields are saved; another reason is that on 64-bit, the size
30264 of the CR register in DWARF (4 bytes) differs from the size of
30265 a general-purpose register.
30267 This means if any intervening instruction were to clobber one of
30268 the call-saved CR fields, we'd have incorrect CFI. To prevent
30269 this from happening, we mark the store to memory as a use of
30270 those CR fields, which prevents any such instruction from being
30271 scheduled in between the two instructions. */
30272 rtx crsave_v[9];
30273 int n_crsave = 0;
30274 int i;
30276 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
30277 for (i = 0; i < 8; i++)
30278 if (save_reg_p (CR0_REGNO + i))
30279 crsave_v[n_crsave++]
30280 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30282 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
30283 gen_rtvec_v (n_crsave, crsave_v)));
30284 END_USE (REGNO (cr_save_rtx));
30286 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
30287 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
30288 so we need to construct a frame expression manually. */
30289 RTX_FRAME_RELATED_P (insn) = 1;
30291 /* Update address to be stack-pointer relative, like
30292 rs6000_frame_related would do. */
30293 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
30294 GEN_INT (info->cr_save_offset + sp_off));
30295 mem = gen_frame_mem (SImode, addr);
30297 if (DEFAULT_ABI == ABI_ELFv2)
30299 /* In the ELFv2 ABI we generate separate CFI records for each
30300 CR field that was actually saved. They all point to the
30301 same 32-bit stack slot. */
30302 rtx crframe[8];
30303 int n_crframe = 0;
30305 for (i = 0; i < 8; i++)
30306 if (save_reg_p (CR0_REGNO + i))
30308 crframe[n_crframe]
30309 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
30311 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
30312 n_crframe++;
30315 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30316 gen_rtx_PARALLEL (VOIDmode,
30317 gen_rtvec_v (n_crframe, crframe)));
30319 else
30321 /* In other ABIs, by convention, we use a single CR regnum to
30322 represent the fact that all call-saved CR fields are saved.
30323 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
30324 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
30325 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
30329 /* In the ELFv2 ABI we need to save all call-saved CR fields into
30330 *separate* slots if the routine calls __builtin_eh_return, so
30331 that they can be independently restored by the unwinder. */
30332 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
30334 int i, cr_off = info->ehcr_offset;
30335 rtx crsave;
30337 /* ??? We might get better performance by using multiple mfocrf
30338 instructions. */
30339 crsave = gen_rtx_REG (SImode, 0);
30340 emit_insn (gen_movesi_from_cr (crsave));
30342 for (i = 0; i < 8; i++)
30343 if (!call_used_regs[CR0_REGNO + i])
30345 rtvec p = rtvec_alloc (2);
30346 RTVEC_ELT (p, 0)
30347 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
30348 RTVEC_ELT (p, 1)
30349 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30351 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30353 RTX_FRAME_RELATED_P (insn) = 1;
30354 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30355 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
30356 sp_reg_rtx, cr_off + sp_off));
30358 cr_off += reg_size;
30362 /* Update stack and set back pointer unless this is V.4,
30363 for which it was done previously. */
30364 if (!WORLD_SAVE_P (info) && info->push_p
30365 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
30367 rtx ptr_reg = NULL;
30368 int ptr_off = 0;
30370 /* If saving altivec regs we need to be able to address all save
30371 locations using a 16-bit offset. */
30372 if ((strategy & SAVE_INLINE_VRS) == 0
30373 || (info->altivec_size != 0
30374 && (info->altivec_save_offset + info->altivec_size - 16
30375 + info->total_size - frame_off) > 32767)
30376 || (info->vrsave_size != 0
30377 && (info->vrsave_save_offset
30378 + info->total_size - frame_off) > 32767))
30380 int sel = SAVRES_SAVE | SAVRES_VR;
30381 unsigned ptr_regno = ptr_regno_for_savres (sel);
30383 if (using_static_chain_p
30384 && ptr_regno == STATIC_CHAIN_REGNUM)
30385 ptr_regno = 12;
30386 if (REGNO (frame_reg_rtx) != ptr_regno)
30387 START_USE (ptr_regno);
30388 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30389 frame_reg_rtx = ptr_reg;
30390 ptr_off = info->altivec_save_offset + info->altivec_size;
30391 frame_off = -ptr_off;
30393 else if (REGNO (frame_reg_rtx) == 1)
30394 frame_off = info->total_size;
30395 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
30396 ptr_reg, ptr_off);
30397 if (REGNO (frame_reg_rtx) == 12)
30398 sp_adjust = 0;
30399 sp_off = info->total_size;
30400 if (frame_reg_rtx != sp_reg_rtx)
30401 rs6000_emit_stack_tie (frame_reg_rtx, false);
30404 /* Set frame pointer, if needed. */
30405 if (frame_pointer_needed)
30407 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
30408 sp_reg_rtx);
30409 RTX_FRAME_RELATED_P (insn) = 1;
30412 /* Save AltiVec registers if needed. Save here because the red zone does
30413 not always include AltiVec registers. */
30414 if (!WORLD_SAVE_P (info)
30415 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
30417 int end_save = info->altivec_save_offset + info->altivec_size;
30418 int ptr_off;
30419 /* Oddly, the vector save/restore functions point r0 at the end
30420 of the save area, then use r11 or r12 to load offsets for
30421 [reg+reg] addressing. */
30422 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
30423 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
30424 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
30426 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
30427 NOT_INUSE (0);
30428 if (scratch_regno == 12)
30429 sp_adjust = 0;
30430 if (end_save + frame_off != 0)
30432 rtx offset = GEN_INT (end_save + frame_off);
30434 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30436 else
30437 emit_move_insn (ptr_reg, frame_reg_rtx);
30439 ptr_off = -end_save;
30440 insn = rs6000_emit_savres_rtx (info, scratch_reg,
30441 info->altivec_save_offset + ptr_off,
30442 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
30443 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
30444 NULL_RTX, NULL_RTX);
30445 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
30447 /* The oddity mentioned above clobbered our frame reg. */
30448 emit_move_insn (frame_reg_rtx, ptr_reg);
30449 frame_off = ptr_off;
30452 else if (!WORLD_SAVE_P (info)
30453 && info->altivec_size != 0)
30455 int i;
30457 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30458 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
30460 rtx areg, savereg, mem;
30461 HOST_WIDE_INT offset;
30463 offset = (info->altivec_save_offset + frame_off
30464 + 16 * (i - info->first_altivec_reg_save));
30466 savereg = gen_rtx_REG (V4SImode, i);
30468 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
30470 mem = gen_frame_mem (V4SImode,
30471 gen_rtx_PLUS (Pmode, frame_reg_rtx,
30472 GEN_INT (offset)));
30473 insn = emit_insn (gen_rtx_SET (mem, savereg));
30474 areg = NULL_RTX;
30476 else
30478 NOT_INUSE (0);
30479 areg = gen_rtx_REG (Pmode, 0);
30480 emit_move_insn (areg, GEN_INT (offset));
30482 /* AltiVec addressing mode is [reg+reg]. */
30483 mem = gen_frame_mem (V4SImode,
30484 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
30486 /* Rather than emitting a generic move, force use of the stvx
30487 instruction, which we always want on ISA 2.07 (power8) systems.
30488 In particular we don't want xxpermdi/stxvd2x for little
30489 endian. */
30490 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
30493 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30494 areg, GEN_INT (offset));
30498 /* VRSAVE is a bit vector representing which AltiVec registers
30499 are used. The OS uses this to determine which vector
30500 registers to save on a context switch. We need to save
30501 VRSAVE on the stack frame, add whatever AltiVec registers we
30502 used in this function, and do the corresponding magic in the
30503 epilogue. */
30505 if (!WORLD_SAVE_P (info)
30506 && info->vrsave_size != 0)
30508 rtx reg, vrsave;
30509 int offset;
30510 int save_regno;
30512 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
30513 be using r12 as frame_reg_rtx and r11 as the static chain
30514 pointer for nested functions. */
30515 save_regno = 12;
30516 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30517 && !using_static_chain_p)
30518 save_regno = 11;
30519 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
30521 save_regno = 11;
30522 if (using_static_chain_p)
30523 save_regno = 0;
30526 NOT_INUSE (save_regno);
30527 reg = gen_rtx_REG (SImode, save_regno);
30528 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
30529 if (TARGET_MACHO)
30530 emit_insn (gen_get_vrsave_internal (reg));
30531 else
30532 emit_insn (gen_rtx_SET (reg, vrsave));
30534 /* Save VRSAVE. */
30535 offset = info->vrsave_save_offset + frame_off;
30536 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
30538 /* Include the registers in the mask. */
30539 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
30541 insn = emit_insn (generate_set_vrsave (reg, info, 0));
30544 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
30545 if (!TARGET_SINGLE_PIC_BASE
30546 && ((TARGET_TOC && TARGET_MINIMAL_TOC
30547 && !constant_pool_empty_p ())
30548 || (DEFAULT_ABI == ABI_V4
30549 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
30550 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
30552 /* If emit_load_toc_table will use the link register, we need to save
30553 it. We use R12 for this purpose because emit_load_toc_table
30554 can use register 0. This allows us to use a plain 'blr' to return
30555 from the procedure more often. */
30556 int save_LR_around_toc_setup = (TARGET_ELF
30557 && DEFAULT_ABI == ABI_V4
30558 && flag_pic
30559 && ! info->lr_save_p
30560 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
30561 if (save_LR_around_toc_setup)
30563 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30564 rtx tmp = gen_rtx_REG (Pmode, 12);
30566 sp_adjust = 0;
30567 insn = emit_move_insn (tmp, lr);
30568 RTX_FRAME_RELATED_P (insn) = 1;
30570 rs6000_emit_load_toc_table (TRUE);
30572 insn = emit_move_insn (lr, tmp);
30573 add_reg_note (insn, REG_CFA_RESTORE, lr);
30574 RTX_FRAME_RELATED_P (insn) = 1;
30576 else
30577 rs6000_emit_load_toc_table (TRUE);
30580 #if TARGET_MACHO
30581 if (!TARGET_SINGLE_PIC_BASE
30582 && DEFAULT_ABI == ABI_DARWIN
30583 && flag_pic && crtl->uses_pic_offset_table)
30585 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30586 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
30588 /* Save and restore LR locally around this call (in R0). */
30589 if (!info->lr_save_p)
30590 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
30592 emit_insn (gen_load_macho_picbase (src));
30594 emit_move_insn (gen_rtx_REG (Pmode,
30595 RS6000_PIC_OFFSET_TABLE_REGNUM),
30596 lr);
30598 if (!info->lr_save_p)
30599 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
30601 #endif
30603 /* If we need to, save the TOC register after doing the stack setup.
30604 Do not emit eh frame info for this save. The unwinder wants info,
30605 conceptually attached to instructions in this function, about
30606 register values in the caller of this function. This R2 may have
30607 already been changed from the value in the caller.
30608 We don't attempt to write accurate DWARF EH frame info for R2
30609 because code emitted by gcc for a (non-pointer) function call
30610 doesn't save and restore R2. Instead, R2 is managed out-of-line
30611 by a linker generated plt call stub when the function resides in
30612 a shared library. This behavior is costly to describe in DWARF,
30613 both in terms of the size of DWARF info and the time taken in the
30614 unwinder to interpret it. R2 changes, apart from the
30615 calls_eh_return case earlier in this function, are handled by
30616 linux-unwind.h frob_update_context. */
30617 if (rs6000_save_toc_in_prologue_p ())
30619 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
30620 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
30623 if (using_split_stack && split_stack_arg_pointer_used_p ())
30625 /* Set up the arg pointer (r12) for -fsplit-stack code. If
30626 __morestack was called, it left the arg pointer to the old
30627 stack in r29. Otherwise, the arg pointer is the top of the
30628 current frame. */
30629 cfun->machine->split_stack_argp_used = true;
30630 if (sp_adjust)
30632 rtx r12 = gen_rtx_REG (Pmode, 12);
30633 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
30634 emit_insn_before (set_r12, sp_adjust);
30636 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
30638 rtx r12 = gen_rtx_REG (Pmode, 12);
30639 if (frame_off == 0)
30640 emit_move_insn (r12, frame_reg_rtx);
30641 else
30642 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
30644 if (info->push_p)
30646 rtx r12 = gen_rtx_REG (Pmode, 12);
30647 rtx r29 = gen_rtx_REG (Pmode, 29);
30648 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30649 rtx not_more = gen_label_rtx ();
30650 rtx jump;
30652 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30653 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
30654 gen_rtx_LABEL_REF (VOIDmode, not_more),
30655 pc_rtx);
30656 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30657 JUMP_LABEL (jump) = not_more;
30658 LABEL_NUSES (not_more) += 1;
30659 emit_move_insn (r12, r29);
30660 emit_label (not_more);
30665 /* Output .extern statements for the save/restore routines we use. */
30667 static void
30668 rs6000_output_savres_externs (FILE *file)
30670 rs6000_stack_t *info = rs6000_stack_info ();
30672 if (TARGET_DEBUG_STACK)
30673 debug_stack_info (info);
30675 /* Write .extern for any function we will call to save and restore
30676 fp values. */
30677 if (info->first_fp_reg_save < 64
30678 && !TARGET_MACHO
30679 && !TARGET_ELF)
30681 char *name;
30682 int regno = info->first_fp_reg_save - 32;
30684 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
30686 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
30687 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
30688 name = rs6000_savres_routine_name (info, regno, sel);
30689 fprintf (file, "\t.extern %s\n", name);
30691 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
30693 bool lr = (info->savres_strategy
30694 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30695 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30696 name = rs6000_savres_routine_name (info, regno, sel);
30697 fprintf (file, "\t.extern %s\n", name);
30702 /* Write function prologue. */
30704 static void
30705 rs6000_output_function_prologue (FILE *file)
30707 if (!cfun->is_thunk)
30708 rs6000_output_savres_externs (file);
30710 /* ELFv2 ABI r2 setup code and local entry point. This must follow
30711 immediately after the global entry point label. */
30712 if (rs6000_global_entry_point_needed_p ())
30714 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
30716 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
30718 if (TARGET_CMODEL != CMODEL_LARGE)
30720 /* In the small and medium code models, we assume the TOC is less
30721 than 2 GB away from the text section, so it can be computed via the
30722 following two-instruction sequence. */
30723 char buf[256];
30725 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30726 fprintf (file, "0:\taddis 2,12,.TOC.-");
30727 assemble_name (file, buf);
30728 fprintf (file, "@ha\n");
30729 fprintf (file, "\taddi 2,2,.TOC.-");
30730 assemble_name (file, buf);
30731 fprintf (file, "@l\n");
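/* As an illustration (assuming ELF-style local labels and
   rs6000_pic_labelno == 0), the fprintf sequence above emits
   roughly:

	0:	addis 2,12,.TOC.-.LCF0@ha
		addi 2,2,.TOC.-.LCF0@l

   r12 holds the global entry point address, so r2 ends up
   pointing at the TOC.  */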
30733 else
30735 /* In the large code model, we allow arbitrary offsets between the
30736 TOC and the text section, so we have to load the offset from
30737 memory. The data field is emitted directly before the global
30738 entry point in rs6000_elf_declare_function_name. */
30739 char buf[256];
30741 #ifdef HAVE_AS_ENTRY_MARKERS
30742 /* If supported by the linker, emit a marker relocation. If the
30743 total code size of the final executable or shared library
30744 happens to fit into 2 GB after all, the linker will replace
30745 this code sequence with the sequence for the small or medium
30746 code model. */
30747 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
30748 #endif
30749 fprintf (file, "\tld 2,");
30750 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
30751 assemble_name (file, buf);
30752 fprintf (file, "-");
30753 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30754 assemble_name (file, buf);
30755 fprintf (file, "(12)\n");
30756 fprintf (file, "\tadd 2,2,12\n");
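/* Again assuming label number 0, the large-model sequence above is
   roughly:

	.reloc .,R_PPC64_ENTRY
	ld 2,.LCL0-.LCF0(12)
	add 2,2,12

   where the word at .LCL0 (emitted by
   rs6000_elf_declare_function_name) holds the TOC offset.  */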
30759 fputs ("\t.localentry\t", file);
30760 assemble_name (file, name);
30761 fputs (",.-", file);
30762 assemble_name (file, name);
30763 fputs ("\n", file);
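/* E.g. ".localentry foo,.-foo", telling the linker that the local
   entry point lies ".-foo" bytes (the size of the r2 setup code
   above) past the global entry point.  */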
30766 /* Output -mprofile-kernel code. This needs to be done here instead of
30767 in output_function_profile since it must go after the ELFv2 ABI
30768 local entry point. */
30769 if (TARGET_PROFILE_KERNEL && crtl->profile)
30771 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
30772 gcc_assert (!TARGET_32BIT);
30774 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
30776 /* In the ELFv2 ABI we have no compiler stack word. It must be
30777 the responsibility of _mcount to preserve the static chain
30778 register if required. */
30779 if (DEFAULT_ABI != ABI_ELFv2
30780 && cfun->static_chain_decl != NULL)
30782 asm_fprintf (file, "\tstd %s,24(%s)\n",
30783 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30784 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30785 asm_fprintf (file, "\tld %s,24(%s)\n",
30786 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30788 else
30789 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30792 rs6000_pic_labelno++;
30795 /* -mprofile-kernel code calls mcount before the function prologue,
30796 so a profiled leaf function should stay a leaf function. */
30797 static bool
30798 rs6000_keep_leaf_when_profiled ()
30800 return TARGET_PROFILE_KERNEL;
30803 /* Non-zero if vmx regs are restored before the frame pop, zero if
30804 we restore after the pop when possible. */
30805 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
30807 /* Restoring cr is a two step process: loading a reg from the frame
30808 save, then moving the reg to cr. For ABI_V4 we must let the
30809 unwinder know that the stack location is no longer valid at or
30810 before the stack deallocation, but we can't emit a cfa_restore for
30811 cr at the stack deallocation like we do for other registers.
30812 The trouble is that it is possible for the move to cr to be
30813 scheduled after the stack deallocation. So say exactly where cr
30814 is located on each of the two insns. */
30816 static rtx
30817 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
30819 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
30820 rtx reg = gen_rtx_REG (SImode, regno);
30821 rtx_insn *insn = emit_move_insn (reg, mem);
30823 if (!exit_func && DEFAULT_ABI == ABI_V4)
30825 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30826 rtx set = gen_rtx_SET (reg, cr);
30828 add_reg_note (insn, REG_CFA_REGISTER, set);
30829 RTX_FRAME_RELATED_P (insn) = 1;
30831 return reg;
30834 /* Reload CR from REG. */
30836 static void
30837 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
30839 int count = 0;
30840 int i;
30842 if (using_mfcr_multiple)
30844 for (i = 0; i < 8; i++)
30845 if (save_reg_p (CR0_REGNO + i))
30846 count++;
30847 gcc_assert (count);
30850 if (using_mfcr_multiple && count > 1)
30852 rtx_insn *insn;
30853 rtvec p;
30854 int ndx;
30856 p = rtvec_alloc (count);
30858 ndx = 0;
30859 for (i = 0; i < 8; i++)
30860 if (save_reg_p (CR0_REGNO + i))
30862 rtvec r = rtvec_alloc (2);
30863 RTVEC_ELT (r, 0) = reg;
30864 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
30865 RTVEC_ELT (p, ndx) =
30866 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
30867 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
30868 ndx++;
30870 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30871 gcc_assert (ndx == count);
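/* As a concrete example: if CR2 and CR3 are to be restored here, the
   parallel carries masks 1 << (7-2) == 0x20 and 1 << (7-3) == 0x10,
   i.e. the FXM bits that select CR fields 2 and 3 in the mtcrf
   instruction (the FXM bit for field i is 1 << (7 - i), so CR0
   corresponds to 0x80).  */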
30873 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30874 CR field separately. */
30875 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30877 for (i = 0; i < 8; i++)
30878 if (save_reg_p (CR0_REGNO + i))
30879 add_reg_note (insn, REG_CFA_RESTORE,
30880 gen_rtx_REG (SImode, CR0_REGNO + i));
30882 RTX_FRAME_RELATED_P (insn) = 1;
30885 else
30886 for (i = 0; i < 8; i++)
30887 if (save_reg_p (CR0_REGNO + i))
30889 rtx insn = emit_insn (gen_movsi_to_cr_one
30890 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
30892 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30893 CR field separately, attached to the insn that in fact
30894 restores this particular CR field. */
30895 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30897 add_reg_note (insn, REG_CFA_RESTORE,
30898 gen_rtx_REG (SImode, CR0_REGNO + i));
30900 RTX_FRAME_RELATED_P (insn) = 1;
30904 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
30905 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
30906 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
30908 rtx_insn *insn = get_last_insn ();
30909 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30911 add_reg_note (insn, REG_CFA_RESTORE, cr);
30912 RTX_FRAME_RELATED_P (insn) = 1;
30916 /* Like cr, the move to lr instruction can be scheduled after the
30917 stack deallocation, but unlike cr, its stack frame save is still
30918 valid. So we only need to emit the cfa_restore on the correct
30919 instruction. */
30921 static void
30922 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
30924 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
30925 rtx reg = gen_rtx_REG (Pmode, regno);
30927 emit_move_insn (reg, mem);
30930 static void
30931 restore_saved_lr (int regno, bool exit_func)
30933 rtx reg = gen_rtx_REG (Pmode, regno);
30934 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30935 rtx_insn *insn = emit_move_insn (lr, reg);
30937 if (!exit_func && flag_shrink_wrap)
30939 add_reg_note (insn, REG_CFA_RESTORE, lr);
30940 RTX_FRAME_RELATED_P (insn) = 1;
30944 static rtx
30945 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
30947 if (DEFAULT_ABI == ABI_ELFv2)
30949 int i;
30950 for (i = 0; i < 8; i++)
30951 if (save_reg_p (CR0_REGNO + i))
30953 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
30954 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
30955 cfa_restores);
30958 else if (info->cr_save_p)
30959 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30960 gen_rtx_REG (SImode, CR2_REGNO),
30961 cfa_restores);
30963 if (info->lr_save_p)
30964 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30965 gen_rtx_REG (Pmode, LR_REGNO),
30966 cfa_restores);
30967 return cfa_restores;
30970 /* Return true if OFFSET from stack pointer can be clobbered by signals.
30971 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
30972 below the stack pointer that are not clobbered by signals. */
30974 static inline bool
30975 offset_below_red_zone_p (HOST_WIDE_INT offset)
30977 return offset < (DEFAULT_ABI == ABI_V4
30978 ? 0
30979 : TARGET_32BIT ? -220 : -288);
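/* E.g. for 64-bit AIX or ELFv2, a save slot at offset -288 is still
   within the 288-byte red zone (-288 < -288 is false), while one at
   -292 lies below it and may be clobbered by a signal handler before
   it is restored.  */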
30982 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
30984 static void
30985 emit_cfa_restores (rtx cfa_restores)
30987 rtx_insn *insn = get_last_insn ();
30988 rtx *loc = &REG_NOTES (insn);
30990 while (*loc)
30991 loc = &XEXP (*loc, 1);
30992 *loc = cfa_restores;
30993 RTX_FRAME_RELATED_P (insn) = 1;
30996 /* Emit function epilogue as insns. */
30998 void
30999 rs6000_emit_epilogue (int sibcall)
31001 rs6000_stack_t *info;
31002 int restoring_GPRs_inline;
31003 int restoring_FPRs_inline;
31004 int using_load_multiple;
31005 int using_mtcr_multiple;
31006 int use_backchain_to_restore_sp;
31007 int restore_lr;
31008 int strategy;
31009 HOST_WIDE_INT frame_off = 0;
31010 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
31011 rtx frame_reg_rtx = sp_reg_rtx;
31012 rtx cfa_restores = NULL_RTX;
31013 rtx insn;
31014 rtx cr_save_reg = NULL_RTX;
31015 machine_mode reg_mode = Pmode;
31016 int reg_size = TARGET_32BIT ? 4 : 8;
31017 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
31018 ? DFmode : SFmode;
31019 int fp_reg_size = 8;
31020 int i;
31021 bool exit_func;
31022 unsigned ptr_regno;
31024 info = rs6000_stack_info ();
31026 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
31028 reg_mode = V2SImode;
31029 reg_size = 8;
31032 strategy = info->savres_strategy;
31033 using_load_multiple = strategy & REST_MULTIPLE;
31034 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
31035 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
31036 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
31037 || rs6000_cpu == PROCESSOR_PPC603
31038 || rs6000_cpu == PROCESSOR_PPC750
31039 || optimize_size);
31040 /* Restore via the backchain when we have a large frame, since this
31041 is more efficient than an addis, addi pair. The second condition
31042 here will not trigger at the moment; we don't actually need a
31043 frame pointer for alloca, but the generic parts of the compiler
31044 give us one anyway. */
31045 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
31046 ? info->lr_save_offset
31047 : 0) > 32767
31048 || (cfun->calls_alloca
31049 && !frame_pointer_needed));
31050 restore_lr = (info->lr_save_p
31051 && (restoring_FPRs_inline
31052 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
31053 && (restoring_GPRs_inline
31054 || info->first_fp_reg_save < 64)
31055 && !cfun->machine->lr_is_wrapped_separately);
31058 if (WORLD_SAVE_P (info))
31060 int i, j;
31061 char rname[30];
31062 const char *alloc_rname;
31063 rtvec p;
31065 /* eh_rest_world_r10 will return to the location saved in the LR
31066 stack slot (which is not likely to be our caller).
31067 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
31068 rest_world is similar, except any R10 parameter is ignored.
31069 The exception-handling stuff that was here in 2.95 is no
31070 longer necessary. */
31072 p = rtvec_alloc (9
31073 + 32 - info->first_gp_reg_save
31074 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
31075 + 63 + 1 - info->first_fp_reg_save);
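/* The 9 fixed elements are the return, the USE of the routine
   symbol, the CLOBBER of r11 shared with the restVEC helper, the
   CR2 load, the CLOBBERs of r0, r12, r7 and r8, and the USE of
   r10, matching the RTVEC_ELT assignments below.  */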
31077 strcpy (rname, ((crtl->calls_eh_return) ?
31078 "*eh_rest_world_r10" : "*rest_world"));
31079 alloc_rname = ggc_strdup (rname);
31081 j = 0;
31082 RTVEC_ELT (p, j++) = ret_rtx;
31083 RTVEC_ELT (p, j++)
31084 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
31085 /* The instruction pattern requires a clobber here;
31086 it is shared with the restVEC helper. */
31087 RTVEC_ELT (p, j++)
31088 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
31091 /* CR register traditionally saved as CR2. */
31092 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
31093 RTVEC_ELT (p, j++)
31094 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
31095 if (flag_shrink_wrap)
31097 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
31098 gen_rtx_REG (Pmode, LR_REGNO),
31099 cfa_restores);
31100 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31104 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31106 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31107 RTVEC_ELT (p, j++)
31108 = gen_frame_load (reg,
31109 frame_reg_rtx, info->gp_save_offset + reg_size * i);
31110 if (flag_shrink_wrap)
31111 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31113 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
31115 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
31116 RTVEC_ELT (p, j++)
31117 = gen_frame_load (reg,
31118 frame_reg_rtx, info->altivec_save_offset + 16 * i);
31119 if (flag_shrink_wrap)
31120 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31122 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
31124 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
31125 ? DFmode : SFmode),
31126 info->first_fp_reg_save + i);
31127 RTVEC_ELT (p, j++)
31128 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
31129 if (flag_shrink_wrap)
31130 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31132 RTVEC_ELT (p, j++)
31133 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
31134 RTVEC_ELT (p, j++)
31135 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
31136 RTVEC_ELT (p, j++)
31137 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
31138 RTVEC_ELT (p, j++)
31139 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
31140 RTVEC_ELT (p, j++)
31141 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
31142 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31144 if (flag_shrink_wrap)
31146 REG_NOTES (insn) = cfa_restores;
31147 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31148 RTX_FRAME_RELATED_P (insn) = 1;
31150 return;
31153 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
31154 if (info->push_p)
31155 frame_off = info->total_size;
31157 /* Restore AltiVec registers if we must do so before adjusting the
31158 stack. */
31159 if (info->altivec_size != 0
31160 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31161 || (DEFAULT_ABI != ABI_V4
31162 && offset_below_red_zone_p (info->altivec_save_offset))))
31164 int i;
31165 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31167 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
31168 if (use_backchain_to_restore_sp)
31170 int frame_regno = 11;
31172 if ((strategy & REST_INLINE_VRS) == 0)
31174 /* Of r11 and r12, select the one not clobbered by an
31175 out-of-line restore function for the frame register. */
31176 frame_regno = 11 + 12 - scratch_regno;
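/* I.e. frame_regno is 12 when scratch_regno is 11, and vice versa.  */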
31178 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
31179 emit_move_insn (frame_reg_rtx,
31180 gen_rtx_MEM (Pmode, sp_reg_rtx));
31181 frame_off = 0;
31183 else if (frame_pointer_needed)
31184 frame_reg_rtx = hard_frame_pointer_rtx;
31186 if ((strategy & REST_INLINE_VRS) == 0)
31188 int end_save = info->altivec_save_offset + info->altivec_size;
31189 int ptr_off;
31190 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31191 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31193 if (end_save + frame_off != 0)
31195 rtx offset = GEN_INT (end_save + frame_off);
31197 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31199 else
31200 emit_move_insn (ptr_reg, frame_reg_rtx);
31202 ptr_off = -end_save;
31203 insn = rs6000_emit_savres_rtx (info, scratch_reg,
31204 info->altivec_save_offset + ptr_off,
31205 0, V4SImode, SAVRES_VR);
31207 else
31209 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31210 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31212 rtx addr, areg, mem, insn;
31213 rtx reg = gen_rtx_REG (V4SImode, i);
31214 HOST_WIDE_INT offset
31215 = (info->altivec_save_offset + frame_off
31216 + 16 * (i - info->first_altivec_reg_save));
31218 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31220 mem = gen_frame_mem (V4SImode,
31221 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31222 GEN_INT (offset)));
31223 insn = gen_rtx_SET (reg, mem);
31225 else
31227 areg = gen_rtx_REG (Pmode, 0);
31228 emit_move_insn (areg, GEN_INT (offset));
31230 /* AltiVec addressing mode is [reg+reg]. */
31231 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31232 mem = gen_frame_mem (V4SImode, addr);
31234 /* Rather than emitting a generic move, force use of the
31235 lvx instruction, which we always want. In particular we
31236 don't want lxvd2x/xxpermdi for little endian. */
31237 insn = gen_altivec_lvx_v4si_internal (reg, mem);
31240 (void) emit_insn (insn);
31244 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31245 if (((strategy & REST_INLINE_VRS) == 0
31246 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31247 && (flag_shrink_wrap
31248 || (offset_below_red_zone_p
31249 (info->altivec_save_offset
31250 + 16 * (i - info->first_altivec_reg_save)))))
31252 rtx reg = gen_rtx_REG (V4SImode, i);
31253 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31257 /* Restore VRSAVE if we must do so before adjusting the stack. */
31258 if (info->vrsave_size != 0
31259 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31260 || (DEFAULT_ABI != ABI_V4
31261 && offset_below_red_zone_p (info->vrsave_save_offset))))
31263 rtx reg;
31265 if (frame_reg_rtx == sp_reg_rtx)
31267 if (use_backchain_to_restore_sp)
31269 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31270 emit_move_insn (frame_reg_rtx,
31271 gen_rtx_MEM (Pmode, sp_reg_rtx));
31272 frame_off = 0;
31274 else if (frame_pointer_needed)
31275 frame_reg_rtx = hard_frame_pointer_rtx;
31278 reg = gen_rtx_REG (SImode, 12);
31279 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31280 info->vrsave_save_offset + frame_off));
31282 emit_insn (generate_set_vrsave (reg, info, 1));
31285 insn = NULL_RTX;
31286 /* If we have a large stack frame, restore the old stack pointer
31287 using the backchain. */
31288 if (use_backchain_to_restore_sp)
31290 if (frame_reg_rtx == sp_reg_rtx)
31292 /* Under V.4, don't reset the stack pointer until after we're done
31293 loading the saved registers. */
31294 if (DEFAULT_ABI == ABI_V4)
31295 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31297 insn = emit_move_insn (frame_reg_rtx,
31298 gen_rtx_MEM (Pmode, sp_reg_rtx));
31299 frame_off = 0;
31301 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31302 && DEFAULT_ABI == ABI_V4)
31303 /* frame_reg_rtx has been set up by the altivec restore. */
31304 ;
31305 else
31307 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
31308 frame_reg_rtx = sp_reg_rtx;
31311 /* If we have a frame pointer, we can restore the old stack pointer
31312 from it. */
31313 else if (frame_pointer_needed)
31315 frame_reg_rtx = sp_reg_rtx;
31316 if (DEFAULT_ABI == ABI_V4)
31317 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31318 /* Prevent reordering memory accesses against stack pointer restore. */
31319 else if (cfun->calls_alloca
31320 || offset_below_red_zone_p (-info->total_size))
31321 rs6000_emit_stack_tie (frame_reg_rtx, true);
31323 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
31324 GEN_INT (info->total_size)));
31325 frame_off = 0;
31327 else if (info->push_p
31328 && DEFAULT_ABI != ABI_V4
31329 && !crtl->calls_eh_return)
31331 /* Prevent reordering memory accesses against stack pointer restore. */
31332 if (cfun->calls_alloca
31333 || offset_below_red_zone_p (-info->total_size))
31334 rs6000_emit_stack_tie (frame_reg_rtx, false);
31335 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
31336 GEN_INT (info->total_size)));
31337 frame_off = 0;
31339 if (insn && frame_reg_rtx == sp_reg_rtx)
31341 if (cfa_restores)
31343 REG_NOTES (insn) = cfa_restores;
31344 cfa_restores = NULL_RTX;
31346 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31347 RTX_FRAME_RELATED_P (insn) = 1;
31350 /* Restore AltiVec registers if we have not done so already. */
31351 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31352 && info->altivec_size != 0
31353 && (DEFAULT_ABI == ABI_V4
31354 || !offset_below_red_zone_p (info->altivec_save_offset)))
31356 int i;
31358 if ((strategy & REST_INLINE_VRS) == 0)
31360 int end_save = info->altivec_save_offset + info->altivec_size;
31361 int ptr_off;
31362 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31363 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31364 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31366 if (end_save + frame_off != 0)
31368 rtx offset = GEN_INT (end_save + frame_off);
31370 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31372 else
31373 emit_move_insn (ptr_reg, frame_reg_rtx);
31375 ptr_off = -end_save;
31376 insn = rs6000_emit_savres_rtx (info, scratch_reg,
31377 info->altivec_save_offset + ptr_off,
31378 0, V4SImode, SAVRES_VR);
31379 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
31381 /* Frame reg was clobbered by out-of-line save. Restore it
31382 from ptr_reg, and if we are calling an out-of-line gpr or
31383 fpr restore, set up the correct pointer and offset. */
31384 unsigned newptr_regno = 1;
31385 if (!restoring_GPRs_inline)
31387 bool lr = info->gp_save_offset + info->gp_size == 0;
31388 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31389 newptr_regno = ptr_regno_for_savres (sel);
31390 end_save = info->gp_save_offset + info->gp_size;
31392 else if (!restoring_FPRs_inline)
31394 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
31395 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31396 newptr_regno = ptr_regno_for_savres (sel);
31397 end_save = info->fp_save_offset + info->fp_size;
31400 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
31401 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
31403 if (end_save + ptr_off != 0)
31405 rtx offset = GEN_INT (end_save + ptr_off);
31407 frame_off = -end_save;
31408 if (TARGET_32BIT)
31409 emit_insn (gen_addsi3_carry (frame_reg_rtx,
31410 ptr_reg, offset));
31411 else
31412 emit_insn (gen_adddi3_carry (frame_reg_rtx,
31413 ptr_reg, offset));
31415 else
31417 frame_off = ptr_off;
31418 emit_move_insn (frame_reg_rtx, ptr_reg);
31422 else
31424 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31425 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31427 rtx addr, areg, mem, insn;
31428 rtx reg = gen_rtx_REG (V4SImode, i);
31429 HOST_WIDE_INT offset
31430 = (info->altivec_save_offset + frame_off
31431 + 16 * (i - info->first_altivec_reg_save));
31433 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31435 mem = gen_frame_mem (V4SImode,
31436 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31437 GEN_INT (offset)));
31438 insn = gen_rtx_SET (reg, mem);
31440 else
31442 areg = gen_rtx_REG (Pmode, 0);
31443 emit_move_insn (areg, GEN_INT (offset));
31445 /* AltiVec addressing mode is [reg+reg]. */
31446 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31447 mem = gen_frame_mem (V4SImode, addr);
31449 /* Rather than emitting a generic move, force use of the
31450 lvx instruction, which we always want. In particular we
31451 don't want lxvd2x/xxpermdi for little endian. */
31452 insn = gen_altivec_lvx_v4si_internal (reg, mem);
31455 (void) emit_insn (insn);
31459 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31460 if (((strategy & REST_INLINE_VRS) == 0
31461 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31462 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
31464 rtx reg = gen_rtx_REG (V4SImode, i);
31465 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31469 /* Restore VRSAVE if we have not done so already. */
31470 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31471 && info->vrsave_size != 0
31472 && (DEFAULT_ABI == ABI_V4
31473 || !offset_below_red_zone_p (info->vrsave_save_offset)))
31475 rtx reg;
31477 reg = gen_rtx_REG (SImode, 12);
31478 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31479 info->vrsave_save_offset + frame_off));
31481 emit_insn (generate_set_vrsave (reg, info, 1));
31484 /* If we exit by an out-of-line restore function on ABI_V4 then that
31485 function will deallocate the stack, so we don't need to worry
31486 about the unwinder restoring cr from an invalid stack frame
31487 location. */
31488 exit_func = (!restoring_FPRs_inline
31489 || (!restoring_GPRs_inline
31490 && info->first_fp_reg_save == 64));
31492 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
31493 *separate* slots if the routine calls __builtin_eh_return, so
31494 that they can be independently restored by the unwinder. */
31495 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
31497 int i, cr_off = info->ehcr_offset;
31499 for (i = 0; i < 8; i++)
31500 if (!call_used_regs[CR0_REGNO + i])
31502 rtx reg = gen_rtx_REG (SImode, 0);
31503 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31504 cr_off + frame_off));
31506 insn = emit_insn (gen_movsi_to_cr_one
31507 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
31509 if (!exit_func && flag_shrink_wrap)
31511 add_reg_note (insn, REG_CFA_RESTORE,
31512 gen_rtx_REG (SImode, CR0_REGNO + i));
31514 RTX_FRAME_RELATED_P (insn) = 1;
31517 cr_off += reg_size;
31521 /* Get the old lr if we saved it. If we are restoring registers
31522 out-of-line, then the out-of-line routines can do this for us. */
31523 if (restore_lr && restoring_GPRs_inline)
31524 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31526 /* Get the old cr if we saved it. */
31527 if (info->cr_save_p)
31529 unsigned cr_save_regno = 12;
31531 if (!restoring_GPRs_inline)
31533 /* Ensure we don't use the register used by the out-of-line
31534 gpr register restore below. */
31535 bool lr = info->gp_save_offset + info->gp_size == 0;
31536 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31537 int gpr_ptr_regno = ptr_regno_for_savres (sel);
31539 if (gpr_ptr_regno == 12)
31540 cr_save_regno = 11;
31541 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
31543 else if (REGNO (frame_reg_rtx) == 12)
31544 cr_save_regno = 11;
31546 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
31547 info->cr_save_offset + frame_off,
31548 exit_func);
31551 /* Set LR here to try to overlap restores below. */
31552 if (restore_lr && restoring_GPRs_inline)
31553 restore_saved_lr (0, exit_func);
31555 /* Load exception handler data registers, if needed. */
31556 if (crtl->calls_eh_return)
31558 unsigned int i, regno;
31560 if (TARGET_AIX)
31562 rtx reg = gen_rtx_REG (reg_mode, 2);
31563 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31564 frame_off + RS6000_TOC_SAVE_SLOT));
31567 for (i = 0; ; ++i)
31569 rtx mem;
31571 regno = EH_RETURN_DATA_REGNO (i);
31572 if (regno == INVALID_REGNUM)
31573 break;
31575 /* Note: possible use of r0 here to address SPE regs. */
31576 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
31577 info->ehrd_offset + frame_off
31578 + reg_size * (int) i);
31580 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
31584 /* Restore GPRs. This is done as a PARALLEL if we are using
31585 the load-multiple instructions. */
31586 if (TARGET_SPE_ABI
31587 && info->spe_64bit_regs_used
31588 && info->first_gp_reg_save != 32)
31590 /* Determine whether we can address all of the registers that need
31591 to be saved with an offset from frame_reg_rtx that fits in
31592 the small const field for SPE memory instructions. */
31593 int spe_regs_addressable
31594 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
31595 + reg_size * (32 - info->first_gp_reg_save - 1))
31596 && restoring_GPRs_inline);
31598 if (!spe_regs_addressable)
31600 int ool_adjust = 0;
31601 rtx old_frame_reg_rtx = frame_reg_rtx;
31602 /* Make r11 point to the start of the SPE save area. We worried about
31603 not clobbering it when we were saving registers in the prologue.
31604 There's no need to worry here because the static chain is passed
31605 anew to every function. */
31607 if (!restoring_GPRs_inline)
31608 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
31609 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31610 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
31611 GEN_INT (info->spe_gp_save_offset
31612 + frame_off
31613 - ool_adjust)));
31614 /* Keep the invariant that frame_reg_rtx + frame_off points
31615 at the top of the stack frame. */
31616 frame_off = -info->spe_gp_save_offset + ool_adjust;
31619 if (restoring_GPRs_inline)
31621 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
31623 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31624 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
31626 rtx offset, addr, mem, reg;
31628 /* We're doing all this to ensure that the immediate offset
31629 fits into the immediate field of 'evldd'. */
31630 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
31632 offset = GEN_INT (spe_offset + reg_size * i);
31633 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
31634 mem = gen_rtx_MEM (V2SImode, addr);
31635 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31637 emit_move_insn (reg, mem);
31640 else
31641 rs6000_emit_savres_rtx (info, frame_reg_rtx,
31642 info->spe_gp_save_offset + frame_off,
31643 info->lr_save_offset + frame_off,
31644 reg_mode,
31645 SAVRES_GPR | SAVRES_LR);
31647 else if (!restoring_GPRs_inline)
31649 /* We are jumping to an out-of-line function. */
31650 rtx ptr_reg;
31651 int end_save = info->gp_save_offset + info->gp_size;
31652 bool can_use_exit = end_save == 0;
31653 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
31654 int ptr_off;
31656 /* Emit stack reset code if we need it. */
31657 ptr_regno = ptr_regno_for_savres (sel);
31658 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
31659 if (can_use_exit)
31660 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31661 else if (end_save + frame_off != 0)
31662 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
31663 GEN_INT (end_save + frame_off)));
31664 else if (REGNO (frame_reg_rtx) != ptr_regno)
31665 emit_move_insn (ptr_reg, frame_reg_rtx);
31666 if (REGNO (frame_reg_rtx) == ptr_regno)
31667 frame_off = -end_save;
31669 if (can_use_exit && info->cr_save_p)
31670 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
31672 ptr_off = -end_save;
31673 rs6000_emit_savres_rtx (info, ptr_reg,
31674 info->gp_save_offset + ptr_off,
31675 info->lr_save_offset + ptr_off,
31676 reg_mode, sel);
31678 else if (using_load_multiple)
31680 rtvec p;
31681 p = rtvec_alloc (32 - info->first_gp_reg_save);
31682 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31683 RTVEC_ELT (p, i)
31684 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
31685 frame_reg_rtx,
31686 info->gp_save_offset + frame_off + reg_size * i);
31687 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
31689 else
31691 int offset = info->gp_save_offset + frame_off;
31692 for (i = info->first_gp_reg_save; i < 32; i++)
31694 if (rs6000_reg_live_or_pic_offset_p (i)
31695 && !cfun->machine->gpr_is_wrapped_separately[i])
31697 rtx reg = gen_rtx_REG (reg_mode, i);
31698 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31701 offset += reg_size;
31705 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31707 /* If the frame pointer was used then we can't delay emitting
31708 a REG_CFA_DEF_CFA note. This must happen on the insn that
31709 restores the frame pointer, r31. We may have already emitted
31710 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
31711 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
31712 be harmless if emitted. */
31713 if (frame_pointer_needed)
31715 insn = get_last_insn ();
31716 add_reg_note (insn, REG_CFA_DEF_CFA,
31717 plus_constant (Pmode, frame_reg_rtx, frame_off));
31718 RTX_FRAME_RELATED_P (insn) = 1;
31721 /* Set up cfa_restores. We always need these when
31722 shrink-wrapping. If not shrink-wrapping then we only need
31723 the cfa_restore when the stack location is no longer valid.
31724 The cfa_restores must be emitted on or before the insn that
31725 invalidates the stack, and of course must not be emitted
31726 before the insn that actually does the restore. The latter
31727 is why it is a bad idea to emit the cfa_restores as a group
31728 on the last instruction here that actually does a restore:
31729 That insn may be reordered with respect to others doing
31730 restores. */
31731 if (flag_shrink_wrap
31732 && !restoring_GPRs_inline
31733 && info->first_fp_reg_save == 64)
31734 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31736 for (i = info->first_gp_reg_save; i < 32; i++)
31737 if (!restoring_GPRs_inline
31738 || using_load_multiple
31739 || rs6000_reg_live_or_pic_offset_p (i))
31741 if (cfun->machine->gpr_is_wrapped_separately[i])
31742 continue;
31744 rtx reg = gen_rtx_REG (reg_mode, i);
31745 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31749 if (!restoring_GPRs_inline
31750 && info->first_fp_reg_save == 64)
31752 /* We are jumping to an out-of-line function. */
31753 if (cfa_restores)
31754 emit_cfa_restores (cfa_restores);
31755 return;
31758 if (restore_lr && !restoring_GPRs_inline)
31760 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31761 restore_saved_lr (0, exit_func);
31764 /* Restore fpr's if we need to do it without calling a function. */
31765 if (restoring_FPRs_inline)
31767 int offset = info->fp_save_offset + frame_off;
31768 for (i = info->first_fp_reg_save; i < 64; i++)
31770 if (save_reg_p (i)
31771 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
31773 rtx reg = gen_rtx_REG (fp_reg_mode, i);
31774 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31775 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31776 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
31777 cfa_restores);
31780 offset += fp_reg_size;
31784 /* If we saved cr, restore it here. Just those that were used. */
31785 if (info->cr_save_p)
31786 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
31788 /* If this is V.4, unwind the stack pointer after all of the loads
31789 have been done, or set up r11 if we are restoring fp out of line. */
31790 ptr_regno = 1;
31791 if (!restoring_FPRs_inline)
31793 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31794 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31795 ptr_regno = ptr_regno_for_savres (sel);
31798 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31799 if (REGNO (frame_reg_rtx) == ptr_regno)
31800 frame_off = 0;
31802 if (insn && restoring_FPRs_inline)
31804 if (cfa_restores)
31806 REG_NOTES (insn) = cfa_restores;
31807 cfa_restores = NULL_RTX;
31809 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31810 RTX_FRAME_RELATED_P (insn) = 1;
31813 if (crtl->calls_eh_return)
31815 rtx sa = EH_RETURN_STACKADJ_RTX;
31816 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
31819 if (!sibcall && restoring_FPRs_inline)
31821 if (cfa_restores)
31823 /* We can't hang the cfa_restores off a simple return,
31824 since the shrink-wrap code sometimes uses an existing
31825 return. This means there might be a path from
31826 pre-prologue code to this return, and dwarf2cfi code
31827 wants the eh_frame unwinder state to be the same on
31828 all paths to any point. So we need to emit the
31829 cfa_restores before the return. For -m64 we really
31830 don't need epilogue cfa_restores at all, except for
31831 this irritating dwarf2cfi-with-shrink-wrap
31832 requirement; the stack red zone means eh_frame info
31833 from the prologue telling the unwinder to restore
31834 from the stack is perfectly good right to the end of
31835 the function. */
31836 emit_insn (gen_blockage ());
31837 emit_cfa_restores (cfa_restores);
31838 cfa_restores = NULL_RTX;
31841 emit_jump_insn (targetm.gen_simple_return ());
31844 if (!sibcall && !restoring_FPRs_inline)
31846 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31847 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
31848 int elt = 0;
31849 RTVEC_ELT (p, elt++) = ret_rtx;
31850 if (lr)
31851 RTVEC_ELT (p, elt++)
31852 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
31854 /* We have to restore more than two FP registers, so branch to the
31855 restore function. It will return to our caller. */
31856 int i;
31857 int reg;
31858 rtx sym;
31860 if (flag_shrink_wrap)
31861 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31863 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
31864 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
31865 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
31866 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
31868 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
31870 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
31872 RTVEC_ELT (p, elt++)
31873 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
31874 if (flag_shrink_wrap)
31875 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31878 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31881 if (cfa_restores)
31883 if (sibcall)
31884 /* Ensure the cfa_restores are hung off an insn that won't
31885 be reordered above other restores. */
31886 emit_insn (gen_blockage ());
31888 emit_cfa_restores (cfa_restores);
31892 /* Write function epilogue. */
31894 static void
31895 rs6000_output_function_epilogue (FILE *file)
31897 #if TARGET_MACHO
31898 macho_branch_islands ();
31901 rtx_insn *insn = get_last_insn ();
31902 rtx_insn *deleted_debug_label = NULL;
31904 /* Mach-O doesn't support labels at the end of objects, so if
31905 it looks like we might want one, take special action.
31907 First, collect any sequence of deleted debug labels. */
31908 while (insn
31909 && NOTE_P (insn)
31910 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
31912 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert
31913 a nop; instead set their CODE_LABEL_NUMBER to -1,
31914 otherwise there would be code generation differences
31915 between -g and -g0. */
31916 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31917 deleted_debug_label = insn;
31918 insn = PREV_INSN (insn);
31921 /* Second, if we have:
31922 label:
31923 barrier
31924 then this needs to be detected, so skip past the barrier. */
31926 if (insn && BARRIER_P (insn))
31927 insn = PREV_INSN (insn);
31929 /* Up to now we've only seen notes or barriers. */
31930 if (insn)
31932 if (LABEL_P (insn)
31933 || (NOTE_P (insn)
31934 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
31935 /* Trailing label: <barrier>. */
31936 fputs ("\tnop\n", file);
31937 else
31939 /* Lastly, see if we have a completely empty function body. */
31940 while (insn && ! INSN_P (insn))
31941 insn = PREV_INSN (insn);
31942 /* If we don't find any insns, we've got an empty function body;
31943 i.e. completely empty, without a return or branch. This is
31944 taken as the case where a function body has been removed
31945 because it contains an inline __builtin_unreachable(). GCC
31946 states that reaching __builtin_unreachable() means UB so we're
31947 not obliged to do anything special; however, we want
31948 non-zero-sized function bodies. To meet this, and help the
31949 user out, let's trap the case. */
31950 if (insn == NULL)
31951 fputs ("\ttrap\n", file);
31954 else if (deleted_debug_label)
31955 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
31956 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31957 CODE_LABEL_NUMBER (insn) = -1;
31959 #endif
31961 /* Output a traceback table here. See /usr/include/sys/debug.h for info
31962 on its format.
31964 We don't output a traceback table if -finhibit-size-directive was
31965 used. The documentation for -finhibit-size-directive reads
31966 ``don't output a @code{.size} assembler directive, or anything
31967 else that would cause trouble if the function is split in the
31968 middle, and the two halves are placed at locations far apart in
31969 memory.'' The traceback table has this property, since it
31970 includes the offset from the start of the function to the
31971 traceback table itself.
31973 System V.4 PowerPC (and the embedded ABI derived from it) uses a
31974 different traceback table. */
31975 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31976 && ! flag_inhibit_size_directive
31977 && rs6000_traceback != traceback_none && !cfun->is_thunk)
31979 const char *fname = NULL;
31980 const char *language_string = lang_hooks.name;
31981 int fixed_parms = 0, float_parms = 0, parm_info = 0;
31982 int i;
31983 int optional_tbtab;
31984 rs6000_stack_t *info = rs6000_stack_info ();
31986 if (rs6000_traceback == traceback_full)
31987 optional_tbtab = 1;
31988 else if (rs6000_traceback == traceback_part)
31989 optional_tbtab = 0;
31990 else
31991 optional_tbtab = !optimize_size && !TARGET_ELF;
31993 if (optional_tbtab)
31995 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
31996 while (*fname == '.') /* V.4 encodes . in the name */
31997 fname++;
31999 /* Need label immediately before tbtab, so we can compute
32000 its offset from the function start. */
32001 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
32002 ASM_OUTPUT_LABEL (file, fname);
32005 /* The .tbtab pseudo-op can only be used for the first eight
32006 expressions, since it can't handle the possibly variable
32007 length fields that follow. However, if you omit the optional
32008 fields, the assembler outputs zeros for all optional fields
32009 anyway, giving each variable-length field its minimum length
32010 (as defined in sys/debug.h). Thus we cannot use the .tbtab
32011 pseudo-op at all. */
32013 /* An all-zero word flags the start of the tbtab, for debuggers
32014 that have to find it by searching forward from the entry
32015 point or from the current pc. */
32016 fputs ("\t.long 0\n", file);
32018 /* Tbtab format type. Use format type 0. */
32019 fputs ("\t.byte 0,", file);
32021 /* Language type. Unfortunately, there does not seem to be any
32022 official way to discover the language being compiled, so we
32023 use language_string.
32024 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
32025 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
32026 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
32027 either, so for now use 0. */
32028 if (lang_GNU_C ()
32029 || ! strcmp (language_string, "GNU GIMPLE")
32030 || ! strcmp (language_string, "GNU Go")
32031 || ! strcmp (language_string, "libgccjit"))
32032 i = 0;
32033 else if (! strcmp (language_string, "GNU F77")
32034 || lang_GNU_Fortran ())
32035 i = 1;
32036 else if (! strcmp (language_string, "GNU Pascal"))
32037 i = 2;
32038 else if (! strcmp (language_string, "GNU Ada"))
32039 i = 3;
32040 else if (lang_GNU_CXX ()
32041 || ! strcmp (language_string, "GNU Objective-C++"))
32042 i = 9;
32043 else if (! strcmp (language_string, "GNU Java"))
32044 i = 13;
32045 else if (! strcmp (language_string, "GNU Objective-C"))
32046 i = 14;
32047 else
32048 gcc_unreachable ();
32049 fprintf (file, "%d,", i);
32051 /* 8 single bit fields: global linkage (not set for C extern linkage,
32052 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
32053 from start of procedure stored in tbtab, internal function, function
32054 has controlled storage, function has no toc, function uses fp,
32055 function logs/aborts fp operations. */
32056 /* Assume that fp operations are used if any fp reg must be saved. */
32057 fprintf (file, "%d,",
32058 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
32060 /* 6 bitfields: function is interrupt handler, name present in
32061 proc table, function calls alloca, on condition directives
32062 (controls stack walks, 3 bits), saves condition reg, saves
32063 link reg. */
32064 /* The `function calls alloca' bit seems to be set whenever reg 31 is
32065 set up as a frame pointer, even when there is no alloca call. */
32066 fprintf (file, "%d,",
32067 ((optional_tbtab << 6)
32068 | ((optional_tbtab & frame_pointer_needed) << 5)
32069 | (info->cr_save_p << 1)
32070 | (info->lr_save_p)));
32072 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
32073 (6 bits). */
32074 fprintf (file, "%d,",
32075 (info->push_p << 7) | (64 - info->first_fp_reg_save));
32077 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
32078 fprintf (file, "%d,", (32 - first_reg_to_save ()));
32080 if (optional_tbtab)
32082 /* Compute the parameter info from the function decl argument
32083 list. */
32084 tree decl;
32085 int next_parm_info_bit = 31;
32087 for (decl = DECL_ARGUMENTS (current_function_decl);
32088 decl; decl = DECL_CHAIN (decl))
32090 rtx parameter = DECL_INCOMING_RTL (decl);
32091 machine_mode mode = GET_MODE (parameter);
32093 if (GET_CODE (parameter) == REG)
32095 if (SCALAR_FLOAT_MODE_P (mode))
32097 int bits;
32099 float_parms++;
32101 switch (mode)
32103 case E_SFmode:
32104 case E_SDmode:
32105 bits = 0x2;
32106 break;
32108 case E_DFmode:
32109 case E_DDmode:
32110 case E_TFmode:
32111 case E_TDmode:
32112 case E_IFmode:
32113 case E_KFmode:
32114 bits = 0x3;
32115 break;
32117 default:
32118 gcc_unreachable ();
32121 /* If only one bit will fit, don't or in this entry. */
32122 if (next_parm_info_bit > 0)
32123 parm_info |= (bits << (next_parm_info_bit - 1));
32124 next_parm_info_bit -= 2;
32126 else
32128 fixed_parms += ((GET_MODE_SIZE (mode)
32129 + (UNITS_PER_WORD - 1))
32130 / UNITS_PER_WORD);
32131 next_parm_info_bit -= 1;
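/* A worked example (a sketch, for a hypothetical f (int, double)
   with both parameters in registers): the int takes the else
   branch, giving fixed_parms == 1 and using one parm_info bit
   (0 == fixed); the double then ORs 0x3 into the next two bits,
   giving float_parms == 1 and parm_info == 0x60000000.  */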
32137 /* Number of fixed point parameters. */
32138 /* This is actually the number of words of fixed point parameters;
32139 thus an 8 byte struct counts as 2, and the maximum value is 8. */
32140 fprintf (file, "%d,", fixed_parms);
32142 /* 2 bitfields: number of floating point parameters (7 bits), parameters
32143 all on stack. */
32144 /* This is actually the number of fp registers that hold parameters;
32145 and thus the maximum value is 13. */
32146 /* Set parameters on stack bit if parameters are not in their original
32147 registers, regardless of whether they are on the stack? Xlc
32148 seems to set the bit when not optimizing. */
32149 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
32151 if (optional_tbtab)
32153 /* Optional fields follow. Some are variable length. */
32155 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
32156 float, 11 double float. */
32157 /* There is an entry for each parameter in a register, in the order
32158 that they occur in the parameter list. Any intervening arguments
32159 on the stack are ignored. If the list overflows a long (max
32160 possible length 34 bits) then completely leave off all elements
32161 that don't fit. */
32162 /* Only emit this long if there was at least one parameter. */
32163 if (fixed_parms || float_parms)
32164 fprintf (file, "\t.long %d\n", parm_info);
32166 /* Offset from start of code to tb table. */
32167 fputs ("\t.long ", file);
32168 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
32169 RS6000_OUTPUT_BASENAME (file, fname);
32170 putc ('-', file);
32171 rs6000_output_function_entry (file, fname);
32172 putc ('\n', file);
32174 /* Interrupt handler mask. */
32175 /* Omit this long, since we never set the interrupt handler bit
32176 above. */
32178 /* Number of CTL (controlled storage) anchors. */
32179 /* Omit this long, since the has_ctl bit is never set above. */
32181 /* Displacement into stack of each CTL anchor. */
32182 /* Omit this list of longs, because there are no CTL anchors. */
32184 /* Length of function name. */
32185 if (*fname == '*')
32186 ++fname;
32187 fprintf (file, "\t.short %d\n", (int) strlen (fname));
32189 /* Function name. */
32190 assemble_string (fname, strlen (fname));
32192 /* Register for alloca automatic storage; this is always reg 31.
32193 Only emit this if the alloca bit was set above. */
32194 if (frame_pointer_needed)
32195 fputs ("\t.byte 31\n", file);
32197 fputs ("\t.align 2\n", file);
32201 /* Arrange to define .LCTOC1 label, if not already done. */
32202 if (need_toc_init)
32204 need_toc_init = 0;
32205 if (!toc_initialized)
32207 switch_to_section (toc_section);
32208 switch_to_section (current_function_section ());
32213 /* -fsplit-stack support. */
32215 /* A SYMBOL_REF for __morestack. */
32216 static GTY(()) rtx morestack_ref;
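/* Emit RT = RA + C with the add pattern matching the word size.
   The split-stack prologue below uses this to emit a fixed-shape
   pair of add instructions that the linker can later rewrite.  */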
32218 static rtx
32219 gen_add3_const (rtx rt, rtx ra, long c)
32221 if (TARGET_64BIT)
32222 return gen_adddi3 (rt, ra, GEN_INT (c));
32223 else
32224 return gen_addsi3 (rt, ra, GEN_INT (c));
32227 /* Emit -fsplit-stack prologue, which goes before the regular function
32228 prologue (at local entry point in the case of ELFv2). */
32230 void
32231 rs6000_expand_split_stack_prologue (void)
32233 rs6000_stack_t *info = rs6000_stack_info ();
32234 unsigned HOST_WIDE_INT allocate;
32235 long alloc_hi, alloc_lo;
32236 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
32237 rtx_insn *insn;
32239 gcc_assert (flag_split_stack && reload_completed);
32241 if (!info->push_p)
32242 return;
32244 if (global_regs[29])
32246 error ("-fsplit-stack uses register r29");
32247 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
32248 "conflicts with %qD", global_regs_decl[29]);
32251 allocate = info->total_size;
32252 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
32254 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
32255 return;
32257 if (morestack_ref == NULL_RTX)
32259 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
32260 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
32261 | SYMBOL_FLAG_FUNCTION);
32264 r0 = gen_rtx_REG (Pmode, 0);
32265 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32266 r12 = gen_rtx_REG (Pmode, 12);
32267 emit_insn (gen_load_split_stack_limit (r0));
32268 /* Always emit two insns here to calculate the requested stack,
32269 so that the linker can edit them when adjusting size for calling
32270 non-split-stack code. */
32271 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
32272 alloc_lo = -allocate - alloc_hi;
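/* E.g. allocate == 0x12345 gives alloc_hi == -0x10000 and
   alloc_lo == -0x2345; adding 0x8000 before masking keeps alloc_lo
   within the signed 16-bit range of an addi immediate.  */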
32273 if (alloc_hi != 0)
32275 emit_insn (gen_add3_const (r12, r1, alloc_hi));
32276 if (alloc_lo != 0)
32277 emit_insn (gen_add3_const (r12, r12, alloc_lo));
32278 else
32279 emit_insn (gen_nop ());
32281 else
32283 emit_insn (gen_add3_const (r12, r1, alloc_lo));
32284 emit_insn (gen_nop ());
32287 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
32288 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
32289 ok_label = gen_label_rtx ();
32290 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32291 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
32292 gen_rtx_LABEL_REF (VOIDmode, ok_label),
32293 pc_rtx);
32294 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32295 JUMP_LABEL (insn) = ok_label;
32296 /* Mark the jump as very likely to be taken. */
32297 add_reg_br_prob_note (insn, profile_probability::very_likely ());
32299 lr = gen_rtx_REG (Pmode, LR_REGNO);
32300 insn = emit_move_insn (r0, lr);
32301 RTX_FRAME_RELATED_P (insn) = 1;
32302 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
32303 RTX_FRAME_RELATED_P (insn) = 1;
32305 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
32306 const0_rtx, const0_rtx));
32307 call_fusage = NULL_RTX;
32308 use_reg (&call_fusage, r12);
32309 /* Say the call uses r0, even though it doesn't, to stop regrename
32310 from twiddling with the insns saving lr, trashing args for cfun.
32311 The insns restoring lr are similarly protected by making
32312 split_stack_return use r0. */
32313 use_reg (&call_fusage, r0);
32314 add_function_usage_to (insn, call_fusage);
32315 /* Indicate that this function can't jump to non-local gotos. */
32316 make_reg_eh_region_note_nothrow_nononlocal (insn);
32317 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
32318 insn = emit_move_insn (lr, r0);
32319 add_reg_note (insn, REG_CFA_RESTORE, lr);
32320 RTX_FRAME_RELATED_P (insn) = 1;
32321 emit_insn (gen_split_stack_return ());
32323 emit_label (ok_label);
32324 LABEL_NUSES (ok_label) = 1;
32327 /* Return the internal arg pointer used for function incoming
32328 arguments. When -fsplit-stack, the arg pointer is r12 so we need
32329 to copy it to a pseudo in order for it to be preserved over calls
32330 and suchlike. We'd really like to use a pseudo here for the
32331 internal arg pointer but data-flow analysis is not prepared to
32332 accept pseudos as live at the beginning of a function. */
32334 static rtx
32335 rs6000_internal_arg_pointer (void)
32337 if (flag_split_stack
32338 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
32339 == NULL))
32342 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
32344 rtx pat;
32346 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
32347 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
32349 /* Put the pseudo initialization right after the note at the
32350 beginning of the function. */
32351 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
32352 gen_rtx_REG (Pmode, 12));
32353 push_topmost_sequence ();
32354 emit_insn_after (pat, get_insns ());
32355 pop_topmost_sequence ();
32357 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
32358 FIRST_PARM_OFFSET (current_function_decl));
32360 return virtual_incoming_args_rtx;
32363 /* We may have to tell the dataflow pass that the split stack prologue
32364 is initializing a register. */
32366 static void
32367 rs6000_live_on_entry (bitmap regs)
32369 if (flag_split_stack)
32370 bitmap_set_bit (regs, 12);
32373 /* Emit -fsplit-stack dynamic stack allocation space check. */
32375 void
32376 rs6000_split_stack_space_check (rtx size, rtx label)
32378 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32379 rtx limit = gen_reg_rtx (Pmode);
32380 rtx requested = gen_reg_rtx (Pmode);
32381 rtx cmp = gen_reg_rtx (CCUNSmode);
32382 rtx jump;
32384 emit_insn (gen_load_split_stack_limit (limit));
32385 if (CONST_INT_P (size))
32386 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
32387 else
32389 size = force_reg (Pmode, size);
32390 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
32392 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
32393 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32394 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
32395 gen_rtx_LABEL_REF (VOIDmode, label),
32396 pc_rtx);
32397 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32398 JUMP_LABEL (jump) = label;
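/* In effect this computes requested = sp - size and branches to
   LABEL when requested >= limit (unsigned), i.e. when the
   allocation still fits above the split-stack limit; the
   fall-through path is expected to call into __morestack.  */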
32401 /* A C compound statement that outputs the assembler code for a thunk
32402 function, used to implement C++ virtual function calls with
32403 multiple inheritance. The thunk acts as a wrapper around a virtual
32404 function, adjusting the implicit object parameter before handing
32405 control off to the real function.
32407 First, emit code to add the integer DELTA to the location that
32408 contains the incoming first argument. Assume that this argument
32409 contains a pointer, and is the one used to pass the `this' pointer
32410 in C++. This is the incoming argument *before* the function
32411 prologue, e.g. `%o0' on a sparc. The addition must preserve the
32412 values of all other incoming arguments.
32414 After the addition, emit code to jump to FUNCTION, which is a
32415 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
32416 not touch the return address. Hence returning from FUNCTION will
32417 return to whoever called the current `thunk'.
32419 The effect must be as if FUNCTION had been called directly with the
32420 adjusted first argument. This macro is responsible for emitting
32421 all of the code for a thunk function; output_function_prologue()
32422 and output_function_epilogue() are not invoked.
32424 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
32425 been extracted from it.) It might possibly be useful on some
32426 targets, but probably not.
32428 If you do not define this macro, the target-independent code in the
32429 C++ frontend will generate a less efficient heavyweight thunk that
32430 calls FUNCTION instead of jumping to it. The generic approach does
32431 not support varargs. */
32433 static void
32434 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
32435 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
32436 tree function)
32438 rtx this_rtx, funexp;
32439 rtx_insn *insn;
32441 reload_completed = 1;
32442 epilogue_completed = 1;
32444 /* Mark the end of the (empty) prologue. */
32445 emit_note (NOTE_INSN_PROLOGUE_END);
32447 /* Find the "this" pointer. If the function returns a structure,
32448 the structure return pointer is in r3. */
32449 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
32450 this_rtx = gen_rtx_REG (Pmode, 4);
32451 else
32452 this_rtx = gen_rtx_REG (Pmode, 3);
32454 /* Apply the constant offset, if required. */
32455 if (delta)
32456 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
32458 /* Apply the offset from the vtable, if required. */
32459 if (vcall_offset)
32461 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
32462 rtx tmp = gen_rtx_REG (Pmode, 12);
32464 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
32465 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
32467 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
32468 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
32470 else
32472 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
32474 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
32476 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
32479 /* Generate a tail call to the target function. */
32480 if (!TREE_USED (function))
32482 assemble_external (function);
32483 TREE_USED (function) = 1;
32485 funexp = XEXP (DECL_RTL (function), 0);
32486 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
32488 #if TARGET_MACHO
32489 if (MACHOPIC_INDIRECT)
32490 funexp = machopic_indirect_call_target (funexp);
32491 #endif
32493 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
32494 generate sibcall RTL explicitly. */
32495 insn = emit_call_insn (
32496 gen_rtx_PARALLEL (VOIDmode,
32497 gen_rtvec (3,
32498 gen_rtx_CALL (VOIDmode,
32499 funexp, const0_rtx),
32500 gen_rtx_USE (VOIDmode, const0_rtx),
32501 simple_return_rtx)));
32502 SIBLING_CALL_P (insn) = 1;
32503 emit_barrier ();
32505 /* Run just enough of rest_of_compilation to get the insns emitted.
32506 There's not really enough bulk here to make other passes such as
32507 instruction scheduling worthwhile.  Note that use_thunk calls
32508 assemble_start_function and assemble_end_function. */
32509 insn = get_insns ();
32510 shorten_branches (insn);
32511 final_start_function (insn, file, 1);
32512 final (insn, file, 1);
32513 final_end_function ();
32515 reload_completed = 0;
32516 epilogue_completed = 0;
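/* For a simple thunk with DELTA == 8 and no VCALL_OFFSET this boils
   down to something like (illustrative only; the "this" pointer is
   in r4 rather than r3 when the function returns an aggregate in
   memory):

       addi 3,3,8       # adjust the incoming "this" pointer
       b    function    # direct tail jump, return address untouched  */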
32519 /* A quick summary of the various types of 'constant-pool tables'
32520 under PowerPC:
32522 Target      Flags           Name             One table per
32523 AIX         (none)          AIX TOC          object file
32524 AIX         -mfull-toc      AIX TOC          object file
32525 AIX         -mminimal-toc   AIX minimal TOC  translation unit
32526 SVR4/EABI   (none)          SVR4 SDATA       object file
32527 SVR4/EABI   -fpic           SVR4 pic         object file
32528 SVR4/EABI   -fPIC           SVR4 PIC         translation unit
32529 SVR4/EABI   -mrelocatable   EABI TOC         function
32530 SVR4/EABI   -maix           AIX TOC          object file
32531 SVR4/EABI   -maix -mminimal-toc
32532                             AIX minimal TOC  translation unit
32534 Name             Reg.  Set by   entries   contains:
32535                                 made by   addrs?   fp?      sum?
32537 AIX TOC           2    crt0     as        Y        option   option
32538 AIX minimal TOC  30    prolog   gcc       Y        Y        option
32539 SVR4 SDATA       13    crt0     gcc       N        Y        N
32540 SVR4 pic         30    prolog   ld        Y        not yet  N
32541 SVR4 PIC         30    prolog   gcc       Y        option   option
32542 EABI TOC         30    prolog   gcc       Y        option   option
32546 /* Hash functions for the hash table. */
32548 static unsigned
32549 rs6000_hash_constant (rtx k)
32551 enum rtx_code code = GET_CODE (k);
32552 machine_mode mode = GET_MODE (k);
32553 unsigned result = (code << 3) ^ mode;
32554 const char *format;
32555 int flen, fidx;
32557 format = GET_RTX_FORMAT (code);
32558 flen = strlen (format);
32559 fidx = 0;
32561 switch (code)
32563 case LABEL_REF:
32564 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
32566 case CONST_WIDE_INT:
32568 int i;
32569 flen = CONST_WIDE_INT_NUNITS (k);
32570 for (i = 0; i < flen; i++)
32571 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
32572 return result;
32575 case CONST_DOUBLE:
32576 if (mode != VOIDmode)
32577 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
32578 flen = 2;
32579 break;
32581 case CODE_LABEL:
32582 fidx = 3;
32583 break;
32585 default:
32586 break;
32589 for (; fidx < flen; fidx++)
32590 switch (format[fidx])
32592 case 's':
32594 unsigned i, len;
32595 const char *str = XSTR (k, fidx);
32596 len = strlen (str);
32597 result = result * 613 + len;
32598 for (i = 0; i < len; i++)
32599 result = result * 613 + (unsigned) str[i];
32600 break;
32602 case 'u':
32603 case 'e':
32604 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
32605 break;
32606 case 'i':
32607 case 'n':
32608 result = result * 613 + (unsigned) XINT (k, fidx);
32609 break;
32610 case 'w':
32611 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
32612 result = result * 613 + (unsigned) XWINT (k, fidx);
32613 else
32615 size_t i;
32616 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
32617 result = result * 613 + (unsigned) (XWINT (k, fidx)
32618 >> CHAR_BIT * i);
32620 break;
32621 case '0':
32622 break;
32623 default:
32624 gcc_unreachable ();
32627 return result;
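/* As a worked example: hashing (const_int 5) enters the generic loop
   with format "w", so the HOST_WIDE_INT value 5 is folded in with the
   613 multiplier on top of the code/mode seed.  The constants 613 and
   1231 are simply odd primes used to spread the bits; nothing else
   depends on their exact values.  */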
32630 hashval_t
32631 toc_hasher::hash (toc_hash_struct *thc)
32633 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
32636 /* Compare H1 and H2 for equivalence. */
32638 bool
32639 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
32641 rtx r1 = h1->key;
32642 rtx r2 = h2->key;
32644 if (h1->key_mode != h2->key_mode)
32645 return 0;
32647 return rtx_equal_p (r1, r2);
32650 /* These are the names given by the C++ front-end to vtables, and
32651 vtable-like objects. Ideally, this logic should not be here;
32652 instead, there should be some programmatic way of inquiring as
32653 to whether or not an object is a vtable. */
32655 #define VTABLE_NAME_P(NAME) \
32656 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
32657 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
32658 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
32659 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
32660 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
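/* For example, "_ZTV4Node" (the vtable for a class Node) and
   "_ZTI4Node" (its type_info object) both satisfy VTABLE_NAME_P,
   while an ordinary function symbol does not.  */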
32662 #ifdef NO_DOLLAR_IN_LABEL
32663 /* Return a GGC-allocated character string translating dollar signs in
32664 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
32666 const char *
32667 rs6000_xcoff_strip_dollar (const char *name)
32669 char *strip, *p;
32670 const char *q;
32671 size_t len;
32673 q = (const char *) strchr (name, '$');
32675 if (q == 0 || q == name)
32676 return name;
32678 len = strlen (name);
32679 strip = XALLOCAVEC (char, len + 1);
32680 strcpy (strip, name);
32681 p = strip + (q - name);
32682 while (p)
32684 *p = '_';
32685 p = strchr (p + 1, '$');
32688 return ggc_alloc_string (strip, len);
32690 #endif
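/* For example, rs6000_xcoff_strip_dollar ("foo$bar$baz") returns
   "foo_bar_baz".  A name containing no '$', or one whose first
   character is '$', is returned unchanged.  */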
32692 void
32693 rs6000_output_symbol_ref (FILE *file, rtx x)
32695 const char *name = XSTR (x, 0);
32697 /* Currently C++ toc references to vtables can be emitted before it
32698 is decided whether the vtable is public or private. If this is
32699 the case, then the linker will eventually complain that there is
32700 a reference to an unknown section. Thus, for vtables only,
32701 we emit the TOC reference to reference the identifier and not the
32702 symbol. */
32703 if (VTABLE_NAME_P (name))
32705 RS6000_OUTPUT_BASENAME (file, name);
32707 else
32708 assemble_name (file, name);
32711 /* Output a TOC entry. We derive the entry name from what is being
32712 written. */
32714 void
32715 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
32717 char buf[256];
32718 const char *name = buf;
32719 rtx base = x;
32720 HOST_WIDE_INT offset = 0;
32722 gcc_assert (!TARGET_NO_TOC);
32724 /* When the linker won't eliminate them, don't output duplicate
32725 TOC entries (this happens on AIX if there is any kind of TOC,
32726 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
32727 CODE_LABELs. */
32728 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
32730 struct toc_hash_struct *h;
32732 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
32733 time because GGC is not initialized at that point. */
32734 if (toc_hash_table == NULL)
32735 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
32737 h = ggc_alloc<toc_hash_struct> ();
32738 h->key = x;
32739 h->key_mode = mode;
32740 h->labelno = labelno;
32742 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
32743 if (*found == NULL)
32744 *found = h;
32745 else /* This is indeed a duplicate.
32746 Set this label equal to that label. */
32748 fputs ("\t.set ", file);
32749 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32750 fprintf (file, "%d,", labelno);
32751 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32752 fprintf (file, "%d\n", ((*found)->labelno));
32754 #ifdef HAVE_AS_TLS
32755 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
32756 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
32757 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
32759 fputs ("\t.set ", file);
32760 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32761 fprintf (file, "%d,", labelno);
32762 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32763 fprintf (file, "%d\n", ((*found)->labelno));
32765 #endif
32766 return;
32770 /* If we're going to put a double constant in the TOC, make sure it's
32771 aligned properly when strict alignment is on. */
32772 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
32773 && STRICT_ALIGNMENT
32774 && GET_MODE_BITSIZE (mode) >= 64
32775 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
32776 ASM_OUTPUT_ALIGN (file, 3);
32779 (*targetm.asm_out.internal_label) (file, "LC", labelno);
32781 /* Handle FP constants specially. Note that if we have a minimal
32782 TOC, things we put here aren't actually in the TOC, so we can allow
32783 FP constants. */
32784 if (GET_CODE (x) == CONST_DOUBLE &&
32785 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
32786 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
32788 long k[4];
32790 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32791 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
32792 else
32793 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32795 if (TARGET_64BIT)
32797 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32798 fputs (DOUBLE_INT_ASM_OP, file);
32799 else
32800 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32801 k[0] & 0xffffffff, k[1] & 0xffffffff,
32802 k[2] & 0xffffffff, k[3] & 0xffffffff);
32803 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
32804 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32805 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
32806 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
32807 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
32808 return;
32810 else
32812 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32813 fputs ("\t.long ", file);
32814 else
32815 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32816 k[0] & 0xffffffff, k[1] & 0xffffffff,
32817 k[2] & 0xffffffff, k[3] & 0xffffffff);
32818 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
32819 k[0] & 0xffffffff, k[1] & 0xffffffff,
32820 k[2] & 0xffffffff, k[3] & 0xffffffff);
32821 return;
32824 else if (GET_CODE (x) == CONST_DOUBLE &&
32825 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
32827 long k[2];
32829 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32830 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
32831 else
32832 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32834 if (TARGET_64BIT)
32836 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32837 fputs (DOUBLE_INT_ASM_OP, file);
32838 else
32839 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32840 k[0] & 0xffffffff, k[1] & 0xffffffff);
32841 fprintf (file, "0x%lx%08lx\n",
32842 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32843 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
32844 return;
32846 else
32848 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32849 fputs ("\t.long ", file);
32850 else
32851 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32852 k[0] & 0xffffffff, k[1] & 0xffffffff);
32853 fprintf (file, "0x%lx,0x%lx\n",
32854 k[0] & 0xffffffff, k[1] & 0xffffffff);
32855 return;
32858 else if (GET_CODE (x) == CONST_DOUBLE &&
32859 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
32861 long l;
32863 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32864 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
32865 else
32866 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
32868 if (TARGET_64BIT)
32870 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32871 fputs (DOUBLE_INT_ASM_OP, file);
32872 else
32873 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32874 if (WORDS_BIG_ENDIAN)
32875 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
32876 else
32877 fprintf (file, "0x%lx\n", l & 0xffffffff);
32878 return;
32880 else
32882 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32883 fputs ("\t.long ", file);
32884 else
32885 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32886 fprintf (file, "0x%lx\n", l & 0xffffffff);
32887 return;
32890 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
32892 unsigned HOST_WIDE_INT low;
32893 HOST_WIDE_INT high;
32895 low = INTVAL (x) & 0xffffffff;
32896 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
32898 /* TOC entries are always Pmode-sized, so when big-endian
32899 smaller integer constants in the TOC need to be padded.
32900 (This is still a win over putting the constants in
32901 a separate constant pool, because then we'd have
32902 to have both a TOC entry _and_ the actual constant.)
32904 For a 32-bit target, CONST_INT values are loaded and shifted
32905 entirely within `low' and can be stored in one TOC entry. */
32907 /* It would be easy to make this work, but it doesn't now. */
32908 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
32910 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
32912 low |= high << 32;
32913 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
32914 high = (HOST_WIDE_INT) low >> 32;
32915 low &= 0xffffffff;
32918 if (TARGET_64BIT)
32920 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32921 fputs (DOUBLE_INT_ASM_OP, file);
32922 else
32923 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32924 (long) high & 0xffffffff, (long) low & 0xffffffff);
32925 fprintf (file, "0x%lx%08lx\n",
32926 (long) high & 0xffffffff, (long) low & 0xffffffff);
32927 return;
32929 else
32931 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
32933 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32934 fputs ("\t.long ", file);
32935 else
32936 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32937 (long) high & 0xffffffff, (long) low & 0xffffffff);
32938 fprintf (file, "0x%lx,0x%lx\n",
32939 (long) high & 0xffffffff, (long) low & 0xffffffff);
32941 else
32943 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32944 fputs ("\t.long ", file);
32945 else
32946 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
32947 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
32949 return;
32953 if (GET_CODE (x) == CONST)
32955 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
32956 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
32958 base = XEXP (XEXP (x, 0), 0);
32959 offset = INTVAL (XEXP (XEXP (x, 0), 1));
32962 switch (GET_CODE (base))
32964 case SYMBOL_REF:
32965 name = XSTR (base, 0);
32966 break;
32968 case LABEL_REF:
32969 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
32970 CODE_LABEL_NUMBER (XEXP (base, 0)));
32971 break;
32973 case CODE_LABEL:
32974 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
32975 break;
32977 default:
32978 gcc_unreachable ();
32981 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32982 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
32983 else
32985 fputs ("\t.tc ", file);
32986 RS6000_OUTPUT_BASENAME (file, name);
32988 if (offset < 0)
32989 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
32990 else if (offset)
32991 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
32993 /* Mark large TOC symbols on AIX with [TE] so they are mapped
32994 after other TOC symbols, reducing overflow of small TOC access
32995 to [TC] symbols. */
32996 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
32997 ? "[TE]," : "[TC],", file);
33000 /* Currently C++ toc references to vtables can be emitted before it
33001 is decided whether the vtable is public or private. If this is
33002 the case, then the linker will eventually complain that there is
33003 a TOC reference to an unknown section. Thus, for vtables only,
33004 we emit the TOC reference to reference the symbol and not the
33005 section. */
33006 if (VTABLE_NAME_P (name))
33008 RS6000_OUTPUT_BASENAME (file, name);
33009 if (offset < 0)
33010 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
33011 else if (offset > 0)
33012 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
33014 else
33015 output_addr_const (file, x);
33017 #if HAVE_AS_TLS
33018 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
33020 switch (SYMBOL_REF_TLS_MODEL (base))
33022 case 0:
33023 break;
33024 case TLS_MODEL_LOCAL_EXEC:
33025 fputs ("@le", file);
33026 break;
33027 case TLS_MODEL_INITIAL_EXEC:
33028 fputs ("@ie", file);
33029 break;
33030 /* Use global-dynamic for local-dynamic. */
33031 case TLS_MODEL_GLOBAL_DYNAMIC:
33032 case TLS_MODEL_LOCAL_DYNAMIC:
33033 putc ('\n', file);
33034 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
33035 fputs ("\t.tc .", file);
33036 RS6000_OUTPUT_BASENAME (file, name);
33037 fputs ("[TC],", file);
33038 output_addr_const (file, x);
33039 fputs ("@m", file);
33040 break;
33041 default:
33042 gcc_unreachable ();
33045 #endif
33047 putc ('\n', file);
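/* As an illustration (assuming a 64-bit big-endian target without
   -mminimal-toc), the DFmode constant 1.0 is emitted by the code
   above as

       .tc FD_3ff00000_0[TC],0x3ff0000000000000

   while ELF and -mminimal-toc targets emit the same 64-bit value
   after the LC label via DOUBLE_INT_ASM_OP instead of a .tc
   directive.  */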
33050 /* Output an assembler pseudo-op to write an ASCII string of N characters
33051 starting at P to FILE.
33053 On the RS/6000, we have to do this using the .byte operation and
33054 write out special characters outside the quoted string.
33055 Also, the assembler is broken; very long strings are truncated,
33056 so we must artificially break them up early. */
33058 void
33059 output_ascii (FILE *file, const char *p, int n)
33061 char c;
33062 int i, count_string;
33063 const char *for_string = "\t.byte \"";
33064 const char *for_decimal = "\t.byte ";
33065 const char *to_close = NULL;
33067 count_string = 0;
33068 for (i = 0; i < n; i++)
33070 c = *p++;
33071 if (c >= ' ' && c < 0177)
33073 if (for_string)
33074 fputs (for_string, file);
33075 putc (c, file);
33077 /* Write two quotes to get one. */
33078 if (c == '"')
33080 putc (c, file);
33081 ++count_string;
33084 for_string = NULL;
33085 for_decimal = "\"\n\t.byte ";
33086 to_close = "\"\n";
33087 ++count_string;
33089 if (count_string >= 512)
33091 fputs (to_close, file);
33093 for_string = "\t.byte \"";
33094 for_decimal = "\t.byte ";
33095 to_close = NULL;
33096 count_string = 0;
33099 else
33101 if (for_decimal)
33102 fputs (for_decimal, file);
33103 fprintf (file, "%d", c);
33105 for_string = "\n\t.byte \"";
33106 for_decimal = ", ";
33107 to_close = "\n";
33108 count_string = 0;
33112 /* Now close the string if we have written one. Then end the line. */
33113 if (to_close)
33114 fputs (to_close, file);
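/* For example, output_ascii (file, "Hi\n", 3) produces

       .byte "Hi"
       .byte 10

   printable characters are collected into a quoted .byte string and
   everything else is written out as decimal byte values.  */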
33117 /* Generate a unique section name for FILENAME for a section type
33118 represented by SECTION_DESC. Output goes into BUF.
33120 SECTION_DESC can be any string, as long as it is different for each
33121 possible section type.
33123 We name the section in the same manner as xlc. The name begins with an
33124 underscore followed by the filename (after stripping any leading directory
33125 names) with the last period replaced by the string SECTION_DESC. If
33126 FILENAME does not contain a period, SECTION_DESC is appended to the end of
33127 the name. */
33129 void
33130 rs6000_gen_section_name (char **buf, const char *filename,
33131 const char *section_desc)
33133 const char *q, *after_last_slash, *last_period = 0;
33134 char *p;
33135 int len;
33137 after_last_slash = filename;
33138 for (q = filename; *q; q++)
33140 if (*q == '/')
33141 after_last_slash = q + 1;
33142 else if (*q == '.')
33143 last_period = q;
33146 len = strlen (after_last_slash) + strlen (section_desc) + 2;
33147 *buf = (char *) xmalloc (len);
33149 p = *buf;
33150 *p++ = '_';
33152 for (q = after_last_slash; *q; q++)
33154 if (q == last_period)
33156 strcpy (p, section_desc);
33157 p += strlen (section_desc);
33158 break;
33161 else if (ISALNUM (*q))
33162 *p++ = *q;
33165 if (last_period == 0)
33166 strcpy (p, section_desc);
33167 else
33168 *p = '\0';
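/* For example, given FILENAME "src/foo.c" and SECTION_DESC "data",
   this produces "_foodata": the directory prefix is stripped, the
   last period is replaced by SECTION_DESC (dropping anything after
   it), and non-alphanumeric characters before the period are
   skipped.  */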
33171 /* Emit profile function. */
33173 void
33174 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
33176 /* Non-standard profiling for kernels, which just saves LR then calls
33177 _mcount without worrying about arg saves. The idea is to change
33178 the function prologue as little as possible as it isn't easy to
33179 account for arg save/restore code added just for _mcount. */
33180 if (TARGET_PROFILE_KERNEL)
33181 return;
33183 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33185 #ifndef NO_PROFILE_COUNTERS
33186 # define NO_PROFILE_COUNTERS 0
33187 #endif
33188 if (NO_PROFILE_COUNTERS)
33189 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33190 LCT_NORMAL, VOIDmode);
33191 else
33193 char buf[30];
33194 const char *label_name;
33195 rtx fun;
33197 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33198 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
33199 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
33201 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33202 LCT_NORMAL, VOIDmode, fun, Pmode);
33205 else if (DEFAULT_ABI == ABI_DARWIN)
33207 const char *mcount_name = RS6000_MCOUNT;
33208 int caller_addr_regno = LR_REGNO;
33210 /* Be conservative and always set this, at least for now. */
33211 crtl->uses_pic_offset_table = 1;
33213 #if TARGET_MACHO
33214 /* For PIC code, set up a stub and collect the caller's address
33215 from r0, which is where the prologue puts it. */
33216 if (MACHOPIC_INDIRECT
33217 && crtl->uses_pic_offset_table)
33218 caller_addr_regno = 0;
33219 #endif
33220 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
33221 LCT_NORMAL, VOIDmode,
33222 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
33226 /* Write function profiler code. */
33228 void
33229 output_function_profiler (FILE *file, int labelno)
33231 char buf[100];
33233 switch (DEFAULT_ABI)
33235 default:
33236 gcc_unreachable ();
33238 case ABI_V4:
33239 if (!TARGET_32BIT)
33241 warning (0, "no profiling of 64-bit code for this ABI");
33242 return;
33244 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33245 fprintf (file, "\tmflr %s\n", reg_names[0]);
33246 if (NO_PROFILE_COUNTERS)
33248 asm_fprintf (file, "\tstw %s,4(%s)\n",
33249 reg_names[0], reg_names[1]);
33251 else if (TARGET_SECURE_PLT && flag_pic)
33253 if (TARGET_LINK_STACK)
33255 char name[32];
33256 get_ppc476_thunk_name (name);
33257 asm_fprintf (file, "\tbl %s\n", name);
33259 else
33260 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
33261 asm_fprintf (file, "\tstw %s,4(%s)\n",
33262 reg_names[0], reg_names[1]);
33263 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33264 asm_fprintf (file, "\taddis %s,%s,",
33265 reg_names[12], reg_names[12]);
33266 assemble_name (file, buf);
33267 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
33268 assemble_name (file, buf);
33269 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
33271 else if (flag_pic == 1)
33273 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
33274 asm_fprintf (file, "\tstw %s,4(%s)\n",
33275 reg_names[0], reg_names[1]);
33276 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33277 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
33278 assemble_name (file, buf);
33279 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
33281 else if (flag_pic > 1)
33283 asm_fprintf (file, "\tstw %s,4(%s)\n",
33284 reg_names[0], reg_names[1]);
33285 /* Now, we need to get the address of the label. */
33286 if (TARGET_LINK_STACK)
33288 char name[32];
33289 get_ppc476_thunk_name (name);
33290 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
33291 assemble_name (file, buf);
33292 fputs ("-.\n1:", file);
33293 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33294 asm_fprintf (file, "\taddi %s,%s,4\n",
33295 reg_names[11], reg_names[11]);
33297 else
33299 fputs ("\tbcl 20,31,1f\n\t.long ", file);
33300 assemble_name (file, buf);
33301 fputs ("-.\n1:", file);
33302 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33304 asm_fprintf (file, "\tlwz %s,0(%s)\n",
33305 reg_names[0], reg_names[11]);
33306 asm_fprintf (file, "\tadd %s,%s,%s\n",
33307 reg_names[0], reg_names[0], reg_names[11]);
33309 else
33311 asm_fprintf (file, "\tlis %s,", reg_names[12]);
33312 assemble_name (file, buf);
33313 fputs ("@ha\n", file);
33314 asm_fprintf (file, "\tstw %s,4(%s)\n",
33315 reg_names[0], reg_names[1]);
33316 asm_fprintf (file, "\tla %s,", reg_names[0]);
33317 assemble_name (file, buf);
33318 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
33321 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
33322 fprintf (file, "\tbl %s%s\n",
33323 RS6000_MCOUNT, flag_pic ? "@plt" : "");
33324 break;
33326 case ABI_AIX:
33327 case ABI_ELFv2:
33328 case ABI_DARWIN:
33329 /* Don't do anything, done in output_profile_hook (). */
33330 break;
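/* As an illustrative example (assuming 32-bit SVR4, no PIC, and
   profile counters enabled), the ABI_V4 path above emits roughly:

       mflr 0
       lis  12,.LP0@ha
       stw  0,4(1)
       la   0,.LP0@l(12)
       bl   _mcount

   i.e. the link register is saved at 4(r1), r0 is loaded with the
   address of the per-function counter label, and _mcount is
   called.  */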
33336 /* The following variable value is the last issued insn. */
33338 static rtx_insn *last_scheduled_insn;
33340 /* The following variable helps to balance issuing of load and
33341 store instructions.  */
33343 static int load_store_pendulum;
33345 /* The following variable helps pair divide insns during scheduling. */
33346 static int divide_cnt;
33347 /* The following variable helps pair and alternate vector and vector load
33348 insns during scheduling. */
33349 static int vec_pairing;
33352 /* Power4 load update and store update instructions are cracked into a
33353 load or store and an integer insn which are executed in the same cycle.
33354 Branches have their own dispatch slot which does not count against the
33355 GCC issue rate, but it changes the program flow so there are no other
33356 instructions to issue in this cycle. */
33358 static int
33359 rs6000_variable_issue_1 (rtx_insn *insn, int more)
33361 last_scheduled_insn = insn;
33362 if (GET_CODE (PATTERN (insn)) == USE
33363 || GET_CODE (PATTERN (insn)) == CLOBBER)
33365 cached_can_issue_more = more;
33366 return cached_can_issue_more;
33369 if (insn_terminates_group_p (insn, current_group))
33371 cached_can_issue_more = 0;
33372 return cached_can_issue_more;
33375 /* If no reservation, but reach here */
33376 if (recog_memoized (insn) < 0)
33377 return more;
33379 if (rs6000_sched_groups)
33381 if (is_microcoded_insn (insn))
33382 cached_can_issue_more = 0;
33383 else if (is_cracked_insn (insn))
33384 cached_can_issue_more = more > 2 ? more - 2 : 0;
33385 else
33386 cached_can_issue_more = more - 1;
33388 return cached_can_issue_more;
33391 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
33392 return 0;
33394 cached_can_issue_more = more - 1;
33395 return cached_can_issue_more;
33398 static int
33399 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
33401 int r = rs6000_variable_issue_1 (insn, more);
33402 if (verbose)
33403 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
33404 return r;
33407 /* Adjust the cost of a scheduling dependency. Return the new cost of
33408 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
33410 static int
33411 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
33412 unsigned int)
33414 enum attr_type attr_type;
33416 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
33417 return cost;
33419 switch (dep_type)
33421 case REG_DEP_TRUE:
33423 /* Data dependency; DEP_INSN writes a register that INSN reads
33424 some cycles later. */
33426 /* Separate a load from a narrower, dependent store. */
33427 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
33428 && GET_CODE (PATTERN (insn)) == SET
33429 && GET_CODE (PATTERN (dep_insn)) == SET
33430 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
33431 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
33432 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
33433 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
33434 return cost + 14;
33436 attr_type = get_attr_type (insn);
33438 switch (attr_type)
33440 case TYPE_JMPREG:
33441 /* Tell the first scheduling pass about the latency between
33442 a mtctr and bctr (and mtlr and br/blr). The first
33443 scheduling pass will not know about this latency since
33444 the mtctr instruction, which has the latency associated
33445 to it, will be generated by reload. */
33446 return 4;
33447 case TYPE_BRANCH:
33448 /* Leave some extra cycles between a compare and its
33449 dependent branch, to inhibit expensive mispredicts. */
33450 if ((rs6000_cpu_attr == CPU_PPC603
33451 || rs6000_cpu_attr == CPU_PPC604
33452 || rs6000_cpu_attr == CPU_PPC604E
33453 || rs6000_cpu_attr == CPU_PPC620
33454 || rs6000_cpu_attr == CPU_PPC630
33455 || rs6000_cpu_attr == CPU_PPC750
33456 || rs6000_cpu_attr == CPU_PPC7400
33457 || rs6000_cpu_attr == CPU_PPC7450
33458 || rs6000_cpu_attr == CPU_PPCE5500
33459 || rs6000_cpu_attr == CPU_PPCE6500
33460 || rs6000_cpu_attr == CPU_POWER4
33461 || rs6000_cpu_attr == CPU_POWER5
33462 || rs6000_cpu_attr == CPU_POWER7
33463 || rs6000_cpu_attr == CPU_POWER8
33464 || rs6000_cpu_attr == CPU_POWER9
33465 || rs6000_cpu_attr == CPU_CELL)
33466 && recog_memoized (dep_insn)
33467 && (INSN_CODE (dep_insn) >= 0))
33469 switch (get_attr_type (dep_insn))
33471 case TYPE_CMP:
33472 case TYPE_FPCOMPARE:
33473 case TYPE_CR_LOGICAL:
33474 case TYPE_DELAYED_CR:
33475 return cost + 2;
33476 case TYPE_EXTS:
33477 case TYPE_MUL:
33478 if (get_attr_dot (dep_insn) == DOT_YES)
33479 return cost + 2;
33480 else
33481 break;
33482 case TYPE_SHIFT:
33483 if (get_attr_dot (dep_insn) == DOT_YES
33484 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
33485 return cost + 2;
33486 else
33487 break;
33488 default:
33489 break;
33491 break;
33493 case TYPE_STORE:
33494 case TYPE_FPSTORE:
33495 if ((rs6000_cpu == PROCESSOR_POWER6)
33496 && recog_memoized (dep_insn)
33497 && (INSN_CODE (dep_insn) >= 0))
33500 if (GET_CODE (PATTERN (insn)) != SET)
33501 /* If this happens, we have to extend this to schedule
33502 optimally. Return default for now. */
33503 return cost;
33505 /* Adjust the cost for the case where the value written
33506 by a fixed point operation is used as the address
33507 gen value on a store. */
33508 switch (get_attr_type (dep_insn))
33510 case TYPE_LOAD:
33511 case TYPE_CNTLZ:
33513 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33514 return get_attr_sign_extend (dep_insn)
33515 == SIGN_EXTEND_YES ? 6 : 4;
33516 break;
33518 case TYPE_SHIFT:
33520 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33521 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33522 6 : 3;
33523 break;
33525 case TYPE_INTEGER:
33526 case TYPE_ADD:
33527 case TYPE_LOGICAL:
33528 case TYPE_EXTS:
33529 case TYPE_INSERT:
33531 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33532 return 3;
33533 break;
33535 case TYPE_STORE:
33536 case TYPE_FPLOAD:
33537 case TYPE_FPSTORE:
33539 if (get_attr_update (dep_insn) == UPDATE_YES
33540 && ! rs6000_store_data_bypass_p (dep_insn, insn))
33541 return 3;
33542 break;
33544 case TYPE_MUL:
33546 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33547 return 17;
33548 break;
33550 case TYPE_DIV:
33552 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33553 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33554 break;
33556 default:
33557 break;
33560 break;
33562 case TYPE_LOAD:
33563 if ((rs6000_cpu == PROCESSOR_POWER6)
33564 && recog_memoized (dep_insn)
33565 && (INSN_CODE (dep_insn) >= 0))
33568 /* Adjust the cost for the case where the value written
33569 by a fixed point instruction is used within the address
33570 gen portion of a subsequent load(u)(x) */
33571 switch (get_attr_type (dep_insn))
33573 case TYPE_LOAD:
33574 case TYPE_CNTLZ:
33576 if (set_to_load_agen (dep_insn, insn))
33577 return get_attr_sign_extend (dep_insn)
33578 == SIGN_EXTEND_YES ? 6 : 4;
33579 break;
33581 case TYPE_SHIFT:
33583 if (set_to_load_agen (dep_insn, insn))
33584 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33585 6 : 3;
33586 break;
33588 case TYPE_INTEGER:
33589 case TYPE_ADD:
33590 case TYPE_LOGICAL:
33591 case TYPE_EXTS:
33592 case TYPE_INSERT:
33594 if (set_to_load_agen (dep_insn, insn))
33595 return 3;
33596 break;
33598 case TYPE_STORE:
33599 case TYPE_FPLOAD:
33600 case TYPE_FPSTORE:
33602 if (get_attr_update (dep_insn) == UPDATE_YES
33603 && set_to_load_agen (dep_insn, insn))
33604 return 3;
33605 break;
33607 case TYPE_MUL:
33609 if (set_to_load_agen (dep_insn, insn))
33610 return 17;
33611 break;
33613 case TYPE_DIV:
33615 if (set_to_load_agen (dep_insn, insn))
33616 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33617 break;
33619 default:
33620 break;
33623 break;
33625 case TYPE_FPLOAD:
33626 if ((rs6000_cpu == PROCESSOR_POWER6)
33627 && get_attr_update (insn) == UPDATE_NO
33628 && recog_memoized (dep_insn)
33629 && (INSN_CODE (dep_insn) >= 0)
33630 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
33631 return 2;
33633 default:
33634 break;
33637 /* Fall out to return default cost. */
33639 break;
33641 case REG_DEP_OUTPUT:
33642 /* Output dependency; DEP_INSN writes a register that INSN writes some
33643 cycles later. */
33644 if ((rs6000_cpu == PROCESSOR_POWER6)
33645 && recog_memoized (dep_insn)
33646 && (INSN_CODE (dep_insn) >= 0))
33648 attr_type = get_attr_type (insn);
33650 switch (attr_type)
33652 case TYPE_FP:
33653 case TYPE_FPSIMPLE:
33654 if (get_attr_type (dep_insn) == TYPE_FP
33655 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
33656 return 1;
33657 break;
33658 case TYPE_FPLOAD:
33659 if (get_attr_update (insn) == UPDATE_NO
33660 && get_attr_type (dep_insn) == TYPE_MFFGPR)
33661 return 2;
33662 break;
33663 default:
33664 break;
33667 /* Fall through, no cost for output dependency. */
33668 /* FALLTHRU */
33670 case REG_DEP_ANTI:
33671 /* Anti dependency; DEP_INSN reads a register that INSN writes some
33672 cycles later. */
33673 return 0;
33675 default:
33676 gcc_unreachable ();
33679 return cost;
33682 /* Debug version of rs6000_adjust_cost. */
33684 static int
33685 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
33686 int cost, unsigned int dw)
33688 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
33690 if (ret != cost)
33692 const char *dep;
33694 switch (dep_type)
33696 default: dep = "unknown dependency"; break;
33697 case REG_DEP_TRUE: dep = "data dependency"; break;
33698 case REG_DEP_OUTPUT: dep = "output dependency"; break;
33699 case REG_DEP_ANTI: dep = "anti dependency"; break;
33702 fprintf (stderr,
33703 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
33704 "%s, insn:\n", ret, cost, dep);
33706 debug_rtx (insn);
33709 return ret;
33712 /* Return true if INSN is microcoded,
33713    false otherwise.  */
33715 static bool
33716 is_microcoded_insn (rtx_insn *insn)
33718 if (!insn || !NONDEBUG_INSN_P (insn)
33719 || GET_CODE (PATTERN (insn)) == USE
33720 || GET_CODE (PATTERN (insn)) == CLOBBER)
33721 return false;
33723 if (rs6000_cpu_attr == CPU_CELL)
33724 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
33726 if (rs6000_sched_groups
33727 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33729 enum attr_type type = get_attr_type (insn);
33730 if ((type == TYPE_LOAD
33731 && get_attr_update (insn) == UPDATE_YES
33732 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
33733 || ((type == TYPE_LOAD || type == TYPE_STORE)
33734 && get_attr_update (insn) == UPDATE_YES
33735 && get_attr_indexed (insn) == INDEXED_YES)
33736 || type == TYPE_MFCR)
33737 return true;
33740 return false;
33743 /* The function returns true if INSN is cracked into 2 instructions
33744 by the processor (and therefore occupies 2 issue slots). */
33746 static bool
33747 is_cracked_insn (rtx_insn *insn)
33749 if (!insn || !NONDEBUG_INSN_P (insn)
33750 || GET_CODE (PATTERN (insn)) == USE
33751 || GET_CODE (PATTERN (insn)) == CLOBBER)
33752 return false;
33754 if (rs6000_sched_groups
33755 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33757 enum attr_type type = get_attr_type (insn);
33758 if ((type == TYPE_LOAD
33759 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33760 && get_attr_update (insn) == UPDATE_NO)
33761 || (type == TYPE_LOAD
33762 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
33763 && get_attr_update (insn) == UPDATE_YES
33764 && get_attr_indexed (insn) == INDEXED_NO)
33765 || (type == TYPE_STORE
33766 && get_attr_update (insn) == UPDATE_YES
33767 && get_attr_indexed (insn) == INDEXED_NO)
33768 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
33769 && get_attr_update (insn) == UPDATE_YES)
33770 || type == TYPE_DELAYED_CR
33771 || (type == TYPE_EXTS
33772 && get_attr_dot (insn) == DOT_YES)
33773 || (type == TYPE_SHIFT
33774 && get_attr_dot (insn) == DOT_YES
33775 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
33776 || (type == TYPE_MUL
33777 && get_attr_dot (insn) == DOT_YES)
33778 || type == TYPE_DIV
33779 || (type == TYPE_INSERT
33780 && get_attr_size (insn) == SIZE_32))
33781 return true;
33784 return false;
33787 /* The function returns true if INSN can be issued only from
33788 the branch slot. */
33790 static bool
33791 is_branch_slot_insn (rtx_insn *insn)
33793 if (!insn || !NONDEBUG_INSN_P (insn)
33794 || GET_CODE (PATTERN (insn)) == USE
33795 || GET_CODE (PATTERN (insn)) == CLOBBER)
33796 return false;
33798 if (rs6000_sched_groups)
33800 enum attr_type type = get_attr_type (insn);
33801 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
33802 return true;
33803 return false;
33806 return false;
33809 /* Return true if OUT_INSN sets a value that is used in the address
33810    generation computation of IN_INSN.  */
33811 static bool
33812 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
33814 rtx out_set, in_set;
33816 /* For performance reasons, only handle the simple case where
33817 both loads are a single_set. */
33818 out_set = single_set (out_insn);
33819 if (out_set)
33821 in_set = single_set (in_insn);
33822 if (in_set)
33823 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
33826 return false;
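/* For example, if OUT_INSN is "addi 9,3,16" (setting r9) and IN_INSN
   loads from (mem (plus (reg 9) (const_int 4))), r9 is mentioned in
   the load's address, so this returns true.  */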
33829 /* Try to determine base/offset/size parts of the given MEM.
33830 Return true if successful, false if any of the values couldn't
33831 be determined.
33833 This function only looks for REG or REG+CONST address forms.
33834 REG+REG address form will return false. */
33836 static bool
33837 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
33838 HOST_WIDE_INT *size)
33840 rtx addr_rtx;
33841 if (MEM_SIZE_KNOWN_P (mem))
33842 *size = MEM_SIZE (mem);
33843 else
33844 return false;
33846 addr_rtx = (XEXP (mem, 0));
33847 if (GET_CODE (addr_rtx) == PRE_MODIFY)
33848 addr_rtx = XEXP (addr_rtx, 1);
33850 *offset = 0;
33851 while (GET_CODE (addr_rtx) == PLUS
33852 && CONST_INT_P (XEXP (addr_rtx, 1)))
33854 *offset += INTVAL (XEXP (addr_rtx, 1));
33855 addr_rtx = XEXP (addr_rtx, 0);
33857 if (!REG_P (addr_rtx))
33858 return false;
33860 *base = addr_rtx;
33861 return true;
33864 /* Return true if the target storage location of MEM1 is adjacent
33865    to the target storage location of MEM2.  */
33868 static bool
33869 adjacent_mem_locations (rtx mem1, rtx mem2)
33871 rtx reg1, reg2;
33872 HOST_WIDE_INT off1, size1, off2, size2;
33874 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33875 && get_memref_parts (mem2, &reg2, &off2, &size2))
33876 return ((REGNO (reg1) == REGNO (reg2))
33877 && ((off1 + size1 == off2)
33878 || (off2 + size2 == off1)));
33880 return false;
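/* For example, a 4-byte store at (plus (reg 9) (const_int 16)) and a
   4-byte store at (plus (reg 9) (const_int 20)) are adjacent: the
   base registers match and off1 + size1 == off2.  */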
33883 /* This function returns true if it can be determined that the two MEM
33884 locations overlap by at least 1 byte based on base reg/offset/size. */
33886 static bool
33887 mem_locations_overlap (rtx mem1, rtx mem2)
33889 rtx reg1, reg2;
33890 HOST_WIDE_INT off1, size1, off2, size2;
33892 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33893 && get_memref_parts (mem2, &reg2, &off2, &size2))
33894 return ((REGNO (reg1) == REGNO (reg2))
33895 && (((off1 <= off2) && (off1 + size1 > off2))
33896 || ((off2 <= off1) && (off2 + size2 > off1))));
33898 return false;
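/* For example, an 8-byte access at offset 16 and a 4-byte access at
   offset 20 from the same base register overlap (16 <= 20 and
   16 + 8 > 20), while accesses at offsets 16 and 24 do not.  */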
33901 /* A C statement (sans semicolon) to update the integer scheduling
33902 priority INSN_PRIORITY (INSN). Increase the priority to execute the
33903 INSN earlier, reduce the priority to execute INSN later. Do not
33904 define this macro if you do not need to adjust the scheduling
33905 priorities of insns. */
33907 static int
33908 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
33910 rtx load_mem, str_mem;
33911 /* On machines (like the 750) which have asymmetric integer units,
33912 where one integer unit can do multiply and divides and the other
33913 can't, reduce the priority of multiply/divide so it is scheduled
33914 after other integer operations.  */
33916 #if 0
33917 if (! INSN_P (insn))
33918 return priority;
33920 if (GET_CODE (PATTERN (insn)) == USE)
33921 return priority;
33923 switch (rs6000_cpu_attr) {
33924 case CPU_PPC750:
33925 switch (get_attr_type (insn))
33927 default:
33928 break;
33930 case TYPE_MUL:
33931 case TYPE_DIV:
33932 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
33933 priority, priority);
33934 if (priority >= 0 && priority < 0x01000000)
33935 priority >>= 3;
33936 break;
33939 #endif
33941 if (insn_must_be_first_in_group (insn)
33942 && reload_completed
33943 && current_sched_info->sched_max_insns_priority
33944 && rs6000_sched_restricted_insns_priority)
33947 /* Prioritize insns that can be dispatched only in the first
33948 dispatch slot. */
33949 if (rs6000_sched_restricted_insns_priority == 1)
33950 /* Attach highest priority to insn. This means that in
33951 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
33952 precede 'priority' (critical path) considerations. */
33953 return current_sched_info->sched_max_insns_priority;
33954 else if (rs6000_sched_restricted_insns_priority == 2)
33955 /* Increase priority of insn by a minimal amount. This means that in
33956 haifa-sched.c:ready_sort(), only 'priority' (critical path)
33957 considerations precede dispatch-slot restriction considerations. */
33958 return (priority + 1);
33961 if (rs6000_cpu == PROCESSOR_POWER6
33962 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
33963 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
33964 /* Attach highest priority to insn if the scheduler has just issued two
33965 stores and this instruction is a load, or two loads and this instruction
33966 is a store. Power6 wants loads and stores scheduled alternately
33967 when possible */
33968 return current_sched_info->sched_max_insns_priority;
33970 return priority;
33973 /* Return true if the instruction is nonpipelined on the Cell. */
33974 static bool
33975 is_nonpipeline_insn (rtx_insn *insn)
33977 enum attr_type type;
33978 if (!insn || !NONDEBUG_INSN_P (insn)
33979 || GET_CODE (PATTERN (insn)) == USE
33980 || GET_CODE (PATTERN (insn)) == CLOBBER)
33981 return false;
33983 type = get_attr_type (insn);
33984 if (type == TYPE_MUL
33985 || type == TYPE_DIV
33986 || type == TYPE_SDIV
33987 || type == TYPE_DDIV
33988 || type == TYPE_SSQRT
33989 || type == TYPE_DSQRT
33990 || type == TYPE_MFCR
33991 || type == TYPE_MFCRF
33992 || type == TYPE_MFJMPR)
33994 return true;
33996 return false;
34000 /* Return how many instructions the machine can issue per cycle. */
34002 static int
34003 rs6000_issue_rate (void)
34005 /* Unless scheduling for register pressure, use issue rate of 1 for
34006 first scheduling pass to decrease degradation. */
34007 if (!reload_completed && !flag_sched_pressure)
34008 return 1;
34010 switch (rs6000_cpu_attr) {
34011 case CPU_RS64A:
34012 case CPU_PPC601: /* ? */
34013 case CPU_PPC7450:
34014 return 3;
34015 case CPU_PPC440:
34016 case CPU_PPC603:
34017 case CPU_PPC750:
34018 case CPU_PPC7400:
34019 case CPU_PPC8540:
34020 case CPU_PPC8548:
34021 case CPU_CELL:
34022 case CPU_PPCE300C2:
34023 case CPU_PPCE300C3:
34024 case CPU_PPCE500MC:
34025 case CPU_PPCE500MC64:
34026 case CPU_PPCE5500:
34027 case CPU_PPCE6500:
34028 case CPU_TITAN:
34029 return 2;
34030 case CPU_PPC476:
34031 case CPU_PPC604:
34032 case CPU_PPC604E:
34033 case CPU_PPC620:
34034 case CPU_PPC630:
34035 return 4;
34036 case CPU_POWER4:
34037 case CPU_POWER5:
34038 case CPU_POWER6:
34039 case CPU_POWER7:
34040 return 5;
34041 case CPU_POWER8:
34042 return 7;
34043 case CPU_POWER9:
34044 return 6;
34045 default:
34046 return 1;
34050 /* Return how many instructions to look ahead for better insn
34051 scheduling. */
34053 static int
34054 rs6000_use_sched_lookahead (void)
34056 switch (rs6000_cpu_attr)
34058 case CPU_PPC8540:
34059 case CPU_PPC8548:
34060 return 4;
34062 case CPU_CELL:
34063 return (reload_completed ? 8 : 0);
34065 default:
34066 return 0;
34070 /* We are choosing insn from the ready queue. Return zero if INSN can be
34071 chosen. */
34072 static int
34073 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
34075 if (ready_index == 0)
34076 return 0;
34078 if (rs6000_cpu_attr != CPU_CELL)
34079 return 0;
34081 gcc_assert (insn != NULL_RTX && INSN_P (insn));
34083 if (!reload_completed
34084 || is_nonpipeline_insn (insn)
34085 || is_microcoded_insn (insn))
34086 return 1;
34088 return 0;
34091 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
34092 and return true. */
34094 static bool
34095 find_mem_ref (rtx pat, rtx *mem_ref)
34097 const char * fmt;
34098 int i, j;
34100 /* stack_tie does not produce any real memory traffic. */
34101 if (tie_operand (pat, VOIDmode))
34102 return false;
34104 if (GET_CODE (pat) == MEM)
34106 *mem_ref = pat;
34107 return true;
34110 /* Recursively process the pattern. */
34111 fmt = GET_RTX_FORMAT (GET_CODE (pat));
34113 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
34115 if (fmt[i] == 'e')
34117 if (find_mem_ref (XEXP (pat, i), mem_ref))
34118 return true;
34120 else if (fmt[i] == 'E')
34121 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
34123 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
34124 return true;
34128 return false;
34131 /* Determine if PAT is a PATTERN of a load insn. */
34133 static bool
34134 is_load_insn1 (rtx pat, rtx *load_mem)
34136 if (pat == NULL_RTX)
34137 return false;
34139 if (GET_CODE (pat) == SET)
34140 return find_mem_ref (SET_SRC (pat), load_mem);
34142 if (GET_CODE (pat) == PARALLEL)
34144 int i;
34146 for (i = 0; i < XVECLEN (pat, 0); i++)
34147 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
34148 return true;
34151 return false;
34154 /* Determine if INSN loads from memory. */
34156 static bool
34157 is_load_insn (rtx insn, rtx *load_mem)
34159 if (!insn || !INSN_P (insn))
34160 return false;
34162 if (CALL_P (insn))
34163 return false;
34165 return is_load_insn1 (PATTERN (insn), load_mem);
34168 /* Determine if PAT is a PATTERN of a store insn. */
34170 static bool
34171 is_store_insn1 (rtx pat, rtx *str_mem)
34173 if (pat == NULL_RTX)
34174 return false;
34176 if (GET_CODE (pat) == SET)
34177 return find_mem_ref (SET_DEST (pat), str_mem);
34179 if (GET_CODE (pat) == PARALLEL)
34181 int i;
34183 for (i = 0; i < XVECLEN (pat, 0); i++)
34184 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
34185 return true;
34188 return false;
34191 /* Determine if INSN stores to memory. */
34193 static bool
34194 is_store_insn (rtx insn, rtx *str_mem)
34196 if (!insn || !INSN_P (insn))
34197 return false;
34199 return is_store_insn1 (PATTERN (insn), str_mem);
34202 /* Return whether TYPE is a Power9 pairable vector instruction type. */
34204 static bool
34205 is_power9_pairable_vec_type (enum attr_type type)
34207 switch (type)
34209 case TYPE_VECSIMPLE:
34210 case TYPE_VECCOMPLEX:
34211 case TYPE_VECDIV:
34212 case TYPE_VECCMP:
34213 case TYPE_VECPERM:
34214 case TYPE_VECFLOAT:
34215 case TYPE_VECFDIV:
34216 case TYPE_VECDOUBLE:
34217 return true;
34218 default:
34219 break;
34221 return false;
34224 /* Returns whether the dependence between INSN and NEXT is considered
34225 costly by the given target. */
34227 static bool
34228 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
34230 rtx insn;
34231 rtx next;
34232 rtx load_mem, str_mem;
34234 /* If the flag is not enabled - no dependence is considered costly;
34235 allow all dependent insns in the same group.
34236 This is the most aggressive option. */
34237 if (rs6000_sched_costly_dep == no_dep_costly)
34238 return false;
34240 /* If the flag is set to 1 - a dependence is always considered costly;
34241 do not allow dependent instructions in the same group.
34242 This is the most conservative option. */
34243 if (rs6000_sched_costly_dep == all_deps_costly)
34244 return true;
34246 insn = DEP_PRO (dep);
34247 next = DEP_CON (dep);
34249 if (rs6000_sched_costly_dep == store_to_load_dep_costly
34250 && is_load_insn (next, &load_mem)
34251 && is_store_insn (insn, &str_mem))
34252 /* Prevent load after store in the same group. */
34253 return true;
34255 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
34256 && is_load_insn (next, &load_mem)
34257 && is_store_insn (insn, &str_mem)
34258 && DEP_TYPE (dep) == REG_DEP_TRUE
34259 && mem_locations_overlap(str_mem, load_mem))
34260 /* Prevent load after store in the same group if it is a true
34261 dependence. */
34262 return true;
34264 /* The flag is set to X; dependences with latency >= X are considered costly,
34265 and will not be scheduled in the same group. */
34266 if (rs6000_sched_costly_dep <= max_dep_latency
34267 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
34268 return true;
34270 return false;
34273 /* Return the next insn after INSN that is found before TAIL is reached,
34274 skipping any "non-active" insns - insns that will not actually occupy
34275 an issue slot. Return NULL_RTX if such an insn is not found. */
34277 static rtx_insn *
34278 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
34280 if (insn == NULL_RTX || insn == tail)
34281 return NULL;
34283 while (1)
34285 insn = NEXT_INSN (insn);
34286 if (insn == NULL_RTX || insn == tail)
34287 return NULL;
34289 if (CALL_P (insn)
34290 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
34291 || (NONJUMP_INSN_P (insn)
34292 && GET_CODE (PATTERN (insn)) != USE
34293 && GET_CODE (PATTERN (insn)) != CLOBBER
34294 && INSN_CODE (insn) != CODE_FOR_stack_tie))
34295 break;
34297 return insn;
34300 /* Do Power9 specific sched_reorder2 reordering of ready list. */
34302 static int
34303 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
34305 int pos;
34306 int i;
34307 rtx_insn *tmp;
34308 enum attr_type type, type2;
34310 type = get_attr_type (last_scheduled_insn);
34312 /* Try to issue fixed point divides back-to-back in pairs so they will be
34313 routed to separate execution units and execute in parallel. */
34314 if (type == TYPE_DIV && divide_cnt == 0)
34316 /* First divide has been scheduled. */
34317 divide_cnt = 1;
34319 /* Scan the ready list looking for another divide, if found move it
34320 to the end of the list so it is chosen next. */
34321 pos = lastpos;
34322 while (pos >= 0)
34324 if (recog_memoized (ready[pos]) >= 0
34325 && get_attr_type (ready[pos]) == TYPE_DIV)
34327 tmp = ready[pos];
34328 for (i = pos; i < lastpos; i++)
34329 ready[i] = ready[i + 1];
34330 ready[lastpos] = tmp;
34331 break;
34333 pos--;
34336 else
34338 /* Last insn was the 2nd divide or not a divide, reset the counter. */
34339 divide_cnt = 0;
34341 /* The best dispatch throughput for vector and vector load insns can be
34342 achieved by interleaving a vector and vector load such that they'll
34343 dispatch to the same superslice. If this pairing cannot be achieved
34344 then it is best to pair vector insns together and vector load insns
34345 together.
34347 To aid in this pairing, vec_pairing maintains the current state with
34348 the following values:
34350 0 : Initial state, no vecload/vector pairing has been started.
34352 1 : A vecload or vector insn has been issued and a candidate for
34353 pairing has been found and moved to the end of the ready
34354 list. */
34355 if (type == TYPE_VECLOAD)
34357 /* Issued a vecload. */
34358 if (vec_pairing == 0)
34360 int vecload_pos = -1;
34361 /* We issued a single vecload, look for a vector insn to pair it
34362 with. If one isn't found, try to pair another vecload. */
34363 pos = lastpos;
34364 while (pos >= 0)
34366 if (recog_memoized (ready[pos]) >= 0)
34368 type2 = get_attr_type (ready[pos]);
34369 if (is_power9_pairable_vec_type (type2))
34371 /* Found a vector insn to pair with, move it to the
34372 end of the ready list so it is scheduled next. */
34373 tmp = ready[pos];
34374 for (i = pos; i < lastpos; i++)
34375 ready[i] = ready[i + 1];
34376 ready[lastpos] = tmp;
34377 vec_pairing = 1;
34378 return cached_can_issue_more;
34380 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
34381 /* Remember position of first vecload seen. */
34382 vecload_pos = pos;
34384 pos--;
34386 if (vecload_pos >= 0)
34388 /* Didn't find a vector to pair with but did find a vecload,
34389 move it to the end of the ready list. */
34390 tmp = ready[vecload_pos];
34391 for (i = vecload_pos; i < lastpos; i++)
34392 ready[i] = ready[i + 1];
34393 ready[lastpos] = tmp;
34394 vec_pairing = 1;
34395 return cached_can_issue_more;
34399 else if (is_power9_pairable_vec_type (type))
34401 /* Issued a vector operation. */
34402 if (vec_pairing == 0)
34404 int vec_pos = -1;
34405 /* We issued a single vector insn, look for a vecload to pair it
34406 with. If one isn't found, try to pair another vector. */
34407 pos = lastpos;
34408 while (pos >= 0)
34410 if (recog_memoized (ready[pos]) >= 0)
34412 type2 = get_attr_type (ready[pos]);
34413 if (type2 == TYPE_VECLOAD)
34415 /* Found a vecload insn to pair with, move it to the
34416 end of the ready list so it is scheduled next. */
34417 tmp = ready[pos];
34418 for (i = pos; i < lastpos; i++)
34419 ready[i] = ready[i + 1];
34420 ready[lastpos] = tmp;
34421 vec_pairing = 1;
34422 return cached_can_issue_more;
34424 else if (is_power9_pairable_vec_type (type2)
34425 && vec_pos == -1)
34426 /* Remember position of first vector insn seen. */
34427 vec_pos = pos;
34429 pos--;
34431 if (vec_pos >= 0)
34433 /* Didn't find a vecload to pair with but did find a vector
34434 insn, move it to the end of the ready list. */
34435 tmp = ready[vec_pos];
34436 for (i = vec_pos; i < lastpos; i++)
34437 ready[i] = ready[i + 1];
34438 ready[lastpos] = tmp;
34439 vec_pairing = 1;
34440 return cached_can_issue_more;
34445 /* We've either finished a vec/vecload pair, couldn't find an insn to
34446 continue the current pair, or the last insn had nothing to do with
34447 pairing.  In any case, reset the state.  */
34448 vec_pairing = 0;
34451 return cached_can_issue_more;
34454 /* We are about to begin issuing insns for this clock cycle. */
34456 static int
34457 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
34458 rtx_insn **ready ATTRIBUTE_UNUSED,
34459 int *pn_ready ATTRIBUTE_UNUSED,
34460 int clock_var ATTRIBUTE_UNUSED)
34462 int n_ready = *pn_ready;
34464 if (sched_verbose)
34465 fprintf (dump, "// rs6000_sched_reorder :\n");
34467 /* Reorder the ready list if the insn at its head (the next one
34468    to be issued) is a nonpipelined insn.  */
34469 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
34471 if (is_nonpipeline_insn (ready[n_ready - 1])
34472 && (recog_memoized (ready[n_ready - 2]) > 0))
34473 /* Simply swap first two insns. */
34474 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
34477 if (rs6000_cpu == PROCESSOR_POWER6)
34478 load_store_pendulum = 0;
34480 return rs6000_issue_rate ();
34483 /* Like rs6000_sched_reorder, but called after issuing each insn. */
34485 static int
34486 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
34487 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
34489 if (sched_verbose)
34490 fprintf (dump, "// rs6000_sched_reorder2 :\n");
34492 /* For Power6, we need to handle some special cases to try and keep the
34493 store queue from overflowing and triggering expensive flushes.
34495 This code monitors how load and store instructions are being issued
34496 and skews the ready list one way or the other to increase the likelihood
34497 that a desired instruction is issued at the proper time.
34499 A couple of things are done. First, we maintain a "load_store_pendulum"
34500 to track the current state of load/store issue.
34502 - If the pendulum is at zero, then no loads or stores have been
34503 issued in the current cycle so we do nothing.
34505 - If the pendulum is 1, then a single load has been issued in this
34506 cycle and we attempt to locate another load in the ready list to
34507 issue with it.
34509 - If the pendulum is -2, then two stores have already been
34510 issued in this cycle, so we increase the priority of the first load
34511 in the ready list to increase its likelihood of being chosen first
34512 in the next cycle.
34514 - If the pendulum is -1, then a single store has been issued in this
34515 cycle and we attempt to locate another store in the ready list to
34516 issue with it, preferring a store to an adjacent memory location to
34517 facilitate store pairing in the store queue.
34519 - If the pendulum is 2, then two loads have already been
34520 issued in this cycle, so we increase the priority of the first store
34521 in the ready list to increase its likelihood of being chosen first
34522 in the next cycle.
34524 - If the pendulum < -2 or > 2, then do nothing.
34526 Note: This code covers the most common scenarios. There exist
34527 non-load/store instructions which make use of the LSU and which
34528 would need to be accounted for to strictly model the behavior
34529 of the machine. Those instructions are currently unaccounted
34530 for to help minimize the compile-time overhead of this code.
34532 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
34534 int pos;
34535 int i;
34536 rtx_insn *tmp;
34537 rtx load_mem, str_mem;
34539 if (is_store_insn (last_scheduled_insn, &str_mem))
34540 /* Issuing a store, swing the load_store_pendulum to the left */
34541 load_store_pendulum--;
34542 else if (is_load_insn (last_scheduled_insn, &load_mem))
34543 /* Issuing a load, swing the load_store_pendulum to the right */
34544 load_store_pendulum++;
34545 else
34546 return cached_can_issue_more;
34548 /* If the pendulum is balanced, or there is only one instruction on
34549 the ready list, then all is well, so return. */
34550 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
34551 return cached_can_issue_more;
34553 if (load_store_pendulum == 1)
34555 /* A load has been issued in this cycle. Scan the ready list
34556 for another load to issue with it */
34557 pos = *pn_ready - 1;
34559 while (pos >= 0)
34561 if (is_load_insn (ready[pos], &load_mem))
34563 /* Found a load. Move it to the head of the ready list,
34564 and adjust its priority so that it is more likely to
34565 stay there */
34566 tmp = ready[pos];
34567 for (i = pos; i < *pn_ready - 1; i++)
34568 ready[i] = ready[i + 1];
34569 ready[*pn_ready-1] = tmp;
34571 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34572 INSN_PRIORITY (tmp)++;
34573 break;
34575 pos--;
34578 else if (load_store_pendulum == -2)
34580 /* Two stores have been issued in this cycle. Increase the
34581 priority of the first load in the ready list to favor it for
34582 issuing in the next cycle. */
34583 pos = *pn_ready - 1;
34585 while (pos >= 0)
34587 if (is_load_insn (ready[pos], &load_mem)
34588 && !sel_sched_p ()
34589 && INSN_PRIORITY_KNOWN (ready[pos]))
34591 INSN_PRIORITY (ready[pos])++;
34593 /* Adjust the pendulum to account for the fact that a load
34594 was found and increased in priority. This is to prevent
34595 increasing the priority of multiple loads */
34596 load_store_pendulum--;
34598 break;
34600 pos--;
34603 else if (load_store_pendulum == -1)
34605 /* A store has been issued in this cycle. Scan the ready list for
34606 another store to issue with it, preferring a store to an adjacent
34607 memory location */
34608 int first_store_pos = -1;
34610 pos = *pn_ready - 1;
34612 while (pos >= 0)
34614 if (is_store_insn (ready[pos], &str_mem))
34616 rtx str_mem2;
34617 /* Maintain the index of the first store found on the
34618 list */
34619 if (first_store_pos == -1)
34620 first_store_pos = pos;
34622 if (is_store_insn (last_scheduled_insn, &str_mem2)
34623 && adjacent_mem_locations (str_mem, str_mem2))
34625 /* Found an adjacent store. Move it to the head of the
34626 ready list, and adjust its priority so that it is
34627 more likely to stay there */
34628 tmp = ready[pos];
34629 for (i = pos; i < *pn_ready - 1; i++)
34630 ready[i] = ready[i + 1];
34631 ready[*pn_ready-1] = tmp;
34633 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34634 INSN_PRIORITY (tmp)++;
34636 first_store_pos = -1;
34638 break;
34641 pos--;
34644 if (first_store_pos >= 0)
34646 /* An adjacent store wasn't found, but a non-adjacent store was,
34647 so move the non-adjacent store to the front of the ready
34648 list, and adjust its priority so that it is more likely to
34649 stay there. */
34650 tmp = ready[first_store_pos];
34651 for (i = first_store_pos; i < *pn_ready - 1; i++)
34652 ready[i] = ready[i + 1];
34653 ready[*pn_ready-1] = tmp;
34654 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34655 INSN_PRIORITY (tmp)++;
34658 else if (load_store_pendulum == 2)
34660 /* Two loads have been issued in this cycle. Increase the priority
34661 of the first store in the ready list to favor it for issuing in
34662 the next cycle. */
34663 pos = *pn_ready - 1;
34665 while (pos >= 0)
34667 if (is_store_insn (ready[pos], &str_mem)
34668 && !sel_sched_p ()
34669 && INSN_PRIORITY_KNOWN (ready[pos]))
34671 INSN_PRIORITY (ready[pos])++;
34673 /* Adjust the pendulum to account for the fact that a store
34674 was found and increased in priority. This is to prevent
34675 increasing the priority of multiple stores */
34676 load_store_pendulum++;
34678 break;
34680 pos--;
34685 /* Do Power9 dependent reordering if necessary. */
34686 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
34687 && recog_memoized (last_scheduled_insn) >= 0)
34688 return power9_sched_reorder2 (ready, *pn_ready - 1);
34690 return cached_can_issue_more;
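/* The pendulum bookkeeping above is easier to audit in isolation.  The
   following stand-alone sketch (disabled; illustrative only, not part
   of the build) models the two moving parts: the pendulum update and
   the rotate-to-back-of-ready-list trick.  All names are invented for
   the example; recall that the scheduler picks insns from the back of
   the ready list.  */
#if 0
#include <stdio.h>

/* Model of the Power6 pendulum: positive counts loads issued this
   cycle, negative counts stores.  */
static int
model_pendulum (int pendulum, int issued_load, int issued_store)
{
  if (issued_store)
    pendulum--;                 /* swing left */
  else if (issued_load)
    pendulum++;                 /* swing right */
  return pendulum;
}

/* Rotate READY[POS] to the back of the list, exactly as the loops in
   rs6000_sched_reorder2 do.  */
static void
rotate_to_back (int *ready, int n_ready, int pos)
{
  int tmp = ready[pos];
  for (int i = pos; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];
  ready[n_ready - 1] = tmp;
}

int
main (void)
{
  int ready[4] = { 10, 11, 12, 13 };
  rotate_to_back (ready, 4, 1);   /* ready becomes 10 12 13 11 */
  printf ("%d %d %d %d\n", ready[0], ready[1], ready[2], ready[3]);
  printf ("%d\n", model_pendulum (0, 1, 0));   /* 1: one load issued */
  return 0;
}
#endif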
34693 /* Return whether the presence of INSN causes a dispatch group termination
34694 of group WHICH_GROUP.
34696 If WHICH_GROUP == current_group, this function will return true if INSN
34697 causes the termination of the current group (i.e, the dispatch group to
34698 which INSN belongs). This means that INSN will be the last insn in the
34699 group it belongs to.
34701 If WHICH_GROUP == previous_group, this function will return true if INSN
34702 causes the termination of the previous group (i.e, the dispatch group that
34703 precedes the group to which INSN belongs). This means that INSN will be
34704 the first insn in the group it belongs to. */
34706 static bool
34707 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
34709 bool first, last;
34711 if (! insn)
34712 return false;
34714 first = insn_must_be_first_in_group (insn);
34715 last = insn_must_be_last_in_group (insn);
34717 if (first && last)
34718 return true;
34720 if (which_group == current_group)
34721 return last;
34722 else if (which_group == previous_group)
34723 return first;
34725 return false;
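/* A quick, disabled sanity check of the WHICH_GROUP semantics above;
   the insn attributes are replaced by plain booleans, and all names
   are invented for the illustration.  */
#if 0
#include <assert.h>
#include <stdbool.h>

/* Mirror of the decision logic in insn_terminates_group_p.  */
static bool
terminates_p (bool first, bool last, bool asking_about_current_group)
{
  if (first && last)
    return true;
  return asking_about_current_group ? last : first;
}

int
main (void)
{
  /* An insn that must be last in its group terminates the current
     group ...  */
  assert (terminates_p (false, true, true));
  /* ... while one that must be first terminates the previous group.  */
  assert (terminates_p (true, false, false));
  /* An unconstrained insn terminates neither.  */
  assert (!terminates_p (false, false, true));
  assert (!terminates_p (false, false, false));
  return 0;
}
#endif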
34729 static bool
34730 insn_must_be_first_in_group (rtx_insn *insn)
34732 enum attr_type type;
34734 if (!insn
34735 || NOTE_P (insn)
34736 || DEBUG_INSN_P (insn)
34737 || GET_CODE (PATTERN (insn)) == USE
34738 || GET_CODE (PATTERN (insn)) == CLOBBER)
34739 return false;
34741 switch (rs6000_cpu)
34743 case PROCESSOR_POWER5:
34744 if (is_cracked_insn (insn))
34745 return true;
34746 /* FALLTHRU */
34747 case PROCESSOR_POWER4:
34748 if (is_microcoded_insn (insn))
34749 return true;
34751 if (!rs6000_sched_groups)
34752 return false;
34754 type = get_attr_type (insn);
34756 switch (type)
34758 case TYPE_MFCR:
34759 case TYPE_MFCRF:
34760 case TYPE_MTCR:
34761 case TYPE_DELAYED_CR:
34762 case TYPE_CR_LOGICAL:
34763 case TYPE_MTJMPR:
34764 case TYPE_MFJMPR:
34765 case TYPE_DIV:
34766 case TYPE_LOAD_L:
34767 case TYPE_STORE_C:
34768 case TYPE_ISYNC:
34769 case TYPE_SYNC:
34770 return true;
34771 default:
34772 break;
34774 break;
34775 case PROCESSOR_POWER6:
34776 type = get_attr_type (insn);
34778 switch (type)
34780 case TYPE_EXTS:
34781 case TYPE_CNTLZ:
34782 case TYPE_TRAP:
34783 case TYPE_MUL:
34784 case TYPE_INSERT:
34785 case TYPE_FPCOMPARE:
34786 case TYPE_MFCR:
34787 case TYPE_MTCR:
34788 case TYPE_MFJMPR:
34789 case TYPE_MTJMPR:
34790 case TYPE_ISYNC:
34791 case TYPE_SYNC:
34792 case TYPE_LOAD_L:
34793 case TYPE_STORE_C:
34794 return true;
34795 case TYPE_SHIFT:
34796 if (get_attr_dot (insn) == DOT_NO
34797 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34798 return true;
34799 else
34800 break;
34801 case TYPE_DIV:
34802 if (get_attr_size (insn) == SIZE_32)
34803 return true;
34804 else
34805 break;
34806 case TYPE_LOAD:
34807 case TYPE_STORE:
34808 case TYPE_FPLOAD:
34809 case TYPE_FPSTORE:
34810 if (get_attr_update (insn) == UPDATE_YES)
34811 return true;
34812 else
34813 break;
34814 default:
34815 break;
34817 break;
34818 case PROCESSOR_POWER7:
34819 type = get_attr_type (insn);
34821 switch (type)
34823 case TYPE_CR_LOGICAL:
34824 case TYPE_MFCR:
34825 case TYPE_MFCRF:
34826 case TYPE_MTCR:
34827 case TYPE_DIV:
34828 case TYPE_ISYNC:
34829 case TYPE_LOAD_L:
34830 case TYPE_STORE_C:
34831 case TYPE_MFJMPR:
34832 case TYPE_MTJMPR:
34833 return true;
34834 case TYPE_MUL:
34835 case TYPE_SHIFT:
34836 case TYPE_EXTS:
34837 if (get_attr_dot (insn) == DOT_YES)
34838 return true;
34839 else
34840 break;
34841 case TYPE_LOAD:
34842 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34843 || get_attr_update (insn) == UPDATE_YES)
34844 return true;
34845 else
34846 break;
34847 case TYPE_STORE:
34848 case TYPE_FPLOAD:
34849 case TYPE_FPSTORE:
34850 if (get_attr_update (insn) == UPDATE_YES)
34851 return true;
34852 else
34853 break;
34854 default:
34855 break;
34857 break;
34858 case PROCESSOR_POWER8:
34859 type = get_attr_type (insn);
34861 switch (type)
34863 case TYPE_CR_LOGICAL:
34864 case TYPE_DELAYED_CR:
34865 case TYPE_MFCR:
34866 case TYPE_MFCRF:
34867 case TYPE_MTCR:
34868 case TYPE_SYNC:
34869 case TYPE_ISYNC:
34870 case TYPE_LOAD_L:
34871 case TYPE_STORE_C:
34872 case TYPE_VECSTORE:
34873 case TYPE_MFJMPR:
34874 case TYPE_MTJMPR:
34875 return true;
34876 case TYPE_SHIFT:
34877 case TYPE_EXTS:
34878 case TYPE_MUL:
34879 if (get_attr_dot (insn) == DOT_YES)
34880 return true;
34881 else
34882 break;
34883 case TYPE_LOAD:
34884 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34885 || get_attr_update (insn) == UPDATE_YES)
34886 return true;
34887 else
34888 break;
34889 case TYPE_STORE:
34890 if (get_attr_update (insn) == UPDATE_YES
34891 && get_attr_indexed (insn) == INDEXED_YES)
34892 return true;
34893 else
34894 break;
34895 default:
34896 break;
34898 break;
34899 default:
34900 break;
34903 return false;
34906 static bool
34907 insn_must_be_last_in_group (rtx_insn *insn)
34909 enum attr_type type;
34911 if (!insn
34912 || NOTE_P (insn)
34913 || DEBUG_INSN_P (insn)
34914 || GET_CODE (PATTERN (insn)) == USE
34915 || GET_CODE (PATTERN (insn)) == CLOBBER)
34916 return false;
34918 switch (rs6000_cpu) {
34919 case PROCESSOR_POWER4:
34920 case PROCESSOR_POWER5:
34921 if (is_microcoded_insn (insn))
34922 return true;
34924 if (is_branch_slot_insn (insn))
34925 return true;
34927 break;
34928 case PROCESSOR_POWER6:
34929 type = get_attr_type (insn);
34931 switch (type)
34933 case TYPE_EXTS:
34934 case TYPE_CNTLZ:
34935 case TYPE_TRAP:
34936 case TYPE_MUL:
34937 case TYPE_FPCOMPARE:
34938 case TYPE_MFCR:
34939 case TYPE_MTCR:
34940 case TYPE_MFJMPR:
34941 case TYPE_MTJMPR:
34942 case TYPE_ISYNC:
34943 case TYPE_SYNC:
34944 case TYPE_LOAD_L:
34945 case TYPE_STORE_C:
34946 return true;
34947 case TYPE_SHIFT:
34948 if (get_attr_dot (insn) == DOT_NO
34949 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34950 return true;
34951 else
34952 break;
34953 case TYPE_DIV:
34954 if (get_attr_size (insn) == SIZE_32)
34955 return true;
34956 else
34957 break;
34958 default:
34959 break;
34961 break;
34962 case PROCESSOR_POWER7:
34963 type = get_attr_type (insn);
34965 switch (type)
34967 case TYPE_ISYNC:
34968 case TYPE_SYNC:
34969 case TYPE_LOAD_L:
34970 case TYPE_STORE_C:
34971 return true;
34972 case TYPE_LOAD:
34973 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34974 && get_attr_update (insn) == UPDATE_YES)
34975 return true;
34976 else
34977 break;
34978 case TYPE_STORE:
34979 if (get_attr_update (insn) == UPDATE_YES
34980 && get_attr_indexed (insn) == INDEXED_YES)
34981 return true;
34982 else
34983 break;
34984 default:
34985 break;
34987 break;
34988 case PROCESSOR_POWER8:
34989 type = get_attr_type (insn);
34991 switch (type)
34993 case TYPE_MFCR:
34994 case TYPE_MTCR:
34995 case TYPE_ISYNC:
34996 case TYPE_SYNC:
34997 case TYPE_LOAD_L:
34998 case TYPE_STORE_C:
34999 return true;
35000 case TYPE_LOAD:
35001 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
35002 && get_attr_update (insn) == UPDATE_YES)
35003 return true;
35004 else
35005 break;
35006 case TYPE_STORE:
35007 if (get_attr_update (insn) == UPDATE_YES
35008 && get_attr_indexed (insn) == INDEXED_YES)
35009 return true;
35010 else
35011 break;
35012 default:
35013 break;
35015 break;
35016 default:
35017 break;
35020 return false;
35023 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
35024 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
35026 static bool
35027 is_costly_group (rtx *group_insns, rtx next_insn)
35029 int i;
35030 int issue_rate = rs6000_issue_rate ();
35032 for (i = 0; i < issue_rate; i++)
35034 sd_iterator_def sd_it;
35035 dep_t dep;
35036 rtx insn = group_insns[i];
35038 if (!insn)
35039 continue;
35041 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
35043 rtx next = DEP_CON (dep);
35045 if (next == next_insn
35046 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
35047 return true;
35051 return false;
35054 /* Helper used by the function redefine_groups.
35055 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
35056 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
35057 to keep it "far" (in a separate group) from GROUP_INSNS, following
35058 one of the following schemes, depending on the value of the flag
35059 -minsert-sched-nops = X:
35060 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
35061 in order to force NEXT_INSN into a separate group.
35062 (2) X < sched_finish_regroup_exact: insert exactly X nops.
35063 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
35064 insertion (has a group just ended, how many vacant issue slots remain in the
35065 last group, and how many dispatch groups were encountered so far). */
35067 static int
35068 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
35069 rtx_insn *next_insn, bool *group_end, int can_issue_more,
35070 int *group_count)
35072 rtx nop;
35073 bool force;
35074 int issue_rate = rs6000_issue_rate ();
35075 bool end = *group_end;
35076 int i;
35078 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
35079 return can_issue_more;
35081 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
35082 return can_issue_more;
35084 force = is_costly_group (group_insns, next_insn);
35085 if (!force)
35086 return can_issue_more;
35088 if (sched_verbose > 6)
35089 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
35090 *group_count, can_issue_more);
35092 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
35094 if (*group_end)
35095 can_issue_more = 0;
35097 /* Since only a branch can be issued in the last issue_slot, it is
35098 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
35099 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
35100 in this case the last nop will start a new group and the branch
35101 will be forced to the new group. */
35102 if (can_issue_more && !is_branch_slot_insn (next_insn))
35103 can_issue_more--;
35105 /* Do we have a special group ending nop? */
35106 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
35107 || rs6000_cpu_attr == CPU_POWER8)
35109 nop = gen_group_ending_nop ();
35110 emit_insn_before (nop, next_insn);
35111 can_issue_more = 0;
35113 else
35114 while (can_issue_more > 0)
35116 nop = gen_nop ();
35117 emit_insn_before (nop, next_insn);
35118 can_issue_more--;
35121 *group_end = true;
35122 return 0;
35125 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
35127 int n_nops = rs6000_sched_insert_nops;
35129 /* Nops can't be issued from the branch slot, so the effective
35130 issue_rate for nops is 'issue_rate - 1'. */
35131 if (can_issue_more == 0)
35132 can_issue_more = issue_rate;
35133 can_issue_more--;
35134 if (can_issue_more == 0)
35136 can_issue_more = issue_rate - 1;
35137 (*group_count)++;
35138 end = true;
35139 for (i = 0; i < issue_rate; i++)
35141 group_insns[i] = 0;
35145 while (n_nops > 0)
35147 nop = gen_nop ();
35148 emit_insn_before (nop, next_insn);
35149 if (can_issue_more == issue_rate - 1) /* new group begins */
35150 end = false;
35151 can_issue_more--;
35152 if (can_issue_more == 0)
35154 can_issue_more = issue_rate - 1;
35155 (*group_count)++;
35156 end = true;
35157 for (i = 0; i < issue_rate; i++)
35159 group_insns[i] = 0;
35162 n_nops--;
35165 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
35166 can_issue_more++;
35168 /* Is next_insn going to start a new group? */
35169 *group_end
35170 = (end
35171 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35172 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35173 || (can_issue_more < issue_rate
35174 && insn_terminates_group_p (next_insn, previous_group)));
35175 if (*group_end && end)
35176 (*group_count)--;
35178 if (sched_verbose > 6)
35179 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
35180 *group_count, can_issue_more);
35181 return can_issue_more;
35184 return can_issue_more;
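/* The 'issue_rate - 1' bookkeeping above is the subtle part: a nop can
   never occupy the branch slot, so a stream of nops fills groups at an
   effective rate of issue_rate - 1.  A disabled sketch of just that
   arithmetic, with invented names:  */
#if 0
/* Count how many dispatch groups N_NOPS nops close, starting with
   CAN_ISSUE_MORE vacant non-branch slots in the current group.  */
static int
groups_closed_by_nops (int n_nops, int issue_rate, int can_issue_more)
{
  int groups = 0;
  while (n_nops-- > 0)
    {
      can_issue_more--;
      if (can_issue_more == 0)
        {
          can_issue_more = issue_rate - 1;
          groups++;
        }
    }
  return groups;
}
#endif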
35187 /* This function tries to synchronize the dispatch groups that the compiler "sees"
35188 with the dispatch groups that the processor dispatcher is expected to
35189 form in practice. It tries to achieve this synchronization by forcing the
35190 estimated processor grouping on the compiler (as opposed to the function
35191 'pad_groups' which tries to force the scheduler's grouping on the processor).
35193 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
35194 examines the (estimated) dispatch groups that will be formed by the processor
35195 dispatcher. It marks these group boundaries to reflect the estimated
35196 processor grouping, overriding the grouping that the scheduler had marked.
35197 Depending on the value of the flag '-minsert-sched-nops' this function can
35198 force certain insns into separate groups or force a certain distance between
35199 them by inserting nops, for example, if there exists a "costly dependence"
35200 between the insns.
35202 The function estimates the group boundaries that the processor will form as
35203 follows: It keeps track of how many vacant issue slots are available after
35204 each insn. A subsequent insn will start a new group if one of the following
35205 4 cases applies:
35206 - no more vacant issue slots remain in the current dispatch group.
35207 - only the last issue slot, which is the branch slot, is vacant, but the next
35208 insn is not a branch.
35209 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
35210 which means that a cracked insn (which occupies two issue slots) can't be
35211 issued in this group.
35212 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
35213 start a new group. */
35215 static int
35216 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
35217 rtx_insn *tail)
35219 rtx_insn *insn, *next_insn;
35220 int issue_rate;
35221 int can_issue_more;
35222 int slot, i;
35223 bool group_end;
35224 int group_count = 0;
35225 rtx *group_insns;
35227 /* Initialize. */
35228 issue_rate = rs6000_issue_rate ();
35229 group_insns = XALLOCAVEC (rtx, issue_rate);
35230 for (i = 0; i < issue_rate; i++)
35232 group_insns[i] = 0;
35234 can_issue_more = issue_rate;
35235 slot = 0;
35236 insn = get_next_active_insn (prev_head_insn, tail);
35237 group_end = false;
35239 while (insn != NULL_RTX)
35241 slot = (issue_rate - can_issue_more);
35242 group_insns[slot] = insn;
35243 can_issue_more =
35244 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
35245 if (insn_terminates_group_p (insn, current_group))
35246 can_issue_more = 0;
35248 next_insn = get_next_active_insn (insn, tail);
35249 if (next_insn == NULL_RTX)
35250 return group_count + 1;
35252 /* Is next_insn going to start a new group? */
35253 group_end
35254 = (can_issue_more == 0
35255 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35256 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35257 || (can_issue_more < issue_rate
35258 && insn_terminates_group_p (next_insn, previous_group)));
35260 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
35261 next_insn, &group_end, can_issue_more,
35262 &group_count);
35264 if (group_end)
35266 group_count++;
35267 can_issue_more = 0;
35268 for (i = 0; i < issue_rate; i++)
35270 group_insns[i] = 0;
35274 if (GET_MODE (next_insn) == TImode && can_issue_more)
35275 PUT_MODE (next_insn, VOIDmode);
35276 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
35277 PUT_MODE (next_insn, TImode);
35279 insn = next_insn;
35280 if (can_issue_more == 0)
35281 can_issue_more = issue_rate;
35282 } /* while */
35284 return group_count;
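/* The four "new group" cases from the comment above collapse into one
   predicate.  The disabled restatement below uses plain booleans so
   the cases are easy to audit; the names are invented.  */
#if 0
#include <stdbool.h>

/* Restatement of the group_end computation in redefine_groups.  */
static bool
starts_new_group (int can_issue_more, int issue_rate,
                  bool next_is_branch, bool next_is_cracked,
                  bool next_must_be_first)
{
  return (can_issue_more == 0                          /* no slots left */
          || (can_issue_more == 1 && !next_is_branch)  /* only branch slot */
          || (can_issue_more <= 2 && next_is_cracked)  /* cracked needs 2 */
          || (can_issue_more < issue_rate && next_must_be_first));
}
#endif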
35287 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
35288 dispatch group boundaries that the scheduler had marked. Pad with nops
35289 any dispatch groups which have vacant issue slots, in order to force the
35290 scheduler's grouping on the processor dispatcher. The function
35291 returns the number of dispatch groups found. */
35293 static int
35294 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
35295 rtx_insn *tail)
35297 rtx_insn *insn, *next_insn;
35298 rtx nop;
35299 int issue_rate;
35300 int can_issue_more;
35301 int group_end;
35302 int group_count = 0;
35304 /* Initialize issue_rate. */
35305 issue_rate = rs6000_issue_rate ();
35306 can_issue_more = issue_rate;
35308 insn = get_next_active_insn (prev_head_insn, tail);
35309 next_insn = get_next_active_insn (insn, tail);
35311 while (insn != NULL_RTX)
35313 can_issue_more =
35314 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
35316 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
35318 if (next_insn == NULL_RTX)
35319 break;
35321 if (group_end)
35323 /* If the scheduler had marked group termination at this location
35324 (between insn and next_insn), and neither insn nor next_insn will
35325 force group termination, pad the group with nops to force group
35326 termination. */
35327 if (can_issue_more
35328 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
35329 && !insn_terminates_group_p (insn, current_group)
35330 && !insn_terminates_group_p (next_insn, previous_group))
35332 if (!is_branch_slot_insn (next_insn))
35333 can_issue_more--;
35335 while (can_issue_more)
35337 nop = gen_nop ();
35338 emit_insn_before (nop, next_insn);
35339 can_issue_more--;
35343 can_issue_more = issue_rate;
35344 group_count++;
35347 insn = next_insn;
35348 next_insn = get_next_active_insn (insn, tail);
35351 return group_count;
35354 /* We're beginning a new block. Initialize data structures as necessary. */
35356 static void
35357 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
35358 int sched_verbose ATTRIBUTE_UNUSED,
35359 int max_ready ATTRIBUTE_UNUSED)
35361 last_scheduled_insn = NULL;
35362 load_store_pendulum = 0;
35363 divide_cnt = 0;
35364 vec_pairing = 0;
35367 /* The following function is called at the end of scheduling BB.
35368 After reload, it inserts nops to enforce insn group bundling. */
35370 static void
35371 rs6000_sched_finish (FILE *dump, int sched_verbose)
35373 int n_groups;
35375 if (sched_verbose)
35376 fprintf (dump, "=== Finishing schedule.\n");
35378 if (reload_completed && rs6000_sched_groups)
35380 /* Do not run sched_finish hook when selective scheduling enabled. */
35381 if (sel_sched_p ())
35382 return;
35384 if (rs6000_sched_insert_nops == sched_finish_none)
35385 return;
35387 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
35388 n_groups = pad_groups (dump, sched_verbose,
35389 current_sched_info->prev_head,
35390 current_sched_info->next_tail);
35391 else
35392 n_groups = redefine_groups (dump, sched_verbose,
35393 current_sched_info->prev_head,
35394 current_sched_info->next_tail);
35396 if (sched_verbose >= 6)
35398 fprintf (dump, "ngroups = %d\n", n_groups);
35399 print_rtl (dump, current_sched_info->prev_head);
35400 fprintf (dump, "Done finish_sched\n");
35405 struct rs6000_sched_context
35407 short cached_can_issue_more;
35408 rtx_insn *last_scheduled_insn;
35409 int load_store_pendulum;
35410 int divide_cnt;
35411 int vec_pairing;
35414 typedef struct rs6000_sched_context rs6000_sched_context_def;
35415 typedef rs6000_sched_context_def *rs6000_sched_context_t;
35417 /* Allocate store for new scheduling context. */
35418 static void *
35419 rs6000_alloc_sched_context (void)
35421 return xmalloc (sizeof (rs6000_sched_context_def));
35424 /* If CLEAN_P is true, initialize _SC with clean data;
35425 otherwise initialize it from the global context. */
35426 static void
35427 rs6000_init_sched_context (void *_sc, bool clean_p)
35429 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
35431 if (clean_p)
35433 sc->cached_can_issue_more = 0;
35434 sc->last_scheduled_insn = NULL;
35435 sc->load_store_pendulum = 0;
35436 sc->divide_cnt = 0;
35437 sc->vec_pairing = 0;
35439 else
35441 sc->cached_can_issue_more = cached_can_issue_more;
35442 sc->last_scheduled_insn = last_scheduled_insn;
35443 sc->load_store_pendulum = load_store_pendulum;
35444 sc->divide_cnt = divide_cnt;
35445 sc->vec_pairing = vec_pairing;
35449 /* Sets the global scheduling context to the one pointed to by _SC. */
35450 static void
35451 rs6000_set_sched_context (void *_sc)
35453 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
35455 gcc_assert (sc != NULL);
35457 cached_can_issue_more = sc->cached_can_issue_more;
35458 last_scheduled_insn = sc->last_scheduled_insn;
35459 load_store_pendulum = sc->load_store_pendulum;
35460 divide_cnt = sc->divide_cnt;
35461 vec_pairing = sc->vec_pairing;
35464 /* Free _SC. */
35465 static void
35466 rs6000_free_sched_context (void *_sc)
35468 gcc_assert (_sc != NULL);
35470 free (_sc);
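/* The five hooks above implement a simple save/restore protocol that
   the selective scheduler drives.  A disabled sketch of the expected
   round trip; the driver function itself is invented for the
   illustration.  */
#if 0
static void
sched_context_round_trip (void)
{
  void *ctx = rs6000_alloc_sched_context ();
  /* Snapshot the current global scheduling state.  */
  rs6000_init_sched_context (ctx, false);
  /* ... the scheduler speculatively mutates the globals here ...  */
  /* Roll the globals back to the snapshot, then release it.  */
  rs6000_set_sched_context (ctx);
  rs6000_free_sched_context (ctx);
}
#endif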
35473 static bool
35474 rs6000_sched_can_speculate_insn (rtx_insn *insn)
35476 switch (get_attr_type (insn))
35478 case TYPE_DIV:
35479 case TYPE_SDIV:
35480 case TYPE_DDIV:
35481 case TYPE_VECDIV:
35482 case TYPE_SSQRT:
35483 case TYPE_DSQRT:
35484 return false;
35486 default:
35487 return true;
35491 /* Length in units of the trampoline for entering a nested function. */
35494 rs6000_trampoline_size (void)
35496 int ret = 0;
35498 switch (DEFAULT_ABI)
35500 default:
35501 gcc_unreachable ();
35503 case ABI_AIX:
35504 ret = (TARGET_32BIT) ? 12 : 24;
35505 break;
35507 case ABI_ELFv2:
35508 gcc_assert (!TARGET_32BIT);
35509 ret = 32;
35510 break;
35512 case ABI_DARWIN:
35513 case ABI_V4:
35514 ret = (TARGET_32BIT) ? 40 : 48;
35515 break;
35518 return ret;
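/* The switch above, restated as a table for reference (disabled and
   unused; the -1 entry marks a combination the ABI does not support,
   per the gcc_assert in the ELFv2 case):  */
#if 0
static const struct { const char *abi; int bytes_32bit; int bytes_64bit; }
trampoline_sizes[] = {
  { "AIX",    12, 24 },  /* a 3-word function descriptor */
  { "ELFv2",  -1, 32 },  /* 64-bit only */
  { "Darwin", 40, 48 },
  { "V4",     40, 48 },
};
#endif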
35521 /* Emit RTL insns to initialize the variable parts of a trampoline.
35522 FNADDR is an RTX for the address of the function's pure code.
35523 CXT is an RTX for the static chain value for the function. */
35525 static void
35526 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
35528 int regsize = (TARGET_32BIT) ? 4 : 8;
35529 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
35530 rtx ctx_reg = force_reg (Pmode, cxt);
35531 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
35533 switch (DEFAULT_ABI)
35535 default:
35536 gcc_unreachable ();
35538 /* Under AIX, just build the 3-word function descriptor. */
35539 case ABI_AIX:
35541 rtx fnmem, fn_reg, toc_reg;
35543 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
35544 error ("you cannot take the address of a nested function if you use "
35545 "the -mno-pointers-to-nested-functions option");
35547 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
35548 fn_reg = gen_reg_rtx (Pmode);
35549 toc_reg = gen_reg_rtx (Pmode);
35551 /* Macro to shorten the code expansions below. */
35552 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
35554 m_tramp = replace_equiv_address (m_tramp, addr);
35556 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
35557 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
35558 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
35559 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
35560 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
35562 # undef MEM_PLUS
35564 break;
35566 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
35567 case ABI_ELFv2:
35568 case ABI_DARWIN:
35569 case ABI_V4:
35570 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
35571 LCT_NORMAL, VOIDmode,
35572 addr, Pmode,
35573 GEN_INT (rs6000_trampoline_size ()), SImode,
35574 fnaddr, Pmode,
35575 ctx_reg, Pmode);
35576 break;
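/* For the AIX case above, the trampoline that gets filled in is just a
   function descriptor: two words copied from the target function's own
   descriptor plus the static chain.  A disabled, illustrative layout;
   the struct and field names are invented.  */
#if 0
struct aix_trampoline
{
  void *entry;         /* copied from MEM_PLUS (fnmem, 0) */
  void *toc;           /* copied from MEM_PLUS (fnmem, regsize) */
  void *static_chain;  /* ctx_reg */
};
#endif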
35581 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
35582 identifier as an argument, so the front end shouldn't look it up. */
35584 static bool
35585 rs6000_attribute_takes_identifier_p (const_tree attr_id)
35587 return is_attribute_p ("altivec", attr_id);
35590 /* Handle the "altivec" attribute. The attribute may have
35591 arguments as follows:
35593 __attribute__((altivec(vector__)))
35594 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
35595 __attribute__((altivec(bool__))) (always followed by 'unsigned')
35597 and may appear more than once (e.g., 'vector bool char') in a
35598 given declaration. */
35600 static tree
35601 rs6000_handle_altivec_attribute (tree *node,
35602 tree name ATTRIBUTE_UNUSED,
35603 tree args,
35604 int flags ATTRIBUTE_UNUSED,
35605 bool *no_add_attrs)
35607 tree type = *node, result = NULL_TREE;
35608 machine_mode mode;
35609 int unsigned_p;
35610 char altivec_type
35611 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
35612 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
35613 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
35614 : '?');
35616 while (POINTER_TYPE_P (type)
35617 || TREE_CODE (type) == FUNCTION_TYPE
35618 || TREE_CODE (type) == METHOD_TYPE
35619 || TREE_CODE (type) == ARRAY_TYPE)
35620 type = TREE_TYPE (type);
35622 mode = TYPE_MODE (type);
35624 /* Check for invalid AltiVec type qualifiers. */
35625 if (type == long_double_type_node)
35626 error ("use of %<long double%> in AltiVec types is invalid");
35627 else if (type == boolean_type_node)
35628 error ("use of boolean types in AltiVec types is invalid");
35629 else if (TREE_CODE (type) == COMPLEX_TYPE)
35630 error ("use of %<complex%> in AltiVec types is invalid");
35631 else if (DECIMAL_FLOAT_MODE_P (mode))
35632 error ("use of decimal floating point types in AltiVec types is invalid");
35633 else if (!TARGET_VSX)
35635 if (type == long_unsigned_type_node || type == long_integer_type_node)
35637 if (TARGET_64BIT)
35638 error ("use of %<long%> in AltiVec types is invalid for "
35639 "64-bit code without -mvsx");
35640 else if (rs6000_warn_altivec_long)
35641 warning (0, "use of %<long%> in AltiVec types is deprecated; "
35642 "use %<int%>");
35644 else if (type == long_long_unsigned_type_node
35645 || type == long_long_integer_type_node)
35646 error ("use of %<long long%> in AltiVec types is invalid without "
35647 "-mvsx");
35648 else if (type == double_type_node)
35649 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
35652 switch (altivec_type)
35654 case 'v':
35655 unsigned_p = TYPE_UNSIGNED (type);
35656 switch (mode)
35658 case E_TImode:
35659 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
35660 break;
35661 case E_DImode:
35662 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
35663 break;
35664 case E_SImode:
35665 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
35666 break;
35667 case E_HImode:
35668 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
35669 break;
35670 case E_QImode:
35671 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
35672 break;
35673 case E_SFmode: result = V4SF_type_node; break;
35674 case E_DFmode: result = V2DF_type_node; break;
35675 /* If the user says 'vector int bool', we may be handed the 'bool'
35676 attribute _before_ the 'vector' attribute, and so select the
35677 proper type in the 'b' case below. */
35678 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
35679 case E_V2DImode: case E_V2DFmode:
35680 result = type;
35681 default: break;
35683 break;
35684 case 'b':
35685 switch (mode)
35687 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
35688 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
35689 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
35690 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
35691 default: break;
35693 break;
35694 case 'p':
35695 switch (mode)
35697 case E_V8HImode: result = pixel_V8HI_type_node;
35698 default: break;
35700 default: break;
35703 /* Propagate qualifiers attached to the element type
35704 onto the vector type. */
35705 if (result && result != type && TYPE_QUALS (type))
35706 result = build_qualified_type (result, TYPE_QUALS (type));
35708 *no_add_attrs = true; /* No need to hang on to the attribute. */
35710 if (result)
35711 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
35713 return NULL_TREE;
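/* In user code the attribute handled above is normally reached through
   the vector/pixel/bool keywords.  Disabled examples of the underlying
   spelling, following the three forms listed in the comment before the
   handler; the variable names are invented.  */
#if 0
/* 'vector unsigned int' */
__attribute__((altivec(vector__))) unsigned int v4ui_example;
/* 'vector pixel': pixel__ is always applied to 'unsigned short' */
__attribute__((altivec(pixel__))) unsigned short pixel_example;
/* 'vector bool int': bool__ is always applied to an unsigned type */
__attribute__((altivec(bool__))) unsigned int bool_example;
#endif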
35716 /* AltiVec defines four built-in scalar types that serve as vector
35717 elements; we must teach the compiler how to mangle them. */
35719 static const char *
35720 rs6000_mangle_type (const_tree type)
35722 type = TYPE_MAIN_VARIANT (type);
35724 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
35725 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
35726 return NULL;
35728 if (type == bool_char_type_node) return "U6__boolc";
35729 if (type == bool_short_type_node) return "U6__bools";
35730 if (type == pixel_type_node) return "u7__pixel";
35731 if (type == bool_int_type_node) return "U6__booli";
35732 if (type == bool_long_type_node) return "U6__booll";
35734 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
35735 "g" for IBM extended double, no matter whether it is long double (using
35736 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
35737 if (TARGET_FLOAT128_TYPE)
35739 if (type == ieee128_float_type_node)
35740 return "U10__float128";
35742 if (type == ibm128_float_type_node)
35743 return "g";
35745 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
35746 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
35749 /* Mangle IBM extended float long double as `g' (__float128) on
35750 powerpc*-linux where long-double-64 previously was the default. */
35751 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
35752 && TARGET_ELF
35753 && TARGET_LONG_DOUBLE_128
35754 && !TARGET_IEEEQUAD)
35755 return "g";
35757 /* For all other types, use normal C++ mangling. */
35758 return NULL;
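/* The special manglings above, collected in one disabled reference
   table (illustrative and unused; the long double cases depend on
   -mabi and -mlong-double-128 and are omitted here):  */
#if 0
static const struct { const char *type; const char *mangled; }
rs6000_mangling_table[] = {
  { "__bool char",  "U6__boolc" },
  { "__bool short", "U6__bools" },
  { "__pixel",      "u7__pixel" },
  { "__bool int",   "U6__booli" },
  { "__bool long",  "U6__booll" },
  { "__float128",   "U10__float128" },
  { "__ibm128",     "g" },           /* IBM extended double */
};
#endif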
35761 /* Handle a "longcall" or "shortcall" attribute; arguments as in
35762 struct attribute_spec.handler. */
35764 static tree
35765 rs6000_handle_longcall_attribute (tree *node, tree name,
35766 tree args ATTRIBUTE_UNUSED,
35767 int flags ATTRIBUTE_UNUSED,
35768 bool *no_add_attrs)
35770 if (TREE_CODE (*node) != FUNCTION_TYPE
35771 && TREE_CODE (*node) != FIELD_DECL
35772 && TREE_CODE (*node) != TYPE_DECL)
35774 warning (OPT_Wattributes, "%qE attribute only applies to functions",
35775 name);
35776 *no_add_attrs = true;
35779 return NULL_TREE;
35782 /* Set longcall attributes on all functions declared when
35783 rs6000_default_long_calls is true. */
35784 static void
35785 rs6000_set_default_type_attributes (tree type)
35787 if (rs6000_default_long_calls
35788 && (TREE_CODE (type) == FUNCTION_TYPE
35789 || TREE_CODE (type) == METHOD_TYPE))
35790 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
35791 NULL_TREE,
35792 TYPE_ATTRIBUTES (type));
35794 #if TARGET_MACHO
35795 darwin_set_default_type_attributes (type);
35796 #endif
35799 /* Return a reference suitable for calling a function with the
35800 longcall attribute. */
35803 rs6000_longcall_ref (rtx call_ref)
35805 const char *call_name;
35806 tree node;
35808 if (GET_CODE (call_ref) != SYMBOL_REF)
35809 return call_ref;
35811 /* System V adds '.' to the internal name, so skip any leading dots. */
35812 call_name = XSTR (call_ref, 0);
35813 if (*call_name == '.')
35815 while (*call_name == '.')
35816 call_name++;
35818 node = get_identifier (call_name);
35819 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
35822 return force_reg (Pmode, call_ref);
35825 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
35826 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
35827 #endif
35829 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35830 struct attribute_spec.handler. */
35831 static tree
35832 rs6000_handle_struct_attribute (tree *node, tree name,
35833 tree args ATTRIBUTE_UNUSED,
35834 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
35836 tree *type = NULL;
35837 if (DECL_P (*node))
35839 if (TREE_CODE (*node) == TYPE_DECL)
35840 type = &TREE_TYPE (*node);
35842 else
35843 type = node;
35845 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
35846 || TREE_CODE (*type) == UNION_TYPE)))
35848 warning (OPT_Wattributes, "%qE attribute ignored", name);
35849 *no_add_attrs = true;
35852 else if ((is_attribute_p ("ms_struct", name)
35853 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
35854 || ((is_attribute_p ("gcc_struct", name)
35855 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
35857 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
35858 name);
35859 *no_add_attrs = true;
35862 return NULL_TREE;
35865 static bool
35866 rs6000_ms_bitfield_layout_p (const_tree record_type)
35868 return (TARGET_USE_MS_BITFIELD_LAYOUT
35869 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
35870 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
35873 #ifdef USING_ELFOS_H
35875 /* A get_unnamed_section callback, used for switching to toc_section. */
35877 static void
35878 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
35880 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35881 && TARGET_MINIMAL_TOC)
35883 if (!toc_initialized)
35885 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35886 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35887 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
35888 fprintf (asm_out_file, "\t.tc ");
35889 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
35890 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35891 fprintf (asm_out_file, "\n");
35893 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35894 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35895 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35896 fprintf (asm_out_file, " = .+32768\n");
35897 toc_initialized = 1;
35899 else
35900 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35902 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35904 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35905 if (!toc_initialized)
35907 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35908 toc_initialized = 1;
35911 else
35913 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35914 if (!toc_initialized)
35916 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35917 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35918 fprintf (asm_out_file, " = .+32768\n");
35919 toc_initialized = 1;
35924 /* Implement TARGET_ASM_INIT_SECTIONS. */
35926 static void
35927 rs6000_elf_asm_init_sections (void)
35929 toc_section
35930 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
35932 sdata2_section
35933 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
35934 SDATA2_SECTION_ASM_OP);
35937 /* Implement TARGET_SELECT_RTX_SECTION. */
35939 static section *
35940 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
35941 unsigned HOST_WIDE_INT align)
35943 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
35944 return toc_section;
35945 else
35946 return default_elf_select_rtx_section (mode, x, align);
35949 /* For a SYMBOL_REF, set generic flags and then perform some
35950 target-specific processing.
35952 When the AIX ABI is requested on a non-AIX system, replace the
35953 function name with the real name (with a leading .) rather than the
35954 function descriptor name. This saves a lot of overriding code to
35955 read the prefixes. */
35957 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
35958 static void
35959 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
35961 default_encode_section_info (decl, rtl, first);
35963 if (first
35964 && TREE_CODE (decl) == FUNCTION_DECL
35965 && !TARGET_AIX
35966 && DEFAULT_ABI == ABI_AIX)
35968 rtx sym_ref = XEXP (rtl, 0);
35969 size_t len = strlen (XSTR (sym_ref, 0));
35970 char *str = XALLOCAVEC (char, len + 2);
35971 str[0] = '.';
35972 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
35973 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
35977 static inline bool
35978 compare_section_name (const char *section, const char *templ)
35980 int len;
35982 len = strlen (templ);
35983 return (strncmp (section, templ, len) == 0
35984 && (section[len] == 0 || section[len] == '.'));
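/* compare_section_name accepts either the exact section name or the
   template followed by a '.'.  A disabled illustration:  */
#if 0
#include <assert.h>
int
main (void)
{
  assert (compare_section_name (".sdata", ".sdata"));      /* exact */
  assert (compare_section_name (".sdata.foo", ".sdata"));  /* prefix + '.' */
  assert (!compare_section_name (".sdata2", ".sdata"));    /* '2' breaks it */
  return 0;
}
#endif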
35987 bool
35988 rs6000_elf_in_small_data_p (const_tree decl)
35990 if (rs6000_sdata == SDATA_NONE)
35991 return false;
35993 /* We want to merge strings, so we never consider them small data. */
35994 if (TREE_CODE (decl) == STRING_CST)
35995 return false;
35997 /* Functions are never in the small data area. */
35998 if (TREE_CODE (decl) == FUNCTION_DECL)
35999 return false;
36001 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
36003 const char *section = DECL_SECTION_NAME (decl);
36004 if (compare_section_name (section, ".sdata")
36005 || compare_section_name (section, ".sdata2")
36006 || compare_section_name (section, ".gnu.linkonce.s")
36007 || compare_section_name (section, ".sbss")
36008 || compare_section_name (section, ".sbss2")
36009 || compare_section_name (section, ".gnu.linkonce.sb")
36010 || strcmp (section, ".PPC.EMB.sdata0") == 0
36011 || strcmp (section, ".PPC.EMB.sbss0") == 0)
36012 return true;
36014 else
36016 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
36018 if (size > 0
36019 && size <= g_switch_value
36020 /* If it's not public, and we're not going to reference it there,
36021 there's no need to put it in the small data section. */
36022 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
36023 return true;
36026 return false;
36029 #endif /* USING_ELFOS_H */
36031 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
36033 static bool
36034 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
36036 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
36039 /* Do not place thread-local symbols refs in the object blocks. */
36041 static bool
36042 rs6000_use_blocks_for_decl_p (const_tree decl)
36044 return !DECL_THREAD_LOCAL_P (decl);
36047 /* Return a REG that occurs in ADDR with coefficient 1.
36048 ADDR can be effectively incremented by incrementing REG.
36050 r0 is special and we must not select it as an address
36051 register by this routine since our caller will try to
36052 increment the returned register via an "la" instruction. */
36055 find_addr_reg (rtx addr)
36057 while (GET_CODE (addr) == PLUS)
36059 if (GET_CODE (XEXP (addr, 0)) == REG
36060 && REGNO (XEXP (addr, 0)) != 0)
36061 addr = XEXP (addr, 0);
36062 else if (GET_CODE (XEXP (addr, 1)) == REG
36063 && REGNO (XEXP (addr, 1)) != 0)
36064 addr = XEXP (addr, 1);
36065 else if (CONSTANT_P (XEXP (addr, 0)))
36066 addr = XEXP (addr, 1);
36067 else if (CONSTANT_P (XEXP (addr, 1)))
36068 addr = XEXP (addr, 0);
36069 else
36070 gcc_unreachable ();
36072 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
36073 return addr;
36076 void
36077 rs6000_fatal_bad_address (rtx op)
36079 fatal_insn ("bad address", op);
36082 #if TARGET_MACHO
36084 typedef struct branch_island_d {
36085 tree function_name;
36086 tree label_name;
36087 int line_number;
36088 } branch_island;
36091 static vec<branch_island, va_gc> *branch_islands;
36093 /* Remember to generate a branch island for far calls to the given
36094 function. */
36096 static void
36097 add_compiler_branch_island (tree label_name, tree function_name,
36098 int line_number)
36100 branch_island bi = {function_name, label_name, line_number};
36101 vec_safe_push (branch_islands, bi);
36104 /* Generate far-jump branch islands for everything recorded in
36105 branch_islands. Invoked immediately after the last instruction of
36106 the epilogue has been emitted; the branch islands must be appended
36107 to, and contiguous with, the function body. Mach-O stubs are
36108 generated in machopic_output_stub(). */
36110 static void
36111 macho_branch_islands (void)
36113 char tmp_buf[512];
36115 while (!vec_safe_is_empty (branch_islands))
36117 branch_island *bi = &branch_islands->last ();
36118 const char *label = IDENTIFIER_POINTER (bi->label_name);
36119 const char *name = IDENTIFIER_POINTER (bi->function_name);
36120 char name_buf[512];
36121 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
36122 if (name[0] == '*' || name[0] == '&')
36123 strcpy (name_buf, name+1);
36124 else
36126 name_buf[0] = '_';
36127 strcpy (name_buf+1, name);
36129 strcpy (tmp_buf, "\n");
36130 strcat (tmp_buf, label);
36131 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36132 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36133 dbxout_stabd (N_SLINE, bi->line_number);
36134 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36135 if (flag_pic)
36137 if (TARGET_LINK_STACK)
36139 char name[32];
36140 get_ppc476_thunk_name (name);
36141 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
36142 strcat (tmp_buf, name);
36143 strcat (tmp_buf, "\n");
36144 strcat (tmp_buf, label);
36145 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
36147 else
36149 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
36150 strcat (tmp_buf, label);
36151 strcat (tmp_buf, "_pic\n");
36152 strcat (tmp_buf, label);
36153 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
36156 strcat (tmp_buf, "\taddis r11,r11,ha16(");
36157 strcat (tmp_buf, name_buf);
36158 strcat (tmp_buf, " - ");
36159 strcat (tmp_buf, label);
36160 strcat (tmp_buf, "_pic)\n");
36162 strcat (tmp_buf, "\tmtlr r0\n");
36164 strcat (tmp_buf, "\taddi r12,r11,lo16(");
36165 strcat (tmp_buf, name_buf);
36166 strcat (tmp_buf, " - ");
36167 strcat (tmp_buf, label);
36168 strcat (tmp_buf, "_pic)\n");
36170 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
36172 else
36174 strcat (tmp_buf, ":\nlis r12,hi16(");
36175 strcat (tmp_buf, name_buf);
36176 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
36177 strcat (tmp_buf, name_buf);
36178 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
36180 output_asm_insn (tmp_buf, 0);
36181 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36182 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36183 dbxout_stabd (N_SLINE, bi->line_number);
36184 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36185 branch_islands->pop ();
36189 /* NO_PREVIOUS_DEF checks whether the function name is already in the
36190 branch island list. */
36192 static int
36193 no_previous_def (tree function_name)
36195 branch_island *bi;
36196 unsigned ix;
36198 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
36199 if (function_name == bi->function_name)
36200 return 0;
36201 return 1;
36204 /* GET_PREV_LABEL gets the label name from the previous definition of
36205 the function. */
36207 static tree
36208 get_prev_label (tree function_name)
36210 branch_island *bi;
36211 unsigned ix;
36213 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
36214 if (function_name == bi->function_name)
36215 return bi->label_name;
36216 return NULL_TREE;
36219 /* INSN is either a function call or a millicode call. It may have an
36220 unconditional jump in its delay slot.
36222 CALL_DEST is the routine we are calling. */
36224 char *
36225 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
36226 int cookie_operand_number)
36228 static char buf[256];
36229 if (darwin_emit_branch_islands
36230 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
36231 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
36233 tree labelname;
36234 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
36236 if (no_previous_def (funname))
36238 rtx label_rtx = gen_label_rtx ();
36239 char *label_buf, temp_buf[256];
36240 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
36241 CODE_LABEL_NUMBER (label_rtx));
36242 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
36243 labelname = get_identifier (label_buf);
36244 add_compiler_branch_island (labelname, funname, insn_line (insn));
36246 else
36247 labelname = get_prev_label (funname);
36249 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
36250 instruction will reach 'foo', otherwise link as 'bl L42'".
36251 "L42" should be a 'branch island', that will do a far jump to
36252 'foo'. Branch islands are generated in
36253 macho_branch_islands(). */
36254 sprintf (buf, "jbsr %%z%d,%.246s",
36255 dest_operand_number, IDENTIFIER_POINTER (labelname));
36257 else
36258 sprintf (buf, "bl %%z%d", dest_operand_number);
36259 return buf;
36262 /* Generate PIC and indirect symbol stubs. */
36264 void
36265 machopic_output_stub (FILE *file, const char *symb, const char *stub)
36267 unsigned int length;
36268 char *symbol_name, *lazy_ptr_name;
36269 char *local_label_0;
36270 static int label = 0;
36272 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
36273 symb = (*targetm.strip_name_encoding) (symb);
36276 length = strlen (symb);
36277 symbol_name = XALLOCAVEC (char, length + 32);
36278 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
36280 lazy_ptr_name = XALLOCAVEC (char, length + 32);
36281 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
36283 if (flag_pic == 2)
36284 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
36285 else
36286 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
36288 if (flag_pic == 2)
36290 fprintf (file, "\t.align 5\n");
36292 fprintf (file, "%s:\n", stub);
36293 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36295 label++;
36296 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
36297 sprintf (local_label_0, "\"L%011d$spb\"", label);
36299 fprintf (file, "\tmflr r0\n");
36300 if (TARGET_LINK_STACK)
36302 char name[32];
36303 get_ppc476_thunk_name (name);
36304 fprintf (file, "\tbl %s\n", name);
36305 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
36307 else
36309 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
36310 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
36312 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
36313 lazy_ptr_name, local_label_0);
36314 fprintf (file, "\tmtlr r0\n");
36315 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
36316 (TARGET_64BIT ? "ldu" : "lwzu"),
36317 lazy_ptr_name, local_label_0);
36318 fprintf (file, "\tmtctr r12\n");
36319 fprintf (file, "\tbctr\n");
36321 else
36323 fprintf (file, "\t.align 4\n");
36325 fprintf (file, "%s:\n", stub);
36326 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36328 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
36329 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
36330 (TARGET_64BIT ? "ldu" : "lwzu"),
36331 lazy_ptr_name);
36332 fprintf (file, "\tmtctr r12\n");
36333 fprintf (file, "\tbctr\n");
36336 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
36337 fprintf (file, "%s:\n", lazy_ptr_name);
36338 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36339 fprintf (file, "%sdyld_stub_binding_helper\n",
36340 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
36343 /* Legitimize PIC addresses. If the address is already
36344 position-independent, we return ORIG. Newly generated
36345 position-independent addresses go into a reg. This is REG if
36346 nonzero; otherwise we allocate register(s) as necessary. */
36348 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
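/* SMALL_INT is the usual unsigned-wraparound test for a signed 16-bit
   immediate: it accepts exactly -32768 .. 32767.  A disabled check
   using a local mirror of the macro for plain integers (the _DEMO
   names are invented):  */
#if 0
#include <assert.h>
#include <stdint.h>
#define SMALL_INT_DEMO(V) (((uint64_t) (int64_t) (V) + 0x8000) < 0x10000)
int
main (void)
{
  assert (SMALL_INT_DEMO (-32768));  /* low end: sum wraps to 0 */
  assert (SMALL_INT_DEMO (32767));   /* high end: sum is 0xffff */
  assert (!SMALL_INT_DEMO (32768));  /* one past: sum is 0x10000 */
  assert (!SMALL_INT_DEMO (-32769));
  return 0;
}
#endif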
36351 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
36352 rtx reg)
36354 rtx base, offset;
36356 if (reg == NULL && ! reload_in_progress && ! reload_completed)
36357 reg = gen_reg_rtx (Pmode);
36359 if (GET_CODE (orig) == CONST)
36361 rtx reg_temp;
36363 if (GET_CODE (XEXP (orig, 0)) == PLUS
36364 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
36365 return orig;
36367 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
36369 /* Use a different reg for the intermediate value, as
36370 it will be marked UNCHANGING. */
36371 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
36372 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
36373 Pmode, reg_temp);
36374 offset =
36375 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
36376 Pmode, reg);
36378 if (GET_CODE (offset) == CONST_INT)
36380 if (SMALL_INT (offset))
36381 return plus_constant (Pmode, base, INTVAL (offset));
36382 else if (! reload_in_progress && ! reload_completed)
36383 offset = force_reg (Pmode, offset);
36384 else
36386 rtx mem = force_const_mem (Pmode, orig);
36387 return machopic_legitimize_pic_address (mem, Pmode, reg);
36390 return gen_rtx_PLUS (Pmode, base, offset);
36393 /* Fall back on generic machopic code. */
36394 return machopic_legitimize_pic_address (orig, mode, reg);
36397 /* Output a .machine directive for the Darwin assembler, and call
36398 the generic start_file routine. */
36400 static void
36401 rs6000_darwin_file_start (void)
36403 static const struct
36405 const char *arg;
36406 const char *name;
36407 HOST_WIDE_INT if_set;
36408 } mapping[] = {
36409 { "ppc64", "ppc64", MASK_64BIT },
36410 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
36411 { "power4", "ppc970", 0 },
36412 { "G5", "ppc970", 0 },
36413 { "7450", "ppc7450", 0 },
36414 { "7400", "ppc7400", MASK_ALTIVEC },
36415 { "G4", "ppc7400", 0 },
36416 { "750", "ppc750", 0 },
36417 { "740", "ppc750", 0 },
36418 { "G3", "ppc750", 0 },
36419 { "604e", "ppc604e", 0 },
36420 { "604", "ppc604", 0 },
36421 { "603e", "ppc603", 0 },
36422 { "603", "ppc603", 0 },
36423 { "601", "ppc601", 0 },
36424 { NULL, "ppc", 0 } };
36425 const char *cpu_id = "";
36426 size_t i;
36428 rs6000_file_start ();
36429 darwin_file_start ();
36431 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
36433 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
36434 cpu_id = rs6000_default_cpu;
36436 if (global_options_set.x_rs6000_cpu_index)
36437 cpu_id = processor_target_table[rs6000_cpu_index].name;
36439 /* Look through the mapping array. Pick the first name that either
36440 matches the argument, has a bit set in IF_SET that is also set
36441 in the target flags, or has a NULL name. */
36443 i = 0;
36444 while (mapping[i].arg != NULL
36445 && strcmp (mapping[i].arg, cpu_id) != 0
36446 && (mapping[i].if_set & rs6000_isa_flags) == 0)
36447 i++;
36449 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
36452 #endif /* TARGET_MACHO */
36454 #if TARGET_ELF
36455 static int
36456 rs6000_elf_reloc_rw_mask (void)
36458 if (flag_pic)
36459 return 3;
36460 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
36461 return 2;
36462 else
36463 return 0;
36466 /* Record an element in the table of global constructors. SYMBOL is
36467 a SYMBOL_REF of the function to be called; PRIORITY is a number
36468 between 0 and MAX_INIT_PRIORITY.
36470 This differs from default_named_section_asm_out_constructor in
36471 that we have special handling for -mrelocatable. */
36473 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
36474 static void
36475 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
36477 const char *section = ".ctors";
36478 char buf[18];
36480 if (priority != DEFAULT_INIT_PRIORITY)
36482 sprintf (buf, ".ctors.%.5u",
36483 /* Invert the numbering so the linker puts us in the proper
36484 order; constructors are run from right to left, and the
36485 linker sorts in increasing order. */
36486 MAX_INIT_PRIORITY - priority);
36487 section = buf;
36490 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36491 assemble_align (POINTER_SIZE);
36493 if (DEFAULT_ABI == ABI_V4
36494 && (TARGET_RELOCATABLE || flag_pic > 1))
36496 fputs ("\t.long (", asm_out_file);
36497 output_addr_const (asm_out_file, symbol);
36498 fputs (")@fixup\n", asm_out_file);
36500 else
36501 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
36504 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
36505 static void
36506 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
36508 const char *section = ".dtors";
36509 char buf[18];
36511 if (priority != DEFAULT_INIT_PRIORITY)
36513 sprintf (buf, ".dtors.%.5u",
36514 /* Invert the numbering so the linker puts us in the proper
36515 order; as with constructors, the runtime's traversal direction and
36516 the linker's increasing sort require the inverted names. */
36517 MAX_INIT_PRIORITY - priority);
36518 section = buf;
36521 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36522 assemble_align (POINTER_SIZE);
36524 if (DEFAULT_ABI == ABI_V4
36525 && (TARGET_RELOCATABLE || flag_pic > 1))
36527 fputs ("\t.long (", asm_out_file);
36528 output_addr_const (asm_out_file, symbol);
36529 fputs (")@fixup\n", asm_out_file);
36531 else
36532 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
36535 void
36536 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
36538 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
36540 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
36541 ASM_OUTPUT_LABEL (file, name);
36542 fputs (DOUBLE_INT_ASM_OP, file);
36543 rs6000_output_function_entry (file, name);
36544 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
36545 if (DOT_SYMBOLS)
36547 fputs ("\t.size\t", file);
36548 assemble_name (file, name);
36549 fputs (",24\n\t.type\t.", file);
36550 assemble_name (file, name);
36551 fputs (",@function\n", file);
36552 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
36554 fputs ("\t.globl\t.", file);
36555 assemble_name (file, name);
36556 putc ('\n', file);
36559 else
36560 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36561 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36562 rs6000_output_function_entry (file, name);
36563 fputs (":\n", file);
36564 return;
36567 if (DEFAULT_ABI == ABI_V4
36568 && (TARGET_RELOCATABLE || flag_pic > 1)
36569 && !TARGET_SECURE_PLT
36570 && (!constant_pool_empty_p () || crtl->profile)
36571 && uses_TOC ())
36573 char buf[256];
36575 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36577 fprintf (file, "\t.long ");
36578 assemble_name (file, toc_label_name);
36579 need_toc_init = 1;
36580 putc ('-', file);
36581 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36582 assemble_name (file, buf);
36583 putc ('\n', file);
36586 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36587 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36589 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
36591 char buf[256];
36593 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36595 fprintf (file, "\t.quad .TOC.-");
36596 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36597 assemble_name (file, buf);
36598 putc ('\n', file);
36601 if (DEFAULT_ABI == ABI_AIX)
36603 const char *desc_name, *orig_name;
36605 orig_name = (*targetm.strip_name_encoding) (name);
36606 desc_name = orig_name;
36607 while (*desc_name == '.')
36608 desc_name++;
36610 if (TREE_PUBLIC (decl))
36611 fprintf (file, "\t.globl %s\n", desc_name);
36613 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
36614 fprintf (file, "%s:\n", desc_name);
36615 fprintf (file, "\t.long %s\n", orig_name);
36616 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
36617 fputs ("\t.long 0\n", file);
36618 fprintf (file, "\t.previous\n");
36620 ASM_OUTPUT_LABEL (file, name);
36623 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
36624 static void
36625 rs6000_elf_file_end (void)
36627 #ifdef HAVE_AS_GNU_ATTRIBUTE
36628 /* ??? The value emitted depends on options active at file end.
36629 Assume anyone using #pragma or attributes that might change
36630 options knows what they are doing. */
36631 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
36632 && rs6000_passes_float)
36634 int fp;
36636 if (TARGET_DF_FPR | TARGET_DF_SPE)
36637 fp = 1;
36638 else if (TARGET_SF_FPR | TARGET_SF_SPE)
36639 fp = 3;
36640 else
36641 fp = 2;
36642 if (rs6000_passes_long_double)
36644 if (!TARGET_LONG_DOUBLE_128)
36645 fp |= 2 * 4;
36646 else if (TARGET_IEEEQUAD)
36647 fp |= 3 * 4;
36648 else
36649 fp |= 1 * 4;
36651 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
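/* A sketch of the resulting encoding (assuming the usual interpretation
   of GNU attribute tag 4, Tag_GNU_Power_ABI_FP): the low two bits
   describe scalar float arguments (1 hard double, 2 soft, 3 hard
   single-precision only) and the next two describe long double
   (1 128-bit IBM, 2 64-bit, 3 IEEE 128-bit).  So hard double floats
   with IBM long double emit ".gnu_attribute 4, 5", i.e. 1 | 1*4.  */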
36653 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
36655 if (rs6000_passes_vector)
36656 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
36657 (TARGET_ALTIVEC_ABI ? 2
36658 : TARGET_SPE_ABI ? 3
36659 : 1));
36660 if (rs6000_returns_struct)
36661 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
36662 aix_struct_return ? 2 : 1);
36664 #endif
36665 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
36666 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
36667 file_end_indicate_exec_stack ();
36668 #endif
36670 if (flag_split_stack)
36671 file_end_indicate_split_stack ();
36673 if (cpu_builtin_p)
36675 /* We have expanded a CPU builtin, so we need to emit a reference to
37676 the special symbol that libc uses to declare that it supports the
37677 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
36678 switch_to_section (data_section);
36679 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
36680 fprintf (asm_out_file, "\t%s %s\n",
36681 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
36684 #endif
36686 #if TARGET_XCOFF
36688 #ifndef HAVE_XCOFF_DWARF_EXTRAS
36689 #define HAVE_XCOFF_DWARF_EXTRAS 0
36690 #endif
36692 static enum unwind_info_type
36693 rs6000_xcoff_debug_unwind_info (void)
36695 return UI_NONE;
36698 static void
36699 rs6000_xcoff_asm_output_anchor (rtx symbol)
36701 char buffer[100];
36703 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
36704 SYMBOL_REF_BLOCK_OFFSET (symbol));
36705 fprintf (asm_out_file, "%s", SET_ASM_OP);
36706 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
36707 fprintf (asm_out_file, ",");
36708 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
36709 fprintf (asm_out_file, "\n");
36712 static void
36713 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
36715 fputs (GLOBAL_ASM_OP, stream);
36716 RS6000_OUTPUT_BASENAME (stream, name);
36717 putc ('\n', stream);
36720 /* A get_unnamed_decl callback, used for read-only sections. DIRECTIVE
36721 points to the section string variable. */
36723 static void
36724 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
36726 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
36727 *(const char *const *) directive,
36728 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36731 /* Likewise for read-write sections. */
36733 static void
36734 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
36736 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
36737 *(const char *const *) directive,
36738 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36741 static void
36742 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
36744 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
36745 *(const char *const *) directive,
36746 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36749 /* A get_unnamed_section callback, used for switching to toc_section. */
36751 static void
36752 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
36754 if (TARGET_MINIMAL_TOC)
36756 /* toc_section is always selected at least once from
36757 rs6000_xcoff_file_start, so this is guaranteed to
36758 always be defined once and only once in each file. */
36759 if (!toc_initialized)
36761 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
36762 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
36763 toc_initialized = 1;
36765 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
36766 (TARGET_32BIT ? "" : ",3"));
36768 else
36769 fputs ("\t.toc\n", asm_out_file);
36772 /* Implement TARGET_ASM_INIT_SECTIONS. */
36774 static void
36775 rs6000_xcoff_asm_init_sections (void)
36777 read_only_data_section
36778 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36779 &xcoff_read_only_section_name);
36781 private_data_section
36782 = get_unnamed_section (SECTION_WRITE,
36783 rs6000_xcoff_output_readwrite_section_asm_op,
36784 &xcoff_private_data_section_name);
36786 tls_data_section
36787 = get_unnamed_section (SECTION_TLS,
36788 rs6000_xcoff_output_tls_section_asm_op,
36789 &xcoff_tls_data_section_name);
36791 tls_private_data_section
36792 = get_unnamed_section (SECTION_TLS,
36793 rs6000_xcoff_output_tls_section_asm_op,
36794 &xcoff_private_data_section_name);
36796 read_only_private_data_section
36797 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36798 &xcoff_private_data_section_name);
36800 toc_section
36801 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
36803 readonly_data_section = read_only_data_section;
36806 static int
36807 rs6000_xcoff_reloc_rw_mask (void)
36809 return 3;
36812 static void
36813 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
36814 tree decl ATTRIBUTE_UNUSED)
36816 int smclass;
36817 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
36819 if (flags & SECTION_EXCLUDE)
36820 smclass = 4;
36821 else if (flags & SECTION_DEBUG)
36823 fprintf (asm_out_file, "\t.dwsect %s\n", name);
36824 return;
36826 else if (flags & SECTION_CODE)
36827 smclass = 0;
36828 else if (flags & SECTION_TLS)
36829 smclass = 3;
36830 else if (flags & SECTION_WRITE)
36831 smclass = 2;
36832 else
36833 smclass = 1;
36835 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
36836 (flags & SECTION_CODE) ? "." : "",
36837 name, suffix[smclass], flags & SECTION_ENTSIZE);
36840 #define IN_NAMED_SECTION(DECL) \
36841 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
36842 && DECL_SECTION_NAME (DECL) != NULL)
36844 static section *
36845 rs6000_xcoff_select_section (tree decl, int reloc,
36846 unsigned HOST_WIDE_INT align)
36848 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
36849 a named section. */
36850 if (align > BIGGEST_ALIGNMENT)
36852 resolve_unique_section (decl, reloc, true);
36853 if (IN_NAMED_SECTION (decl))
36854 return get_named_section (decl, NULL, reloc);
36857 if (decl_readonly_section (decl, reloc))
36859 if (TREE_PUBLIC (decl))
36860 return read_only_data_section;
36861 else
36862 return read_only_private_data_section;
36864 else
36866 #if HAVE_AS_TLS
36867 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
36869 if (TREE_PUBLIC (decl))
36870 return tls_data_section;
36871 else if (bss_initializer_p (decl))
36873 /* Convert to COMMON to emit in BSS. */
36874 DECL_COMMON (decl) = 1;
36875 return tls_comm_section;
36877 else
36878 return tls_private_data_section;
36880 else
36881 #endif
36882 if (TREE_PUBLIC (decl))
36883 return data_section;
36884 else
36885 return private_data_section;
36889 static void
36890 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
36892 const char *name;
36894 /* Use select_section for private data and uninitialized data with
36895 alignment <= BIGGEST_ALIGNMENT. */
36896 if (!TREE_PUBLIC (decl)
36897 || DECL_COMMON (decl)
36898 || (DECL_INITIAL (decl) == NULL_TREE
36899 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
36900 || DECL_INITIAL (decl) == error_mark_node
36901 || (flag_zero_initialized_in_bss
36902 && initializer_zerop (DECL_INITIAL (decl))))
36903 return;
36905 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36906 name = (*targetm.strip_name_encoding) (name);
36907 set_decl_section_name (decl, name);
36910 /* Select section for constant in constant pool.
36912 On RS/6000, all constants are in the private read-only data area.
36913 However, if this is being placed in the TOC it must be output as a
36914 toc entry. */
36916 static section *
36917 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
36918 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
36920 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
36921 return toc_section;
36922 else
36923 return read_only_private_data_section;
36926 /* Remove any trailing [DS] or the like from the symbol name. */
36928 static const char *
36929 rs6000_xcoff_strip_name_encoding (const char *name)
36931 size_t len;
36932 if (*name == '*')
36933 name++;
36934 len = strlen (name);
36935 if (name[len - 1] == ']')
36936 return ggc_alloc_string (name, len - 4);
36937 else
36938 return name;
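/* E.g. "foo[DS]" comes back as "foo": the trailing ']' implies a
   four-character mapping-class suffix such as "[DS]", "[RW]" or "[UA]",
   which is exactly what the len - 4 above drops.  A name with no
   suffix is returned unchanged.  */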
36941 /* Section attributes. AIX is always PIC. */
36943 static unsigned int
36944 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
36946 unsigned int align;
36947 unsigned int flags = default_section_type_flags (decl, name, reloc);
36949 /* Align to at least UNIT size. */
36950 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
36951 align = MIN_UNITS_PER_WORD;
36952 else
36953 /* Increase alignment of large objects if not already stricter. */
36954 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
36955 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
36956 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
36958 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
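/* Worked example, assuming 32-bit AIX values (MIN_UNITS_PER_WORD 4,
   UNITS_PER_FP_WORD 8): a 32-byte variable with 8-byte DECL_ALIGN gets
   align = MAX (8, 8) = 8, so exact_log2 (8) = 3 is folded into the
   SECTION_ENTSIZE bits of the returned flags.  */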
36961 /* Output at beginning of assembler file.
36963 Initialize the section names for the RS/6000 at this point.
36965 Specify filename, including full path, to assembler.
36967 We want to go into the TOC section so at least one .toc will be emitted.
36968 Also, in order to output proper .bs/.es pairs, we need at least one static
36969 [RW] section emitted.
36971 Finally, declare mcount when profiling to make the assembler happy. */
36973 static void
36974 rs6000_xcoff_file_start (void)
36976 rs6000_gen_section_name (&xcoff_bss_section_name,
36977 main_input_filename, ".bss_");
36978 rs6000_gen_section_name (&xcoff_private_data_section_name,
36979 main_input_filename, ".rw_");
36980 rs6000_gen_section_name (&xcoff_read_only_section_name,
36981 main_input_filename, ".ro_");
36982 rs6000_gen_section_name (&xcoff_tls_data_section_name,
36983 main_input_filename, ".tls_");
36984 rs6000_gen_section_name (&xcoff_tbss_section_name,
36985 main_input_filename, ".tbss_[UL]");
36987 fputs ("\t.file\t", asm_out_file);
36988 output_quoted_string (asm_out_file, main_input_filename);
36989 fputc ('\n', asm_out_file);
36990 if (write_symbols != NO_DEBUG)
36991 switch_to_section (private_data_section);
36992 switch_to_section (toc_section);
36993 switch_to_section (text_section);
36994 if (profile_flag)
36995 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
36996 rs6000_file_start ();
36999 /* Output at end of assembler file.
37000 On the RS/6000, referencing data should automatically pull in text. */
37002 static void
37003 rs6000_xcoff_file_end (void)
37005 switch_to_section (text_section);
37006 fputs ("_section_.text:\n", asm_out_file);
37007 switch_to_section (data_section);
37008 fputs (TARGET_32BIT
37009 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
37010 asm_out_file);
37013 struct declare_alias_data
37015 FILE *file;
37016 bool function_descriptor;
37019 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
37021 static bool
37022 rs6000_declare_alias (struct symtab_node *n, void *d)
37024 struct declare_alias_data *data = (struct declare_alias_data *)d;
37025 /* Main symbol is output specially, because varasm machinery does part of
37026 the job for us - we do not need to declare .globl/lglobs and such. */
37027 if (!n->alias || n->weakref)
37028 return false;
37030 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
37031 return false;
37033 /* Prevent assemble_alias from trying to use the .set pseudo operation,
37034 which does not behave as the middle-end expects. */
37035 TREE_ASM_WRITTEN (n->decl) = true;
37037 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
37038 char *buffer = (char *) alloca (strlen (name) + 2);
37039 char *p;
37040 int dollar_inside = 0;
37042 strcpy (buffer, name);
37043 p = strchr (buffer, '$');
37044 while (p) {
37045 *p = '_';
37046 dollar_inside++;
37047 p = strchr (p + 1, '$');
37049 if (TREE_PUBLIC (n->decl))
37051 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
37053 if (dollar_inside) {
37054 if (data->function_descriptor)
37055 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
37056 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
37058 if (data->function_descriptor)
37060 fputs ("\t.globl .", data->file);
37061 RS6000_OUTPUT_BASENAME (data->file, buffer);
37062 putc ('\n', data->file);
37064 fputs ("\t.globl ", data->file);
37065 RS6000_OUTPUT_BASENAME (data->file, buffer);
37066 putc ('\n', data->file);
37068 #ifdef ASM_WEAKEN_DECL
37069 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
37070 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
37071 #endif
37073 else
37075 if (dollar_inside)
37077 if (data->function_descriptor)
37078 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
37079 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
37081 if (data->function_descriptor)
37083 fputs ("\t.lglobl .", data->file);
37084 RS6000_OUTPUT_BASENAME (data->file, buffer);
37085 putc ('\n', data->file);
37087 fputs ("\t.lglobl ", data->file);
37088 RS6000_OUTPUT_BASENAME (data->file, buffer);
37089 putc ('\n', data->file);
37091 if (data->function_descriptor)
37092 fputs (".", data->file);
37093 RS6000_OUTPUT_BASENAME (data->file, buffer);
37094 fputs (":\n", data->file);
37095 return false;
37099 #ifdef HAVE_GAS_HIDDEN
37100 /* Helper function to calculate visibility of a DECL
37101 and return the value as a const string. */
37103 static const char *
37104 rs6000_xcoff_visibility (tree decl)
37106 static const char * const visibility_types[] = {
37107 "", ",protected", ",hidden", ",internal"
37110 enum symbol_visibility vis = DECL_VISIBILITY (decl);
37112 if (TREE_CODE (decl) == FUNCTION_DECL
37113 && cgraph_node::get (decl)
37114 && cgraph_node::get (decl)->instrumentation_clone
37115 && cgraph_node::get (decl)->instrumented_version)
37116 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
37118 return visibility_types[vis];
37120 #endif
37123 /* This macro produces the initial definition of a function name.
37124 On the RS/6000, we need to place an extra '.' in the function name and
37125 output the function descriptor.
37126 Dollar signs are converted to underscores.
37128 The csect for the function will have already been created when
37129 text_section was selected. We do have to go back to that csect, however.
37131 The third and fourth parameters to the .function pseudo-op (16 and 044)
37132 are placeholders which no longer have any use.
37134 Because the AIX assembler's .set command has unexpected semantics, we output
37135 all aliases as alternative labels in front of the definition. */
37137 void
37138 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
37140 char *buffer = (char *) alloca (strlen (name) + 1);
37141 char *p;
37142 int dollar_inside = 0;
37143 struct declare_alias_data data = {file, false};
37145 strcpy (buffer, name);
37146 p = strchr (buffer, '$');
37147 while (p) {
37148 *p = '_';
37149 dollar_inside++;
37150 p = strchr (p + 1, '$');
37152 if (TREE_PUBLIC (decl))
37154 if (!RS6000_WEAK || !DECL_WEAK (decl))
37156 if (dollar_inside) {
37157 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
37158 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
37160 fputs ("\t.globl .", file);
37161 RS6000_OUTPUT_BASENAME (file, buffer);
37162 #ifdef HAVE_GAS_HIDDEN
37163 fputs (rs6000_xcoff_visibility (decl), file);
37164 #endif
37165 putc ('\n', file);
37168 else
37170 if (dollar_inside) {
37171 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
37172 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
37174 fputs ("\t.lglobl .", file);
37175 RS6000_OUTPUT_BASENAME (file, buffer);
37176 putc ('\n', file);
37178 fputs ("\t.csect ", file);
37179 RS6000_OUTPUT_BASENAME (file, buffer);
37180 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
37181 RS6000_OUTPUT_BASENAME (file, buffer);
37182 fputs (":\n", file);
37183 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37184 &data, true);
37185 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
37186 RS6000_OUTPUT_BASENAME (file, buffer);
37187 fputs (", TOC[tc0], 0\n", file);
37188 in_section = NULL;
37189 switch_to_section (function_section (decl));
37190 putc ('.', file);
37191 RS6000_OUTPUT_BASENAME (file, buffer);
37192 fputs (":\n", file);
37193 data.function_descriptor = true;
37194 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37195 &data, true);
37196 if (!DECL_IGNORED_P (decl))
37198 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
37199 xcoffout_declare_function (file, decl, buffer);
37200 else if (write_symbols == DWARF2_DEBUG)
37202 name = (*targetm.strip_name_encoding) (name);
37203 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
37206 return;
37210 /* Output assembly language to globalize a symbol from a DECL,
37211 possibly with visibility. */
37213 void
37214 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
37216 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
37217 fputs (GLOBAL_ASM_OP, stream);
37218 RS6000_OUTPUT_BASENAME (stream, name);
37219 #ifdef HAVE_GAS_HIDDEN
37220 fputs (rs6000_xcoff_visibility (decl), stream);
37221 #endif
37222 putc ('\n', stream);
37225 /* Output assembly language to define a symbol as COMMON from a DECL,
37226 possibly with visibility. */
37228 void
37229 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
37230 tree decl ATTRIBUTE_UNUSED,
37231 const char *name,
37232 unsigned HOST_WIDE_INT size,
37233 unsigned HOST_WIDE_INT align)
37235 unsigned HOST_WIDE_INT align2 = 2;
37237 if (align > 32)
37238 align2 = floor_log2 (align / BITS_PER_UNIT);
37239 else if (size > 4)
37240 align2 = 3;
37242 fputs (COMMON_ASM_OP, stream);
37243 RS6000_OUTPUT_BASENAME (stream, name);
37245 fprintf (stream,
37246 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
37247 size, align2);
37249 #ifdef HAVE_GAS_HIDDEN
37250 fputs (rs6000_xcoff_visibility (decl), stream);
37251 #endif
37252 putc ('\n', stream);
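/* For instance, a 16-byte common symbol requested at 128-bit alignment
   yields align2 = floor_log2 (128 / 8) = 4, so (assuming COMMON_ASM_OP
   expands to an AIX .comm directive here) the emitted line is
   ".comm sym,16,4"; the assembler takes the alignment as a log2
   value.  */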
37255 /* This macro produces the initial definition of an object (variable) name.
37256 Because the AIX assembler's .set command has unexpected semantics, we output
37257 all aliases as alternative labels in front of the definition. */
37259 void
37260 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
37262 struct declare_alias_data data = {file, false};
37263 RS6000_OUTPUT_BASENAME (file, name);
37264 fputs (":\n", file);
37265 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37266 &data, true);
37269 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
37271 void
37272 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
37274 fputs (integer_asm_op (size, FALSE), file);
37275 assemble_name (file, label);
37276 fputs ("-$", file);
37279 /* Output a symbol offset relative to the dbase for the current object.
37280 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
37281 signed offsets.
37283 __gcc_unwind_dbase is embedded in all executables/libraries through
37284 libgcc/config/rs6000/crtdbase.S. */
37286 void
37287 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
37289 fputs (integer_asm_op (size, FALSE), file);
37290 assemble_name (file, label);
37291 fputs("-__gcc_unwind_dbase", file);
37294 #ifdef HAVE_AS_TLS
37295 static void
37296 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
37298 rtx symbol;
37299 int flags;
37300 const char *symname;
37302 default_encode_section_info (decl, rtl, first);
37304 /* Careful not to prod global register variables. */
37305 if (!MEM_P (rtl))
37306 return;
37307 symbol = XEXP (rtl, 0);
37308 if (GET_CODE (symbol) != SYMBOL_REF)
37309 return;
37311 flags = SYMBOL_REF_FLAGS (symbol);
37313 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
37314 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
37316 SYMBOL_REF_FLAGS (symbol) = flags;
37318 /* Append mapping class to extern decls. */
37319 symname = XSTR (symbol, 0);
37320 if (decl /* sync condition with assemble_external () */
37321 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
37322 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
37323 || TREE_CODE (decl) == FUNCTION_DECL)
37324 && symname[strlen (symname) - 1] != ']')
37326 char *newname = (char *) alloca (strlen (symname) + 5);
37327 strcpy (newname, symname);
37328 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
37329 ? "[DS]" : "[UA]"));
37330 XSTR (symbol, 0) = ggc_strdup (newname);
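/* The effect: an extern function "foo" is renamed "foo[DS]" (its
   descriptor csect) and an extern non-TLS variable "bar" becomes
   "bar[UA]", so references match how the AIX assembler classifies
   undefined symbols by mapping class.  */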
37333 #endif /* HAVE_AS_TLS */
37334 #endif /* TARGET_XCOFF */
37336 void
37337 rs6000_asm_weaken_decl (FILE *stream, tree decl,
37338 const char *name, const char *val)
37340 fputs ("\t.weak\t", stream);
37341 RS6000_OUTPUT_BASENAME (stream, name);
37342 if (decl && TREE_CODE (decl) == FUNCTION_DECL
37343 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
37345 if (TARGET_XCOFF)
37346 fputs ("[DS]", stream);
37347 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37348 if (TARGET_XCOFF)
37349 fputs (rs6000_xcoff_visibility (decl), stream);
37350 #endif
37351 fputs ("\n\t.weak\t.", stream);
37352 RS6000_OUTPUT_BASENAME (stream, name);
37354 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37355 if (TARGET_XCOFF)
37356 fputs (rs6000_xcoff_visibility (decl), stream);
37357 #endif
37358 fputc ('\n', stream);
37359 if (val)
37361 #ifdef ASM_OUTPUT_DEF
37362 ASM_OUTPUT_DEF (stream, name, val);
37363 #endif
37364 if (decl && TREE_CODE (decl) == FUNCTION_DECL
37365 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
37367 fputs ("\t.set\t.", stream);
37368 RS6000_OUTPUT_BASENAME (stream, name);
37369 fputs (",.", stream);
37370 RS6000_OUTPUT_BASENAME (stream, val);
37371 fputc ('\n', stream);
37377 /* Return true if INSN should not be copied. */
37379 static bool
37380 rs6000_cannot_copy_insn_p (rtx_insn *insn)
37382 return recog_memoized (insn) >= 0
37383 && get_attr_cannot_copy (insn);
37386 /* Compute a (partial) cost for rtx X. Return true if the complete
37387 cost has been computed, and false if subexpressions should be
37388 scanned. In either case, *TOTAL contains the cost result. */
37390 static bool
37391 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
37392 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
37394 int code = GET_CODE (x);
37396 switch (code)
37398 /* On the RS/6000, if it is valid in the insn, it is free. */
37399 case CONST_INT:
37400 if (((outer_code == SET
37401 || outer_code == PLUS
37402 || outer_code == MINUS)
37403 && (satisfies_constraint_I (x)
37404 || satisfies_constraint_L (x)))
37405 || (outer_code == AND
37406 && (satisfies_constraint_K (x)
37407 || (mode == SImode
37408 ? satisfies_constraint_L (x)
37409 : satisfies_constraint_J (x))))
37410 || ((outer_code == IOR || outer_code == XOR)
37411 && (satisfies_constraint_K (x)
37412 || (mode == SImode
37413 ? satisfies_constraint_L (x)
37414 : satisfies_constraint_J (x))))
37415 || outer_code == ASHIFT
37416 || outer_code == ASHIFTRT
37417 || outer_code == LSHIFTRT
37418 || outer_code == ROTATE
37419 || outer_code == ROTATERT
37420 || outer_code == ZERO_EXTRACT
37421 || (outer_code == MULT
37422 && satisfies_constraint_I (x))
37423 || ((outer_code == DIV || outer_code == UDIV
37424 || outer_code == MOD || outer_code == UMOD)
37425 && exact_log2 (INTVAL (x)) >= 0)
37426 || (outer_code == COMPARE
37427 && (satisfies_constraint_I (x)
37428 || satisfies_constraint_K (x)))
37429 || ((outer_code == EQ || outer_code == NE)
37430 && (satisfies_constraint_I (x)
37431 || satisfies_constraint_K (x)
37432 || (mode == SImode
37433 ? satisfies_constraint_L (x)
37434 : satisfies_constraint_J (x))))
37435 || (outer_code == GTU
37436 && satisfies_constraint_I (x))
37437 || (outer_code == LTU
37438 && satisfies_constraint_P (x)))
37440 *total = 0;
37441 return true;
37443 else if ((outer_code == PLUS
37444 && reg_or_add_cint_operand (x, VOIDmode))
37445 || (outer_code == MINUS
37446 && reg_or_sub_cint_operand (x, VOIDmode))
37447 || ((outer_code == SET
37448 || outer_code == IOR
37449 || outer_code == XOR)
37450 && (INTVAL (x)
37451 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
37453 *total = COSTS_N_INSNS (1);
37454 return true;
37456 /* FALLTHRU */
37458 case CONST_DOUBLE:
37459 case CONST_WIDE_INT:
37460 case CONST:
37461 case HIGH:
37462 case SYMBOL_REF:
37463 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37464 return true;
37466 case MEM:
37467 /* When optimizing for size, MEM should be slightly more expensive
37468 than generating the address, e.g., (plus (reg) (const)).
37469 L1 cache latency is about two instructions. */
37470 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37471 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
37472 *total += COSTS_N_INSNS (100);
37473 return true;
37475 case LABEL_REF:
37476 *total = 0;
37477 return true;
37479 case PLUS:
37480 case MINUS:
37481 if (FLOAT_MODE_P (mode))
37482 *total = rs6000_cost->fp;
37483 else
37484 *total = COSTS_N_INSNS (1);
37485 return false;
37487 case MULT:
37488 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37489 && satisfies_constraint_I (XEXP (x, 1)))
37491 if (INTVAL (XEXP (x, 1)) >= -256
37492 && INTVAL (XEXP (x, 1)) <= 255)
37493 *total = rs6000_cost->mulsi_const9;
37494 else
37495 *total = rs6000_cost->mulsi_const;
37497 else if (mode == SFmode)
37498 *total = rs6000_cost->fp;
37499 else if (FLOAT_MODE_P (mode))
37500 *total = rs6000_cost->dmul;
37501 else if (mode == DImode)
37502 *total = rs6000_cost->muldi;
37503 else
37504 *total = rs6000_cost->mulsi;
37505 return false;
37507 case FMA:
37508 if (mode == SFmode)
37509 *total = rs6000_cost->fp;
37510 else
37511 *total = rs6000_cost->dmul;
37512 break;
37514 case DIV:
37515 case MOD:
37516 if (FLOAT_MODE_P (mode))
37518 *total = mode == DFmode ? rs6000_cost->ddiv
37519 : rs6000_cost->sdiv;
37520 return false;
37522 /* FALLTHRU */
37524 case UDIV:
37525 case UMOD:
37526 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37527 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
37529 if (code == DIV || code == MOD)
37530 /* Shift, addze */
37531 *total = COSTS_N_INSNS (2);
37532 else
37533 /* Shift */
37534 *total = COSTS_N_INSNS (1);
37536 else
37538 if (GET_MODE (XEXP (x, 1)) == DImode)
37539 *total = rs6000_cost->divdi;
37540 else
37541 *total = rs6000_cost->divsi;
37543 /* Add in shift and subtract for MOD unless we have a mod instruction. */
37544 if (!TARGET_MODULO && (code == MOD || code == UMOD))
37545 *total += COSTS_N_INSNS (2);
37546 return false;
37548 case CTZ:
37549 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
37550 return false;
37552 case FFS:
37553 *total = COSTS_N_INSNS (4);
37554 return false;
37556 case POPCOUNT:
37557 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
37558 return false;
37560 case PARITY:
37561 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
37562 return false;
37564 case NOT:
37565 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
37566 *total = 0;
37567 else
37568 *total = COSTS_N_INSNS (1);
37569 return false;
37571 case AND:
37572 if (CONST_INT_P (XEXP (x, 1)))
37574 rtx left = XEXP (x, 0);
37575 rtx_code left_code = GET_CODE (left);
37577 /* rotate-and-mask: 1 insn. */
37578 if ((left_code == ROTATE
37579 || left_code == ASHIFT
37580 || left_code == LSHIFTRT)
37581 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
37583 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
37584 if (!CONST_INT_P (XEXP (left, 1)))
37585 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
37586 *total += COSTS_N_INSNS (1);
37587 return true;
37590 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
37591 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
37592 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
37593 || (val & 0xffff) == val
37594 || (val & 0xffff0000) == val
37595 || ((val & 0xffff) == 0 && mode == SImode))
37597 *total = rtx_cost (left, mode, AND, 0, speed);
37598 *total += COSTS_N_INSNS (1);
37599 return true;
37602 /* 2 insns. */
37603 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
37605 *total = rtx_cost (left, mode, AND, 0, speed);
37606 *total += COSTS_N_INSNS (2);
37607 return true;
37611 *total = COSTS_N_INSNS (1);
37612 return false;
37614 case IOR:
37615 /* FIXME */
37616 *total = COSTS_N_INSNS (1);
37617 return true;
37619 case CLZ:
37620 case XOR:
37621 case ZERO_EXTRACT:
37622 *total = COSTS_N_INSNS (1);
37623 return false;
37625 case ASHIFT:
37626 /* The EXTSWSLI instruction is a combined instruction. Don't count the
37627 sign extend and the shift separately within the insn. */
37628 if (TARGET_EXTSWSLI && mode == DImode
37629 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
37630 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
37632 *total = 0;
37633 return false;
37635 /* fall through */
37637 case ASHIFTRT:
37638 case LSHIFTRT:
37639 case ROTATE:
37640 case ROTATERT:
37641 /* Handle mul_highpart. */
37642 if (outer_code == TRUNCATE
37643 && GET_CODE (XEXP (x, 0)) == MULT)
37645 if (mode == DImode)
37646 *total = rs6000_cost->muldi;
37647 else
37648 *total = rs6000_cost->mulsi;
37649 return true;
37651 else if (outer_code == AND)
37652 *total = 0;
37653 else
37654 *total = COSTS_N_INSNS (1);
37655 return false;
37657 case SIGN_EXTEND:
37658 case ZERO_EXTEND:
37659 if (GET_CODE (XEXP (x, 0)) == MEM)
37660 *total = 0;
37661 else
37662 *total = COSTS_N_INSNS (1);
37663 return false;
37665 case COMPARE:
37666 case NEG:
37667 case ABS:
37668 if (!FLOAT_MODE_P (mode))
37670 *total = COSTS_N_INSNS (1);
37671 return false;
37673 /* FALLTHRU */
37675 case FLOAT:
37676 case UNSIGNED_FLOAT:
37677 case FIX:
37678 case UNSIGNED_FIX:
37679 case FLOAT_TRUNCATE:
37680 *total = rs6000_cost->fp;
37681 return false;
37683 case FLOAT_EXTEND:
37684 if (mode == DFmode)
37685 *total = rs6000_cost->sfdf_convert;
37686 else
37687 *total = rs6000_cost->fp;
37688 return false;
37690 case UNSPEC:
37691 switch (XINT (x, 1))
37693 case UNSPEC_FRSP:
37694 *total = rs6000_cost->fp;
37695 return true;
37697 default:
37698 break;
37700 break;
37702 case CALL:
37703 case IF_THEN_ELSE:
37704 if (!speed)
37706 *total = COSTS_N_INSNS (1);
37707 return true;
37709 else if (FLOAT_MODE_P (mode)
37710 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
37712 *total = rs6000_cost->fp;
37713 return false;
37715 break;
37717 case NE:
37718 case EQ:
37719 case GTU:
37720 case LTU:
37721 /* Carry bit requires mode == Pmode.
37722 NEG or PLUS already counted so only add one. */
37723 if (mode == Pmode
37724 && (outer_code == NEG || outer_code == PLUS))
37726 *total = COSTS_N_INSNS (1);
37727 return true;
37729 if (outer_code == SET)
37731 if (XEXP (x, 1) == const0_rtx)
37733 if (TARGET_ISEL && !TARGET_MFCRF)
37734 *total = COSTS_N_INSNS (8);
37735 else
37736 *total = COSTS_N_INSNS (2);
37737 return true;
37739 else
37741 *total = COSTS_N_INSNS (3);
37742 return false;
37745 /* FALLTHRU */
37747 case GT:
37748 case LT:
37749 case UNORDERED:
37750 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
37752 if (TARGET_ISEL && !TARGET_MFCRF)
37753 *total = COSTS_N_INSNS (8);
37754 else
37755 *total = COSTS_N_INSNS (2);
37756 return true;
37758 /* CC COMPARE. */
37759 if (outer_code == COMPARE)
37761 *total = 0;
37762 return true;
37764 break;
37766 default:
37767 break;
37770 return false;
37773 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
37775 static bool
37776 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
37777 int opno, int *total, bool speed)
37779 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
37781 fprintf (stderr,
37782 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
37783 "opno = %d, total = %d, speed = %s, x:\n",
37784 ret ? "complete" : "scan inner",
37785 GET_MODE_NAME (mode),
37786 GET_RTX_NAME (outer_code),
37787 opno,
37788 *total,
37789 speed ? "true" : "false");
37791 debug_rtx (x);
37793 return ret;
37796 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
37798 static int
37799 rs6000_debug_address_cost (rtx x, machine_mode mode,
37800 addr_space_t as, bool speed)
37802 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
37804 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
37805 ret, speed ? "true" : "false");
37806 debug_rtx (x);
37808 return ret;
37812 /* A C expression returning the cost of moving data from a register of class
37813 CLASS1 to one of CLASS2. */
37815 static int
37816 rs6000_register_move_cost (machine_mode mode,
37817 reg_class_t from, reg_class_t to)
37819 int ret;
37821 if (TARGET_DEBUG_COST)
37822 dbg_cost_ctrl++;
37824 /* Moves from/to GENERAL_REGS. */
37825 if (reg_classes_intersect_p (to, GENERAL_REGS)
37826 || reg_classes_intersect_p (from, GENERAL_REGS))
37828 reg_class_t rclass = from;
37830 if (! reg_classes_intersect_p (to, GENERAL_REGS))
37831 rclass = to;
37833 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
37834 ret = (rs6000_memory_move_cost (mode, rclass, false)
37835 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
37837 /* It's more expensive to move CR_REGS than CR0_REGS because of the
37838 shift. */
37839 else if (rclass == CR_REGS)
37840 ret = 4;
37842 /* For those processors that have slow LR/CTR moves, make them more
37843 expensive than memory in order to bias spills to memory. */
37844 else if ((rs6000_cpu == PROCESSOR_POWER6
37845 || rs6000_cpu == PROCESSOR_POWER7
37846 || rs6000_cpu == PROCESSOR_POWER8
37847 || rs6000_cpu == PROCESSOR_POWER9)
37848 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
37849 ret = 6 * hard_regno_nregs (0, mode);
37851 else
37852 /* A move will cost one instruction per GPR moved. */
37853 ret = 2 * hard_regno_nregs (0, mode);
37856 /* If we have VSX, we can easily move between FPR or Altivec registers. */
37857 else if (VECTOR_MEM_VSX_P (mode)
37858 && reg_classes_intersect_p (to, VSX_REGS)
37859 && reg_classes_intersect_p (from, VSX_REGS))
37860 ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
37862 /* Moving between two similar registers is just one instruction. */
37863 else if (reg_classes_intersect_p (to, from))
37864 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
37866 /* Everything else has to go through GENERAL_REGS. */
37867 else
37868 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
37869 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
37871 if (TARGET_DEBUG_COST)
37873 if (dbg_cost_ctrl == 1)
37874 fprintf (stderr,
37875 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
37876 ret, GET_MODE_NAME (mode), reg_class_names[from],
37877 reg_class_names[to]);
37878 dbg_cost_ctrl--;
37881 return ret;
37884 /* A C expression returning the cost of moving data of MODE between a
37885 register and memory. */
37887 static int
37888 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
37889 bool in ATTRIBUTE_UNUSED)
37891 int ret;
37893 if (TARGET_DEBUG_COST)
37894 dbg_cost_ctrl++;
37896 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
37897 ret = 4 * hard_regno_nregs (0, mode);
37898 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
37899 || reg_classes_intersect_p (rclass, VSX_REGS)))
37900 ret = 4 * hard_regno_nregs (32, mode);
37901 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
37902 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
37903 else
37904 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
37906 if (TARGET_DEBUG_COST)
37908 if (dbg_cost_ctrl == 1)
37909 fprintf (stderr,
37910 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
37911 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
37912 dbg_cost_ctrl--;
37915 return ret;
37918 /* Returns a code for a target-specific builtin that implements
37919 reciprocal of the function, or NULL_TREE if not available. */
37921 static tree
37922 rs6000_builtin_reciprocal (tree fndecl)
37924 switch (DECL_FUNCTION_CODE (fndecl))
37926 case VSX_BUILTIN_XVSQRTDP:
37927 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
37928 return NULL_TREE;
37930 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
37932 case VSX_BUILTIN_XVSQRTSP:
37933 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
37934 return NULL_TREE;
37936 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
37938 default:
37939 return NULL_TREE;
37943 /* Load up a constant. If the mode is a vector mode, splat the value across
37944 all of the vector elements. */
37946 static rtx
37947 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
37949 rtx reg;
37951 if (mode == SFmode || mode == DFmode)
37953 rtx d = const_double_from_real_value (dconst, mode);
37954 reg = force_reg (mode, d);
37956 else if (mode == V4SFmode)
37958 rtx d = const_double_from_real_value (dconst, SFmode);
37959 rtvec v = gen_rtvec (4, d, d, d, d);
37960 reg = gen_reg_rtx (mode);
37961 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37963 else if (mode == V2DFmode)
37965 rtx d = const_double_from_real_value (dconst, DFmode);
37966 rtvec v = gen_rtvec (2, d, d);
37967 reg = gen_reg_rtx (mode);
37968 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37970 else
37971 gcc_unreachable ();
37973 return reg;
37976 /* Generate an FMA instruction. */
37978 static void
37979 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
37981 machine_mode mode = GET_MODE (target);
37982 rtx dst;
37984 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
37985 gcc_assert (dst != NULL);
37987 if (dst != target)
37988 emit_move_insn (target, dst);
37991 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
37993 static void
37994 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
37996 machine_mode mode = GET_MODE (dst);
37997 rtx r;
37999 /* This is a tad more complicated, since the fnma_optab is for
38000 a different expression: fma(-m1, m2, a), which is the same
38001 thing except in the case of signed zeros.
38003 Fortunately we know that if FMA is supported that FNMSUB is
38004 also supported in the ISA. Just expand it directly. */
38006 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
38008 r = gen_rtx_NEG (mode, a);
38009 r = gen_rtx_FMA (mode, m1, m2, r);
38010 r = gen_rtx_NEG (mode, r);
38011 emit_insn (gen_rtx_SET (dst, r));
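/* Why only signed zeros differ: algebraically
   -fma (m1, m2, -a) = -(m1*m2 - a) = a - m1*m2 = fma (-m1, m2, a).
   But when m1*m2 == a exactly, the first form negates an exact +0.0
   sum into -0.0 while the second yields +0.0, so the two expressions
   are not interchangeable under signed zeros, and we expand the
   FNMSUB shape directly.  */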
38014 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
38015 add a reg_note saying that this was a division. Support both scalar and
38016 vector divide. Assumes no trapping math and finite arguments. */
38018 void
38019 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
38021 machine_mode mode = GET_MODE (dst);
38022 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
38023 int i;
38025 /* Low precision estimates guarantee 5 bits of accuracy. High
38026 precision estimates guarantee 14 bits of accuracy. SFmode
38027 requires 23 bits of accuracy. DFmode requires 52 bits of
38028 accuracy. Each pass at least doubles the accuracy, leading
38029 to the following. */
38030 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
38031 if (mode == DFmode || mode == V2DFmode)
38032 passes++;
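/* In other words: a 5-bit estimate doubles through 10, 20 and 40 bits
   in 3 passes (covering SFmode's 23) and reaches 80 bits with the
   extra DFmode pass (covering 52); a 14-bit estimate needs only 28
   bits after 1 pass for SFmode, or 56 bits after 2 for DFmode.  */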
38034 enum insn_code code = optab_handler (smul_optab, mode);
38035 insn_gen_fn gen_mul = GEN_FCN (code);
38037 gcc_assert (code != CODE_FOR_nothing);
38039 one = rs6000_load_constant_and_splat (mode, dconst1);
38041 /* x0 = 1./d estimate */
38042 x0 = gen_reg_rtx (mode);
38043 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
38044 UNSPEC_FRES)));
38046 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
38047 if (passes > 1) {
38049 /* e0 = 1. - d * x0 */
38050 e0 = gen_reg_rtx (mode);
38051 rs6000_emit_nmsub (e0, d, x0, one);
38053 /* x1 = x0 + e0 * x0 */
38054 x1 = gen_reg_rtx (mode);
38055 rs6000_emit_madd (x1, e0, x0, x0);
38057 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
38058 ++i, xprev = xnext, eprev = enext) {
38060 /* enext = eprev * eprev */
38061 enext = gen_reg_rtx (mode);
38062 emit_insn (gen_mul (enext, eprev, eprev));
38064 /* xnext = xprev + enext * xprev */
38065 xnext = gen_reg_rtx (mode);
38066 rs6000_emit_madd (xnext, enext, xprev, xprev);
38069 } else
38070 xprev = x0;
38072 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
38074 /* u = n * xprev */
38075 u = gen_reg_rtx (mode);
38076 emit_insn (gen_mul (u, n, xprev));
38078 /* v = n - (d * u) */
38079 v = gen_reg_rtx (mode);
38080 rs6000_emit_nmsub (v, d, u, n);
38082 /* dst = (v * xprev) + u */
38083 rs6000_emit_madd (dst, v, xprev, u);
38085 if (note_p)
38086 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
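/* Recap of the math implemented above: with e_i = 1 - d*x_i, the step
   x_(i+1) = x_i + e_i*x_i squares the relative error each pass, since
   e_(i+1) = 1 - d*x_i*(1 + e_i) = e_i*e_i.  The final step folds in
   the numerator: u = n*x, v = n - d*u, and dst = u + v*x
   = n*x*(2 - d*x).  */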
38089 /* Goldschmidt's Algorithm for single/double-precision floating point
38090 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
38092 void
38093 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
38095 machine_mode mode = GET_MODE (src);
38096 rtx e = gen_reg_rtx (mode);
38097 rtx g = gen_reg_rtx (mode);
38098 rtx h = gen_reg_rtx (mode);
38100 /* Low precision estimates guarantee 5 bits of accuracy. High
38101 precision estimates guarantee 14 bits of accuracy. SFmode
38102 requires 23 bits of accuracy. DFmode requires 52 bits of
38103 accuracy. Each pass at least doubles the accuracy, leading
38104 to the following. */
38105 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
38106 if (mode == DFmode || mode == V2DFmode)
38107 passes++;
38109 int i;
38110 rtx mhalf;
38111 enum insn_code code = optab_handler (smul_optab, mode);
38112 insn_gen_fn gen_mul = GEN_FCN (code);
38114 gcc_assert (code != CODE_FOR_nothing);
38116 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
38118 /* e = rsqrt estimate */
38119 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
38120 UNSPEC_RSQRT)));
38122 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
38123 if (!recip)
38125 rtx zero = force_reg (mode, CONST0_RTX (mode));
38127 if (mode == SFmode)
38129 rtx target = emit_conditional_move (e, GT, src, zero, mode,
38130 e, zero, mode, 0);
38131 if (target != e)
38132 emit_move_insn (e, target);
38134 else
38136 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
38137 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
38141 /* g = sqrt estimate. */
38142 emit_insn (gen_mul (g, e, src));
38143 /* h = 1/(2*sqrt) estimate. */
38144 emit_insn (gen_mul (h, e, mhalf));
38146 if (recip)
38148 if (passes == 1)
38150 rtx t = gen_reg_rtx (mode);
38151 rs6000_emit_nmsub (t, g, h, mhalf);
38152 /* Apply the correction directly to the rsqrt estimate. */
38153 rs6000_emit_madd (dst, e, t, e);
38155 else
38157 for (i = 0; i < passes; i++)
38159 rtx t1 = gen_reg_rtx (mode);
38160 rtx g1 = gen_reg_rtx (mode);
38161 rtx h1 = gen_reg_rtx (mode);
38163 rs6000_emit_nmsub (t1, g, h, mhalf);
38164 rs6000_emit_madd (g1, g, t1, g);
38165 rs6000_emit_madd (h1, h, t1, h);
38167 g = g1;
38168 h = h1;
38170 /* rsqrt = 2*h, since h converges to 1/(2*sqrt(src)). */
38171 emit_insn (gen_add3_insn (dst, h, h));
38174 else
38176 rtx t = gen_reg_rtx (mode);
38177 rs6000_emit_nmsub (t, g, h, mhalf);
38178 rs6000_emit_madd (dst, g, t, g);
38181 return;
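/* The invariants maintained by the loop above: g_i converges to
   sqrt(src) and h_i to 1/(2*sqrt(src)).  Each pass computes
   t_i = 1/2 - g_i*h_i via rs6000_emit_nmsub, then
   g_(i+1) = g_i + g_i*t_i and h_(i+1) = h_i + h_i*t_i, so the error
   in 2*g_i*h_i shrinks quadratically.  The rsqrt result is 2*h (the
   gen_add3_insn call); the sqrt result is the corrected g.  */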
38184 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
38185 (Power7) targets. DST is the target, and SRC is the argument operand. */
38187 void
38188 rs6000_emit_popcount (rtx dst, rtx src)
38190 machine_mode mode = GET_MODE (dst);
38191 rtx tmp1, tmp2;
38193 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
38194 if (TARGET_POPCNTD)
38196 if (mode == SImode)
38197 emit_insn (gen_popcntdsi2 (dst, src));
38198 else
38199 emit_insn (gen_popcntddi2 (dst, src));
38200 return;
38203 tmp1 = gen_reg_rtx (mode);
38205 if (mode == SImode)
38207 emit_insn (gen_popcntbsi2 (tmp1, src));
38208 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
38209 NULL_RTX, 0);
38210 tmp2 = force_reg (SImode, tmp2);
38211 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
38213 else
38215 emit_insn (gen_popcntbdi2 (tmp1, src));
38216 tmp2 = expand_mult (DImode, tmp1,
38217 GEN_INT ((HOST_WIDE_INT)
38218 0x01010101 << 32 | 0x01010101),
38219 NULL_RTX, 0);
38220 tmp2 = force_reg (DImode, tmp2);
38221 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
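/* The multiply trick: popcntb leaves each byte of TMP1 holding the bit
   count of the corresponding source byte; multiplying by 0x01010101
   accumulates all byte counts into the most significant byte, which
   the final right shift extracts.  A host-side sketch of the SImode
   expansion (hypothetical helper, not part of the compiler):  */
#if 0
static unsigned int
popcount32_sketch (unsigned int x)
{
  unsigned int per_byte = 0;
  for (int i = 0; i < 4; i++)
    {
      /* Stand-in for the popcntb instruction: count bits per byte.  */
      unsigned int b = (x >> (8 * i)) & 0xff, c = 0;
      while (b) { c += b & 1; b >>= 1; }
      per_byte |= c << (8 * i);
    }
  /* Sum the four byte counts into the top byte, then extract it.  */
  return (per_byte * 0x01010101u) >> 24;
}
#endif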
38226 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
38227 target, and SRC is the argument operand. */
38229 void
38230 rs6000_emit_parity (rtx dst, rtx src)
38232 machine_mode mode = GET_MODE (dst);
38233 rtx tmp;
38235 tmp = gen_reg_rtx (mode);
38237 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
38238 if (TARGET_CMPB)
38240 if (mode == SImode)
38242 emit_insn (gen_popcntbsi2 (tmp, src));
38243 emit_insn (gen_paritysi2_cmpb (dst, tmp));
38245 else
38247 emit_insn (gen_popcntbdi2 (tmp, src));
38248 emit_insn (gen_paritydi2_cmpb (dst, tmp));
38250 return;
38253 if (mode == SImode)
38255 /* Is mult+shift >= shift+xor+shift+xor? */
38256 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
38258 rtx tmp1, tmp2, tmp3, tmp4;
38260 tmp1 = gen_reg_rtx (SImode);
38261 emit_insn (gen_popcntbsi2 (tmp1, src));
38263 tmp2 = gen_reg_rtx (SImode);
38264 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
38265 tmp3 = gen_reg_rtx (SImode);
38266 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
38268 tmp4 = gen_reg_rtx (SImode);
38269 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
38270 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
38272 else
38273 rs6000_emit_popcount (tmp, src);
38274 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
38276 else
38278 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
38279 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
38281 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
38283 tmp1 = gen_reg_rtx (DImode);
38284 emit_insn (gen_popcntbdi2 (tmp1, src));
38286 tmp2 = gen_reg_rtx (DImode);
38287 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
38288 tmp3 = gen_reg_rtx (DImode);
38289 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
38291 tmp4 = gen_reg_rtx (DImode);
38292 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
38293 tmp5 = gen_reg_rtx (DImode);
38294 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
38296 tmp6 = gen_reg_rtx (DImode);
38297 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
38298 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
38300 else
38301 rs6000_emit_popcount (tmp, src);
38302 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
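/* Why the shift/XOR ladder works: XOR preserves the low bit of a sum,
   so after popcntb the parity of the whole word equals the XOR of the
   per-byte counts' low bits.  Folding the word in half with shift+XOR
   (twice for SImode, three times for DImode) collects that into the
   low byte, and the final AND with 1 extracts it.  */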
38306 /* Expand an Altivec constant permutation for little endian mode.
38307 There are two issues: First, the two input operands must be
38308 swapped so that together they form a double-wide array in LE
38309 order. Second, the vperm instruction has surprising behavior
38310 in LE mode: it interprets the elements of the source vectors
38311 in BE mode ("left to right") and interprets the elements of
38312 the destination vector in LE mode ("right to left"). To
38313 correct for this, we must subtract each element of the permute
38314 control vector from 31.
38316 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
38317 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
38318 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
38319 serve as the permute control vector. Then, in BE mode,
38321 vperm 9,10,11,12
38323 places the desired result in vr9. However, in LE mode the
38324 vector contents will be
38326 vr10 = 00000003 00000002 00000001 00000000
38327 vr11 = 00000007 00000006 00000005 00000004
38329 The result of the vperm using the same permute control vector is
38331 vr9 = 05000000 07000000 01000000 03000000
38333 That is, the leftmost 4 bytes of vr10 are interpreted as the
38334 source for the rightmost 4 bytes of vr9, and so on.
38336 If we change the permute control vector to
38338 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
38340 and issue
38342 vperm 9,11,10,12
38344 we get the desired
38346 vr9 = 00000006 00000004 00000002 00000000. */
38348 void
38349 altivec_expand_vec_perm_const_le (rtx operands[4])
38351 unsigned int i;
38352 rtx perm[16];
38353 rtx constv, unspec;
38354 rtx target = operands[0];
38355 rtx op0 = operands[1];
38356 rtx op1 = operands[2];
38357 rtx sel = operands[3];
38359 /* Unpack and adjust the constant selector. */
38360 for (i = 0; i < 16; ++i)
38362 rtx e = XVECEXP (sel, 0, i);
38363 unsigned int elt = 31 - (INTVAL (e) & 31);
38364 perm[i] = GEN_INT (elt);
38367 /* Expand to a permute, swapping the inputs and using the
38368 adjusted selector. */
38369 if (!REG_P (op0))
38370 op0 = force_reg (V16QImode, op0);
38371 if (!REG_P (op1))
38372 op1 = force_reg (V16QImode, op1);
38374 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
38375 constv = force_reg (V16QImode, constv);
38376 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
38377 UNSPEC_VPERM);
38378 if (!REG_P (target))
38380 rtx tmp = gen_reg_rtx (V16QImode);
38381 emit_move_insn (tmp, unspec);
38382 unspec = tmp;
38385 emit_move_insn (target, unspec);
38388 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
38389 permute control vector. But here it's not a constant, so we must
38390 generate a vector NAND or NOR to do the adjustment. */
38392 void
38393 altivec_expand_vec_perm_le (rtx operands[4])
38395 rtx notx, iorx, unspec;
38396 rtx target = operands[0];
38397 rtx op0 = operands[1];
38398 rtx op1 = operands[2];
38399 rtx sel = operands[3];
38400 rtx tmp = target;
38401 rtx norreg = gen_reg_rtx (V16QImode);
38402 machine_mode mode = GET_MODE (target);
38404 /* Get everything in regs so the pattern matches. */
38405 if (!REG_P (op0))
38406 op0 = force_reg (mode, op0);
38407 if (!REG_P (op1))
38408 op1 = force_reg (mode, op1);
38409 if (!REG_P (sel))
38410 sel = force_reg (V16QImode, sel);
38411 if (!REG_P (target))
38412 tmp = gen_reg_rtx (mode);
38414 if (TARGET_P9_VECTOR)
38416 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
38417 UNSPEC_VPERMR);
38419 else
38421 /* Invert the selector with a VNAND if available, else a VNOR.
38422 The VNAND is preferred for future fusion opportunities. */
38423 notx = gen_rtx_NOT (V16QImode, sel);
38424 iorx = (TARGET_P8_VECTOR
38425 ? gen_rtx_IOR (V16QImode, notx, notx)
38426 : gen_rtx_AND (V16QImode, notx, notx));
38427 emit_insn (gen_rtx_SET (norreg, iorx));
38429 /* Permute with operands reversed and adjusted selector. */
38430 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
38431 UNSPEC_VPERM);
38434 /* Copy into target, possibly by way of a register. */
38435 if (!REG_P (target))
38437 emit_move_insn (tmp, unspec);
38438 unspec = tmp;
38441 emit_move_insn (target, unspec);
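/* The NAND/NOR trick relies on vperm reading only the low five bits of
   each selector byte: ~e is congruent to 31 - e modulo 32, so a simple
   bitwise NOT of the selector performs the same 31 - e adjustment that
   altivec_expand_vec_perm_const_le applies explicitly.  */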
38444 /* Expand an Altivec constant permutation. Return true if we match
38445 an efficient implementation; false to fall back to VPERM. */
38447 bool
38448 altivec_expand_vec_perm_const (rtx operands[4])
38450 struct altivec_perm_insn {
38451 HOST_WIDE_INT mask;
38452 enum insn_code impl;
38453 unsigned char perm[16];
38455 static const struct altivec_perm_insn patterns[] = {
38456 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
38457 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
38458 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
38459 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
38460 { OPTION_MASK_ALTIVEC,
38461 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
38462 : CODE_FOR_altivec_vmrglb_direct),
38463 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
38464 { OPTION_MASK_ALTIVEC,
38465 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
38466 : CODE_FOR_altivec_vmrglh_direct),
38467 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
38468 { OPTION_MASK_ALTIVEC,
38469 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
38470 : CODE_FOR_altivec_vmrglw_direct),
38471 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
38472 { OPTION_MASK_ALTIVEC,
38473 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
38474 : CODE_FOR_altivec_vmrghb_direct),
38475 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
38476 { OPTION_MASK_ALTIVEC,
38477 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
38478 : CODE_FOR_altivec_vmrghh_direct),
38479 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
38480 { OPTION_MASK_ALTIVEC,
38481 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
38482 : CODE_FOR_altivec_vmrghw_direct),
38483 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
38484 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
38485 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
38486 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
38487 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
38490 unsigned int i, j, elt, which;
38491 unsigned char perm[16];
38492 rtx target, op0, op1, sel, x;
38493 bool one_vec;
38495 target = operands[0];
38496 op0 = operands[1];
38497 op1 = operands[2];
38498 sel = operands[3];
38500 /* Unpack the constant selector. */
38501 for (i = which = 0; i < 16; ++i)
38503 rtx e = XVECEXP (sel, 0, i);
38504 elt = INTVAL (e) & 31;
38505 which |= (elt < 16 ? 1 : 2);
38506 perm[i] = elt;
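/* WHICH is a bitmask of which inputs the selector references:
   bit 0 for op0 (elt < 16), bit 1 for op1 (elt >= 16). */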
38509 /* Simplify the constant selector based on operands. */
38510 switch (which)
38512 default:
38513 gcc_unreachable ();
38515 case 3:
38516 one_vec = false;
38517 if (!rtx_equal_p (op0, op1))
38518 break;
38519 /* FALLTHRU */
38521 case 2:
38522 for (i = 0; i < 16; ++i)
38523 perm[i] &= 15;
38524 op0 = op1;
38525 one_vec = true;
38526 break;
38528 case 1:
38529 op1 = op0;
38530 one_vec = true;
38531 break;
38534 /* Look for splat patterns. */
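/* For example, a selector consisting of sixteen copies of byte index 5
   can be implemented as a single vspltb of element 5 (adjusted below
   for little-endian numbering). */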
38535 if (one_vec)
38537 elt = perm[0];
38539 for (i = 0; i < 16; ++i)
38540 if (perm[i] != elt)
38541 break;
38542 if (i == 16)
38544 if (!BYTES_BIG_ENDIAN)
38545 elt = 15 - elt;
38546 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
38547 return true;
38550 if (elt % 2 == 0)
38552 for (i = 0; i < 16; i += 2)
38553 if (perm[i] != elt || perm[i + 1] != elt + 1)
38554 break;
38555 if (i == 16)
38557 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
38558 x = gen_reg_rtx (V8HImode);
38559 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
38560 GEN_INT (field)));
38561 emit_move_insn (target, gen_lowpart (V16QImode, x));
38562 return true;
38566 if (elt % 4 == 0)
38568 for (i = 0; i < 16; i += 4)
38569 if (perm[i] != elt
38570 || perm[i + 1] != elt + 1
38571 || perm[i + 2] != elt + 2
38572 || perm[i + 3] != elt + 3)
38573 break;
38574 if (i == 16)
38576 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
38577 x = gen_reg_rtx (V4SImode);
38578 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
38579 GEN_INT (field)));
38580 emit_move_insn (target, gen_lowpart (V16QImode, x));
38581 return true;
38586 /* Look for merge and pack patterns. */
38587 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
38589 bool swapped;
38591 if ((patterns[j].mask & rs6000_isa_flags) == 0)
38592 continue;
38594 elt = patterns[j].perm[0];
38595 if (perm[0] == elt)
38596 swapped = false;
38597 else if (perm[0] == elt + 16)
38598 swapped = true;
38599 else
38600 continue;
38601 for (i = 1; i < 16; ++i)
38603 elt = patterns[j].perm[i];
38604 if (swapped)
38605 elt = (elt >= 16 ? elt - 16 : elt + 16);
38606 else if (one_vec && elt >= 16)
38607 elt -= 16;
38608 if (perm[i] != elt)
38609 break;
38611 if (i == 16)
38613 enum insn_code icode = patterns[j].impl;
38614 machine_mode omode = insn_data[icode].operand[0].mode;
38615 machine_mode imode = insn_data[icode].operand[1].mode;
38617 /* For little-endian, don't use vpkuwum and vpkuhum if the
38618 underlying vector type is not V4SI and V8HI, respectively.
38619 For example, using vpkuwum with a V8HI picks up the even
38620 halfwords (BE numbering) when the even halfwords (LE
38621 numbering) are what we need. */
38622 if (!BYTES_BIG_ENDIAN
38623 && icode == CODE_FOR_altivec_vpkuwum_direct
38624 && ((GET_CODE (op0) == REG
38625 && GET_MODE (op0) != V4SImode)
38626 || (GET_CODE (op0) == SUBREG
38627 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
38628 continue;
38629 if (!BYTES_BIG_ENDIAN
38630 && icode == CODE_FOR_altivec_vpkuhum_direct
38631 && ((GET_CODE (op0) == REG
38632 && GET_MODE (op0) != V8HImode)
38633 || (GET_CODE (op0) == SUBREG
38634 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
38635 continue;
38637 /* For little-endian, the two input operands must be swapped
38638 (or swapped back) to ensure proper right-to-left numbering
38639 from 0 to 2N-1. */
38640 if (swapped ^ !BYTES_BIG_ENDIAN)
38641 std::swap (op0, op1);
38642 if (imode != V16QImode)
38644 op0 = gen_lowpart (imode, op0);
38645 op1 = gen_lowpart (imode, op1);
38647 if (omode == V16QImode)
38648 x = target;
38649 else
38650 x = gen_reg_rtx (omode);
38651 emit_insn (GEN_FCN (icode) (x, op0, op1));
38652 if (omode != V16QImode)
38653 emit_move_insn (target, gen_lowpart (V16QImode, x));
38654 return true;
38658 if (!BYTES_BIG_ENDIAN)
38660 altivec_expand_vec_perm_const_le (operands);
38661 return true;
38664 return false;
38667 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
38668 Return true if we match an efficient implementation. */
38670 static bool
38671 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
38672 unsigned char perm0, unsigned char perm1)
38674 rtx x;
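/* PERM0 and PERM1 are 2-bit element selectors: bit 1 chooses the
   operand (0 = op0, 1 = op1) and bit 0 chooses the element within it. */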
38676 /* If both selectors come from the same operand, fold to single op. */
38677 if ((perm0 & 2) == (perm1 & 2))
38679 if (perm0 & 2)
38680 op0 = op1;
38681 else
38682 op1 = op0;
38684 /* If both operands are equal, fold to simpler permutation. */
38685 if (rtx_equal_p (op0, op1))
38687 perm0 = perm0 & 1;
38688 perm1 = (perm1 & 1) + 2;
38690 /* If the first selector comes from the second operand, swap. */
38691 else if (perm0 & 2)
38693 if (perm1 & 2)
38694 return false;
38695 perm0 -= 2;
38696 perm1 += 2;
38697 std::swap (op0, op1);
38699 /* If the second selector does not come from the second operand, fail. */
38700 else if ((perm1 & 2) == 0)
38701 return false;
38703 /* Success! */
38704 if (target != NULL)
38706 machine_mode vmode, dmode;
38707 rtvec v;
38709 vmode = GET_MODE (target);
38710 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
38711 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
38712 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
38713 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
38714 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
38715 emit_insn (gen_rtx_SET (target, x));
38717 return true;
38720 bool
38721 rs6000_expand_vec_perm_const (rtx operands[4])
38723 rtx target, op0, op1, sel;
38724 unsigned char perm0, perm1;
38726 target = operands[0];
38727 op0 = operands[1];
38728 op1 = operands[2];
38729 sel = operands[3];
38731 /* Unpack the constant selector. */
38732 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
38733 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
38735 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
38738 /* Test whether a constant permutation is supported. */
38740 static bool
38741 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
38742 const unsigned char *sel)
38744 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
38745 if (TARGET_ALTIVEC)
38746 return true;
38748 /* Check for ps_merge* or evmerge* insns. */
38749 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
38750 || (TARGET_SPE && vmode == V2SImode))
38752 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
38753 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
38754 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
38757 return false;
38760 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
38762 static void
38763 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
38764 machine_mode vmode, unsigned nelt, rtx perm[])
38766 machine_mode imode;
38767 rtx x;
38769 imode = vmode;
38770 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
38771 imode = mode_for_int_vector (vmode).require ();
38773 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
38774 x = expand_vec_perm (vmode, op0, op1, x, target);
38775 if (x != target)
38776 emit_move_insn (target, x);
38779 /* Expand an extract even operation. */
38781 void
38782 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
38784 machine_mode vmode = GET_MODE (target);
38785 unsigned i, nelt = GET_MODE_NUNITS (vmode);
38786 rtx perm[16];
38788 for (i = 0; i < nelt; i++)
38789 perm[i] = GEN_INT (i * 2);
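/* For example, for V4SI this builds the selector { 0, 2, 4, 6 }, i.e.
   the even-numbered elements of the concatenated (op0, op1) pair. */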
38791 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
38794 /* Expand a vector interleave operation. */
38796 void
38797 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
38799 machine_mode vmode = GET_MODE (target);
38800 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
38801 rtx perm[16];
38803 high = (highp ? 0 : nelt / 2);
38804 for (i = 0; i < nelt / 2; i++)
38806 perm[i * 2] = GEN_INT (i + high);
38807 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
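/* For example, for V4SI with HIGHP set this builds the selector
   { 0, 4, 1, 5 }, interleaving the first halves of op0 and op1. */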
38810 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
38813 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
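/* For example, SCALE = 3 multiplies each element of SRC by 2**3 = 8. */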
38814 void
38815 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
38817 HOST_WIDE_INT hwi_scale (scale);
38818 REAL_VALUE_TYPE r_pow;
38819 rtvec v = rtvec_alloc (2);
38820 rtx elt;
38821 rtx scale_vec = gen_reg_rtx (V2DFmode);
38822 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
38823 elt = const_double_from_real_value (r_pow, DFmode);
38824 RTVEC_ELT (v, 0) = elt;
38825 RTVEC_ELT (v, 1) = elt;
38826 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
38827 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
38830 /* Return an RTX representing where to find the function value of a
38831 function returning MODE. */
38832 static rtx
38833 rs6000_complex_function_value (machine_mode mode)
38835 unsigned int regno;
38836 rtx r1, r2;
38837 machine_mode inner = GET_MODE_INNER (mode);
38838 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
38840 if (TARGET_FLOAT128_TYPE
38841 && (mode == KCmode
38842 || (mode == TCmode && TARGET_IEEEQUAD)))
38843 regno = ALTIVEC_ARG_RETURN;
38845 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38846 regno = FP_ARG_RETURN;
38848 else
38850 regno = GP_ARG_RETURN;
38852 /* 32-bit is OK since it'll go in r3/r4. */
38853 if (TARGET_32BIT && inner_bytes >= 4)
38854 return gen_rtx_REG (mode, regno);
38857 if (inner_bytes >= 8)
38858 return gen_rtx_REG (mode, regno);
38860 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
38861 const0_rtx);
38862 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
38863 GEN_INT (inner_bytes));
38864 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
38867 /* Return an rtx describing a return value of MODE as a PARALLEL
38868 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
38869 stride REG_STRIDE. */
38871 static rtx
38872 rs6000_parallel_return (machine_mode mode,
38873 int n_elts, machine_mode elt_mode,
38874 unsigned int regno, unsigned int reg_stride)
38876 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
38878 int i;
38879 for (i = 0; i < n_elts; i++)
38881 rtx r = gen_rtx_REG (elt_mode, regno);
38882 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
38883 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
38884 regno += reg_stride;
38887 return par;
38890 /* Target hook for TARGET_FUNCTION_VALUE.
38892 On the SPE, both FPs and vectors are returned in r3.
38894 On RS/6000 an integer value is in r3 and a floating-point value is in
38895 fp1, unless -msoft-float. */
38897 static rtx
38898 rs6000_function_value (const_tree valtype,
38899 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
38900 bool outgoing ATTRIBUTE_UNUSED)
38902 machine_mode mode;
38903 unsigned int regno;
38904 machine_mode elt_mode;
38905 int n_elts;
38907 /* Special handling for structs in darwin64. */
38908 if (TARGET_MACHO
38909 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
38911 CUMULATIVE_ARGS valcum;
38912 rtx valret;
38914 valcum.words = 0;
38915 valcum.fregno = FP_ARG_MIN_REG;
38916 valcum.vregno = ALTIVEC_ARG_MIN_REG;
38917 /* Do a trial code generation as if this were going to be passed as
38918 an argument; if any part goes in memory, we return NULL. */
38919 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
38920 if (valret)
38921 return valret;
38922 /* Otherwise fall through to standard ABI rules. */
38925 mode = TYPE_MODE (valtype);
38927 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
38928 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
38930 int first_reg, n_regs;
38932 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
38934 /* _Decimal128 must use even/odd register pairs. */
38935 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38936 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
38938 else
38940 first_reg = ALTIVEC_ARG_RETURN;
38941 n_regs = 1;
38944 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
38947 /* Some return value types need to be split when using -mpowerpc64 with the 32-bit ABI. */
38948 if (TARGET_32BIT && TARGET_POWERPC64)
38949 switch (mode)
38951 default:
38952 break;
38953 case E_DImode:
38954 case E_SCmode:
38955 case E_DCmode:
38956 case E_TCmode:
38957 int count = GET_MODE_SIZE (mode) / 4;
38958 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
38961 if ((INTEGRAL_TYPE_P (valtype)
38962 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
38963 || POINTER_TYPE_P (valtype))
38964 mode = TARGET_32BIT ? SImode : DImode;
38966 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38967 /* _Decimal128 must use an even/odd register pair. */
38968 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38969 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
38970 && !FLOAT128_VECTOR_P (mode)
38971 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
38972 regno = FP_ARG_RETURN;
38973 else if (TREE_CODE (valtype) == COMPLEX_TYPE
38974 && targetm.calls.split_complex_arg)
38975 return rs6000_complex_function_value (mode);
38976 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38977 return register is used in both cases, and we won't see V2DImode/V2DFmode
38978 for pure altivec, combine the two cases. */
38979 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
38980 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
38981 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
38982 regno = ALTIVEC_ARG_RETURN;
38983 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38984 && (mode == DFmode || mode == DCmode
38985 || FLOAT128_IBM_P (mode) || mode == TCmode))
38986 return spe_build_register_parallel (mode, GP_ARG_RETURN);
38987 else
38988 regno = GP_ARG_RETURN;
38990 return gen_rtx_REG (mode, regno);
38993 /* Define how to find the value returned by a library function
38994 assuming the value has mode MODE. */
38995 rtx
38996 rs6000_libcall_value (machine_mode mode)
38998 unsigned int regno;
39000 /* A long long return value needs to be split when using -mpowerpc64 with the 32-bit ABI. */
39001 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
39002 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
39004 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
39005 /* _Decimal128 must use an even/odd register pair. */
39006 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
39007 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
39008 && TARGET_HARD_FLOAT && TARGET_FPRS
39009 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
39010 regno = FP_ARG_RETURN;
39011 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
39012 return register is used in both cases, and we won't see V2DImode/V2DFmode
39013 for pure altivec, combine the two cases. */
39014 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
39015 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
39016 regno = ALTIVEC_ARG_RETURN;
39017 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
39018 return rs6000_complex_function_value (mode);
39019 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
39020 && (mode == DFmode || mode == DCmode
39021 || FLOAT128_IBM_P (mode) || mode == TCmode))
39022 return spe_build_register_parallel (mode, GP_ARG_RETURN);
39023 else
39024 regno = GP_ARG_RETURN;
39026 return gen_rtx_REG (mode, regno);
39030 /* Return true if we use LRA instead of reload pass. */
39031 static bool
39032 rs6000_lra_p (void)
39034 return TARGET_LRA;
39037 /* Compute register pressure classes. We implement the target hook to avoid
39038 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
39039 lead to incorrect estimates of the number of available registers and therefore
39040 increased register pressure/spill. */
39041 static int
39042 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
39044 int n;
39046 n = 0;
39047 pressure_classes[n++] = GENERAL_REGS;
39048 if (TARGET_VSX)
39049 pressure_classes[n++] = VSX_REGS;
39050 else
39052 if (TARGET_ALTIVEC)
39053 pressure_classes[n++] = ALTIVEC_REGS;
39054 if (TARGET_HARD_FLOAT && TARGET_FPRS)
39055 pressure_classes[n++] = FLOAT_REGS;
39057 pressure_classes[n++] = CR_REGS;
39058 pressure_classes[n++] = SPECIAL_REGS;
39060 return n;
39063 /* Given FROM and TO register numbers, say whether this elimination is allowed.
39064 Frame pointer elimination is automatically handled.
39066 For the RS/6000, if frame pointer elimination is being done, we would like
39067 to convert ap into fp, not sp.
39069 We need r30 if -mminimal-toc was specified, and there are constant pool
39070 references. */
39072 static bool
39073 rs6000_can_eliminate (const int from, const int to)
39075 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
39076 ? ! frame_pointer_needed
39077 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
39078 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
39079 || constant_pool_empty_p ()
39080 : true);
39083 /* Define the offset between two registers, FROM to be eliminated and its
39084 replacement TO, at the start of a routine. */
39085 HOST_WIDE_INT
39086 rs6000_initial_elimination_offset (int from, int to)
39088 rs6000_stack_t *info = rs6000_stack_info ();
39089 HOST_WIDE_INT offset;
39091 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39092 offset = info->push_p ? 0 : -info->total_size;
39093 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39095 offset = info->push_p ? 0 : -info->total_size;
39096 if (FRAME_GROWS_DOWNWARD)
39097 offset += info->fixed_size + info->vars_size + info->parm_size;
39099 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
39100 offset = FRAME_GROWS_DOWNWARD
39101 ? info->fixed_size + info->vars_size + info->parm_size
39102 : 0;
39103 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
39104 offset = info->total_size;
39105 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39106 offset = info->push_p ? info->total_size : 0;
39107 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
39108 offset = 0;
39109 else
39110 gcc_unreachable ();
39112 return offset;
39115 static rtx
39116 rs6000_dwarf_register_span (rtx reg)
39118 rtx parts[8];
39119 int i, words;
39120 unsigned regno = REGNO (reg);
39121 machine_mode mode = GET_MODE (reg);
39123 if (TARGET_SPE
39124 && regno < 32
39125 && (SPE_VECTOR_MODE (GET_MODE (reg))
39126 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
39127 && mode != SFmode && mode != SDmode && mode != SCmode)))
39128 ;
39129 else
39130 return NULL_RTX;
39132 regno = REGNO (reg);
39134 /* The duality of the SPE register size wreaks all kinds of havoc.
39135 This is a way of distinguishing r0 in 32-bits from r0 in
39136 64-bits. */
39137 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
39138 gcc_assert (words <= 4);
39139 for (i = 0; i < words; i++, regno++)
39141 if (BYTES_BIG_ENDIAN)
39143 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39144 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
39146 else
39148 parts[2 * i] = gen_rtx_REG (SImode, regno);
39149 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39153 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
39156 /* Fill in sizes for SPE register high parts in table used by unwinder. */
39158 static void
39159 rs6000_init_dwarf_reg_sizes_extra (tree address)
39161 if (TARGET_SPE)
39163 int i;
39164 machine_mode mode = TYPE_MODE (char_type_node);
39165 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39166 rtx mem = gen_rtx_MEM (BLKmode, addr);
39167 rtx value = gen_int_mode (4, mode);
39169 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
39171 int column = DWARF_REG_TO_UNWIND_COLUMN
39172 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39173 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39175 emit_move_insn (adjust_address (mem, mode, offset), value);
39179 if (TARGET_MACHO && ! TARGET_ALTIVEC)
39181 int i;
39182 machine_mode mode = TYPE_MODE (char_type_node);
39183 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39184 rtx mem = gen_rtx_MEM (BLKmode, addr);
39185 rtx value = gen_int_mode (16, mode);
39187 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
39188 The unwinder still needs to know the size of Altivec registers. */
39190 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
39192 int column = DWARF_REG_TO_UNWIND_COLUMN
39193 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39194 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39196 emit_move_insn (adjust_address (mem, mode, offset), value);
39201 /* Map internal gcc register numbers to debug format register numbers.
39202 FORMAT specifies the type of debug register number to use:
39203 0 -- debug information, except for frame-related sections
39204 1 -- DWARF .debug_frame section
39205 2 -- DWARF .eh_frame section */
39207 unsigned int
39208 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
39210 /* We never use the GCC internal number for SPE high registers.
39211 Those are mapped to the 1200..1231 range for all debug formats. */
39212 if (SPE_HIGH_REGNO_P (regno))
39213 return regno - FIRST_SPE_HIGH_REGNO + 1200;
39215 /* Except for the above, we use the internal number for non-DWARF
39216 debug information, and also for .eh_frame. */
39217 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
39218 return regno;
39220 /* On some platforms, we use the standard DWARF register
39221 numbering for .debug_info and .debug_frame. */
39222 #ifdef RS6000_USE_DWARF_NUMBERING
39223 if (regno <= 63)
39224 return regno;
39225 if (regno == LR_REGNO)
39226 return 108;
39227 if (regno == CTR_REGNO)
39228 return 109;
39229 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
39230 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
39231 The actual code emitted saves the whole of CR, so we map CR2_REGNO
39232 to the DWARF reg for CR. */
39233 if (format == 1 && regno == CR2_REGNO)
39234 return 64;
39235 if (CR_REGNO_P (regno))
39236 return regno - CR0_REGNO + 86;
39237 if (regno == CA_REGNO)
39238 return 101; /* XER */
39239 if (ALTIVEC_REGNO_P (regno))
39240 return regno - FIRST_ALTIVEC_REGNO + 1124;
39241 if (regno == VRSAVE_REGNO)
39242 return 356;
39243 if (regno == VSCR_REGNO)
39244 return 67;
39245 if (regno == SPE_ACC_REGNO)
39246 return 99;
39247 if (regno == SPEFSCR_REGNO)
39248 return 612;
39249 #endif
39250 return regno;
39253 /* target hook eh_return_filter_mode */
39254 static scalar_int_mode
39255 rs6000_eh_return_filter_mode (void)
39257 return TARGET_32BIT ? SImode : word_mode;
39260 /* Target hook for scalar_mode_supported_p. */
39261 static bool
39262 rs6000_scalar_mode_supported_p (scalar_mode mode)
39264 /* -m32 does not support TImode. This is the default, from
39265 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
39266 same ABI as for -m32. But default_scalar_mode_supported_p allows
39267 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
39268 for -mpowerpc64. */
39269 if (TARGET_32BIT && mode == TImode)
39270 return false;
39272 if (DECIMAL_FLOAT_MODE_P (mode))
39273 return default_decimal_float_supported_p ();
39274 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
39275 return true;
39276 else
39277 return default_scalar_mode_supported_p (mode);
39280 /* Target hook for vector_mode_supported_p. */
39281 static bool
39282 rs6000_vector_mode_supported_p (machine_mode mode)
39285 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
39286 return true;
39288 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
39289 return true;
39291 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
39292 128-bit, the compiler might try to widen IEEE 128-bit to IBM
39293 double-double. */
39294 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
39295 return true;
39297 else
39298 return false;
39301 /* Target hook for floatn_mode. */
39302 static opt_scalar_float_mode
39303 rs6000_floatn_mode (int n, bool extended)
39305 if (extended)
39307 switch (n)
39309 case 32:
39310 return DFmode;
39312 case 64:
39313 if (TARGET_FLOAT128_KEYWORD)
39314 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39315 else
39316 return opt_scalar_float_mode ();
39318 case 128:
39319 return opt_scalar_float_mode ();
39321 default:
39322 /* Those are the only valid _FloatNx types. */
39323 gcc_unreachable ();
39326 else
39328 switch (n)
39330 case 32:
39331 return SFmode;
39333 case 64:
39334 return DFmode;
39336 case 128:
39337 if (TARGET_FLOAT128_KEYWORD)
39338 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39339 else
39340 return opt_scalar_float_mode ();
39342 default:
39343 return opt_scalar_float_mode ();
39349 /* Target hook for c_mode_for_suffix. */
39350 static machine_mode
39351 rs6000_c_mode_for_suffix (char suffix)
39353 if (TARGET_FLOAT128_TYPE)
39355 if (suffix == 'q' || suffix == 'Q')
39356 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39358 /* At the moment, we are not defining a suffix for IBM extended double.
39359 If/when the default for -mabi=ieeelongdouble is changed, and we want
39360 to support __ibm128 constants in legacy library code, we may need to
39361 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
39362 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
39363 __float80 constants. */
39366 return VOIDmode;
39369 /* Target hook for invalid_arg_for_unprototyped_fn. */
39370 static const char *
39371 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
39373 return (!rs6000_darwin64_abi
39374 && typelist == 0
39375 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
39376 && (funcdecl == NULL_TREE
39377 || (TREE_CODE (funcdecl) == FUNCTION_DECL
39378 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
39379 ? N_("AltiVec argument passed to unprototyped function")
39380 : NULL;
39383 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
39384 setup by using __stack_chk_fail_local hidden function instead of
39385 calling __stack_chk_fail directly. Otherwise it is better to call
39386 __stack_chk_fail directly. */
39388 static tree ATTRIBUTE_UNUSED
39389 rs6000_stack_protect_fail (void)
39391 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
39392 ? default_hidden_stack_protect_fail ()
39393 : default_external_stack_protect_fail ();
39396 void
39397 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
39398 int num_operands ATTRIBUTE_UNUSED)
39400 if (rs6000_warn_cell_microcode)
39402 const char *temp;
39403 int insn_code_number = recog_memoized (insn);
39404 location_t location = INSN_LOCATION (insn);
39406 /* Punt on insns we cannot recognize. */
39407 if (insn_code_number < 0)
39408 return;
39410 /* get_insn_template can modify recog_data, so save and restore it. */
39411 struct recog_data_d recog_data_save = recog_data;
39412 for (int i = 0; i < recog_data.n_operands; i++)
39413 recog_data.operand[i] = copy_rtx (recog_data.operand[i]);
39414 temp = get_insn_template (insn_code_number, insn);
39415 recog_data = recog_data_save;
39417 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
39418 warning_at (location, OPT_mwarn_cell_microcode,
39419 "emitting microcode insn %s\t[%s] #%d",
39420 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39421 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
39422 warning_at (location, OPT_mwarn_cell_microcode,
39423 "emitting conditional microcode insn %s\t[%s] #%d",
39424 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39428 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
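/* With the usual AddressSanitizer shadow scale of 3, an address A is
   mapped to shadow memory at (A >> 3) + this offset, i.e. the shadow
   region starts at 1 << 41 for 64-bit and 1 << 29 for 32-bit. */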
39430 #if TARGET_ELF
39431 static unsigned HOST_WIDE_INT
39432 rs6000_asan_shadow_offset (void)
39434 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
39436 #endif
39438 /* Mask options that we want to support inside of attribute((target)) and
39439 #pragma GCC target operations. Note, we do not include things like
39440 64/32-bit, endianness, hard/soft floating point, etc. that would have
39441 different calling sequences. */
39443 struct rs6000_opt_mask {
39444 const char *name; /* option name */
39445 HOST_WIDE_INT mask; /* mask to set */
39446 bool invert; /* invert sense of mask */
39447 bool valid_target; /* option is a target option */
39448 };
39450 static struct rs6000_opt_mask const rs6000_opt_masks[] =
39451 {
39452 { "altivec", OPTION_MASK_ALTIVEC, false, true },
39453 { "cmpb", OPTION_MASK_CMPB, false, true },
39454 { "crypto", OPTION_MASK_CRYPTO, false, true },
39455 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
39456 { "dlmzb", OPTION_MASK_DLMZB, false, true },
39457 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
39458 false, true },
39459 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
39460 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
39461 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
39462 { "fprnd", OPTION_MASK_FPRND, false, true },
39463 { "hard-dfp", OPTION_MASK_DFP, false, true },
39464 { "htm", OPTION_MASK_HTM, false, true },
39465 { "isel", OPTION_MASK_ISEL, false, true },
39466 { "mfcrf", OPTION_MASK_MFCRF, false, true },
39467 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
39468 { "modulo", OPTION_MASK_MODULO, false, true },
39469 { "mulhw", OPTION_MASK_MULHW, false, true },
39470 { "multiple", OPTION_MASK_MULTIPLE, false, true },
39471 { "popcntb", OPTION_MASK_POPCNTB, false, true },
39472 { "popcntd", OPTION_MASK_POPCNTD, false, true },
39473 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
39474 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
39475 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
39476 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
39477 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
39478 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
39479 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
39480 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
39481 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
39482 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
39483 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
39484 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
39485 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
39486 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
39487 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
39488 { "string", OPTION_MASK_STRING, false, true },
39489 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
39490 { "update", OPTION_MASK_NO_UPDATE, true , true },
39491 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
39492 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
39493 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
39494 { "vsx", OPTION_MASK_VSX, false, true },
39495 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
39496 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
39497 #ifdef OPTION_MASK_64BIT
39498 #if TARGET_AIX_OS
39499 { "aix64", OPTION_MASK_64BIT, false, false },
39500 { "aix32", OPTION_MASK_64BIT, true, false },
39501 #else
39502 { "64", OPTION_MASK_64BIT, false, false },
39503 { "32", OPTION_MASK_64BIT, true, false },
39504 #endif
39505 #endif
39506 #ifdef OPTION_MASK_EABI
39507 { "eabi", OPTION_MASK_EABI, false, false },
39508 #endif
39509 #ifdef OPTION_MASK_LITTLE_ENDIAN
39510 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
39511 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
39512 #endif
39513 #ifdef OPTION_MASK_RELOCATABLE
39514 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
39515 #endif
39516 #ifdef OPTION_MASK_STRICT_ALIGN
39517 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
39518 #endif
39519 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
39520 { "string", OPTION_MASK_STRING, false, false },
39523 /* Builtin mask mapping for printing the flags. */
39524 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
39525 {
39526 { "altivec", RS6000_BTM_ALTIVEC, false, false },
39527 { "vsx", RS6000_BTM_VSX, false, false },
39528 { "spe", RS6000_BTM_SPE, false, false },
39529 { "paired", RS6000_BTM_PAIRED, false, false },
39530 { "fre", RS6000_BTM_FRE, false, false },
39531 { "fres", RS6000_BTM_FRES, false, false },
39532 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
39533 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
39534 { "popcntd", RS6000_BTM_POPCNTD, false, false },
39535 { "cell", RS6000_BTM_CELL, false, false },
39536 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
39537 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
39538 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
39539 { "crypto", RS6000_BTM_CRYPTO, false, false },
39540 { "htm", RS6000_BTM_HTM, false, false },
39541 { "hard-dfp", RS6000_BTM_DFP, false, false },
39542 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
39543 { "long-double-128", RS6000_BTM_LDBL128, false, false },
39544 { "float128", RS6000_BTM_FLOAT128, false, false },
39547 /* Option variables that we want to support inside attribute((target)) and
39548 #pragma GCC target operations. */
39550 struct rs6000_opt_var {
39551 const char *name; /* option name */
39552 size_t global_offset; /* offset of the option in global_options. */
39553 size_t target_offset; /* offset of the option in target options. */
39554 };
39556 static struct rs6000_opt_var const rs6000_opt_vars[] =
39557 {
39558 { "friz",
39559 offsetof (struct gcc_options, x_TARGET_FRIZ),
39560 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
39561 { "avoid-indexed-addresses",
39562 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
39563 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
39564 { "paired",
39565 offsetof (struct gcc_options, x_rs6000_paired_float),
39566 offsetof (struct cl_target_option, x_rs6000_paired_float), },
39567 { "longcall",
39568 offsetof (struct gcc_options, x_rs6000_default_long_calls),
39569 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
39570 { "optimize-swaps",
39571 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
39572 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
39573 { "allow-movmisalign",
39574 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
39575 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
39576 { "allow-df-permute",
39577 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
39578 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
39579 { "sched-groups",
39580 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
39581 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
39582 { "always-hint",
39583 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
39584 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
39585 { "align-branch-targets",
39586 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
39587 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
39588 { "vectorize-builtins",
39589 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
39590 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
39591 { "tls-markers",
39592 offsetof (struct gcc_options, x_tls_markers),
39593 offsetof (struct cl_target_option, x_tls_markers), },
39594 { "sched-prolog",
39595 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39596 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39597 { "sched-epilog",
39598 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39599 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
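/* Note that "sched-epilog" above maps to the same variable as
   "sched-prolog"; a single flag appears to control scheduling of both
   the prologue and the epilogue sequences. */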
39600 { "gen-cell-microcode",
39601 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
39602 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
39603 { "warn-cell-microcode",
39604 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
39605 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
39606 };
39608 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
39609 parsing. Return true if there were no errors. */
39611 static bool
39612 rs6000_inner_target_options (tree args, bool attr_p)
39614 bool ret = true;
39616 if (args == NULL_TREE)
39617 ;
39619 else if (TREE_CODE (args) == STRING_CST)
39621 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39622 char *q;
39624 while ((q = strtok (p, ",")) != NULL)
39626 bool error_p = false;
39627 bool not_valid_p = false;
39628 const char *cpu_opt = NULL;
39630 p = NULL;
39631 if (strncmp (q, "cpu=", 4) == 0)
39633 int cpu_index = rs6000_cpu_name_lookup (q+4);
39634 if (cpu_index >= 0)
39635 rs6000_cpu_index = cpu_index;
39636 else
39638 error_p = true;
39639 cpu_opt = q+4;
39642 else if (strncmp (q, "tune=", 5) == 0)
39644 int tune_index = rs6000_cpu_name_lookup (q+5);
39645 if (tune_index >= 0)
39646 rs6000_tune_index = tune_index;
39647 else
39649 error_p = true;
39650 cpu_opt = q+5;
39653 else
39655 size_t i;
39656 bool invert = false;
39657 char *r = q;
39659 error_p = true;
39660 if (strncmp (r, "no-", 3) == 0)
39662 invert = true;
39663 r += 3;
39666 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
39667 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
39669 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
39671 if (!rs6000_opt_masks[i].valid_target)
39672 not_valid_p = true;
39673 else
39675 error_p = false;
39676 rs6000_isa_flags_explicit |= mask;
39678 /* VSX needs altivec, so -mvsx automagically sets
39679 altivec and disables -mavoid-indexed-addresses. */
39680 if (!invert)
39682 if (mask == OPTION_MASK_VSX)
39684 mask |= OPTION_MASK_ALTIVEC;
39685 TARGET_AVOID_XFORM = 0;
39689 if (rs6000_opt_masks[i].invert)
39690 invert = !invert;
39692 if (invert)
39693 rs6000_isa_flags &= ~mask;
39694 else
39695 rs6000_isa_flags |= mask;
39697 break;
39700 if (error_p && !not_valid_p)
39702 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
39703 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
39705 size_t j = rs6000_opt_vars[i].global_offset;
39706 *((int *) ((char *)&global_options + j)) = !invert;
39707 error_p = false;
39708 not_valid_p = false;
39709 break;
39714 if (error_p)
39716 const char *eprefix, *esuffix;
39718 ret = false;
39719 if (attr_p)
39721 eprefix = "__attribute__((__target__(";
39722 esuffix = ")))";
39724 else
39726 eprefix = "#pragma GCC target ";
39727 esuffix = "";
39730 if (cpu_opt)
39731 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
39732 q, esuffix);
39733 else if (not_valid_p)
39734 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
39735 else
39736 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
39741 else if (TREE_CODE (args) == TREE_LIST)
39742 {
39743 do
39744 {
39745 tree value = TREE_VALUE (args);
39746 if (value)
39748 bool ret2 = rs6000_inner_target_options (value, attr_p);
39749 if (!ret2)
39750 ret = false;
39752 args = TREE_CHAIN (args);
39753 }
39754 while (args != NULL_TREE);
39755 }
39757 else
39759 error ("attribute %<target%> argument not a string");
39760 return false;
39763 return ret;
39766 /* Print out the target options as a list for -mdebug=target. */
39768 static void
39769 rs6000_debug_target_options (tree args, const char *prefix)
39771 if (args == NULL_TREE)
39772 fprintf (stderr, "%s<NULL>", prefix);
39774 else if (TREE_CODE (args) == STRING_CST)
39776 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39777 char *q;
39779 while ((q = strtok (p, ",")) != NULL)
39781 p = NULL;
39782 fprintf (stderr, "%s\"%s\"", prefix, q);
39783 prefix = ", ";
39787 else if (TREE_CODE (args) == TREE_LIST)
39788 {
39789 do
39790 {
39791 tree value = TREE_VALUE (args);
39792 if (value)
39794 rs6000_debug_target_options (value, prefix);
39795 prefix = ", ";
39797 args = TREE_CHAIN (args);
39798 }
39799 while (args != NULL_TREE);
39800 }
39802 else
39803 gcc_unreachable ();
39805 return;
39809 /* Hook to validate attribute((target("..."))). */
39811 static bool
39812 rs6000_valid_attribute_p (tree fndecl,
39813 tree ARG_UNUSED (name),
39814 tree args,
39815 int flags)
39817 struct cl_target_option cur_target;
39818 bool ret;
39819 tree old_optimize = build_optimization_node (&global_options);
39820 tree new_target, new_optimize;
39821 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39823 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
39825 if (TARGET_DEBUG_TARGET)
39827 tree tname = DECL_NAME (fndecl);
39828 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
39829 if (tname)
39830 fprintf (stderr, "function: %.*s\n",
39831 (int) IDENTIFIER_LENGTH (tname),
39832 IDENTIFIER_POINTER (tname));
39833 else
39834 fprintf (stderr, "function: unknown\n");
39836 fprintf (stderr, "args:");
39837 rs6000_debug_target_options (args, " ");
39838 fprintf (stderr, "\n");
39840 if (flags)
39841 fprintf (stderr, "flags: 0x%x\n", flags);
39843 fprintf (stderr, "--------------------\n");
39846 old_optimize = build_optimization_node (&global_options);
39847 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39849 /* If the function changed the optimization levels as well as setting target
39850 options, start with the optimizations specified. */
39851 if (func_optimize && func_optimize != old_optimize)
39852 cl_optimization_restore (&global_options,
39853 TREE_OPTIMIZATION (func_optimize));
39855 /* The target attributes may also change some optimization flags, so update
39856 the optimization options if necessary. */
39857 cl_target_option_save (&cur_target, &global_options);
39858 rs6000_cpu_index = rs6000_tune_index = -1;
39859 ret = rs6000_inner_target_options (args, true);
39861 /* Set up any additional state. */
39862 if (ret)
39864 ret = rs6000_option_override_internal (false);
39865 new_target = build_target_option_node (&global_options);
39867 else
39868 new_target = NULL;
39870 new_optimize = build_optimization_node (&global_options);
39872 if (!new_target)
39873 ret = false;
39875 else if (fndecl)
39877 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
39879 if (old_optimize != new_optimize)
39880 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
39883 cl_target_option_restore (&global_options, &cur_target);
39885 if (old_optimize != new_optimize)
39886 cl_optimization_restore (&global_options,
39887 TREE_OPTIMIZATION (old_optimize));
39889 return ret;
39893 /* Hook to validate the current #pragma GCC target and set the state, and
39894 update the macros based on what was changed. If ARGS is NULL, then
39895 POP_TARGET is used to reset the options. */
39897 bool
39898 rs6000_pragma_target_parse (tree args, tree pop_target)
39900 tree prev_tree = build_target_option_node (&global_options);
39901 tree cur_tree;
39902 struct cl_target_option *prev_opt, *cur_opt;
39903 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
39904 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
39906 if (TARGET_DEBUG_TARGET)
39908 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
39909 fprintf (stderr, "args:");
39910 rs6000_debug_target_options (args, " ");
39911 fprintf (stderr, "\n");
39913 if (pop_target)
39915 fprintf (stderr, "pop_target:\n");
39916 debug_tree (pop_target);
39918 else
39919 fprintf (stderr, "pop_target: <NULL>\n");
39921 fprintf (stderr, "--------------------\n");
39924 if (! args)
39926 cur_tree = ((pop_target)
39927 ? pop_target
39928 : target_option_default_node);
39929 cl_target_option_restore (&global_options,
39930 TREE_TARGET_OPTION (cur_tree));
39932 else
39934 rs6000_cpu_index = rs6000_tune_index = -1;
39935 if (!rs6000_inner_target_options (args, false)
39936 || !rs6000_option_override_internal (false)
39937 || (cur_tree = build_target_option_node (&global_options))
39938 == NULL_TREE)
39940 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
39941 fprintf (stderr, "invalid pragma\n");
39943 return false;
39947 target_option_current_node = cur_tree;
39949 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
39950 change the macros that are defined. */
39951 if (rs6000_target_modify_macros_ptr)
39953 prev_opt = TREE_TARGET_OPTION (prev_tree);
39954 prev_bumask = prev_opt->x_rs6000_builtin_mask;
39955 prev_flags = prev_opt->x_rs6000_isa_flags;
39957 cur_opt = TREE_TARGET_OPTION (cur_tree);
39958 cur_flags = cur_opt->x_rs6000_isa_flags;
39959 cur_bumask = cur_opt->x_rs6000_builtin_mask;
39961 diff_bumask = (prev_bumask ^ cur_bumask);
39962 diff_flags = (prev_flags ^ cur_flags);
39964 if ((diff_flags != 0) || (diff_bumask != 0))
39966 /* Delete old macros. */
39967 rs6000_target_modify_macros_ptr (false,
39968 prev_flags & diff_flags,
39969 prev_bumask & diff_bumask);
39971 /* Define new macros. */
39972 rs6000_target_modify_macros_ptr (true,
39973 cur_flags & diff_flags,
39974 cur_bumask & diff_bumask);
39978 return true;
39982 /* Remember the last target of rs6000_set_current_function. */
39983 static GTY(()) tree rs6000_previous_fndecl;
39985 /* Establish appropriate back-end context for processing the function
39986 FNDECL. The argument might be NULL to indicate processing at top
39987 level, outside of any function scope. */
39988 static void
39989 rs6000_set_current_function (tree fndecl)
39991 tree old_tree = (rs6000_previous_fndecl
39992 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
39993 : NULL_TREE);
39995 tree new_tree = (fndecl
39996 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
39997 : NULL_TREE);
39999 if (TARGET_DEBUG_TARGET)
40001 bool print_final = false;
40002 fprintf (stderr, "\n==================== rs6000_set_current_function");
40004 if (fndecl)
40005 fprintf (stderr, ", fndecl %s (%p)",
40006 (DECL_NAME (fndecl)
40007 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
40008 : "<unknown>"), (void *)fndecl);
40010 if (rs6000_previous_fndecl)
40011 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
40013 fprintf (stderr, "\n");
40014 if (new_tree)
40016 fprintf (stderr, "\nnew fndecl target specific options:\n");
40017 debug_tree (new_tree);
40018 print_final = true;
40021 if (old_tree)
40023 fprintf (stderr, "\nold fndecl target specific options:\n");
40024 debug_tree (old_tree);
40025 print_final = true;
40028 if (print_final)
40029 fprintf (stderr, "--------------------\n");
40032 /* Only change the context if the function changes. This hook is called
40033 several times in the course of compiling a function, and we don't want to
40034 slow things down too much or call target_reinit when it isn't safe. */
40035 if (fndecl && fndecl != rs6000_previous_fndecl)
40037 rs6000_previous_fndecl = fndecl;
40038 if (old_tree == new_tree)
40039 ;
40041 else if (new_tree && new_tree != target_option_default_node)
40043 cl_target_option_restore (&global_options,
40044 TREE_TARGET_OPTION (new_tree));
40045 if (TREE_TARGET_GLOBALS (new_tree))
40046 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
40047 else
40048 TREE_TARGET_GLOBALS (new_tree)
40049 = save_target_globals_default_opts ();
40052 else if (old_tree && old_tree != target_option_default_node)
40054 new_tree = target_option_current_node;
40055 cl_target_option_restore (&global_options,
40056 TREE_TARGET_OPTION (new_tree));
40057 if (TREE_TARGET_GLOBALS (new_tree))
40058 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
40059 else if (new_tree == target_option_default_node)
40060 restore_target_globals (&default_target_globals);
40061 else
40062 TREE_TARGET_GLOBALS (new_tree)
40063 = save_target_globals_default_opts ();
40069 /* Save the current options */
40071 static void
40072 rs6000_function_specific_save (struct cl_target_option *ptr,
40073 struct gcc_options *opts)
40075 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
40076 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
40079 /* Restore the current options */
40081 static void
40082 rs6000_function_specific_restore (struct gcc_options *opts,
40083 struct cl_target_option *ptr)
40086 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
40087 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
40088 (void) rs6000_option_override_internal (false);
40091 /* Print the current options */
40093 static void
40094 rs6000_function_specific_print (FILE *file, int indent,
40095 struct cl_target_option *ptr)
40097 rs6000_print_isa_options (file, indent, "Isa options set",
40098 ptr->x_rs6000_isa_flags);
40100 rs6000_print_isa_options (file, indent, "Isa options explicit",
40101 ptr->x_rs6000_isa_flags_explicit);
40104 /* Helper function to print the current isa or misc options on a line. */
40106 static void
40107 rs6000_print_options_internal (FILE *file,
40108 int indent,
40109 const char *string,
40110 HOST_WIDE_INT flags,
40111 const char *prefix,
40112 const struct rs6000_opt_mask *opts,
40113 size_t num_elements)
40115 size_t i;
40116 size_t start_column = 0;
40117 size_t cur_column;
40118 size_t max_column = 120;
40119 size_t prefix_len = strlen (prefix);
40120 size_t comma_len = 0;
40121 const char *comma = "";
40123 if (indent)
40124 start_column += fprintf (file, "%*s", indent, "");
40126 if (!flags)
40128 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
40129 return;
40132 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
40134 /* Print the various mask options. */
40135 cur_column = start_column;
40136 for (i = 0; i < num_elements; i++)
40138 bool invert = opts[i].invert;
40139 const char *name = opts[i].name;
40140 const char *no_str = "";
40141 HOST_WIDE_INT mask = opts[i].mask;
40142 size_t len = comma_len + prefix_len + strlen (name);
40144 if (!invert)
40146 if ((flags & mask) == 0)
40148 no_str = "no-";
40149 len += sizeof ("no-") - 1;
40152 flags &= ~mask;
40155 else
40157 if ((flags & mask) != 0)
40159 no_str = "no-";
40160 len += sizeof ("no-") - 1;
40163 flags |= mask;
40166 cur_column += len;
40167 if (cur_column > max_column)
40169 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
40170 cur_column = start_column + len;
40171 comma = "";
40174 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
40175 comma = ", ";
40176 comma_len = sizeof (", ") - 1;
40179 fputs ("\n", file);
40182 /* Helper function to print the current isa options on a line. */
40184 static void
40185 rs6000_print_isa_options (FILE *file, int indent, const char *string,
40186 HOST_WIDE_INT flags)
40188 rs6000_print_options_internal (file, indent, string, flags, "-m",
40189 &rs6000_opt_masks[0],
40190 ARRAY_SIZE (rs6000_opt_masks));
40193 static void
40194 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
40195 HOST_WIDE_INT flags)
40197 rs6000_print_options_internal (file, indent, string, flags, "",
40198 &rs6000_builtin_mask_names[0],
40199 ARRAY_SIZE (rs6000_builtin_mask_names));
40202 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
40203 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
40204 -mvsx-timode, -mupper-regs-df).
40206 If the user used -mno-power8-vector, we need to turn off all of the implicit
40207 ISA 2.07 and 3.0 options that relate to the vector unit.
40209 If the user used -mno-power9-vector, we need to turn off all of the implicit
40210 ISA 3.0 options that relate to the vector unit.
40212 This function does not handle explicit options such as the user specifying
40213 -mdirect-move. These are handled in rs6000_option_override_internal, and
40214 the appropriate error is given if needed.
40216 We return a mask of all of the implicit options that should not be enabled
40217 by default. */
40219 static HOST_WIDE_INT
40220 rs6000_disable_incompatible_switches (void)
40222 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
40223 size_t i, j;
40225 static const struct {
40226 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
40227 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
40228 const char *const name; /* name of the switch. */
40229 } flags[] = {
40230 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
40231 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
40232 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
40235 for (i = 0; i < ARRAY_SIZE (flags); i++)
40237 HOST_WIDE_INT no_flag = flags[i].no_flag;
40239 if ((rs6000_isa_flags & no_flag) == 0
40240 && (rs6000_isa_flags_explicit & no_flag) != 0)
40242 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
40243 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
40244 & rs6000_isa_flags
40245 & dep_flags);
40247 if (set_flags)
40249 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
40250 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
40252 set_flags &= ~rs6000_opt_masks[j].mask;
40253 error ("-mno-%s turns off -m%s",
40254 flags[i].name,
40255 rs6000_opt_masks[j].name);
40258 gcc_assert (!set_flags);
40261 rs6000_isa_flags &= ~dep_flags;
40262 ignore_masks |= no_flag | dep_flags;
40266 if (!TARGET_P9_VECTOR
40267 && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0
40268 && TARGET_P9_DFORM_BOTH > 0)
40270 error ("-mno-power9-vector turns off -mpower9-dform");
40271 TARGET_P9_DFORM_BOTH = 0;
40274 return ignore_masks;
40278 /* Hook to determine if one function can safely inline another. */
40280 static bool
40281 rs6000_can_inline_p (tree caller, tree callee)
40283 bool ret = false;
40284 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
40285 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
40287 /* If callee has no option attributes, then it is ok to inline. */
40288 if (!callee_tree)
40289 ret = true;
40291 /* If caller has no option attributes, but callee does then it is not ok to
40292 inline. */
40293 else if (!caller_tree)
40294 ret = false;
40296 else
40298 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
40299 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
40301 /* Callee's options should be a subset of the caller's, i.e. a vsx function
40302 can inline an altivec function but a non-vsx function can't inline a
40303 vsx function. */
40304 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
40305 == callee_opts->x_rs6000_isa_flags)
40306 ret = true;
40309 if (TARGET_DEBUG_TARGET)
40310 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
40311 (DECL_NAME (caller)
40312 ? IDENTIFIER_POINTER (DECL_NAME (caller))
40313 : "<unknown>"),
40314 (DECL_NAME (callee)
40315 ? IDENTIFIER_POINTER (DECL_NAME (callee))
40316 : "<unknown>"),
40317 (ret ? "can" : "cannot"));
40319 return ret;
40322 /* Allocate a stack temp and fix up the address so it meets the particular
40323 memory requirements (either offsettable or REG+REG addressing). */
40325 rtx
40326 rs6000_allocate_stack_temp (machine_mode mode,
40327 bool offsettable_p,
40328 bool reg_reg_p)
40330 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40331 rtx addr = XEXP (stack, 0);
40332 int strict_p = (reload_in_progress || reload_completed);
40334 if (!legitimate_indirect_address_p (addr, strict_p))
40336 if (offsettable_p
40337 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
40338 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40340 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
40341 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40344 return stack;
40347 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
40348 to such a form to deal with memory reference instructions like STFIWX that
40349 only take reg+reg addressing. */
40351 rtx
40352 rs6000_address_for_fpconvert (rtx x)
40354 int strict_p = (reload_in_progress || reload_completed);
40355 rtx addr;
40357 gcc_assert (MEM_P (x));
40358 addr = XEXP (x, 0);
40359 if (! legitimate_indirect_address_p (addr, strict_p)
40360 && ! legitimate_indexed_address_p (addr, strict_p))
40362 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
40364 rtx reg = XEXP (addr, 0);
40365 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
40366 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
40367 gcc_assert (REG_P (reg));
40368 emit_insn (gen_add3_insn (reg, reg, size_rtx));
40369 addr = reg;
40371 else if (GET_CODE (addr) == PRE_MODIFY)
40373 rtx reg = XEXP (addr, 0);
40374 rtx expr = XEXP (addr, 1);
40375 gcc_assert (REG_P (reg));
40376 gcc_assert (GET_CODE (expr) == PLUS);
40377 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
40378 addr = reg;
40381 x = replace_equiv_address (x, copy_addr_to_reg (addr));
40384 return x;
40387 /* Given a memory reference, if it is not in the form for altivec memory
40388 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
40389 convert to the altivec format. */
40391 rtx
40392 rs6000_address_for_altivec (rtx x)
40394 gcc_assert (MEM_P (x));
40395 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
40397 rtx addr = XEXP (x, 0);
40398 int strict_p = (reload_in_progress || reload_completed);
40400 if (!legitimate_indexed_address_p (addr, strict_p)
40401 && !legitimate_indirect_address_p (addr, strict_p))
40402 addr = copy_to_mode_reg (Pmode, addr);
40404 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
40405 x = change_address (x, GET_MODE (x), addr);
40408 return x;
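/* Editorial note: lvx/stvx ignore the low four address bits in hardware,
   so an address such as (plus rB (const_int 5)) is first copied into a
   register and then wrapped as (and rN (const_int -16)) to make that
   behavior explicit in the RTL.  */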
40411 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
40413 On the RS/6000, all integer constants are acceptable, though most won't be
40414 valid for particular insns. Only easy FP constants are acceptable. */
40416 static bool
40417 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
40419 if (TARGET_ELF && tls_referenced_p (x))
40420 return false;
40422 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
40423 || GET_MODE (x) == VOIDmode
40424 || (TARGET_POWERPC64 && mode == DImode)
40425 || easy_fp_constant (x, mode)
40426 || easy_vector_constant (x, mode));
40430 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
40432 static bool
40433 chain_already_loaded (rtx_insn *last)
40435 for (; last != NULL; last = PREV_INSN (last))
40437 if (NONJUMP_INSN_P (last))
40439 rtx patt = PATTERN (last);
40441 if (GET_CODE (patt) == SET)
40443 rtx lhs = XEXP (patt, 0);
40445 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
40446 return true;
40450 return false;
40453 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
40455 void
40456 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40458 const bool direct_call_p
40459 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
40460 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
40461 rtx toc_load = NULL_RTX;
40462 rtx toc_restore = NULL_RTX;
40463 rtx func_addr;
40464 rtx abi_reg = NULL_RTX;
40465 rtx call[4];
40466 int n_call;
40467 rtx insn;
40469 /* Handle longcall attributes. */
40470 if (INTVAL (cookie) & CALL_LONG)
40471 func_desc = rs6000_longcall_ref (func_desc);
40473 /* Handle indirect calls. */
40474 if (GET_CODE (func_desc) != SYMBOL_REF
40475 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
40477 /* Save the TOC into its reserved slot before the call,
40478 and prepare to restore it after the call. */
40479 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
40480 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
40481 rtx stack_toc_mem = gen_frame_mem (Pmode,
40482 gen_rtx_PLUS (Pmode, stack_ptr,
40483 stack_toc_offset));
40484 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
40485 gen_rtvec (1, stack_toc_offset),
40486 UNSPEC_TOCSLOT);
40487 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
40489 /* Can we optimize saving the TOC in the prologue or
40490 do we need to do it at every call? */
40491 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
40492 cfun->machine->save_toc_in_prologue = true;
40493 else
40495 MEM_VOLATILE_P (stack_toc_mem) = 1;
40496 emit_move_insn (stack_toc_mem, toc_reg);
40499 if (DEFAULT_ABI == ABI_ELFv2)
40501 /* A function pointer in the ELFv2 ABI is just a plain address, but
40502 the ABI requires it to be loaded into r12 before the call. */
40503 func_addr = gen_rtx_REG (Pmode, 12);
40504 emit_move_insn (func_addr, func_desc);
40505 abi_reg = func_addr;
40507 else
40509 /* A function pointer under AIX is a pointer to a data area whose
40510 first word contains the actual address of the function, whose
40511 second word contains a pointer to its TOC, and whose third word
40512 contains a value to place in the static chain register (r11).
40513 Note that if we load the static chain, our "trampoline" need
40514 not have any executable code. */
40516 /* Load up address of the actual function. */
40517 func_desc = force_reg (Pmode, func_desc);
40518 func_addr = gen_reg_rtx (Pmode);
40519 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
40521 /* Prepare to load the TOC of the called function. Note that the
40522 TOC load must happen immediately before the actual call so
40523 that unwinding the TOC registers works correctly. See the
40524 comment in frob_update_context. */
40525 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
40526 rtx func_toc_mem = gen_rtx_MEM (Pmode,
40527 gen_rtx_PLUS (Pmode, func_desc,
40528 func_toc_offset));
40529 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
40531 /* If we have a static chain, load it up. But, if the call was
40532 originally direct, the 3rd word has not been written since no
40533 trampoline has been built, so we ought not to load it, lest we
40534 override a static chain value. */
40535 if (!direct_call_p
40536 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
40537 && !chain_already_loaded (get_current_sequence ()->next->last))
40539 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
40540 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
40541 rtx func_sc_mem = gen_rtx_MEM (Pmode,
40542 gen_rtx_PLUS (Pmode, func_desc,
40543 func_sc_offset));
40544 emit_move_insn (sc_reg, func_sc_mem);
40545 abi_reg = sc_reg;
40549 else
40551 /* Direct calls use the TOC: for local calls, the callee will
40552 assume the TOC register is set; for non-local calls, the
40553 PLT stub needs the TOC register. */
40554 abi_reg = toc_reg;
40555 func_addr = func_desc;
40558 /* Create the call. */
40559 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
40560 if (value != NULL_RTX)
40561 call[0] = gen_rtx_SET (value, call[0]);
40562 n_call = 1;
40564 if (toc_load)
40565 call[n_call++] = toc_load;
40566 if (toc_restore)
40567 call[n_call++] = toc_restore;
40569 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
40571 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
40572 insn = emit_call_insn (insn);
40574 /* Mention all registers defined by the ABI to hold information
40575 as uses in CALL_INSN_FUNCTION_USAGE. */
40576 if (abi_reg)
40577 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
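/* Editorial sketch of the indirect-call sequence produced above for
   64-bit AIX, with illustrative register names (rDESC holds the
   function descriptor address):

       std r2,40(r1)        # save our TOC in its reserved slot
       ld  r9,0(rDESC)      # word 0: actual function address
       ld  r11,16(rDESC)    # word 2: static chain, when loaded
       ld  r2,8(rDESC)      # word 1: callee's TOC, tied to the call
       mtctr r9
       bctrl
       ld  r2,40(r1)        # toc_restore after the call returns

   40(r1) is the 64-bit TOC save slot; 32-bit AIX and ELFv2 use
   different offsets.  */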
40580 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
40582 void
40583 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40585 rtx call[2];
40586 rtx insn;
40588 gcc_assert (INTVAL (cookie) == 0);
40590 /* Create the call. */
40591 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
40592 if (value != NULL_RTX)
40593 call[0] = gen_rtx_SET (value, call[0]);
40595 call[1] = simple_return_rtx;
40597 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
40598 insn = emit_call_insn (insn);
40600 /* Note use of the TOC register. */
40601 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
40604 /* Return whether we need to always update the saved TOC pointer when we update
40605 the stack pointer. */
40607 static bool
40608 rs6000_save_toc_in_prologue_p (void)
40610 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
40613 #ifdef HAVE_GAS_HIDDEN
40614 # define USE_HIDDEN_LINKONCE 1
40615 #else
40616 # define USE_HIDDEN_LINKONCE 0
40617 #endif
40619 /* Fills in the label name that should be used for a 476 link stack thunk. */
40621 void
40622 get_ppc476_thunk_name (char name[32])
40624 gcc_assert (TARGET_LINK_STACK);
40626 if (USE_HIDDEN_LINKONCE)
40627 sprintf (name, "__ppc476.get_thunk");
40628 else
40629 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
40632 /* This function emits the simple thunk routine that is used to preserve
40633 the link stack on the 476 cpu. */
40635 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
40636 static void
40637 rs6000_code_end (void)
40639 char name[32];
40640 tree decl;
40642 if (!TARGET_LINK_STACK)
40643 return;
40645 get_ppc476_thunk_name (name);
40647 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
40648 build_function_type_list (void_type_node, NULL_TREE));
40649 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
40650 NULL_TREE, void_type_node);
40651 TREE_PUBLIC (decl) = 1;
40652 TREE_STATIC (decl) = 1;
40654 #if RS6000_WEAK
40655 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
40657 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
40658 targetm.asm_out.unique_section (decl, 0);
40659 switch_to_section (get_named_section (decl, NULL, 0));
40660 DECL_WEAK (decl) = 1;
40661 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
40662 targetm.asm_out.globalize_label (asm_out_file, name);
40663 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
40664 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
40666 else
40667 #endif
40669 switch_to_section (text_section);
40670 ASM_OUTPUT_LABEL (asm_out_file, name);
40673 DECL_INITIAL (decl) = make_node (BLOCK);
40674 current_function_decl = decl;
40675 allocate_struct_function (decl, false);
40676 init_function_start (decl);
40677 first_function_block_is_cold = false;
40678 /* Make sure unwind info is emitted for the thunk if needed. */
40679 final_start_function (emit_barrier (), asm_out_file, 1);
40681 fputs ("\tblr\n", asm_out_file);
40683 final_end_function ();
40684 init_insn_lengths ();
40685 free_after_compilation (cfun);
40686 set_cfun (NULL);
40687 current_function_decl = NULL;
40690 /* Add r30 to hard reg set if the prologue sets it up and it is not
40691 pic_offset_table_rtx. */
40693 static void
40694 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
40696 if (!TARGET_SINGLE_PIC_BASE
40697 && TARGET_TOC
40698 && TARGET_MINIMAL_TOC
40699 && !constant_pool_empty_p ())
40700 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
40701 if (cfun->machine->split_stack_argp_used)
40702 add_to_hard_reg_set (&set->set, Pmode, 12);
40706 /* Helper function for rs6000_split_logical to emit a logical instruction after
40707 splitting the operation into single GPR registers.
40709 DEST is the destination register.
40710 OP1 and OP2 are the input source registers.
40711 CODE is the base operation (AND, IOR, XOR, NOT).
40712 MODE is the machine mode.
40713 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40714 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40715 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40717 static void
40718 rs6000_split_logical_inner (rtx dest,
40719 rtx op1,
40720 rtx op2,
40721 enum rtx_code code,
40722 machine_mode mode,
40723 bool complement_final_p,
40724 bool complement_op1_p,
40725 bool complement_op2_p)
40727 rtx bool_rtx;
40729 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
40730 if (op2 && GET_CODE (op2) == CONST_INT
40731 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
40732 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40734 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
40735 HOST_WIDE_INT value = INTVAL (op2) & mask;
40737 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
40738 if (code == AND)
40740 if (value == 0)
40742 emit_insn (gen_rtx_SET (dest, const0_rtx));
40743 return;
40746 else if (value == mask)
40748 if (!rtx_equal_p (dest, op1))
40749 emit_insn (gen_rtx_SET (dest, op1));
40750 return;
40754 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
40755 into separate ORI/ORIS or XORI/XORIS instructions. */
40756 else if (code == IOR || code == XOR)
40758 if (value == 0)
40760 if (!rtx_equal_p (dest, op1))
40761 emit_insn (gen_rtx_SET (dest, op1));
40762 return;
40767 if (code == AND && mode == SImode
40768 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40770 emit_insn (gen_andsi3 (dest, op1, op2));
40771 return;
40774 if (complement_op1_p)
40775 op1 = gen_rtx_NOT (mode, op1);
40777 if (complement_op2_p)
40778 op2 = gen_rtx_NOT (mode, op2);
40780 /* For canonical RTL, if only one arm is inverted it is the first. */
40781 if (!complement_op1_p && complement_op2_p)
40782 std::swap (op1, op2);
40784 bool_rtx = ((code == NOT)
40785 ? gen_rtx_NOT (mode, op1)
40786 : gen_rtx_fmt_ee (code, mode, op1, op2));
40788 if (complement_final_p)
40789 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
40791 emit_insn (gen_rtx_SET (dest, bool_rtx));
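/* Editorial illustration of the constant folds above (pseudo-asm):
   AND with 0 becomes "li rD,0"; AND with the full mode mask and
   IOR/XOR with 0 become "mr rD,rX", or disappear entirely when
   rD and rX are the same register, so no logical machine instruction
   is emitted at all in those cases.  */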
40794 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
40795 operations are split immediately during RTL generation to allow for more
40796 optimizations of the AND/IOR/XOR.
40798 OPERANDS is an array containing the destination and two input operands.
40799 CODE is the base operation (AND, IOR, XOR, NOT).
40801 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40802 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40803 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40807 static void
40808 rs6000_split_logical_di (rtx operands[3],
40809 enum rtx_code code,
40810 bool complement_final_p,
40811 bool complement_op1_p,
40812 bool complement_op2_p)
40814 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
40815 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
40816 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
40817 enum hi_lo { hi = 0, lo = 1 };
40818 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
40819 size_t i;
40821 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
40822 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
40823 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
40824 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
40826 if (code == NOT)
40827 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
40828 else
40830 if (GET_CODE (operands[2]) != CONST_INT)
40832 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
40833 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
40835 else
40837 HOST_WIDE_INT value = INTVAL (operands[2]);
40838 HOST_WIDE_INT value_hi_lo[2];
40840 gcc_assert (!complement_final_p);
40841 gcc_assert (!complement_op1_p);
40842 gcc_assert (!complement_op2_p);
40844 value_hi_lo[hi] = value >> 32;
40845 value_hi_lo[lo] = value & lower_32bits;
40847 for (i = 0; i < 2; i++)
40849 HOST_WIDE_INT sub_value = value_hi_lo[i];
40851 if (sub_value & sign_bit)
40852 sub_value |= upper_32bits;
40854 op2_hi_lo[i] = GEN_INT (sub_value);
40856 /* If this is an AND instruction, check to see if we need to load
40857 the value in a register. */
40858 if (code == AND && sub_value != -1 && sub_value != 0
40859 && !and_operand (op2_hi_lo[i], SImode))
40860 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
40865 for (i = 0; i < 2; i++)
40867 /* Split large IOR/XOR operations. */
40868 if ((code == IOR || code == XOR)
40869 && GET_CODE (op2_hi_lo[i]) == CONST_INT
40870 && !complement_final_p
40871 && !complement_op1_p
40872 && !complement_op2_p
40873 && !logical_const_operand (op2_hi_lo[i], SImode))
40875 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
40876 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
40877 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
40878 rtx tmp = gen_reg_rtx (SImode);
40880 /* Make sure the constant is sign extended. */
40881 if ((hi_16bits & sign_bit) != 0)
40882 hi_16bits |= upper_32bits;
40884 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
40885 code, SImode, false, false, false);
40887 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
40888 code, SImode, false, false, false);
40890 else
40891 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
40892 code, SImode, complement_final_p,
40893 complement_op1_p, complement_op2_p);
40896 return;
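/* Editorial example: on a 32-bit target, a DImode "a |= 0x12345678"
   splits as above into two immediate ops on the low word,

       oris rLO,rLO,0x1234
       ori  rLO,rLO,0x5678

   because 0x12345678 is not a single logical_const_operand, while the
   high word (IOR with 0) degenerates into a move or nothing at all.  */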
40899 /* Split the insns that make up boolean operations operating on multiple GPR
40900 registers. The boolean MD patterns ensure that the inputs either are
40901 exactly the same as the output registers, or there is no overlap.
40903 OPERANDS is an array containing the destination and two input operands.
40904 CODE is the base operation (AND, IOR, XOR, NOT).
40905 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40906 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40907 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40909 void
40910 rs6000_split_logical (rtx operands[3],
40911 enum rtx_code code,
40912 bool complement_final_p,
40913 bool complement_op1_p,
40914 bool complement_op2_p)
40916 machine_mode mode = GET_MODE (operands[0]);
40917 machine_mode sub_mode;
40918 rtx op0, op1, op2;
40919 int sub_size, regno0, regno1, nregs, i;
40921 /* If this is DImode, use the specialized version that can run before
40922 register allocation. */
40923 if (mode == DImode && !TARGET_POWERPC64)
40925 rs6000_split_logical_di (operands, code, complement_final_p,
40926 complement_op1_p, complement_op2_p);
40927 return;
40930 op0 = operands[0];
40931 op1 = operands[1];
40932 op2 = (code == NOT) ? NULL_RTX : operands[2];
40933 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
40934 sub_size = GET_MODE_SIZE (sub_mode);
40935 regno0 = REGNO (op0);
40936 regno1 = REGNO (op1);
40938 gcc_assert (reload_completed);
40939 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40940 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40942 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
40943 gcc_assert (nregs > 1);
40945 if (op2 && REG_P (op2))
40946 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
40948 for (i = 0; i < nregs; i++)
40950 int offset = i * sub_size;
40951 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
40952 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
40953 rtx sub_op2 = ((code == NOT)
40954 ? NULL_RTX
40955 : simplify_subreg (sub_mode, op2, mode, offset));
40957 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
40958 complement_final_p, complement_op1_p,
40959 complement_op2_p);
40962 return;
40966 /* Return true if the peephole2 can combine a load involving a combination of
40967 an addis instruction and a load with an offset that can be fused together on
40968 a power8. */
40970 bool
40971 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
40972 rtx addis_value, /* addis value. */
40973 rtx target, /* target register that is loaded. */
40974 rtx mem) /* bottom part of the memory addr. */
40976 rtx addr;
40977 rtx base_reg;
40979 /* Validate arguments. */
40980 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
40981 return false;
40983 if (!base_reg_operand (target, GET_MODE (target)))
40984 return false;
40986 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
40987 return false;
40989 /* Allow sign/zero extension. */
40990 if (GET_CODE (mem) == ZERO_EXTEND
40991 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
40992 mem = XEXP (mem, 0);
40994 if (!MEM_P (mem))
40995 return false;
40997 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
40998 return false;
41000 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
41001 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
41002 return false;
41004 /* Validate that the register used to load the high value is either the
41005 register being loaded, or we can safely replace its use.
41007 This function is only called from the peephole2 pass and we assume that
41008 there are 2 instructions in the peephole (addis and load), so we want to
41009 check that the target register is not used in the memory address and that
41010 the register holding the addis result is dead after the peephole. */
41011 if (REGNO (addis_reg) != REGNO (target))
41013 if (reg_mentioned_p (target, mem))
41014 return false;
41016 if (!peep2_reg_dead_p (2, addis_reg))
41017 return false;
41019 /* If the target register being loaded is the stack pointer, we must
41020 avoid loading any other value into it, even temporarily. */
41021 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
41022 return false;
41025 base_reg = XEXP (addr, 0);
41026 return REGNO (addis_reg) == REGNO (base_reg);
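/* Editorial sketch: the insn pair accepted above typically looks like

       addis r10,r2,sym@toc@ha
       lwz   r10,sym@toc@l(r10)

   where the addis target, the load's base register, and the loaded
   register coincide (or the addis register dies after the pair), the
   shape power8 can fuse at dispatch.  */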
41029 /* During the peephole2 pass, adjust and expand the insns for a load fusion
41030 sequence. We adjust the addis register to use the target register. If the
41031 load sign extends, we change the code to do a zero-extending load followed
41032 by an explicit sign extension, since the fusion only covers zero-extending
41033 loads.
41035 The operands are:
41036 operands[0] register set with addis (to be replaced with target)
41037 operands[1] value set via addis
41038 operands[2] target register being loaded
41039 operands[3] D-form memory reference using operands[0]. */
41041 void
41042 expand_fusion_gpr_load (rtx *operands)
41044 rtx addis_value = operands[1];
41045 rtx target = operands[2];
41046 rtx orig_mem = operands[3];
41047 rtx new_addr, new_mem, orig_addr, offset;
41048 enum rtx_code plus_or_lo_sum;
41049 machine_mode target_mode = GET_MODE (target);
41050 machine_mode extend_mode = target_mode;
41051 machine_mode ptr_mode = Pmode;
41052 enum rtx_code extend = UNKNOWN;
41054 if (GET_CODE (orig_mem) == ZERO_EXTEND
41055 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
41057 extend = GET_CODE (orig_mem);
41058 orig_mem = XEXP (orig_mem, 0);
41059 target_mode = GET_MODE (orig_mem);
41062 gcc_assert (MEM_P (orig_mem));
41064 orig_addr = XEXP (orig_mem, 0);
41065 plus_or_lo_sum = GET_CODE (orig_addr);
41066 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41068 offset = XEXP (orig_addr, 1);
41069 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41070 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41072 if (extend != UNKNOWN)
41073 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
41075 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
41076 UNSPEC_FUSION_GPR);
41077 emit_insn (gen_rtx_SET (target, new_mem));
41079 if (extend == SIGN_EXTEND)
41081 int sub_off = ((BYTES_BIG_ENDIAN)
41082 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
41083 : 0);
41084 rtx sign_reg
41085 = simplify_subreg (target_mode, target, extend_mode, sub_off);
41087 emit_insn (gen_rtx_SET (target,
41088 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
41091 return;
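/* Editorial example: a sign-extending halfword load fuses as a
   zero-extending pair plus a separate extend, roughly

       addis rT,rB,off@ha
       lhz   rT,off@l(rT)
       extsh rT,rT

   (illustrative register names), since only zero extension is covered
   by the fused form.  */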
41094 /* Emit the addis instruction that will be part of a fused instruction
41095 sequence. */
41097 void
41098 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
41099 const char *mode_name)
41101 rtx fuse_ops[10];
41102 char insn_template[80];
41103 const char *addis_str = NULL;
41104 const char *comment_str = ASM_COMMENT_START;
41106 if (*comment_str == ' ')
41107 comment_str++;
41109 /* Emit the addis instruction. */
41110 fuse_ops[0] = target;
41111 if (satisfies_constraint_L (addis_value))
41113 fuse_ops[1] = addis_value;
41114 addis_str = "lis %0,%v1";
41117 else if (GET_CODE (addis_value) == PLUS)
41119 rtx op0 = XEXP (addis_value, 0);
41120 rtx op1 = XEXP (addis_value, 1);
41122 if (REG_P (op0) && CONST_INT_P (op1)
41123 && satisfies_constraint_L (op1))
41125 fuse_ops[1] = op0;
41126 fuse_ops[2] = op1;
41127 addis_str = "addis %0,%1,%v2";
41131 else if (GET_CODE (addis_value) == HIGH)
41133 rtx value = XEXP (addis_value, 0);
41134 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
41136 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
41137 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
41138 if (TARGET_ELF)
41139 addis_str = "addis %0,%2,%1@toc@ha";
41141 else if (TARGET_XCOFF)
41142 addis_str = "addis %0,%1@u(%2)";
41144 else
41145 gcc_unreachable ();
41148 else if (GET_CODE (value) == PLUS)
41150 rtx op0 = XEXP (value, 0);
41151 rtx op1 = XEXP (value, 1);
41153 if (GET_CODE (op0) == UNSPEC
41154 && XINT (op0, 1) == UNSPEC_TOCREL
41155 && CONST_INT_P (op1))
41157 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
41158 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
41159 fuse_ops[3] = op1;
41160 if (TARGET_ELF)
41161 addis_str = "addis %0,%2,%1+%3@toc@ha";
41163 else if (TARGET_XCOFF)
41164 addis_str = "addis %0,%1+%3@u(%2)";
41166 else
41167 gcc_unreachable ();
41171 else if (satisfies_constraint_L (value))
41173 fuse_ops[1] = value;
41174 addis_str = "lis %0,%v1";
41177 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
41179 fuse_ops[1] = value;
41180 addis_str = "lis %0,%1@ha";
41184 if (!addis_str)
41185 fatal_insn ("Could not generate addis value for fusion", addis_value);
41187 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
41188 comment, mode_name);
41189 output_asm_insn (insn_template, fuse_ops);
41192 /* Emit a D-form load or store instruction that is the second instruction
41193 of a fusion sequence. */
41195 void
41196 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
41197 const char *insn_str)
41199 rtx fuse_ops[10];
41200 char insn_template[80];
41202 fuse_ops[0] = load_store_reg;
41203 fuse_ops[1] = addis_reg;
41205 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
41207 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
41208 fuse_ops[2] = offset;
41209 output_asm_insn (insn_template, fuse_ops);
41212 else if (GET_CODE (offset) == UNSPEC
41213 && XINT (offset, 1) == UNSPEC_TOCREL)
41215 if (TARGET_ELF)
41216 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
41218 else if (TARGET_XCOFF)
41219 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41221 else
41222 gcc_unreachable ();
41224 fuse_ops[2] = XVECEXP (offset, 0, 0);
41225 output_asm_insn (insn_template, fuse_ops);
41228 else if (GET_CODE (offset) == PLUS
41229 && GET_CODE (XEXP (offset, 0)) == UNSPEC
41230 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
41231 && CONST_INT_P (XEXP (offset, 1)))
41233 rtx tocrel_unspec = XEXP (offset, 0);
41234 if (TARGET_ELF)
41235 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
41237 else if (TARGET_XCOFF)
41238 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
41240 else
41241 gcc_unreachable ();
41243 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
41244 fuse_ops[3] = XEXP (offset, 1);
41245 output_asm_insn (insn_template, fuse_ops);
41248 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
41250 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41252 fuse_ops[2] = offset;
41253 output_asm_insn (insn_template, fuse_ops);
41256 else
41257 fatal_insn ("Unable to generate load/store offset for fusion", offset);
41259 return;
41262 /* Wrap a TOC address that can be fused to indicate that special fusion
41263 processing is needed. */
41265 static rtx
41266 fusion_wrap_memory_address (rtx old_mem)
41268 rtx old_addr = XEXP (old_mem, 0);
41269 rtvec v = gen_rtvec (1, old_addr);
41270 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
41271 return replace_equiv_address_nv (old_mem, new_addr, false);
41274 /* Given an address, convert it into the addis and load offset parts. Addresses
41275 created during the peephole2 process look like:
41276 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
41277 (unspec [(...)] UNSPEC_TOCREL))
41279 Addresses created via toc fusion look like:
41280 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
41282 static void
41283 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
41285 rtx hi, lo;
41287 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
41289 lo = XVECEXP (addr, 0, 0);
41290 hi = gen_rtx_HIGH (Pmode, lo);
41292 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
41294 hi = XEXP (addr, 0);
41295 lo = XEXP (addr, 1);
41297 else
41298 gcc_unreachable ();
41300 *p_hi = hi;
41301 *p_lo = lo;
41304 /* Return a string to fuse an addis instruction with a GPR load into the same
41305 register that the addis instruction set up. The address that is used
41306 is the logical address that was formed during peephole2:
41307 (lo_sum (high) (low-part))
41309 Or the address is the TOC address that is wrapped before register allocation:
41310 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
41312 The code is complicated, so we call output_asm_insn directly, and just
41313 return "". */
41315 const char *
41316 emit_fusion_gpr_load (rtx target, rtx mem)
41318 rtx addis_value;
41319 rtx addr;
41320 rtx load_offset;
41321 const char *load_str = NULL;
41322 const char *mode_name = NULL;
41323 machine_mode mode;
41325 if (GET_CODE (mem) == ZERO_EXTEND)
41326 mem = XEXP (mem, 0);
41328 gcc_assert (REG_P (target) && MEM_P (mem));
41330 addr = XEXP (mem, 0);
41331 fusion_split_address (addr, &addis_value, &load_offset);
41333 /* Now emit the load instruction to the same register. */
41334 mode = GET_MODE (mem);
41335 switch (mode)
41337 case E_QImode:
41338 mode_name = "char";
41339 load_str = "lbz";
41340 break;
41342 case E_HImode:
41343 mode_name = "short";
41344 load_str = "lhz";
41345 break;
41347 case E_SImode:
41348 case E_SFmode:
41349 mode_name = (mode == SFmode) ? "float" : "int";
41350 load_str = "lwz";
41351 break;
41353 case E_DImode:
41354 case E_DFmode:
41355 gcc_assert (TARGET_POWERPC64);
41356 mode_name = (mode == DFmode) ? "double" : "long";
41357 load_str = "ld";
41358 break;
41360 default:
41361 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
41364 /* Emit the addis instruction. */
41365 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
41367 /* Emit the D-form load instruction. */
41368 emit_fusion_load_store (target, target, load_offset, load_str);
41370 return "";
41374 /* Return true if the peephole2 can combine a load/store involving a
41375 combination of an addis instruction and the memory operation. This was
41376 added to the ISA 3.0 (power9) hardware. */
41378 bool
41379 fusion_p9_p (rtx addis_reg, /* register set via addis. */
41380 rtx addis_value, /* addis value. */
41381 rtx dest, /* destination (memory or register). */
41382 rtx src) /* source (register or memory). */
41384 rtx addr, mem, offset;
41385 machine_mode mode = GET_MODE (src);
41387 /* Validate arguments. */
41388 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
41389 return false;
41391 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
41392 return false;
41394 /* Ignore extend operations that are part of the load. */
41395 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
41396 src = XEXP (src, 0);
41398 /* Test for memory<-register or register<-memory. */
41399 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
41401 if (!MEM_P (dest))
41402 return false;
41404 mem = dest;
41407 else if (MEM_P (src))
41409 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
41410 return false;
41412 mem = src;
41415 else
41416 return false;
41418 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
41419 if (GET_CODE (addr) == PLUS)
41421 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41422 return false;
41424 return satisfies_constraint_I (XEXP (addr, 1));
41427 else if (GET_CODE (addr) == LO_SUM)
41429 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41430 return false;
41432 offset = XEXP (addr, 1);
41433 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
41434 return small_toc_ref (offset, GET_MODE (offset));
41436 else if (TARGET_ELF && !TARGET_POWERPC64)
41437 return CONSTANT_P (offset);
41440 return false;
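/* Editorial sketch: unlike the power8 GPR form above, this fusion does
   not need the loaded register to match the addis base, so e.g.

       addis r9,r2,.LC0@toc@ha
       lfd   f1,.LC0@toc@l(r9)

   qualifies, provided r9 (the addis scratch) is dead after the pair.  */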
41443 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41444 load sequence.
41446 The operands are:
41447 operands[0] register set with addis
41448 operands[1] value set via addis
41449 operands[2] target register being loaded
41450 operands[3] D-form memory reference using operands[0].
41452 This is similar to the fusion introduced with power8, except it scales to
41453 both loads/stores and does not require the result register to be the same as
41454 the base register. At the moment, we only do this if the register set with
41455 the addis is dead. */
41457 void
41458 expand_fusion_p9_load (rtx *operands)
41460 rtx tmp_reg = operands[0];
41461 rtx addis_value = operands[1];
41462 rtx target = operands[2];
41463 rtx orig_mem = operands[3];
41464 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
41465 enum rtx_code plus_or_lo_sum;
41466 machine_mode target_mode = GET_MODE (target);
41467 machine_mode extend_mode = target_mode;
41468 machine_mode ptr_mode = Pmode;
41469 enum rtx_code extend = UNKNOWN;
41471 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
41473 extend = GET_CODE (orig_mem);
41474 orig_mem = XEXP (orig_mem, 0);
41475 target_mode = GET_MODE (orig_mem);
41478 gcc_assert (MEM_P (orig_mem));
41480 orig_addr = XEXP (orig_mem, 0);
41481 plus_or_lo_sum = GET_CODE (orig_addr);
41482 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41484 offset = XEXP (orig_addr, 1);
41485 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41486 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41488 if (extend != UNKNOWN)
41489 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
41491 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
41492 UNSPEC_FUSION_P9);
41494 set = gen_rtx_SET (target, new_mem);
41495 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41496 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41497 emit_insn (insn);
41499 return;
41502 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41503 store sequence.
41505 The operands are:
41506 operands[0] register set with addis
41507 operands[1] value set via addis
41508 operands[2] target D-form memory being stored to
41509 operands[3] register being stored
41511 This is similar to the fusion introduced with power8, except it scales to
41512 both loads/stores and does not require the result register to be the same as
41513 the base register. At the moment, we only do this if the register set with
41514 the addis is dead. */
41516 void
41517 expand_fusion_p9_store (rtx *operands)
41519 rtx tmp_reg = operands[0];
41520 rtx addis_value = operands[1];
41521 rtx orig_mem = operands[2];
41522 rtx src = operands[3];
41523 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
41524 enum rtx_code plus_or_lo_sum;
41525 machine_mode target_mode = GET_MODE (orig_mem);
41526 machine_mode ptr_mode = Pmode;
41528 gcc_assert (MEM_P (orig_mem));
41530 orig_addr = XEXP (orig_mem, 0);
41531 plus_or_lo_sum = GET_CODE (orig_addr);
41532 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41534 offset = XEXP (orig_addr, 1);
41535 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41536 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41538 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
41539 UNSPEC_FUSION_P9);
41541 set = gen_rtx_SET (new_mem, new_src);
41542 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41543 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41544 emit_insn (insn);
41546 return;
41549 /* Return a string to fuse an addis instruction with a load using extended
41550 fusion. The address that is used is the logical address that was formed
41551 during peephole2: (lo_sum (high) (low-part))
41553 The code is complicated, so we call output_asm_insn directly, and just
41554 return "". */
41556 const char *
41557 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
41559 machine_mode mode = GET_MODE (reg);
41560 rtx hi;
41561 rtx lo;
41562 rtx addr;
41563 const char *load_string;
41564 int r;
41566 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
41568 mem = XEXP (mem, 0);
41569 mode = GET_MODE (mem);
41572 if (GET_CODE (reg) == SUBREG)
41574 gcc_assert (SUBREG_BYTE (reg) == 0);
41575 reg = SUBREG_REG (reg);
41578 if (!REG_P (reg))
41579 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
41581 r = REGNO (reg);
41582 if (FP_REGNO_P (r))
41584 if (mode == SFmode)
41585 load_string = "lfs";
41586 else if (mode == DFmode || mode == DImode)
41587 load_string = "lfd";
41588 else
41589 gcc_unreachable ();
41591 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41593 if (mode == SFmode)
41594 load_string = "lxssp";
41595 else if (mode == DFmode || mode == DImode)
41596 load_string = "lxsd";
41597 else
41598 gcc_unreachable ();
41600 else if (INT_REGNO_P (r))
41602 switch (mode)
41604 case E_QImode:
41605 load_string = "lbz";
41606 break;
41607 case E_HImode:
41608 load_string = "lhz";
41609 break;
41610 case E_SImode:
41611 case E_SFmode:
41612 load_string = "lwz";
41613 break;
41614 case E_DImode:
41615 case E_DFmode:
41616 if (!TARGET_POWERPC64)
41617 gcc_unreachable ();
41618 load_string = "ld";
41619 break;
41620 default:
41621 gcc_unreachable ();
41624 else
41625 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
41627 if (!MEM_P (mem))
41628 fatal_insn ("emit_fusion_p9_load not MEM", mem);
41630 addr = XEXP (mem, 0);
41631 fusion_split_address (addr, &hi, &lo);
41633 /* Emit the addis instruction. */
41634 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
41636 /* Emit the D-form load instruction. */
41637 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
41639 return "";
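/* Editorial note: the mnemonic selected above follows the register
   class of REG, so the same fused DFmode load is emitted as "lfd" to
   an FPR but as "lxsd" to an Altivec register when the ISA 3.0 D-form
   scalar instructions are available.  */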
41642 /* Return a string to fuse an addis instruction with a store using extended
41643 fusion. The address that is used is the logical address that was formed
41644 during peephole2: (lo_sum (high) (low-part))
41646 The code is complicated, so we call output_asm_insn directly, and just
41647 return "". */
41649 const char *
41650 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
41652 machine_mode mode = GET_MODE (reg);
41653 rtx hi;
41654 rtx lo;
41655 rtx addr;
41656 const char *store_string;
41657 int r;
41659 if (GET_CODE (reg) == SUBREG)
41661 gcc_assert (SUBREG_BYTE (reg) == 0);
41662 reg = SUBREG_REG (reg);
41665 if (!REG_P (reg))
41666 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
41668 r = REGNO (reg);
41669 if (FP_REGNO_P (r))
41671 if (mode == SFmode)
41672 store_string = "stfs";
41673 else if (mode == DFmode)
41674 store_string = "stfd";
41675 else
41676 gcc_unreachable ();
41678 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41680 if (mode == SFmode)
41681 store_string = "stxssp";
41682 else if (mode == DFmode || mode == DImode)
41683 store_string = "stxsd";
41684 else
41685 gcc_unreachable ();
41687 else if (INT_REGNO_P (r))
41689 switch (mode)
41691 case E_QImode:
41692 store_string = "stb";
41693 break;
41694 case E_HImode:
41695 store_string = "sth";
41696 break;
41697 case E_SImode:
41698 case E_SFmode:
41699 store_string = "stw";
41700 break;
41701 case E_DImode:
41702 case E_DFmode:
41703 if (!TARGET_POWERPC64)
41704 gcc_unreachable ();
41705 store_string = "std";
41706 break;
41707 default:
41708 gcc_unreachable ();
41711 else
41712 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
41714 if (!MEM_P (mem))
41715 fatal_insn ("emit_fusion_p9_store not MEM", mem);
41717 addr = XEXP (mem, 0);
41718 fusion_split_address (addr, &hi, &lo);
41720 /* Emit the addis instruction. */
41721 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
41723 /* Emit the D-form store instruction. */
41724 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
41726 return "";
41730 /* Analyze vector computations and remove unnecessary doubleword
41731 swaps (xxswapdi instructions). This pass is performed only
41732 for little-endian VSX code generation.
41734 For this specific case, loads and stores of 4x32 and 2x64 vectors
41735 are inefficient. These are implemented using the lxvd2x and
41736 stxvd2x instructions, which invert the order of doublewords in
41737 a vector register. Thus the code generation inserts an xxswapdi
41738 after each such load, and prior to each such store. (For spill
41739 code after register assignment, an additional xxswapdi is inserted
41740 following each store in order to return a hard register to its
41741 unpermuted value.)
41743 The extra xxswapdi instructions reduce performance. This can be
41744 particularly bad for vectorized code. The purpose of this pass
41745 is to reduce the number of xxswapdi instructions required for
41746 correctness.
41748 The primary insight is that much code that operates on vectors
41749 does not care about the relative order of elements in a register,
41750 so long as the correct memory order is preserved. If we have
41751 a computation where all input values are provided by lxvd2x/xxswapdi
41752 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
41753 and all intermediate computations are pure SIMD (independent of
41754 element order), then all the xxswapdi's associated with the loads
41755 and stores may be removed.
41757 This pass uses some of the infrastructure and logical ideas from
41758 the "web" pass in web.c. We create maximal webs of computations
41759 fitting the description above using union-find. Each such web is
41760 then optimized by removing its unnecessary xxswapdi instructions.
41762 The pass is placed prior to global optimization so that we can
41763 perform the optimization in the safest and simplest way possible;
41764 that is, by replacing each xxswapdi insn with a register copy insn.
41765 Subsequent forward propagation will remove copies where possible.
41767 There are some operations sensitive to element order for which we
41768 can still allow the operation, provided we modify those operations.
41769 These include CONST_VECTORs, for which we must swap the first and
41770 second halves of the constant vector; and SUBREGs, for which we
41771 must adjust the byte offset to account for the swapped doublewords.
41772 A remaining opportunity would be non-immediate-form splats, for
41773 which we should adjust the selected lane of the input. We should
41774 also make code generation adjustments for sum-across operations,
41775 since this is a common vectorizer reduction.
41777 Because we run prior to the first split, we can see loads and stores
41778 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
41779 vector loads and stores that have not yet been split into a permuting
41780 load/store and a swap. (One way this can happen is with a builtin
41781 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
41782 than deleting a swap, we convert the load/store into a permuting
41783 load/store (which effectively removes the swap). */
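/* Editorial illustration: the lxvd2x/xxswapd idiom this pass targets is

       lxvd2x  vs0,0,r3       # doublewords arrive swapped on LE
       xxswapd vs0,vs0        # restore element order

   with the mirror-image xxswapd + stxvd2x on the store side; when a
   whole web is order-insensitive, both swaps can be deleted.  */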
41785 /* Notes on Permutes
41787 We do not currently handle computations that contain permutes. There
41788 is a general transformation that can be performed correctly, but it
41789 may introduce more expensive code than it replaces. To handle these
41790 would require a cost model to determine when to perform the optimization.
41791 This commentary records how this could be done if desired.
41793 The most general permute is something like this (example for V16QI):
41795 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
41796 (parallel [(const_int a0) (const_int a1)
41798 (const_int a14) (const_int a15)]))
41800 where a0,...,a15 are in [0,31] and select elements from op1 and op2
41801 to produce the result.
41803 Regardless of mode, we can convert the PARALLEL to a mask of 16
41804 byte-element selectors. Let's call this M, with M[i] representing
41805 the ith byte-element selector value. Then if we swap doublewords
41806 throughout the computation, we can get correct behavior by replacing
41807 M with M' as follows:
41809 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
41810 { ((M[i]+8)%16)+16 : M[i] in [16,31]
41812 This seems promising at first, since we are just replacing one mask
41813 with another. But certain masks are preferable to others. If M
41814 is a mask that matches a vmrghh pattern, for example, M' certainly
41815 will not. Instead of a single vmrghh, we would generate a load of
41816 M' and a vperm. So we would need to know how many xxswapd's we can
41817 remove as a result of this transformation to determine if it's
41818 profitable; and preferably the logic would need to be aware of all
41819 the special preferable masks.
41821 Another form of permute is an UNSPEC_VPERM, in which the mask is
41822 already in a register. In some cases, this mask may be a constant
41823 that we can discover with ud-chains, in which case the above
41824 transformation is ok. However, the common usage here is for the
41825 mask to be produced by an UNSPEC_LVSL, in which case the mask
41826 cannot be known at compile time. In such a case we would have to
41827 generate several instructions to compute M' as above at run time,
41828 and a cost model is needed again.
41830 However, when the mask M for an UNSPEC_VPERM is loaded from the
41831 constant pool, we can replace M with M' as above at no cost
41832 beyond adding a constant pool entry. */
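/* Editorial sketch (hypothetical helper, not part of the pass): the
   M -> M' remapping above, written out over 16 byte-element selectors
   in [0,31].  */
#if 0
static void
swap_v16qi_perm_mask (const unsigned char m[16], unsigned char m_out[16])
{
  for (int i = 0; i < 16; i++)
    /* Rotate the within-operand index (low four bits) by 8 bytes and
       leave the operand-select bit (value 16) alone.  */
    m_out[i] = (m[i] & 16) | ((m[i] + 8) & 15);
}
#endif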
41834 /* This is based on the union-find logic in web.c. web_entry_base is
41835 defined in df.h. */
41836 class swap_web_entry : public web_entry_base
41838 public:
41839 /* Pointer to the insn. */
41840 rtx_insn *insn;
41841 /* Set if insn contains a mention of a vector register. All other
41842 fields are undefined if this field is unset. */
41843 unsigned int is_relevant : 1;
41844 /* Set if insn is a load. */
41845 unsigned int is_load : 1;
41846 /* Set if insn is a store. */
41847 unsigned int is_store : 1;
41848 /* Set if insn is a doubleword swap. This can either be a register swap
41849 or a permuting load or store (test is_load and is_store for this). */
41850 unsigned int is_swap : 1;
41851 /* Set if the insn has a live-in use of a parameter register. */
41852 unsigned int is_live_in : 1;
41853 /* Set if the insn has a live-out def of a return register. */
41854 unsigned int is_live_out : 1;
41855 /* Set if the insn contains a subreg reference of a vector register. */
41856 unsigned int contains_subreg : 1;
41857 /* Set if the insn contains a 128-bit integer operand. */
41858 unsigned int is_128_int : 1;
41859 /* Set if this is a call-insn. */
41860 unsigned int is_call : 1;
41861 /* Set if this insn does not perform a vector operation for which
41862 element order matters, or if we know how to fix it up if it does.
41863 Undefined if is_swap is set. */
41864 unsigned int is_swappable : 1;
41865 /* A nonzero value indicates what kind of special handling for this
41866 insn is required if doublewords are swapped. Undefined if
41867 is_swappable is not set. */
41868 unsigned int special_handling : 4;
41869 /* Set if the web represented by this entry cannot be optimized. */
41870 unsigned int web_not_optimizable : 1;
41871 /* Set if this insn should be deleted. */
41872 unsigned int will_delete : 1;
41875 enum special_handling_values {
41876 SH_NONE = 0,
41877 SH_CONST_VECTOR,
41878 SH_SUBREG,
41879 SH_NOSWAP_LD,
41880 SH_NOSWAP_ST,
41881 SH_EXTRACT,
41882 SH_SPLAT,
41883 SH_XXPERMDI,
41884 SH_CONCAT,
41885 SH_VPERM
41888 /* Union INSN with all insns containing definitions that reach USE.
41889 Detect whether USE is live-in to the current function. */
41890 static void
41891 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
41893 struct df_link *link = DF_REF_CHAIN (use);
41895 if (!link)
41896 insn_entry[INSN_UID (insn)].is_live_in = 1;
41898 while (link)
41900 if (DF_REF_IS_ARTIFICIAL (link->ref))
41901 insn_entry[INSN_UID (insn)].is_live_in = 1;
41903 if (DF_REF_INSN_INFO (link->ref))
41905 rtx def_insn = DF_REF_INSN (link->ref);
41906 (void)unionfind_union (insn_entry + INSN_UID (insn),
41907 insn_entry + INSN_UID (def_insn));
41910 link = link->next;
41914 /* Union INSN with all insns containing uses reached from DEF.
41915 Detect whether DEF is live-out from the current function. */
41916 static void
41917 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
41919 struct df_link *link = DF_REF_CHAIN (def);
41921 if (!link)
41922 insn_entry[INSN_UID (insn)].is_live_out = 1;
41924 while (link)
41926 /* This could be an eh use or some other artificial use;
41927 we treat these all the same (killing the optimization). */
41928 if (DF_REF_IS_ARTIFICIAL (link->ref))
41929 insn_entry[INSN_UID (insn)].is_live_out = 1;
41931 if (DF_REF_INSN_INFO (link->ref))
41933 rtx use_insn = DF_REF_INSN (link->ref);
41934 (void)unionfind_union (insn_entry + INSN_UID (insn),
41935 insn_entry + INSN_UID (use_insn));
41938 link = link->next;
41942 /* Return 1 iff INSN is a load insn, including permuting loads that
41943 represent an lxvd2x instruction; else return 0. */
41944 static unsigned int
41945 insn_is_load_p (rtx insn)
41947 rtx body = PATTERN (insn);
41949 if (GET_CODE (body) == SET)
41951 if (GET_CODE (SET_SRC (body)) == MEM)
41952 return 1;
41954 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
41955 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
41956 return 1;
41958 return 0;
41961 if (GET_CODE (body) != PARALLEL)
41962 return 0;
41964 rtx set = XVECEXP (body, 0, 0);
41966 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
41967 return 1;
41969 return 0;
41972 /* Return 1 iff INSN is a store insn, including permuting stores that
41973 represent an stxvd2x instruction; else return 0. */
41974 static unsigned int
41975 insn_is_store_p (rtx insn)
41977 rtx body = PATTERN (insn);
41978 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
41979 return 1;
41980 if (GET_CODE (body) != PARALLEL)
41981 return 0;
41982 rtx set = XVECEXP (body, 0, 0);
41983 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
41984 return 1;
41985 return 0;
41988 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
41989 a permuting load, or a permuting store. */
41990 static unsigned int
41991 insn_is_swap_p (rtx insn)
41993 rtx body = PATTERN (insn);
41994 if (GET_CODE (body) != SET)
41995 return 0;
41996 rtx rhs = SET_SRC (body);
41997 if (GET_CODE (rhs) != VEC_SELECT)
41998 return 0;
41999 rtx parallel = XEXP (rhs, 1);
42000 if (GET_CODE (parallel) != PARALLEL)
42001 return 0;
42002 unsigned int len = XVECLEN (parallel, 0);
42003 if (len != 2 && len != 4 && len != 8 && len != 16)
42004 return 0;
42005 for (unsigned int i = 0; i < len / 2; ++i)
42007 rtx op = XVECEXP (parallel, 0, i);
42008 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
42009 return 0;
42011 for (unsigned int i = len / 2; i < len; ++i)
42013 rtx op = XVECEXP (parallel, 0, i);
42014 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
42015 return 0;
42017 return 1;
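/* Editorial example: for V4SI the swap matched above has the form

     (set (reg:V4SI vD)
          (vec_select:V4SI (reg:V4SI vS)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   i.e. element i of the result is element (i + len/2) mod len of the
   source, which exchanges the two doublewords at any element width.  */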
42020 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
42021 static bool
42022 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
42024 unsigned uid = INSN_UID (insn);
42025 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
42026 return false;
42028 /* Find the unique use in the swap and locate its def. If the def
42029 isn't unique, punt. */
42030 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42031 df_ref use;
42032 FOR_EACH_INSN_INFO_USE (use, insn_info)
42034 struct df_link *def_link = DF_REF_CHAIN (use);
42035 if (!def_link || def_link->next)
42036 return false;
42038 rtx def_insn = DF_REF_INSN (def_link->ref);
42039 unsigned uid2 = INSN_UID (def_insn);
42040 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
42041 return false;
42043 rtx body = PATTERN (def_insn);
42044 if (GET_CODE (body) != SET
42045 || GET_CODE (SET_SRC (body)) != VEC_SELECT
42046 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
42047 return false;
42049 rtx mem = XEXP (SET_SRC (body), 0);
42050 rtx base_reg = XEXP (mem, 0);
42052 df_ref base_use;
42053 insn_info = DF_INSN_INFO_GET (def_insn);
42054 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
42056 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
42057 continue;
42059 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
42060 if (!base_def_link || base_def_link->next)
42061 return false;
42063 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
42064 rtx tocrel_body = PATTERN (tocrel_insn);
42065 rtx base, offset;
42066 if (GET_CODE (tocrel_body) != SET)
42067 return false;
42068 /* There is an extra level of indirection for small/large
42069 code models. */
42070 rtx tocrel_expr = SET_SRC (tocrel_body);
42071 if (GET_CODE (tocrel_expr) == MEM)
42072 tocrel_expr = XEXP (tocrel_expr, 0);
42073 if (!toc_relative_expr_p (tocrel_expr, false))
42074 return false;
42075 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
42076 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
42077 return false;
42080 return true;
42083 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
42084 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
42085 static bool
42086 v2df_reduction_p (rtx op)
42088 if (GET_MODE (op) != V2DFmode)
42089 return false;
42091 enum rtx_code code = GET_CODE (op);
42092 if (code != PLUS && code != SMIN && code != SMAX)
42093 return false;
42095 rtx concat = XEXP (op, 0);
42096 if (GET_CODE (concat) != VEC_CONCAT)
42097 return false;
42099 rtx select0 = XEXP (concat, 0);
42100 rtx select1 = XEXP (concat, 1);
42101 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
42102 return false;
42104 rtx reg0 = XEXP (select0, 0);
42105 rtx reg1 = XEXP (select1, 0);
42106 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
42107 return false;
42109 rtx parallel0 = XEXP (select0, 1);
42110 rtx parallel1 = XEXP (select1, 1);
42111 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
42112 return false;
42114 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
42115 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
42116 return false;
42118 return true;
42121 /* Return 1 iff OP is an operand that will not be affected by having
42122 vector doublewords swapped in memory. */
42123 static unsigned int
42124 rtx_is_swappable_p (rtx op, unsigned int *special)
42126 enum rtx_code code = GET_CODE (op);
42127 int i, j;
42128 rtx parallel;
42130 switch (code)
42132 case LABEL_REF:
42133 case SYMBOL_REF:
42134 case CLOBBER:
42135 case REG:
42136 return 1;
42138 case VEC_CONCAT:
42139 case ASM_INPUT:
42140 case ASM_OPERANDS:
42141 return 0;
42143 case CONST_VECTOR:
42145 *special = SH_CONST_VECTOR;
42146 return 1;
42149 case VEC_DUPLICATE:
42150 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
42151 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
42152 it represents a vector splat for which we can do special
42153 handling. */
42154 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
42155 return 1;
42156 else if (REG_P (XEXP (op, 0))
42157 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42158 /* This catches V2DF and V2DI splat, at a minimum. */
42159 return 1;
42160 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
42161 && REG_P (XEXP (XEXP (op, 0), 0))
42162 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42163 /* This catches splat of a truncated value. */
42164 return 1;
42165 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
42166 /* If the duplicated item is from a select, defer to the select
42167 processing to see if we can change the lane for the splat. */
42168 return rtx_is_swappable_p (XEXP (op, 0), special);
42169 else
42170 return 0;
42172 case VEC_SELECT:
42173 /* A vec_extract operation is ok if we change the lane. */
42174 if (GET_CODE (XEXP (op, 0)) == REG
42175 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
42176 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42177 && XVECLEN (parallel, 0) == 1
42178 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
42180 *special = SH_EXTRACT;
42181 return 1;
42183 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
42184 XXPERMDI is a swap operation, it will be identified by
42185 insn_is_swap_p and therefore we won't get here. */
42186 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
42187 && (GET_MODE (XEXP (op, 0)) == V4DFmode
42188 || GET_MODE (XEXP (op, 0)) == V4DImode)
42189 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42190 && XVECLEN (parallel, 0) == 2
42191 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
42192 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
42194 *special = SH_XXPERMDI;
42195 return 1;
42197 else if (v2df_reduction_p (op))
42198 return 1;
42199 else
42200 return 0;
42202 case UNSPEC:
42204 /* Various operations are unsafe for this optimization, at least
42205 without significant additional work. Permutes are obviously
42206 problematic, as both the permute control vector and the ordering
42207 of the target values are invalidated by doubleword swapping.
42208 Vector pack and unpack modify the number of vector lanes.
42209 Merge-high/low will not operate correctly on swapped operands.
42210 Vector shifts across element boundaries are clearly uncool,
42211 as are vector select and concatenate operations. Vector
42212 sum-across instructions define one operand with a specific
42213 order-dependent element, so additional fixup code would be
42214 needed to make those work. Vector set and non-immediate-form
42215 vector splat are element-order sensitive. A few of these
42216 cases might be workable with special handling if required.
42217 Adding cost modeling would be appropriate in some cases. */
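/* For instance (illustrative): a vperm control byte with value 0 names
byte 0 of the first input in BE element order; once doublewords are
swapped in registers, that data actually resides at byte 8, so an
unadjusted permute would select the wrong bytes.  */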
42218 int val = XINT (op, 1);
42219 switch (val)
42221 default:
42222 break;
42223 case UNSPEC_VMRGH_DIRECT:
42224 case UNSPEC_VMRGL_DIRECT:
42225 case UNSPEC_VPACK_SIGN_SIGN_SAT:
42226 case UNSPEC_VPACK_SIGN_UNS_SAT:
42227 case UNSPEC_VPACK_UNS_UNS_MOD:
42228 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
42229 case UNSPEC_VPACK_UNS_UNS_SAT:
42230 case UNSPEC_VPERM:
42231 case UNSPEC_VPERM_UNS:
42232 case UNSPEC_VPERMHI:
42233 case UNSPEC_VPERMSI:
42234 case UNSPEC_VPKPX:
42235 case UNSPEC_VSLDOI:
42236 case UNSPEC_VSLO:
42237 case UNSPEC_VSRO:
42238 case UNSPEC_VSUM2SWS:
42239 case UNSPEC_VSUM4S:
42240 case UNSPEC_VSUM4UBS:
42241 case UNSPEC_VSUMSWS:
42242 case UNSPEC_VSUMSWS_DIRECT:
42243 case UNSPEC_VSX_CONCAT:
42244 case UNSPEC_VSX_SET:
42245 case UNSPEC_VSX_SLDWI:
42246 case UNSPEC_VUNPACK_HI_SIGN:
42247 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
42248 case UNSPEC_VUNPACK_LO_SIGN:
42249 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
42250 case UNSPEC_VUPKHPX:
42251 case UNSPEC_VUPKHS_V4SF:
42252 case UNSPEC_VUPKHU_V4SF:
42253 case UNSPEC_VUPKLPX:
42254 case UNSPEC_VUPKLS_V4SF:
42255 case UNSPEC_VUPKLU_V4SF:
42256 case UNSPEC_VSX_CVDPSPN:
42257 case UNSPEC_VSX_CVSPDP:
42258 case UNSPEC_VSX_CVSPDPN:
42259 case UNSPEC_VSX_EXTRACT:
42260 case UNSPEC_VSX_VSLO:
42261 case UNSPEC_VSX_VEC_INIT:
42262 return 0;
42263 case UNSPEC_VSPLT_DIRECT:
42264 case UNSPEC_VSX_XXSPLTD:
42265 *special = SH_SPLAT;
42266 return 1;
42267 case UNSPEC_REDUC_PLUS:
42268 case UNSPEC_REDUC:
42269 return 1;
42273 default:
42274 break;
42277 const char *fmt = GET_RTX_FORMAT (code);
42278 int ok = 1;
42280 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42281 if (fmt[i] == 'e' || fmt[i] == 'u')
42283 unsigned int special_op = SH_NONE;
42284 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
42285 if (special_op == SH_NONE)
42286 continue;
42287 /* Ensure we never have two kinds of special handling
42288 for the same insn. */
42289 if (*special != SH_NONE && *special != special_op)
42290 return 0;
42291 *special = special_op;
42293 else if (fmt[i] == 'E')
42294 for (j = 0; j < XVECLEN (op, i); ++j)
42296 unsigned int special_op = SH_NONE;
42297 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
42298 if (special_op == SH_NONE)
42299 continue;
42300 /* Ensure we never have two kinds of special handling
42301 for the same insn. */
42302 if (*special != SH_NONE && *special != special_op)
42303 return 0;
42304 *special = special_op;
42307 return ok;
42310 /* Return 1 iff INSN will not be affected by
42311 having vector doublewords swapped in memory (in which case
42312 *SPECIAL is unchanged), or that can be modified to be correct
42313 if vector doublewords are swapped in memory (in which case
42314 *SPECIAL is changed to a value indicating how). */
42315 static unsigned int
42316 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
42317 unsigned int *special)
42319 /* Calls are always bad. */
42320 if (GET_CODE (insn) == CALL_INSN)
42321 return 0;
42323 /* Loads and stores seen here are not permuting, but we can still
42324 fix them up by converting them to permuting ones. Exceptions:
42325 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
42326 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
42327 for the SET source. Also we must now make an exception for lvx
42328 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
42329 explicit "& -16") since this leads to unrecognizable insns. */
42330 rtx body = PATTERN (insn);
42331 int i = INSN_UID (insn);
42333 if (insn_entry[i].is_load)
42335 if (GET_CODE (body) == SET)
42337 rtx rhs = SET_SRC (body);
42338 /* Even without a swap, the RHS might be a vec_select for, say,
42339 a byte-reversing load. */
42340 if (GET_CODE (rhs) != MEM)
42341 return 0;
42342 if (GET_CODE (XEXP (rhs, 0)) == AND)
42343 return 0;
42345 *special = SH_NOSWAP_LD;
42346 return 1;
42348 else
42349 return 0;
42352 if (insn_entry[i].is_store)
42354 if (GET_CODE (body) == SET
42355 && GET_CODE (SET_SRC (body)) != UNSPEC)
42357 rtx lhs = SET_DEST (body);
42358 /* Even without a swap, the LHS might be a vec_select for, say,
42359 a byte-reversing store. */
42360 if (GET_CODE (lhs) != MEM)
42361 return 0;
42362 if (GET_CODE (XEXP (lhs, 0)) == AND)
42363 return 0;
42365 *special = SH_NOSWAP_ST;
42366 return 1;
42368 else
42369 return 0;
42372 /* A convert to single precision can be left as is provided that
42373 all of its uses are in xxspltw instructions that splat BE element
42374 zero. */
42375 if (GET_CODE (body) == SET
42376 && GET_CODE (SET_SRC (body)) == UNSPEC
42377 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
42379 df_ref def;
42380 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42382 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42384 struct df_link *link = DF_REF_CHAIN (def);
42385 if (!link)
42386 return 0;
42388 for (; link; link = link->next) {
42389 rtx use_insn = DF_REF_INSN (link->ref);
42390 rtx use_body = PATTERN (use_insn);
42391 if (GET_CODE (use_body) != SET
42392 || GET_CODE (SET_SRC (use_body)) != UNSPEC
42393 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
42394 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
42395 return 0;
42399 return 1;
42402 /* A concatenation of two doublewords is ok if we reverse the
42403 order of the inputs. */
42404 if (GET_CODE (body) == SET
42405 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
42406 && (GET_MODE (SET_SRC (body)) == V2DFmode
42407 || GET_MODE (SET_SRC (body)) == V2DImode))
42409 *special = SH_CONCAT;
42410 return 1;
42413 /* V2DF reductions are always swappable. */
42414 if (GET_CODE (body) == PARALLEL)
42416 rtx expr = XVECEXP (body, 0, 0);
42417 if (GET_CODE (expr) == SET
42418 && v2df_reduction_p (SET_SRC (expr)))
42419 return 1;
42422 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
42423 constant pool. */
42424 if (GET_CODE (body) == SET
42425 && GET_CODE (SET_SRC (body)) == UNSPEC
42426 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
42427 && XVECLEN (SET_SRC (body), 0) == 3
42428 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
42430 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
42431 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42432 df_ref use;
42433 FOR_EACH_INSN_INFO_USE (use, insn_info)
42434 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42436 struct df_link *def_link = DF_REF_CHAIN (use);
42437 /* Punt if multiple definitions for this reg. */
42438 if (def_link && !def_link->next
42439 && const_load_sequence_p (insn_entry,
42440 DF_REF_INSN (def_link->ref)))
42442 *special = SH_VPERM;
42443 return 1;
42448 /* Otherwise check the operands for vector lane violations. */
42449 return rtx_is_swappable_p (body, special);
42452 enum chain_purpose { FOR_LOADS, FOR_STORES };
42454 /* Return true if the UD or DU chain headed by LINK is non-empty,
42455 and every entry on the chain references an insn that is a
42456 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
42457 register swap must have only permuting loads as reaching defs.
42458 If PURPOSE is FOR_STORES, each such register swap must have only
42459 register swaps or permuting stores as reached uses. */
42460 static bool
42461 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
42462 enum chain_purpose purpose)
42464 if (!link)
42465 return false;
42467 for (; link; link = link->next)
42469 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
42470 continue;
42472 if (DF_REF_IS_ARTIFICIAL (link->ref))
42473 return false;
42475 rtx reached_insn = DF_REF_INSN (link->ref);
42476 unsigned uid = INSN_UID (reached_insn);
42477 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
42479 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
42480 || insn_entry[uid].is_store)
42481 return false;
42483 if (purpose == FOR_LOADS)
42485 df_ref use;
42486 FOR_EACH_INSN_INFO_USE (use, insn_info)
42488 struct df_link *swap_link = DF_REF_CHAIN (use);
42490 while (swap_link)
42492 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42493 return false;
42495 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
42496 unsigned uid2 = INSN_UID (swap_def_insn);
42498 /* Only permuting loads are allowed. */
42499 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
42500 return false;
42502 swap_link = swap_link->next;
42506 else if (purpose == FOR_STORES)
42508 df_ref def;
42509 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42511 struct df_link *swap_link = DF_REF_CHAIN (def);
42513 while (swap_link)
42515 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42516 return false;
42518 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
42519 unsigned uid2 = INSN_UID (swap_use_insn);
42521 /* Permuting stores or register swaps are allowed. */
42522 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
42523 return false;
42525 swap_link = swap_link->next;
42531 return true;
42534 /* Mark the xxswapdi instructions associated with permuting loads and
42535 stores for removal. Note that we only flag them for deletion here,
42536 as there is a possibility of a swap being reached from multiple
42537 loads, etc. */
42538 static void
42539 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
42541 rtx insn = insn_entry[i].insn;
42542 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42544 if (insn_entry[i].is_load)
42546 df_ref def;
42547 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42549 struct df_link *link = DF_REF_CHAIN (def);
42551 /* We know by now that these are swaps, so we can delete
42552 them confidently. */
42553 while (link)
42555 rtx use_insn = DF_REF_INSN (link->ref);
42556 insn_entry[INSN_UID (use_insn)].will_delete = 1;
42557 link = link->next;
42561 else if (insn_entry[i].is_store)
42563 df_ref use;
42564 FOR_EACH_INSN_INFO_USE (use, insn_info)
42566 /* Ignore uses for addressability. */
42567 machine_mode mode = GET_MODE (DF_REF_REG (use));
42568 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
42569 continue;
42571 struct df_link *link = DF_REF_CHAIN (use);
42573 /* We know by now that these are swaps, so we can delete
42574 them confidently. */
42575 while (link)
42577 rtx def_insn = DF_REF_INSN (link->ref);
42578 insn_entry[INSN_UID (def_insn)].will_delete = 1;
42579 link = link->next;
42585 /* OP is either a CONST_VECTOR or an expression containing one.
42586 Swap the first half of the vector with the second in the first
42587 case. Recurse to find it in the second. */
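/* Illustrative example: the V4SI constant {0, 1, 2, 3} becomes
{2, 3, 0, 1}; elements 0..1 exchange places with elements 2..3.  */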
42588 static void
42589 swap_const_vector_halves (rtx op)
42591 int i;
42592 enum rtx_code code = GET_CODE (op);
42593 if (GET_CODE (op) == CONST_VECTOR)
42595 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
42596 for (i = 0; i < half_units; ++i)
42598 rtx temp = CONST_VECTOR_ELT (op, i);
42599 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
42600 CONST_VECTOR_ELT (op, i + half_units) = temp;
42603 else
42605 int j;
42606 const char *fmt = GET_RTX_FORMAT (code);
42607 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42608 if (fmt[i] == 'e' || fmt[i] == 'u')
42609 swap_const_vector_halves (XEXP (op, i));
42610 else if (fmt[i] == 'E')
42611 for (j = 0; j < XVECLEN (op, i); ++j)
42612 swap_const_vector_halves (XVECEXP (op, i, j));
42616 /* Find all subregs of a vector expression that perform a narrowing,
42617 and adjust the subreg index to account for doubleword swapping. */
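/* Illustrative example: (subreg:DF (reg:V2DF v) 0) names the other
doubleword once the register holds swapped data, so SUBREG_BYTE 0
becomes 8 and SUBREG_BYTE 8 becomes 0.  */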
42618 static void
42619 adjust_subreg_index (rtx op)
42621 enum rtx_code code = GET_CODE (op);
42622 if (code == SUBREG
42623 && (GET_MODE_SIZE (GET_MODE (op))
42624 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
42626 unsigned int index = SUBREG_BYTE (op);
42627 if (index < 8)
42628 index += 8;
42629 else
42630 index -= 8;
42631 SUBREG_BYTE (op) = index;
42634 const char *fmt = GET_RTX_FORMAT (code);
42635 int i, j;
42636 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42637 if (fmt[i] == 'e' || fmt[i] == 'u')
42638 adjust_subreg_index (XEXP (op, i));
42639 else if (fmt[i] == 'E')
42640 for (j = 0; j < XVECLEN (op, i); ++j)
42641 adjust_subreg_index (XVECEXP (op, i, j));
42644 /* Convert the non-permuting load INSN to a permuting one. */
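/* Sketch of the rewrite performed below, for a V4SI load (illustrative):

(set (reg:V4SI n) (mem:V4SI addr))

becomes

(set (reg:V4SI n)
(vec_select:V4SI (mem:V4SI addr) (parallel [2 3 0 1])))

where the selector exchanges the two doubleword halves.  */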
42645 static void
42646 permute_load (rtx_insn *insn)
42648 rtx body = PATTERN (insn);
42649 rtx mem_op = SET_SRC (body);
42650 rtx tgt_reg = SET_DEST (body);
42651 machine_mode mode = GET_MODE (tgt_reg);
42652 int n_elts = GET_MODE_NUNITS (mode);
42653 int half_elts = n_elts / 2;
42654 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42655 int i, j;
42656 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42657 XVECEXP (par, 0, i) = GEN_INT (j);
42658 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42659 XVECEXP (par, 0, i) = GEN_INT (j);
42660 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
42661 SET_SRC (body) = sel;
42662 INSN_CODE (insn) = -1; /* Force re-recognition. */
42663 df_insn_rescan (insn);
42665 if (dump_file)
42666 fprintf (dump_file, "Replacing load %d with permuted load\n",
42667 INSN_UID (insn));
42670 /* Convert the non-permuting store INSN to a permuting one. */
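/* This mirrors permute_load: for V4SI (illustrative),
(set (mem:V4SI addr) (reg:V4SI n))
becomes
(set (mem:V4SI addr)
(vec_select:V4SI (reg:V4SI n) (parallel [2 3 0 1]))).  */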
42671 static void
42672 permute_store (rtx_insn *insn)
42674 rtx body = PATTERN (insn);
42675 rtx src_reg = SET_SRC (body);
42676 machine_mode mode = GET_MODE (src_reg);
42677 int n_elts = GET_MODE_NUNITS (mode);
42678 int half_elts = n_elts / 2;
42679 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42680 int i, j;
42681 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42682 XVECEXP (par, 0, i) = GEN_INT (j);
42683 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42684 XVECEXP (par, 0, i) = GEN_INT (j);
42685 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
42686 SET_SRC (body) = sel;
42687 INSN_CODE (insn) = -1; /* Force re-recognition. */
42688 df_insn_rescan (insn);
42690 if (dump_file)
42691 fprintf (dump_file, "Replacing store %d with permuted store\n",
42692 INSN_UID (insn));
42695 /* Given INSN that contains a vector extract operation, adjust the index
42696 of the extracted lane to account for the doubleword swap.  */
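/* Illustrative example: for an extract from a V4SI source, half_elts
is 2, so lane 1 becomes lane 3 and lane 3 becomes lane 1: a lane below
half_elts gains half_elts, and one at or above it loses half_elts.  */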
42697 static void
42698 adjust_extract (rtx_insn *insn)
42700 rtx pattern = PATTERN (insn);
42701 if (GET_CODE (pattern) == PARALLEL)
42702 pattern = XVECEXP (pattern, 0, 0);
42703 rtx src = SET_SRC (pattern);
42704 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
42705 account for that. */
42706 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
42707 rtx par = XEXP (sel, 1);
42708 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
42709 int lane = INTVAL (XVECEXP (par, 0, 0));
42710 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42711 XVECEXP (par, 0, 0) = GEN_INT (lane);
42712 INSN_CODE (insn) = -1; /* Force re-recognition. */
42713 df_insn_rescan (insn);
42715 if (dump_file)
42716 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
42719 /* Given INSN that contains a vector direct-splat operation, adjust the index
42720 of the source lane to account for the doubleword swap.  */
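/* The lane arithmetic is the same as in adjust_extract: e.g. for a
V4SI direct splat (illustrative), lane 0 becomes lane 2 and vice versa.  */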
42721 static void
42722 adjust_splat (rtx_insn *insn)
42724 rtx body = PATTERN (insn);
42725 rtx unspec = XEXP (body, 1);
42726 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
42727 int lane = INTVAL (XVECEXP (unspec, 0, 1));
42728 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42729 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
42730 INSN_CODE (insn) = -1; /* Force re-recognition. */
42731 df_insn_rescan (insn);
42733 if (dump_file)
42734 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
42737 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
42738 swap), reverse the order of the source operands and adjust the indices
42739 of the source lanes to account for doubleword reversal.  */
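/* Illustrative example: an xxpermdi selecting lanes {0, 2} of the
V4DF/V4DI concatenation has its two inputs reversed and its selector
rewritten to {3 - 2, 3 - 0} = {1, 3}, which names the same values once
both inputs hold swapped doublewords.  */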
42740 static void
42741 adjust_xxpermdi (rtx_insn *insn)
42743 rtx set = PATTERN (insn);
42744 rtx select = XEXP (set, 1);
42745 rtx concat = XEXP (select, 0);
42746 rtx src0 = XEXP (concat, 0);
42747 XEXP (concat, 0) = XEXP (concat, 1);
42748 XEXP (concat, 1) = src0;
42749 rtx parallel = XEXP (select, 1);
42750 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
42751 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
42752 int new_lane0 = 3 - lane1;
42753 int new_lane1 = 3 - lane0;
42754 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
42755 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
42756 INSN_CODE (insn) = -1; /* Force re-recognition. */
42757 df_insn_rescan (insn);
42759 if (dump_file)
42760 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
42763 /* Given INSN whose body contains a VEC_CONCAT operation of two doublewords,
42764 reverse the order of those inputs.  */
42765 static void
42766 adjust_concat (rtx_insn *insn)
42768 rtx set = PATTERN (insn);
42769 rtx concat = XEXP (set, 1);
42770 rtx src0 = XEXP (concat, 0);
42771 XEXP (concat, 0) = XEXP (concat, 1);
42772 XEXP (concat, 1) = src0;
42773 INSN_CODE (insn) = -1; /* Force re-recognition. */
42774 df_insn_rescan (insn);
42776 if (dump_file)
42777 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
42780 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
42781 constant pool to reflect swapped doublewords. */
42782 static void
42783 adjust_vperm (rtx_insn *insn)
42785 /* We previously determined that the UNSPEC_VPERM was fed by a
42786 swap of a swapping load of a TOC-relative constant pool symbol.
42787 Find the MEM in the swapping load and replace it with a MEM for
42788 the adjusted mask constant. */
42789 rtx set = PATTERN (insn);
42790 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
42792 /* Find the swap. */
42793 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42794 df_ref use;
42795 rtx_insn *swap_insn = 0;
42796 FOR_EACH_INSN_INFO_USE (use, insn_info)
42797 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42799 struct df_link *def_link = DF_REF_CHAIN (use);
42800 gcc_assert (def_link && !def_link->next);
42801 swap_insn = DF_REF_INSN (def_link->ref);
42802 break;
42804 gcc_assert (swap_insn);
42806 /* Find the load. */
42807 insn_info = DF_INSN_INFO_GET (swap_insn);
42808 rtx_insn *load_insn = 0;
42809 FOR_EACH_INSN_INFO_USE (use, insn_info)
42811 struct df_link *def_link = DF_REF_CHAIN (use);
42812 gcc_assert (def_link && !def_link->next);
42813 load_insn = DF_REF_INSN (def_link->ref);
42814 break;
42816 gcc_assert (load_insn);
42818 /* Find the TOC-relative symbol access. */
42819 insn_info = DF_INSN_INFO_GET (load_insn);
42820 rtx_insn *tocrel_insn = 0;
42821 FOR_EACH_INSN_INFO_USE (use, insn_info)
42823 struct df_link *def_link = DF_REF_CHAIN (use);
42824 gcc_assert (def_link && !def_link->next);
42825 tocrel_insn = DF_REF_INSN (def_link->ref);
42826 break;
42828 gcc_assert (tocrel_insn);
42830 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
42831 to set tocrel_base; otherwise it would be unnecessary as we've
42832 already established it will return true. */
42833 rtx base, offset;
42834 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
42835 /* There is an extra level of indirection for small/large code models. */
42836 if (GET_CODE (tocrel_expr) == MEM)
42837 tocrel_expr = XEXP (tocrel_expr, 0);
42838 if (!toc_relative_expr_p (tocrel_expr, false))
42839 gcc_unreachable ();
42840 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
42841 rtx const_vector = get_pool_constant (base);
42842 /* With the extra indirection, get_pool_constant will produce the
42843 real constant from the reg_equal expression, so get the real
42844 constant. */
42845 if (GET_CODE (const_vector) == SYMBOL_REF)
42846 const_vector = get_pool_constant (const_vector);
42847 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
42849 /* Create an adjusted mask from the initial mask. */
42850 unsigned int new_mask[16], i, val;
42851 for (i = 0; i < 16; ++i) {
42852 val = INTVAL (XVECEXP (const_vector, 0, i));
42853 if (val < 16)
42854 new_mask[i] = (val + 8) % 16;
42855 else
42856 new_mask[i] = ((val + 8) % 16) + 16;
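/* Worked example (illustrative): selector byte 3 (first input) becomes
(3 + 8) % 16 = 11, byte 12 becomes 4; selector byte 19 (second input)
becomes ((19 + 8) % 16) + 16 = 27, and byte 28 becomes 20.  */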
42859 /* Create a new CONST_VECTOR and a MEM that references it. */
42860 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
42861 for (i = 0; i < 16; ++i)
42862 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
42863 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
42864 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
42865 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
42866 can't recognize. Force the SYMBOL_REF into a register. */
42867 if (!REG_P (XEXP (new_mem, 0))) {
42868 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
42869 XEXP (new_mem, 0) = base_reg;
42870 /* Move the newly created insn ahead of the load insn. */
42871 rtx_insn *force_insn = get_last_insn ();
42872 remove_insn (force_insn);
42873 rtx_insn *before_load_insn = PREV_INSN (load_insn);
42874 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
42875 df_insn_rescan (before_load_insn);
42876 df_insn_rescan (force_insn);
42879 /* Replace the MEM in the load instruction and rescan it. */
42880 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
42881 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
42882 df_insn_rescan (load_insn);
42884 if (dump_file)
42885 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
42888 /* The insn described by INSN_ENTRY[I] can be swapped, but only
42889 with special handling. Take care of that here. */
42890 static void
42891 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
42893 rtx_insn *insn = insn_entry[i].insn;
42894 rtx body = PATTERN (insn);
42896 switch (insn_entry[i].special_handling)
42898 default:
42899 gcc_unreachable ();
42900 case SH_CONST_VECTOR:
42902 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
42903 gcc_assert (GET_CODE (body) == SET);
42904 rtx rhs = SET_SRC (body);
42905 swap_const_vector_halves (rhs);
42906 if (dump_file)
42907 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
42908 break;
42910 case SH_SUBREG:
42911 /* A subreg of the same size is already safe. For subregs that
42912 select a smaller portion of a reg, adjust the index for
42913 swapped doublewords. */
42914 adjust_subreg_index (body);
42915 if (dump_file)
42916 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
42917 break;
42918 case SH_NOSWAP_LD:
42919 /* Convert a non-permuting load to a permuting one. */
42920 permute_load (insn);
42921 break;
42922 case SH_NOSWAP_ST:
42923 /* Convert a non-permuting store to a permuting one. */
42924 permute_store (insn);
42925 break;
42926 case SH_EXTRACT:
42927 /* Change the lane on an extract operation. */
42928 adjust_extract (insn);
42929 break;
42930 case SH_SPLAT:
42931 /* Change the lane on a direct-splat operation. */
42932 adjust_splat (insn);
42933 break;
42934 case SH_XXPERMDI:
42935 /* Change the lanes on an XXPERMDI operation. */
42936 adjust_xxpermdi (insn);
42937 break;
42938 case SH_CONCAT:
42939 /* Reverse the order of a concatenation operation. */
42940 adjust_concat (insn);
42941 break;
42942 case SH_VPERM:
42943 /* Change the mask loaded from the constant pool for a VPERM. */
42944 adjust_vperm (insn);
42945 break;
42949 /* Find the insn from the Ith table entry, which is known to be a
42950 register swap Y = SWAP(X). Replace it with a copy Y = X. */
42951 static void
42952 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
42954 rtx_insn *insn = insn_entry[i].insn;
42955 rtx body = PATTERN (insn);
42956 rtx src_reg = XEXP (SET_SRC (body), 0);
42957 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
42958 rtx_insn *new_insn = emit_insn_before (copy, insn);
42959 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
42960 df_insn_rescan (new_insn);
42962 if (dump_file)
42964 unsigned int new_uid = INSN_UID (new_insn);
42965 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
42968 df_insn_delete (insn);
42969 remove_insn (insn);
42970 insn->set_deleted ();
42973 /* Dump the swap table to DUMP_FILE. */
42974 static void
42975 dump_swap_insn_table (swap_web_entry *insn_entry)
42977 int e = get_max_uid ();
42978 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
42980 for (int i = 0; i < e; ++i)
42981 if (insn_entry[i].is_relevant)
42983 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
42984 fprintf (dump_file, "%6d %6d ", i,
42985 pred_entry && pred_entry->insn
42986 ? INSN_UID (pred_entry->insn) : 0);
42987 if (insn_entry[i].is_load)
42988 fputs ("load ", dump_file);
42989 if (insn_entry[i].is_store)
42990 fputs ("store ", dump_file);
42991 if (insn_entry[i].is_swap)
42992 fputs ("swap ", dump_file);
42993 if (insn_entry[i].is_live_in)
42994 fputs ("live-in ", dump_file);
42995 if (insn_entry[i].is_live_out)
42996 fputs ("live-out ", dump_file);
42997 if (insn_entry[i].contains_subreg)
42998 fputs ("subreg ", dump_file);
42999 if (insn_entry[i].is_128_int)
43000 fputs ("int128 ", dump_file);
43001 if (insn_entry[i].is_call)
43002 fputs ("call ", dump_file);
43003 if (insn_entry[i].is_swappable)
43005 fputs ("swappable ", dump_file);
43006 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
43007 fputs ("special:constvec ", dump_file);
43008 else if (insn_entry[i].special_handling == SH_SUBREG)
43009 fputs ("special:subreg ", dump_file);
43010 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
43011 fputs ("special:load ", dump_file);
43012 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
43013 fputs ("special:store ", dump_file);
43014 else if (insn_entry[i].special_handling == SH_EXTRACT)
43015 fputs ("special:extract ", dump_file);
43016 else if (insn_entry[i].special_handling == SH_SPLAT)
43017 fputs ("special:splat ", dump_file);
43018 else if (insn_entry[i].special_handling == SH_XXPERMDI)
43019 fputs ("special:xxpermdi ", dump_file);
43020 else if (insn_entry[i].special_handling == SH_CONCAT)
43021 fputs ("special:concat ", dump_file);
43022 else if (insn_entry[i].special_handling == SH_VPERM)
43023 fputs ("special:vperm ", dump_file);
43025 if (insn_entry[i].web_not_optimizable)
43026 fputs ("unoptimizable ", dump_file);
43027 if (insn_entry[i].will_delete)
43028 fputs ("delete ", dump_file);
43029 fputs ("\n", dump_file);
43031 fputs ("\n", dump_file);
43034 /* Return ALIGN with its address canonicalized to (reg) or (plus reg reg).
43035 Here ALIGN is an (and addr (const_int -16)).  Always return a new copy
43036 to avoid problems with combine.  */
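/* Illustrative example: given
(and (plus (reg r9) (const_int 16)) (const_int -16))
the constant 16 is forced into a fresh register, and a new rtx
(and (plus (reg r9) (reg tmp)) (const_int -16))
is returned.  */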
43037 static rtx
43038 alignment_with_canonical_addr (rtx align)
43040 rtx canon;
43041 rtx addr = XEXP (align, 0);
43043 if (REG_P (addr))
43044 canon = addr;
43046 else if (GET_CODE (addr) == PLUS)
43048 rtx addrop0 = XEXP (addr, 0);
43049 rtx addrop1 = XEXP (addr, 1);
43051 if (!REG_P (addrop0))
43052 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
43054 if (!REG_P (addrop1))
43055 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
43057 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
43060 else
43061 canon = force_reg (GET_MODE (addr), addr);
43063 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
43066 /* Check whether the pattern of INSN computes an alignment mask, and if
43067 so, return a fully-expanded rtx for the masking operation.  */
43068 static rtx
43069 alignment_mask (rtx_insn *insn)
43071 rtx body = PATTERN (insn);
43073 if (GET_CODE (body) != SET
43074 || GET_CODE (SET_SRC (body)) != AND
43075 || !REG_P (XEXP (SET_SRC (body), 0)))
43076 return 0;
43078 rtx mask = XEXP (SET_SRC (body), 1);
43080 if (GET_CODE (mask) == CONST_INT)
43082 if (INTVAL (mask) == -16)
43083 return alignment_with_canonical_addr (SET_SRC (body));
43084 else
43085 return 0;
43088 if (!REG_P (mask))
43089 return 0;
43091 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43092 df_ref use;
43093 rtx real_mask = 0;
43095 FOR_EACH_INSN_INFO_USE (use, insn_info)
43097 if (!rtx_equal_p (DF_REF_REG (use), mask))
43098 continue;
43100 struct df_link *def_link = DF_REF_CHAIN (use);
43101 if (!def_link || def_link->next)
43102 return 0;
43104 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
43105 rtx const_body = PATTERN (const_insn);
43106 if (GET_CODE (const_body) != SET)
43107 return 0;
43109 real_mask = SET_SRC (const_body);
43111 if (GET_CODE (real_mask) != CONST_INT
43112 || INTVAL (real_mask) != -16)
43113 return 0;
43116 if (real_mask == 0)
43117 return 0;
43119 return alignment_with_canonical_addr (SET_SRC (body));
43122 /* Given INSN that's a load or store based at BASE_REG, look for a
43123 feeding computation that aligns its address on a 16-byte boundary. */
43124 static rtx
43125 find_alignment_op (rtx_insn *insn, rtx base_reg)
43127 df_ref base_use;
43128 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43129 rtx and_operation = 0;
43131 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
43133 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
43134 continue;
43136 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
43137 if (!base_def_link || base_def_link->next)
43138 break;
43140 /* With stack-protector code enabled, and possibly in other
43141 circumstances, there may not be an associated insn for
43142 the def. */
43143 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
43144 break;
43146 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
43147 and_operation = alignment_mask (and_insn);
43148 if (and_operation != 0)
43149 break;
43152 return and_operation;
43155 struct del_info { bool replace; rtx_insn *replace_insn; };
43157 /* If INSN is the load for an lvx pattern, put it in canonical form. */
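/* Sketch of the canonicalization (illustrative): the recognized pair

(set (reg:V4SI x) (vec_select:V4SI (mem:V4SI (reg addr)) (parallel [2 3 0 1])))
(set (reg:V4SI y) (vec_select:V4SI (reg:V4SI x) (parallel [2 3 0 1])))

where ADDR was defined by an (and ... (const_int -16)) computation,
becomes a plain aligned load plus a copy:

(set (reg:V4SI x) (mem:V4SI (and ... (const_int -16))))
(set (reg:V4SI y) (reg:V4SI x))  */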
43158 static void
43159 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
43161 rtx body = PATTERN (insn);
43162 gcc_assert (GET_CODE (body) == SET
43163 && GET_CODE (SET_SRC (body)) == VEC_SELECT
43164 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
43166 rtx mem = XEXP (SET_SRC (body), 0);
43167 rtx base_reg = XEXP (mem, 0);
43169 rtx and_operation = find_alignment_op (insn, base_reg);
43171 if (and_operation != 0)
43173 df_ref def;
43174 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43175 FOR_EACH_INSN_INFO_DEF (def, insn_info)
43177 struct df_link *link = DF_REF_CHAIN (def);
43178 if (!link || link->next)
43179 break;
43181 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
43182 if (!insn_is_swap_p (swap_insn)
43183 || insn_is_load_p (swap_insn)
43184 || insn_is_store_p (swap_insn))
43185 break;
43187 /* Expected lvx pattern found. Change the swap to
43188 a copy, and propagate the AND operation into the
43189 load. */
43190 to_delete[INSN_UID (swap_insn)].replace = true;
43191 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
43193 XEXP (mem, 0) = and_operation;
43194 SET_SRC (body) = mem;
43195 INSN_CODE (insn) = -1; /* Force re-recognition. */
43196 df_insn_rescan (insn);
43198 if (dump_file)
43199 fprintf (dump_file, "lvx opportunity found at %d\n",
43200 INSN_UID (insn));
43205 /* If INSN is the store for an stvx pattern, put it in canonical form. */
43206 static void
43207 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
43209 rtx body = PATTERN (insn);
43210 gcc_assert (GET_CODE (body) == SET
43211 && GET_CODE (SET_DEST (body)) == MEM
43212 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
43213 rtx mem = SET_DEST (body);
43214 rtx base_reg = XEXP (mem, 0);
43216 rtx and_operation = find_alignment_op (insn, base_reg);
43218 if (and_operation != 0)
43220 rtx src_reg = XEXP (SET_SRC (body), 0);
43221 df_ref src_use;
43222 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43223 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
43225 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
43226 continue;
43228 struct df_link *link = DF_REF_CHAIN (src_use);
43229 if (!link || link->next)
43230 break;
43232 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
43233 if (!insn_is_swap_p (swap_insn)
43234 || insn_is_load_p (swap_insn)
43235 || insn_is_store_p (swap_insn))
43236 break;
43238 /* Expected stvx pattern found. Change the swap to
43239 a copy, and propagate the AND operation into the
43240 store. */
43241 to_delete[INSN_UID (swap_insn)].replace = true;
43242 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
43244 XEXP (mem, 0) = and_operation;
43245 SET_SRC (body) = src_reg;
43246 INSN_CODE (insn) = -1; /* Force re-recognition. */
43247 df_insn_rescan (insn);
43249 if (dump_file)
43250 fprintf (dump_file, "stvx opportunity found at %d\n",
43251 INSN_UID (insn));
43256 /* Look for patterns created from builtin lvx and stvx calls, and
43257 canonicalize them to be properly recognized as such. */
43258 static void
43259 recombine_lvx_stvx_patterns (function *fun)
43261 int i;
43262 basic_block bb;
43263 rtx_insn *insn;
43265 int num_insns = get_max_uid ();
43266 del_info *to_delete = XCNEWVEC (del_info, num_insns);
43268 FOR_ALL_BB_FN (bb, fun)
43269 FOR_BB_INSNS (bb, insn)
43271 if (!NONDEBUG_INSN_P (insn))
43272 continue;
43274 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
43275 recombine_lvx_pattern (insn, to_delete);
43276 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
43277 recombine_stvx_pattern (insn, to_delete);
43280 /* Turning swaps into copies is delayed until now, to avoid problems
43281 with deleting instructions during the insn walk. */
43282 for (i = 0; i < num_insns; i++)
43283 if (to_delete[i].replace)
43285 rtx swap_body = PATTERN (to_delete[i].replace_insn);
43286 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
43287 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
43288 rtx_insn *new_insn = emit_insn_before (copy,
43289 to_delete[i].replace_insn);
43290 set_block_for_insn (new_insn,
43291 BLOCK_FOR_INSN (to_delete[i].replace_insn));
43292 df_insn_rescan (new_insn);
43293 df_insn_delete (to_delete[i].replace_insn);
43294 remove_insn (to_delete[i].replace_insn);
43295 to_delete[i].replace_insn->set_deleted ();
43298 free (to_delete);
43301 /* Main entry point for this pass. */
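/* Editorial summary of the phases below: (1) compute def-use/use-def
chains; (2) canonicalize lvx/stvx patterns; (3) walk all insns,
recording vector mentions and unioning them into webs; (4) mark webs
containing constructs we cannot handle as not optimizable; (5) apply
special handling to swappable insns in optimizable webs; and (6)
replace the now-redundant doubleword swaps with simple copies.  */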
43302 unsigned int
43303 rs6000_analyze_swaps (function *fun)
43305 swap_web_entry *insn_entry;
43306 basic_block bb;
43307 rtx_insn *insn, *curr_insn = 0;
43309 /* Dataflow analysis for use-def chains. */
43310 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
43311 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
43312 df_analyze ();
43313 df_set_flags (DF_DEFER_INSN_RESCAN);
43315 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
43316 recombine_lvx_stvx_patterns (fun);
43318 /* Allocate structure to represent webs of insns. */
43319 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
43321 /* Walk the insns to gather basic data. */
43322 FOR_ALL_BB_FN (bb, fun)
43323 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
43325 unsigned int uid = INSN_UID (insn);
43326 if (NONDEBUG_INSN_P (insn))
43328 insn_entry[uid].insn = insn;
43330 if (GET_CODE (insn) == CALL_INSN)
43331 insn_entry[uid].is_call = 1;
43333 /* Walk the uses and defs to see if we mention vector regs.
43334 Record any constraints on optimization of such mentions. */
43335 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43336 df_ref mention;
43337 FOR_EACH_INSN_INFO_USE (mention, insn_info)
43339 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43340 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
43342 /* If a use gets its value from a call insn, it will be
43343 a hard register and will look like (reg:V4SI 3 3).
43344 The df analysis creates two mentions for GPR3 and GPR4,
43345 both DImode. We must recognize this and treat it as a
43346 vector mention to ensure the call is unioned with this
43347 use. */
43348 if (mode == DImode && DF_REF_INSN_INFO (mention))
43350 rtx feeder = DF_REF_INSN (mention);
43351 /* FIXME: It is pretty hard to get from the df mention
43352 to the mode of the use in the insn. We arbitrarily
43353 pick a vector mode here, even though the use might
43354 be a real DImode. We can be too conservative
43355 (create a web larger than necessary) because of
43356 this, so consider eventually fixing this. */
43357 if (GET_CODE (feeder) == CALL_INSN)
43358 mode = V4SImode;
43361 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
43363 insn_entry[uid].is_relevant = 1;
43364 if (mode == TImode || mode == V1TImode
43365 || FLOAT128_VECTOR_P (mode))
43366 insn_entry[uid].is_128_int = 1;
43367 if (DF_REF_INSN_INFO (mention))
43368 insn_entry[uid].contains_subreg
43369 = !rtx_equal_p (DF_REF_REG (mention),
43370 DF_REF_REAL_REG (mention));
43371 union_defs (insn_entry, insn, mention);
43374 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
43376 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43377 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
43379 /* If we're loading up a hard vector register for a call,
43380 it looks like (set (reg:V4SI 9 9) (...)). The df
43381 analysis creates two mentions for GPR9 and GPR10, both
43382 DImode. So relying on the mode from the mentions
43383 isn't sufficient to ensure we union the call into the
43384 web with the parameter setup code. */
43385 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
43386 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
43387 mode = GET_MODE (SET_DEST (PATTERN (insn)));
43389 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
43391 insn_entry[uid].is_relevant = 1;
43392 if (mode == TImode || mode == V1TImode
43393 || FLOAT128_VECTOR_P (mode))
43394 insn_entry[uid].is_128_int = 1;
43395 if (DF_REF_INSN_INFO (mention))
43396 insn_entry[uid].contains_subreg
43397 = !rtx_equal_p (DF_REF_REG (mention),
43398 DF_REF_REAL_REG (mention));
43399 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
43400 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
43401 insn_entry[uid].is_live_out = 1;
43402 union_uses (insn_entry, insn, mention);
43406 if (insn_entry[uid].is_relevant)
43408 /* Determine if this is a load or store. */
43409 insn_entry[uid].is_load = insn_is_load_p (insn);
43410 insn_entry[uid].is_store = insn_is_store_p (insn);
43412 /* Determine if this is a doubleword swap. If not,
43413 determine whether it can legally be swapped. */
43414 if (insn_is_swap_p (insn))
43415 insn_entry[uid].is_swap = 1;
43416 else
43418 unsigned int special = SH_NONE;
43419 insn_entry[uid].is_swappable
43420 = insn_is_swappable_p (insn_entry, insn, &special);
43421 if (special != SH_NONE && insn_entry[uid].contains_subreg)
43422 insn_entry[uid].is_swappable = 0;
43423 else if (special != SH_NONE)
43424 insn_entry[uid].special_handling = special;
43425 else if (insn_entry[uid].contains_subreg)
43426 insn_entry[uid].special_handling = SH_SUBREG;
43432 if (dump_file)
43434 fprintf (dump_file, "\nSwap insn entry table when first built\n");
43435 dump_swap_insn_table (insn_entry);
43438 /* Record unoptimizable webs. */
43439 unsigned e = get_max_uid (), i;
43440 for (i = 0; i < e; ++i)
43442 if (!insn_entry[i].is_relevant)
43443 continue;
43445 swap_web_entry *root
43446 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
43448 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
43449 || (insn_entry[i].contains_subreg
43450 && insn_entry[i].special_handling != SH_SUBREG)
43451 || insn_entry[i].is_128_int || insn_entry[i].is_call
43452 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
43453 root->web_not_optimizable = 1;
43455 /* If we have loads or stores that aren't permuting then the
43456 optimization isn't appropriate. */
43457 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
43458 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
43459 root->web_not_optimizable = 1;
43461 /* If we have permuting loads or stores that are not accompanied
43462 by a register swap, the optimization isn't appropriate. */
43463 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
43465 rtx insn = insn_entry[i].insn;
43466 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43467 df_ref def;
43469 FOR_EACH_INSN_INFO_DEF (def, insn_info)
43471 struct df_link *link = DF_REF_CHAIN (def);
43473 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
43475 root->web_not_optimizable = 1;
43476 break;
43480 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
43482 rtx insn = insn_entry[i].insn;
43483 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43484 df_ref use;
43486 FOR_EACH_INSN_INFO_USE (use, insn_info)
43488 struct df_link *link = DF_REF_CHAIN (use);
43490 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
43492 root->web_not_optimizable = 1;
43493 break;
43499 if (dump_file)
43501 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
43502 dump_swap_insn_table (insn_entry);
43505 /* For each load and store in an optimizable web (which implies
43506 the loads and stores are permuting), find the associated
43507 register swaps and mark them for removal. Due to various
43508 optimizations we may mark the same swap more than once. Also
43509 perform special handling for swappable insns that require it. */
43510 for (i = 0; i < e; ++i)
43511 if ((insn_entry[i].is_load || insn_entry[i].is_store)
43512 && insn_entry[i].is_swap)
43514 swap_web_entry* root_entry
43515 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
43516 if (!root_entry->web_not_optimizable)
43517 mark_swaps_for_removal (insn_entry, i);
43519 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
43521 swap_web_entry* root_entry
43522 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
43523 if (!root_entry->web_not_optimizable)
43524 handle_special_swappables (insn_entry, i);
43527 /* Now delete the swaps marked for removal. */
43528 for (i = 0; i < e; ++i)
43529 if (insn_entry[i].will_delete)
43530 replace_swap_with_copy (insn_entry, i);
43532 /* Clean up. */
43533 free (insn_entry);
43534 return 0;
43537 const pass_data pass_data_analyze_swaps =
43539 RTL_PASS, /* type */
43540 "swaps", /* name */
43541 OPTGROUP_NONE, /* optinfo_flags */
43542 TV_NONE, /* tv_id */
43543 0, /* properties_required */
43544 0, /* properties_provided */
43545 0, /* properties_destroyed */
43546 0, /* todo_flags_start */
43547 TODO_df_finish, /* todo_flags_finish */
43550 class pass_analyze_swaps : public rtl_opt_pass
43552 public:
43553 pass_analyze_swaps (gcc::context *ctxt)
43554 : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
43557 /* opt_pass methods: */
43558 virtual bool gate (function *)
43560 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
43561 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
43564 virtual unsigned int execute (function *fun)
43566 return rs6000_analyze_swaps (fun);
43569 opt_pass *clone ()
43571 return new pass_analyze_swaps (m_ctxt);
43574 }; // class pass_analyze_swaps
43576 rtl_opt_pass *
43577 make_pass_analyze_swaps (gcc::context *ctxt)
43579 return new pass_analyze_swaps (ctxt);
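/* Sketch of how this pass gets registered (the rs6000 option-override
code does this; the reference pass "cse1" and position shown here are
illustrative of the usual arrangement, not a definitive quote):

opt_pass *swaps = make_pass_analyze_swaps (g);
struct register_pass_info swaps_info
= { swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
register_pass (&swaps_info);  */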
43582 #ifdef RS6000_GLIBC_ATOMIC_FENV
43583 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
43584 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
43585 #endif
43587 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
43589 static void
43590 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
43592 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
43594 #ifdef RS6000_GLIBC_ATOMIC_FENV
43595 if (atomic_hold_decl == NULL_TREE)
43597 atomic_hold_decl
43598 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
43599 get_identifier ("__atomic_feholdexcept"),
43600 build_function_type_list (void_type_node,
43601 double_ptr_type_node,
43602 NULL_TREE));
43603 TREE_PUBLIC (atomic_hold_decl) = 1;
43604 DECL_EXTERNAL (atomic_hold_decl) = 1;
43607 if (atomic_clear_decl == NULL_TREE)
43609 atomic_clear_decl
43610 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
43611 get_identifier ("__atomic_feclearexcept"),
43612 build_function_type_list (void_type_node,
43613 NULL_TREE));
43614 TREE_PUBLIC (atomic_clear_decl) = 1;
43615 DECL_EXTERNAL (atomic_clear_decl) = 1;
43618 tree const_double = build_qualified_type (double_type_node,
43619 TYPE_QUAL_CONST);
43620 tree const_double_ptr = build_pointer_type (const_double);
43621 if (atomic_update_decl == NULL_TREE)
43623 atomic_update_decl
43624 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
43625 get_identifier ("__atomic_feupdateenv"),
43626 build_function_type_list (void_type_node,
43627 const_double_ptr,
43628 NULL_TREE));
43629 TREE_PUBLIC (atomic_update_decl) = 1;
43630 DECL_EXTERNAL (atomic_update_decl) = 1;
43633 tree fenv_var = create_tmp_var_raw (double_type_node);
43634 TREE_ADDRESSABLE (fenv_var) = 1;
43635 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
43637 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
43638 *clear = build_call_expr (atomic_clear_decl, 0);
43639 *update = build_call_expr (atomic_update_decl, 1,
43640 fold_convert (const_double_ptr, fenv_addr));
43641 #endif
43642 return;
43645 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
43646 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
43647 tree call_mffs = build_call_expr (mffs, 0);
43649 /* Generates the equivalent of feholdexcept (&fenv_var)
43651 *fenv_var = __builtin_mffs ();
43652 double fenv_hold;
43653 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
43654 __builtin_mtfsf (0xff, fenv_hold); */
43656 /* Mask to clear everything except for the rounding modes and non-IEEE
43657 arithmetic flag. */
43658 const unsigned HOST_WIDE_INT hold_exception_mask =
43659 HOST_WIDE_INT_C (0xffffffff00000007);
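/* (Editorial note, assuming the usual FPSCR layout: mffs returns the
FPSCR image in the low word of the double; there the low two bits are
the rounding mode RN and bit 2 is the non-IEEE mode bit NI, which is
why the value 0x7 survives the hold mask.)  */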
43661 tree fenv_var = create_tmp_var_raw (double_type_node);
43663 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
43665 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
43666 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
43667 build_int_cst (uint64_type_node,
43668 hold_exception_mask));
43670 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
43671 fenv_llu_and);
43673 tree hold_mtfsf = build_call_expr (mtfsf, 2,
43674 build_int_cst (unsigned_type_node, 0xff),
43675 fenv_hold_mtfsf);
43677 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
43679 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
43681 double fenv_clear = __builtin_mffs ();
43682 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
43683 __builtin_mtfsf (0xff, fenv_clear); */
43685 /* Mask that clears the entire lower word of the FPSCR image (all
43686 exception, status, and control bits).  */
43687 const unsigned HOST_WIDE_INT clear_exception_mask =
43688 HOST_WIDE_INT_C (0xffffffff00000000);
43690 tree fenv_clear = create_tmp_var_raw (double_type_node);
43692 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
43694 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
43695 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
43696 fenv_clean_llu,
43697 build_int_cst (uint64_type_node,
43698 clear_exception_mask));
43700 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
43701 fenv_clear_llu_and);
43703 tree clear_mtfsf = build_call_expr (mtfsf, 2,
43704 build_int_cst (unsigned_type_node, 0xff),
43705 fenv_clear_mtfsf);
43707 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
43709 /* Generates the equivalent of feupdateenv (&fenv_var)
43711 double old_fenv = __builtin_mffs ();
43712 double fenv_update;
43713 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
43714 (*(uint64_t*)fenv_var & 0x1ff80fff);
43715 __builtin_mtfsf (0xff, fenv_update); */
43717 const unsigned HOST_WIDE_INT update_exception_mask =
43718 HOST_WIDE_INT_C (0xffffffff1fffff00);
43719 const unsigned HOST_WIDE_INT new_exception_mask =
43720 HOST_WIDE_INT_C (0x1ff80fff);
43722 tree old_fenv = create_tmp_var_raw (double_type_node);
43723 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
43725 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
43726 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
43727 build_int_cst (uint64_type_node,
43728 update_exception_mask));
43730 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
43731 build_int_cst (uint64_type_node,
43732 new_exception_mask));
43734 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
43735 old_llu_and, new_llu_and);
43737 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
43738 new_llu_mask);
43740 tree update_mtfsf = build_call_expr (mtfsf, 2,
43741 build_int_cst (unsigned_type_node, 0xff),
43742 fenv_update_mtfsf);
43744 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
43747 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
43749 static bool
43750 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
43751 optimization_type opt_type)
43753 switch (op)
43755 case rsqrt_optab:
43756 return (opt_type == OPTIMIZE_FOR_SPEED
43757 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
43759 default:
43760 return true;
43764 struct gcc_target targetm = TARGET_INITIALIZER;
43766 #include "gt-powerpcspe.h"