/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs  */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, to call so we can
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;
/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
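/* For example, RECIP_LOW_PRECISION works out to 0x05f: all eight
   RECIP_* bits (0x0ff) with the two DFmode rsqrt bits
   (0x020 | 0x080 == 0x0a0) masked off.  */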
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name.  */
  unsigned int mask;		/* mask bits to set.  */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
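/* Both checks rely on the enum ordering above: e.g.
   IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) is true because ALTIVEC_REG_TYPE
   falls between GPR_REG_TYPE and FPR_REG_TYPE, while
   IS_FP_VECT_REG_TYPE (GPR_REG_TYPE) is false because GPR_REG_TYPE
   precedes VSX_REG_TYPE.  */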
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
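/* As an illustration, a mode/register-class pair that supports plain
   reg+reg and reg+offset addressing would carry the mask
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET),
   i.e. 0x01 | 0x04 | 0x08 == 0x0d.  */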
/* Register type masks based on the type of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;		/* INSN for fusing gpr ADDIS/loads.  */
					/* INSNs for fusing addi with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
					/* INSNs for fusing addis with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
  bool fused_toc;			/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
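/* A sketch of how these predicates are consumed (illustrative, not a
   quote from this file): the address-validation code asks, e.g.,

     if (mode_supports_pre_incdec_p (mode))
       ... accept PRE_INC/PRE_DEC forms of the address ...

   so the per-mode addr_mask tables, initialized at option-override
   time, decide which address forms are treated as legitimate.  */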
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
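/* Note on scale: COSTS_N_INSNS is rtl.h's "cost of N instructions"
   macro, defined as N * 4 relative units, so e.g. COSTS_N_INSNS (19)
   in the tables below is stored as 76.  All entries are relative to
   the cost of a single add.  */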
/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
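/* Given the definitions above, each RS6000_BUILTIN_* line pulled in
   from rs6000-builtin.def expands to a { NAME, ICODE, MASK, ATTR }
   initializer, so including the .def file below produces the body of
   the rs6000_builtin_info table.  */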
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr", "ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
1447 "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15",
1448 "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
1449 "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
1451 #endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
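/* For instance, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000
   (the %v0 bit), and each subsequent AltiVec register shifts the bit
   right by one, so %v20 gets 0x00000800.  */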
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1674 #undef TARGET_MEMORY_MOVE_COST
1675 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1676 #undef TARGET_CANNOT_COPY_INSN_P
1677 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1678 #undef TARGET_RTX_COSTS
1679 #define TARGET_RTX_COSTS rs6000_rtx_costs
1680 #undef TARGET_ADDRESS_COST
1681 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1683 #undef TARGET_DWARF_REGISTER_SPAN
1684 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1686 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1687 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1689 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1690 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1692 #undef TARGET_PROMOTE_FUNCTION_MODE
1693 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1695 #undef TARGET_RETURN_IN_MEMORY
1696 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1698 #undef TARGET_RETURN_IN_MSB
1699 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1701 #undef TARGET_SETUP_INCOMING_VARARGS
1702 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1704 /* Always strict argument naming on rs6000. */
1705 #undef TARGET_STRICT_ARGUMENT_NAMING
1706 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1707 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1708 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1709 #undef TARGET_SPLIT_COMPLEX_ARG
1710 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1711 #undef TARGET_MUST_PASS_IN_STACK
1712 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1713 #undef TARGET_PASS_BY_REFERENCE
1714 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1715 #undef TARGET_ARG_PARTIAL_BYTES
1716 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1717 #undef TARGET_FUNCTION_ARG_ADVANCE
1718 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1719 #undef TARGET_FUNCTION_ARG
1720 #define TARGET_FUNCTION_ARG rs6000_function_arg
1721 #undef TARGET_FUNCTION_ARG_BOUNDARY
1722 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1724 #undef TARGET_BUILD_BUILTIN_VA_LIST
1725 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1727 #undef TARGET_EXPAND_BUILTIN_VA_START
1728 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1730 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1731 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1733 #undef TARGET_EH_RETURN_FILTER_MODE
1734 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1736 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1737 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1739 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1740 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1742 #undef TARGET_FLOATN_MODE
1743 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1745 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1746 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1748 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1749 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1751 #undef TARGET_MD_ASM_ADJUST
1752 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1754 #undef TARGET_OPTION_OVERRIDE
1755 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1757 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1758 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1759 rs6000_builtin_vectorized_function
1761 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1762 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1763 rs6000_builtin_md_vectorized_function
1765 #undef TARGET_STACK_PROTECT_GUARD
1766 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1768 #if !TARGET_MACHO
1769 #undef TARGET_STACK_PROTECT_FAIL
1770 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1771 #endif
1773 #ifdef HAVE_AS_TLS
1774 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1775 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1776 #endif
1778 /* Use a 32-bit anchor range. This leads to sequences like:
1780 addis tmp,anchor,high
1781 add dest,tmp,low
1783 where tmp itself acts as an anchor, and can be shared between
1784 accesses to the same 64k page. */
1785 #undef TARGET_MIN_ANCHOR_OFFSET
1786 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1787 #undef TARGET_MAX_ANCHOR_OFFSET
1788 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1789 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1790 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1791 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1792 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1794 #undef TARGET_BUILTIN_RECIPROCAL
1795 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1797 #undef TARGET_EXPAND_TO_RTL_HOOK
1798 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1800 #undef TARGET_INSTANTIATE_DECLS
1801 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1803 #undef TARGET_SECONDARY_RELOAD
1804 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1806 #undef TARGET_LEGITIMATE_ADDRESS_P
1807 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1809 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1810 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1812 #undef TARGET_LRA_P
1813 #define TARGET_LRA_P rs6000_lra_p
1815 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1816 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1818 #undef TARGET_CAN_ELIMINATE
1819 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1821 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1822 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1824 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1825 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1827 #undef TARGET_TRAMPOLINE_INIT
1828 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1830 #undef TARGET_FUNCTION_VALUE
1831 #define TARGET_FUNCTION_VALUE rs6000_function_value
1833 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1834 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1836 #undef TARGET_OPTION_SAVE
1837 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1839 #undef TARGET_OPTION_RESTORE
1840 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1842 #undef TARGET_OPTION_PRINT
1843 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1845 #undef TARGET_CAN_INLINE_P
1846 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1848 #undef TARGET_SET_CURRENT_FUNCTION
1849 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1851 #undef TARGET_LEGITIMATE_CONSTANT_P
1852 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1854 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1855 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1857 #undef TARGET_CAN_USE_DOLOOP_P
1858 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1860 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1861 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1863 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1864 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1865 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1866 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1867 #undef TARGET_UNWIND_WORD_MODE
1868 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1870 #undef TARGET_OFFLOAD_OPTIONS
1871 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1873 #undef TARGET_C_MODE_FOR_SUFFIX
1874 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1876 #undef TARGET_INVALID_BINARY_OP
1877 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1879 #undef TARGET_OPTAB_SUPPORTED_P
1880 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1882 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1883 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1886 /* Processor table. */
1887 struct rs6000_ptt
1888 {
1889 const char *const name; /* Canonical processor name. */
1890 const enum processor_type processor; /* Processor type enum value. */
1891 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1892 };
1894 static struct rs6000_ptt const processor_target_table[] =
1895 {
1896 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1897 #include "rs6000-cpus.def"
1898 #undef RS6000_CPU
1899 };
1901 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1902 name is invalid. */
1904 static int
1905 rs6000_cpu_name_lookup (const char *name)
1907 size_t i;
1909 if (name != NULL)
1911 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1912 if (! strcmp (name, processor_target_table[i].name))
1913 return (int)i;
1916 return -1;
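/* Illustrative usage: rs6000_cpu_name_lookup ("power8") returns the index
   of the matching processor_target_table entry (the exact index depends on
   rs6000-cpus.def), while rs6000_cpu_name_lookup ("no-such-cpu") and
   rs6000_cpu_name_lookup (NULL) both return -1.  */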
1920 /* Return number of consecutive hard regs needed starting at reg REGNO
1921 to hold something of mode MODE.
1922 This is ordinarily the length in words of a value of mode MODE
1923 but can be less for certain modes in special long registers.
1925 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1926 scalar instructions. The upper 32 bits are only available to the
1927 SIMD instructions.
1929 POWER and PowerPC GPRs hold 32 bits worth;
1930 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1932 static int
1933 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1935 unsigned HOST_WIDE_INT reg_size;
1937 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1938 128-bit floating point that can go in vector registers, which has VSX
1939 memory addressing. */
1940 if (FP_REGNO_P (regno))
1941 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1942 ? UNITS_PER_VSX_WORD
1943 : UNITS_PER_FP_WORD);
1945 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1946 reg_size = UNITS_PER_SPE_WORD;
1948 else if (ALTIVEC_REGNO_P (regno))
1949 reg_size = UNITS_PER_ALTIVEC_WORD;
1951 /* The value returned for SCmode in the E500 double case is 2 for
1952 ABI compatibility; storing an SCmode value in a single register
1953 would require function_arg and rs6000_spe_function_arg to handle
1954 SCmode so as to pass the value correctly in a pair of
1955 registers. */
1956 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1957 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1958 reg_size = UNITS_PER_FP_WORD;
1960 else
1961 reg_size = UNITS_PER_WORD;
1963 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
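/* Worked example (illustrative): a 16-byte V4SImode value needs
   (16 + 4 - 1) / 4 = 4 consecutive GPRs on a 32-bit target, but only
   (16 + 16 - 1) / 16 = 1 AltiVec register, since reg_size there is
   UNITS_PER_ALTIVEC_WORD.  */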
1966 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1967 MODE. */
1968 static int
1969 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1971 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1973 if (COMPLEX_MODE_P (mode))
1974 mode = GET_MODE_INNER (mode);
1976 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1977 register pairs, and PTImode is the mode we use for quad word memory
1978 operations. Don't allow quad words in the argument or frame
1979 pointer registers, just registers 0..31. */
1980 if (mode == PTImode)
1981 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1982 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1983 && ((regno & 1) == 0));
1985 /* VSX registers that overlap the FPR registers are larger than on non-VSX
1986 implementations. Don't allow an item to be split between an FP register
1987 and an Altivec register. Allow TImode in all VSX registers if the user
1988 asked for it. */
1989 if (TARGET_VSX && VSX_REGNO_P (regno)
1990 && (VECTOR_MEM_VSX_P (mode)
1991 || FLOAT128_VECTOR_P (mode)
1992 || reg_addr[mode].scalar_in_vmx_p
1993 || (TARGET_VSX_TIMODE && mode == TImode)
1994 || (TARGET_VADDUQM && mode == V1TImode)))
1996 if (FP_REGNO_P (regno))
1997 return FP_REGNO_P (last_regno);
1999 if (ALTIVEC_REGNO_P (regno))
2001 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2002 return 0;
2004 return ALTIVEC_REGNO_P (last_regno);
2008 /* The GPRs can hold any mode, but values bigger than one register
2009 cannot go past R31. */
2010 if (INT_REGNO_P (regno))
2011 return INT_REGNO_P (last_regno);
2013 /* The float registers (except for VSX vector modes) can only hold floating
2014 modes and DImode. */
2015 if (FP_REGNO_P (regno))
2017 if (FLOAT128_VECTOR_P (mode))
2018 return false;
2020 if (SCALAR_FLOAT_MODE_P (mode)
2021 && (mode != TDmode || (regno % 2) == 0)
2022 && FP_REGNO_P (last_regno))
2023 return 1;
2025 if (GET_MODE_CLASS (mode) == MODE_INT)
2027 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2028 return 1;
2030 if (TARGET_VSX_SMALL_INTEGER)
2032 if (mode == SImode)
2033 return 1;
2035 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
2036 return 1;
2040 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2041 && PAIRED_VECTOR_MODE (mode))
2042 return 1;
2044 return 0;
2047 /* The CR register can only hold CC modes. */
2048 if (CR_REGNO_P (regno))
2049 return GET_MODE_CLASS (mode) == MODE_CC;
2051 if (CA_REGNO_P (regno))
2052 return mode == Pmode || mode == SImode;
2054 /* AltiVec only in AltiVec registers. */
2055 if (ALTIVEC_REGNO_P (regno))
2056 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2057 || mode == V1TImode);
2059 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2060 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2061 return 1;
2063 /* We cannot put non-VSX TImode or PTImode anywhere except the general
2064 registers, and it must be able to fit within the register set. */
2066 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
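/* For instance (illustrative): TDmode is accepted in the FP registers only
   at an even register number, since it occupies an aligned pair, and a
   multi-register integer value is rejected unless it ends at or before
   r31.  */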
2069 /* Print interesting facts about registers. */
2070 static void
2071 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2073 int r, m;
2075 for (r = first_regno; r <= last_regno; ++r)
2077 const char *comma = "";
2078 int len;
2080 if (first_regno == last_regno)
2081 fprintf (stderr, "%s:\t", reg_name);
2082 else
2083 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2085 len = 8;
2086 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2087 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2089 if (len > 70)
2091 fprintf (stderr, ",\n\t");
2092 len = 8;
2093 comma = "";
2096 if (rs6000_hard_regno_nregs[m][r] > 1)
2097 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2098 rs6000_hard_regno_nregs[m][r]);
2099 else
2100 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2102 comma = ", ";
2105 if (call_used_regs[r])
2107 if (len > 70)
2109 fprintf (stderr, ",\n\t");
2110 len = 8;
2111 comma = "";
2114 len += fprintf (stderr, "%s%s", comma, "call-used");
2115 comma = ", ";
2118 if (fixed_regs[r])
2120 if (len > 70)
2122 fprintf (stderr, ",\n\t");
2123 len = 8;
2124 comma = "";
2127 len += fprintf (stderr, "%s%s", comma, "fixed");
2128 comma = ", ";
2131 if (len > 70)
2133 fprintf (stderr, ",\n\t");
2134 comma = "";
2137 len += fprintf (stderr, "%sreg-class = %s", comma,
2138 reg_class_names[(int)rs6000_regno_regclass[r]]);
2139 comma = ", ";
2141 if (len > 70)
2143 fprintf (stderr, ",\n\t");
2144 comma = "";
2147 fprintf (stderr, "%sregno = %d\n", comma, r);
2151 static const char *
2152 rs6000_debug_vector_unit (enum rs6000_vector v)
2154 const char *ret;
2156 switch (v)
2158 case VECTOR_NONE: ret = "none"; break;
2159 case VECTOR_ALTIVEC: ret = "altivec"; break;
2160 case VECTOR_VSX: ret = "vsx"; break;
2161 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2162 case VECTOR_PAIRED: ret = "paired"; break;
2163 case VECTOR_SPE: ret = "spe"; break;
2164 case VECTOR_OTHER: ret = "other"; break;
2165 default: ret = "unknown"; break;
2168 return ret;
2171 /* Inner function printing just the address mask for a particular reload
2172 register class. */
2173 DEBUG_FUNCTION char *
2174 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2176 static char ret[8];
2177 char *p = ret;
2179 if ((mask & RELOAD_REG_VALID) != 0)
2180 *p++ = 'v';
2181 else if (keep_spaces)
2182 *p++ = ' ';
2184 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2185 *p++ = 'm';
2186 else if (keep_spaces)
2187 *p++ = ' ';
2189 if ((mask & RELOAD_REG_INDEXED) != 0)
2190 *p++ = 'i';
2191 else if (keep_spaces)
2192 *p++ = ' ';
2194 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2195 *p++ = 'O';
2196 else if ((mask & RELOAD_REG_OFFSET) != 0)
2197 *p++ = 'o';
2198 else if (keep_spaces)
2199 *p++ = ' ';
2201 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2202 *p++ = '+';
2203 else if (keep_spaces)
2204 *p++ = ' ';
2206 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2207 *p++ = '+';
2208 else if (keep_spaces)
2209 *p++ = ' ';
2211 if ((mask & RELOAD_REG_AND_M16) != 0)
2212 *p++ = '&';
2213 else if (keep_spaces)
2214 *p++ = ' ';
2216 *p = '\0';
2218 return ret;
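/* Example (illustrative): a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED,
   RELOAD_REG_OFFSET and RELOAD_REG_PRE_INCDEC comes back as "v io+  " when
   KEEP_SPACES is true and as "vio+" when it is false.  */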
2221 /* Print the address masks in a human readable fashion. */
2222 DEBUG_FUNCTION void
2223 rs6000_debug_print_mode (ssize_t m)
2225 ssize_t rc;
2226 int spaces = 0;
2227 bool fuse_extra_p;
2229 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2230 for (rc = 0; rc < N_RELOAD_REG; rc++)
2231 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2232 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2234 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2235 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2236 fprintf (stderr, " Reload=%c%c",
2237 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2238 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2239 else
2240 spaces += sizeof (" Reload=sl") - 1;
2242 if (reg_addr[m].scalar_in_vmx_p)
2244 fprintf (stderr, "%*s Upper=y", spaces, "");
2245 spaces = 0;
2247 else
2248 spaces += sizeof (" Upper=y") - 1;
2250 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2251 || reg_addr[m].fused_toc);
2252 if (!fuse_extra_p)
2254 for (rc = 0; rc < N_RELOAD_REG; rc++)
2256 if (rc != RELOAD_REG_ANY)
2258 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2260 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2261 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2262 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2264 fuse_extra_p = true;
2265 break;
2271 if (fuse_extra_p)
2273 fprintf (stderr, "%*s Fuse:", spaces, "");
2274 spaces = 0;
2276 for (rc = 0; rc < N_RELOAD_REG; rc++)
2278 if (rc != RELOAD_REG_ANY)
2280 char load, store;
2282 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2283 load = 'l';
2284 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2285 load = 'L';
2286 else
2287 load = '-';
2289 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2290 store = 's';
2291 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2292 store = 'S';
2293 else
2294 store = '-';
2296 if (load == '-' && store == '-')
2297 spaces += 5;
2298 else
2300 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2301 reload_reg_map[rc].name[0], load, store);
2302 spaces = 0;
2307 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2309 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2310 spaces = 0;
2312 else
2313 spaces += sizeof (" P8gpr") - 1;
2315 if (reg_addr[m].fused_toc)
2317 fprintf (stderr, "%*sToc", (spaces + 1), "");
2318 spaces = 0;
2320 else
2321 spaces += sizeof (" Toc") - 1;
2323 else
2324 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2326 if (rs6000_vector_unit[m] != VECTOR_NONE
2327 || rs6000_vector_mem[m] != VECTOR_NONE)
2329 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2330 spaces, "",
2331 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2332 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2335 fputs ("\n", stderr);
2338 #define DEBUG_FMT_ID "%-32s= "
2339 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2340 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2341 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
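/* Illustrative expansion: fprintf (stderr, DEBUG_FMT_S, "abi", "aix")
   prints "abi" left-justified in a 32-column field, then "= aix" and a
   newline.  */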
2343 /* Print various interesting information with -mdebug=reg. */
2344 static void
2345 rs6000_debug_reg_global (void)
2347 static const char *const tf[2] = { "false", "true" };
2348 const char *nl = (const char *)0;
2349 int m;
2350 size_t m1, m2, v;
2351 char costly_num[20];
2352 char nop_num[20];
2353 char flags_buffer[40];
2354 const char *costly_str;
2355 const char *nop_str;
2356 const char *trace_str;
2357 const char *abi_str;
2358 const char *cmodel_str;
2359 struct cl_target_option cl_opts;
2361 /* Modes we want tieable information on. */
2362 static const machine_mode print_tieable_modes[] = {
2363 QImode,
2364 HImode,
2365 SImode,
2366 DImode,
2367 TImode,
2368 PTImode,
2369 SFmode,
2370 DFmode,
2371 TFmode,
2372 IFmode,
2373 KFmode,
2374 SDmode,
2375 DDmode,
2376 TDmode,
2377 V8QImode,
2378 V4HImode,
2379 V2SImode,
2380 V16QImode,
2381 V8HImode,
2382 V4SImode,
2383 V2DImode,
2384 V1TImode,
2385 V32QImode,
2386 V16HImode,
2387 V8SImode,
2388 V4DImode,
2389 V2TImode,
2390 V2SFmode,
2391 V4SFmode,
2392 V2DFmode,
2393 V8SFmode,
2394 V4DFmode,
2395 CCmode,
2396 CCUNSmode,
2397 CCEQmode,
2400 /* Virtual regs we are interested in. */
2401 static const struct {
2402 int regno; /* register number. */
2403 const char *name; /* register name. */
2404 } virtual_regs[] = {
2405 { STACK_POINTER_REGNUM, "stack pointer:" },
2406 { TOC_REGNUM, "toc: " },
2407 { STATIC_CHAIN_REGNUM, "static chain: " },
2408 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2409 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2410 { ARG_POINTER_REGNUM, "arg pointer: " },
2411 { FRAME_POINTER_REGNUM, "frame pointer:" },
2412 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2413 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2414 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2415 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2416 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2417 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2418 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2419 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2420 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2423 fputs ("\nHard register information:\n", stderr);
2424 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2425 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2426 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2427 LAST_ALTIVEC_REGNO,
2428 "vs");
2429 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2430 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2431 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2432 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2433 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2434 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2435 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2436 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2438 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2439 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2440 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2442 fprintf (stderr,
2443 "\n"
2444 "d reg_class = %s\n"
2445 "f reg_class = %s\n"
2446 "v reg_class = %s\n"
2447 "wa reg_class = %s\n"
2448 "wb reg_class = %s\n"
2449 "wd reg_class = %s\n"
2450 "we reg_class = %s\n"
2451 "wf reg_class = %s\n"
2452 "wg reg_class = %s\n"
2453 "wh reg_class = %s\n"
2454 "wi reg_class = %s\n"
2455 "wj reg_class = %s\n"
2456 "wk reg_class = %s\n"
2457 "wl reg_class = %s\n"
2458 "wm reg_class = %s\n"
2459 "wo reg_class = %s\n"
2460 "wp reg_class = %s\n"
2461 "wq reg_class = %s\n"
2462 "wr reg_class = %s\n"
2463 "ws reg_class = %s\n"
2464 "wt reg_class = %s\n"
2465 "wu reg_class = %s\n"
2466 "wv reg_class = %s\n"
2467 "ww reg_class = %s\n"
2468 "wx reg_class = %s\n"
2469 "wy reg_class = %s\n"
2470 "wz reg_class = %s\n"
2471 "wA reg_class = %s\n"
2472 "wH reg_class = %s\n"
2473 "wI reg_class = %s\n"
2474 "wJ reg_class = %s\n"
2475 "wK reg_class = %s\n"
2476 "\n",
2477 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2478 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2479 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2480 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2481 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2482 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2483 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2484 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2485 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2486 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2487 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2488 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2489 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2490 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2491 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2492 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2493 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2494 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2495 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2496 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2497 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2498 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2499 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2500 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2501 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2502 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2503 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2504 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2505 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2506 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2507 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2508 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2510 nl = "\n";
2511 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2512 rs6000_debug_print_mode (m);
2514 fputs ("\n", stderr);
2516 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2518 machine_mode mode1 = print_tieable_modes[m1];
2519 bool first_time = true;
2521 nl = (const char *)0;
2522 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2524 machine_mode mode2 = print_tieable_modes[m2];
2525 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2527 if (first_time)
2529 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2530 nl = "\n";
2531 first_time = false;
2534 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2538 if (!first_time)
2539 fputs ("\n", stderr);
2542 if (nl)
2543 fputs (nl, stderr);
2545 if (rs6000_recip_control)
2547 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2549 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2550 if (rs6000_recip_bits[m])
2552 fprintf (stderr,
2553 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2554 GET_MODE_NAME (m),
2555 (RS6000_RECIP_AUTO_RE_P (m)
2556 ? "auto"
2557 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2558 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2559 ? "auto"
2560 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2563 fputs ("\n", stderr);
2566 if (rs6000_cpu_index >= 0)
2568 const char *name = processor_target_table[rs6000_cpu_index].name;
2569 HOST_WIDE_INT flags
2570 = processor_target_table[rs6000_cpu_index].target_enable;
2572 sprintf (flags_buffer, "-mcpu=%s flags", name);
2573 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2575 else
2576 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2578 if (rs6000_tune_index >= 0)
2580 const char *name = processor_target_table[rs6000_tune_index].name;
2581 HOST_WIDE_INT flags
2582 = processor_target_table[rs6000_tune_index].target_enable;
2584 sprintf (flags_buffer, "-mtune=%s flags", name);
2585 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2587 else
2588 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2590 cl_target_option_save (&cl_opts, &global_options);
2591 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2592 rs6000_isa_flags);
2594 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2595 rs6000_isa_flags_explicit);
2597 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2598 rs6000_builtin_mask);
2600 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2602 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2603 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2605 switch (rs6000_sched_costly_dep)
2607 case max_dep_latency:
2608 costly_str = "max_dep_latency";
2609 break;
2611 case no_dep_costly:
2612 costly_str = "no_dep_costly";
2613 break;
2615 case all_deps_costly:
2616 costly_str = "all_deps_costly";
2617 break;
2619 case true_store_to_load_dep_costly:
2620 costly_str = "true_store_to_load_dep_costly";
2621 break;
2623 case store_to_load_dep_costly:
2624 costly_str = "store_to_load_dep_costly";
2625 break;
2627 default:
2628 costly_str = costly_num;
2629 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2630 break;
2633 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2635 switch (rs6000_sched_insert_nops)
2637 case sched_finish_regroup_exact:
2638 nop_str = "sched_finish_regroup_exact";
2639 break;
2641 case sched_finish_pad_groups:
2642 nop_str = "sched_finish_pad_groups";
2643 break;
2645 case sched_finish_none:
2646 nop_str = "sched_finish_none";
2647 break;
2649 default:
2650 nop_str = nop_num;
2651 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2652 break;
2655 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2657 switch (rs6000_sdata)
2659 default:
2660 case SDATA_NONE:
2661 break;
2663 case SDATA_DATA:
2664 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2665 break;
2667 case SDATA_SYSV:
2668 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2669 break;
2671 case SDATA_EABI:
2672 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2673 break;
2677 switch (rs6000_traceback)
2679 case traceback_default: trace_str = "default"; break;
2680 case traceback_none: trace_str = "none"; break;
2681 case traceback_part: trace_str = "part"; break;
2682 case traceback_full: trace_str = "full"; break;
2683 default: trace_str = "unknown"; break;
2686 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2688 switch (rs6000_current_cmodel)
2690 case CMODEL_SMALL: cmodel_str = "small"; break;
2691 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2692 case CMODEL_LARGE: cmodel_str = "large"; break;
2693 default: cmodel_str = "unknown"; break;
2696 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2698 switch (rs6000_current_abi)
2700 case ABI_NONE: abi_str = "none"; break;
2701 case ABI_AIX: abi_str = "aix"; break;
2702 case ABI_ELFv2: abi_str = "ELFv2"; break;
2703 case ABI_V4: abi_str = "V4"; break;
2704 case ABI_DARWIN: abi_str = "darwin"; break;
2705 default: abi_str = "unknown"; break;
2708 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2710 if (rs6000_altivec_abi)
2711 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2713 if (rs6000_spe_abi)
2714 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2716 if (rs6000_darwin64_abi)
2717 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2719 if (rs6000_float_gprs)
2720 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2722 fprintf (stderr, DEBUG_FMT_S, "fprs",
2723 (TARGET_FPRS ? "true" : "false"));
2725 fprintf (stderr, DEBUG_FMT_S, "single_float",
2726 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2728 fprintf (stderr, DEBUG_FMT_S, "double_float",
2729 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2731 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2732 (TARGET_SOFT_FLOAT ? "true" : "false"));
2734 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2735 (TARGET_E500_SINGLE ? "true" : "false"));
2737 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2738 (TARGET_E500_DOUBLE ? "true" : "false"));
2740 if (TARGET_LINK_STACK)
2741 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2743 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2745 if (TARGET_P8_FUSION)
2747 char options[80];
2749 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2750 if (TARGET_TOC_FUSION)
2751 strcat (options, ", toc");
2753 if (TARGET_P8_FUSION_SIGN)
2754 strcat (options, ", sign");
2756 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2759 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2760 TARGET_SECURE_PLT ? "secure" : "bss");
2761 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2762 aix_struct_return ? "aix" : "sysv");
2763 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2764 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2765 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2766 tf[!!rs6000_align_branch_targets]);
2767 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2768 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2769 rs6000_long_double_type_size);
2770 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2771 (int)rs6000_sched_restricted_insns_priority);
2772 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2773 (int)END_BUILTINS);
2774 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2775 (int)RS6000_BUILTIN_COUNT);
2777 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2778 (int)TARGET_FLOAT128_ENABLE_TYPE);
2780 if (TARGET_VSX)
2781 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2782 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2784 if (TARGET_DIRECT_MOVE_128)
2785 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2786 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2790 /* Update the addr mask bits in reg_addr to help the secondary reload and
2791 legitimate address support figure out the appropriate addressing to
2792 use. */
2794 static void
2795 rs6000_setup_reg_addr_masks (void)
2797 ssize_t rc, reg, m, nregs;
2798 addr_mask_type any_addr_mask, addr_mask;
2800 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2802 machine_mode m2 = (machine_mode) m;
2803 bool complex_p = false;
2804 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2805 size_t msize;
2807 if (COMPLEX_MODE_P (m2))
2809 complex_p = true;
2810 m2 = GET_MODE_INNER (m2);
2813 msize = GET_MODE_SIZE (m2);
2815 /* SDmode is special in that we want to access it only via REG+REG
2816 addressing on power7 and above, since we want to use the LFIWZX and
2817 STFIWZX instructions to load it. */
2818 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2820 any_addr_mask = 0;
2821 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2823 addr_mask = 0;
2824 reg = reload_reg_map[rc].reg;
2826 /* Can mode values go in the GPR/FPR/Altivec registers? */
2827 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2829 bool small_int_vsx_p = (small_int_p
2830 && (rc == RELOAD_REG_FPR
2831 || rc == RELOAD_REG_VMX));
2833 nregs = rs6000_hard_regno_nregs[m][reg];
2834 addr_mask |= RELOAD_REG_VALID;
2836 /* Indicate if the mode takes more than 1 physical register. If
2837 it takes a single register, indicate it can do REG+REG
2838 addressing. Small integers in VSX registers can only do
2839 REG+REG addressing. */
2840 if (small_int_vsx_p)
2841 addr_mask |= RELOAD_REG_INDEXED;
2842 else if (nregs > 1 || m == BLKmode || complex_p)
2843 addr_mask |= RELOAD_REG_MULTIPLE;
2844 else
2845 addr_mask |= RELOAD_REG_INDEXED;
2847 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2848 addressing. Restrict addressing on SPE for 64-bit types
2849 because of the SUBREG hackery used to address 64-bit floats in
2850 '32-bit' GPRs. If we allow scalars into Altivec registers,
2851 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2853 if (TARGET_UPDATE
2854 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2855 && msize <= 8
2856 && !VECTOR_MODE_P (m2)
2857 && !FLOAT128_VECTOR_P (m2)
2858 && !complex_p
2859 && !small_int_vsx_p
2860 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2861 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2862 && !(TARGET_E500_DOUBLE && msize == 8))
2864 addr_mask |= RELOAD_REG_PRE_INCDEC;
2866 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2867 we don't allow PRE_MODIFY for some multi-register
2868 operations. */
2869 switch (m)
2871 default:
2872 addr_mask |= RELOAD_REG_PRE_MODIFY;
2873 break;
2875 case DImode:
2876 if (TARGET_POWERPC64)
2877 addr_mask |= RELOAD_REG_PRE_MODIFY;
2878 break;
2880 case DFmode:
2881 case DDmode:
2882 if (TARGET_DF_INSN)
2883 addr_mask |= RELOAD_REG_PRE_MODIFY;
2884 break;
2889 /* GPR and FPR registers can do REG+OFFSET addressing, except
2890 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2891 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2892 if ((addr_mask != 0) && !indexed_only_p
2893 && msize <= 8
2894 && (rc == RELOAD_REG_GPR
2895 || ((msize == 8 || m2 == SFmode)
2896 && (rc == RELOAD_REG_FPR
2897 || (rc == RELOAD_REG_VMX
2898 && TARGET_P9_DFORM_SCALAR)))))
2899 addr_mask |= RELOAD_REG_OFFSET;
2901 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2902 instructions are enabled. The offset for 128-bit VSX registers is
2903 only 12-bits. While GPRs can handle the full offset range, VSX
2904 registers can only handle the restricted range. */
2905 else if ((addr_mask != 0) && !indexed_only_p
2906 && msize == 16 && TARGET_P9_DFORM_VECTOR
2907 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2908 || (m2 == TImode && TARGET_VSX_TIMODE)))
2910 addr_mask |= RELOAD_REG_OFFSET;
2911 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2912 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2915 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2916 addressing on 128-bit types. */
2917 if (rc == RELOAD_REG_VMX && msize == 16
2918 && (addr_mask & RELOAD_REG_VALID) != 0)
2919 addr_mask |= RELOAD_REG_AND_M16;
2921 reg_addr[m].addr_mask[rc] = addr_mask;
2922 any_addr_mask |= addr_mask;
2925 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
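/* Illustrative result (configuration dependent): for SImode the GPR class
   typically ends up with RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET plus the pre-increment/pre-modify bits ("v io++ " in
   the debug output), while with -mvsx-small-integer the FPR and VMX classes
   get only RELOAD_REG_VALID | RELOAD_REG_INDEXED, since small integers in
   VSX registers are limited to REG+REG addressing above.  */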
2930 /* Initialize the various global tables that are based on register size. */
2931 static void
2932 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2934 ssize_t r, m, c;
2935 int align64;
2936 int align32;
2938 /* Precalculate REGNO_REG_CLASS. */
2939 rs6000_regno_regclass[0] = GENERAL_REGS;
2940 for (r = 1; r < 32; ++r)
2941 rs6000_regno_regclass[r] = BASE_REGS;
2943 for (r = 32; r < 64; ++r)
2944 rs6000_regno_regclass[r] = FLOAT_REGS;
2946 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2947 rs6000_regno_regclass[r] = NO_REGS;
2949 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2950 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2952 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2953 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2954 rs6000_regno_regclass[r] = CR_REGS;
2956 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2957 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2958 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2959 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2960 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2961 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2962 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2963 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2964 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2965 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2966 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2967 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2969 /* Precalculate register class to simpler reload register class. We don't
2970 need all of the register classes that are combinations of different
2971 classes, just the simple ones that have constraint letters. */
2972 for (c = 0; c < N_REG_CLASSES; c++)
2973 reg_class_to_reg_type[c] = NO_REG_TYPE;
2975 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2976 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2977 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2978 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2979 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2980 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2981 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2982 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2983 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2984 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2985 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2986 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2988 if (TARGET_VSX)
2990 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2991 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2993 else
2995 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2996 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2999 /* Precalculate the valid memory formats as well as the vector information;
3000 this must be set up before the rs6000_hard_regno_nregs_internal calls
3001 below. */
3002 gcc_assert ((int)VECTOR_NONE == 0);
3003 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3004 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
3006 gcc_assert ((int)CODE_FOR_nothing == 0);
3007 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3009 gcc_assert ((int)NO_REGS == 0);
3010 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3012 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
3013 controls whether the compiler uses native alignment or still uses 128-bit alignment. */
3014 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3016 align64 = 64;
3017 align32 = 32;
3019 else
3021 align64 = 128;
3022 align32 = 128;
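/* E.g. (illustrative): with VSX and TARGET_VSX_ALIGN_128 unset, V2DFmode
   below gets 64-bit alignment and V4SFmode 32-bit alignment; with
   TARGET_VSX_ALIGN_128 set, both keep the traditional 128-bit alignment.  */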
3025 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3026 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3027 if (TARGET_FLOAT128_TYPE)
3029 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3030 rs6000_vector_align[KFmode] = 128;
3032 if (FLOAT128_IEEE_P (TFmode))
3034 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3035 rs6000_vector_align[TFmode] = 128;
3039 /* V2DF mode, VSX only. */
3040 if (TARGET_VSX)
3042 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3043 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3044 rs6000_vector_align[V2DFmode] = align64;
3047 /* V4SF mode, either VSX or Altivec. */
3048 if (TARGET_VSX)
3050 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3051 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3052 rs6000_vector_align[V4SFmode] = align32;
3054 else if (TARGET_ALTIVEC)
3056 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3057 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3058 rs6000_vector_align[V4SFmode] = align32;
3061 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3062 and stores. */
3063 if (TARGET_ALTIVEC)
3065 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3066 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3067 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3068 rs6000_vector_align[V4SImode] = align32;
3069 rs6000_vector_align[V8HImode] = align32;
3070 rs6000_vector_align[V16QImode] = align32;
3072 if (TARGET_VSX)
3074 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3075 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3076 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3078 else
3080 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3081 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3082 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3086 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3087 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3088 if (TARGET_VSX)
3090 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3091 rs6000_vector_unit[V2DImode]
3092 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3093 rs6000_vector_align[V2DImode] = align64;
3095 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3096 rs6000_vector_unit[V1TImode]
3097 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3098 rs6000_vector_align[V1TImode] = 128;
3101 /* DFmode, see if we want to use the VSX unit. Memory is handled
3102 differently, so don't set rs6000_vector_mem. */
3103 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3105 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3106 rs6000_vector_align[DFmode] = 64;
3109 /* SFmode, see if we want to use the VSX unit. */
3110 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3112 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3113 rs6000_vector_align[SFmode] = 32;
3116 /* Allow TImode in VSX register and set the VSX memory macros. */
3117 if (TARGET_VSX && TARGET_VSX_TIMODE)
3119 rs6000_vector_mem[TImode] = VECTOR_VSX;
3120 rs6000_vector_align[TImode] = align64;
3123 /* TODO add SPE and paired floating point vector support. */
3125 /* Register class constraints for the constraints that depend on compile
3126 switches. When the VSX code was added, different constraints were added
3127 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3128 of the VSX registers are used. The register classes for scalar floating
3129 point types are set based on whether we allow that type into the upper
3130 (Altivec) registers. GCC has register classes to target the Altivec
3131 registers for load/store operations, to select using a VSX memory
3132 operation instead of the traditional floating point operation. The
3133 constraints are:
3135 d - Register class to use with traditional DFmode instructions.
3136 f - Register class to use with traditional SFmode instructions.
3137 v - Altivec register.
3138 wa - Any VSX register.
wb - Altivec register if power9 D-form scalar instructions are enabled.
3139 wc - Reserved to represent individual CR bits (used in LLVM).
3140 wd - Preferred register class for V2DFmode.
we - VSX register if ISA 3.0 and 64-bit direct move instructions are enabled.
3141 wf - Preferred register class for V4SFmode.
3142 wg - Float register for power6x move insns.
3143 wh - FP register for direct move instructions.
3144 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3145 wj - FP or VSX register to hold 64-bit integers for direct moves.
3146 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3147 wl - Float register if we can do 32-bit signed int loads.
3148 wm - VSX register for ISA 2.07 direct move operations.
3149 wn - always NO_REGS.
wo - VSX register for ISA 3.0 vector instructions.
wp - VSX register for IEEE 128-bit floating point TFmode.
wq - VSX register for IEEE 128-bit floating point KFmode.
3150 wr - GPR if 64-bit mode is permitted.
3151 ws - Register class to do ISA 2.06 DF operations.
3152 wt - VSX register for TImode in VSX registers.
3153 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3154 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3155 ww - Register class to do SF conversions in with VSX operations.
3156 wx - Float register if we can do 32-bit int stores.
3157 wy - Register class to do ISA 2.07 SF operations.
3158 wz - Float register if we can do 32-bit unsigned int loads.
wA - BASE_REGS if 64-bit mode is permitted.
3159 wH - Altivec register if SImode is allowed in VSX registers.
3160 wI - VSX register if SImode is allowed in VSX registers.
3161 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3162 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
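/* For example (illustrative): a DFmode move pattern can use the "ws"
   constraint, so its operand is allocated to VSX_REGS when -mupper-regs-df
   allows DFmode in the upper registers, and to FLOAT_REGS otherwise,
   exactly as set up below.  */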
3164 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3165 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3167 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3168 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3170 if (TARGET_VSX)
3172 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3173 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3174 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3176 if (TARGET_VSX_TIMODE)
3177 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3179 if (TARGET_UPPER_REGS_DF) /* DFmode */
3181 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3182 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3184 else
3185 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3187 if (TARGET_UPPER_REGS_DI) /* DImode */
3188 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3189 else
3190 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3193 /* Add conditional constraints based on various options, to allow us to
3194 collapse multiple insn patterns. */
3195 if (TARGET_ALTIVEC)
3196 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3198 if (TARGET_MFPGPR) /* DFmode */
3199 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3201 if (TARGET_LFIWAX)
3202 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3204 if (TARGET_DIRECT_MOVE)
3206 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3207 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3208 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3209 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3210 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3211 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3214 if (TARGET_POWERPC64)
3216 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3217 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3220 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3222 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3223 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3224 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3226 else if (TARGET_P8_VECTOR)
3228 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3229 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3231 else if (TARGET_VSX)
3232 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3234 if (TARGET_STFIWX)
3235 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3237 if (TARGET_LFIWZX)
3238 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3240 if (TARGET_FLOAT128_TYPE)
3242 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3243 if (FLOAT128_IEEE_P (TFmode))
3244 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3247 /* Support for new D-form instructions. */
3248 if (TARGET_P9_DFORM_SCALAR)
3249 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3251 /* Support for ISA 3.0 (power9) vectors. */
3252 if (TARGET_P9_VECTOR)
3253 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3255 /* Support for new direct moves (ISA 3.0 + 64bit). */
3256 if (TARGET_DIRECT_MOVE_128)
3257 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3259 /* Support small integers in VSX registers. */
3260 if (TARGET_VSX_SMALL_INTEGER)
3262 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3263 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3264 if (TARGET_P9_VECTOR)
3266 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3267 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3271 /* Set up the reload helper and direct move functions. */
3272 if (TARGET_VSX || TARGET_ALTIVEC)
3274 if (TARGET_64BIT)
3276 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3277 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3278 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3279 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3280 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3281 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3282 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3283 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3284 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3285 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3286 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3287 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3288 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3289 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3290 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3291 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3292 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3293 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3294 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3295 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3297 if (FLOAT128_VECTOR_P (KFmode))
3299 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3300 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3303 if (FLOAT128_VECTOR_P (TFmode))
3305 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3306 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3309 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3310 available. */
3311 if (TARGET_NO_SDMODE_STACK)
3313 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3314 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3317 if (TARGET_VSX_TIMODE)
3319 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3320 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3323 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3325 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3326 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3327 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3328 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3329 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3330 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3331 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3332 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3333 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3335 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3336 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3337 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3338 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3339 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3340 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3341 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3342 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3343 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3345 if (FLOAT128_VECTOR_P (KFmode))
3347 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3348 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3351 if (FLOAT128_VECTOR_P (TFmode))
3353 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3354 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3358 else
3360 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3361 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3362 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3363 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3364 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3365 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3366 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3367 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3368 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3369 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3370 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3371 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3372 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3373 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3374 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3375 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3376 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3377 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3378 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3379 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3381 if (FLOAT128_VECTOR_P (KFmode))
3383 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3384 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3387 if (FLOAT128_IEEE_P (TFmode))
3389 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3390 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3393 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3394 available. */
3395 if (TARGET_NO_SDMODE_STACK)
3397 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3398 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3401 if (TARGET_VSX_TIMODE)
3403 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3404 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3407 if (TARGET_DIRECT_MOVE)
3409 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3410 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3411 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3415 if (TARGET_UPPER_REGS_DF)
3416 reg_addr[DFmode].scalar_in_vmx_p = true;
3418 if (TARGET_UPPER_REGS_DI)
3419 reg_addr[DImode].scalar_in_vmx_p = true;
3421 if (TARGET_UPPER_REGS_SF)
3422 reg_addr[SFmode].scalar_in_vmx_p = true;
3424 if (TARGET_VSX_SMALL_INTEGER)
3426 reg_addr[SImode].scalar_in_vmx_p = true;
3427 if (TARGET_P9_VECTOR)
3429 reg_addr[HImode].scalar_in_vmx_p = true;
3430 reg_addr[QImode].scalar_in_vmx_p = true;
3435 /* Setup the fusion operations. */
3436 if (TARGET_P8_FUSION)
3438 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3439 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3440 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3441 if (TARGET_64BIT)
3442 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
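/* For illustration only (nothing in this comment is compiled): power8 GPR
   load fusion pairs an addis that forms the high part of an address with
   the dependent load, so the core can execute them as one fused operation.
   Schematically, for a hypothetical TOC-relative symbol `x':

     addis 9,2,x@toc@ha       # high adjusted part of the address
     lwz   9,x@toc@l(9)       # load fused with the addis above

   The fusion_gpr_ld patterns recorded here let the peepholes keep such
   pairs adjacent. */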
3445 if (TARGET_P9_FUSION)
3447 struct fuse_insns {
3448 enum machine_mode mode; /* mode of the fused type. */
3449 enum machine_mode pmode; /* pointer mode. */
3450 enum rs6000_reload_reg_type rtype; /* register type. */
3451 enum insn_code load; /* load insn. */
3452 enum insn_code store; /* store insn. */
3455 static const struct fuse_insns addis_insns[] = {
3456 { SFmode, DImode, RELOAD_REG_FPR,
3457 CODE_FOR_fusion_vsx_di_sf_load,
3458 CODE_FOR_fusion_vsx_di_sf_store },
3460 { SFmode, SImode, RELOAD_REG_FPR,
3461 CODE_FOR_fusion_vsx_si_sf_load,
3462 CODE_FOR_fusion_vsx_si_sf_store },
3464 { DFmode, DImode, RELOAD_REG_FPR,
3465 CODE_FOR_fusion_vsx_di_df_load,
3466 CODE_FOR_fusion_vsx_di_df_store },
3468 { DFmode, SImode, RELOAD_REG_FPR,
3469 CODE_FOR_fusion_vsx_si_df_load,
3470 CODE_FOR_fusion_vsx_si_df_store },
3472 { DImode, DImode, RELOAD_REG_FPR,
3473 CODE_FOR_fusion_vsx_di_di_load,
3474 CODE_FOR_fusion_vsx_di_di_store },
3476 { DImode, SImode, RELOAD_REG_FPR,
3477 CODE_FOR_fusion_vsx_si_di_load,
3478 CODE_FOR_fusion_vsx_si_di_store },
3480 { QImode, DImode, RELOAD_REG_GPR,
3481 CODE_FOR_fusion_gpr_di_qi_load,
3482 CODE_FOR_fusion_gpr_di_qi_store },
3484 { QImode, SImode, RELOAD_REG_GPR,
3485 CODE_FOR_fusion_gpr_si_qi_load,
3486 CODE_FOR_fusion_gpr_si_qi_store },
3488 { HImode, DImode, RELOAD_REG_GPR,
3489 CODE_FOR_fusion_gpr_di_hi_load,
3490 CODE_FOR_fusion_gpr_di_hi_store },
3492 { HImode, SImode, RELOAD_REG_GPR,
3493 CODE_FOR_fusion_gpr_si_hi_load,
3494 CODE_FOR_fusion_gpr_si_hi_store },
3496 { SImode, DImode, RELOAD_REG_GPR,
3497 CODE_FOR_fusion_gpr_di_si_load,
3498 CODE_FOR_fusion_gpr_di_si_store },
3500 { SImode, SImode, RELOAD_REG_GPR,
3501 CODE_FOR_fusion_gpr_si_si_load,
3502 CODE_FOR_fusion_gpr_si_si_store },
3504 { SFmode, DImode, RELOAD_REG_GPR,
3505 CODE_FOR_fusion_gpr_di_sf_load,
3506 CODE_FOR_fusion_gpr_di_sf_store },
3508 { SFmode, SImode, RELOAD_REG_GPR,
3509 CODE_FOR_fusion_gpr_si_sf_load,
3510 CODE_FOR_fusion_gpr_si_sf_store },
3512 { DImode, DImode, RELOAD_REG_GPR,
3513 CODE_FOR_fusion_gpr_di_di_load,
3514 CODE_FOR_fusion_gpr_di_di_store },
3516 { DFmode, DImode, RELOAD_REG_GPR,
3517 CODE_FOR_fusion_gpr_di_df_load,
3518 CODE_FOR_fusion_gpr_di_df_store },
3521 enum machine_mode cur_pmode = Pmode;
3522 size_t i;
3524 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3526 enum machine_mode xmode = addis_insns[i].mode;
3527 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3529 if (addis_insns[i].pmode != cur_pmode)
3530 continue;
3532 if (rtype == RELOAD_REG_FPR
3533 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3534 continue;
3536 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3537 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3539 if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
3541 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3542 = addis_insns[i].load;
3543 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3544 = addis_insns[i].store;
3549 /* Note which types support fusing a TOC setup plus a memory insn. We only
3550 do fused TOCs for the medium/large code models. */
3551 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3552 && (TARGET_CMODEL != CMODEL_SMALL))
3554 reg_addr[QImode].fused_toc = true;
3555 reg_addr[HImode].fused_toc = true;
3556 reg_addr[SImode].fused_toc = true;
3557 reg_addr[DImode].fused_toc = true;
3558 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3560 if (TARGET_SINGLE_FLOAT)
3561 reg_addr[SFmode].fused_toc = true;
3562 if (TARGET_DOUBLE_FLOAT)
3563 reg_addr[DFmode].fused_toc = true;
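/* For illustration only: with -mcmodel=medium, a TOC reference to a
   hypothetical symbol `sym' is normally the two-insn sequence

     addis 9,2,sym@toc@ha
     ld    9,sym@toc@l(9)

   Setting fused_toc above marks the modes for which this addis plus the
   following memory insn may be kept together for fusion. */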
3567 /* Precalculate HARD_REGNO_NREGS. */
3568 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3569 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3570 rs6000_hard_regno_nregs[m][r]
3571 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3573 /* Precalculate HARD_REGNO_MODE_OK. */
3574 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3575 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3576 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3577 rs6000_hard_regno_mode_ok_p[m][r] = true;
3579 /* Precalculate CLASS_MAX_NREGS sizes. */
3580 for (c = 0; c < LIM_REG_CLASSES; ++c)
3582 int reg_size;
3584 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3585 reg_size = UNITS_PER_VSX_WORD;
3587 else if (c == ALTIVEC_REGS)
3588 reg_size = UNITS_PER_ALTIVEC_WORD;
3590 else if (c == FLOAT_REGS)
3591 reg_size = UNITS_PER_FP_WORD;
3593 else
3594 reg_size = UNITS_PER_WORD;
3596 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3598 machine_mode m2 = (machine_mode)m;
3599 int reg_size2 = reg_size;
3601 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3602 in VSX. */
3603 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3604 reg_size2 = UNITS_PER_FP_WORD;
3606 rs6000_class_max_nregs[m][c]
3607 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
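/* Worked example of the rounded-up division above: V4SImode is 16 bytes,
   so it takes 16/16 = 1 register in a VSX class (reg_size 16), but
   16/8 = 2 registers in FLOAT_REGS without VSX (reg_size 8). TDmode is
   also 16 bytes but is FLOAT128_2REG_P, so it always gets 16/8 = 2
   registers, even in a VSX register class. */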
3611 if (TARGET_E500_DOUBLE)
3612 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3614 /* Calculate the modes for which to automatically generate code that uses
3615 the reciprocal divide and square root instructions. In the future, possibly
3616 automatically generate the instructions even if the user did not specify
3617 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3618 not accurate enough. */
3619 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3620 if (TARGET_FRES)
3621 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3622 if (TARGET_FRE)
3623 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3624 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3625 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3626 if (VECTOR_UNIT_VSX_P (V2DFmode))
3627 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3629 if (TARGET_FRSQRTES)
3630 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3631 if (TARGET_FRSQRTE)
3632 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3633 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3634 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3635 if (VECTOR_UNIT_VSX_P (V2DFmode))
3636 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3638 if (rs6000_recip_control)
3640 if (!flag_finite_math_only)
3641 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3642 if (flag_trapping_math)
3643 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3644 if (!flag_reciprocal_math)
3645 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3646 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3648 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3649 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3650 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3652 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3653 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3654 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3656 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3657 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3658 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3660 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3661 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3662 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3664 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3665 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3666 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3668 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3669 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3670 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3672 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3673 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3674 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3676 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3677 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3678 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
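/* Usage sketch (illustrative, assuming a power8/power9-class target):
   compiling with something like

     gcc -O3 -ffast-math -mrecip=all foo.c

   satisfies the flag checks above and sets the AUTO_RE/AUTO_RSQRTE bits,
   so a V4SFmode division a/b can be expanded as a times a reciprocal
   estimate of b (e.g. xvresp) refined by Newton-Raphson steps instead of
   a full divide. */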
3682 /* Update the addr mask bits in reg_addr to help the secondary reload and the
3683 GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to
3684 use. */
3685 rs6000_setup_reg_addr_masks ();
3687 if (global_init_p || TARGET_DEBUG_TARGET)
3689 if (TARGET_DEBUG_REG)
3690 rs6000_debug_reg_global ();
3692 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3693 fprintf (stderr,
3694 "SImode variable mult cost = %d\n"
3695 "SImode constant mult cost = %d\n"
3696 "SImode short constant mult cost = %d\n"
3697 "DImode multipliciation cost = %d\n"
3698 "SImode division cost = %d\n"
3699 "DImode division cost = %d\n"
3700 "Simple fp operation cost = %d\n"
3701 "DFmode multiplication cost = %d\n"
3702 "SFmode division cost = %d\n"
3703 "DFmode division cost = %d\n"
3704 "cache line size = %d\n"
3705 "l1 cache size = %d\n"
3706 "l2 cache size = %d\n"
3707 "simultaneous prefetches = %d\n"
3708 "\n",
3709 rs6000_cost->mulsi,
3710 rs6000_cost->mulsi_const,
3711 rs6000_cost->mulsi_const9,
3712 rs6000_cost->muldi,
3713 rs6000_cost->divsi,
3714 rs6000_cost->divdi,
3715 rs6000_cost->fp,
3716 rs6000_cost->dmul,
3717 rs6000_cost->sdiv,
3718 rs6000_cost->ddiv,
3719 rs6000_cost->cache_line_size,
3720 rs6000_cost->l1_cache_size,
3721 rs6000_cost->l2_cache_size,
3722 rs6000_cost->simultaneous_prefetches);
3726 #if TARGET_MACHO
3727 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3729 static void
3730 darwin_rs6000_override_options (void)
3732 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3733 off. */
3734 rs6000_altivec_abi = 1;
3735 TARGET_ALTIVEC_VRSAVE = 1;
3736 rs6000_current_abi = ABI_DARWIN;
3738 if (DEFAULT_ABI == ABI_DARWIN
3739 && TARGET_64BIT)
3740 darwin_one_byte_bool = 1;
3742 if (TARGET_64BIT && ! TARGET_POWERPC64)
3744 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3745 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3747 if (flag_mkernel)
3749 rs6000_default_long_calls = 1;
3750 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3753 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3754 Altivec. */
3755 if (!flag_mkernel && !flag_apple_kext
3756 && TARGET_64BIT
3757 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3758 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3760 /* Unless the user (not the configurer) has explicitly overridden
3761 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3762 G4 unless targeting the kernel. */
3763 if (!flag_mkernel
3764 && !flag_apple_kext
3765 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3766 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3767 && ! global_options_set.x_rs6000_cpu_index)
3769 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3772 #endif
3774 /* If not otherwise specified by a target, make 'long double' equivalent to
3775 'double'. */
3777 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3778 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3779 #endif
3781 /* Return the builtin mask of the various options that could affect which
3782 builtins are available. In the past we used target_flags, but we've run out
3783 of bits, and some options like SPE and PAIRED are no longer in
3784 target_flags. */
3786 HOST_WIDE_INT
3787 rs6000_builtin_mask_calculate (void)
3789 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3790 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3791 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3792 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3793 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3794 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3795 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3796 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3797 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3798 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3799 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3800 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3801 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3802 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3803 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3804 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3805 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3806 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3807 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3808 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3809 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
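/* Usage sketch (illustrative): builtin expansion compares this mask against
   the RS6000_BTM_* bits required by each builtin. For example, after

     #pragma GCC target ("altivec")

   the recomputed mask includes RS6000_BTM_ALTIVEC, so the AltiVec builtins
   (the __builtin_altivec_* family) become available in the affected
   functions. */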
3812 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3813 to clobber the XER[CA] bit because clobbering that bit without telling
3814 the compiler worked just fine with versions of GCC before GCC 5, and
3815 breaking a lot of older code in ways that are hard to track down is
3816 not such a great idea. */
3818 static rtx_insn *
3819 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3820 vec<const char *> &/*constraints*/,
3821 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3823 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3824 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3825 return NULL;
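/* Illustrative example of why the clobber above matters: legacy asm that
   silently modifies the carry bit, such as

     asm ("addic %0,%1,1" : "=r" (out) : "r" (in));

   stays correct even though it never declares that XER[CA] is changed,
   because every asm is now treated as clobbering it. */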
3828 /* Override command line options.
3830 Combine build-specific configuration information with options
3831 specified on the command line to set various state variables which
3832 influence code generation, optimization, and expansion of built-in
3833 functions. Assure that command-line configuration preferences are
3834 compatible with each other and with the build configuration; issue
3835 warnings while adjusting configuration or error messages while
3836 rejecting configuration.
3838 Upon entry to this function:
3840 This function is called once at the beginning of
3841 compilation, and then again at the start and end of compiling
3842 each section of code that has a different configuration, as
3843 indicated, for example, by adding the
3845 __attribute__((__target__("cpu=power9")))
3847 qualifier to a function definition or, for example, by bracketing
3848 code between
3850 #pragma GCC target("altivec")
3852 and
3854 #pragma GCC reset_options
3856 directives. Parameter global_init_p is true for the initial
3857 invocation, which initializes global variables, and false for all
3858 subsequent invocations.
3861 Various global state information is assumed to be valid. This
3862 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3863 default CPU specified at build configure time, TARGET_DEFAULT,
3864 representing the default set of option flags for the default
3865 target, and global_options_set.x_rs6000_isa_flags, representing
3866 which options were requested on the command line.
3868 Upon return from this function:
3870 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3871 was set by name on the command line. Additionally, if certain
3872 attributes are automatically enabled or disabled by this function
3873 in order to assure compatibility between options and
3874 configuration, the flags associated with those attributes are
3875 also set. By setting these "explicit bits", we avoid the risk
3876 that other code might accidentally overwrite these particular
3877 attributes with "default values".
3879 The various bits of rs6000_isa_flags are set to indicate the
3880 target options that have been selected for the most current
3881 compilation efforts. This has the effect of also turning on the
3882 associated TARGET_XXX values since these are macros which are
3883 generally defined to test the corresponding bit of the
3884 rs6000_isa_flags variable.
3886 The variable rs6000_builtin_mask is set to represent the target
3887 options for the most current compilation efforts, consistent with
3888 the current contents of rs6000_isa_flags. This variable controls
3889 expansion of built-in functions.
3891 Various other global variables and fields of global structures
3892 (over 50 in all) are initialized to reflect the desired options
3893 for the most current compilation efforts. */
3895 static bool
3896 rs6000_option_override_internal (bool global_init_p)
3898 bool ret = true;
3899 bool have_cpu = false;
3901 /* The default cpu requested at configure time, if any. */
3902 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3904 HOST_WIDE_INT set_masks;
3905 int cpu_index;
3906 int tune_index;
3907 struct cl_target_option *main_target_opt
3908 = ((global_init_p || target_option_default_node == NULL)
3909 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3911 /* Print defaults. */
3912 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3913 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3915 /* Remember the explicit arguments. */
3916 if (global_init_p)
3917 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3919 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3920 library functions, so warn about it. The flag may be useful for
3921 performance studies from time to time though, so don't disable it
3922 entirely. */
3923 if (global_options_set.x_rs6000_alignment_flags
3924 && rs6000_alignment_flags == MASK_ALIGN_POWER
3925 && DEFAULT_ABI == ABI_DARWIN
3926 && TARGET_64BIT)
3927 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3928 " it is incompatible with the installed C and C++ libraries");
3930 /* Numerous experiments show that IRA-based loop pressure
3931 calculation works better for RTL loop invariant motion on targets
3932 with enough (>= 32) registers. It is an expensive optimization,
3933 so it is enabled only when optimizing for peak performance. */
3934 if (optimize >= 3 && global_init_p
3935 && !global_options_set.x_flag_ira_loop_pressure)
3936 flag_ira_loop_pressure = 1;
3938 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3939 for tracebacks to be complete, but not if an -fasynchronous-unwind-tables
3940 option was already specified explicitly. */
3941 if (flag_sanitize & SANITIZE_USER_ADDRESS
3942 && !global_options_set.x_flag_asynchronous_unwind_tables)
3943 flag_asynchronous_unwind_tables = 1;
3945 /* Set the pointer size. */
3946 if (TARGET_64BIT)
3948 rs6000_pmode = (int)DImode;
3949 rs6000_pointer_size = 64;
3951 else
3953 rs6000_pmode = (int)SImode;
3954 rs6000_pointer_size = 32;
3957 /* Some OSs don't support saving the high part of 64-bit registers on context
3958 switch. Other OSs don't support saving Altivec registers. On those OSs,
3959 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3960 if the user wants either, the user must explicitly specify them and we
3961 won't interfere with the user's specification. */
3963 set_masks = POWERPC_MASKS;
3964 #ifdef OS_MISSING_POWERPC64
3965 if (OS_MISSING_POWERPC64)
3966 set_masks &= ~OPTION_MASK_POWERPC64;
3967 #endif
3968 #ifdef OS_MISSING_ALTIVEC
3969 if (OS_MISSING_ALTIVEC)
3970 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3971 #endif
3973 /* Don't let the processor default override options given explicitly. */
3974 set_masks &= ~rs6000_isa_flags_explicit;
3976 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3977 the cpu in a target attribute or pragma, but did not specify a tuning
3978 option, use the cpu for the tuning option rather than the option specified
3979 with -mtune on the command line. Process a '--with-cpu' configuration
3980 request as an implicit -mcpu. */
3981 if (rs6000_cpu_index >= 0)
3983 cpu_index = rs6000_cpu_index;
3984 have_cpu = true;
3986 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3988 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3989 have_cpu = true;
3991 else if (implicit_cpu)
3993 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3994 have_cpu = true;
3996 else
3998 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3999 const char *default_cpu = ((!TARGET_POWERPC64)
4000 ? "powerpc"
4001 : ((BYTES_BIG_ENDIAN)
4002 ? "powerpc64"
4003 : "powerpc64le"));
4005 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4006 have_cpu = false;
4009 gcc_assert (cpu_index >= 0);
4011 if (have_cpu)
4013 #ifndef HAVE_AS_POWER9
4014 if (processor_target_table[rs6000_cpu_index].processor
4015 == PROCESSOR_POWER9)
4017 have_cpu = false;
4018 warning (0, "will not generate power9 instructions because "
4019 "assembler lacks power9 support");
4021 #endif
4022 #ifndef HAVE_AS_POWER8
4023 if (processor_target_table[rs6000_cpu_index].processor
4024 == PROCESSOR_POWER8)
4026 have_cpu = false;
4027 warning (0, "will not generate power8 instructions because "
4028 "assembler lacks power8 support");
4030 #endif
4031 #ifndef HAVE_AS_POPCNTD
4032 if (processor_target_table[rs6000_cpu_index].processor
4033 == PROCESSOR_POWER7)
4035 have_cpu = false;
4036 warning (0, "will not generate power7 instructions because "
4037 "assembler lacks power7 support");
4039 #endif
4040 #ifndef HAVE_AS_DFP
4041 if (processor_target_table[rs6000_cpu_index].processor
4042 == PROCESSOR_POWER6)
4044 have_cpu = false;
4045 warning (0, "will not generate power6 instructions because "
4046 "assembler lacks power6 support");
4048 #endif
4049 #ifndef HAVE_AS_POPCNTB
4050 if (processor_target_table[rs6000_cpu_index].processor
4051 == PROCESSOR_POWER5)
4053 have_cpu = false;
4054 warning (0, "will not generate power5 instructions because "
4055 "assembler lacks power5 support");
4057 #endif
4059 if (!have_cpu)
4061 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4062 const char *default_cpu = (!TARGET_POWERPC64
4063 ? "powerpc"
4064 : (BYTES_BIG_ENDIAN
4065 ? "powerpc64"
4066 : "powerpc64le"));
4068 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4072 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4073 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4074 with those from the cpu, except for options that were explicitly set. If
4075 we don't have a cpu, do not override the target bits set in
4076 TARGET_DEFAULT. */
4077 if (have_cpu)
4079 rs6000_isa_flags &= ~set_masks;
4080 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4081 & set_masks);
4083 else
4085 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4086 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4087 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
4088 to using rs6000_isa_flags, we need to do the initialization here.
4090 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4091 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4092 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
4093 : processor_target_table[cpu_index].target_enable);
4094 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4097 if (rs6000_tune_index >= 0)
4098 tune_index = rs6000_tune_index;
4099 else if (have_cpu)
4100 rs6000_tune_index = tune_index = cpu_index;
4101 else
4103 size_t i;
4104 enum processor_type tune_proc
4105 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4107 tune_index = -1;
4108 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4109 if (processor_target_table[i].processor == tune_proc)
4111 rs6000_tune_index = tune_index = i;
4112 break;
4116 gcc_assert (tune_index >= 0);
4117 rs6000_cpu = processor_target_table[tune_index].processor;
4119 /* Pick defaults for SPE related control flags. Do this early to make sure
4120 that the TARGET_ macros are representative ASAP. */
4122 int spe_capable_cpu =
4123 (rs6000_cpu == PROCESSOR_PPC8540
4124 || rs6000_cpu == PROCESSOR_PPC8548);
4126 if (!global_options_set.x_rs6000_spe_abi)
4127 rs6000_spe_abi = spe_capable_cpu;
4129 if (!global_options_set.x_rs6000_spe)
4130 rs6000_spe = spe_capable_cpu;
4132 if (!global_options_set.x_rs6000_float_gprs)
4133 rs6000_float_gprs =
4134 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
4135 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
4136 : 0);
4139 if (global_options_set.x_rs6000_spe_abi
4140 && rs6000_spe_abi
4141 && !TARGET_SPE_ABI)
4142 error ("not configured for SPE ABI");
4144 if (global_options_set.x_rs6000_spe
4145 && rs6000_spe
4146 && !TARGET_SPE)
4147 error ("not configured for SPE instruction set");
4149 if (main_target_opt != NULL
4150 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
4151 || (main_target_opt->x_rs6000_spe != rs6000_spe)
4152 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
4153 error ("target attribute or pragma changes SPE ABI");
4155 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4156 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4157 || rs6000_cpu == PROCESSOR_PPCE5500)
4159 if (TARGET_ALTIVEC)
4160 error ("AltiVec not supported in this target");
4161 if (TARGET_SPE)
4162 error ("SPE not supported in this target");
4164 if (rs6000_cpu == PROCESSOR_PPCE6500)
4166 if (TARGET_SPE)
4167 error ("SPE not supported in this target");
4170 /* Disable Cell microcode if we are optimizing for the Cell
4171 and not optimizing for size. */
4172 if (rs6000_gen_cell_microcode == -1)
4173 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4174 && !optimize_size);
4176 /* If we are optimizing big endian systems for space and it's OK to
4177 use instructions that would be microcoded on the Cell, use the
4178 load/store multiple and string instructions. */
4179 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4180 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4181 | OPTION_MASK_STRING);
4183 /* Don't allow -mmultiple or -mstring on little endian systems
4184 unless the cpu is a 750, because the hardware doesn't support the
4185 instructions used in little endian mode, and they cause an alignment
4186 trap. The 750 does not cause an alignment trap (except when the
4187 target address is unaligned). */
4189 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4191 if (TARGET_MULTIPLE)
4193 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4194 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4195 warning (0, "-mmultiple is not supported on little endian systems");
4198 if (TARGET_STRING)
4200 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4201 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4202 warning (0, "-mstring is not supported on little endian systems");
4206 /* If little-endian, default to -mstrict-align on older processors.
4207 Testing for htm matches power8 and later. */
4208 if (!BYTES_BIG_ENDIAN
4209 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4210 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4212 /* -maltivec={le,be} implies -maltivec. */
4213 if (rs6000_altivec_element_order != 0)
4214 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4216 /* Disallow -maltivec=le in big endian mode for now. This is not
4217 known to be useful for anyone. */
4218 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4220 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4221 rs6000_altivec_element_order = 0;
4224 /* Add some warnings for VSX. */
4225 if (TARGET_VSX)
4227 const char *msg = NULL;
4228 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4229 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4231 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4232 msg = N_("-mvsx requires hardware floating point");
4233 else
4235 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4236 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4239 else if (TARGET_PAIRED_FLOAT)
4240 msg = N_("-mvsx and -mpaired are incompatible");
4241 else if (TARGET_AVOID_XFORM > 0)
4242 msg = N_("-mvsx needs indexed addressing");
4243 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4244 & OPTION_MASK_ALTIVEC))
4246 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4247 msg = N_("-mvsx and -mno-altivec are incompatible");
4248 else
4249 msg = N_("-mno-altivec disables vsx");
4252 if (msg)
4254 warning (0, msg);
4255 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4256 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4260 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4261 the -mcpu setting to enable options that conflict. */
4262 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4263 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4264 | OPTION_MASK_ALTIVEC
4265 | OPTION_MASK_VSX)) != 0)
4266 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4267 | OPTION_MASK_DIRECT_MOVE)
4268 & ~rs6000_isa_flags_explicit);
4270 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4271 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4273 /* For the newer switches (vsx, dfp, etc.), set some of the older options,
4274 unless the user explicitly used -mno-<option> to disable the code. */
4275 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4276 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0)
4277 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4278 else if (TARGET_P9_MINMAX)
4280 if (have_cpu)
4282 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
4284 /* Legacy behavior: allow -mcpu=power9 with certain
4285 capabilities explicitly disabled. */
4286 rs6000_isa_flags |=
4287 (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4288 /* However, reject this automatic fix if certain
4289 capabilities required for TARGET_P9_MINMAX support
4290 have been explicitly disabled. */
4291 if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4292 | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags)
4293 != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4294 | OPTION_MASK_UPPER_REGS_DF))
4295 error ("-mpower9-minmax incompatible with explicitly disabled options");
4297 else
4298 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4299 "<xxx> less than power9");
4301 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4302 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4303 & rs6000_isa_flags_explicit))
4304 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4305 were explicitly cleared. */
4306 error ("-mpower9-minmax incompatible with explicitly disabled options");
4307 else
4308 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4310 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4311 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4312 else if (TARGET_VSX)
4313 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4314 else if (TARGET_POPCNTD)
4315 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4316 else if (TARGET_DFP)
4317 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4318 else if (TARGET_CMPB)
4319 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4320 else if (TARGET_FPRND)
4321 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4322 else if (TARGET_POPCNTB)
4323 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4324 else if (TARGET_ALTIVEC)
4325 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
4327 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4329 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4330 error ("-mcrypto requires -maltivec");
4331 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4334 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4336 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4337 error ("-mdirect-move requires -mvsx");
4338 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4341 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4343 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4344 error ("-mpower8-vector requires -maltivec");
4345 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4348 if (TARGET_P8_VECTOR && !TARGET_VSX)
4350 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4351 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4352 error ("-mpower8-vector requires -mvsx");
4353 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4355 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4356 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4357 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4359 else
4361 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4362 not explicit. */
4363 rs6000_isa_flags |= OPTION_MASK_VSX;
4364 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4368 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4370 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4371 error ("-mvsx-timode requires -mvsx");
4372 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4375 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4377 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4378 error ("-mhard-dfp requires -mhard-float");
4379 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4382 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4383 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4384 set the individual option. */
4385 if (TARGET_UPPER_REGS > 0)
4387 if (TARGET_VSX
4388 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4390 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4391 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4393 if (TARGET_VSX
4394 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4396 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4397 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4399 if (TARGET_P8_VECTOR
4400 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4402 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4403 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4406 else if (TARGET_UPPER_REGS == 0)
4408 if (TARGET_VSX
4409 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4411 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4412 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4414 if (TARGET_VSX
4415 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4417 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4418 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4420 if (TARGET_P8_VECTOR
4421 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4423 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4424 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4428 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4430 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4431 error ("-mupper-regs-df requires -mvsx");
4432 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4435 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4437 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4438 error ("-mupper-regs-di requires -mvsx");
4439 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4442 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4444 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4445 error ("-mupper-regs-sf requires -mpower8-vector");
4446 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4449 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4450 silently turn off quad memory mode. */
4451 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4453 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4454 warning (0, N_("-mquad-memory requires 64-bit mode"));
4456 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4457 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4459 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4460 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4463 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4464 the words are reversed, but atomic operations can still be done by
4465 swapping the words. */
4466 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4468 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4469 warning (0, N_("-mquad-memory is not available in little endian mode"));
4471 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4474 /* Assume that if the user asked for normal quad memory instructions, they
4475 want the atomic versions as well, unless they explicitly told us not to use
4476 quad word atomic instructions. */
4477 if (TARGET_QUAD_MEMORY
4478 && !TARGET_QUAD_MEMORY_ATOMIC
4479 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4480 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
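/* Usage sketch (illustrative): on a 64-bit big-endian target, -mquad-memory
   lets 128-bit moves use the lq/stq instructions, and by the default above
   the atomic forms (lqarx/stqcx. sequences for 128-bit atomics) are enabled
   along with it unless explicitly disabled. */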
4482 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4483 generating power8 instructions. */
4484 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4485 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4486 & OPTION_MASK_P8_FUSION);
4488 /* Setting additional fusion flags turns on base fusion. */
4489 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4491 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4493 if (TARGET_P8_FUSION_SIGN)
4494 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4496 if (TARGET_TOC_FUSION)
4497 error ("-mtoc-fusion requires -mpower8-fusion");
4499 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4501 else
4502 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4505 /* Power9 fusion is a superset of power8 fusion. */
4506 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4508 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4510 /* We prefer not to mention undocumented options in
4511 error messages. However, if users have managed to select
4512 power9-fusion without selecting power8-fusion, they
4513 already know about undocumented flags. */
4514 error ("-mpower9-fusion requires -mpower8-fusion");
4515 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4517 else
4518 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4521 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4522 generating power9 instructions. */
4523 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4524 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4525 & OPTION_MASK_P9_FUSION);
4527 /* Power8 does not fuse sign extended loads with the addis. If we are
4528 optimizing at high levels for speed, convert a sign extended load into a
4529 zero extending load, and an explicit sign extension. */
4530 if (TARGET_P8_FUSION
4531 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4532 && optimize_function_for_speed_p (cfun)
4533 && optimize >= 3)
4534 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
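/* Illustrative sketch of the conversion enabled above, for a hypothetical
   16-bit variable `v':

     lha 9,v@toc@l(9)          # sign-extending load, not fused by power8

   becomes

     lhz   9,v@toc@l(9)        # zero-extending load, fusible with addis
     extsh 9,9                 # explicit sign extension

   trading one extra insn for the addis/load fusion. */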
4536 /* TOC fusion requires 64-bit and medium/large code model. */
4537 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4539 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4540 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4541 warning (0, N_("-mtoc-fusion requires 64-bit"));
4544 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4546 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4547 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4548 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4551 /* Turn on -mtoc-fusion by default if we have p8-fusion and a 64-bit
4552 medium/large code model. */
4553 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4554 && (TARGET_CMODEL != CMODEL_SMALL)
4555 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4556 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4558 /* ISA 3.0 vector instructions include ISA 2.07. */
4559 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4561 /* We prefer not to mention undocumented options in
4562 error messages. However, if users have managed to select
4563 power9-vector without selecting power8-vector, they
4564 already know about undocumented flags. */
4565 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4566 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4567 error ("-mpower9-vector requires -mpower8-vector");
4568 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4570 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4571 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4572 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4574 else
4576 /* OPTION_MASK_P9_VECTOR is explicit and
4577 OPTION_MASK_P8_VECTOR is not explicit. */
4578 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4579 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4583 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4584 -mpower9-dform-vector. */
4585 if (TARGET_P9_DFORM_BOTH > 0)
4587 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4588 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4590 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4591 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4593 else if (TARGET_P9_DFORM_BOTH == 0)
4595 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4596 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4598 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4599 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4602 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4603 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4605 /* We prefer not to mention undocumented options in
4606 error messages. However, if users have managed to select
4607 power9-dform without selecting power9-vector, they
4608 already know about undocumented flags. */
4609 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4610 && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR
4611 | OPTION_MASK_P9_DFORM_VECTOR)))
4612 error ("-mpower9-dform requires -mpower9-vector");
4613 else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4615 rs6000_isa_flags &=
4616 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4617 rs6000_isa_flags_explicit |=
4618 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4620 else
4622 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4623 OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
4624 may be explicit. */
4625 rs6000_isa_flags |= OPTION_MASK_P9_VECTOR;
4626 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4630 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
4631 && !TARGET_DIRECT_MOVE)
4633 /* We prefer not to mention undocumented options in
4634 error messages. However, if users have managed to select
4635 power9-dform without selecting direct-move, they
4636 already know about undocumented flags. */
4637 if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4638 && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) ||
4639 (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) ||
4640 (TARGET_P9_DFORM_BOTH == 1)))
4641 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4642 " require -mdirect-move");
4643 else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
4645 rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
4646 rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
4648 else
4650 rs6000_isa_flags &=
4651 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4652 rs6000_isa_flags_explicit |=
4653 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4657 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4659 /* We prefer not to mention undocumented options in
4660 error messages. However, if users have managed to select
4661 power9-dform without selecting upper-regs-df, they
4662 already know about undocumented flags. */
4663 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4664 error ("-mpower9-dform requires -mupper-regs-df");
4665 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4668 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4670 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4671 error ("-mpower9-dform requires -mupper-regs-sf");
4672 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4675 /* Enable LRA by default. */
4676 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4677 rs6000_isa_flags |= OPTION_MASK_LRA;
4679 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4680 but do show up with -mno-lra. Given -mlra will become the default once
4681 PR 69847 is fixed, turn off the options with problems by default if
4682 -mno-lra was used, and warn if the user explicitly asked for the option.
4684 Enable -mpower9-dform-vector by default if LRA and other power9 options.
4685 Enable -mvsx-timode by default if LRA and VSX. */
4686 if (!TARGET_LRA)
4688 if (TARGET_VSX_TIMODE)
4690 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4691 warning (0, "-mvsx-timode might need -mlra");
4693 else
4694 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4698 else
4700 if (TARGET_VSX && !TARGET_VSX_TIMODE
4701 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4702 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4705 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4706 support. If we only have ISA 2.06 support, and the user did not specify
4707 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4708 but we don't enable the full vectorization support. */
4709 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4710 TARGET_ALLOW_MOVMISALIGN = 1;
4712 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4714 if (TARGET_ALLOW_MOVMISALIGN > 0
4715 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4716 error ("-mallow-movmisalign requires -mvsx");
4718 TARGET_ALLOW_MOVMISALIGN = 0;
4721 /* Determine when unaligned vector accesses are permitted, and when
4722 they are preferred over masked Altivec loads. Note that if
4723 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4724 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4725 not true. */
4726 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4728 if (!TARGET_VSX)
4730 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4731 error ("-mefficient-unaligned-vsx requires -mvsx");
4733 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4736 else if (!TARGET_ALLOW_MOVMISALIGN)
4738 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4739 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4741 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4745 /* Check whether we should allow small integers into VSX registers. We
4746 require direct move to prevent the register allocator from having to move
4747 variables through memory. SImode can be used on ISA 2.07,
4748 while HImode and QImode require ISA 3.0. */
4749 if (TARGET_VSX_SMALL_INTEGER
4750 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4752 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4753 error ("-mvsx-small-integer requires -mpower8-vector, "
4754 "-mupper-regs-di, and -mdirect-move");
4756 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4759 /* Set long double size before the IEEE 128-bit tests. */
4760 if (!global_options_set.x_rs6000_long_double_type_size)
4762 if (main_target_opt != NULL
4763 && (main_target_opt->x_rs6000_long_double_type_size
4764 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4765 error ("target attribute or pragma changes long double size");
4766 else
4767 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4770 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4771 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4772 pick up this default. */
4773 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4774 if (!global_options_set.x_rs6000_ieeequad)
4775 rs6000_ieeequad = 1;
4776 #endif
4778 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4779 systems, but don't enable the __float128 keyword. */
4780 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4781 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4782 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4783 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4785 /* IEEE 128-bit floating point requires VSX support. */
4786 if (!TARGET_VSX)
4788 if (TARGET_FLOAT128_KEYWORD)
4790 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4791 error ("-mfloat128 requires VSX support");
4793 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4794 | OPTION_MASK_FLOAT128_KEYWORD
4795 | OPTION_MASK_FLOAT128_HW);
4798 else if (TARGET_FLOAT128_TYPE)
4800 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4801 error ("-mfloat128-type requires VSX support");
4803 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4804 | OPTION_MASK_FLOAT128_KEYWORD
4805 | OPTION_MASK_FLOAT128_HW);
4809 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4810 128-bit floating point support to be enabled. */
4811 if (!TARGET_FLOAT128_TYPE)
4813 if (TARGET_FLOAT128_KEYWORD)
4815 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4817 error ("-mfloat128 requires -mfloat128-type");
4818 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4819 | OPTION_MASK_FLOAT128_KEYWORD
4820 | OPTION_MASK_FLOAT128_HW);
4822 else
4823 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4826 if (TARGET_FLOAT128_HW)
4828 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4830 error ("-mfloat128-hardware requires -mfloat128-type");
4831 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4833 else
4834 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4835 | OPTION_MASK_FLOAT128_KEYWORD
4836 | OPTION_MASK_FLOAT128_HW);
4840 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4841 -mfloat128-hardware by default. However, don't enable the __float128
4842 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4843 -mfloat128 option as well if it was not already set. */
4844 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
4845 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4846 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4847 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4849 if (TARGET_FLOAT128_HW
4850 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4852 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4853 error ("-mfloat128-hardware requires full ISA 3.0 support");
4855 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4858 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4860 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4861 error ("-mfloat128-hardware requires -m64");
4863 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4866 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
4867 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
4868 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4869 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
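/* Usage sketch (illustrative): once the __float128 keyword is enabled,
   code such as

     __float128 q = 1.0q;

   compiles, and with -mfloat128-hardware the arithmetic maps to the ISA 3.0
   quad-precision instructions (e.g. xsaddqp) rather than libgcc software
   routines. */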
4871 /* Print the options after updating the defaults. */
4872 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4873 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4875 /* E500mc does "better" if we inline more aggressively. Respect the
4876 user's opinion, though. */
4877 if (rs6000_block_move_inline_limit == 0
4878 && (rs6000_cpu == PROCESSOR_PPCE500MC
4879 || rs6000_cpu == PROCESSOR_PPCE500MC64
4880 || rs6000_cpu == PROCESSOR_PPCE5500
4881 || rs6000_cpu == PROCESSOR_PPCE6500))
4882 rs6000_block_move_inline_limit = 128;
4884 /* store_one_arg depends on expand_block_move to handle at least the
4885 size of reg_parm_stack_space. */
4886 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4887 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4889 if (global_init_p)
4891 /* If the appropriate debug option is enabled, replace the target hooks
4892 with debug versions that call the real version and then print
4893 debugging information. */
4894 if (TARGET_DEBUG_COST)
4896 targetm.rtx_costs = rs6000_debug_rtx_costs;
4897 targetm.address_cost = rs6000_debug_address_cost;
4898 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4901 if (TARGET_DEBUG_ADDR)
4903 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4904 targetm.legitimize_address = rs6000_debug_legitimize_address;
4905 rs6000_secondary_reload_class_ptr
4906 = rs6000_debug_secondary_reload_class;
4907 rs6000_secondary_memory_needed_ptr
4908 = rs6000_debug_secondary_memory_needed;
4909 rs6000_cannot_change_mode_class_ptr
4910 = rs6000_debug_cannot_change_mode_class;
4911 rs6000_preferred_reload_class_ptr
4912 = rs6000_debug_preferred_reload_class;
4913 rs6000_legitimize_reload_address_ptr
4914 = rs6000_debug_legitimize_reload_address;
4915 rs6000_mode_dependent_address_ptr
4916 = rs6000_debug_mode_dependent_address;
4919 if (rs6000_veclibabi_name)
4921 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4922 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4923 else
4925 error ("unknown vectorization library ABI type (%s) for "
4926 "-mveclibabi= switch", rs6000_veclibabi_name);
4927 ret = false;
4932 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4933 target attribute or pragma which automatically enables both options,
4934 unless the altivec ABI was set. This is set by default for 64-bit, but
4935 not for 32-bit. */
4936 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4937 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4938 | OPTION_MASK_FLOAT128_TYPE
4939 | OPTION_MASK_FLOAT128_KEYWORD)
4940 & ~rs6000_isa_flags_explicit);
4942 /* Enable Altivec ABI for AIX -maltivec. */
4943 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4945 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4946 error ("target attribute or pragma changes AltiVec ABI");
4947 else
4948 rs6000_altivec_abi = 1;
4951 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4952 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4953 be explicitly overridden in either case. */
4954 if (TARGET_ELF)
4956 if (!global_options_set.x_rs6000_altivec_abi
4957 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4959 if (main_target_opt != NULL &&
4960 !main_target_opt->x_rs6000_altivec_abi)
4961 error ("target attribute or pragma changes AltiVec ABI");
4962 else
4963 rs6000_altivec_abi = 1;
4967 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4968 So far, the only darwin64 targets are also Mach-O. */
4969 if (TARGET_MACHO
4970 && DEFAULT_ABI == ABI_DARWIN
4971 && TARGET_64BIT)
4973 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4974 error ("target attribute or pragma changes darwin64 ABI");
4975 else
4977 rs6000_darwin64_abi = 1;
4978 /* Default to natural alignment, for better performance. */
4979 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4983 /* Place FP constants in the constant pool instead of TOC
4984 if section anchors are enabled. */
4985 if (flag_section_anchors
4986 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4987 TARGET_NO_FP_IN_TOC = 1;
4989 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4990 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4992 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4993 SUBTARGET_OVERRIDE_OPTIONS;
4994 #endif
4995 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4996 SUBSUBTARGET_OVERRIDE_OPTIONS;
4997 #endif
4998 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4999 SUB3TARGET_OVERRIDE_OPTIONS;
5000 #endif
5002 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5003 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
5005 /* For the E500 family of cores, reset the single/double FP flags to let us
5006 check that they remain constant across attributes or pragmas. Also,
5007 clear a possible request for string instructions, which are not supported
5008 and which we might have silently enabled above for -Os.
5010 For other families, clear ISEL in case it was set implicitly.
5013 switch (rs6000_cpu)
5015 case PROCESSOR_PPC8540:
5016 case PROCESSOR_PPC8548:
5017 case PROCESSOR_PPCE500MC:
5018 case PROCESSOR_PPCE500MC64:
5019 case PROCESSOR_PPCE5500:
5020 case PROCESSOR_PPCE6500:
5022 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
5023 rs6000_double_float = TARGET_E500_DOUBLE;
5025 rs6000_isa_flags &= ~OPTION_MASK_STRING;
5027 break;
5029 default:
5031 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
5032 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
5034 break;
5037 if (main_target_opt)
5039 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
5040 error ("target attribute or pragma changes single precision floating "
5041 "point");
5042 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
5043 error ("target attribute or pragma changes double precision floating "
5044 "point");
5047 /* Detect invalid option combinations with E500. */
5048 CHECK_E500_OPTIONS;
5050 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
5051 && rs6000_cpu != PROCESSOR_POWER5
5052 && rs6000_cpu != PROCESSOR_POWER6
5053 && rs6000_cpu != PROCESSOR_POWER7
5054 && rs6000_cpu != PROCESSOR_POWER8
5055 && rs6000_cpu != PROCESSOR_POWER9
5056 && rs6000_cpu != PROCESSOR_PPCA2
5057 && rs6000_cpu != PROCESSOR_CELL
5058 && rs6000_cpu != PROCESSOR_PPC476);
5059 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
5060 || rs6000_cpu == PROCESSOR_POWER5
5061 || rs6000_cpu == PROCESSOR_POWER7
5062 || rs6000_cpu == PROCESSOR_POWER8);
5063 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
5064 || rs6000_cpu == PROCESSOR_POWER5
5065 || rs6000_cpu == PROCESSOR_POWER6
5066 || rs6000_cpu == PROCESSOR_POWER7
5067 || rs6000_cpu == PROCESSOR_POWER8
5068 || rs6000_cpu == PROCESSOR_POWER9
5069 || rs6000_cpu == PROCESSOR_PPCE500MC
5070 || rs6000_cpu == PROCESSOR_PPCE500MC64
5071 || rs6000_cpu == PROCESSOR_PPCE5500
5072 || rs6000_cpu == PROCESSOR_PPCE6500);
5074 /* Allow debug switches to override the above settings. These are set to -1
5075 in rs6000.opt to indicate the user hasn't directly set the switch. */
5076 if (TARGET_ALWAYS_HINT >= 0)
5077 rs6000_always_hint = TARGET_ALWAYS_HINT;
5079 if (TARGET_SCHED_GROUPS >= 0)
5080 rs6000_sched_groups = TARGET_SCHED_GROUPS;
5082 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
5083 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
5085 rs6000_sched_restricted_insns_priority
5086 = (rs6000_sched_groups ? 1 : 0);
5088 /* Handle -msched-costly-dep option. */
5089 rs6000_sched_costly_dep
5090 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
5092 if (rs6000_sched_costly_dep_str)
5094 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
5095 rs6000_sched_costly_dep = no_dep_costly;
5096 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
5097 rs6000_sched_costly_dep = all_deps_costly;
5098 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
5099 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
5100 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
5101 rs6000_sched_costly_dep = store_to_load_dep_costly;
5102 else
5103 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
5104 atoi (rs6000_sched_costly_dep_str));
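/* For example, -msched-costly-dep=store_to_load selects
   store_to_load_dep_costly above, while a numeric argument such as
   -msched-costly-dep=20 falls through to atoi and is stored as a
   numeric dependence-cost threshold.  */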
5107 /* Handle -minsert-sched-nops option. */
5108 rs6000_sched_insert_nops
5109 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
5111 if (rs6000_sched_insert_nops_str)
5113 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
5114 rs6000_sched_insert_nops = sched_finish_none;
5115 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
5116 rs6000_sched_insert_nops = sched_finish_pad_groups;
5117 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
5118 rs6000_sched_insert_nops = sched_finish_regroup_exact;
5119 else
5120 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
5121 atoi (rs6000_sched_insert_nops_str));
5124 /* Handle the stack protector options. */
5125 if (!global_options_set.x_rs6000_stack_protector_guard)
5126 #ifdef TARGET_THREAD_SSP_OFFSET
5127 rs6000_stack_protector_guard = SSP_TLS;
5128 #else
5129 rs6000_stack_protector_guard = SSP_GLOBAL;
5130 #endif
5132 #ifdef TARGET_THREAD_SSP_OFFSET
5133 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
5134 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
5135 #endif
5137 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
5139 char *endp;
5140 const char *str = rs6000_stack_protector_guard_offset_str;
5142 errno = 0;
5143 long offset = strtol (str, &endp, 0);
5144 if (!*str || *endp || errno)
5145 error ("%qs is not a valid number "
5146 "in -mstack-protector-guard-offset=", str);
5148 if (!IN_RANGE (offset, -0x8000, 0x7fff)
5149 || (TARGET_64BIT && (offset & 3)))
5150 error ("%qs is not a valid offset "
5151 "in -mstack-protector-guard-offset=", str);
5153 rs6000_stack_protector_guard_offset = offset;
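/* A worked example: -mstack-protector-guard-offset=0x7f8 is parsed by
   strtol with base 0 (decimal, 0x-prefixed hex, and 0-prefixed octal
   all work), passes the [-0x8000, 0x7fff] range check and, on 64-bit,
   the 4-byte alignment check, giving an offset of 2040.  */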
5156 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
5158 const char *str = rs6000_stack_protector_guard_reg_str;
5159 int reg = decode_reg_name (str);
5161 if (!IN_RANGE (reg, 1, 31))
5162 error ("%qs is not a valid base register "
5163 "in -mstack-protector-guard-reg=", str);
5165 rs6000_stack_protector_guard_reg = reg;
5168 if (rs6000_stack_protector_guard == SSP_TLS
5169 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
5170 error ("-mstack-protector-guard=tls needs a valid base register");
5172 if (global_init_p)
5174 #ifdef TARGET_REGNAMES
5175 /* If the user desires alternate register names, copy in the
5176 alternate names now. */
5177 if (TARGET_REGNAMES)
5178 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
5179 #endif
5181 /* Set aix_struct_return last, after the ABI is determined.
5182 If -maix-struct-return or -msvr4-struct-return was explicitly
5183 used, don't override with the ABI default. */
5184 if (!global_options_set.x_aix_struct_return)
5185 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
5187 #if 0
5188 /* IBM XL compiler defaults to unsigned bitfields. */
5189 if (TARGET_XL_COMPAT)
5190 flag_signed_bitfields = 0;
5191 #endif
5193 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
5194 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
5196 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
5198 /* We can only guarantee the availability of DI pseudo-ops when
5199 assembling for 64-bit targets. */
5200 if (!TARGET_64BIT)
5202 targetm.asm_out.aligned_op.di = NULL;
5203 targetm.asm_out.unaligned_op.di = NULL;
5207 /* Set branch target alignment, if not optimizing for size. */
5208 if (!optimize_size)
5210 /* Cell wants to be 8-byte aligned for dual issue.  Titan wants to be
5211 8-byte aligned to avoid misprediction by the branch predictor. */
5212 if (rs6000_cpu == PROCESSOR_TITAN
5213 || rs6000_cpu == PROCESSOR_CELL)
5215 if (align_functions <= 0)
5216 align_functions = 8;
5217 if (align_jumps <= 0)
5218 align_jumps = 8;
5219 if (align_loops <= 0)
5220 align_loops = 8;
5222 if (rs6000_align_branch_targets)
5224 if (align_functions <= 0)
5225 align_functions = 16;
5226 if (align_jumps <= 0)
5227 align_jumps = 16;
5228 if (align_loops <= 0)
5230 can_override_loop_align = 1;
5231 align_loops = 16;
5234 if (align_jumps_max_skip <= 0)
5235 align_jumps_max_skip = 15;
5236 if (align_loops_max_skip <= 0)
5237 align_loops_max_skip = 15;
5240 /* Arrange to save and restore machine status around nested functions. */
5241 init_machine_status = rs6000_init_machine_status;
5243 /* We should always be splitting complex arguments, but we can't break
5244 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5245 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5246 targetm.calls.split_complex_arg = NULL;
5248 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5249 if (DEFAULT_ABI == ABI_AIX)
5250 targetm.calls.custom_function_descriptors = 0;
5253 /* Initialize rs6000_cost with the appropriate target costs. */
5254 if (optimize_size)
5255 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5256 else
5257 switch (rs6000_cpu)
5259 case PROCESSOR_RS64A:
5260 rs6000_cost = &rs64a_cost;
5261 break;
5263 case PROCESSOR_MPCCORE:
5264 rs6000_cost = &mpccore_cost;
5265 break;
5267 case PROCESSOR_PPC403:
5268 rs6000_cost = &ppc403_cost;
5269 break;
5271 case PROCESSOR_PPC405:
5272 rs6000_cost = &ppc405_cost;
5273 break;
5275 case PROCESSOR_PPC440:
5276 rs6000_cost = &ppc440_cost;
5277 break;
5279 case PROCESSOR_PPC476:
5280 rs6000_cost = &ppc476_cost;
5281 break;
5283 case PROCESSOR_PPC601:
5284 rs6000_cost = &ppc601_cost;
5285 break;
5287 case PROCESSOR_PPC603:
5288 rs6000_cost = &ppc603_cost;
5289 break;
5291 case PROCESSOR_PPC604:
5292 rs6000_cost = &ppc604_cost;
5293 break;
5295 case PROCESSOR_PPC604e:
5296 rs6000_cost = &ppc604e_cost;
5297 break;
5299 case PROCESSOR_PPC620:
5300 rs6000_cost = &ppc620_cost;
5301 break;
5303 case PROCESSOR_PPC630:
5304 rs6000_cost = &ppc630_cost;
5305 break;
5307 case PROCESSOR_CELL:
5308 rs6000_cost = &ppccell_cost;
5309 break;
5311 case PROCESSOR_PPC750:
5312 case PROCESSOR_PPC7400:
5313 rs6000_cost = &ppc750_cost;
5314 break;
5316 case PROCESSOR_PPC7450:
5317 rs6000_cost = &ppc7450_cost;
5318 break;
5320 case PROCESSOR_PPC8540:
5321 case PROCESSOR_PPC8548:
5322 rs6000_cost = &ppc8540_cost;
5323 break;
5325 case PROCESSOR_PPCE300C2:
5326 case PROCESSOR_PPCE300C3:
5327 rs6000_cost = &ppce300c2c3_cost;
5328 break;
5330 case PROCESSOR_PPCE500MC:
5331 rs6000_cost = &ppce500mc_cost;
5332 break;
5334 case PROCESSOR_PPCE500MC64:
5335 rs6000_cost = &ppce500mc64_cost;
5336 break;
5338 case PROCESSOR_PPCE5500:
5339 rs6000_cost = &ppce5500_cost;
5340 break;
5342 case PROCESSOR_PPCE6500:
5343 rs6000_cost = &ppce6500_cost;
5344 break;
5346 case PROCESSOR_TITAN:
5347 rs6000_cost = &titan_cost;
5348 break;
5350 case PROCESSOR_POWER4:
5351 case PROCESSOR_POWER5:
5352 rs6000_cost = &power4_cost;
5353 break;
5355 case PROCESSOR_POWER6:
5356 rs6000_cost = &power6_cost;
5357 break;
5359 case PROCESSOR_POWER7:
5360 rs6000_cost = &power7_cost;
5361 break;
5363 case PROCESSOR_POWER8:
5364 rs6000_cost = &power8_cost;
5365 break;
5367 case PROCESSOR_POWER9:
5368 rs6000_cost = &power9_cost;
5369 break;
5371 case PROCESSOR_PPCA2:
5372 rs6000_cost = &ppca2_cost;
5373 break;
5375 default:
5376 gcc_unreachable ();
5379 if (global_init_p)
5381 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5382 rs6000_cost->simultaneous_prefetches,
5383 global_options.x_param_values,
5384 global_options_set.x_param_values);
5385 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5386 global_options.x_param_values,
5387 global_options_set.x_param_values);
5388 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5389 rs6000_cost->cache_line_size,
5390 global_options.x_param_values,
5391 global_options_set.x_param_values);
5392 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5393 global_options.x_param_values,
5394 global_options_set.x_param_values);
5396 /* Increase loop peeling limits based on performance analysis. */
5397 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5398 global_options.x_param_values,
5399 global_options_set.x_param_values);
5400 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5401 global_options.x_param_values,
5402 global_options_set.x_param_values);
5404 /* Use the 'model' -fsched-pressure algorithm by default. */
5405 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5406 SCHED_PRESSURE_MODEL,
5407 global_options.x_param_values,
5408 global_options_set.x_param_values);
5410 /* If using typedef char *va_list, signal that
5411 __builtin_va_start (&ap, 0) can be optimized to
5412 ap = __builtin_next_arg (0). */
5413 if (DEFAULT_ABI != ABI_V4)
5414 targetm.expand_builtin_va_start = NULL;
5417 /* Set up single/double float flags.
5418 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5419 then set both flags. */
5420 if (TARGET_HARD_FLOAT && TARGET_FPRS
5421 && rs6000_single_float == 0 && rs6000_double_float == 0)
5422 rs6000_single_float = rs6000_double_float = 1;
5424 /* If not explicitly specified via option, decide whether to generate indexed
5425 load/store instructions. A value of -1 indicates that the
5426 initial value of this variable has not been overwritten. During
5427 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5428 if (TARGET_AVOID_XFORM == -1)
5429 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5430 DERAT mispredict penalty.  However, the LVE and STVE AltiVec instructions
5431 need indexed accesses and the type used is the scalar type of the element
5432 being loaded or stored. */
5433 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5434 && !TARGET_ALTIVEC);
5436 /* Set the -mrecip options. */
5437 if (rs6000_recip_name)
5439 char *p = ASTRDUP (rs6000_recip_name);
5440 char *q;
5441 unsigned int mask, i;
5442 bool invert;
5444 while ((q = strtok (p, ",")) != NULL)
5446 p = NULL;
5447 if (*q == '!')
5449 invert = true;
5450 q++;
5452 else
5453 invert = false;
5455 if (!strcmp (q, "default"))
5456 mask = ((TARGET_RECIP_PRECISION)
5457 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5458 else
5460 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5461 if (!strcmp (q, recip_options[i].string))
5463 mask = recip_options[i].mask;
5464 break;
5467 if (i == ARRAY_SIZE (recip_options))
5469 error ("unknown option for -mrecip=%s", q);
5470 invert = false;
5471 mask = 0;
5472 ret = false;
5476 if (invert)
5477 rs6000_recip_control &= ~mask;
5478 else
5479 rs6000_recip_control |= mask;
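/* A usage sketch: -mrecip=default,!rsqrtd first ORs in the default
   mask (high or low precision depending on TARGET_RECIP_PRECISION),
   then the '!' entry clears the bits of that option; valid entry
   names are those in the recip_options table defined earlier in this
   file.  */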
5483 /* Set the builtin mask of the various options used that could affect which
5484 builtins were used. In the past we used target_flags, but we've run out
5485 of bits, and some options like SPE and PAIRED are no longer in
5486 target_flags. */
5487 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5488 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5489 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5490 rs6000_builtin_mask);
5492 /* Initialize all of the registers. */
5493 rs6000_init_hard_regno_mode_ok (global_init_p);
5495 /* Save the initial options in case the user uses function-specific options. */
5496 if (global_init_p)
5497 target_option_default_node = target_option_current_node
5498 = build_target_option_node (&global_options);
5500 /* If not explicitly specified via option, decide whether to generate the
5501 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
5502 if (TARGET_LINK_STACK == -1)
5503 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5505 return ret;
5508 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5509 define the target cpu type. */
5511 static void
5512 rs6000_option_override (void)
5514 (void) rs6000_option_override_internal (true);
5518 /* Implement targetm.vectorize.builtin_mask_for_load. */
5519 static tree
5520 rs6000_builtin_mask_for_load (void)
5522 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5523 if ((TARGET_ALTIVEC && !TARGET_VSX)
5524 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5525 return altivec_builtin_mask_for_load;
5526 else
5527 return 0;
5530 /* Implement LOOP_ALIGN. */
5532 rs6000_loop_align (rtx label)
5534 basic_block bb;
5535 int ninsns;
5537 /* Don't override loop alignment if -falign-loops was specified. */
5538 if (!can_override_loop_align)
5539 return align_loops_log;
5541 bb = BLOCK_FOR_INSN (label);
5542 ninsns = num_loop_insns(bb->loop_father);
5544 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
5545 if (ninsns > 4 && ninsns <= 8
5546 && (rs6000_cpu == PROCESSOR_POWER4
5547 || rs6000_cpu == PROCESSOR_POWER5
5548 || rs6000_cpu == PROCESSOR_POWER6
5549 || rs6000_cpu == PROCESSOR_POWER7
5550 || rs6000_cpu == PROCESSOR_POWER8
5551 || rs6000_cpu == PROCESSOR_POWER9))
5552 return 5;
5553 else
5554 return align_loops_log;
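/* A worked example: a 6-insn innermost loop compiled for POWER8
   satisfies 4 < ninsns <= 8, so this returns 5, i.e. 2^5 = 32-byte
   alignment; rs6000_loop_align_max_skip below then permits skipping
   up to (1 << 5) - 1 = 31 bytes of padding.  */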
5557 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5558 static int
5559 rs6000_loop_align_max_skip (rtx_insn *label)
5561 return (1 << rs6000_loop_align (label)) - 1;
5564 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5565 after applying N iterations.  This routine does not determine
5566 how many iterations are required to reach the desired alignment. */
5568 static bool
5569 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5571 if (is_packed)
5572 return false;
5574 if (TARGET_32BIT)
5576 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5577 return true;
5579 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5580 return true;
5582 return false;
5584 else
5586 if (TARGET_MACHO)
5587 return false;
5589 /* Assume that all other types are naturally aligned.  CHECKME! */
5590 return true;
5594 /* Return true if the vector misalignment factor is supported by the
5595 target. */
5596 static bool
5597 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5598 const_tree type,
5599 int misalignment,
5600 bool is_packed)
5602 if (TARGET_VSX)
5604 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5605 return true;
5607 /* Return false if the movmisalign pattern is not supported for this mode. */
5608 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5609 return false;
5611 if (misalignment == -1)
5613 /* Misalignment factor is unknown at compile time but we know
5614 it's word aligned. */
5615 if (rs6000_vector_alignment_reachable (type, is_packed))
5617 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5619 if (element_size == 64 || element_size == 32)
5620 return true;
5623 return false;
5626 /* VSX supports word-aligned vectors. */
5627 if (misalignment % 4 == 0)
5628 return true;
5630 return false;
5633 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5634 static int
5635 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5636 tree vectype, int misalign)
5638 unsigned elements;
5639 tree elem_type;
5641 switch (type_of_cost)
5643 case scalar_stmt:
5644 case scalar_load:
5645 case scalar_store:
5646 case vector_stmt:
5647 case vector_load:
5648 case vector_store:
5649 case vec_to_scalar:
5650 case scalar_to_vec:
5651 case cond_branch_not_taken:
5652 return 1;
5654 case vec_perm:
5655 if (TARGET_VSX)
5656 return 3;
5657 else
5658 return 1;
5660 case vec_promote_demote:
5661 if (TARGET_VSX)
5662 return 4;
5663 else
5664 return 1;
5666 case cond_branch_taken:
5667 return 3;
5669 case unaligned_load:
5670 if (TARGET_P9_VECTOR)
5671 return 3;
5673 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5674 return 1;
5676 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5678 elements = TYPE_VECTOR_SUBPARTS (vectype);
5679 if (elements == 2)
5680 /* Double word aligned. */
5681 return 2;
5683 if (elements == 4)
5685 switch (misalign)
5687 case 8:
5688 /* Double word aligned. */
5689 return 2;
5691 case -1:
5692 /* Unknown misalignment. */
5693 case 4:
5694 case 12:
5695 /* Word aligned. */
5696 return 22;
5698 default:
5699 gcc_unreachable ();
5704 if (TARGET_ALTIVEC)
5705 /* Misaligned loads are not supported. */
5706 gcc_unreachable ();
5708 return 2;
5710 case unaligned_store:
5711 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5712 return 1;
5714 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5716 elements = TYPE_VECTOR_SUBPARTS (vectype);
5717 if (elements == 2)
5718 /* Double word aligned. */
5719 return 2;
5721 if (elements == 4)
5723 switch (misalign)
5725 case 8:
5726 /* Double word aligned. */
5727 return 2;
5729 case -1:
5730 /* Unknown misalignment. */
5731 case 4:
5732 case 12:
5733 /* Word aligned. */
5734 return 23;
5736 default:
5737 gcc_unreachable ();
5742 if (TARGET_ALTIVEC)
5743 /* Misaligned stores are not supported. */
5744 gcc_unreachable ();
5746 return 2;
5748 case vec_construct:
5749 /* This is a rough approximation assuming non-constant elements
5750 constructed into a vector via element insertion. FIXME:
5751 vec_construct is not granular enough for uniformly good
5752 decisions. If the initialization is a splat, this is
5753 cheaper than we estimate. Improve this someday. */
5754 elem_type = TREE_TYPE (vectype);
5755 /* 32-bit vectors loaded into registers are stored as double
5756 precision, so we need 2 permutes, 2 converts, and 1 merge
5757 to construct a vector of short floats from them. */
5758 if (SCALAR_FLOAT_TYPE_P (elem_type)
5759 && TYPE_PRECISION (elem_type) == 32)
5760 return 5;
5761 else
5762 return max (2, TYPE_VECTOR_SUBPARTS (vectype) - 1);
5764 default:
5765 gcc_unreachable ();
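/* A worked example: an unaligned_load of V4SF on a pre-POWER9 VSX
   target with -mallow-movmisalign has elements == 4; misalign 8
   (doubleword aligned) costs 2, while misalign 4, 12, or -1 (word
   aligned or unknown) costs 22 to account for the realignment code.  */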
5769 /* Implement targetm.vectorize.preferred_simd_mode. */
5771 static machine_mode
5772 rs6000_preferred_simd_mode (machine_mode mode)
5774 if (TARGET_VSX)
5775 switch (mode)
5777 case DFmode:
5778 return V2DFmode;
5779 default:;
5781 if (TARGET_ALTIVEC || TARGET_VSX)
5782 switch (mode)
5784 case SFmode:
5785 return V4SFmode;
5786 case TImode:
5787 return V1TImode;
5788 case DImode:
5789 return V2DImode;
5790 case SImode:
5791 return V4SImode;
5792 case HImode:
5793 return V8HImode;
5794 case QImode:
5795 return V16QImode;
5796 default:;
5798 if (TARGET_SPE)
5799 switch (mode)
5801 case SFmode:
5802 return V2SFmode;
5803 case SImode:
5804 return V2SImode;
5805 default:;
5807 if (TARGET_PAIRED_FLOAT
5808 && mode == SFmode)
5809 return V2SFmode;
5810 return word_mode;
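/* For example, with -mvsx, DFmode vectorizes as V2DFmode; with only
   -maltivec, SFmode maps to V4SFmode and SImode to V4SImode; a target
   with neither unit falls back to word_mode, i.e. no vectorization.  */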
5813 typedef struct _rs6000_cost_data
5815 struct loop *loop_info;
5816 unsigned cost[3];
5817 } rs6000_cost_data;
5819 /* Test for likely overcommitment of vector hardware resources. If a
5820 loop iteration is relatively large, and too large a percentage of
5821 instructions in the loop are vectorized, the cost model may not
5822 adequately reflect delays from unavailable vector resources.
5823 Penalize the loop body cost for this case. */
5825 static void
5826 rs6000_density_test (rs6000_cost_data *data)
5828 const int DENSITY_PCT_THRESHOLD = 85;
5829 const int DENSITY_SIZE_THRESHOLD = 70;
5830 const int DENSITY_PENALTY = 10;
5831 struct loop *loop = data->loop_info;
5832 basic_block *bbs = get_loop_body (loop);
5833 int nbbs = loop->num_nodes;
5834 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5835 int i, density_pct;
5837 for (i = 0; i < nbbs; i++)
5839 basic_block bb = bbs[i];
5840 gimple_stmt_iterator gsi;
5842 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5844 gimple *stmt = gsi_stmt (gsi);
5845 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5847 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5848 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5849 not_vec_cost++;
5853 free (bbs);
5854 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5856 if (density_pct > DENSITY_PCT_THRESHOLD
5857 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5859 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5860 if (dump_enabled_p ())
5861 dump_printf_loc (MSG_NOTE, vect_location,
5862 "density %d%%, cost %d exceeds threshold, penalizing "
5863 "loop body cost by %d%%", density_pct,
5864 vec_cost + not_vec_cost, DENSITY_PENALTY);
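/* A worked example: with vec_cost = 90 and not_vec_cost = 10,
   density_pct = 9000 / 100 = 90 > 85 and the loop size 100 > 70, so
   the body cost is scaled to 90 * 110 / 100 = 99.  */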
5868 /* Implement targetm.vectorize.init_cost. */
5870 static void *
5871 rs6000_init_cost (struct loop *loop_info)
5873 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5874 data->loop_info = loop_info;
5875 data->cost[vect_prologue] = 0;
5876 data->cost[vect_body] = 0;
5877 data->cost[vect_epilogue] = 0;
5878 return data;
5881 /* Implement targetm.vectorize.add_stmt_cost. */
5883 static unsigned
5884 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5885 struct _stmt_vec_info *stmt_info, int misalign,
5886 enum vect_cost_model_location where)
5888 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5889 unsigned retval = 0;
5891 if (flag_vect_cost_model)
5893 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5894 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5895 misalign);
5896 /* Statements in an inner loop relative to the loop being
5897 vectorized are weighted more heavily. The value here is
5898 arbitrary and could potentially be improved with analysis. */
5899 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5900 count *= 50; /* FIXME. */
5902 retval = (unsigned) (count * stmt_cost);
5903 cost_data->cost[where] += retval;
5906 return retval;
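/* A worked example: a vector_stmt (base cost 1) occurring twice in an
   inner loop of the loop being vectorized is charged 2 * 50 * 1 = 100
   units against the vect_body bucket.  */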
5909 /* Implement targetm.vectorize.finish_cost. */
5911 static void
5912 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5913 unsigned *body_cost, unsigned *epilogue_cost)
5915 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5917 if (cost_data->loop_info)
5918 rs6000_density_test (cost_data);
5920 *prologue_cost = cost_data->cost[vect_prologue];
5921 *body_cost = cost_data->cost[vect_body];
5922 *epilogue_cost = cost_data->cost[vect_epilogue];
5925 /* Implement targetm.vectorize.destroy_cost_data. */
5927 static void
5928 rs6000_destroy_cost_data (void *data)
5930 free (data);
5933 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5934 library with vectorized intrinsics. */
5936 static tree
5937 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5938 tree type_in)
5940 char name[32];
5941 const char *suffix = NULL;
5942 tree fntype, new_fndecl, bdecl = NULL_TREE;
5943 int n_args = 1;
5944 const char *bname;
5945 machine_mode el_mode, in_mode;
5946 int n, in_n;
5948 /* Libmass is suitable for unsafe math only as it does not correctly support
5949 parts of IEEE with the required precision such as denormals. Only support
5950 it if we have VSX to use the simd d2 or f4 functions.
5951 XXX: Add variable length support. */
5952 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5953 return NULL_TREE;
5955 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5956 n = TYPE_VECTOR_SUBPARTS (type_out);
5957 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5958 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5959 if (el_mode != in_mode
5960 || n != in_n)
5961 return NULL_TREE;
5963 switch (fn)
5965 CASE_CFN_ATAN2:
5966 CASE_CFN_HYPOT:
5967 CASE_CFN_POW:
5968 n_args = 2;
5969 gcc_fallthrough ();
5971 CASE_CFN_ACOS:
5972 CASE_CFN_ACOSH:
5973 CASE_CFN_ASIN:
5974 CASE_CFN_ASINH:
5975 CASE_CFN_ATAN:
5976 CASE_CFN_ATANH:
5977 CASE_CFN_CBRT:
5978 CASE_CFN_COS:
5979 CASE_CFN_COSH:
5980 CASE_CFN_ERF:
5981 CASE_CFN_ERFC:
5982 CASE_CFN_EXP2:
5983 CASE_CFN_EXP:
5984 CASE_CFN_EXPM1:
5985 CASE_CFN_LGAMMA:
5986 CASE_CFN_LOG10:
5987 CASE_CFN_LOG1P:
5988 CASE_CFN_LOG2:
5989 CASE_CFN_LOG:
5990 CASE_CFN_SIN:
5991 CASE_CFN_SINH:
5992 CASE_CFN_SQRT:
5993 CASE_CFN_TAN:
5994 CASE_CFN_TANH:
5995 if (el_mode == DFmode && n == 2)
5997 bdecl = mathfn_built_in (double_type_node, fn);
5998 suffix = "d2"; /* pow -> powd2 */
6000 else if (el_mode == SFmode && n == 4)
6002 bdecl = mathfn_built_in (float_type_node, fn);
6003 suffix = "4"; /* powf -> powf4 */
6005 else
6006 return NULL_TREE;
6007 if (!bdecl)
6008 return NULL_TREE;
6009 break;
6011 default:
6012 return NULL_TREE;
6015 gcc_assert (suffix != NULL);
6016 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
6017 if (!bname)
6018 return NULL_TREE;
6020 strcpy (name, bname + sizeof ("__builtin_") - 1);
6021 strcat (name, suffix);
6023 if (n_args == 1)
6024 fntype = build_function_type_list (type_out, type_in, NULL);
6025 else if (n_args == 2)
6026 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
6027 else
6028 gcc_unreachable ();
6030 /* Build a function declaration for the vectorized function. */
6031 new_fndecl = build_decl (BUILTINS_LOCATION,
6032 FUNCTION_DECL, get_identifier (name), fntype);
6033 TREE_PUBLIC (new_fndecl) = 1;
6034 DECL_EXTERNAL (new_fndecl) = 1;
6035 DECL_IS_NOVOPS (new_fndecl) = 1;
6036 TREE_READONLY (new_fndecl) = 1;
6038 return new_fndecl;
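/* A worked example: vectorizing pow over V2DF resolves bdecl to
   __builtin_pow, so bname is "__builtin_pow"; stripping "__builtin_"
   and appending "d2" yields a declaration of the MASS routine powd2
   with type V2DF (V2DF, V2DF).  */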
6041 /* Returns a function decl for a vectorized version of the builtin function
6042 with builtin function code FN and the result vector type TYPE, or NULL_TREE
6043 if it is not available. */
6045 static tree
6046 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
6047 tree type_in)
6049 machine_mode in_mode, out_mode;
6050 int in_n, out_n;
6052 if (TARGET_DEBUG_BUILTIN)
6053 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6054 combined_fn_name (combined_fn (fn)),
6055 GET_MODE_NAME (TYPE_MODE (type_out)),
6056 GET_MODE_NAME (TYPE_MODE (type_in)));
6058 if (TREE_CODE (type_out) != VECTOR_TYPE
6059 || TREE_CODE (type_in) != VECTOR_TYPE
6060 || !TARGET_VECTORIZE_BUILTINS)
6061 return NULL_TREE;
6063 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6064 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6065 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6066 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6068 switch (fn)
6070 CASE_CFN_COPYSIGN:
6071 if (VECTOR_UNIT_VSX_P (V2DFmode)
6072 && out_mode == DFmode && out_n == 2
6073 && in_mode == DFmode && in_n == 2)
6074 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
6075 if (VECTOR_UNIT_VSX_P (V4SFmode)
6076 && out_mode == SFmode && out_n == 4
6077 && in_mode == SFmode && in_n == 4)
6078 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
6079 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6080 && out_mode == SFmode && out_n == 4
6081 && in_mode == SFmode && in_n == 4)
6082 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
6083 break;
6084 CASE_CFN_CEIL:
6085 if (VECTOR_UNIT_VSX_P (V2DFmode)
6086 && out_mode == DFmode && out_n == 2
6087 && in_mode == DFmode && in_n == 2)
6088 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
6089 if (VECTOR_UNIT_VSX_P (V4SFmode)
6090 && out_mode == SFmode && out_n == 4
6091 && in_mode == SFmode && in_n == 4)
6092 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
6093 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6094 && out_mode == SFmode && out_n == 4
6095 && in_mode == SFmode && in_n == 4)
6096 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
6097 break;
6098 CASE_CFN_FLOOR:
6099 if (VECTOR_UNIT_VSX_P (V2DFmode)
6100 && out_mode == DFmode && out_n == 2
6101 && in_mode == DFmode && in_n == 2)
6102 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
6103 if (VECTOR_UNIT_VSX_P (V4SFmode)
6104 && out_mode == SFmode && out_n == 4
6105 && in_mode == SFmode && in_n == 4)
6106 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
6107 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6108 && out_mode == SFmode && out_n == 4
6109 && in_mode == SFmode && in_n == 4)
6110 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
6111 break;
6112 CASE_CFN_FMA:
6113 if (VECTOR_UNIT_VSX_P (V2DFmode)
6114 && out_mode == DFmode && out_n == 2
6115 && in_mode == DFmode && in_n == 2)
6116 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
6117 if (VECTOR_UNIT_VSX_P (V4SFmode)
6118 && out_mode == SFmode && out_n == 4
6119 && in_mode == SFmode && in_n == 4)
6120 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
6121 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6122 && out_mode == SFmode && out_n == 4
6123 && in_mode == SFmode && in_n == 4)
6124 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
6125 break;
6126 CASE_CFN_TRUNC:
6127 if (VECTOR_UNIT_VSX_P (V2DFmode)
6128 && out_mode == DFmode && out_n == 2
6129 && in_mode == DFmode && in_n == 2)
6130 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
6131 if (VECTOR_UNIT_VSX_P (V4SFmode)
6132 && out_mode == SFmode && out_n == 4
6133 && in_mode == SFmode && in_n == 4)
6134 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
6135 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6136 && out_mode == SFmode && out_n == 4
6137 && in_mode == SFmode && in_n == 4)
6138 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
6139 break;
6140 CASE_CFN_NEARBYINT:
6141 if (VECTOR_UNIT_VSX_P (V2DFmode)
6142 && flag_unsafe_math_optimizations
6143 && out_mode == DFmode && out_n == 2
6144 && in_mode == DFmode && in_n == 2)
6145 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
6146 if (VECTOR_UNIT_VSX_P (V4SFmode)
6147 && flag_unsafe_math_optimizations
6148 && out_mode == SFmode && out_n == 4
6149 && in_mode == SFmode && in_n == 4)
6150 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
6151 break;
6152 CASE_CFN_RINT:
6153 if (VECTOR_UNIT_VSX_P (V2DFmode)
6154 && !flag_trapping_math
6155 && out_mode == DFmode && out_n == 2
6156 && in_mode == DFmode && in_n == 2)
6157 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
6158 if (VECTOR_UNIT_VSX_P (V4SFmode)
6159 && !flag_trapping_math
6160 && out_mode == SFmode && out_n == 4
6161 && in_mode == SFmode && in_n == 4)
6162 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
6163 break;
6164 default:
6165 break;
6168 /* Generate calls to libmass if appropriate. */
6169 if (rs6000_veclib_handler)
6170 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
6172 return NULL_TREE;
6175 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6177 static tree
6178 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
6179 tree type_in)
6181 machine_mode in_mode, out_mode;
6182 int in_n, out_n;
6184 if (TARGET_DEBUG_BUILTIN)
6185 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6186 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6187 GET_MODE_NAME (TYPE_MODE (type_out)),
6188 GET_MODE_NAME (TYPE_MODE (type_in)));
6190 if (TREE_CODE (type_out) != VECTOR_TYPE
6191 || TREE_CODE (type_in) != VECTOR_TYPE
6192 || !TARGET_VECTORIZE_BUILTINS)
6193 return NULL_TREE;
6195 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6196 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6197 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6198 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6200 enum rs6000_builtins fn
6201 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6202 switch (fn)
6204 case RS6000_BUILTIN_RSQRTF:
6205 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6206 && out_mode == SFmode && out_n == 4
6207 && in_mode == SFmode && in_n == 4)
6208 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6209 break;
6210 case RS6000_BUILTIN_RSQRT:
6211 if (VECTOR_UNIT_VSX_P (V2DFmode)
6212 && out_mode == DFmode && out_n == 2
6213 && in_mode == DFmode && in_n == 2)
6214 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6215 break;
6216 case RS6000_BUILTIN_RECIPF:
6217 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6218 && out_mode == SFmode && out_n == 4
6219 && in_mode == SFmode && in_n == 4)
6220 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6221 break;
6222 case RS6000_BUILTIN_RECIP:
6223 if (VECTOR_UNIT_VSX_P (V2DFmode)
6224 && out_mode == DFmode && out_n == 2
6225 && in_mode == DFmode && in_n == 2)
6226 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6227 break;
6228 default:
6229 break;
6231 return NULL_TREE;
6234 /* Default CPU string for rs6000*_file_start functions. */
6235 static const char *rs6000_default_cpu;
6237 /* Do anything needed at the start of the asm file. */
6239 static void
6240 rs6000_file_start (void)
6242 char buffer[80];
6243 const char *start = buffer;
6244 FILE *file = asm_out_file;
6246 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6248 default_file_start ();
6250 if (flag_verbose_asm)
6252 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6254 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6256 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6257 start = "";
6260 if (global_options_set.x_rs6000_cpu_index)
6262 fprintf (file, "%s -mcpu=%s", start,
6263 processor_target_table[rs6000_cpu_index].name);
6264 start = "";
6267 if (global_options_set.x_rs6000_tune_index)
6269 fprintf (file, "%s -mtune=%s", start,
6270 processor_target_table[rs6000_tune_index].name);
6271 start = "";
6274 if (PPC405_ERRATUM77)
6276 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6277 start = "";
6280 #ifdef USING_ELFOS_H
6281 switch (rs6000_sdata)
6283 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6284 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6285 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6286 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6289 if (rs6000_sdata && g_switch_value)
6291 fprintf (file, "%s -G %d", start,
6292 g_switch_value);
6293 start = "";
6295 #endif
6297 if (*start == '\0')
6298 putc ('\n', file);
6301 #ifdef USING_ELFOS_H
6302 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6303 && !global_options_set.x_rs6000_cpu_index)
6305 fputs ("\t.machine ", asm_out_file);
6306 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6307 fputs ("power9\n", asm_out_file);
6308 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6309 fputs ("power8\n", asm_out_file);
6310 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6311 fputs ("power7\n", asm_out_file);
6312 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6313 fputs ("power6\n", asm_out_file);
6314 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6315 fputs ("power5\n", asm_out_file);
6316 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6317 fputs ("power4\n", asm_out_file);
6318 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6319 fputs ("ppc64\n", asm_out_file);
6320 else
6321 fputs ("ppc\n", asm_out_file);
6323 #endif
6325 if (DEFAULT_ABI == ABI_ELFv2)
6326 fprintf (file, "\t.abiversion 2\n");
6330 /* Return nonzero if this function is known to have a null epilogue. */
6333 direct_return (void)
6335 if (reload_completed)
6337 rs6000_stack_t *info = rs6000_stack_info ();
6339 if (info->first_gp_reg_save == 32
6340 && info->first_fp_reg_save == 64
6341 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6342 && ! info->lr_save_p
6343 && ! info->cr_save_p
6344 && info->vrsave_size == 0
6345 && ! info->push_p)
6346 return 1;
6349 return 0;
6352 /* Return the number of instructions it takes to form a constant in an
6353 integer register. */
6356 num_insns_constant_wide (HOST_WIDE_INT value)
6358 /* signed constant loadable with addi */
6359 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6360 return 1;
6362 /* constant loadable with addis */
6363 else if ((value & 0xffff) == 0
6364 && (value >> 31 == -1 || value >> 31 == 0))
6365 return 1;
6367 else if (TARGET_POWERPC64)
6369 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6370 HOST_WIDE_INT high = value >> 31;
6372 if (high == 0 || high == -1)
6373 return 2;
6375 high >>= 1;
6377 if (low == 0)
6378 return num_insns_constant_wide (high) + 1;
6379 else if (high == 0)
6380 return num_insns_constant_wide (low) + 1;
6381 else
6382 return (num_insns_constant_wide (high)
6383 + num_insns_constant_wide (low) + 1);
6386 else
6387 return 2;
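/* A worked example: 0x12345678 fails both the addi test (not a 16-bit
   signed value) and the addis test (low half nonzero), so it needs
   2 insns, typically something like
       lis  r9,0x1234
       ori  r9,r9,0x5678
   A full 64-bit constant recurses on each half plus a combining insn.  */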
6391 num_insns_constant (rtx op, machine_mode mode)
6393 HOST_WIDE_INT low, high;
6395 switch (GET_CODE (op))
6397 case CONST_INT:
6398 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6399 && rs6000_is_valid_and_mask (op, mode))
6400 return 2;
6401 else
6402 return num_insns_constant_wide (INTVAL (op));
6404 case CONST_WIDE_INT:
6406 int i;
6407 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6408 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6409 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6410 return ins;
6413 case CONST_DOUBLE:
6414 if (mode == SFmode || mode == SDmode)
6416 long l;
6418 if (DECIMAL_FLOAT_MODE_P (mode))
6419 REAL_VALUE_TO_TARGET_DECIMAL32
6420 (*CONST_DOUBLE_REAL_VALUE (op), l);
6421 else
6422 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6423 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6426 long l[2];
6427 if (DECIMAL_FLOAT_MODE_P (mode))
6428 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6429 else
6430 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6431 high = l[WORDS_BIG_ENDIAN == 0];
6432 low = l[WORDS_BIG_ENDIAN != 0];
6434 if (TARGET_32BIT)
6435 return (num_insns_constant_wide (low)
6436 + num_insns_constant_wide (high));
6437 else
6439 if ((high == 0 && low >= 0)
6440 || (high == -1 && low < 0))
6441 return num_insns_constant_wide (low);
6443 else if (rs6000_is_valid_and_mask (op, mode))
6444 return 2;
6446 else if (low == 0)
6447 return num_insns_constant_wide (high) + 1;
6449 else
6450 return (num_insns_constant_wide (high)
6451 + num_insns_constant_wide (low) + 1);
6454 default:
6455 gcc_unreachable ();
6459 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6460 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6461 corresponding element of the vector, but for V4SFmode and V2SFmode,
6462 the corresponding "float" is interpreted as an SImode integer. */
6464 HOST_WIDE_INT
6465 const_vector_elt_as_int (rtx op, unsigned int elt)
6467 rtx tmp;
6469 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6470 gcc_assert (GET_MODE (op) != V2DImode
6471 && GET_MODE (op) != V2DFmode);
6473 tmp = CONST_VECTOR_ELT (op, elt);
6474 if (GET_MODE (op) == V4SFmode
6475 || GET_MODE (op) == V2SFmode)
6476 tmp = gen_lowpart (SImode, tmp);
6477 return INTVAL (tmp);
6480 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6481 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6482 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6483 all items are set to the same value and contain COPIES replicas of the
6484 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6485 operand and the others are set to the value of the operand's msb. */
6487 static bool
6488 vspltis_constant (rtx op, unsigned step, unsigned copies)
6490 machine_mode mode = GET_MODE (op);
6491 machine_mode inner = GET_MODE_INNER (mode);
6493 unsigned i;
6494 unsigned nunits;
6495 unsigned bitsize;
6496 unsigned mask;
6498 HOST_WIDE_INT val;
6499 HOST_WIDE_INT splat_val;
6500 HOST_WIDE_INT msb_val;
6502 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6503 return false;
6505 nunits = GET_MODE_NUNITS (mode);
6506 bitsize = GET_MODE_BITSIZE (inner);
6507 mask = GET_MODE_MASK (inner);
6509 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6510 splat_val = val;
6511 msb_val = val >= 0 ? 0 : -1;
6513 /* Construct the value to be splatted, if possible. If not, return 0. */
6514 for (i = 2; i <= copies; i *= 2)
6516 HOST_WIDE_INT small_val;
6517 bitsize /= 2;
6518 small_val = splat_val >> bitsize;
6519 mask >>= bitsize;
6520 if (splat_val != ((HOST_WIDE_INT)
6521 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6522 | (small_val & mask)))
6523 return false;
6524 splat_val = small_val;
6527 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6528 if (EASY_VECTOR_15 (splat_val))
6531 /* Also check if we can splat, and then add the result to itself. Do so if
6532 the value is positive, or if the splat instruction is using OP's mode;
6533 for splat_val < 0, the splat and the add should use the same mode. */
6534 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6535 && (splat_val >= 0 || (step == 1 && copies == 1)))
6538 /* Also check if we are loading up the most significant bit, which can be done by
6539 loading up -1 and shifting the value left by -1. */
6540 else if (EASY_VECTOR_MSB (splat_val, inner))
6543 else
6544 return false;
6546 /* Check if VAL is present in every STEP-th element, and the
6547 other elements are filled with its most significant bit. */
6548 for (i = 1; i < nunits; ++i)
6550 HOST_WIDE_INT desired_val;
6551 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6552 if ((i & (step - 1)) == 0)
6553 desired_val = val;
6554 else
6555 desired_val = msb_val;
6557 if (desired_val != const_vector_elt_as_int (op, elt))
6558 return false;
6561 return true;
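/* A worked example: the V8HImode constant with every element 0x0303
   is rejected for vspltish (0x0303 is outside [-16, 15]), but with
   step 1 and copies 2 the folding loop above reduces splat_val to 3,
   so a vspltisb of 3 replicated into each halfword generates it.  */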
6564 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6565 instruction, filling in the bottom elements with 0 or -1.
6567 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6568 for the number of zeroes to shift in, or negative for the number of 0xff
6569 bytes to shift in.
6571 OP is a CONST_VECTOR. */
6574 vspltis_shifted (rtx op)
6576 machine_mode mode = GET_MODE (op);
6577 machine_mode inner = GET_MODE_INNER (mode);
6579 unsigned i, j;
6580 unsigned nunits;
6581 unsigned mask;
6583 HOST_WIDE_INT val;
6585 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6586 return false;
6588 /* We need to create pseudo registers to do the shift, so don't recognize
6589 shift vector constants after reload. */
6590 if (!can_create_pseudo_p ())
6591 return false;
6593 nunits = GET_MODE_NUNITS (mode);
6594 mask = GET_MODE_MASK (inner);
6596 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6598 /* Check if the value can really be the operand of a vspltis[bhw]. */
6599 if (EASY_VECTOR_15 (val))
6602 /* Also check if we are loading up the most significant bit which can be done
6603 by loading up -1 and shifting the value left by -1. */
6604 else if (EASY_VECTOR_MSB (val, inner))
6607 else
6608 return 0;
6610 /* Check if VAL is present in every STEP-th element until we find elements
6611 that are 0 or all 1 bits. */
6612 for (i = 1; i < nunits; ++i)
6614 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6615 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6617 /* If the value isn't the splat value, check for the remaining elements
6618 being 0/-1. */
6619 if (val != elt_val)
6621 if (elt_val == 0)
6623 for (j = i+1; j < nunits; ++j)
6625 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6626 if (const_vector_elt_as_int (op, elt2) != 0)
6627 return 0;
6630 return (nunits - i) * GET_MODE_SIZE (inner);
6633 else if ((elt_val & mask) == mask)
6635 for (j = i+1; j < nunits; ++j)
6637 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6638 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6639 return 0;
6642 return -((nunits - i) * GET_MODE_SIZE (inner));
6645 else
6646 return 0;
6650 /* If all elements are equal, we don't need to do VSLDOI. */
6651 return 0;
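/* A worked example: the big-endian V4SImode constant { 5, 0, 0, 0 }
   splats 5 (an EASY_VECTOR_15 value) and finds the remaining three
   elements zero, so this returns (4 - 1) * 4 = 12, i.e. a vspltisw
   followed by a VSLDOI shifting in 12 zero bytes.  */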
6655 /* Return true if OP is of the given MODE and can be synthesized
6656 with a vspltisb, vspltish or vspltisw. */
6658 bool
6659 easy_altivec_constant (rtx op, machine_mode mode)
6661 unsigned step, copies;
6663 if (mode == VOIDmode)
6664 mode = GET_MODE (op);
6665 else if (mode != GET_MODE (op))
6666 return false;
6668 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6669 constants. */
6670 if (mode == V2DFmode)
6671 return zero_constant (op, mode);
6673 else if (mode == V2DImode)
6675 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6676 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6677 return false;
6679 if (zero_constant (op, mode))
6680 return true;
6682 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6683 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6684 return true;
6686 return false;
6689 /* V1TImode is a special container for TImode. Ignore for now. */
6690 else if (mode == V1TImode)
6691 return false;
6693 /* Start with a vspltisw. */
6694 step = GET_MODE_NUNITS (mode) / 4;
6695 copies = 1;
6697 if (vspltis_constant (op, step, copies))
6698 return true;
6700 /* Then try with a vspltish. */
6701 if (step == 1)
6702 copies <<= 1;
6703 else
6704 step >>= 1;
6706 if (vspltis_constant (op, step, copies))
6707 return true;
6709 /* And finally a vspltisb. */
6710 if (step == 1)
6711 copies <<= 1;
6712 else
6713 step >>= 1;
6715 if (vspltis_constant (op, step, copies))
6716 return true;
6718 if (vspltis_shifted (op) != 0)
6719 return true;
6721 return false;
6724 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6725 result is OP. Abort if it is not possible. */
6728 gen_easy_altivec_constant (rtx op)
6730 machine_mode mode = GET_MODE (op);
6731 int nunits = GET_MODE_NUNITS (mode);
6732 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6733 unsigned step = nunits / 4;
6734 unsigned copies = 1;
6736 /* Start with a vspltisw. */
6737 if (vspltis_constant (op, step, copies))
6738 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6740 /* Then try with a vspltish. */
6741 if (step == 1)
6742 copies <<= 1;
6743 else
6744 step >>= 1;
6746 if (vspltis_constant (op, step, copies))
6747 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6749 /* And finally a vspltisb. */
6750 if (step == 1)
6751 copies <<= 1;
6752 else
6753 step >>= 1;
6755 if (vspltis_constant (op, step, copies))
6756 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6758 gcc_unreachable ();
6761 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6762 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6764 Return the number of instructions needed (1 or 2) via the address
6765 pointed to by NUM_INSNS_PTR.
6767 Return the constant that is being split via CONSTANT_PTR. */
6769 bool
6770 xxspltib_constant_p (rtx op,
6771 machine_mode mode,
6772 int *num_insns_ptr,
6773 int *constant_ptr)
6775 size_t nunits = GET_MODE_NUNITS (mode);
6776 size_t i;
6777 HOST_WIDE_INT value;
6778 rtx element;
6780 /* Set the returned values to out-of-bounds values. */
6781 *num_insns_ptr = -1;
6782 *constant_ptr = 256;
6784 if (!TARGET_P9_VECTOR)
6785 return false;
6787 if (mode == VOIDmode)
6788 mode = GET_MODE (op);
6790 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6791 return false;
6793 /* Handle (vec_duplicate <constant>). */
6794 if (GET_CODE (op) == VEC_DUPLICATE)
6796 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6797 && mode != V2DImode)
6798 return false;
6800 element = XEXP (op, 0);
6801 if (!CONST_INT_P (element))
6802 return false;
6804 value = INTVAL (element);
6805 if (!IN_RANGE (value, -128, 127))
6806 return false;
6809 /* Handle (const_vector [...]). */
6810 else if (GET_CODE (op) == CONST_VECTOR)
6812 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6813 && mode != V2DImode)
6814 return false;
6816 element = CONST_VECTOR_ELT (op, 0);
6817 if (!CONST_INT_P (element))
6818 return false;
6820 value = INTVAL (element);
6821 if (!IN_RANGE (value, -128, 127))
6822 return false;
6824 for (i = 1; i < nunits; i++)
6826 element = CONST_VECTOR_ELT (op, i);
6827 if (!CONST_INT_P (element))
6828 return false;
6830 if (value != INTVAL (element))
6831 return false;
6835 /* Handle integer constants being loaded into the upper part of the VSX
6836 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6837 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6838 else if (CONST_INT_P (op))
6840 if (!SCALAR_INT_MODE_P (mode))
6841 return false;
6843 value = INTVAL (op);
6844 if (!IN_RANGE (value, -128, 127))
6845 return false;
6847 if (!IN_RANGE (value, -1, 0))
6849 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6850 return false;
6852 if (EASY_VECTOR_15 (value))
6853 return false;
6857 else
6858 return false;
6860 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6861 sign extend. Special case 0/-1 to allow getting any VSX register instead
6862 of an Altivec register. */
6863 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6864 && EASY_VECTOR_15 (value))
6865 return false;
6867 /* Return # of instructions and the constant byte for XXSPLTIB. */
6868 if (mode == V16QImode)
6869 *num_insns_ptr = 1;
6871 else if (IN_RANGE (value, -1, 0))
6872 *num_insns_ptr = 1;
6874 else
6875 *num_insns_ptr = 2;
6877 *constant_ptr = (int) value;
6878 return true;
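/* Worked examples: a V16QImode splat of 37 needs one xxspltib
   (*num_insns_ptr 1, *constant_ptr 37); a V4SImode splat of 100 needs
   xxspltib plus a sign extension such as vextsb2w (*num_insns_ptr 2);
   a V4SImode splat of 5 returns false above, since vspltisw already
   handles it directly.  */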
6881 const char *
6882 output_vec_const_move (rtx *operands)
6884 int cst, cst2, shift;
6885 machine_mode mode;
6886 rtx dest, vec;
6888 dest = operands[0];
6889 vec = operands[1];
6890 mode = GET_MODE (dest);
6892 if (TARGET_VSX)
6894 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6895 int xxspltib_value = 256;
6896 int num_insns = -1;
6898 if (zero_constant (vec, mode))
6900 if (TARGET_P9_VECTOR)
6901 return "xxspltib %x0,0";
6903 else if (dest_vmx_p)
6904 return "vspltisw %0,0";
6906 else
6907 return "xxlxor %x0,%x0,%x0";
6910 if (all_ones_constant (vec, mode))
6912 if (TARGET_P9_VECTOR)
6913 return "xxspltib %x0,255";
6915 else if (dest_vmx_p)
6916 return "vspltisw %0,-1";
6918 else if (TARGET_P8_VECTOR)
6919 return "xxlorc %x0,%x0,%x0";
6921 else
6922 gcc_unreachable ();
6925 if (TARGET_P9_VECTOR
6926 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6928 if (num_insns == 1)
6930 operands[2] = GEN_INT (xxspltib_value & 0xff);
6931 return "xxspltib %x0,%2";
6934 return "#";
6938 if (TARGET_ALTIVEC)
6940 rtx splat_vec;
6942 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6943 if (zero_constant (vec, mode))
6944 return "vspltisw %0,0";
6946 if (all_ones_constant (vec, mode))
6947 return "vspltisw %0,-1";
6949 /* Do we need to construct a value using VSLDOI? */
6950 shift = vspltis_shifted (vec);
6951 if (shift != 0)
6952 return "#";
6954 splat_vec = gen_easy_altivec_constant (vec);
6955 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6956 operands[1] = XEXP (splat_vec, 0);
6957 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6958 return "#";
6960 switch (GET_MODE (splat_vec))
6962 case V4SImode:
6963 return "vspltisw %0,%1";
6965 case V8HImode:
6966 return "vspltish %0,%1";
6968 case V16QImode:
6969 return "vspltisb %0,%1";
6971 default:
6972 gcc_unreachable ();
6976 gcc_assert (TARGET_SPE);
6978 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6979 pattern of V1DI, V4HI, and V2SF.
6981 FIXME: We should probably return # and add post reload
6982 splitters for these, but this way is so easy ;-). */
6983 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6984 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6985 operands[1] = CONST_VECTOR_ELT (vec, 0);
6986 operands[2] = CONST_VECTOR_ELT (vec, 1);
6987 if (cst == cst2)
6988 return "li %0,%1\n\tevmergelo %0,%0,%0";
6989 else if (WORDS_BIG_ENDIAN)
6990 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6991 else
6992 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6995 /* Initialize the paired-float vector TARGET to VALS. */
6997 void
6998 paired_expand_vector_init (rtx target, rtx vals)
7000 machine_mode mode = GET_MODE (target);
7001 int n_elts = GET_MODE_NUNITS (mode);
7002 int n_var = 0;
7003 rtx x, new_rtx, tmp, constant_op, op1, op2;
7004 int i;
7006 for (i = 0; i < n_elts; ++i)
7008 x = XVECEXP (vals, 0, i);
7009 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7010 ++n_var;
7012 if (n_var == 0)
7014 /* Load from constant pool. */
7015 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
7016 return;
7019 if (n_var == 2)
7021 /* The vector is initialized only with non-constants. */
7022 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
7023 XVECEXP (vals, 0, 1));
7025 emit_move_insn (target, new_rtx);
7026 return;
7029 /* One field is non-constant and the other one is a constant. Load the
7030 constant from the constant pool and use the ps_merge instruction to
7031 construct the whole vector. */
7032 op1 = XVECEXP (vals, 0, 0);
7033 op2 = XVECEXP (vals, 0, 1);
7035 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
7037 tmp = gen_reg_rtx (GET_MODE (constant_op));
7038 emit_move_insn (tmp, constant_op);
7040 if (CONSTANT_P (op1))
7041 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
7042 else
7043 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
7045 emit_move_insn (target, new_rtx);
7048 void
7049 paired_expand_vector_move (rtx operands[])
7051 rtx op0 = operands[0], op1 = operands[1];
7053 emit_move_insn (op0, op1);
 7056 /* Emit vector compare for code RCODE.  DEST is the destination, OP1 and
 7057 OP2 are the two VEC_COND_EXPR operands, and CC_OP0 and CC_OP1 are the
 7058 two operands of the relational operation RCODE.  This is a recursive
 7059 function.  */
7061 static void
7062 paired_emit_vector_compare (enum rtx_code rcode,
7063 rtx dest, rtx op0, rtx op1,
7064 rtx cc_op0, rtx cc_op1)
7066 rtx tmp = gen_reg_rtx (V2SFmode);
7067 rtx tmp1, max, min;
7069 gcc_assert (TARGET_PAIRED_FLOAT);
7070 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
7072 switch (rcode)
7074 case LT:
7075 case LTU:
7076 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7077 return;
7078 case GE:
7079 case GEU:
7080 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7081 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
7082 return;
7083 case LE:
7084 case LEU:
7085 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
7086 return;
7087 case GT:
7088 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7089 return;
7090 case EQ:
7091 tmp1 = gen_reg_rtx (V2SFmode);
7092 max = gen_reg_rtx (V2SFmode);
7093 min = gen_reg_rtx (V2SFmode);
7096 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7097 emit_insn (gen_selv2sf4
7098 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7099 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
7100 emit_insn (gen_selv2sf4
7101 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7102 emit_insn (gen_subv2sf3 (tmp1, min, max));
7103 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
7104 return;
7105 case NE:
7106 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
7107 return;
7108 case UNLE:
7109 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7110 return;
7111 case UNLT:
7112 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
7113 return;
7114 case UNGE:
7115 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7116 return;
7117 case UNGT:
7118 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
7119 return;
7120 default:
7121 gcc_unreachable ();
7124 return;
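 /* A note on the EQ sequence above (illustrative reasoning, assuming
    selv2sf4 implements a per-element "DEST = TMP >= 0 ? OP0 : OP1"
    select): the first two selects compute the per-element MAX and MIN
    of CC_OP0/CC_OP1, so MIN - MAX is <= 0 everywhere and equals 0
    exactly where the inputs compare equal; the final select therefore
    picks OP0 precisely on equality.  */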
7127 /* Emit vector conditional expression.
7128 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7129 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
 7131 int
 7132 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
7133 rtx cond, rtx cc_op0, rtx cc_op1)
7135 enum rtx_code rcode = GET_CODE (cond);
7137 if (!TARGET_PAIRED_FLOAT)
7138 return 0;
7140 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
7142 return 1;
7145 /* Initialize vector TARGET to VALS. */
7147 void
7148 rs6000_expand_vector_init (rtx target, rtx vals)
7150 machine_mode mode = GET_MODE (target);
7151 machine_mode inner_mode = GET_MODE_INNER (mode);
7152 int n_elts = GET_MODE_NUNITS (mode);
7153 int n_var = 0, one_var = -1;
7154 bool all_same = true, all_const_zero = true;
7155 rtx x, mem;
7156 int i;
7158 for (i = 0; i < n_elts; ++i)
7160 x = XVECEXP (vals, 0, i);
7161 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7162 ++n_var, one_var = i;
7163 else if (x != CONST0_RTX (inner_mode))
7164 all_const_zero = false;
7166 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7167 all_same = false;
7170 if (n_var == 0)
7172 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7173 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
7174 if ((int_vector_p || TARGET_VSX) && all_const_zero)
7176 /* Zero register. */
7177 emit_move_insn (target, CONST0_RTX (mode));
7178 return;
7180 else if (int_vector_p && easy_vector_constant (const_vec, mode))
7182 /* Splat immediate. */
7183 emit_insn (gen_rtx_SET (target, const_vec));
7184 return;
7186 else
7188 /* Load from constant pool. */
7189 emit_move_insn (target, const_vec);
7190 return;
7194 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7195 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7197 rtx op[2];
7198 size_t i;
7199 size_t num_elements = all_same ? 1 : 2;
7200 for (i = 0; i < num_elements; i++)
7202 op[i] = XVECEXP (vals, 0, i);
7203 /* Just in case there is a SUBREG with a smaller mode, do a
7204 conversion. */
7205 if (GET_MODE (op[i]) != inner_mode)
7207 rtx tmp = gen_reg_rtx (inner_mode);
7208 convert_move (tmp, op[i], 0);
7209 op[i] = tmp;
7211 /* Allow load with splat double word. */
7212 else if (MEM_P (op[i]))
7214 if (!all_same)
7215 op[i] = force_reg (inner_mode, op[i]);
7217 else if (!REG_P (op[i]))
7218 op[i] = force_reg (inner_mode, op[i]);
7221 if (all_same)
7223 if (mode == V2DFmode)
7224 emit_insn (gen_vsx_splat_v2df (target, op[0]));
7225 else
7226 emit_insn (gen_vsx_splat_v2di (target, op[0]));
7228 else
7230 if (mode == V2DFmode)
7231 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7232 else
7233 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7235 return;
7238 /* Special case initializing vector int if we are on 64-bit systems with
7239 direct move or we have the ISA 3.0 instructions. */
7240 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7241 && TARGET_DIRECT_MOVE_64BIT)
7243 if (all_same)
7245 rtx element0 = XVECEXP (vals, 0, 0);
7246 if (MEM_P (element0))
7247 element0 = rs6000_address_for_fpconvert (element0);
7248 else
7249 element0 = force_reg (SImode, element0);
7251 if (TARGET_P9_VECTOR)
7252 emit_insn (gen_vsx_splat_v4si (target, element0));
7253 else
7255 rtx tmp = gen_reg_rtx (DImode);
7256 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7257 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7259 return;
7261 else
7263 rtx elements[4];
7264 size_t i;
7266 for (i = 0; i < 4; i++)
7268 elements[i] = XVECEXP (vals, 0, i);
7269 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7270 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7273 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7274 elements[2], elements[3]));
7275 return;
 7279 /* With single precision floating point on VSX, exploit the fact that
 7280 single precision is internally represented as a double: either build
 7281 two V2DF vectors and convert them to single precision, or do one
 7282 conversion and splat the result to the other elements.  */
7283 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7285 if (all_same)
7287 rtx element0 = XVECEXP (vals, 0, 0);
7289 if (TARGET_P9_VECTOR)
7291 if (MEM_P (element0))
7292 element0 = rs6000_address_for_fpconvert (element0);
7294 emit_insn (gen_vsx_splat_v4sf (target, element0));
7297 else
7299 rtx freg = gen_reg_rtx (V4SFmode);
7300 rtx sreg = force_reg (SFmode, element0);
7301 rtx cvt = (TARGET_XSCVDPSPN
7302 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7303 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7305 emit_insn (cvt);
7306 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7307 const0_rtx));
7310 else
7312 rtx dbl_even = gen_reg_rtx (V2DFmode);
7313 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7314 rtx flt_even = gen_reg_rtx (V4SFmode);
7315 rtx flt_odd = gen_reg_rtx (V4SFmode);
7316 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7317 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7318 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7319 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7321 /* Use VMRGEW if we can instead of doing a permute. */
7322 if (TARGET_P8_VECTOR)
7324 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7325 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7326 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7327 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7328 if (BYTES_BIG_ENDIAN)
7329 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7330 else
7331 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7333 else
7335 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7336 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7337 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7338 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7339 rs6000_expand_extract_even (target, flt_even, flt_odd);
7342 return;
 7345 /* Special case splat initializations of vector short/char if we are
 7346 on 64-bit systems with direct move.  */
7347 if (all_same && TARGET_DIRECT_MOVE_64BIT
7348 && (mode == V16QImode || mode == V8HImode))
7350 rtx op0 = XVECEXP (vals, 0, 0);
7351 rtx di_tmp = gen_reg_rtx (DImode);
7353 if (!REG_P (op0))
7354 op0 = force_reg (GET_MODE_INNER (mode), op0);
7356 if (mode == V16QImode)
7358 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7359 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7360 return;
7363 if (mode == V8HImode)
7365 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7366 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7367 return;
 7371 /* Store value to stack temp.  Load vector element.  Splat.  However, splat
 7372 of 64-bit items is not supported on AltiVec.  */
7373 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7375 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7376 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7377 XVECEXP (vals, 0, 0));
7378 x = gen_rtx_UNSPEC (VOIDmode,
7379 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7380 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7381 gen_rtvec (2,
7382 gen_rtx_SET (target, mem),
7383 x)));
7384 x = gen_rtx_VEC_SELECT (inner_mode, target,
7385 gen_rtx_PARALLEL (VOIDmode,
7386 gen_rtvec (1, const0_rtx)));
7387 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7388 return;
7391 /* One field is non-constant. Load constant then overwrite
7392 varying field. */
7393 if (n_var == 1)
7395 rtx copy = copy_rtx (vals);
7397 /* Load constant part of vector, substitute neighboring value for
7398 varying element. */
7399 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7400 rs6000_expand_vector_init (target, copy);
7402 /* Insert variable. */
7403 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7404 return;
7407 /* Construct the vector in memory one field at a time
7408 and load the whole vector. */
7409 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7410 for (i = 0; i < n_elts; i++)
7411 emit_move_insn (adjust_address_nv (mem, inner_mode,
7412 i * GET_MODE_SIZE (inner_mode)),
7413 XVECEXP (vals, 0, i));
7414 emit_move_insn (target, mem);
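 /* Worked example of the n_var == 1 path above: initializing
    { 1, 2, x, 4 } first loads the constant vector { 1, 2, 4, 4 }
    (element 2 borrows its right-hand neighbor), then
    rs6000_expand_vector_set overwrites element 2 with X.  */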
7417 /* Set field ELT of TARGET to VAL. */
7419 void
7420 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7422 machine_mode mode = GET_MODE (target);
7423 machine_mode inner_mode = GET_MODE_INNER (mode);
7424 rtx reg = gen_reg_rtx (mode);
7425 rtx mask, mem, x;
7426 int width = GET_MODE_SIZE (inner_mode);
7427 int i;
7429 val = force_reg (GET_MODE (val), val);
7431 if (VECTOR_MEM_VSX_P (mode))
7433 rtx insn = NULL_RTX;
7434 rtx elt_rtx = GEN_INT (elt);
7436 if (mode == V2DFmode)
7437 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7439 else if (mode == V2DImode)
7440 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7442 else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
7443 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
7445 if (mode == V4SImode)
7446 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7447 else if (mode == V8HImode)
7448 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7449 else if (mode == V16QImode)
7450 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7453 if (insn)
7455 emit_insn (insn);
7456 return;
7460 /* Simplify setting single element vectors like V1TImode. */
7461 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7463 emit_move_insn (target, gen_lowpart (mode, val));
7464 return;
7467 /* Load single variable value. */
7468 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7469 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7470 x = gen_rtx_UNSPEC (VOIDmode,
7471 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7472 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7473 gen_rtvec (2,
7474 gen_rtx_SET (reg, mem),
7475 x)));
7477 /* Linear sequence. */
7478 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7479 for (i = 0; i < 16; ++i)
7480 XVECEXP (mask, 0, i) = GEN_INT (i);
7482 /* Set permute mask to insert element into target. */
7483 for (i = 0; i < width; ++i)
7484 XVECEXP (mask, 0, elt*width + i)
7485 = GEN_INT (i + 0x10);
7486 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7488 if (BYTES_BIG_ENDIAN)
7489 x = gen_rtx_UNSPEC (mode,
7490 gen_rtvec (3, target, reg,
7491 force_reg (V16QImode, x)),
7492 UNSPEC_VPERM);
7493 else
7495 if (TARGET_P9_VECTOR)
7496 x = gen_rtx_UNSPEC (mode,
7497 gen_rtvec (3, target, reg,
7498 force_reg (V16QImode, x)),
7499 UNSPEC_VPERMR);
7500 else
7502 /* Invert selector. We prefer to generate VNAND on P8 so
7503 that future fusion opportunities can kick in, but must
7504 generate VNOR elsewhere. */
7505 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7506 rtx iorx = (TARGET_P8_VECTOR
7507 ? gen_rtx_IOR (V16QImode, notx, notx)
7508 : gen_rtx_AND (V16QImode, notx, notx));
7509 rtx tmp = gen_reg_rtx (V16QImode);
7510 emit_insn (gen_rtx_SET (tmp, iorx));
7512 /* Permute with operands reversed and adjusted selector. */
7513 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7514 UNSPEC_VPERM);
7518 emit_insn (gen_rtx_SET (target, x));
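 /* Illustrative sketch (not part of the GCC sources;
    build_insert_selector is a hypothetical name): standalone C computing
    the big-endian VPERM insert selector built above, under the
    assumption that a VPERM selector numbers the bytes of its first input
    0x00-0x0f and of its second input 0x10-0x1f.  Kept under #if 0 so it
    does not participate in the build.  */
 #if 0
 #include <stdio.h>

 static void
 build_insert_selector (unsigned char mask[16], int elt, int width)
 {
   int i;

   /* Identity permutation: copy all 16 bytes of the original vector.  */
   for (i = 0; i < 16; i++)
     mask[i] = i;

   /* Redirect the bytes of element ELT to the second input, which holds
      the splatted new value.  */
   for (i = 0; i < width; i++)
     mask[elt * width + i] = 0x10 + i;
 }

 int
 main (void)
 {
   unsigned char mask[16];
   int i;

   /* Inserting element 2 of a V4SI: prints
      00 01 02 03 04 05 06 07 10 11 12 13 0c 0d 0e 0f  */
   build_insert_selector (mask, 2, 4);
   for (i = 0; i < 16; i++)
     printf ("%02x ", mask[i]);
   printf ("\n");
   return 0;
 }
 #endif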
7521 /* Extract field ELT from VEC into TARGET. */
7523 void
7524 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7526 machine_mode mode = GET_MODE (vec);
7527 machine_mode inner_mode = GET_MODE_INNER (mode);
7528 rtx mem;
7530 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7532 switch (mode)
7534 default:
7535 break;
7536 case V1TImode:
7537 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7538 emit_move_insn (target, gen_lowpart (TImode, vec));
7539 break;
7540 case V2DFmode:
7541 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7542 return;
7543 case V2DImode:
7544 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7545 return;
7546 case V4SFmode:
7547 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7548 return;
7549 case V16QImode:
7550 if (TARGET_DIRECT_MOVE_64BIT)
7552 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7553 return;
7555 else
7556 break;
7557 case V8HImode:
7558 if (TARGET_DIRECT_MOVE_64BIT)
7560 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7561 return;
7563 else
7564 break;
7565 case V4SImode:
7566 if (TARGET_DIRECT_MOVE_64BIT)
7568 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7569 return;
7571 break;
7574 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7575 && TARGET_DIRECT_MOVE_64BIT)
7577 if (GET_MODE (elt) != DImode)
7579 rtx tmp = gen_reg_rtx (DImode);
7580 convert_move (tmp, elt, 0);
7581 elt = tmp;
7583 else if (!REG_P (elt))
7584 elt = force_reg (DImode, elt);
7586 switch (mode)
7588 case V2DFmode:
7589 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7590 return;
7592 case V2DImode:
7593 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7594 return;
7596 case V4SFmode:
7597 if (TARGET_UPPER_REGS_SF)
7599 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7600 return;
7602 break;
7604 case V4SImode:
7605 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7606 return;
7608 case V8HImode:
7609 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7610 return;
7612 case V16QImode:
7613 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7614 return;
7616 default:
7617 gcc_unreachable ();
7621 gcc_assert (CONST_INT_P (elt));
7623 /* Allocate mode-sized buffer. */
7624 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7626 emit_move_insn (mem, vec);
7628 /* Add offset to field within buffer matching vector element. */
7629 mem = adjust_address_nv (mem, inner_mode,
7630 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7632 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
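 /* Worked example of the memory fallback above: extracting constant
    element 3 of a V8HImode vector spills the vector to a stack slot and
    reloads the HImode value at byte offset 3 * 2 = 6.  */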
 7635 /* Helper function to return the register number of an RTX.  */
7636 static inline int
7637 regno_or_subregno (rtx op)
7639 if (REG_P (op))
7640 return REGNO (op);
7641 else if (SUBREG_P (op))
7642 return subreg_regno (op);
7643 else
7644 gcc_unreachable ();
 7647 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
 7648 within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
 7649 temporary (BASE_TMP) to fix up the address.  Return the new memory address
 7650 that is valid for reads or writes to a given register (SCALAR_REG).  */
 7652 static rtx
 7653 rs6000_adjust_vec_address (rtx scalar_reg,
7654 rtx mem,
7655 rtx element,
7656 rtx base_tmp,
7657 machine_mode scalar_mode)
7659 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7660 rtx addr = XEXP (mem, 0);
7661 rtx element_offset;
7662 rtx new_addr;
7663 bool valid_addr_p;
7665 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7666 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7668 /* Calculate what we need to add to the address to get the element
7669 address. */
7670 if (CONST_INT_P (element))
7671 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7672 else
7674 int byte_shift = exact_log2 (scalar_size);
7675 gcc_assert (byte_shift >= 0);
7677 if (byte_shift == 0)
7678 element_offset = element;
7680 else
7682 if (TARGET_POWERPC64)
7683 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7684 else
7685 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7687 element_offset = base_tmp;
7691 /* Create the new address pointing to the element within the vector. If we
7692 are adding 0, we don't have to change the address. */
7693 if (element_offset == const0_rtx)
7694 new_addr = addr;
7696 /* A simple indirect address can be converted into a reg + offset
7697 address. */
7698 else if (REG_P (addr) || SUBREG_P (addr))
7699 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7701 /* Optimize D-FORM addresses with constant offset with a constant element, to
7702 include the element offset in the address directly. */
7703 else if (GET_CODE (addr) == PLUS)
7705 rtx op0 = XEXP (addr, 0);
7706 rtx op1 = XEXP (addr, 1);
7707 rtx insn;
7709 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7710 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7712 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7713 rtx offset_rtx = GEN_INT (offset);
7715 if (IN_RANGE (offset, -32768, 32767)
7716 && (scalar_size < 8 || (offset & 0x3) == 0))
7717 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7718 else
7720 emit_move_insn (base_tmp, offset_rtx);
7721 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7724 else
7726 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7727 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7729 /* Note, ADDI requires the register being added to be a base
7730 register. If the register was R0, load it up into the temporary
7731 and do the add. */
7732 if (op1_reg_p
7733 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7735 insn = gen_add3_insn (base_tmp, op1, element_offset);
7736 gcc_assert (insn != NULL_RTX);
7737 emit_insn (insn);
7740 else if (ele_reg_p
7741 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7743 insn = gen_add3_insn (base_tmp, element_offset, op1);
7744 gcc_assert (insn != NULL_RTX);
7745 emit_insn (insn);
7748 else
7750 emit_move_insn (base_tmp, op1);
7751 emit_insn (gen_add2_insn (base_tmp, element_offset));
7754 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7758 else
7760 emit_move_insn (base_tmp, addr);
7761 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7764 /* If we have a PLUS, we need to see whether the particular register class
7765 allows for D-FORM or X-FORM addressing. */
7766 if (GET_CODE (new_addr) == PLUS)
7768 rtx op1 = XEXP (new_addr, 1);
7769 addr_mask_type addr_mask;
7770 int scalar_regno = regno_or_subregno (scalar_reg);
7772 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7773 if (INT_REGNO_P (scalar_regno))
7774 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7776 else if (FP_REGNO_P (scalar_regno))
7777 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7779 else if (ALTIVEC_REGNO_P (scalar_regno))
7780 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7782 else
7783 gcc_unreachable ();
7785 if (REG_P (op1) || SUBREG_P (op1))
7786 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7787 else
7788 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7791 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7792 valid_addr_p = true;
7794 else
7795 valid_addr_p = false;
7797 if (!valid_addr_p)
7799 emit_move_insn (base_tmp, new_addr);
7800 new_addr = base_tmp;
7803 return change_address (mem, scalar_mode, new_addr);
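 /* Worked example: for a V4SImode MEM at (plus (reg 3) (const_int 16))
    and constant ELEMENT 2 with 4-byte scalars, ELEMENT_OFFSET is 8; the
    combined offset 24 fits the signed 16-bit D field, so the result is
    simply (mem:SI (plus (reg 3) (const_int 24))).  */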
7806 /* Split a variable vec_extract operation into the component instructions. */
7808 void
7809 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7810 rtx tmp_altivec)
7812 machine_mode mode = GET_MODE (src);
7813 machine_mode scalar_mode = GET_MODE (dest);
7814 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7815 int byte_shift = exact_log2 (scalar_size);
7817 gcc_assert (byte_shift >= 0);
7819 /* If we are given a memory address, optimize to load just the element. We
7820 don't have to adjust the vector element number on little endian
7821 systems. */
7822 if (MEM_P (src))
7824 gcc_assert (REG_P (tmp_gpr));
7825 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7826 tmp_gpr, scalar_mode));
7827 return;
7830 else if (REG_P (src) || SUBREG_P (src))
7832 int bit_shift = byte_shift + 3;
7833 rtx element2;
7834 int dest_regno = regno_or_subregno (dest);
7835 int src_regno = regno_or_subregno (src);
7836 int element_regno = regno_or_subregno (element);
7838 gcc_assert (REG_P (tmp_gpr));
7840 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7841 a general purpose register. */
7842 if (TARGET_P9_VECTOR
7843 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7844 && INT_REGNO_P (dest_regno)
7845 && ALTIVEC_REGNO_P (src_regno)
7846 && INT_REGNO_P (element_regno))
7848 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7849 rtx element_si = gen_rtx_REG (SImode, element_regno);
7851 if (mode == V16QImode)
7852 emit_insn (VECTOR_ELT_ORDER_BIG
7853 ? gen_vextublx (dest_si, element_si, src)
7854 : gen_vextubrx (dest_si, element_si, src));
7856 else if (mode == V8HImode)
7858 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7859 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7860 emit_insn (VECTOR_ELT_ORDER_BIG
7861 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7862 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7866 else
7868 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7869 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7870 emit_insn (VECTOR_ELT_ORDER_BIG
7871 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7872 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7875 return;
7879 gcc_assert (REG_P (tmp_altivec));
 7881 /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
 7882 an XOR; otherwise we need to subtract.  The shift amount is chosen so
 7883 that VSLO will shift the element into the upper position (adding 3
 7884 converts a byte shift into a bit shift).  */
7885 if (scalar_size == 8)
7887 if (!VECTOR_ELT_ORDER_BIG)
7889 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7890 element2 = tmp_gpr;
7892 else
7893 element2 = element;
7895 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7896 bit. */
7897 emit_insn (gen_rtx_SET (tmp_gpr,
7898 gen_rtx_AND (DImode,
7899 gen_rtx_ASHIFT (DImode,
7900 element2,
7901 GEN_INT (6)),
7902 GEN_INT (64))));
7904 else
7906 if (!VECTOR_ELT_ORDER_BIG)
7908 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7910 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7911 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7912 element2 = tmp_gpr;
7914 else
7915 element2 = element;
7917 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7920 /* Get the value into the lower byte of the Altivec register where VSLO
7921 expects it. */
7922 if (TARGET_P9_VECTOR)
7923 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7924 else if (can_create_pseudo_p ())
7925 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7926 else
7928 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7929 emit_move_insn (tmp_di, tmp_gpr);
7930 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7933 /* Do the VSLO to get the value into the final location. */
7934 switch (mode)
7936 case V2DFmode:
7937 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7938 return;
7940 case V2DImode:
7941 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7942 return;
7944 case V4SFmode:
7946 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7947 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7948 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7949 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7950 tmp_altivec));
7952 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7953 return;
7956 case V4SImode:
7957 case V8HImode:
7958 case V16QImode:
7960 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7961 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7962 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7963 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7964 tmp_altivec));
7965 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7966 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7967 GEN_INT (64 - (8 * scalar_size))));
7968 return;
7971 default:
7972 gcc_unreachable ();
7975 return;
7977 else
7978 gcc_unreachable ();
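 /* The little-endian renumbering above, worked through: for V4SImode a
    variable ELEMENT of 1 becomes (4 - 1) - (1 & 3) = 2 via the AND/SUB
    pair, while for the two-element V2DImode/V2DFmode case the same flip
    is just an XOR with 1.  */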
7981 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7982 two SImode values. */
7984 static void
7985 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7987 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7989 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7991 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7992 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7994 emit_move_insn (dest, GEN_INT (const1 | const2));
7995 return;
7998 /* Put si1 into upper 32-bits of dest. */
7999 if (CONST_INT_P (si1))
8000 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
8001 else
8003 /* Generate RLDIC. */
8004 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
8005 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
8006 rtx mask_rtx = GEN_INT (mask_32bit << 32);
8007 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
8008 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
8009 emit_insn (gen_rtx_SET (dest, and_rtx));
8012 /* Put si2 into the temporary. */
8013 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
8014 if (CONST_INT_P (si2))
8015 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
8016 else
8017 emit_insn (gen_zero_extendsidi2 (tmp, si2));
8019 /* Combine the two parts. */
8020 emit_insn (gen_iordi3 (dest, dest, tmp));
8021 return;
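 /* Illustrative sketch (not part of the GCC sources; merge_si_pair is a
    hypothetical name): standalone C mirroring the shift/mask/IOR RTL
    built above, kept under #if 0 so it does not participate in the
    build.  */
 #if 0
 #include <stdint.h>
 #include <stdio.h>

 /* Merge two 32-bit values into one 64-bit register image: SI1 in the
    upper half, SI2 zero-extended into the lower half.  */
 static uint64_t
 merge_si_pair (uint32_t si1, uint32_t si2)
 {
   return ((uint64_t) si1 << 32) | si2;
 }

 int
 main (void)
 {
   /* Prints 1111222233334444.  */
   printf ("%016llx\n",
	   (unsigned long long) merge_si_pair (0x11112222u, 0x33334444u));
   return 0;
 }
 #endif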
8024 /* Split a V4SI initialization. */
8026 void
8027 rs6000_split_v4si_init (rtx operands[])
8029 rtx dest = operands[0];
8031 /* Destination is a GPR, build up the two DImode parts in place. */
8032 if (REG_P (dest) || SUBREG_P (dest))
8034 int d_regno = regno_or_subregno (dest);
8035 rtx scalar1 = operands[1];
8036 rtx scalar2 = operands[2];
8037 rtx scalar3 = operands[3];
8038 rtx scalar4 = operands[4];
8039 rtx tmp1 = operands[5];
8040 rtx tmp2 = operands[6];
 8042 /* Even though we only need one temporary (plus the destination, which
 8043 has an early clobber constraint), try to use two temporaries, one for
 8044 each double word created.  That way the 2nd insn scheduling pass can
 8045 rearrange things so the two parts are done in parallel.  */
8046 if (BYTES_BIG_ENDIAN)
8048 rtx di_lo = gen_rtx_REG (DImode, d_regno);
8049 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
8050 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
8051 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
8053 else
8055 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
8056 rtx di_hi = gen_rtx_REG (DImode, d_regno);
8057 gcc_assert (!VECTOR_ELT_ORDER_BIG);
8058 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
8059 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
8061 return;
8064 else
8065 gcc_unreachable ();
8068 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
8070 bool
8071 invalid_e500_subreg (rtx op, machine_mode mode)
8073 if (TARGET_E500_DOUBLE)
8075 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
8076 subreg:TI and reg:TF. Decimal float modes are like integer
8077 modes (only low part of each register used) for this
8078 purpose. */
8079 if (GET_CODE (op) == SUBREG
8080 && (mode == SImode || mode == DImode || mode == TImode
8081 || mode == DDmode || mode == TDmode || mode == PTImode)
8082 && REG_P (SUBREG_REG (op))
8083 && (GET_MODE (SUBREG_REG (op)) == DFmode
8084 || GET_MODE (SUBREG_REG (op)) == TFmode
8085 || GET_MODE (SUBREG_REG (op)) == IFmode
8086 || GET_MODE (SUBREG_REG (op)) == KFmode))
8087 return true;
8089 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
8090 reg:TI. */
8091 if (GET_CODE (op) == SUBREG
8092 && (mode == DFmode || mode == TFmode || mode == IFmode
8093 || mode == KFmode)
8094 && REG_P (SUBREG_REG (op))
8095 && (GET_MODE (SUBREG_REG (op)) == DImode
8096 || GET_MODE (SUBREG_REG (op)) == TImode
8097 || GET_MODE (SUBREG_REG (op)) == PTImode
8098 || GET_MODE (SUBREG_REG (op)) == DDmode
8099 || GET_MODE (SUBREG_REG (op)) == TDmode))
8100 return true;
8103 if (TARGET_SPE
8104 && GET_CODE (op) == SUBREG
8105 && mode == SImode
8106 && REG_P (SUBREG_REG (op))
8107 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
8108 return true;
8110 return false;
 8113 /* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
 8114 selects whether the alignment is ABI-mandated, optional, or
 8115 both ABI-mandated and optional alignment.  */
8117 unsigned int
8118 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8120 if (how != align_opt)
8122 if (TREE_CODE (type) == VECTOR_TYPE)
8124 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
8125 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
8127 if (align < 64)
8128 align = 64;
8130 else if (align < 128)
8131 align = 128;
8133 else if (TARGET_E500_DOUBLE
8134 && TREE_CODE (type) == REAL_TYPE
8135 && TYPE_MODE (type) == DFmode)
8137 if (align < 64)
8138 align = 64;
8142 if (how != align_abi)
8144 if (TREE_CODE (type) == ARRAY_TYPE
8145 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8147 if (align < BITS_PER_WORD)
8148 align = BITS_PER_WORD;
8152 return align;
8155 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
8157 bool
8158 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
8160 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
8162 if (computed != 128)
8164 static bool warned;
8165 if (!warned && warn_psabi)
8167 warned = true;
8168 inform (input_location,
8169 "the layout of aggregates containing vectors with"
8170 " %d-byte alignment has changed in GCC 5",
8171 computed / BITS_PER_UNIT);
8174 /* In current GCC there is no special case. */
8175 return false;
8178 return false;
8181 /* AIX increases natural record alignment to doubleword if the first
8182 field is an FP double while the FP fields remain word aligned. */
8184 unsigned int
8185 rs6000_special_round_type_align (tree type, unsigned int computed,
8186 unsigned int specified)
8188 unsigned int align = MAX (computed, specified);
8189 tree field = TYPE_FIELDS (type);
 8191 /* Skip all non-field decls.  */
8192 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8193 field = DECL_CHAIN (field);
8195 if (field != NULL && field != type)
8197 type = TREE_TYPE (field);
8198 while (TREE_CODE (type) == ARRAY_TYPE)
8199 type = TREE_TYPE (type);
8201 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
8202 align = MAX (align, 64);
8205 return align;
8208 /* Darwin increases record alignment to the natural alignment of
8209 the first field. */
8211 unsigned int
8212 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8213 unsigned int specified)
8215 unsigned int align = MAX (computed, specified);
8217 if (TYPE_PACKED (type))
8218 return align;
8220 /* Find the first field, looking down into aggregates. */
8221 do {
8222 tree field = TYPE_FIELDS (type);
 8223 /* Skip all non-field decls.  */
8224 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8225 field = DECL_CHAIN (field);
8226 if (! field)
8227 break;
8228 /* A packed field does not contribute any extra alignment. */
8229 if (DECL_PACKED (field))
8230 return align;
8231 type = TREE_TYPE (field);
8232 while (TREE_CODE (type) == ARRAY_TYPE)
8233 type = TREE_TYPE (type);
8234 } while (AGGREGATE_TYPE_P (type));
8236 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8237 align = MAX (align, TYPE_ALIGN (type));
8239 return align;
8242 /* Return 1 for an operand in small memory on V.4/eabi. */
 8244 int
 8245 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8246 machine_mode mode ATTRIBUTE_UNUSED)
8248 #if TARGET_ELF
8249 rtx sym_ref;
8251 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8252 return 0;
8254 if (DEFAULT_ABI != ABI_V4)
8255 return 0;
8257 /* Vector and float memory instructions have a limited offset on the
8258 SPE, so using a vector or float variable directly as an operand is
8259 not useful. */
8260 if (TARGET_SPE
8261 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
8262 return 0;
8264 if (GET_CODE (op) == SYMBOL_REF)
8265 sym_ref = op;
8267 else if (GET_CODE (op) != CONST
8268 || GET_CODE (XEXP (op, 0)) != PLUS
8269 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8270 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8271 return 0;
8273 else
8275 rtx sum = XEXP (op, 0);
8276 HOST_WIDE_INT summand;
 8278 /* We have to be careful here, because it is the referenced address
 8279 that must be within 32k of _SDA_BASE_, not just the symbol.  */
8280 summand = INTVAL (XEXP (sum, 1));
8281 if (summand < 0 || summand > g_switch_value)
8282 return 0;
8284 sym_ref = XEXP (sum, 0);
8287 return SYMBOL_REF_SMALL_P (sym_ref);
8288 #else
8289 return 0;
8290 #endif
8293 /* Return true if either operand is a general purpose register. */
8295 bool
8296 gpr_or_gpr_p (rtx op0, rtx op1)
8298 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8299 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8302 /* Return true if this is a move direct operation between GPR registers and
8303 floating point/VSX registers. */
8305 bool
8306 direct_move_p (rtx op0, rtx op1)
8308 int regno0, regno1;
8310 if (!REG_P (op0) || !REG_P (op1))
8311 return false;
8313 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8314 return false;
8316 regno0 = REGNO (op0);
8317 regno1 = REGNO (op1);
8318 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8319 return false;
8321 if (INT_REGNO_P (regno0))
8322 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8324 else if (INT_REGNO_P (regno1))
8326 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8327 return true;
8329 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8330 return true;
8333 return false;
8336 /* Return true if the OFFSET is valid for the quad address instructions that
8337 use d-form (register + offset) addressing. */
8339 static inline bool
8340 quad_address_offset_p (HOST_WIDE_INT offset)
8342 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
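 /* For example, offsets 0, 16 and -32768 all pass the DQ-form test
    above, while 24 (not a multiple of 16) and 32768 (outside the signed
    16-bit range) do not.  */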
 8345 /* Return true if ADDR is an acceptable address for a quad memory
 8346 operation of mode MODE (either LQ/STQ for general purpose registers,
 8347 or LXV/STXV for vector registers under ISA 3.0).  STRICT selects the
 8348 strict form of the base register check used by
 8349 INT_REG_OK_FOR_BASE_P.  */
8351 bool
8352 quad_address_p (rtx addr, machine_mode mode, bool strict)
8354 rtx op0, op1;
8356 if (GET_MODE_SIZE (mode) != 16)
8357 return false;
8359 if (legitimate_indirect_address_p (addr, strict))
8360 return true;
8362 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8363 return false;
8365 if (GET_CODE (addr) != PLUS)
8366 return false;
8368 op0 = XEXP (addr, 0);
8369 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8370 return false;
8372 op1 = XEXP (addr, 1);
8373 if (!CONST_INT_P (op1))
8374 return false;
8376 return quad_address_offset_p (INTVAL (op1));
8379 /* Return true if this is a load or store quad operation. This function does
8380 not handle the atomic quad memory instructions. */
8382 bool
8383 quad_load_store_p (rtx op0, rtx op1)
8385 bool ret;
8387 if (!TARGET_QUAD_MEMORY)
8388 ret = false;
8390 else if (REG_P (op0) && MEM_P (op1))
8391 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8392 && quad_memory_operand (op1, GET_MODE (op1))
8393 && !reg_overlap_mentioned_p (op0, op1));
8395 else if (MEM_P (op0) && REG_P (op1))
8396 ret = (quad_memory_operand (op0, GET_MODE (op0))
8397 && quad_int_reg_operand (op1, GET_MODE (op1)));
8399 else
8400 ret = false;
8402 if (TARGET_DEBUG_ADDR)
8404 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8405 ret ? "true" : "false");
8406 debug_rtx (gen_rtx_SET (op0, op1));
8409 return ret;
8412 /* Given an address, return a constant offset term if one exists. */
8414 static rtx
8415 address_offset (rtx op)
8417 if (GET_CODE (op) == PRE_INC
8418 || GET_CODE (op) == PRE_DEC)
8419 op = XEXP (op, 0);
8420 else if (GET_CODE (op) == PRE_MODIFY
8421 || GET_CODE (op) == LO_SUM)
8422 op = XEXP (op, 1);
8424 if (GET_CODE (op) == CONST)
8425 op = XEXP (op, 0);
8427 if (GET_CODE (op) == PLUS)
8428 op = XEXP (op, 1);
8430 if (CONST_INT_P (op))
8431 return op;
8433 return NULL_RTX;
8436 /* Return true if the MEM operand is a memory operand suitable for use
8437 with a (full width, possibly multiple) gpr load/store. On
8438 powerpc64 this means the offset must be divisible by 4.
8439 Implements 'Y' constraint.
8441 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8442 a constraint function we know the operand has satisfied a suitable
8443 memory predicate. Also accept some odd rtl generated by reload
8444 (see rs6000_legitimize_reload_address for various forms). It is
8445 important that reload rtl be accepted by appropriate constraints
8446 but not by the operand predicate.
8448 Offsetting a lo_sum should not be allowed, except where we know by
8449 alignment that a 32k boundary is not crossed, but see the ???
8450 comment in rs6000_legitimize_reload_address. Note that by
8451 "offsetting" here we mean a further offset to access parts of the
8452 MEM. It's fine to have a lo_sum where the inner address is offset
8453 from a sym, since the same sym+offset will appear in the high part
8454 of the address calculation. */
8456 bool
8457 mem_operand_gpr (rtx op, machine_mode mode)
8459 unsigned HOST_WIDE_INT offset;
8460 int extra;
8461 rtx addr = XEXP (op, 0);
8463 op = address_offset (addr);
8464 if (op == NULL_RTX)
8465 return true;
8467 offset = INTVAL (op);
8468 if (TARGET_POWERPC64 && (offset & 3) != 0)
8469 return false;
8471 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8472 if (extra < 0)
8473 extra = 0;
8475 if (GET_CODE (addr) == LO_SUM)
8476 /* For lo_sum addresses, we must allow any offset except one that
8477 causes a wrap, so test only the low 16 bits. */
8478 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8480 return offset + 0x8000 < 0x10000u - extra;
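 /* Worked example: for a 16-byte access on powerpc64, EXTRA is 8, so
    the final test accepts offsets from -32768 up to 32756 (the offset
    must also be a multiple of 4, and the last word of the access must
    still be reachable with a 16-bit displacement).  */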
8483 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8484 enforce an offset divisible by 4 even for 32-bit. */
8486 bool
8487 mem_operand_ds_form (rtx op, machine_mode mode)
8489 unsigned HOST_WIDE_INT offset;
8490 int extra;
8491 rtx addr = XEXP (op, 0);
8493 if (!offsettable_address_p (false, mode, addr))
8494 return false;
8496 op = address_offset (addr);
8497 if (op == NULL_RTX)
8498 return true;
8500 offset = INTVAL (op);
8501 if ((offset & 3) != 0)
8502 return false;
8504 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8505 if (extra < 0)
8506 extra = 0;
8508 if (GET_CODE (addr) == LO_SUM)
8509 /* For lo_sum addresses, we must allow any offset except one that
8510 causes a wrap, so test only the low 16 bits. */
8511 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8513 return offset + 0x8000 < 0x10000u - extra;
8516 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8518 static bool
8519 reg_offset_addressing_ok_p (machine_mode mode)
8521 switch (mode)
8523 case V16QImode:
8524 case V8HImode:
8525 case V4SFmode:
8526 case V4SImode:
8527 case V2DFmode:
8528 case V2DImode:
8529 case V1TImode:
8530 case TImode:
8531 case TFmode:
8532 case KFmode:
8533 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8534 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8535 a vector mode, if we want to use the VSX registers to move it around,
8536 we need to restrict ourselves to reg+reg addressing. Similarly for
8537 IEEE 128-bit floating point that is passed in a single vector
8538 register. */
8539 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8540 return mode_supports_vsx_dform_quad (mode);
8541 break;
8543 case V4HImode:
8544 case V2SImode:
8545 case V1DImode:
8546 case V2SFmode:
8547 /* Paired vector modes. Only reg+reg addressing is valid. */
8548 if (TARGET_PAIRED_FLOAT)
8549 return false;
8550 break;
8552 case SDmode:
8553 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8554 addressing for the LFIWZX and STFIWX instructions. */
8555 if (TARGET_NO_SDMODE_STACK)
8556 return false;
8557 break;
8559 default:
8560 break;
8563 return true;
8566 static bool
8567 virtual_stack_registers_memory_p (rtx op)
8569 int regnum;
8571 if (GET_CODE (op) == REG)
8572 regnum = REGNO (op);
8574 else if (GET_CODE (op) == PLUS
8575 && GET_CODE (XEXP (op, 0)) == REG
8576 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8577 regnum = REGNO (XEXP (op, 0));
8579 else
8580 return false;
8582 return (regnum >= FIRST_VIRTUAL_REGISTER
8583 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
 8586 /* Return true if a MODE-sized memory access to OP plus OFFSET
 8587 is known not to straddle a 32k boundary.  This function is used
8588 to determine whether -mcmodel=medium code can use TOC pointer
8589 relative addressing for OP. This means the alignment of the TOC
8590 pointer must also be taken into account, and unfortunately that is
8591 only 8 bytes. */
8593 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8594 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8595 #endif
8597 static bool
8598 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8599 machine_mode mode)
8601 tree decl;
8602 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8604 if (GET_CODE (op) != SYMBOL_REF)
8605 return false;
 8607 /* ISA 3.0 vector d-form addressing is restricted; don't allow
 8608 SYMBOL_REF.  */
8609 if (mode_supports_vsx_dform_quad (mode))
8610 return false;
8612 dsize = GET_MODE_SIZE (mode);
8613 decl = SYMBOL_REF_DECL (op);
8614 if (!decl)
8616 if (dsize == 0)
8617 return false;
8619 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8620 replacing memory addresses with an anchor plus offset. We
8621 could find the decl by rummaging around in the block->objects
8622 VEC for the given offset but that seems like too much work. */
8623 dalign = BITS_PER_UNIT;
8624 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8625 && SYMBOL_REF_ANCHOR_P (op)
8626 && SYMBOL_REF_BLOCK (op) != NULL)
8628 struct object_block *block = SYMBOL_REF_BLOCK (op);
8630 dalign = block->alignment;
8631 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8633 else if (CONSTANT_POOL_ADDRESS_P (op))
8635 /* It would be nice to have get_pool_align().. */
8636 machine_mode cmode = get_pool_mode (op);
8638 dalign = GET_MODE_ALIGNMENT (cmode);
8641 else if (DECL_P (decl))
8643 dalign = DECL_ALIGN (decl);
8645 if (dsize == 0)
8647 /* Allow BLKmode when the entire object is known to not
8648 cross a 32k boundary. */
8649 if (!DECL_SIZE_UNIT (decl))
8650 return false;
8652 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8653 return false;
8655 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8656 if (dsize > 32768)
8657 return false;
8659 dalign /= BITS_PER_UNIT;
8660 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8661 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8662 return dalign >= dsize;
8665 else
8666 gcc_unreachable ();
8668 /* Find how many bits of the alignment we know for this access. */
8669 dalign /= BITS_PER_UNIT;
8670 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8671 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8672 mask = dalign - 1;
8673 lsb = offset & -offset;
8674 mask &= lsb - 1;
8675 dalign = mask + 1;
8677 return dalign >= dsize;
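 /* Illustrative sketch (not part of the GCC sources; known_alignment is
    a hypothetical name): the mask/lsb computation above as standalone C,
    kept under #if 0.  The alignment known for DECL alignment DALIGN (in
    bytes) at constant OFFSET is limited by the lowest set bit of the
    offset.  */
 #if 0
 #include <stdio.h>

 static unsigned long
 known_alignment (unsigned long dalign, unsigned long offset)
 {
   unsigned long mask = dalign - 1;
   unsigned long lsb = offset & -offset;	/* lowest set bit of OFFSET */

   mask &= lsb - 1;
   return mask + 1;
 }

 int
 main (void)
 {
   /* A 16-byte-aligned object accessed at offset 0x18 is only known to
      be 8-byte aligned: prints 8.  */
   printf ("%lu\n", known_alignment (16, 0x18));
   return 0;
 }
 #endif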
8680 static bool
8681 constant_pool_expr_p (rtx op)
8683 rtx base, offset;
8685 split_const (op, &base, &offset);
8686 return (GET_CODE (base) == SYMBOL_REF
8687 && CONSTANT_POOL_ADDRESS_P (base)
8688 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8691 static const_rtx tocrel_base, tocrel_offset;
8693 /* Return true if OP is a toc pointer relative address (the output
8694 of create_TOC_reference). If STRICT, do not match non-split
8695 -mcmodel=large/medium toc pointer relative addresses. */
8697 bool
8698 toc_relative_expr_p (const_rtx op, bool strict)
8700 if (!TARGET_TOC)
8701 return false;
8703 if (TARGET_CMODEL != CMODEL_SMALL)
8705 /* When strict ensure we have everything tidy. */
8706 if (strict
8707 && !(GET_CODE (op) == LO_SUM
8708 && REG_P (XEXP (op, 0))
8709 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8710 return false;
8712 /* When not strict, allow non-split TOC addresses and also allow
8713 (lo_sum (high ..)) TOC addresses created during reload. */
8714 if (GET_CODE (op) == LO_SUM)
8715 op = XEXP (op, 1);
8718 tocrel_base = op;
8719 tocrel_offset = const0_rtx;
8720 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8722 tocrel_base = XEXP (op, 0);
8723 tocrel_offset = XEXP (op, 1);
8726 return (GET_CODE (tocrel_base) == UNSPEC
8727 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8730 /* Return true if X is a constant pool address, and also for cmodel=medium
8731 if X is a toc-relative address known to be offsettable within MODE. */
8733 bool
8734 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8735 bool strict)
8737 return (toc_relative_expr_p (x, strict)
8738 && (TARGET_CMODEL != CMODEL_MEDIUM
8739 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8740 || mode == QImode
8741 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8742 INTVAL (tocrel_offset), mode)));
8745 static bool
8746 legitimate_small_data_p (machine_mode mode, rtx x)
8748 return (DEFAULT_ABI == ABI_V4
8749 && !flag_pic && !TARGET_TOC
8750 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8751 && small_data_operand (x, mode));
8754 /* SPE offset addressing is limited to 5-bits worth of double words. */
8755 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
8757 bool
8758 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8759 bool strict, bool worst_case)
8761 unsigned HOST_WIDE_INT offset;
8762 unsigned int extra;
8764 if (GET_CODE (x) != PLUS)
8765 return false;
8766 if (!REG_P (XEXP (x, 0)))
8767 return false;
8768 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8769 return false;
8770 if (mode_supports_vsx_dform_quad (mode))
8771 return quad_address_p (x, mode, strict);
8772 if (!reg_offset_addressing_ok_p (mode))
8773 return virtual_stack_registers_memory_p (x);
8774 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8775 return true;
8776 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8777 return false;
8779 offset = INTVAL (XEXP (x, 1));
8780 extra = 0;
8781 switch (mode)
8783 case V4HImode:
8784 case V2SImode:
8785 case V1DImode:
8786 case V2SFmode:
8787 /* SPE vector modes. */
8788 return SPE_CONST_OFFSET_OK (offset);
8790 case DFmode:
8791 case DDmode:
8792 case DImode:
8793 /* On e500v2, we may have:
8795 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
8797 Which gets addressed with evldd instructions. */
8798 if (TARGET_E500_DOUBLE)
8799 return SPE_CONST_OFFSET_OK (offset);
8801 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8802 addressing. */
8803 if (VECTOR_MEM_VSX_P (mode))
8804 return false;
8806 if (!worst_case)
8807 break;
8808 if (!TARGET_POWERPC64)
8809 extra = 4;
8810 else if (offset & 3)
8811 return false;
8812 break;
8814 case TFmode:
8815 case IFmode:
8816 case KFmode:
8817 case TDmode:
8818 case TImode:
8819 case PTImode:
8820 if (TARGET_E500_DOUBLE)
8821 return (SPE_CONST_OFFSET_OK (offset)
8822 && SPE_CONST_OFFSET_OK (offset + 8));
8824 extra = 8;
8825 if (!worst_case)
8826 break;
8827 if (!TARGET_POWERPC64)
8828 extra = 12;
8829 else if (offset & 3)
8830 return false;
8831 break;
8833 default:
8834 break;
8837 offset += 0x8000;
8838 return offset < 0x10000 - extra;
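 /* Worked example of the worst-case handling above: a TImode access on
    32-bit has EXTRA = 12, so reg+offset addressing is accepted only for
    offsets up to 32755, leaving the three remaining words of the access
    (at +4, +8 and +12) reachable within the 16-bit displacement.  */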
8841 bool
8842 legitimate_indexed_address_p (rtx x, int strict)
8844 rtx op0, op1;
8846 if (GET_CODE (x) != PLUS)
8847 return false;
8849 op0 = XEXP (x, 0);
8850 op1 = XEXP (x, 1);
8852 /* Recognize the rtl generated by reload which we know will later be
8853 replaced with proper base and index regs. */
8854 if (!strict
8855 && reload_in_progress
8856 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8857 && REG_P (op1))
8858 return true;
8860 return (REG_P (op0) && REG_P (op1)
8861 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8862 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8863 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8864 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8867 bool
8868 avoiding_indexed_address_p (machine_mode mode)
8870 /* Avoid indexed addressing for modes that have non-indexed
8871 load/store instruction forms. */
8872 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8875 bool
8876 legitimate_indirect_address_p (rtx x, int strict)
8878 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8881 bool
8882 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8884 if (!TARGET_MACHO || !flag_pic
8885 || mode != SImode || GET_CODE (x) != MEM)
8886 return false;
8887 x = XEXP (x, 0);
8889 if (GET_CODE (x) != LO_SUM)
8890 return false;
8891 if (GET_CODE (XEXP (x, 0)) != REG)
8892 return false;
8893 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8894 return false;
8895 x = XEXP (x, 1);
8897 return CONSTANT_P (x);
8900 static bool
8901 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8903 if (GET_CODE (x) != LO_SUM)
8904 return false;
8905 if (GET_CODE (XEXP (x, 0)) != REG)
8906 return false;
8907 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8908 return false;
 8909 /* Quad word addresses are restricted, and we can't use LO_SUM.  */
8910 if (mode_supports_vsx_dform_quad (mode))
8911 return false;
8912 /* Restrict addressing for DI because of our SUBREG hackery. */
8913 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8914 return false;
8915 x = XEXP (x, 1);
8917 if (TARGET_ELF || TARGET_MACHO)
8919 bool large_toc_ok;
8921 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8922 return false;
 8923 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
 8924 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
 8925 recognizes some LO_SUM addresses as valid although this
 8926 function says the opposite.  In most cases, LRA through different
 8927 transformations can generate correct code for address reloads.
 8928 It is only some LO_SUM cases that it cannot manage.  So we need to add
 8929 code analogous to that in rs6000_legitimize_reload_address for
 8930 LO_SUM here saying that some addresses are still valid.  */
8931 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8932 && small_toc_ref (x, VOIDmode));
8933 if (TARGET_TOC && ! large_toc_ok)
8934 return false;
8935 if (GET_MODE_NUNITS (mode) != 1)
8936 return false;
8937 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8938 && !(/* ??? Assume floating point reg based on mode? */
8939 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
8940 && (mode == DFmode || mode == DDmode)))
8941 return false;
8943 return CONSTANT_P (x) || large_toc_ok;
8946 return false;
8950 /* Try machine-dependent ways of modifying an illegitimate address
8951 to be legitimate. If we find one, return the new, valid address.
8952 This is used from only one place: `memory_address' in explow.c.
8954 OLDX is the address as it was before break_out_memory_refs was
8955 called. In some cases it is useful to look at this to decide what
8956 needs to be done.
8958 It is always safe for this function to do nothing. It exists to
8959 recognize opportunities to optimize the output.
8961 On RS/6000, first check for the sum of a register with a constant
8962 integer that is out of range. If so, generate code to add the
8963 constant with the low-order 16 bits masked to the register and force
8964 this result into another register (this can be done with `cau').
8965 Then generate an address of REG+(CONST&0xffff), allowing for the
8966 possibility of bit 16 being a one.
8968 Then check for the sum of a register and something not constant, try to
8969 load the other things into a register and return the sum. */
8971 static rtx
8972 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8973 machine_mode mode)
8975 unsigned int extra;
8977 if (!reg_offset_addressing_ok_p (mode)
8978 || mode_supports_vsx_dform_quad (mode))
8980 if (virtual_stack_registers_memory_p (x))
8981 return x;
8983 /* In theory we should not be seeing addresses of the form reg+0,
8984 but just in case it is generated, optimize it away. */
8985 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8986 return force_reg (Pmode, XEXP (x, 0));
8988 /* For TImode with load/store quad, restrict addresses to just a single
8989 pointer, so it works with both GPRs and VSX registers. */
8990 /* Make sure both operands are registers. */
8991 else if (GET_CODE (x) == PLUS
8992 && (mode != TImode || !TARGET_VSX_TIMODE))
8993 return gen_rtx_PLUS (Pmode,
8994 force_reg (Pmode, XEXP (x, 0)),
8995 force_reg (Pmode, XEXP (x, 1)));
8996 else
8997 return force_reg (Pmode, x);
8999 if (GET_CODE (x) == SYMBOL_REF)
9001 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9002 if (model != 0)
9003 return rs6000_legitimize_tls_address (x, model);
9006 extra = 0;
9007 switch (mode)
9009 case TFmode:
9010 case TDmode:
9011 case TImode:
9012 case PTImode:
9013 case IFmode:
9014 case KFmode:
9015 /* As in legitimate_offset_address_p we do not assume
9016 worst-case. The mode here is just a hint as to the registers
9017 used. A TImode is usually in gprs, but may actually be in
9018 fprs. Leave worst-case scenario for reload to handle via
9019 insn constraints. PTImode is only GPRs. */
9020 extra = 8;
9021 break;
9022 default:
9023 break;
9026 if (GET_CODE (x) == PLUS
9027 && GET_CODE (XEXP (x, 0)) == REG
9028 && GET_CODE (XEXP (x, 1)) == CONST_INT
9029 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9030 >= 0x10000 - extra)
9031 && !(SPE_VECTOR_MODE (mode)
9032 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
9034 HOST_WIDE_INT high_int, low_int;
9035 rtx sum;
9036 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9037 if (low_int >= 0x8000 - extra)
9038 low_int = 0;
9039 high_int = INTVAL (XEXP (x, 1)) - low_int;
9040 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9041 GEN_INT (high_int)), 0);
9042 return plus_constant (Pmode, sum, low_int);
9044 else if (GET_CODE (x) == PLUS
9045 && GET_CODE (XEXP (x, 0)) == REG
9046 && GET_CODE (XEXP (x, 1)) != CONST_INT
9047 && GET_MODE_NUNITS (mode) == 1
9048 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9049 || (/* ??? Assume floating point reg based on mode? */
9050 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9051 && (mode == DFmode || mode == DDmode)))
9052 && !avoiding_indexed_address_p (mode))
9054 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9055 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9057 else if (SPE_VECTOR_MODE (mode)
9058 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
9060 if (mode == DImode)
9061 return x;
9062 /* We accept [reg + reg] and [reg + OFFSET]. */
9064 if (GET_CODE (x) == PLUS)
9066 rtx op1 = XEXP (x, 0);
9067 rtx op2 = XEXP (x, 1);
9068 rtx y;
9070 op1 = force_reg (Pmode, op1);
9072 if (GET_CODE (op2) != REG
9073 && (GET_CODE (op2) != CONST_INT
9074 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
9075 || (GET_MODE_SIZE (mode) > 8
9076 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
9077 op2 = force_reg (Pmode, op2);
9079 /* We can't always do [reg + reg] for these, because [reg +
9080 reg + offset] is not a legitimate addressing mode. */
9081 y = gen_rtx_PLUS (Pmode, op1, op2);
9083 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
9084 return force_reg (Pmode, y);
9085 else
9086 return y;
9089 return force_reg (Pmode, x);
9091 else if ((TARGET_ELF
9092 #if TARGET_MACHO
9093 || !MACHO_DYNAMIC_NO_PIC_P
9094 #endif
9096 && TARGET_32BIT
9097 && TARGET_NO_TOC
9098 && ! flag_pic
9099 && GET_CODE (x) != CONST_INT
9100 && GET_CODE (x) != CONST_WIDE_INT
9101 && GET_CODE (x) != CONST_DOUBLE
9102 && CONSTANT_P (x)
9103 && GET_MODE_NUNITS (mode) == 1
9104 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9105 || (/* ??? Assume floating point reg based on mode? */
9106 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9107 && (mode == DFmode || mode == DDmode))))
9109 rtx reg = gen_reg_rtx (Pmode);
9110 if (TARGET_ELF)
9111 emit_insn (gen_elf_high (reg, x));
9112 else
9113 emit_insn (gen_macho_high (reg, x));
9114 return gen_rtx_LO_SUM (Pmode, reg, x);
9116 else if (TARGET_TOC
9117 && GET_CODE (x) == SYMBOL_REF
9118 && constant_pool_expr_p (x)
9119 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9120 return create_TOC_reference (x, NULL_RTX);
9121 else
9122 return x;
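/* Worked illustration of the displacement split a few lines above (an
   editorial sketch in plain C, not part of the compiler proper): the
   low 16 bits are sign-extended and the remainder is folded into the
   high part, so the memory insn is left with a displacement that fits
   a signed 16-bit D field while addis supplies the rest.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long val = 0x12345;				 /* too large for a D field */
  long low = ((val & 0xffff) ^ 0x8000) - 0x8000; /* sign-extend low 16 bits */
  long high = val - low;			 /* multiple of 0x10000 */

  /* Prints "high = 0x10000, low = 0x2345"; for val = 0x18000 the low
     part would come out negative (-0x8000) and high = 0x20000.  */
  printf ("high = %#lx, low = %#lx\n", high, low);
  return 0;
}
#endif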
9125 /* Debug version of rs6000_legitimize_address. */
9126 static rtx
9127 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9129 rtx ret;
9130 rtx_insn *insns;
9132 start_sequence ();
9133 ret = rs6000_legitimize_address (x, oldx, mode);
9134 insns = get_insns ();
9135 end_sequence ();
9137 if (ret != x)
9139 fprintf (stderr,
9140 "\nrs6000_legitimize_address: mode %s, old code %s, "
9141 "new code %s, modified\n",
9142 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9143 GET_RTX_NAME (GET_CODE (ret)));
9145 fprintf (stderr, "Original address:\n");
9146 debug_rtx (x);
9148 fprintf (stderr, "oldx:\n");
9149 debug_rtx (oldx);
9151 fprintf (stderr, "New address:\n");
9152 debug_rtx (ret);
9154 if (insns)
9156 fprintf (stderr, "Insns added:\n");
9157 debug_rtx_list (insns, 20);
9160 else
9162 fprintf (stderr,
9163 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9164 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9166 debug_rtx (x);
9169 if (insns)
9170 emit_insn (insns);
9172 return ret;
9175 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9176 We need to emit DTP-relative relocations. */
9178 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9179 static void
9180 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9182 switch (size)
9184 case 4:
9185 fputs ("\t.long\t", file);
9186 break;
9187 case 8:
9188 fputs (DOUBLE_INT_ASM_OP, file);
9189 break;
9190 default:
9191 gcc_unreachable ();
9193 output_addr_const (file, x);
9194 if (TARGET_ELF)
9195 fputs ("@dtprel+0x8000", file);
9196 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
9198 switch (SYMBOL_REF_TLS_MODEL (x))
9200 case 0:
9201 break;
9202 case TLS_MODEL_LOCAL_EXEC:
9203 fputs ("@le", file);
9204 break;
9205 case TLS_MODEL_INITIAL_EXEC:
9206 fputs ("@ie", file);
9207 break;
9208 case TLS_MODEL_GLOBAL_DYNAMIC:
9209 case TLS_MODEL_LOCAL_DYNAMIC:
9210 fputs ("@m", file);
9211 break;
9212 default:
9213 gcc_unreachable ();
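/* For illustration (hypothetical symbol "foo"), the 4-byte ELF case
   above emits a directive of the form

       .long	foo@dtprel+0x8000

   while the XCOFF case appends the model-specific qualifier instead,
   e.g. "foo@le" for local-exec or "foo@m" for the dynamic models.  */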
9218 /* Return true if X is a symbol that refers to real (rather than emulated)
9219 TLS. */
9221 static bool
9222 rs6000_real_tls_symbol_ref_p (rtx x)
9224 return (GET_CODE (x) == SYMBOL_REF
9225 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9228 /* In the name of slightly smaller debug output, and to cater to
9229 general assembler lossage, recognize various UNSPEC sequences
9230 and turn them back into a direct symbol reference. */
9232 static rtx
9233 rs6000_delegitimize_address (rtx orig_x)
9235 rtx x, y, offset;
9237 orig_x = delegitimize_mem_from_attrs (orig_x);
9238 x = orig_x;
9239 if (MEM_P (x))
9240 x = XEXP (x, 0);
9242 y = x;
9243 if (TARGET_CMODEL != CMODEL_SMALL
9244 && GET_CODE (y) == LO_SUM)
9245 y = XEXP (y, 1);
9247 offset = NULL_RTX;
9248 if (GET_CODE (y) == PLUS
9249 && GET_MODE (y) == Pmode
9250 && CONST_INT_P (XEXP (y, 1)))
9252 offset = XEXP (y, 1);
9253 y = XEXP (y, 0);
9256 if (GET_CODE (y) == UNSPEC
9257 && XINT (y, 1) == UNSPEC_TOCREL)
9259 y = XVECEXP (y, 0, 0);
9261 #ifdef HAVE_AS_TLS
9262 /* Do not associate thread-local symbols with the original
9263 constant pool symbol. */
9264 if (TARGET_XCOFF
9265 && GET_CODE (y) == SYMBOL_REF
9266 && CONSTANT_POOL_ADDRESS_P (y)
9267 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9268 return orig_x;
9269 #endif
9271 if (offset != NULL_RTX)
9272 y = gen_rtx_PLUS (Pmode, y, offset);
9273 if (!MEM_P (orig_x))
9274 return y;
9275 else
9276 return replace_equiv_address_nv (orig_x, y);
9279 if (TARGET_MACHO
9280 && GET_CODE (orig_x) == LO_SUM
9281 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9283 y = XEXP (XEXP (orig_x, 1), 0);
9284 if (GET_CODE (y) == UNSPEC
9285 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9286 return XVECEXP (y, 0, 0);
9289 return orig_x;
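/* Illustrative before/after for the TOC case above (a sketch; the
   exact RTL shape varies with the code model and ABI):

       (mem (plus (unspec [(symbol_ref ("x")) (reg 2)] UNSPEC_TOCREL)
		  (const_int 8)))

   is turned back into (mem (plus (symbol_ref ("x")) (const_int 8))),
   which is what the debug-output machinery wants to see.  */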
9292 /* Return true if X shouldn't be emitted into the debug info.
9293 The linker doesn't like .toc section references from
9294 .debug_* sections, so reject .toc section symbols. */
9296 static bool
9297 rs6000_const_not_ok_for_debug_p (rtx x)
9299 if (GET_CODE (x) == SYMBOL_REF
9300 && CONSTANT_POOL_ADDRESS_P (x))
9302 rtx c = get_pool_constant (x);
9303 machine_mode cmode = get_pool_mode (x);
9304 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9305 return true;
9308 return false;
9312 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9314 static bool
9315 rs6000_legitimate_combined_insn (rtx_insn *insn)
9317 int icode = INSN_CODE (insn);
9319 /* Reject creating doloop insns. Combine should not be allowed
9320 to create these for a number of reasons:
9321 1) In a nested loop, if combine creates one of these in an
9322 outer loop and the register allocator happens to allocate ctr
9323 to the outer loop insn, then the inner loop can't use ctr.
9324 Inner loops ought to be more highly optimized.
9325 2) Combine often wants to create one of these from what was
9326 originally a three insn sequence, first combining the three
9327 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9328 allocated ctr, the splitter takes us back to the three insn
9329 sequence. It's better to stop combine at the two insn
9330 sequence.
9331 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9332 insns, the register allocator sometimes uses floating point
9333 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9334 jump insn and output reloads are not implemented for jumps,
9335 the ctrsi/ctrdi splitters need to handle all possible cases.
9336 That's a pain, and it gets to be seriously difficult when a
9337 splitter that runs after reload needs memory to transfer from
9338 a gpr to an fpr. See PR70098 and PR71763 which are not fixed
9339 for the difficult case. It's better to not create problems
9340 in the first place. */
9341 if (icode != CODE_FOR_nothing
9342 && (icode == CODE_FOR_ctrsi_internal1
9343 || icode == CODE_FOR_ctrdi_internal1
9344 || icode == CODE_FOR_ctrsi_internal2
9345 || icode == CODE_FOR_ctrdi_internal2
9346 || icode == CODE_FOR_ctrsi_internal3
9347 || icode == CODE_FOR_ctrdi_internal3
9348 || icode == CODE_FOR_ctrsi_internal4
9349 || icode == CODE_FOR_ctrdi_internal4))
9350 return false;
9352 return true;
9355 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9357 static GTY(()) rtx rs6000_tls_symbol;
9358 static rtx
9359 rs6000_tls_get_addr (void)
9361 if (!rs6000_tls_symbol)
9362 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9364 return rs6000_tls_symbol;
9367 /* Construct the SYMBOL_REF for TLS GOT references. */
9369 static GTY(()) rtx rs6000_got_symbol;
9370 static rtx
9371 rs6000_got_sym (void)
9373 if (!rs6000_got_symbol)
9375 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9376 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9377 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9380 return rs6000_got_symbol;
9383 /* AIX Thread-Local Address support. */
9385 static rtx
9386 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9388 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9389 const char *name;
9390 char *tlsname;
9392 name = XSTR (addr, 0);
9393 /* Append TLS CSECT qualifier, unless the symbol already is qualified
9394 or the symbol will be in TLS private data section. */
9395 if (name[strlen (name) - 1] != ']'
9396 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9397 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9399 tlsname = XALLOCAVEC (char, strlen (name) + 4);
9400 strcpy (tlsname, name);
9401 strcat (tlsname,
9402 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9403 tlsaddr = copy_rtx (addr);
9404 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9406 else
9407 tlsaddr = addr;
9409 /* Place addr into TOC constant pool. */
9410 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9412 /* Output the TOC entry and create the MEM referencing the value. */
9413 if (constant_pool_expr_p (XEXP (sym, 0))
9414 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9416 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9417 mem = gen_const_mem (Pmode, tocref);
9418 set_mem_alias_set (mem, get_TOC_alias_set ());
9420 else
9421 return sym;
9423 /* Use global-dynamic for local-dynamic. */
9424 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9425 || model == TLS_MODEL_LOCAL_DYNAMIC)
9427 /* Create new TOC reference for @m symbol. */
9428 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9429 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9430 strcpy (tlsname, "*LCM");
9431 strcat (tlsname, name + 3);
9432 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9433 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9434 tocref = create_TOC_reference (modaddr, NULL_RTX);
9435 rtx modmem = gen_const_mem (Pmode, tocref);
9436 set_mem_alias_set (modmem, get_TOC_alias_set ());
9438 rtx modreg = gen_reg_rtx (Pmode);
9439 emit_insn (gen_rtx_SET (modreg, modmem));
9441 tmpreg = gen_reg_rtx (Pmode);
9442 emit_insn (gen_rtx_SET (tmpreg, mem));
9444 dest = gen_reg_rtx (Pmode);
9445 if (TARGET_32BIT)
9446 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9447 else
9448 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9449 return dest;
9451 /* Obtain TLS pointer: 32-bit call or 64-bit GPR 13. */
9452 else if (TARGET_32BIT)
9454 tlsreg = gen_reg_rtx (SImode);
9455 emit_insn (gen_tls_get_tpointer (tlsreg));
9457 else
9458 tlsreg = gen_rtx_REG (DImode, 13);
9460 /* Load the TOC value into temporary register. */
9461 tmpreg = gen_reg_rtx (Pmode);
9462 emit_insn (gen_rtx_SET (tmpreg, mem));
9463 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9464 gen_rtx_MINUS (Pmode, addr, tlsreg));
9466 /* Add TOC symbol value to TLS pointer. */
9467 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9469 return dest;
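/* Naming illustration for the AIX path above (hypothetical symbol
   "tdata"): a public or BSS-initialized symbol is requalified as
   "tdata[TL]" (or "tdata[UL]") before being placed in the TOC, and
   for the dynamic models the module-handle entry is derived from the
   constant pool label by rewriting its "*LC" prefix into "*LCM".  */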
9472 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9473 this (thread-local) address. */
9475 static rtx
9476 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9478 rtx dest, insn;
9480 if (TARGET_XCOFF)
9481 return rs6000_legitimize_tls_address_aix (addr, model);
9483 dest = gen_reg_rtx (Pmode);
9484 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9486 rtx tlsreg;
9488 if (TARGET_64BIT)
9490 tlsreg = gen_rtx_REG (Pmode, 13);
9491 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9493 else
9495 tlsreg = gen_rtx_REG (Pmode, 2);
9496 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9498 emit_insn (insn);
9500 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9502 rtx tlsreg, tmp;
9504 tmp = gen_reg_rtx (Pmode);
9505 if (TARGET_64BIT)
9507 tlsreg = gen_rtx_REG (Pmode, 13);
9508 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9510 else
9512 tlsreg = gen_rtx_REG (Pmode, 2);
9513 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9515 emit_insn (insn);
9516 if (TARGET_64BIT)
9517 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9518 else
9519 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9520 emit_insn (insn);
9522 else
9524 rtx r3, got, tga, tmp1, tmp2, call_insn;
9526 /* We currently use relocations like @got@tlsgd for tls, which
9527 means the linker will handle allocation of tls entries, placing
9528 them in the .got section. So use a pointer to the .got section,
9529 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9530 or to secondary GOT sections used by 32-bit -fPIC. */
9531 if (TARGET_64BIT)
9532 got = gen_rtx_REG (Pmode, 2);
9533 else
9535 if (flag_pic == 1)
9536 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9537 else
9539 rtx gsym = rs6000_got_sym ();
9540 got = gen_reg_rtx (Pmode);
9541 if (flag_pic == 0)
9542 rs6000_emit_move (got, gsym, Pmode);
9543 else
9545 rtx mem, lab;
9547 tmp1 = gen_reg_rtx (Pmode);
9548 tmp2 = gen_reg_rtx (Pmode);
9549 mem = gen_const_mem (Pmode, tmp1);
9550 lab = gen_label_rtx ();
9551 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9552 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9553 if (TARGET_LINK_STACK)
9554 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9555 emit_move_insn (tmp2, mem);
9556 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9557 set_unique_reg_note (last, REG_EQUAL, gsym);
9562 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9564 tga = rs6000_tls_get_addr ();
9565 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9566 1, const0_rtx, Pmode);
9568 r3 = gen_rtx_REG (Pmode, 3);
9569 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9571 if (TARGET_64BIT)
9572 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9573 else
9574 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9576 else if (DEFAULT_ABI == ABI_V4)
9577 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9578 else
9579 gcc_unreachable ();
9580 call_insn = last_call_insn ();
9581 PATTERN (call_insn) = insn;
9582 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9583 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9584 pic_offset_table_rtx);
9586 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9588 tga = rs6000_tls_get_addr ();
9589 tmp1 = gen_reg_rtx (Pmode);
9590 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9591 1, const0_rtx, Pmode);
9593 r3 = gen_rtx_REG (Pmode, 3);
9594 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9596 if (TARGET_64BIT)
9597 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9598 else
9599 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9601 else if (DEFAULT_ABI == ABI_V4)
9602 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9603 else
9604 gcc_unreachable ();
9605 call_insn = last_call_insn ();
9606 PATTERN (call_insn) = insn;
9607 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9608 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9609 pic_offset_table_rtx);
9611 if (rs6000_tls_size == 16)
9613 if (TARGET_64BIT)
9614 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9615 else
9616 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9618 else if (rs6000_tls_size == 32)
9620 tmp2 = gen_reg_rtx (Pmode);
9621 if (TARGET_64BIT)
9622 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9623 else
9624 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9625 emit_insn (insn);
9626 if (TARGET_64BIT)
9627 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9628 else
9629 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9631 else
9633 tmp2 = gen_reg_rtx (Pmode);
9634 if (TARGET_64BIT)
9635 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9636 else
9637 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9638 emit_insn (insn);
9639 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9641 emit_insn (insn);
9643 else
9645 /* IE, or 64-bit offset LE. */
9646 tmp2 = gen_reg_rtx (Pmode);
9647 if (TARGET_64BIT)
9648 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9649 else
9650 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9651 emit_insn (insn);
9652 if (TARGET_64BIT)
9653 insn = gen_tls_tls_64 (dest, tmp2, addr);
9654 else
9655 insn = gen_tls_tls_32 (dest, tmp2, addr);
9656 emit_insn (insn);
9660 return dest;
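/* Rough shape of the local-exec sequences generated above (64-bit,
   where r13 is the thread pointer; an editorial sketch rather than
   the literal insn output):

       # -mtls-size=16: a single tprel add
       addi  rD, r13, sym@tprel

       # -mtls-size=32: high-adjusted part plus low part
       addis rT, r13, sym@tprel@ha
       addi  rD, rT, sym@tprel@l
 */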
9663 /* Only create the global variable for the stack protect guard if we are using
9664 the global flavor of that guard. */
9665 static tree
9666 rs6000_init_stack_protect_guard (void)
9668 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9669 return default_stack_protect_guard ();
9671 return NULL_TREE;
9674 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9676 static bool
9677 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9679 if (GET_CODE (x) == HIGH
9680 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9681 return true;
9683 /* A TLS symbol in the TOC cannot contain a sum. */
9684 if (GET_CODE (x) == CONST
9685 && GET_CODE (XEXP (x, 0)) == PLUS
9686 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9687 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9688 return true;
9690 /* Do not place an ELF TLS symbol in the constant pool. */
9691 return TARGET_ELF && tls_referenced_p (x);
9694 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9695 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9696 can be addressed relative to the toc pointer. */
9698 static bool
9699 use_toc_relative_ref (rtx sym, machine_mode mode)
9701 return ((constant_pool_expr_p (sym)
9702 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9703 get_pool_mode (sym)))
9704 || (TARGET_CMODEL == CMODEL_MEDIUM
9705 && SYMBOL_REF_LOCAL_P (sym)
9706 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9709 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9710 replace the input X, or the original X if no replacement is called for.
9711 The output parameter *WIN is 1 if the calling macro should goto WIN,
9712 0 if it should not.
9714 For RS/6000, we wish to handle large displacements off a base
9715 register by splitting the addend across an addi/addis and the mem insn.
9716 This cuts the number of extra insns needed from 3 to 1.
9718 On Darwin, we use this to generate code for floating point constants.
9719 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9720 The Darwin code is inside #if TARGET_MACHO because only then are the
9721 machopic_* functions defined. */
9722 static rtx
9723 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9724 int opnum, int type,
9725 int ind_levels ATTRIBUTE_UNUSED, int *win)
9727 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9728 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9730 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9731 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9732 if (reg_offset_p
9733 && opnum == 1
9734 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9735 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9736 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9737 && TARGET_P9_VECTOR)
9738 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9739 && TARGET_P9_VECTOR)))
9740 reg_offset_p = false;
9742 /* We must recognize output that we have already generated ourselves. */
9743 if (GET_CODE (x) == PLUS
9744 && GET_CODE (XEXP (x, 0)) == PLUS
9745 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9746 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9747 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9749 if (TARGET_DEBUG_ADDR)
9751 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9752 debug_rtx (x);
9754 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9755 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9756 opnum, (enum reload_type) type);
9757 *win = 1;
9758 return x;
9761 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9762 if (GET_CODE (x) == LO_SUM
9763 && GET_CODE (XEXP (x, 0)) == HIGH)
9765 if (TARGET_DEBUG_ADDR)
9767 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9768 debug_rtx (x);
9770 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9771 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9772 opnum, (enum reload_type) type);
9773 *win = 1;
9774 return x;
9777 #if TARGET_MACHO
9778 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9779 && GET_CODE (x) == LO_SUM
9780 && GET_CODE (XEXP (x, 0)) == PLUS
9781 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9782 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9783 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9784 && machopic_operand_p (XEXP (x, 1)))
9786 /* Result of previous invocation of this function on Darwin
9787 floating point constant. */
9788 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9789 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9790 opnum, (enum reload_type) type);
9791 *win = 1;
9792 return x;
9794 #endif
9796 if (TARGET_CMODEL != CMODEL_SMALL
9797 && reg_offset_p
9798 && !quad_offset_p
9799 && small_toc_ref (x, VOIDmode))
9801 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9802 x = gen_rtx_LO_SUM (Pmode, hi, x);
9803 if (TARGET_DEBUG_ADDR)
9805 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9806 debug_rtx (x);
9808 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9809 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9810 opnum, (enum reload_type) type);
9811 *win = 1;
9812 return x;
9815 if (GET_CODE (x) == PLUS
9816 && REG_P (XEXP (x, 0))
9817 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9818 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9819 && CONST_INT_P (XEXP (x, 1))
9820 && reg_offset_p
9821 && !SPE_VECTOR_MODE (mode)
9822 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9823 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9825 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9826 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9827 HOST_WIDE_INT high
9828 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
9830 /* Check for 32-bit overflow or quad addresses with one of the
9831 four least significant bits set. */
9832 if (high + low != val
9833 || (quad_offset_p && (low & 0xf)))
9835 *win = 0;
9836 return x;
9839 /* Reload the high part into a base reg; leave the low part
9840 in the mem directly. */
9842 x = gen_rtx_PLUS (GET_MODE (x),
9843 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9844 GEN_INT (high)),
9845 GEN_INT (low));
9847 if (TARGET_DEBUG_ADDR)
9849 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9850 debug_rtx (x);
9852 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9853 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9854 opnum, (enum reload_type) type);
9855 *win = 1;
9856 return x;
9859 if (GET_CODE (x) == SYMBOL_REF
9860 && reg_offset_p
9861 && !quad_offset_p
9862 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9863 && !SPE_VECTOR_MODE (mode)
9864 #if TARGET_MACHO
9865 && DEFAULT_ABI == ABI_DARWIN
9866 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9867 && machopic_symbol_defined_p (x)
9868 #else
9869 && DEFAULT_ABI == ABI_V4
9870 && !flag_pic
9871 #endif
9872 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9873 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9874 without fprs.
9875 ??? Assume floating point reg based on mode? This assumption is
9876 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9877 where reload ends up doing a DFmode load of a constant from
9878 mem using two gprs. Unfortunately, at this point reload
9879 hasn't yet selected regs so poking around in reload data
9880 won't help and even if we could figure out the regs reliably,
9881 we'd still want to allow this transformation when the mem is
9882 naturally aligned. Since we say the address is good here, we
9883 can't disable offsets from LO_SUMs in mem_operand_gpr.
9884 FIXME: Allow offset from lo_sum for other modes too, when
9885 mem is sufficiently aligned.
9887 Also disallow this if the type can go in VMX/Altivec registers, since
9888 those registers do not have d-form (reg+offset) address modes. */
9889 && !reg_addr[mode].scalar_in_vmx_p
9890 && mode != TFmode
9891 && mode != TDmode
9892 && mode != IFmode
9893 && mode != KFmode
9894 && (mode != TImode || !TARGET_VSX_TIMODE)
9895 && mode != PTImode
9896 && (mode != DImode || TARGET_POWERPC64)
9897 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9898 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
9900 #if TARGET_MACHO
9901 if (flag_pic)
9903 rtx offset = machopic_gen_offset (x);
9904 x = gen_rtx_LO_SUM (GET_MODE (x),
9905 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9906 gen_rtx_HIGH (Pmode, offset)), offset);
9908 else
9909 #endif
9910 x = gen_rtx_LO_SUM (GET_MODE (x),
9911 gen_rtx_HIGH (Pmode, x), x);
9913 if (TARGET_DEBUG_ADDR)
9915 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9916 debug_rtx (x);
9918 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9919 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9920 opnum, (enum reload_type) type);
9921 *win = 1;
9922 return x;
9925 /* Reload an offset address wrapped by an AND that represents the
9926 masking of the lower bits. Strip the outer AND and let reload
9927 convert the offset address into an indirect address. For VSX,
9928 force reload to create the address with an AND in a separate
9929 register, because we can't guarantee an altivec register will
9930 be used. */
9931 if (VECTOR_MEM_ALTIVEC_P (mode)
9932 && GET_CODE (x) == AND
9933 && GET_CODE (XEXP (x, 0)) == PLUS
9934 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9935 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9936 && GET_CODE (XEXP (x, 1)) == CONST_INT
9937 && INTVAL (XEXP (x, 1)) == -16)
9939 x = XEXP (x, 0);
9940 *win = 1;
9941 return x;
9944 if (TARGET_TOC
9945 && reg_offset_p
9946 && !quad_offset_p
9947 && GET_CODE (x) == SYMBOL_REF
9948 && use_toc_relative_ref (x, mode))
9950 x = create_TOC_reference (x, NULL_RTX);
9951 if (TARGET_CMODEL != CMODEL_SMALL)
9953 if (TARGET_DEBUG_ADDR)
9955 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9956 debug_rtx (x);
9958 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9959 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9960 opnum, (enum reload_type) type);
9962 *win = 1;
9963 return x;
9965 *win = 0;
9966 return x;
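/* Standalone illustration of the split-and-check in push_reload case
   #4 above (plain C, editorial): LOW is the sign-extended bottom 16
   bits and HIGH is the rest sign-extended to 32 bits, so the
   "high + low != val" test rejects offsets that cannot be rebuilt
   from the two parts.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long long val = 0x7fffffffLL + 0x7000;	/* deliberately too big */
  long long low = ((val & 0xffff) ^ 0x8000) - 0x8000;
  long long high = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;

  /* Prints "reconstructible: no"; a value such as 0x12345 would
     print "yes", with high = 0x10000 and low = 0x2345.  */
  printf ("reconstructible: %s\n", high + low == val ? "yes" : "no");
  return 0;
}
#endif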
9969 /* Debug version of rs6000_legitimize_reload_address. */
9970 static rtx
9971 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9972 int opnum, int type,
9973 int ind_levels, int *win)
9975 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9976 ind_levels, win);
9977 fprintf (stderr,
9978 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9979 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9980 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9981 debug_rtx (x);
9983 if (x == ret)
9984 fprintf (stderr, "Same address returned\n");
9985 else if (!ret)
9986 fprintf (stderr, "NULL returned\n");
9987 else
9989 fprintf (stderr, "New address:\n");
9990 debug_rtx (ret);
9993 return ret;
9996 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9997 that is a valid memory address for an instruction.
9998 The MODE argument is the machine mode for the MEM expression
9999 that wants to use this address.
10001 On the RS/6000, there are four valid forms of address: a SYMBOL_REF that
10002 refers to a constant pool entry of an address (or the sum of it
10003 plus a constant), a short (16-bit signed) constant plus a register,
10004 the sum of two registers, or a register indirect, possibly with an
10005 auto-increment. For DFmode, DDmode and DImode with a constant plus
10006 register, we must ensure that both words are addressable, or on
10007 PowerPC64 that the offset is word aligned.
10009 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
10010 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
10011 because adjacent memory cells are accessed by adding word-sized offsets
10012 during assembly output. */
10013 static bool
10014 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
10016 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
10017 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
10019 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
10020 if (VECTOR_MEM_ALTIVEC_P (mode)
10021 && GET_CODE (x) == AND
10022 && GET_CODE (XEXP (x, 1)) == CONST_INT
10023 && INTVAL (XEXP (x, 1)) == -16)
10024 x = XEXP (x, 0);
10026 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
10027 return 0;
10028 if (legitimate_indirect_address_p (x, reg_ok_strict))
10029 return 1;
10030 if (TARGET_UPDATE
10031 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
10032 && mode_supports_pre_incdec_p (mode)
10033 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
10034 return 1;
10035 /* Handle restricted vector d-form offsets in ISA 3.0. */
10036 if (quad_offset_p)
10038 if (quad_address_p (x, mode, reg_ok_strict))
10039 return 1;
10041 else if (virtual_stack_registers_memory_p (x))
10042 return 1;
10044 else if (reg_offset_p)
10046 if (legitimate_small_data_p (mode, x))
10047 return 1;
10048 if (legitimate_constant_pool_address_p (x, mode,
10049 reg_ok_strict || lra_in_progress))
10050 return 1;
10051 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
10052 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
10053 return 1;
10056 /* For TImode, if we have TImode in VSX registers, only allow register
10057 indirect addresses. This will allow the values to go in either GPRs
10058 or VSX registers without reloading. The vector types would tend to
10059 go into VSX registers, so we allow REG+REG, while TImode seems
10060 somewhat split, in that some uses are GPR based, and some VSX based. */
10061 /* FIXME: We could loosen this by changing the following to
10062 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
10063 but currently we cannot allow REG+REG addressing for TImode. See
10064 PR72827 for complete details on how this ends up hoodwinking DSE. */
10065 if (mode == TImode && TARGET_VSX_TIMODE)
10066 return 0;
10067 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
10068 if (! reg_ok_strict
10069 && reg_offset_p
10070 && GET_CODE (x) == PLUS
10071 && GET_CODE (XEXP (x, 0)) == REG
10072 && (XEXP (x, 0) == virtual_stack_vars_rtx
10073 || XEXP (x, 0) == arg_pointer_rtx)
10074 && GET_CODE (XEXP (x, 1)) == CONST_INT)
10075 return 1;
10076 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
10077 return 1;
10078 if (!FLOAT128_2REG_P (mode)
10079 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
10080 || TARGET_POWERPC64
10081 || (mode != DFmode && mode != DDmode)
10082 || (TARGET_E500_DOUBLE && mode != DDmode))
10083 && (TARGET_POWERPC64 || mode != DImode)
10084 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
10085 && mode != PTImode
10086 && !avoiding_indexed_address_p (mode)
10087 && legitimate_indexed_address_p (x, reg_ok_strict))
10088 return 1;
10089 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
10090 && mode_supports_pre_modify_p (mode)
10091 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
10092 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
10093 reg_ok_strict, false)
10094 || (!avoiding_indexed_address_p (mode)
10095 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
10096 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
10097 return 1;
10098 if (reg_offset_p && !quad_offset_p
10099 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
10100 return 1;
10101 return 0;
10104 /* Debug version of rs6000_legitimate_address_p. */
10105 static bool
10106 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
10107 bool reg_ok_strict)
10109 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
10110 fprintf (stderr,
10111 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10112 "strict = %d, reload = %s, code = %s\n",
10113 ret ? "true" : "false",
10114 GET_MODE_NAME (mode),
10115 reg_ok_strict,
10116 (reload_completed
10117 ? "after"
10118 : (reload_in_progress ? "progress" : "before")),
10119 GET_RTX_NAME (GET_CODE (x)));
10120 debug_rtx (x);
10122 return ret;
10125 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10127 static bool
10128 rs6000_mode_dependent_address_p (const_rtx addr,
10129 addr_space_t as ATTRIBUTE_UNUSED)
10131 return rs6000_mode_dependent_address_ptr (addr);
10134 /* Go to LABEL if ADDR (a legitimate address expression)
10135 has an effect that depends on the machine mode it is used for.
10137 On the RS/6000 this is true of all integral offsets (since AltiVec
10138 and VSX modes don't allow them) and of any pre-increment or decrement.
10140 ??? Except that due to conceptual problems in offsettable_address_p
10141 we can't really report the problems of integral offsets. So leave
10142 this assuming that the adjustable offset must be valid for the
10143 sub-words of a TFmode operand, which is what we had before. */
10145 static bool
10146 rs6000_mode_dependent_address (const_rtx addr)
10148 switch (GET_CODE (addr))
10150 case PLUS:
10151 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10152 is considered a legitimate address before reload, so there
10153 are no offset restrictions in that case. Note that this
10154 condition is safe in strict mode because any address involving
10155 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10156 been rejected as illegitimate. */
10157 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10158 && XEXP (addr, 0) != arg_pointer_rtx
10159 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
10161 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10162 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
10164 break;
10166 case LO_SUM:
10167 /* Anything in the constant pool is sufficiently aligned that
10168 all bytes have the same high part address. */
10169 return !legitimate_constant_pool_address_p (addr, QImode, false);
10171 /* Auto-increment cases are now treated generically in recog.c. */
10172 case PRE_MODIFY:
10173 return TARGET_UPDATE;
10175 /* AND is only allowed in Altivec loads. */
10176 case AND:
10177 return true;
10179 default:
10180 break;
10183 return false;
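/* Numeric illustration of the PLUS case above: adding 0x8000 turns
   the signed-range test into a single unsigned compare.  On
   PowerPC64 the largest safe offset is 0x7ff7, since 0x7ff8 + 0x8000
   = 0xfff8 >= 0x10000 - 8; anything below -0x8000 wraps around to a
   huge unsigned value and is likewise flagged as mode dependent.  */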
10186 /* Debug version of rs6000_mode_dependent_address. */
10187 static bool
10188 rs6000_debug_mode_dependent_address (const_rtx addr)
10190 bool ret = rs6000_mode_dependent_address (addr);
10192 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10193 ret ? "true" : "false");
10194 debug_rtx (addr);
10196 return ret;
10199 /* Implement FIND_BASE_TERM. */
10201 rtx
10202 rs6000_find_base_term (rtx op)
10204 rtx base;
10206 base = op;
10207 if (GET_CODE (base) == CONST)
10208 base = XEXP (base, 0);
10209 if (GET_CODE (base) == PLUS)
10210 base = XEXP (base, 0);
10211 if (GET_CODE (base) == UNSPEC)
10212 switch (XINT (base, 1))
10214 case UNSPEC_TOCREL:
10215 case UNSPEC_MACHOPIC_OFFSET:
10216 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10217 for aliasing purposes. */
10218 return XVECEXP (base, 0, 0);
10221 return op;
10224 /* More elaborate version of recog's offsettable_memref_p predicate
10225 that works around the ??? note of rs6000_mode_dependent_address.
10226 In particular it accepts
10228 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10230 in 32-bit mode, which the recog predicate rejects.
10232 static bool
10233 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
10235 bool worst_case;
10237 if (!MEM_P (op))
10238 return false;
10240 /* First mimic offsettable_memref_p. */
10241 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
10242 return true;
10244 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10245 the latter predicate knows nothing about the mode of the memory
10246 reference and, therefore, assumes that it is the largest supported
10247 mode (TFmode). As a consequence, legitimate offsettable memory
10248 references are rejected. rs6000_legitimate_offset_address_p contains
10249 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10250 at least with a little bit of help here given that we know the
10251 actual registers used. */
10252 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10253 || GET_MODE_SIZE (reg_mode) == 4);
10254 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10255 true, worst_case);
10258 /* Determine the reassociation width to be used in reassociate_bb.
10259 This takes into account how many parallel operations we
10260 can actually do of a given type, and also the latency.
10262 int add/sub 6/cycle
10263 mul 2/cycle
10264 vect add/sub/mul 2/cycle
10265 fp add/sub/mul 2/cycle
10266 dfp 1/cycle
10269 static int
10270 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10271 enum machine_mode mode)
10273 switch (rs6000_cpu)
10275 case PROCESSOR_POWER8:
10276 case PROCESSOR_POWER9:
10277 if (DECIMAL_FLOAT_MODE_P (mode))
10278 return 1;
10279 if (VECTOR_MODE_P (mode))
10280 return 4;
10281 if (INTEGRAL_MODE_P (mode))
10282 return opc == MULT_EXPR ? 4 : 6;
10283 if (FLOAT_MODE_P (mode))
10284 return 4;
10285 break;
10286 default:
10287 break;
10289 return 1;
10292 /* Change register usage conditional on target flags. */
10293 static void
10294 rs6000_conditional_register_usage (void)
10296 int i;
10298 if (TARGET_DEBUG_TARGET)
10299 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10301 /* Set MQ register fixed (already call_used) so that it will not be
10302 allocated. */
10303 fixed_regs[64] = 1;
10305 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10306 if (TARGET_64BIT)
10307 fixed_regs[13] = call_used_regs[13]
10308 = call_really_used_regs[13] = 1;
10310 /* Conditionally disable FPRs. */
10311 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
10312 for (i = 32; i < 64; i++)
10313 fixed_regs[i] = call_used_regs[i]
10314 = call_really_used_regs[i] = 1;
10316 /* The TOC register is not killed across calls in a way that is
10317 visible to the compiler. */
10318 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10319 call_really_used_regs[2] = 0;
10321 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10322 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10324 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10325 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10326 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10327 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10329 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10330 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10331 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10332 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10334 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10335 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10336 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10338 if (TARGET_SPE)
10340 global_regs[SPEFSCR_REGNO] = 1;
10341 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
10342 registers in prologues and epilogues. We no longer use r14
10343 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
10344 pool for link-compatibility with older versions of GCC. Once
10345 "old" code has died out, we can return r14 to the allocation
10346 pool. */
10347 fixed_regs[14]
10348 = call_used_regs[14]
10349 = call_really_used_regs[14] = 1;
10352 if (!TARGET_ALTIVEC && !TARGET_VSX)
10354 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10355 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10356 call_really_used_regs[VRSAVE_REGNO] = 1;
10359 if (TARGET_ALTIVEC || TARGET_VSX)
10360 global_regs[VSCR_REGNO] = 1;
10362 if (TARGET_ALTIVEC_ABI)
10364 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10365 call_used_regs[i] = call_really_used_regs[i] = 1;
10367 /* AIX reserves VR20:31 in non-extended ABI mode. */
10368 if (TARGET_XCOFF)
10369 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10370 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10375 /* Output insns to set DEST equal to the constant SOURCE as a series of
10376 lis, ori and sldi instructions and return TRUE. */
10378 bool
10379 rs6000_emit_set_const (rtx dest, rtx source)
10381 machine_mode mode = GET_MODE (dest);
10382 rtx temp, set;
10383 rtx_insn *insn;
10384 HOST_WIDE_INT c;
10386 gcc_checking_assert (CONST_INT_P (source));
10387 c = INTVAL (source);
10388 switch (mode)
10390 case QImode:
10391 case HImode:
10392 emit_insn (gen_rtx_SET (dest, source));
10393 return true;
10395 case SImode:
10396 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10398 emit_insn (gen_rtx_SET (copy_rtx (temp),
10399 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10400 emit_insn (gen_rtx_SET (dest,
10401 gen_rtx_IOR (SImode, copy_rtx (temp),
10402 GEN_INT (c & 0xffff))));
10403 break;
10405 case DImode:
10406 if (!TARGET_POWERPC64)
10408 rtx hi, lo;
10410 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10411 DImode);
10412 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10413 DImode);
10414 emit_move_insn (hi, GEN_INT (c >> 32));
10415 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10416 emit_move_insn (lo, GEN_INT (c));
10418 else
10419 rs6000_emit_set_long_const (dest, c);
10420 break;
10422 default:
10423 gcc_unreachable ();
10426 insn = get_last_insn ();
10427 set = single_set (insn);
10428 if (! CONSTANT_P (SET_SRC (set)))
10429 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10431 return true;
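/* For instance, the SImode path above builds C = 0x12345678 as

       lis  rT, 0x1234	      # rT <- 0x12340000; the SET of C & ~0xffff
       ori  rD, rT, 0x5678    # the IOR of the low halfword

   (register numbers hypothetical).  */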
10434 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10435 Output insns to set DEST equal to the constant C as a series of
10436 lis, ori and sldi instructions. */
10438 static void
10439 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10441 rtx temp;
10442 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10444 ud1 = c & 0xffff;
10445 c = c >> 16;
10446 ud2 = c & 0xffff;
10447 c = c >> 16;
10448 ud3 = c & 0xffff;
10449 c = c >> 16;
10450 ud4 = c & 0xffff;
10452 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10453 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10454 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10456 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10457 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10459 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10461 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10462 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10463 if (ud1 != 0)
10464 emit_move_insn (dest,
10465 gen_rtx_IOR (DImode, copy_rtx (temp),
10466 GEN_INT (ud1)));
10468 else if (ud3 == 0 && ud4 == 0)
10470 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10472 gcc_assert (ud2 & 0x8000);
10473 emit_move_insn (copy_rtx (temp),
10474 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10475 if (ud1 != 0)
10476 emit_move_insn (copy_rtx (temp),
10477 gen_rtx_IOR (DImode, copy_rtx (temp),
10478 GEN_INT (ud1)));
10479 emit_move_insn (dest,
10480 gen_rtx_ZERO_EXTEND (DImode,
10481 gen_lowpart (SImode,
10482 copy_rtx (temp))));
10484 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10485 || (ud4 == 0 && ! (ud3 & 0x8000)))
10487 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10489 emit_move_insn (copy_rtx (temp),
10490 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10491 if (ud2 != 0)
10492 emit_move_insn (copy_rtx (temp),
10493 gen_rtx_IOR (DImode, copy_rtx (temp),
10494 GEN_INT (ud2)));
10495 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10496 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10497 GEN_INT (16)));
10498 if (ud1 != 0)
10499 emit_move_insn (dest,
10500 gen_rtx_IOR (DImode, copy_rtx (temp),
10501 GEN_INT (ud1)));
10503 else
10505 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10507 emit_move_insn (copy_rtx (temp),
10508 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10509 if (ud3 != 0)
10510 emit_move_insn (copy_rtx (temp),
10511 gen_rtx_IOR (DImode, copy_rtx (temp),
10512 GEN_INT (ud3)));
10514 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10515 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10516 GEN_INT (32)));
10517 if (ud2 != 0)
10518 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10519 gen_rtx_IOR (DImode, copy_rtx (temp),
10520 GEN_INT (ud2 << 16)));
10521 if (ud1 != 0)
10522 emit_move_insn (dest,
10523 gen_rtx_IOR (DImode, copy_rtx (temp),
10524 GEN_INT (ud1)));
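/* Worked example of the general (final) case above, for the
   hypothetical constant C = 0x123456789abcdef0, i.e. ud4 = 0x1234,
   ud3 = 0x5678, ud2 = 0x9abc, ud1 = 0xdef0:

       lis   rT, 0x1234	       # ud4 << 16
       ori   rT, rT, 0x5678    # or in ud3
       sldi  rT, rT, 32	       # shift into the high doubleword
       oris  rT, rT, 0x9abc    # or in ud2 << 16
       ori   rD, rT, 0xdef0    # or in ud1
 */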
10528 /* Helper for the following. Get rid of [r+r] memory refs
10529 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10531 static void
10532 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10534 if (reload_in_progress)
10535 return;
10537 if (GET_CODE (operands[0]) == MEM
10538 && GET_CODE (XEXP (operands[0], 0)) != REG
10539 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10540 GET_MODE (operands[0]), false))
10541 operands[0]
10542 = replace_equiv_address (operands[0],
10543 copy_addr_to_reg (XEXP (operands[0], 0)));
10545 if (GET_CODE (operands[1]) == MEM
10546 && GET_CODE (XEXP (operands[1], 0)) != REG
10547 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10548 GET_MODE (operands[1]), false))
10549 operands[1]
10550 = replace_equiv_address (operands[1],
10551 copy_addr_to_reg (XEXP (operands[1], 0)));
10554 /* Generate a vector of constants to permute MODE for a little-endian
10555 storage operation by swapping the two halves of a vector. */
10556 static rtvec
10557 rs6000_const_vec (machine_mode mode)
10559 int i, subparts;
10560 rtvec v;
10562 switch (mode)
10564 case V1TImode:
10565 subparts = 1;
10566 break;
10567 case V2DFmode:
10568 case V2DImode:
10569 subparts = 2;
10570 break;
10571 case V4SFmode:
10572 case V4SImode:
10573 subparts = 4;
10574 break;
10575 case V8HImode:
10576 subparts = 8;
10577 break;
10578 case V16QImode:
10579 subparts = 16;
10580 break;
10581 default:
10582 gcc_unreachable();
10585 v = rtvec_alloc (subparts);
10587 for (i = 0; i < subparts / 2; ++i)
10588 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10589 for (i = subparts / 2; i < subparts; ++i)
10590 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10592 return v;
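/* For example, V4SImode (subparts == 4) yields the selector
   {2, 3, 0, 1}: the two halves of the vector trade places, which is
   exactly the doubleword swap that lxvd2x/stxvd2x perform on a
   little-endian target.  */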
10595 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10596 for a VSX load or store operation. */
10597 rtx
10598 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10600 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10601 128-bit integers if they are allowed in VSX registers. */
10602 if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
10603 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10604 else
10606 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10607 return gen_rtx_VEC_SELECT (mode, source, par);
10611 /* Emit a little-endian load from vector memory location SOURCE to VSX
10612 register DEST in mode MODE. The load is done with two permuting
10613 insns that represent an lxvd2x and xxpermdi.
10614 void
10615 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10617 rtx tmp, permute_mem, permute_reg;
10619 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10620 V1TImode). */
10621 if (mode == TImode || mode == V1TImode)
10623 mode = V2DImode;
10624 dest = gen_lowpart (V2DImode, dest);
10625 source = adjust_address (source, V2DImode, 0);
10628 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10629 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10630 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10631 emit_insn (gen_rtx_SET (tmp, permute_mem));
10632 emit_insn (gen_rtx_SET (dest, permute_reg));
10635 /* Emit a little-endian store to vector memory location DEST from VSX
10636 register SOURCE in mode MODE. The store is done with two permuting
10637 insns that represent an xxpermdi and an stxvd2x.
10638 void
10639 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10641 rtx tmp, permute_src, permute_tmp;
10643 /* This should never be called during or after reload, because it does
10644 not re-permute the source register. It is intended only for use
10645 during expand. */
10646 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10648 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10649 V1TImode). */
10650 if (mode == TImode || mode == V1TImode)
10652 mode = V2DImode;
10653 dest = adjust_address (dest, V2DImode, 0);
10654 source = gen_lowpart (V2DImode, source);
10657 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10658 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10659 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10660 emit_insn (gen_rtx_SET (tmp, permute_src));
10661 emit_insn (gen_rtx_SET (dest, permute_tmp));
10664 /* Emit a sequence representing a little-endian VSX load or store,
10665 moving data from SOURCE to DEST in mode MODE. This is done
10666 separately from rs6000_emit_move to ensure it is called only
10667 during expand. LE VSX loads and stores introduced later are
10668 handled with a split. The expand-time RTL generation allows
10669 us to optimize away redundant pairs of register-permutes. */
10670 void
10671 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10673 gcc_assert (!BYTES_BIG_ENDIAN
10674 && VECTOR_MEM_VSX_P (mode)
10675 && !TARGET_P9_VECTOR
10676 && !gpr_or_gpr_p (dest, source)
10677 && (MEM_P (source) ^ MEM_P (dest)));
10679 if (MEM_P (source))
10681 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10682 rs6000_emit_le_vsx_load (dest, source, mode);
10684 else
10686 if (!REG_P (source))
10687 source = force_reg (mode, source);
10688 rs6000_emit_le_vsx_store (dest, source, mode);
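/* Shape of the sequence emitted for a little-endian V2DI load (an
   editorial sketch):

       (set (reg tmp)  (vec_select (mem ...) [1 0]))	; lxvd2x
       (set (reg dest) (vec_select (reg tmp) [1 0]))	; xxpermdi

   Two such element swaps compose to the identity, which is what lets
   later passes delete the redundant permute pairs this expand-time
   generation exposes.  */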
10692 /* Return whether an SFmode or SImode move can be done without converting one
10693 mode to another. This arises when we have:
10695 (SUBREG:SF (REG:SI ...))
10696 (SUBREG:SI (REG:SF ...))
10698 and one of the values is in a floating point/vector register, where SFmode
10699 scalars are stored in DFmode format. */
10701 bool
10702 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10704 if (TARGET_ALLOW_SF_SUBREG)
10705 return true;
10707 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10708 return true;
10710 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10711 return true;
10713 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10714 if (SUBREG_P (dest))
10716 rtx dest_subreg = SUBREG_REG (dest);
10717 rtx src_subreg = SUBREG_REG (src);
10718 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10721 return false;
10725 /* Helper function to change moves with:
10727 (SUBREG:SF (REG:SI)) and
10728 (SUBREG:SI (REG:SF))
10730 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10731 values are stored as DFmode values in the VSX registers. We need to convert
10732 the bits before we can use a direct move or operate on the bits in the
10733 vector register as an integer type.
10735 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10737 static bool
10738 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10740 if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
10741 && !lra_in_progress
10742 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10743 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10745 rtx inner_source = SUBREG_REG (source);
10746 machine_mode inner_mode = GET_MODE (inner_source);
10748 if (mode == SImode && inner_mode == SFmode)
10750 emit_insn (gen_movsi_from_sf (dest, inner_source));
10751 return true;
10754 if (mode == SFmode && inner_mode == SImode)
10756 emit_insn (gen_movsf_from_si (dest, inner_source));
10757 return true;
10761 return false;
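/* For example, a move written as

       (set (reg:SI rI) (subreg:SI (reg:SF rF) 0))

   is rewritten into the movsi_from_sf pattern, so that the SFmode
   value, kept in DFmode format in a VSX register, is converted to
   its 32-bit storage image before its bits are used as an integer.  */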
10764 /* Emit a move from SOURCE to DEST in mode MODE. */
10765 void
10766 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10768 rtx operands[2];
10769 operands[0] = dest;
10770 operands[1] = source;
10772 if (TARGET_DEBUG_ADDR)
10774 fprintf (stderr,
10775 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10776 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10777 GET_MODE_NAME (mode),
10778 reload_in_progress,
10779 reload_completed,
10780 can_create_pseudo_p ());
10781 debug_rtx (dest);
10782 fprintf (stderr, "source:\n");
10783 debug_rtx (source);
10786 /* Sanity check. A CONST_WIDE_INT should never be needed for a value that fits in a single HOST_WIDE_INT. */
10787 if (CONST_WIDE_INT_P (operands[1])
10788 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10790 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10791 gcc_unreachable ();
10794 /* See if we need to special case SImode/SFmode SUBREG moves. */
10795 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10796 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10797 return;
10799 /* Check if GCC is setting up a block move that will end up using FP
10800 registers as temporaries. We must make sure this is acceptable. */
10801 if (GET_CODE (operands[0]) == MEM
10802 && GET_CODE (operands[1]) == MEM
10803 && mode == DImode
10804 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10805 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10806 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10807 ? 32 : MEM_ALIGN (operands[0])))
10808 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10809 ? 32
10810 : MEM_ALIGN (operands[1]))))
10811 && ! MEM_VOLATILE_P (operands [0])
10812 && ! MEM_VOLATILE_P (operands [1]))
10814 emit_move_insn (adjust_address (operands[0], SImode, 0),
10815 adjust_address (operands[1], SImode, 0));
10816 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10817 adjust_address (copy_rtx (operands[1]), SImode, 4));
10818 return;
10821 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10822 && !gpc_reg_operand (operands[1], mode))
10823 operands[1] = force_reg (mode, operands[1]);
10825 /* Recognize the case where operand[1] is a reference to thread-local
10826 data and load its address to a register. */
10827 if (tls_referenced_p (operands[1]))
10829 enum tls_model model;
10830 rtx tmp = operands[1];
10831 rtx addend = NULL;
10833 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10835 addend = XEXP (XEXP (tmp, 0), 1);
10836 tmp = XEXP (XEXP (tmp, 0), 0);
10839 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10840 model = SYMBOL_REF_TLS_MODEL (tmp);
10841 gcc_assert (model != 0);
10843 tmp = rs6000_legitimize_tls_address (tmp, model);
10844 if (addend)
10846 tmp = gen_rtx_PLUS (mode, tmp, addend);
10847 tmp = force_operand (tmp, operands[0]);
10849 operands[1] = tmp;
10852 /* Handle the case where reload calls us with an invalid address. */
10853 if (reload_in_progress && mode == Pmode
10854 && (! general_operand (operands[1], mode)
10855 || ! nonimmediate_operand (operands[0], mode)))
10856 goto emit_set;
10858 /* 128-bit constant floating-point values on Darwin should really be loaded
10859 as two parts. However, this premature splitting is a problem when DFmode
10860 values can go into Altivec registers. */
10861 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10862 && GET_CODE (operands[1]) == CONST_DOUBLE)
10864 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10865 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10866 DFmode);
10867 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10868 GET_MODE_SIZE (DFmode)),
10869 simplify_gen_subreg (DFmode, operands[1], mode,
10870 GET_MODE_SIZE (DFmode)),
10871 DFmode);
10872 return;
10875 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
10876 cfun->machine->sdmode_stack_slot =
10877 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
10880 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10881 p1:SD) if p1 is not of floating point class and p0 is spilled, as
10882 we can have no analogous movsd_store for this. */
10883 if (lra_in_progress && mode == DDmode
10884 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10885 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10886 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10887 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10889 enum reg_class cl;
10890 int regno = REGNO (SUBREG_REG (operands[1]));
10892 if (regno >= FIRST_PSEUDO_REGISTER)
10894 cl = reg_preferred_class (regno);
10895 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10897 if (regno >= 0 && ! FP_REGNO_P (regno))
10899 mode = SDmode;
10900 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10901 operands[1] = SUBREG_REG (operands[1]);
10904 if (lra_in_progress
10905 && mode == SDmode
10906 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10907 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10908 && (REG_P (operands[1])
10909 || (GET_CODE (operands[1]) == SUBREG
10910 && REG_P (SUBREG_REG (operands[1])))))
10912 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10913 ? SUBREG_REG (operands[1]) : operands[1]);
10914 enum reg_class cl;
10916 if (regno >= FIRST_PSEUDO_REGISTER)
10918 cl = reg_preferred_class (regno);
10919 gcc_assert (cl != NO_REGS);
10920 regno = ira_class_hard_regs[cl][0];
10922 if (FP_REGNO_P (regno))
10924 if (GET_MODE (operands[0]) != DDmode)
10925 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10926 emit_insn (gen_movsd_store (operands[0], operands[1]));
10928 else if (INT_REGNO_P (regno))
10929 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10930 else
10931 gcc_unreachable ();
10932 return;
10934 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10935 p1:DD)) if p0 is not of floating-point class and p1 is spilled,
10936 because we have no analogous movsd_load for this case. */
10937 if (lra_in_progress && mode == DDmode
10938 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10939 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10940 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10941 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10943 enum reg_class cl;
10944 int regno = REGNO (SUBREG_REG (operands[0]));
10946 if (regno >= FIRST_PSEUDO_REGISTER)
10948 cl = reg_preferred_class (regno);
10949 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10951 if (regno >= 0 && ! FP_REGNO_P (regno))
10953 mode = SDmode;
10954 operands[0] = SUBREG_REG (operands[0]);
10955 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10958 if (lra_in_progress
10959 && mode == SDmode
10960 && (REG_P (operands[0])
10961 || (GET_CODE (operands[0]) == SUBREG
10962 && REG_P (SUBREG_REG (operands[0]))))
10963 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10964 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10966 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10967 ? SUBREG_REG (operands[0]) : operands[0]);
10968 enum reg_class cl;
10970 if (regno >= FIRST_PSEUDO_REGISTER)
10972 cl = reg_preferred_class (regno);
10973 gcc_assert (cl != NO_REGS);
10974 regno = ira_class_hard_regs[cl][0];
10976 if (FP_REGNO_P (regno))
10978 if (GET_MODE (operands[1]) != DDmode)
10979 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10980 emit_insn (gen_movsd_load (operands[0], operands[1]));
10982 else if (INT_REGNO_P (regno))
10983 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10984 else
10985 gcc_unreachable ();
10986 return;
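/* Added commentary: taken together, the four lra_in_progress blocks above
   keep SDmode values from being accessed through a subreg of a spilled
   DDmode pseudo.  Roughly (illustrative sketch):
     (set (reg:DD p0) (subreg:DD (reg:SD p1) 0))
   is rewritten as
     (set (subreg:SD (reg:DD p0) 0) (reg:SD p1))
   when p0 will end up in memory, and the movsd_store / movsd_load /
   movsd_hardfloat patterns are emitted once a hard FP or integer
   register is involved. */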
10989 if (reload_in_progress
10990 && mode == SDmode
10991 && cfun->machine->sdmode_stack_slot != NULL_RTX
10992 && MEM_P (operands[0])
10993 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10994 && REG_P (operands[1]))
10996 if (FP_REGNO_P (REGNO (operands[1])))
10998 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10999 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11000 emit_insn (gen_movsd_store (mem, operands[1]));
11002 else if (INT_REGNO_P (REGNO (operands[1])))
11004 rtx mem = operands[0];
11005 if (BYTES_BIG_ENDIAN)
11006 mem = adjust_address_nv (mem, mode, 4);
11007 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11008 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
11010 else
11011 gcc_unreachable ();
11012 return;
11014 if (reload_in_progress
11015 && mode == SDmode
11016 && REG_P (operands[0])
11017 && MEM_P (operands[1])
11018 && cfun->machine->sdmode_stack_slot != NULL_RTX
11019 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
11021 if (FP_REGNO_P (REGNO (operands[0])))
11023 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
11024 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11025 emit_insn (gen_movsd_load (operands[0], mem));
11027 else if (INT_REGNO_P (REGNO (operands[0])))
11029 rtx mem = operands[1];
11030 if (BYTES_BIG_ENDIAN)
11031 mem = adjust_address_nv (mem, mode, 4);
11032 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11033 emit_insn (gen_movsd_hardfloat (operands[0], mem));
11035 else
11036 gcc_unreachable ();
11037 return;
11040 /* FIXME: In the long term, this switch statement should go away
11041 and be replaced by a sequence of tests based on things like
11042 mode == Pmode. */
11043 switch (mode)
11045 case HImode:
11046 case QImode:
11047 if (CONSTANT_P (operands[1])
11048 && GET_CODE (operands[1]) != CONST_INT)
11049 operands[1] = force_const_mem (mode, operands[1]);
11050 break;
11052 case TFmode:
11053 case TDmode:
11054 case IFmode:
11055 case KFmode:
11056 if (FLOAT128_2REG_P (mode))
11057 rs6000_eliminate_indexed_memrefs (operands);
11058 /* fall through */
11060 case DFmode:
11061 case DDmode:
11062 case SFmode:
11063 case SDmode:
11064 if (CONSTANT_P (operands[1])
11065 && ! easy_fp_constant (operands[1], mode))
11066 operands[1] = force_const_mem (mode, operands[1]);
11067 break;
11069 case V16QImode:
11070 case V8HImode:
11071 case V4SFmode:
11072 case V4SImode:
11073 case V4HImode:
11074 case V2SFmode:
11075 case V2SImode:
11076 case V1DImode:
11077 case V2DFmode:
11078 case V2DImode:
11079 case V1TImode:
11080 if (CONSTANT_P (operands[1])
11081 && !easy_vector_constant (operands[1], mode))
11082 operands[1] = force_const_mem (mode, operands[1]);
11083 break;
11085 case SImode:
11086 case DImode:
11087 /* Use default pattern for address of ELF small data. */
11088 if (TARGET_ELF
11089 && mode == Pmode
11090 && DEFAULT_ABI == ABI_V4
11091 && (GET_CODE (operands[1]) == SYMBOL_REF
11092 || GET_CODE (operands[1]) == CONST)
11093 && small_data_operand (operands[1], mode))
11095 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11096 return;
11099 if (DEFAULT_ABI == ABI_V4
11100 && mode == Pmode && mode == SImode
11101 && flag_pic == 1 && got_operand (operands[1], mode))
11103 emit_insn (gen_movsi_got (operands[0], operands[1]));
11104 return;
11107 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11108 && TARGET_NO_TOC
11109 && ! flag_pic
11110 && mode == Pmode
11111 && CONSTANT_P (operands[1])
11112 && GET_CODE (operands[1]) != HIGH
11113 && GET_CODE (operands[1]) != CONST_INT)
11115 rtx target = (!can_create_pseudo_p ()
11116 ? operands[0]
11117 : gen_reg_rtx (mode));
11119 /* If this is a function address on -mcall-aixdesc,
11120 convert it to the address of the descriptor. */
11121 if (DEFAULT_ABI == ABI_AIX
11122 && GET_CODE (operands[1]) == SYMBOL_REF
11123 && XSTR (operands[1], 0)[0] == '.')
11125 const char *name = XSTR (operands[1], 0);
11126 rtx new_ref;
11127 while (*name == '.')
11128 name++;
11129 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11130 CONSTANT_POOL_ADDRESS_P (new_ref)
11131 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11132 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11133 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11134 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11135 operands[1] = new_ref;
11138 if (DEFAULT_ABI == ABI_DARWIN)
11140 #if TARGET_MACHO
11141 if (MACHO_DYNAMIC_NO_PIC_P)
11143 /* Take care of any required data indirection. */
11144 operands[1] = rs6000_machopic_legitimize_pic_address (
11145 operands[1], mode, operands[0]);
11146 if (operands[0] != operands[1])
11147 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11148 return;
11150 #endif
11151 emit_insn (gen_macho_high (target, operands[1]));
11152 emit_insn (gen_macho_low (operands[0], target, operands[1]));
11153 return;
11156 emit_insn (gen_elf_high (target, operands[1]));
11157 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11158 return;
11161 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11162 and we have put it in the TOC, we just need to make a TOC-relative
11163 reference to it. */
11164 if (TARGET_TOC
11165 && GET_CODE (operands[1]) == SYMBOL_REF
11166 && use_toc_relative_ref (operands[1], mode))
11167 operands[1] = create_TOC_reference (operands[1], operands[0]);
11168 else if (mode == Pmode
11169 && CONSTANT_P (operands[1])
11170 && GET_CODE (operands[1]) != HIGH
11171 && ((GET_CODE (operands[1]) != CONST_INT
11172 && ! easy_fp_constant (operands[1], mode))
11173 || (GET_CODE (operands[1]) == CONST_INT
11174 && (num_insns_constant (operands[1], mode)
11175 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11176 || (GET_CODE (operands[0]) == REG
11177 && FP_REGNO_P (REGNO (operands[0]))))
11178 && !toc_relative_expr_p (operands[1], false)
11179 && (TARGET_CMODEL == CMODEL_SMALL
11180 || can_create_pseudo_p ()
11181 || (REG_P (operands[0])
11182 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11185 #if TARGET_MACHO
11186 /* Darwin uses a special PIC legitimizer. */
11187 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11189 operands[1] =
11190 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11191 operands[0]);
11192 if (operands[0] != operands[1])
11193 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11194 return;
11196 #endif
11198 /* If we are to limit the number of things we put in the TOC and
11199 this is a symbol plus a constant we can add in one insn,
11200 just put the symbol in the TOC and add the constant. Don't do
11201 this if reload is in progress. */
11202 if (GET_CODE (operands[1]) == CONST
11203 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
11204 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11205 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11206 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11207 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
11208 && ! side_effects_p (operands[0]))
11210 rtx sym =
11211 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11212 rtx other = XEXP (XEXP (operands[1], 0), 1);
11214 sym = force_reg (mode, sym);
11215 emit_insn (gen_add3_insn (operands[0], sym, other));
11216 return;
11219 operands[1] = force_const_mem (mode, operands[1]);
11221 if (TARGET_TOC
11222 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11223 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11225 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11226 operands[0]);
11227 operands[1] = gen_const_mem (mode, tocref);
11228 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11231 break;
11233 case TImode:
11234 if (!VECTOR_MEM_VSX_P (TImode))
11235 rs6000_eliminate_indexed_memrefs (operands);
11236 break;
11238 case PTImode:
11239 rs6000_eliminate_indexed_memrefs (operands);
11240 break;
11242 default:
11243 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11246 /* Above, we may have called force_const_mem which may have returned
11247 an invalid address. If we can, fix this up; otherwise, reload will
11248 have to deal with it. */
11249 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
11250 operands[1] = validize_mem (operands[1]);
11252 emit_set:
11253 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11256 /* Return true if a structure, union or array containing FIELD should be
11257 accessed using `BLKmode'.
11259 For the SPE, simd types are V2SI, and gcc can be tempted to put the
11260 entire thing in a DI and use subregs to access the internals.
11261 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
11262 back-end. Because a single GPR can hold a V2SI, but not a DI, the
11263 best thing to do is set structs to BLKmode and avoid Severe Tire
11264 Damage.
11266 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
11267 fit into one GPR, whereas DI still needs two. */
11269 static bool
11270 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
11272 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
11273 || (TARGET_E500_DOUBLE && mode == DFmode));
11276 /* Nonzero if we can use a floating-point register to pass this arg. */
11277 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11278 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11279 && (CUM)->fregno <= FP_ARG_MAX_REG \
11280 && TARGET_HARD_FLOAT && TARGET_FPRS)
11282 /* Nonzero if we can use an AltiVec register to pass this arg. */
11283 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11284 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11285 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11286 && TARGET_ALTIVEC_ABI \
11287 && (NAMED))
11289 /* Walk down the type tree of TYPE counting consecutive base elements.
11290 If *MODEP is VOIDmode, then set it to the first valid floating point
11291 or vector type. If a non-floating point or vector type is found, or
11292 if a floating point or vector type that doesn't match a non-VOIDmode
11293 *MODEP is found, then return -1, otherwise return the count in the
11294 sub-tree. */
11296 static int
11297 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
11299 machine_mode mode;
11300 HOST_WIDE_INT size;
11302 switch (TREE_CODE (type))
11304 case REAL_TYPE:
11305 mode = TYPE_MODE (type);
11306 if (!SCALAR_FLOAT_MODE_P (mode))
11307 return -1;
11309 if (*modep == VOIDmode)
11310 *modep = mode;
11312 if (*modep == mode)
11313 return 1;
11315 break;
11317 case COMPLEX_TYPE:
11318 mode = TYPE_MODE (TREE_TYPE (type));
11319 if (!SCALAR_FLOAT_MODE_P (mode))
11320 return -1;
11322 if (*modep == VOIDmode)
11323 *modep = mode;
11325 if (*modep == mode)
11326 return 2;
11328 break;
11330 case VECTOR_TYPE:
11331 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
11332 return -1;
11334 /* Use V4SImode as representative of all 128-bit vector types. */
11335 size = int_size_in_bytes (type);
11336 switch (size)
11338 case 16:
11339 mode = V4SImode;
11340 break;
11341 default:
11342 return -1;
11345 if (*modep == VOIDmode)
11346 *modep = mode;
11348 /* Vector modes are considered to be opaque: two vectors are
11349 equivalent for the purposes of being homogeneous aggregates
11350 if they are the same size. */
11351 if (*modep == mode)
11352 return 1;
11354 break;
11356 case ARRAY_TYPE:
11358 int count;
11359 tree index = TYPE_DOMAIN (type);
11361 /* Can't handle incomplete types or sizes that are not
11362 fixed. */
11363 if (!COMPLETE_TYPE_P (type)
11364 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11365 return -1;
11367 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11368 if (count == -1
11369 || !index
11370 || !TYPE_MAX_VALUE (index)
11371 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11372 || !TYPE_MIN_VALUE (index)
11373 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11374 || count < 0)
11375 return -1;
11377 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11378 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11380 /* There must be no padding. */
11381 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11382 return -1;
11384 return count;
11387 case RECORD_TYPE:
11389 int count = 0;
11390 int sub_count;
11391 tree field;
11393 /* Can't handle incomplete types or sizes that are not
11394 fixed. */
11395 if (!COMPLETE_TYPE_P (type)
11396 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11397 return -1;
11399 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11401 if (TREE_CODE (field) != FIELD_DECL)
11402 continue;
11404 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11405 if (sub_count < 0)
11406 return -1;
11407 count += sub_count;
11410 /* There must be no padding. */
11411 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11412 return -1;
11414 return count;
11417 case UNION_TYPE:
11418 case QUAL_UNION_TYPE:
11420 /* These aren't very interesting except in a degenerate case. */
11421 int count = 0;
11422 int sub_count;
11423 tree field;
11425 /* Can't handle incomplete types or sizes that are not
11426 fixed. */
11427 if (!COMPLETE_TYPE_P (type)
11428 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11429 return -1;
11431 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11433 if (TREE_CODE (field) != FIELD_DECL)
11434 continue;
11436 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11437 if (sub_count < 0)
11438 return -1;
11439 count = count > sub_count ? count : sub_count;
11442 /* There must be no padding. */
11443 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11444 return -1;
11446 return count;
11449 default:
11450 break;
11453 return -1;
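/* Worked example (added commentary): for
     struct { _Complex double z; double d; }
   the COMPLEX_TYPE field contributes 2 with *MODEP set to DFmode, the
   REAL_TYPE field contributes 1, and the RECORD_TYPE case returns 3,
   since the struct has no padding.  Adding a float member instead would
   fail the *MODEP == mode test and make the whole walk return -1. */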
11456 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11457 float or vector aggregate that shall be passed in FP/vector registers
11458 according to the ELFv2 ABI, return the homogeneous element mode in
11459 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11461 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11463 static bool
11464 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11465 machine_mode *elt_mode,
11466 int *n_elts)
11468 /* Note that we do not accept complex types at the top level as
11469 homogeneous aggregates; these types are handled via the
11470 targetm.calls.split_complex_arg mechanism. Complex types
11471 can be elements of homogeneous aggregates, however. */
11472 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11474 machine_mode field_mode = VOIDmode;
11475 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11477 if (field_count > 0)
11479 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11480 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11482 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11483 up to AGGR_ARG_NUM_REG registers. */
11484 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11486 if (elt_mode)
11487 *elt_mode = field_mode;
11488 if (n_elts)
11489 *n_elts = field_count;
11490 return true;
11495 if (elt_mode)
11496 *elt_mode = mode;
11497 if (n_elts)
11498 *n_elts = 1;
11499 return false;
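/* Usage sketch (added commentary; assumes AGGR_ARG_NUM_REG is 8, as on
   current ELFv2 targets): struct { double d[8]; } is accepted as a
   homogeneous aggregate of eight DFmode elements, while a nine-element
   array fails the field_count * n_regs <= AGGR_ARG_NUM_REG test and is
   passed and returned like any other aggregate. */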
11502 /* Return a nonzero value to say to return the function value in
11503 memory, just as large structures are always returned. TYPE will be
11504 the data type of the value, and FNTYPE will be the type of the
11505 function doing the returning, or @code{NULL} for libcalls.
11507 The AIX ABI for the RS/6000 specifies that all structures are
11508 returned in memory. The Darwin ABI does the same.
11510 For the Darwin 64 Bit ABI, a function result can be returned in
11511 registers or in memory, depending on the size of the return data
11512 type. If it is returned in registers, the value occupies the same
11513 registers as it would if it were the first and only function
11514 argument. Otherwise, the function places its result in memory at
11515 the location pointed to by GPR3.
11517 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11518 but a draft put them in memory, and GCC used to implement the draft
11519 instead of the final standard. Therefore, aix_struct_return
11520 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11521 compatibility can change DRAFT_V4_STRUCT_RET to override the
11522 default, and -m switches get the final word. See
11523 rs6000_option_override_internal for more details.
11525 The PPC32 SVR4 ABI uses IEEE 128-bit floating point for long double, if
11526 128-bit long double support is enabled (TARGET_IEEEQUAD). These values are returned in memory.
11528 int_size_in_bytes returns -1 for variable size objects, which go in
11529 memory always. The cast to unsigned makes -1 > 8. */
11531 static bool
11532 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11534 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11535 if (TARGET_MACHO
11536 && rs6000_darwin64_abi
11537 && TREE_CODE (type) == RECORD_TYPE
11538 && int_size_in_bytes (type) > 0)
11540 CUMULATIVE_ARGS valcum;
11541 rtx valret;
11543 valcum.words = 0;
11544 valcum.fregno = FP_ARG_MIN_REG;
11545 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11546 /* Do a trial code generation as if this were going to be passed
11547 as an argument; if any part goes in memory, we return NULL. */
11548 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11549 if (valret)
11550 return false;
11551 /* Otherwise fall through to more conventional ABI rules. */
11554 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11555 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11556 NULL, NULL))
11557 return false;
11559 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11560 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11561 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11562 return false;
11564 if (AGGREGATE_TYPE_P (type)
11565 && (aix_struct_return
11566 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11567 return true;
11569 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11570 modes only exist for GCC vector types if -maltivec. */
11571 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11572 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11573 return false;
11575 /* Return synthetic vectors in memory. */
11576 if (TREE_CODE (type) == VECTOR_TYPE
11577 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11579 static bool warned_for_return_big_vectors = false;
11580 if (!warned_for_return_big_vectors)
11582 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11583 "non-standard ABI extension with no compatibility guarantee");
11584 warned_for_return_big_vectors = true;
11586 return true;
11589 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11590 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11591 return true;
11593 return false;
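/* For instance (added commentary, hedged): under ELFv2 a returned
   struct { float x, y; } is a homogeneous aggregate and comes back in
   FP registers, whereas under the AIX ABI with aix_struct_return set
   the same struct is returned in memory through the buffer whose
   address the caller supplies. */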
11596 /* Specify whether values returned in registers should be at the most
11597 significant end of a register. We want aggregates returned by
11598 value to match the way aggregates are passed to functions. */
11600 static bool
11601 rs6000_return_in_msb (const_tree valtype)
11603 return (DEFAULT_ABI == ABI_ELFv2
11604 && BYTES_BIG_ENDIAN
11605 && AGGREGATE_TYPE_P (valtype)
11606 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
11609 #ifdef HAVE_AS_GNU_ATTRIBUTE
11610 /* Return TRUE if a call to function FNDECL may be one that
11611 potentially affects the function calling ABI of the object file. */
11613 static bool
11614 call_ABI_of_interest (tree fndecl)
11616 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11618 struct cgraph_node *c_node;
11620 /* Libcalls are always interesting. */
11621 if (fndecl == NULL_TREE)
11622 return true;
11624 /* Any call to an external function is interesting. */
11625 if (DECL_EXTERNAL (fndecl))
11626 return true;
11628 /* Interesting functions that we are emitting in this object file. */
11629 c_node = cgraph_node::get (fndecl);
11630 c_node = c_node->ultimate_alias_target ();
11631 return !c_node->only_called_directly_p ();
11633 return false;
11635 #endif
11637 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11638 for a call to a function whose data type is FNTYPE.
11639 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11641 For incoming args we set the number of arguments in the prototype large
11642 so we never return a PARALLEL. */
11644 void
11645 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11646 rtx libname ATTRIBUTE_UNUSED, int incoming,
11647 int libcall, int n_named_args,
11648 tree fndecl ATTRIBUTE_UNUSED,
11649 machine_mode return_mode ATTRIBUTE_UNUSED)
11651 static CUMULATIVE_ARGS zero_cumulative;
11653 *cum = zero_cumulative;
11654 cum->words = 0;
11655 cum->fregno = FP_ARG_MIN_REG;
11656 cum->vregno = ALTIVEC_ARG_MIN_REG;
11657 cum->prototype = (fntype && prototype_p (fntype));
11658 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11659 ? CALL_LIBCALL : CALL_NORMAL);
11660 cum->sysv_gregno = GP_ARG_MIN_REG;
11661 cum->stdarg = stdarg_p (fntype);
11662 cum->libcall = libcall;
11664 cum->nargs_prototype = 0;
11665 if (incoming || cum->prototype)
11666 cum->nargs_prototype = n_named_args;
11668 /* Check for a longcall attribute. */
11669 if ((!fntype && rs6000_default_long_calls)
11670 || (fntype
11671 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11672 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11673 cum->call_cookie |= CALL_LONG;
11675 if (TARGET_DEBUG_ARG)
11677 fprintf (stderr, "\ninit_cumulative_args:");
11678 if (fntype)
11680 tree ret_type = TREE_TYPE (fntype);
11681 fprintf (stderr, " ret code = %s,",
11682 get_tree_code_name (TREE_CODE (ret_type)));
11685 if (cum->call_cookie & CALL_LONG)
11686 fprintf (stderr, " longcall,");
11688 fprintf (stderr, " proto = %d, nargs = %d\n",
11689 cum->prototype, cum->nargs_prototype);
11692 #ifdef HAVE_AS_GNU_ATTRIBUTE
11693 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11695 cum->escapes = call_ABI_of_interest (fndecl);
11696 if (cum->escapes)
11698 tree return_type;
11700 if (fntype)
11702 return_type = TREE_TYPE (fntype);
11703 return_mode = TYPE_MODE (return_type);
11705 else
11706 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11708 if (return_type != NULL)
11710 if (TREE_CODE (return_type) == RECORD_TYPE
11711 && TYPE_TRANSPARENT_AGGR (return_type))
11713 return_type = TREE_TYPE (first_field (return_type));
11714 return_mode = TYPE_MODE (return_type);
11716 if (AGGREGATE_TYPE_P (return_type)
11717 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11718 <= 8))
11719 rs6000_returns_struct = true;
11721 if (SCALAR_FLOAT_MODE_P (return_mode))
11723 rs6000_passes_float = true;
11724 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11725 && (FLOAT128_IBM_P (return_mode)
11726 || FLOAT128_IEEE_P (return_mode)
11727 || (return_type != NULL
11728 && (TYPE_MAIN_VARIANT (return_type)
11729 == long_double_type_node))))
11730 rs6000_passes_long_double = true;
11732 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11733 || SPE_VECTOR_MODE (return_mode))
11734 rs6000_passes_vector = true;
11737 #endif
11739 if (fntype
11740 && !TARGET_ALTIVEC
11741 && TARGET_ALTIVEC_ABI
11742 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11744 error ("cannot return value in vector register because"
11745 " altivec instructions are disabled, use -maltivec"
11746 " to enable them");
11750 /* The mode the ABI uses for a word. This is not the same as word_mode
11751 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11753 static machine_mode
11754 rs6000_abi_word_mode (void)
11756 return TARGET_32BIT ? SImode : DImode;
11759 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11760 static char *
11761 rs6000_offload_options (void)
11763 if (TARGET_64BIT)
11764 return xstrdup ("-foffload-abi=lp64");
11765 else
11766 return xstrdup ("-foffload-abi=ilp32");
11769 /* On rs6000, function arguments are promoted, as are function return
11770 values. */
11772 static machine_mode
11773 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11774 machine_mode mode,
11775 int *punsignedp ATTRIBUTE_UNUSED,
11776 const_tree, int)
11778 PROMOTE_MODE (mode, *punsignedp, type);
11780 return mode;
11783 /* Return true if TYPE must be passed on the stack and not in registers. */
11785 static bool
11786 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11788 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11789 return must_pass_in_stack_var_size (mode, type);
11790 else
11791 return must_pass_in_stack_var_size_or_pad (mode, type);
11794 static inline bool
11795 is_complex_IBM_long_double (machine_mode mode)
11797 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11800 /* Whether ABI_V4 passes MODE args to a function in floating point
11801 registers. */
11803 static bool
11804 abi_v4_pass_in_fpr (machine_mode mode)
11806 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
11807 return false;
11808 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11809 return true;
11810 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11811 return true;
11812 /* ABI_V4 passes complex IBM long double in 8 gprs.
11813 Stupid, but we can't change the ABI now. */
11814 if (is_complex_IBM_long_double (mode))
11815 return false;
11816 if (FLOAT128_2REG_P (mode))
11817 return true;
11818 if (DECIMAL_FLOAT_MODE_P (mode))
11819 return true;
11820 return false;
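/* Summary of the cases above (added commentary): on a hard-float ABI_V4
   target, SFmode and DFmode go in FPRs (subject to TARGET_SINGLE_FLOAT
   and TARGET_DOUBLE_FLOAT), as do IBM long double (FLOAT128_2REG_P) and
   the decimal float modes; complex IBM long double (ICmode/TCmode) is
   the one floating-point family that stays in GPRs. */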
11823 /* If defined, a C expression which determines whether, and in which
11824 direction, to pad out an argument with extra space. The value
11825 should be of type `enum direction': either `upward' to pad above
11826 the argument, `downward' to pad below, or `none' to inhibit
11827 padding.
11829 For the AIX ABI structs are always stored left shifted in their
11830 argument slot. */
11832 enum direction
11833 function_arg_padding (machine_mode mode, const_tree type)
11835 #ifndef AGGREGATE_PADDING_FIXED
11836 #define AGGREGATE_PADDING_FIXED 0
11837 #endif
11838 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11839 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11840 #endif
11842 if (!AGGREGATE_PADDING_FIXED)
11844 /* GCC used to pass structures of the same size as integer types as
11845 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11846 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11847 passed padded downward, except that -mstrict-align further
11848 muddied the water in that multi-component structures of 2 and 4
11849 bytes in size were passed padded upward.
11851 The following arranges for best compatibility with previous
11852 versions of gcc, but removes the -mstrict-align dependency. */
11853 if (BYTES_BIG_ENDIAN)
11855 HOST_WIDE_INT size = 0;
11857 if (mode == BLKmode)
11859 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11860 size = int_size_in_bytes (type);
11862 else
11863 size = GET_MODE_SIZE (mode);
11865 if (size == 1 || size == 2 || size == 4)
11866 return downward;
11868 return upward;
11871 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11873 if (type != 0 && AGGREGATE_TYPE_P (type))
11874 return upward;
11877 /* Fall back to the default. */
11878 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
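/* Example (added commentary): on a big-endian target a 2-byte struct is
   padded downward, so its data sits at the high-address end of its slot
   just as a 2-byte integer would, while a 3-byte struct is padded
   upward with its data at the low-address end. */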
11881 /* If defined, a C expression that gives the alignment boundary, in bits,
11882 of an argument with the specified mode and type. If it is not defined,
11883 PARM_BOUNDARY is used for all arguments.
11885 V.4 wants long longs and doubles to be double word aligned. Just
11886 testing the mode size is a boneheaded way to do this as it means
11887 that other types such as complex int are also double word aligned.
11888 However, we're stuck with this because changing the ABI might break
11889 existing library interfaces.
11891 Doubleword align SPE vectors.
11892 Quadword align Altivec/VSX vectors.
11893 Quadword align large synthetic vector types. */
11895 static unsigned int
11896 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11898 machine_mode elt_mode;
11899 int n_elts;
11901 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11903 if (DEFAULT_ABI == ABI_V4
11904 && (GET_MODE_SIZE (mode) == 8
11905 || (TARGET_HARD_FLOAT
11906 && TARGET_FPRS
11907 && !is_complex_IBM_long_double (mode)
11908 && FLOAT128_2REG_P (mode))))
11909 return 64;
11910 else if (FLOAT128_VECTOR_P (mode))
11911 return 128;
11912 else if (SPE_VECTOR_MODE (mode)
11913 || (type && TREE_CODE (type) == VECTOR_TYPE
11914 && int_size_in_bytes (type) >= 8
11915 && int_size_in_bytes (type) < 16))
11916 return 64;
11917 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11918 || (type && TREE_CODE (type) == VECTOR_TYPE
11919 && int_size_in_bytes (type) >= 16))
11920 return 128;
11922 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11923 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11924 -mcompat-align-parm is used. */
11925 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11926 || DEFAULT_ABI == ABI_ELFv2)
11927 && type && TYPE_ALIGN (type) > 64)
11929 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11930 or homogeneous float/vector aggregates here. We already handled
11931 vector aggregates above, but still need to check for float here. */
11932 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11933 && !SCALAR_FLOAT_MODE_P (elt_mode));
11935 /* We used to check for BLKmode instead of the above aggregate type
11936 check. Warn when this results in any difference to the ABI. */
11937 if (aggregate_p != (mode == BLKmode))
11939 static bool warned;
11940 if (!warned && warn_psabi)
11942 warned = true;
11943 inform (input_location,
11944 "the ABI of passing aggregates with %d-byte alignment"
11945 " has changed in GCC 5",
11946 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11950 if (aggregate_p)
11951 return 128;
11954 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11955 implement the "aggregate type" check as a BLKmode check here; this
11956 means certain aggregate types are in fact not aligned. */
11957 if (TARGET_MACHO && rs6000_darwin64_abi
11958 && mode == BLKmode
11959 && type && TYPE_ALIGN (type) > 64)
11960 return 128;
11962 return PARM_BOUNDARY;
11965 /* The offset in words to the start of the parameter save area. */
11967 static unsigned int
11968 rs6000_parm_offset (void)
11970 return (DEFAULT_ABI == ABI_V4 ? 2
11971 : DEFAULT_ABI == ABI_ELFv2 ? 4
11972 : 6);
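/* Added commentary: these counts match the fixed stack-frame header that
   sits below the parameter save area -- 2 words for ABI_V4 (back chain,
   LR save), 4 for ELFv2 (back chain, CR save, LR save, TOC save), and 6
   for AIX-style ABIs, which also reserve two words for the compiler and
   linker. */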
11975 /* For a function parm of MODE and TYPE, return the starting word in
11976 the parameter area. NWORDS of the parameter area are already used. */
11978 static unsigned int
11979 rs6000_parm_start (machine_mode mode, const_tree type,
11980 unsigned int nwords)
11982 unsigned int align;
11984 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11985 return nwords + (-(rs6000_parm_offset () + nwords) & align);
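/* Worked example (added commentary): for a 16-byte-aligned vector under
   ELFv2 on a 64-bit target (parm offset 4, PARM_BOUNDARY 64) with one
   word already used, align = 128/64 - 1 = 1 and the result is
   1 + (-(4 + 1) & 1) = 2, i.e. the argument is bumped to the next even
   word so it stays 16-byte aligned within the save area. */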
11988 /* Compute the size (in words) of a function argument. */
11990 static unsigned long
11991 rs6000_arg_size (machine_mode mode, const_tree type)
11993 unsigned long size;
11995 if (mode != BLKmode)
11996 size = GET_MODE_SIZE (mode);
11997 else
11998 size = int_size_in_bytes (type);
12000 if (TARGET_32BIT)
12001 return (size + 3) >> 2;
12002 else
12003 return (size + 7) >> 3;
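/* Worked example (added commentary): a 10-byte BLKmode argument takes
   (10 + 3) >> 2 = 3 words on a 32-bit target and (10 + 7) >> 3 = 2
   doublewords on a 64-bit one; sizes are always rounded up to whole
   registers. */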
12006 /* Use this to flush pending int fields. */
12008 static void
12009 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
12010 HOST_WIDE_INT bitpos, int final)
12012 unsigned int startbit, endbit;
12013 int intregs, intoffset;
12014 machine_mode mode;
12016 /* Handle the situations where a float is taking up the first half
12017 of the GPR, and the other half is empty (typically due to
12018 alignment restrictions). We can detect this by an 8-byte-aligned
12019 int field, or by seeing that this is the final flush for this
12020 argument. Count the word and continue on. */
12021 if (cum->floats_in_gpr == 1
12022 && (cum->intoffset % 64 == 0
12023 || (cum->intoffset == -1 && final)))
12025 cum->words++;
12026 cum->floats_in_gpr = 0;
12029 if (cum->intoffset == -1)
12030 return;
12032 intoffset = cum->intoffset;
12033 cum->intoffset = -1;
12034 cum->floats_in_gpr = 0;
12036 if (intoffset % BITS_PER_WORD != 0)
12038 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
12039 MODE_INT, 0);
12040 if (mode == BLKmode)
12042 /* We couldn't find an appropriate mode, which happens,
12043 e.g., in packed structs when there are 3 bytes to load.
12044 Back intoffset back to the beginning of the word in this
12045 case. */
12046 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12050 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12051 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12052 intregs = (endbit - startbit) / BITS_PER_WORD;
12053 cum->words += intregs;
12054 /* words should be unsigned. */
12055 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
12057 int pad = (endbit/BITS_PER_WORD) - cum->words;
12058 cum->words += pad;
12062 /* The darwin64 ABI calls for us to recurse down through structs,
12063 looking for elements passed in registers. Unfortunately, we have
12064 to track int register count here also because of misalignments
12065 in powerpc alignment mode. */
12067 static void
12068 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
12069 const_tree type,
12070 HOST_WIDE_INT startbitpos)
12072 tree f;
12074 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12075 if (TREE_CODE (f) == FIELD_DECL)
12077 HOST_WIDE_INT bitpos = startbitpos;
12078 tree ftype = TREE_TYPE (f);
12079 machine_mode mode;
12080 if (ftype == error_mark_node)
12081 continue;
12082 mode = TYPE_MODE (ftype);
12084 if (DECL_SIZE (f) != 0
12085 && tree_fits_uhwi_p (bit_position (f)))
12086 bitpos += int_bit_position (f);
12088 /* ??? FIXME: else assume zero offset. */
12090 if (TREE_CODE (ftype) == RECORD_TYPE)
12091 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
12092 else if (USE_FP_FOR_ARG_P (cum, mode))
12094 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
12095 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12096 cum->fregno += n_fpregs;
12097 /* Single-precision floats present a special problem for
12098 us, because they are smaller than an 8-byte GPR, and so
12099 the structure-packing rules combined with the standard
12100 varargs behavior mean that we want to pack float/float
12101 and float/int combinations into a single register's
12102 space. This is complicated by the arg advance flushing,
12103 which works on arbitrarily large groups of int-type
12104 fields. */
12105 if (mode == SFmode)
12107 if (cum->floats_in_gpr == 1)
12109 /* Two floats in a word; count the word and reset
12110 the float count. */
12111 cum->words++;
12112 cum->floats_in_gpr = 0;
12114 else if (bitpos % 64 == 0)
12116 /* A float at the beginning of an 8-byte word;
12117 count it and put off adjusting cum->words until
12118 we see if an arg advance flush is going to do it
12119 for us. */
12120 cum->floats_in_gpr++;
12122 else
12124 /* The float is at the end of a word, preceded
12125 by integer fields, so the arg advance flush
12126 just above has already set cum->words and
12127 everything is taken care of. */
12130 else
12131 cum->words += n_fpregs;
12133 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12135 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12136 cum->vregno++;
12137 cum->words += 2;
12139 else if (cum->intoffset == -1)
12140 cum->intoffset = bitpos;
12144 /* Check for an item that needs to be considered specially under the Darwin
12145 64-bit ABI. These are record types where the mode is BLK or the structure is
12146 8 bytes in size. */
12147 static int
12148 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
12150 return rs6000_darwin64_abi
12151 && ((mode == BLKmode
12152 && TREE_CODE (type) == RECORD_TYPE
12153 && int_size_in_bytes (type) > 0)
12154 || (type && TREE_CODE (type) == RECORD_TYPE
12155 && int_size_in_bytes (type) == 8)) ? 1 : 0;
12158 /* Update the data in CUM to advance over an argument
12159 of mode MODE and data type TYPE.
12160 (TYPE is null for libcalls where that information may not be available.)
12162 Note that for args passed by reference, function_arg will be called
12163 with MODE and TYPE set to that of the pointer to the arg, not the arg
12164 itself. */
12166 static void
12167 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
12168 const_tree type, bool named, int depth)
12170 machine_mode elt_mode;
12171 int n_elts;
12173 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12175 /* Only tick off an argument if we're not recursing. */
12176 if (depth == 0)
12177 cum->nargs_prototype--;
12179 #ifdef HAVE_AS_GNU_ATTRIBUTE
12180 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
12181 && cum->escapes)
12183 if (SCALAR_FLOAT_MODE_P (mode))
12185 rs6000_passes_float = true;
12186 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
12187 && (FLOAT128_IBM_P (mode)
12188 || FLOAT128_IEEE_P (mode)
12189 || (type != NULL
12190 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
12191 rs6000_passes_long_double = true;
12193 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
12194 || (SPE_VECTOR_MODE (mode)
12195 && !cum->stdarg
12196 && cum->sysv_gregno <= GP_ARG_MAX_REG))
12197 rs6000_passes_vector = true;
12199 #endif
12201 if (TARGET_ALTIVEC_ABI
12202 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12203 || (type && TREE_CODE (type) == VECTOR_TYPE
12204 && int_size_in_bytes (type) == 16)))
12206 bool stack = false;
12208 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12210 cum->vregno += n_elts;
12212 if (!TARGET_ALTIVEC)
12213 error ("cannot pass argument in vector register because"
12214 " altivec instructions are disabled, use -maltivec"
12215 " to enable them");
12217 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
12218 even if it is going to be passed in a vector register.
12219 Darwin does the same for variable-argument functions. */
12220 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12221 && TARGET_64BIT)
12222 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
12223 stack = true;
12225 else
12226 stack = true;
12228 if (stack)
12230 int align;
12232 /* Vector parameters must be 16-byte aligned. In 32-bit
12233 mode this means we need to take into account the offset
12234 to the parameter save area. In 64-bit mode, they just
12235 have to start on an even word, since the parameter save
12236 area is 16-byte aligned. */
12237 if (TARGET_32BIT)
12238 align = -(rs6000_parm_offset () + cum->words) & 3;
12239 else
12240 align = cum->words & 1;
12241 cum->words += align + rs6000_arg_size (mode, type);
12243 if (TARGET_DEBUG_ARG)
12245 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
12246 cum->words, align);
12247 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
12248 cum->nargs_prototype, cum->prototype,
12249 GET_MODE_NAME (mode));
12253 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
12254 && !cum->stdarg
12255 && cum->sysv_gregno <= GP_ARG_MAX_REG)
12256 cum->sysv_gregno++;
12258 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12260 int size = int_size_in_bytes (type);
12261 /* Variable sized types have size == -1 and are
12262 treated as if consisting entirely of ints.
12263 Pad to 16 byte boundary if needed. */
12264 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12265 && (cum->words % 2) != 0)
12266 cum->words++;
12267 /* For varargs, we can just go up by the size of the struct. */
12268 if (!named)
12269 cum->words += (size + 7) / 8;
12270 else
12272 /* It is tempting to say int register count just goes up by
12273 sizeof(type)/8, but this is wrong in a case such as
12274 { int; double; int; } [powerpc alignment]. We have to
12275 grovel through the fields for these too. */
12276 cum->intoffset = 0;
12277 cum->floats_in_gpr = 0;
12278 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
12279 rs6000_darwin64_record_arg_advance_flush (cum,
12280 size * BITS_PER_UNIT, 1);
12282 if (TARGET_DEBUG_ARG)
12284 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
12285 cum->words, TYPE_ALIGN (type), size);
12286 fprintf (stderr,
12287 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12288 cum->nargs_prototype, cum->prototype,
12289 GET_MODE_NAME (mode));
12292 else if (DEFAULT_ABI == ABI_V4)
12294 if (abi_v4_pass_in_fpr (mode))
12296 /* _Decimal128 must use an even/odd register pair. This assumes
12297 that the register number is odd when fregno is odd. */
12298 if (mode == TDmode && (cum->fregno % 2) == 1)
12299 cum->fregno++;
12301 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12302 <= FP_ARG_V4_MAX_REG)
12303 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
12304 else
12306 cum->fregno = FP_ARG_V4_MAX_REG + 1;
12307 if (mode == DFmode || FLOAT128_IBM_P (mode)
12308 || mode == DDmode || mode == TDmode)
12309 cum->words += cum->words & 1;
12310 cum->words += rs6000_arg_size (mode, type);
12313 else
12315 int n_words = rs6000_arg_size (mode, type);
12316 int gregno = cum->sysv_gregno;
12318 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12319 (r7,r8) or (r9,r10), as is any other 2-word item such
12320 as complex int, due to a historical mistake. */
12321 if (n_words == 2)
12322 gregno += (1 - gregno) & 1;
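/* Added commentary: the (1 - gregno) & 1 expression rounds gregno up to
   the next odd register number (r3 stays r3, r4 becomes r5), since
   these pairs must start in an odd-numbered GPR. */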
12324 /* Multi-reg args are not split between registers and stack. */
12325 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12327 /* Long long and SPE vectors are aligned on the stack.
12328 So are other 2 word items such as complex int due to
12329 a historical mistake. */
12330 if (n_words == 2)
12331 cum->words += cum->words & 1;
12332 cum->words += n_words;
12335 /* Note: we keep accumulating gregno even after arguments have started
12336 spilling to the stack; this lets expand_builtin_saveregs see that
12337 spilling has started. */
12338 cum->sysv_gregno = gregno + n_words;
12341 if (TARGET_DEBUG_ARG)
12343 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12344 cum->words, cum->fregno);
12345 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
12346 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
12347 fprintf (stderr, "mode = %4s, named = %d\n",
12348 GET_MODE_NAME (mode), named);
12351 else
12353 int n_words = rs6000_arg_size (mode, type);
12354 int start_words = cum->words;
12355 int align_words = rs6000_parm_start (mode, type, start_words);
12357 cum->words = align_words + n_words;
12359 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
12361 /* _Decimal128 must be passed in an even/odd float register pair.
12362 This assumes that the register number is odd when fregno is
12363 odd. */
12364 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12365 cum->fregno++;
12366 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
12369 if (TARGET_DEBUG_ARG)
12371 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12372 cum->words, cum->fregno);
12373 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
12374 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
12375 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
12376 named, align_words - start_words, depth);
12381 static void
12382 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
12383 const_tree type, bool named)
12385 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12389 static rtx
12390 spe_build_register_parallel (machine_mode mode, int gregno)
12392 rtx r1, r3, r5, r7;
12394 switch (mode)
12396 case DFmode:
12397 r1 = gen_rtx_REG (DImode, gregno);
12398 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12399 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
12401 case DCmode:
12402 case TFmode:
12403 r1 = gen_rtx_REG (DImode, gregno);
12404 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12405 r3 = gen_rtx_REG (DImode, gregno + 2);
12406 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12407 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
12409 case TCmode:
12410 r1 = gen_rtx_REG (DImode, gregno);
12411 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12412 r3 = gen_rtx_REG (DImode, gregno + 2);
12413 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12414 r5 = gen_rtx_REG (DImode, gregno + 4);
12415 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
12416 r7 = gen_rtx_REG (DImode, gregno + 6);
12417 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
12418 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
12420 default:
12421 gcc_unreachable ();
12425 /* Determine where to put a SIMD argument on the SPE. */
12426 static rtx
12427 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
12428 const_tree type)
12430 int gregno = cum->sysv_gregno;
12432 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
12433 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
12434 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
12435 || mode == DCmode || mode == TCmode))
12437 int n_words = rs6000_arg_size (mode, type);
12439 /* Doubles go in an odd/even register pair (r5/r6, etc). */
12440 if (mode == DFmode)
12441 gregno += (1 - gregno) & 1;
12443 /* Multi-reg args are not split between registers and stack. */
12444 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12445 return NULL_RTX;
12447 return spe_build_register_parallel (mode, gregno);
12449 if (cum->stdarg)
12451 int n_words = rs6000_arg_size (mode, type);
12453 /* SPE vectors are put in odd registers. */
12454 if (n_words == 2 && (gregno & 1) == 0)
12455 gregno += 1;
12457 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
12459 rtx r1, r2;
12460 machine_mode m = SImode;
12462 r1 = gen_rtx_REG (m, gregno);
12463 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
12464 r2 = gen_rtx_REG (m, gregno + 1);
12465 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
12466 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
12468 else
12469 return NULL_RTX;
12471 else
12473 if (gregno <= GP_ARG_MAX_REG)
12474 return gen_rtx_REG (mode, gregno);
12475 else
12476 return NULL_RTX;
12480 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12481 structure between cum->intoffset and bitpos to integer registers. */
12483 static void
12484 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12485 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12487 machine_mode mode;
12488 unsigned int regno;
12489 unsigned int startbit, endbit;
12490 int this_regno, intregs, intoffset;
12491 rtx reg;
12493 if (cum->intoffset == -1)
12494 return;
12496 intoffset = cum->intoffset;
12497 cum->intoffset = -1;
12499 /* If this is the trailing part of a word, try to only load that
12500 much into the register. Otherwise load the whole register. Note
12501 that in the latter case we may pick up unwanted bits. It's not a
12502 problem at the moment, but we may wish to revisit this. */
12504 if (intoffset % BITS_PER_WORD != 0)
12506 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
12507 MODE_INT, 0);
12508 if (mode == BLKmode)
12510 /* We couldn't find an appropriate mode, which happens,
12511 e.g., in packed structs when there are 3 bytes to load.
12512 Back intoffset back to the beginning of the word in this
12513 case. */
12514 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12515 mode = word_mode;
12518 else
12519 mode = word_mode;
12521 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12522 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12523 intregs = (endbit - startbit) / BITS_PER_WORD;
12524 this_regno = cum->words + intoffset / BITS_PER_WORD;
12526 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12527 cum->use_stack = 1;
12529 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12530 if (intregs <= 0)
12531 return;
12533 intoffset /= BITS_PER_UNIT;
12534 do
12535 {
12536 regno = GP_ARG_MIN_REG + this_regno;
12537 reg = gen_rtx_REG (mode, regno);
12538 rvec[(*k)++] =
12539 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12541 this_regno += 1;
12542 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12543 mode = word_mode;
12544 intregs -= 1;
12545 }
12546 while (intregs > 0);
12549 /* Recursive workhorse for the following. */
12551 static void
12552 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12553 HOST_WIDE_INT startbitpos, rtx rvec[],
12554 int *k)
12556 tree f;
12558 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12559 if (TREE_CODE (f) == FIELD_DECL)
12561 HOST_WIDE_INT bitpos = startbitpos;
12562 tree ftype = TREE_TYPE (f);
12563 machine_mode mode;
12564 if (ftype == error_mark_node)
12565 continue;
12566 mode = TYPE_MODE (ftype);
12568 if (DECL_SIZE (f) != 0
12569 && tree_fits_uhwi_p (bit_position (f)))
12570 bitpos += int_bit_position (f);
12572 /* ??? FIXME: else assume zero offset. */
12574 if (TREE_CODE (ftype) == RECORD_TYPE)
12575 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12576 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12578 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12579 #if 0
12580 switch (mode)
12582 case SCmode: mode = SFmode; break;
12583 case DCmode: mode = DFmode; break;
12584 case TCmode: mode = TFmode; break;
12585 default: break;
12587 #endif
12588 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12589 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12591 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12592 && (mode == TFmode || mode == TDmode));
12593 /* Long double or _Decimal128 split over regs and memory. */
12594 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12595 cum->use_stack = 1;
12597 rvec[(*k)++]
12598 = gen_rtx_EXPR_LIST (VOIDmode,
12599 gen_rtx_REG (mode, cum->fregno++),
12600 GEN_INT (bitpos / BITS_PER_UNIT));
12601 if (FLOAT128_2REG_P (mode))
12602 cum->fregno++;
12604 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12606 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12607 rvec[(*k)++]
12608 = gen_rtx_EXPR_LIST (VOIDmode,
12609 gen_rtx_REG (mode, cum->vregno++),
12610 GEN_INT (bitpos / BITS_PER_UNIT));
12612 else if (cum->intoffset == -1)
12613 cum->intoffset = bitpos;
12617 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12618 the register(s) to be used for each field and subfield of a struct
12619 being passed by value, along with the offset of where the
12620 register's value may be found in the block. FP fields go in FP
12621 register, vector fields go in vector registers, and everything
12622 else goes in int registers, packed as in memory.
12624 This code is also used for function return values. RETVAL indicates
12625 whether this is the case.
12627 Much of this is taken from the SPARC V9 port, which has a similar
12628 calling convention. */
12630 static rtx
12631 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12632 bool named, bool retval)
12634 rtx rvec[FIRST_PSEUDO_REGISTER];
12635 int k = 1, kbase = 1;
12636 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12637 /* This is a copy; modifications are not visible to our caller. */
12638 CUMULATIVE_ARGS copy_cum = *orig_cum;
12639 CUMULATIVE_ARGS *cum = &copy_cum;
12641 /* Pad to 16 byte boundary if needed. */
12642 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12643 && (cum->words % 2) != 0)
12644 cum->words++;
12646 cum->intoffset = 0;
12647 cum->use_stack = 0;
12648 cum->named = named;
12650 /* Put entries into rvec[] for individual FP and vector fields, and
12651 for the chunks of memory that go in int regs. Note we start at
12652 element 1; 0 is reserved for an indication of using memory, and
12653 may or may not be filled in below. */
12654 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12655 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12657 /* If any part of the struct went on the stack put all of it there.
12658 This hack is because the generic code for
12659 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12660 parts of the struct are not at the beginning. */
12661 if (cum->use_stack)
12663 if (retval)
12664 return NULL_RTX; /* doesn't go in registers at all */
12665 kbase = 0;
12666 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12668 if (k > 1 || cum->use_stack)
12669 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12670 else
12671 return NULL_RTX;
12674 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12676 static rtx
12677 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12678 int align_words)
12680 int n_units;
12681 int i, k;
12682 rtx rvec[GP_ARG_NUM_REG + 1];
12684 if (align_words >= GP_ARG_NUM_REG)
12685 return NULL_RTX;
12687 n_units = rs6000_arg_size (mode, type);
12689 /* Optimize the simple case where the arg fits in one gpr, except in
12690 the case of BLKmode due to assign_parms assuming that registers are
12691 BITS_PER_WORD wide. */
12692 if (n_units == 0
12693 || (n_units == 1 && mode != BLKmode))
12694 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12696 k = 0;
12697 if (align_words + n_units > GP_ARG_NUM_REG)
12698 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12699 using a magic NULL_RTX component.
12700 This is not strictly correct. Only some of the arg belongs in
12701 memory, not all of it. However, the normal scheme using
12702 function_arg_partial_nregs can result in unusual subregs, eg.
12703 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12704 store the whole arg to memory is often more efficient than code
12705 to store pieces, and we know that space is available in the right
12706 place for the whole arg. */
12707 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12709 i = 0;
12710 do
12711 {
12712 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12713 rtx off = GEN_INT (i++ * 4);
12714 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12715 }
12716 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12718 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
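/* Worked example (added commentary): a DFmode argument with
   align_words == 7 (n_units == 2 on a 32-bit target) yields a PARALLEL
   whose first element is the NULL_RTX memory marker and whose second is
   (reg:SI r10) at offset 0 -- the first half of the double travels in
   r10 and the second half in the caller's parameter save area. */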
12721 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12722 but must also be copied into the parameter save area starting at
12723 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12724 to the GPRs and/or memory. Return the number of elements used. */
12726 static int
12727 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12728 int align_words, rtx *rvec)
12730 int k = 0;
12732 if (align_words < GP_ARG_NUM_REG)
12734 int n_words = rs6000_arg_size (mode, type);
12736 if (align_words + n_words > GP_ARG_NUM_REG
12737 || mode == BLKmode
12738 || (TARGET_32BIT && TARGET_POWERPC64))
12740 /* If this is partially on the stack, then we only
12741 include the portion actually in registers here. */
12742 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12743 int i = 0;
12745 if (align_words + n_words > GP_ARG_NUM_REG)
12747 /* Not all of the arg fits in gprs. Say that it goes in memory
12748 too, using a magic NULL_RTX component. Also see comment in
12749 rs6000_mixed_function_arg for why the normal
12750 function_arg_partial_nregs scheme doesn't work in this case. */
12751 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12756 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12757 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12758 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12760 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12762 else
12764 /* The whole arg fits in gprs. */
12765 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12766 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12769 else
12771 /* It's entirely in memory. */
12772 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12775 return k;
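/* Editorial illustration, a sketch: for a DFmode argument with
   align_words == 6 under a 64-bit ABI, the whole value fits in GPRs,
   so a single element (expr_list (reg:DF 9) (const_int 0)) goes into
   RVEC and 1 is returned; the caller then usually appends the FPR or
   VR copy of the same value after it.  */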
12778 /* RVEC is a vector of K components of an argument of mode MODE.
12779 Construct the final function_arg return value from it. */
12781 static rtx
12782 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12784 gcc_assert (k >= 1);
12786 /* Avoid returning a PARALLEL in the trivial cases. */
12787 if (k == 1)
12789 if (XEXP (rvec[0], 0) == NULL_RTX)
12790 return NULL_RTX;
12792 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12793 return XEXP (rvec[0], 0);
12796 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
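/* Editorial illustration, a sketch: if K == 1 and the only element is
   (expr_list (reg:DF 33) (const_int 0)), then for MODE == DFmode the
   bare (reg:DF 33) is returned; a lone nil element means "entirely in
   memory" and yields NULL_RTX.  */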
12799 /* Determine where to put an argument to a function.
12800 Value is zero to push the argument on the stack,
12801 or a hard register in which to store the argument.
12803 MODE is the argument's machine mode.
12804 TYPE is the data type of the argument (as a tree).
12805 This is null for libcalls where that information may
12806 not be available.
12807 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12808 the preceding args and about the function being called. It is
12809 not modified in this routine.
12810 NAMED is nonzero if this argument is a named parameter
12811 (otherwise it is an extra parameter matching an ellipsis).
12813 On RS/6000 the first eight words of non-FP are normally in registers
12814 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12815 Under V.4, the first 8 FP args are in registers.
12817 If this is floating-point and no prototype is specified, we use
12818 both an FP and integer register (or possibly FP reg and stack). Library
12819 functions (when CALL_LIBCALL is set) always have the proper types for args,
12820 so we can pass the FP value just in one register. emit_library_call
12821 doesn't support PARALLEL anyway.
12823 Note that for args passed by reference, function_arg will be called
12824 with MODE and TYPE set to that of the pointer to the arg, not the arg
12825 itself. */
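/* Editorial illustration, a sketch under an AIX-style 64-bit ABI: for
   an unprototyped call such as

     extern int foo ();
     foo (1.5);

   the double is described by a PARALLEL naming both f1 and the GPR
   image of its bits (r3 here), so the callee can fetch it from either
   place, exactly as the comment above describes.  */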
12827 static rtx
12828 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12829 const_tree type, bool named)
12831 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12832 enum rs6000_abi abi = DEFAULT_ABI;
12833 machine_mode elt_mode;
12834 int n_elts;
12836 /* Return a marker to indicate whether we need to set or clear in CR1 the
12837 bit that V.4 uses to say fp args were passed in registers.
12838 Assume that we don't need the marker for software floating point,
12839 or compiler generated library calls. */
12840 if (mode == VOIDmode)
12842 if (abi == ABI_V4
12843 && (cum->call_cookie & CALL_LIBCALL) == 0
12844 && (cum->stdarg
12845 || (cum->nargs_prototype < 0
12846 && (cum->prototype || TARGET_NO_PROTOTYPE))))
12848 /* For the SPE, we need to crxor CR6 always. */
12849 if (TARGET_SPE_ABI)
12850 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
12851 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
12852 return GEN_INT (cum->call_cookie
12853 | ((cum->fregno == FP_ARG_MIN_REG)
12854 ? CALL_V4_SET_FP_ARGS
12855 : CALL_V4_CLEAR_FP_ARGS));
12858 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12861 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12863 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12865 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12866 if (rslt != NULL_RTX)
12867 return rslt;
12868 /* Else fall through to usual handling. */
12871 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12873 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12874 rtx r, off;
12875 int i, k = 0;
12877 /* Do we also need to pass this argument in the parameter save area?
12878 Library support functions for IEEE 128-bit are assumed to not need the
12879 value passed both in GPRs and in vector registers. */
12880 if (TARGET_64BIT && !cum->prototype
12881 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12883 int align_words = ROUND_UP (cum->words, 2);
12884 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12887 /* Describe where this argument goes in the vector registers. */
12888 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12890 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12891 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12892 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12895 return rs6000_finish_function_arg (mode, rvec, k);
12897 else if (TARGET_ALTIVEC_ABI
12898 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12899 || (type && TREE_CODE (type) == VECTOR_TYPE
12900 && int_size_in_bytes (type) == 16)))
12902 if (named || abi == ABI_V4)
12903 return NULL_RTX;
12904 else
12906 /* Vector parameters to varargs functions under AIX or Darwin
12907 get passed in memory and possibly also in GPRs. */
12908 int align, align_words, n_words;
12909 machine_mode part_mode;
12911 /* Vector parameters must be 16-byte aligned. In 32-bit
12912 mode this means we need to take into account the offset
12913 to the parameter save area. In 64-bit mode, they just
12914 have to start on an even word, since the parameter save
12915 area is 16-byte aligned. */
12916 if (TARGET_32BIT)
12917 align = -(rs6000_parm_offset () + cum->words) & 3;
12918 else
12919 align = cum->words & 1;
12920 align_words = cum->words + align;
12922 /* Out of registers? Memory, then. */
12923 if (align_words >= GP_ARG_NUM_REG)
12924 return NULL_RTX;
12926 if (TARGET_32BIT && TARGET_POWERPC64)
12927 return rs6000_mixed_function_arg (mode, type, align_words);
12929 /* The vector value goes in GPRs. Only the part of the
12930 value in GPRs is reported here. */
12931 part_mode = mode;
12932 n_words = rs6000_arg_size (mode, type);
12933 if (align_words + n_words > GP_ARG_NUM_REG)
12934 /* Fortunately, there are only two possibilities: the value
12935 is either wholly in GPRs or half in GPRs and half not. */
12936 part_mode = DImode;
12938 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12941 else if (TARGET_SPE_ABI && TARGET_SPE
12942 && (SPE_VECTOR_MODE (mode)
12943 || (TARGET_E500_DOUBLE && (mode == DFmode
12944 || mode == DCmode
12945 || mode == TFmode
12946 || mode == TCmode))))
12947 return rs6000_spe_function_arg (cum, mode, type);
12949 else if (abi == ABI_V4)
12951 if (abi_v4_pass_in_fpr (mode))
12953 /* _Decimal128 must use an even/odd register pair. This assumes
12954 that the register number is odd when fregno is odd. */
12955 if (mode == TDmode && (cum->fregno % 2) == 1)
12956 cum->fregno++;
12958 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12959 <= FP_ARG_V4_MAX_REG)
12960 return gen_rtx_REG (mode, cum->fregno);
12961 else
12962 return NULL_RTX;
12964 else
12966 int n_words = rs6000_arg_size (mode, type);
12967 int gregno = cum->sysv_gregno;
12969 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12970 (r7,r8) or (r9,r10), as is any other 2-word item such
12971 as complex int, due to a historical mistake. */
12972 if (n_words == 2)
12973 gregno += (1 - gregno) & 1;
12975 /* Multi-reg args are not split between registers and stack. */
12976 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12977 return NULL_RTX;
12979 if (TARGET_32BIT && TARGET_POWERPC64)
12980 return rs6000_mixed_function_arg (mode, type,
12981 gregno - GP_ARG_MIN_REG);
12982 return gen_rtx_REG (mode, gregno);
12985 else
12987 int align_words = rs6000_parm_start (mode, type, cum->words);
12989 /* _Decimal128 must be passed in an even/odd float register pair.
12990 This assumes that the register number is odd when fregno is odd. */
12991 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12992 cum->fregno++;
12994 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12996 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12997 rtx r, off;
12998 int i, k = 0;
12999 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13000 int fpr_words;
13002 /* Do we also need to pass this argument in the parameter
13003 save area? */
13004 if (type && (cum->nargs_prototype <= 0
13005 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13006 && TARGET_XL_COMPAT
13007 && align_words >= GP_ARG_NUM_REG)))
13008 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
13010 /* Describe where this argument goes in the fprs. */
13011 for (i = 0; i < n_elts
13012 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
13014 /* Check if the argument is split over registers and memory.
13015 This can only ever happen for long double or _Decimal128;
13016 complex types are handled via split_complex_arg. */
13017 machine_mode fmode = elt_mode;
13018 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
13020 gcc_assert (FLOAT128_2REG_P (fmode));
13021 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
13024 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
13025 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
13026 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13029 /* If there were not enough FPRs to hold the argument, the rest
13030 usually goes into memory. However, if the current position
13031 is still within the register parameter area, a portion may
13032 actually have to go into GPRs.
13034 Note that it may happen that the portion of the argument
13035 passed in the first "half" of the first GPR was already
13036 passed in the last FPR as well.
13038 For unnamed arguments, we already set up GPRs to cover the
13039 whole argument in rs6000_psave_function_arg, so there is
13040 nothing further to do at this point. */
13041 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
13042 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
13043 && cum->nargs_prototype > 0)
13045 static bool warned;
13047 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
13048 int n_words = rs6000_arg_size (mode, type);
13050 align_words += fpr_words;
13051 n_words -= fpr_words;
13055 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
13056 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
13057 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13059 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
13061 if (!warned && warn_psabi)
13063 warned = true;
13064 inform (input_location,
13065 "the ABI of passing homogeneous float aggregates"
13066 " has changed in GCC 5");
13070 return rs6000_finish_function_arg (mode, rvec, k);
13072 else if (align_words < GP_ARG_NUM_REG)
13074 if (TARGET_32BIT && TARGET_POWERPC64)
13075 return rs6000_mixed_function_arg (mode, type, align_words);
13077 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
13079 else
13080 return NULL_RTX;
13084 /* For an arg passed partly in registers and partly in memory, this is
13085 the number of bytes passed in registers. For args passed entirely in
13086 registers or entirely in memory, zero. When an arg is described by a
13087 PARALLEL, perhaps using more than one register type, this function
13088 returns the number of bytes used by the first element of the PARALLEL. */
13090 static int
13091 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
13092 tree type, bool named)
13094 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13095 bool passed_in_gprs = true;
13096 int ret = 0;
13097 int align_words;
13098 machine_mode elt_mode;
13099 int n_elts;
13101 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
13103 if (DEFAULT_ABI == ABI_V4)
13104 return 0;
13106 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
13108 /* If we are passing this arg in the fixed parameter save area (gprs or
13109 memory) as well as VRs, we do not use the partial bytes mechanism;
13110 instead, rs6000_function_arg will return a PARALLEL including a memory
13111 element as necessary. Library support functions for IEEE 128-bit are
13112 assumed to not need the value passed both in GPRs and in vector
13113 registers. */
13114 if (TARGET_64BIT && !cum->prototype
13115 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
13116 return 0;
13118 /* Otherwise, we pass in VRs only. Check for partial copies. */
13119 passed_in_gprs = false;
13120 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
13121 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
13124 /* In this complicated case we just disable the partial_nregs code. */
13125 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
13126 return 0;
13128 align_words = rs6000_parm_start (mode, type, cum->words);
13130 if (USE_FP_FOR_ARG_P (cum, elt_mode))
13132 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13134 /* If we are passing this arg in the fixed parameter save area
13135 (gprs or memory) as well as FPRs, we do not use the partial
13136 bytes mechanism; instead, rs6000_function_arg will return a
13137 PARALLEL including a memory element as necessary. */
13138 if (type
13139 && (cum->nargs_prototype <= 0
13140 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13141 && TARGET_XL_COMPAT
13142 && align_words >= GP_ARG_NUM_REG)))
13143 return 0;
13145 /* Otherwise, we pass in FPRs only. Check for partial copies. */
13146 passed_in_gprs = false;
13147 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
13149 /* Compute number of bytes / words passed in FPRs. If there
13150 is still space available in the register parameter area
13151 *after* that amount, a part of the argument will be passed
13152 in GPRs. In that case, the total amount passed in any
13153 registers is equal to the amount that would have been passed
13154 in GPRs if everything were passed there, so we fall back to
13155 the GPR code below to compute the appropriate value. */
13156 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
13157 * MIN (8, GET_MODE_SIZE (elt_mode)));
13158 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
13160 if (align_words + fpr_words < GP_ARG_NUM_REG)
13161 passed_in_gprs = true;
13162 else
13163 ret = fpr;
13167 if (passed_in_gprs
13168 && align_words < GP_ARG_NUM_REG
13169 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
13170 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
13172 if (ret != 0 && TARGET_DEBUG_ARG)
13173 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
13175 return ret;
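/* Editorial illustration, a worked example: under a 64-bit ABI a
   24-byte aggregate arriving at align_words == 6 has only r9 and r10
   left, so ret = (8 - 6) * 8 = 16 bytes are passed in registers and
   the remaining 8 bytes go on the stack.  */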
13178 /* A C expression that indicates when an argument must be passed by
13179 reference. If nonzero for an argument, a copy of that argument is
13180 made in memory and a pointer to the argument is passed instead of
13181 the argument itself. The pointer is passed in whatever way is
13182 appropriate for passing a pointer to that type.
13184 Under V.4, aggregates and long double are passed by reference.
13186 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
13187 reference unless the AltiVec vector extension ABI is in force.
13189 As an extension to all ABIs, variable sized types are passed by
13190 reference. */
13192 static bool
13193 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
13194 machine_mode mode, const_tree type,
13195 bool named ATTRIBUTE_UNUSED)
13197 if (!type)
13198 return 0;
13200 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
13201 && FLOAT128_IEEE_P (TYPE_MODE (type)))
13203 if (TARGET_DEBUG_ARG)
13204 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
13205 return 1;
13208 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
13210 if (TARGET_DEBUG_ARG)
13211 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
13212 return 1;
13215 if (int_size_in_bytes (type) < 0)
13217 if (TARGET_DEBUG_ARG)
13218 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
13219 return 1;
13222 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
13223 modes only exist for GCC vector types if -maltivec. */
13224 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13226 if (TARGET_DEBUG_ARG)
13227 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
13228 return 1;
13231 /* Pass synthetic vectors in memory. */
13232 if (TREE_CODE (type) == VECTOR_TYPE
13233 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
13235 static bool warned_for_pass_big_vectors = false;
13236 if (TARGET_DEBUG_ARG)
13237 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
13238 if (!warned_for_pass_big_vectors)
13240 warning (OPT_Wpsabi, "GCC vector passed by reference: "
13241 "non-standard ABI extension with no compatibility guarantee");
13242 warned_for_pass_big_vectors = true;
13244 return 1;
13247 return 0;
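/* Editorial illustration, a sketch: under the V.4 ABI the aggregate in

     struct pair { int a, b; };
     void f (struct pair p);

   really arrives as a pointer to a caller-made copy, and a 32-byte
   GCC vector, e.g. one declared with __attribute__ ((vector_size (32))),
   is passed by reference on any ABI and triggers the -Wpsabi warning
   above.  */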
13250 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
13251 already processed. Return true if the parameter must be passed
13252 (fully or partially) on the stack. */
13254 static bool
13255 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
13257 machine_mode mode;
13258 int unsignedp;
13259 rtx entry_parm;
13261 /* Catch errors. */
13262 if (type == NULL || type == error_mark_node)
13263 return true;
13265 /* Handle types with no storage requirement. */
13266 if (TYPE_MODE (type) == VOIDmode)
13267 return false;
13269 /* Handle complex types. */
13270 if (TREE_CODE (type) == COMPLEX_TYPE)
13271 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
13272 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
13274 /* Handle transparent aggregates. */
13275 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
13276 && TYPE_TRANSPARENT_AGGR (type))
13277 type = TREE_TYPE (first_field (type));
13279 /* See if this arg was passed by invisible reference. */
13280 if (pass_by_reference (get_cumulative_args (args_so_far),
13281 TYPE_MODE (type), type, true))
13282 type = build_pointer_type (type);
13284 /* Find mode as it is passed by the ABI. */
13285 unsignedp = TYPE_UNSIGNED (type);
13286 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
13288 /* If we must pass in stack, we need a stack. */
13289 if (rs6000_must_pass_in_stack (mode, type))
13290 return true;
13292 /* If there is no incoming register, we need a stack. */
13293 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
13294 if (entry_parm == NULL)
13295 return true;
13297 /* Likewise if we need to pass both in registers and on the stack. */
13298 if (GET_CODE (entry_parm) == PARALLEL
13299 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
13300 return true;
13302 /* Also true if we're partially in registers and partially not. */
13303 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
13304 return true;
13306 /* Update info on where next arg arrives in registers. */
13307 rs6000_function_arg_advance (args_so_far, mode, type, true);
13308 return false;
13311 /* Return true if FUN has no prototype, has a variable argument
13312 list, or passes any parameter in memory. */
13314 static bool
13315 rs6000_function_parms_need_stack (tree fun, bool incoming)
13317 tree fntype, result;
13318 CUMULATIVE_ARGS args_so_far_v;
13319 cumulative_args_t args_so_far;
13321 if (!fun)
13322 /* Must be a libcall; libcalls only use reg parms. */
13323 return false;
13325 fntype = fun;
13326 if (!TYPE_P (fun))
13327 fntype = TREE_TYPE (fun);
13329 /* Varargs functions need the parameter save area. */
13330 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
13331 return true;
13333 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
13334 args_so_far = pack_cumulative_args (&args_so_far_v);
13336 /* When incoming, we will have been passed the function decl.
13337 It is necessary to use the decl to handle K&R style functions,
13338 where TYPE_ARG_TYPES may not be available. */
13339 if (incoming)
13341 gcc_assert (DECL_P (fun));
13342 result = DECL_RESULT (fun);
13344 else
13345 result = TREE_TYPE (fntype);
13347 if (result && aggregate_value_p (result, fntype))
13349 if (!TYPE_P (result))
13350 result = TREE_TYPE (result);
13351 result = build_pointer_type (result);
13352 rs6000_parm_needs_stack (args_so_far, result);
13355 if (incoming)
13357 tree parm;
13359 for (parm = DECL_ARGUMENTS (fun);
13360 parm && parm != void_list_node;
13361 parm = TREE_CHAIN (parm))
13362 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
13363 return true;
13365 else
13367 function_args_iterator args_iter;
13368 tree arg_type;
13370 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
13371 if (rs6000_parm_needs_stack (args_so_far, arg_type))
13372 return true;
13375 return false;
13378 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
13379 usually a constant depending on the ABI. However, in the ELFv2 ABI
13380 the register parameter area is optional when calling a function that
13381 has a prototype in scope, has no variable argument list, and passes
13382 all parameters in registers. */
13384 int
13385 rs6000_reg_parm_stack_space (tree fun, bool incoming)
13387 int reg_parm_stack_space;
13389 switch (DEFAULT_ABI)
13391 default:
13392 reg_parm_stack_space = 0;
13393 break;
13395 case ABI_AIX:
13396 case ABI_DARWIN:
13397 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13398 break;
13400 case ABI_ELFv2:
13401 /* ??? Recomputing this every time is a bit expensive. Is there
13402 a place to cache this information? */
13403 if (rs6000_function_parms_need_stack (fun, incoming))
13404 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13405 else
13406 reg_parm_stack_space = 0;
13407 break;
13410 return reg_parm_stack_space;
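/* Editorial illustration, a sketch: under ELFv2,

     int f (int a, double b);       // prototyped, everything in regs
     int g (const char *fmt, ...);  // varargs

   a call to f needs no parameter save area (0 bytes), while a call to
   g must still allocate the full 64-byte area.  */

/* Move NREGS consecutive registers, starting at REGNO, into the block
   of memory X, one word at a time.  */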
13413 static void
13414 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
13416 int i;
13417 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
13419 if (nregs == 0)
13420 return;
13422 for (i = 0; i < nregs; i++)
13424 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
13425 if (reload_completed)
13427 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
13428 tem = NULL_RTX;
13429 else
13430 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
13431 i * GET_MODE_SIZE (reg_mode));
13433 else
13434 tem = replace_equiv_address (tem, XEXP (tem, 0));
13436 gcc_assert (tem);
13438 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
13442 /* Perform any actions needed for a function that is receiving a
13443 variable number of arguments.
13445 CUM is as above.
13447 MODE and TYPE are the mode and type of the current parameter.
13449 PRETEND_SIZE is a variable that should be set to the amount of stack
13450 that must be pushed by the prolog to pretend that our caller pushed it.
13453 Normally, this macro will push all remaining incoming registers on the
13454 stack and set PRETEND_SIZE to the length of the registers pushed. */
13456 static void
13457 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
13458 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13459 int no_rtl)
13461 CUMULATIVE_ARGS next_cum;
13462 int reg_size = TARGET_32BIT ? 4 : 8;
13463 rtx save_area = NULL_RTX, mem;
13464 int first_reg_offset;
13465 alias_set_type set;
13467 /* Skip the last named argument. */
13468 next_cum = *get_cumulative_args (cum);
13469 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
13471 if (DEFAULT_ABI == ABI_V4)
13473 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
13475 if (! no_rtl)
13477 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
13478 HOST_WIDE_INT offset = 0;
13480 /* Try to optimize the size of the varargs save area.
13481 The ABI requires that ap.reg_save_area is doubleword
13482 aligned, but we don't need to allocate space for all
13484 the bytes, only for those to which we will actually save
13484 anything. */
13485 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
13486 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
13487 if (TARGET_HARD_FLOAT && TARGET_FPRS
13488 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13489 && cfun->va_list_fpr_size)
13491 if (gpr_reg_num)
13492 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13493 * UNITS_PER_FP_WORD;
13494 if (cfun->va_list_fpr_size
13495 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13496 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13497 else
13498 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13499 * UNITS_PER_FP_WORD;
13501 if (gpr_reg_num)
13503 offset = -((first_reg_offset * reg_size) & ~7);
13504 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13506 gpr_reg_num = cfun->va_list_gpr_size;
13507 if (reg_size == 4 && (first_reg_offset & 1))
13508 gpr_reg_num++;
13510 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13512 else if (fpr_size)
13513 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13514 * UNITS_PER_FP_WORD
13515 - (int) (GP_ARG_NUM_REG * reg_size);
13517 if (gpr_size + fpr_size)
13519 rtx reg_save_area
13520 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13521 gcc_assert (GET_CODE (reg_save_area) == MEM);
13522 reg_save_area = XEXP (reg_save_area, 0);
13523 if (GET_CODE (reg_save_area) == PLUS)
13525 gcc_assert (XEXP (reg_save_area, 0)
13526 == virtual_stack_vars_rtx);
13527 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13528 offset += INTVAL (XEXP (reg_save_area, 1));
13530 else
13531 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13534 cfun->machine->varargs_save_offset = offset;
13535 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13538 else
13540 first_reg_offset = next_cum.words;
13541 save_area = crtl->args.internal_arg_pointer;
13543 if (targetm.calls.must_pass_in_stack (mode, type))
13544 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13547 set = get_varargs_alias_set ();
13548 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13549 && cfun->va_list_gpr_size)
13551 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13553 if (va_list_gpr_counter_field)
13554 /* V4 va_list_gpr_size counts number of registers needed. */
13555 n_gpr = cfun->va_list_gpr_size;
13556 else
13557 /* char * va_list instead counts number of bytes needed. */
13558 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13560 if (nregs > n_gpr)
13561 nregs = n_gpr;
13563 mem = gen_rtx_MEM (BLKmode,
13564 plus_constant (Pmode, save_area,
13565 first_reg_offset * reg_size));
13566 MEM_NOTRAP_P (mem) = 1;
13567 set_mem_alias_set (mem, set);
13568 set_mem_align (mem, BITS_PER_WORD);
13570 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13571 nregs);
13574 /* Save FP registers if needed. */
13575 if (DEFAULT_ABI == ABI_V4
13576 && TARGET_HARD_FLOAT && TARGET_FPRS
13577 && ! no_rtl
13578 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13579 && cfun->va_list_fpr_size)
13581 int fregno = next_cum.fregno, nregs;
13582 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13583 rtx lab = gen_label_rtx ();
13584 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13585 * UNITS_PER_FP_WORD);
13587 emit_jump_insn
13588 (gen_rtx_SET (pc_rtx,
13589 gen_rtx_IF_THEN_ELSE (VOIDmode,
13590 gen_rtx_NE (VOIDmode, cr1,
13591 const0_rtx),
13592 gen_rtx_LABEL_REF (VOIDmode, lab),
13593 pc_rtx)));
13595 for (nregs = 0;
13596 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13597 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13599 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13600 ? DFmode : SFmode,
13601 plus_constant (Pmode, save_area, off));
13602 MEM_NOTRAP_P (mem) = 1;
13603 set_mem_alias_set (mem, set);
13604 set_mem_align (mem, GET_MODE_ALIGNMENT (
13605 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13606 ? DFmode : SFmode));
13607 emit_move_insn (mem, gen_rtx_REG (
13608 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13609 ? DFmode : SFmode, fregno));
13612 emit_label (lab);
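/* Editorial illustration, a sketch: for V.4 the register save area
   filled in above is laid out as

     reg_save_area + 0  : r3..r10  (8 GPRs * 4 bytes)
     reg_save_area + 32 : f1..f8   (8 FPRs * 8 bytes)

   and the FPR stores are branched around when the CR bit that callers
   set with creqv/crxor 6,6,6 says no FP arguments were passed in
   FPRs.  */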
13616 /* Create the va_list data type. */
13618 static tree
13619 rs6000_build_builtin_va_list (void)
13621 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13623 /* For AIX, prefer 'char *' because that's what the system
13624 header files like. */
13625 if (DEFAULT_ABI != ABI_V4)
13626 return build_pointer_type (char_type_node);
13628 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13629 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13630 get_identifier ("__va_list_tag"), record);
13632 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13633 unsigned_char_type_node);
13634 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13635 unsigned_char_type_node);
13636 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13637 every user file. */
13638 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13639 get_identifier ("reserved"), short_unsigned_type_node);
13640 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13641 get_identifier ("overflow_arg_area"),
13642 ptr_type_node);
13643 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13644 get_identifier ("reg_save_area"),
13645 ptr_type_node);
13647 va_list_gpr_counter_field = f_gpr;
13648 va_list_fpr_counter_field = f_fpr;
13650 DECL_FIELD_CONTEXT (f_gpr) = record;
13651 DECL_FIELD_CONTEXT (f_fpr) = record;
13652 DECL_FIELD_CONTEXT (f_res) = record;
13653 DECL_FIELD_CONTEXT (f_ovf) = record;
13654 DECL_FIELD_CONTEXT (f_sav) = record;
13656 TYPE_STUB_DECL (record) = type_decl;
13657 TYPE_NAME (record) = type_decl;
13658 TYPE_FIELDS (record) = f_gpr;
13659 DECL_CHAIN (f_gpr) = f_fpr;
13660 DECL_CHAIN (f_fpr) = f_res;
13661 DECL_CHAIN (f_res) = f_ovf;
13662 DECL_CHAIN (f_ovf) = f_sav;
13664 layout_type (record);
13666 /* The correct type is an array type of one element. */
13667 return build_array_type (record, build_index_type (size_zero_node));
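/* Editorial illustration, a sketch: for V.4 the type built above is
   equivalent to

     typedef struct __va_list_tag {
       unsigned char gpr;          // GP arg registers consumed, 0..8
       unsigned char fpr;          // FP arg registers consumed, 0..8
       unsigned short reserved;    // named so -Wpadded stays quiet
       void *overflow_arg_area;    // next argument on the stack
       void *reg_save_area;        // base of the register save block
     } va_list[1];

   matching the va_list of the SVR4 PowerPC psABI, while AIX and the
   64-bit ABIs use a plain 'char *'.  */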
13670 /* Implement va_start. */
13672 static void
13673 rs6000_va_start (tree valist, rtx nextarg)
13675 HOST_WIDE_INT words, n_gpr, n_fpr;
13676 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13677 tree gpr, fpr, ovf, sav, t;
13679 /* Only SVR4 needs something special. */
13680 if (DEFAULT_ABI != ABI_V4)
13682 std_expand_builtin_va_start (valist, nextarg);
13683 return;
13686 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13687 f_fpr = DECL_CHAIN (f_gpr);
13688 f_res = DECL_CHAIN (f_fpr);
13689 f_ovf = DECL_CHAIN (f_res);
13690 f_sav = DECL_CHAIN (f_ovf);
13692 valist = build_simple_mem_ref (valist);
13693 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13694 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13695 f_fpr, NULL_TREE);
13696 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13697 f_ovf, NULL_TREE);
13698 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13699 f_sav, NULL_TREE);
13701 /* Count number of gp and fp argument registers used. */
13702 words = crtl->args.info.words;
13703 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13704 GP_ARG_NUM_REG);
13705 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13706 FP_ARG_NUM_REG);
13708 if (TARGET_DEBUG_ARG)
13709 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13710 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13711 words, n_gpr, n_fpr);
13713 if (cfun->va_list_gpr_size)
13715 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13716 build_int_cst (NULL_TREE, n_gpr));
13717 TREE_SIDE_EFFECTS (t) = 1;
13718 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13721 if (cfun->va_list_fpr_size)
13723 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13724 build_int_cst (NULL_TREE, n_fpr));
13725 TREE_SIDE_EFFECTS (t) = 1;
13726 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13728 #ifdef HAVE_AS_GNU_ATTRIBUTE
13729 if (call_ABI_of_interest (cfun->decl))
13730 rs6000_passes_float = true;
13731 #endif
13734 /* Find the overflow area. */
13735 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13736 if (words != 0)
13737 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13738 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13739 TREE_SIDE_EFFECTS (t) = 1;
13740 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13742 /* If there were no va_arg invocations, don't set up the register
13743 save area. */
13744 if (!cfun->va_list_gpr_size
13745 && !cfun->va_list_fpr_size
13746 && n_gpr < GP_ARG_NUM_REG
13747 && n_fpr < FP_ARG_V4_MAX_REG)
13748 return;
13750 /* Find the register save area. */
13751 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13752 if (cfun->machine->varargs_save_offset)
13753 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13754 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13755 TREE_SIDE_EFFECTS (t) = 1;
13756 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
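/* Editorial illustration, a sketch: in

     void logmsg (const char *fmt, ...)
     {
       va_list ap;
       va_start (ap, fmt);
       ...
     }

   the single named pointer consumes one GPR, so the code above stores
   gpr = 1 and fpr = 0, points overflow_arg_area at the incoming stack
   arguments, and points reg_save_area at the block spilled by
   setup_incoming_varargs.  */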
13759 /* Implement va_arg. */
13761 static tree
13762 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13763 gimple_seq *post_p)
13765 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13766 tree gpr, fpr, ovf, sav, reg, t, u;
13767 int size, rsize, n_reg, sav_ofs, sav_scale;
13768 tree lab_false, lab_over, addr;
13769 int align;
13770 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13771 int regalign = 0;
13772 gimple *stmt;
13774 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13776 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13777 return build_va_arg_indirect_ref (t);
13780 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13781 earlier version of gcc, with the property that it always applied alignment
13782 adjustments to the va-args (even for zero-sized types). The cheapest way
13783 to deal with this is to replicate the effect of the part of
13784 std_gimplify_va_arg_expr that carries out the align adjust, for the
13785 relevant case.
13786 We don't need to check for pass-by-reference because of the test above.
13787 We can return a simplified answer, since we know there's no offset to add. */
13789 if (((TARGET_MACHO
13790 && rs6000_darwin64_abi)
13791 || DEFAULT_ABI == ABI_ELFv2
13792 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13793 && integer_zerop (TYPE_SIZE (type)))
13795 unsigned HOST_WIDE_INT align, boundary;
13796 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13797 align = PARM_BOUNDARY / BITS_PER_UNIT;
13798 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13799 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13800 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13801 boundary /= BITS_PER_UNIT;
13802 if (boundary > align)
13804 tree t;
13805 /* This updates arg ptr by the amount that would be necessary
13806 to align the zero-sized (but not zero-alignment) item. */
13807 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13808 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13809 gimplify_and_add (t, pre_p);
13811 t = fold_convert (sizetype, valist_tmp);
13812 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13813 fold_convert (TREE_TYPE (valist),
13814 fold_build2 (BIT_AND_EXPR, sizetype, t,
13815 size_int (-boundary))));
13816 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13817 gimplify_and_add (t, pre_p);
13819 /* Since it is zero-sized there's no increment for the item itself. */
13820 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13821 return build_va_arg_indirect_ref (valist_tmp);
13824 if (DEFAULT_ABI != ABI_V4)
13826 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13828 tree elem_type = TREE_TYPE (type);
13829 machine_mode elem_mode = TYPE_MODE (elem_type);
13830 int elem_size = GET_MODE_SIZE (elem_mode);
13832 if (elem_size < UNITS_PER_WORD)
13834 tree real_part, imag_part;
13835 gimple_seq post = NULL;
13837 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13838 &post);
13839 /* Copy the value into a temporary, lest the formal temporary
13840 be reused out from under us. */
13841 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13842 gimple_seq_add_seq (pre_p, post);
13844 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13845 post_p);
13847 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13851 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13854 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13855 f_fpr = DECL_CHAIN (f_gpr);
13856 f_res = DECL_CHAIN (f_fpr);
13857 f_ovf = DECL_CHAIN (f_res);
13858 f_sav = DECL_CHAIN (f_ovf);
13860 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13861 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13862 f_fpr, NULL_TREE);
13863 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13864 f_ovf, NULL_TREE);
13865 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13866 f_sav, NULL_TREE);
13868 size = int_size_in_bytes (type);
13869 rsize = (size + 3) / 4;
13870 int pad = 4 * rsize - size;
13871 align = 1;
13873 machine_mode mode = TYPE_MODE (type);
13874 if (abi_v4_pass_in_fpr (mode))
13876 /* FP args go in FP registers, if present. */
13877 reg = fpr;
13878 n_reg = (size + 7) / 8;
13879 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13880 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13881 if (mode != SFmode && mode != SDmode)
13882 align = 8;
13884 else
13886 /* Otherwise into GP registers. */
13887 reg = gpr;
13888 n_reg = rsize;
13889 sav_ofs = 0;
13890 sav_scale = 4;
13891 if (n_reg == 2)
13892 align = 8;
13895 /* Pull the value out of the saved registers.... */
13897 lab_over = NULL;
13898 addr = create_tmp_var (ptr_type_node, "addr");
13900 /* AltiVec vectors never go in registers when -mabi=altivec. */
13901 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13902 align = 16;
13903 else
13905 lab_false = create_artificial_label (input_location);
13906 lab_over = create_artificial_label (input_location);
13908 /* Long long and SPE vectors are aligned in the registers.
13909 As are any other 2 gpr item such as complex int due to a
13910 historical mistake. */
13911 u = reg;
13912 if (n_reg == 2 && reg == gpr)
13914 regalign = 1;
13915 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13916 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13917 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13918 unshare_expr (reg), u);
13920 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13921 reg number is 0 for f1, so we want to make it odd. */
13922 else if (reg == fpr && mode == TDmode)
13924 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13925 build_int_cst (TREE_TYPE (reg), 1));
13926 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13929 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13930 t = build2 (GE_EXPR, boolean_type_node, u, t);
13931 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13932 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13933 gimplify_and_add (t, pre_p);
13935 t = sav;
13936 if (sav_ofs)
13937 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13939 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13940 build_int_cst (TREE_TYPE (reg), n_reg));
13941 u = fold_convert (sizetype, u);
13942 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13943 t = fold_build_pointer_plus (t, u);
13945 /* _Decimal32 varargs are located in the second word of the 64-bit
13946 FP register for 32-bit binaries. */
13947 if (TARGET_32BIT
13948 && TARGET_HARD_FLOAT && TARGET_FPRS
13949 && mode == SDmode)
13950 t = fold_build_pointer_plus_hwi (t, size);
13952 /* Args are passed right-aligned. */
13953 if (BYTES_BIG_ENDIAN)
13954 t = fold_build_pointer_plus_hwi (t, pad);
13956 gimplify_assign (addr, t, pre_p);
13958 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13960 stmt = gimple_build_label (lab_false);
13961 gimple_seq_add_stmt (pre_p, stmt);
13963 if ((n_reg == 2 && !regalign) || n_reg > 2)
13965 /* Ensure that we don't find any more args in regs.
13966 Alignment has been taken care of for special cases. */
13967 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13971 /* ... otherwise out of the overflow area. */
13973 /* Care for on-stack alignment if needed. */
13974 t = ovf;
13975 if (align != 1)
13977 t = fold_build_pointer_plus_hwi (t, align - 1);
13978 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13979 build_int_cst (TREE_TYPE (t), -align));
13982 /* Args are passed right-aligned. */
13983 if (BYTES_BIG_ENDIAN)
13984 t = fold_build_pointer_plus_hwi (t, pad);
13986 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13988 gimplify_assign (unshare_expr (addr), t, pre_p);
13990 t = fold_build_pointer_plus_hwi (t, size);
13991 gimplify_assign (unshare_expr (ovf), t, pre_p);
13993 if (lab_over)
13995 stmt = gimple_build_label (lab_over);
13996 gimple_seq_add_stmt (pre_p, stmt);
13999 if (STRICT_ALIGNMENT
14000 && (TYPE_ALIGN (type)
14001 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
14003 /* The value (of type complex double, for example) may not be
14004 aligned in memory in the saved registers, so copy via a
14005 temporary. (This is the same code as used for SPARC.) */
14006 tree tmp = create_tmp_var (type, "va_arg_tmp");
14007 tree dest_addr = build_fold_addr_expr (tmp);
14009 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
14010 3, dest_addr, addr, size_int (rsize * 4));
14012 gimplify_and_add (copy, pre_p);
14013 addr = dest_addr;
14016 addr = fold_convert (ptrtype, addr);
14017 return build_va_arg_indirect_ref (addr);
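/* Editorial illustration, a sketch of the GIMPLE built above: for
   va_arg (ap, double) under V.4 with hard float the expansion behaves
   like

     if (ap->fpr >= 8)
       goto overflow;
     addr = ap->reg_save_area + 8*4 + ap->fpr * 8;  // skip saved GPRs
     ap->fpr += 1;
     goto done;
   overflow:
     addr = align8 (ap->overflow_arg_area);  // the BIT_AND_EXPR above
     ap->overflow_arg_area = addr + 8;
   done:
     result = *(double *) addr;

   where align8 and the labels are stand-ins for the temporaries and
   artificial labels actually generated.  */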
14020 /* Builtins. */
14022 static void
14023 def_builtin (const char *name, tree type, enum rs6000_builtins code)
14025 tree t;
14026 unsigned classify = rs6000_builtin_info[(int)code].attr;
14027 const char *attr_string = "";
14029 gcc_assert (name != NULL);
14030 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
14032 if (rs6000_builtin_decls[(int)code])
14033 fatal_error (input_location,
14034 "internal error: builtin function %s already processed", name);
14036 rs6000_builtin_decls[(int)code] = t =
14037 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
14039 /* Set any special attributes. */
14040 if ((classify & RS6000_BTC_CONST) != 0)
14042 /* const function, function only depends on the inputs. */
14043 TREE_READONLY (t) = 1;
14044 TREE_NOTHROW (t) = 1;
14045 attr_string = ", const";
14047 else if ((classify & RS6000_BTC_PURE) != 0)
14049 /* pure function, function can read global memory, but does not set any
14050 external state. */
14051 DECL_PURE_P (t) = 1;
14052 TREE_NOTHROW (t) = 1;
14053 attr_string = ", pure";
14055 else if ((classify & RS6000_BTC_FP) != 0)
14057 /* Function is a math function. If rounding mode is on, then treat the
14058 function as not reading global memory, but it can have arbitrary side
14059 effects. If it is off, then assume the function is a const function.
14060 This mimics the ATTR_MATHFN_FPROUNDING attribute in
14061 builtin-attribute.def that is used for the math functions. */
14062 TREE_NOTHROW (t) = 1;
14063 if (flag_rounding_math)
14065 DECL_PURE_P (t) = 1;
14066 DECL_IS_NOVOPS (t) = 1;
14067 attr_string = ", fp, pure";
14069 else
14071 TREE_READONLY (t) = 1;
14072 attr_string = ", fp, const";
14075 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
14076 gcc_unreachable ();
14078 if (TARGET_DEBUG_BUILTIN)
14079 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
14080 (int)code, name, attr_string);
14083 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
14085 #undef RS6000_BUILTIN_0
14086 #undef RS6000_BUILTIN_1
14087 #undef RS6000_BUILTIN_2
14088 #undef RS6000_BUILTIN_3
14089 #undef RS6000_BUILTIN_A
14090 #undef RS6000_BUILTIN_D
14091 #undef RS6000_BUILTIN_E
14092 #undef RS6000_BUILTIN_H
14093 #undef RS6000_BUILTIN_P
14094 #undef RS6000_BUILTIN_Q
14095 #undef RS6000_BUILTIN_S
14096 #undef RS6000_BUILTIN_X
14098 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14099 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14100 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14101 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
14102 { MASK, ICODE, NAME, ENUM },
14104 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14105 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14106 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14107 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14108 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14109 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14110 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14111 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14113 static const struct builtin_description bdesc_3arg[] =
14115 #include "rs6000-builtin.def"
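/* Editorial note: the #undef/#define dance above is an X-macro.  A
   schematic entry such as

     RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP,
                       "__builtin_altivec_vmaddfp",
                       RS6000_BTM_ALTIVEC, RS6000_BTC_FP,
                       CODE_FOR_fmav4sf4)

   in rs6000-builtin.def expands to one { MASK, ICODE, NAME, ENUM }
   initializer here, while the other RS6000_BUILTIN_* classes expand
   to nothing, so each bdesc_* table below extracts exactly one class
   of builtins from the same .def file by leaving a different macro
   "live".  */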
14118 /* DST operations: void foo (void *, const int, const char). */
14120 #undef RS6000_BUILTIN_0
14121 #undef RS6000_BUILTIN_1
14122 #undef RS6000_BUILTIN_2
14123 #undef RS6000_BUILTIN_3
14124 #undef RS6000_BUILTIN_A
14125 #undef RS6000_BUILTIN_D
14126 #undef RS6000_BUILTIN_E
14127 #undef RS6000_BUILTIN_H
14128 #undef RS6000_BUILTIN_P
14129 #undef RS6000_BUILTIN_Q
14130 #undef RS6000_BUILTIN_S
14131 #undef RS6000_BUILTIN_X
14133 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14134 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14135 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14136 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14137 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14138 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
14139 { MASK, ICODE, NAME, ENUM },
14141 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14142 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14143 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14144 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14145 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14146 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14148 static const struct builtin_description bdesc_dst[] =
14150 #include "rs6000-builtin.def"
14153 /* Simple binary operations: VECc = foo (VECa, VECb). */
14155 #undef RS6000_BUILTIN_0
14156 #undef RS6000_BUILTIN_1
14157 #undef RS6000_BUILTIN_2
14158 #undef RS6000_BUILTIN_3
14159 #undef RS6000_BUILTIN_A
14160 #undef RS6000_BUILTIN_D
14161 #undef RS6000_BUILTIN_E
14162 #undef RS6000_BUILTIN_H
14163 #undef RS6000_BUILTIN_P
14164 #undef RS6000_BUILTIN_Q
14165 #undef RS6000_BUILTIN_S
14166 #undef RS6000_BUILTIN_X
14168 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14169 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14170 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
14171 { MASK, ICODE, NAME, ENUM },
14173 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14174 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14175 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14176 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14177 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14178 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14179 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14180 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14181 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14183 static const struct builtin_description bdesc_2arg[] =
14185 #include "rs6000-builtin.def"
14188 #undef RS6000_BUILTIN_0
14189 #undef RS6000_BUILTIN_1
14190 #undef RS6000_BUILTIN_2
14191 #undef RS6000_BUILTIN_3
14192 #undef RS6000_BUILTIN_A
14193 #undef RS6000_BUILTIN_D
14194 #undef RS6000_BUILTIN_E
14195 #undef RS6000_BUILTIN_H
14196 #undef RS6000_BUILTIN_P
14197 #undef RS6000_BUILTIN_Q
14198 #undef RS6000_BUILTIN_S
14199 #undef RS6000_BUILTIN_X
14201 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14202 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14203 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14204 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14205 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14206 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14207 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14208 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14209 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
14210 { MASK, ICODE, NAME, ENUM },
14212 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14213 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14214 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14216 /* AltiVec predicates. */
14218 static const struct builtin_description bdesc_altivec_preds[] =
14220 #include "rs6000-builtin.def"
14223 /* SPE predicates. */
14224 #undef RS6000_BUILTIN_0
14225 #undef RS6000_BUILTIN_1
14226 #undef RS6000_BUILTIN_2
14227 #undef RS6000_BUILTIN_3
14228 #undef RS6000_BUILTIN_A
14229 #undef RS6000_BUILTIN_D
14230 #undef RS6000_BUILTIN_E
14231 #undef RS6000_BUILTIN_H
14232 #undef RS6000_BUILTIN_P
14233 #undef RS6000_BUILTIN_Q
14234 #undef RS6000_BUILTIN_S
14235 #undef RS6000_BUILTIN_X
14237 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14238 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14239 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14240 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14241 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14242 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14243 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14244 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14245 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14246 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14247 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
14248 { MASK, ICODE, NAME, ENUM },
14250 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14252 static const struct builtin_description bdesc_spe_predicates[] =
14254 #include "rs6000-builtin.def"
14257 /* SPE evsel predicates. */
14258 #undef RS6000_BUILTIN_0
14259 #undef RS6000_BUILTIN_1
14260 #undef RS6000_BUILTIN_2
14261 #undef RS6000_BUILTIN_3
14262 #undef RS6000_BUILTIN_A
14263 #undef RS6000_BUILTIN_D
14264 #undef RS6000_BUILTIN_E
14265 #undef RS6000_BUILTIN_H
14266 #undef RS6000_BUILTIN_P
14267 #undef RS6000_BUILTIN_Q
14268 #undef RS6000_BUILTIN_S
14269 #undef RS6000_BUILTIN_X
14271 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14272 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14273 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14274 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14275 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14276 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14277 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
14278 { MASK, ICODE, NAME, ENUM },
14280 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14281 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14282 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14283 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14284 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14286 static const struct builtin_description bdesc_spe_evsel[] =
14288 #include "rs6000-builtin.def"
14291 /* PAIRED predicates. */
14292 #undef RS6000_BUILTIN_0
14293 #undef RS6000_BUILTIN_1
14294 #undef RS6000_BUILTIN_2
14295 #undef RS6000_BUILTIN_3
14296 #undef RS6000_BUILTIN_A
14297 #undef RS6000_BUILTIN_D
14298 #undef RS6000_BUILTIN_E
14299 #undef RS6000_BUILTIN_H
14300 #undef RS6000_BUILTIN_P
14301 #undef RS6000_BUILTIN_Q
14302 #undef RS6000_BUILTIN_S
14303 #undef RS6000_BUILTIN_X
14305 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14306 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14307 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14308 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14309 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14310 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14311 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14312 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14313 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14314 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14315 { MASK, ICODE, NAME, ENUM },
14317 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14318 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14320 static const struct builtin_description bdesc_paired_preds[] =
14322 #include "rs6000-builtin.def"
14325 /* ABS* operations. */
14327 #undef RS6000_BUILTIN_0
14328 #undef RS6000_BUILTIN_1
14329 #undef RS6000_BUILTIN_2
14330 #undef RS6000_BUILTIN_3
14331 #undef RS6000_BUILTIN_A
14332 #undef RS6000_BUILTIN_D
14333 #undef RS6000_BUILTIN_E
14334 #undef RS6000_BUILTIN_H
14335 #undef RS6000_BUILTIN_P
14336 #undef RS6000_BUILTIN_Q
14337 #undef RS6000_BUILTIN_S
14338 #undef RS6000_BUILTIN_X
14340 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14341 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14342 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14343 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14344 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14345 { MASK, ICODE, NAME, ENUM },
14347 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14348 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14349 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14350 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14351 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14352 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14353 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14355 static const struct builtin_description bdesc_abs[] =
14357 #include "rs6000-builtin.def"
14360 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14361 foo (VECa). */
14363 #undef RS6000_BUILTIN_0
14364 #undef RS6000_BUILTIN_1
14365 #undef RS6000_BUILTIN_2
14366 #undef RS6000_BUILTIN_3
14367 #undef RS6000_BUILTIN_A
14368 #undef RS6000_BUILTIN_D
14369 #undef RS6000_BUILTIN_E
14370 #undef RS6000_BUILTIN_H
14371 #undef RS6000_BUILTIN_P
14372 #undef RS6000_BUILTIN_Q
14373 #undef RS6000_BUILTIN_S
14374 #undef RS6000_BUILTIN_X
14376 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14377 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14378 { MASK, ICODE, NAME, ENUM },
14380 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14381 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14382 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14383 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14384 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14385 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14386 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14387 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14388 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14389 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14391 static const struct builtin_description bdesc_1arg[] =
14393 #include "rs6000-builtin.def"
14396 /* Simple no-argument operations: result = __builtin_darn_32 (). */
14398 #undef RS6000_BUILTIN_0
14399 #undef RS6000_BUILTIN_1
14400 #undef RS6000_BUILTIN_2
14401 #undef RS6000_BUILTIN_3
14402 #undef RS6000_BUILTIN_A
14403 #undef RS6000_BUILTIN_D
14404 #undef RS6000_BUILTIN_E
14405 #undef RS6000_BUILTIN_H
14406 #undef RS6000_BUILTIN_P
14407 #undef RS6000_BUILTIN_Q
14408 #undef RS6000_BUILTIN_S
14409 #undef RS6000_BUILTIN_X
14411 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14412 { MASK, ICODE, NAME, ENUM },
14414 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14415 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14416 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14417 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14418 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14419 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14420 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14421 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14422 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14423 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14424 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14426 static const struct builtin_description bdesc_0arg[] =
14427 {
14428 #include "rs6000-builtin.def"
14429 };
14431 /* HTM builtins. */
14432 #undef RS6000_BUILTIN_0
14433 #undef RS6000_BUILTIN_1
14434 #undef RS6000_BUILTIN_2
14435 #undef RS6000_BUILTIN_3
14436 #undef RS6000_BUILTIN_A
14437 #undef RS6000_BUILTIN_D
14438 #undef RS6000_BUILTIN_E
14439 #undef RS6000_BUILTIN_H
14440 #undef RS6000_BUILTIN_P
14441 #undef RS6000_BUILTIN_Q
14442 #undef RS6000_BUILTIN_S
14443 #undef RS6000_BUILTIN_X
14445 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14446 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14447 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14448 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14449 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14450 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14451 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14452 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14453 { MASK, ICODE, NAME, ENUM },
14455 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14456 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14457 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14458 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14460 static const struct builtin_description bdesc_htm[] =
14461 {
14462 #include "rs6000-builtin.def"
14463 };
14465 #undef RS6000_BUILTIN_0
14466 #undef RS6000_BUILTIN_1
14467 #undef RS6000_BUILTIN_2
14468 #undef RS6000_BUILTIN_3
14469 #undef RS6000_BUILTIN_A
14470 #undef RS6000_BUILTIN_D
14471 #undef RS6000_BUILTIN_E
14472 #undef RS6000_BUILTIN_H
14473 #undef RS6000_BUILTIN_P
14474 #undef RS6000_BUILTIN_Q
14475 #undef RS6000_BUILTIN_S
14477 /* Return true if a builtin function is overloaded. */
14478 bool
14479 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
14480 {
14481 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
14482 }
14483 /* Return the name of the overloaded builtin function with code FNCODE. */
14484 const char *
14485 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
14486 {
14487 return rs6000_builtin_info[(int)fncode].name;
14488 }
14490 /* Expand an expression EXP that calls a builtin without arguments. */
14491 static rtx
14492 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
14494 rtx pat;
14495 machine_mode tmode = insn_data[icode].operand[0].mode;
14497 if (icode == CODE_FOR_nothing)
14498 /* Builtin not supported on this processor. */
14499 return 0;
14501 if (target == 0
14502 || GET_MODE (target) != tmode
14503 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14504 target = gen_reg_rtx (tmode);
14506 pat = GEN_FCN (icode) (target);
14507 if (! pat)
14508 return 0;
14509 emit_insn (pat);
14511 return target;
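/* Illustrative sketch, not part of rs6000.c: a user-level view of the
   zero-argument path expanded above.  Assumes a POWER9 target
   (-mcpu=power9) and the documented int __builtin_darn_32 (void)
   signature; the program below is hypothetical.  */
#include <stdio.h>

int
main (void)
{
  /* Expands through rs6000_expand_zeroop_builtin: no input operands,
     only a target register for the "darn" result.  */
  int r = __builtin_darn_32 ();
  printf ("random: %d\n", r);
  return 0;
}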
14514 /* Expand the MTFSF builtin; operand 0 must be a literal 8-bit FPSCR field mask. */
14515 static rtx
14516 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
14518 rtx pat;
14519 tree arg0 = CALL_EXPR_ARG (exp, 0);
14520 tree arg1 = CALL_EXPR_ARG (exp, 1);
14521 rtx op0 = expand_normal (arg0);
14522 rtx op1 = expand_normal (arg1);
14523 machine_mode mode0 = insn_data[icode].operand[0].mode;
14524 machine_mode mode1 = insn_data[icode].operand[1].mode;
14526 if (icode == CODE_FOR_nothing)
14527 /* Builtin not supported on this processor. */
14528 return 0;
14530 /* If we got invalid arguments bail out before generating bad rtl. */
14531 if (arg0 == error_mark_node || arg1 == error_mark_node)
14532 return const0_rtx;
14534 if (GET_CODE (op0) != CONST_INT
14535 || INTVAL (op0) > 255
14536 || INTVAL (op0) < 0)
14538 error ("argument 1 must be an 8-bit field value");
14539 return const0_rtx;
14542 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14543 op0 = copy_to_mode_reg (mode0, op0);
14545 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14546 op1 = copy_to_mode_reg (mode1, op1);
14548 pat = GEN_FCN (icode) (op0, op1);
14549 if (! pat)
14550 return const0_rtx;
14551 emit_insn (pat);
14553 return NULL_RTX;
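/* Illustrative sketch, not part of rs6000.c: how the builtin expanded
   above is typically called.  Argument 1 must be a literal in 0..255
   (the check above); assumes the documented
   void __builtin_mtfsf (const int, double) signature.  The helper
   name is hypothetical.  */
void
restore_fpscr (double image)
{
  /* Copy all eight 4-bit FPSCR fields from the bit image in IMAGE.
     A non-literal or out-of-range mask would hit the error above.  */
  __builtin_mtfsf (0xff, image);
}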
14555 /* Expand a builtin that takes a single operand. */
14556 static rtx
14557 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14559 rtx pat;
14560 tree arg0 = CALL_EXPR_ARG (exp, 0);
14561 rtx op0 = expand_normal (arg0);
14562 machine_mode tmode = insn_data[icode].operand[0].mode;
14563 machine_mode mode0 = insn_data[icode].operand[1].mode;
14565 if (icode == CODE_FOR_nothing)
14566 /* Builtin not supported on this processor. */
14567 return 0;
14569 /* If we got invalid arguments bail out before generating bad rtl. */
14570 if (arg0 == error_mark_node)
14571 return const0_rtx;
14573 if (icode == CODE_FOR_altivec_vspltisb
14574 || icode == CODE_FOR_altivec_vspltish
14575 || icode == CODE_FOR_altivec_vspltisw
14576 || icode == CODE_FOR_spe_evsplatfi
14577 || icode == CODE_FOR_spe_evsplati)
14579 /* Only allow 5-bit *signed* literals. */
14580 if (GET_CODE (op0) != CONST_INT
14581 || INTVAL (op0) > 15
14582 || INTVAL (op0) < -16)
14584 error ("argument 1 must be a 5-bit signed literal");
14585 return CONST0_RTX (tmode);
14589 if (target == 0
14590 || GET_MODE (target) != tmode
14591 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14592 target = gen_reg_rtx (tmode);
14594 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14595 op0 = copy_to_mode_reg (mode0, op0);
14597 pat = GEN_FCN (icode) (target, op0);
14598 if (! pat)
14599 return 0;
14600 emit_insn (pat);
14602 return target;
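/* Illustrative sketch, not part of rs6000.c: the 5-bit signed-literal
   restriction enforced above, seen from altivec.h (compile with
   -maltivec).  The function name is hypothetical.  */
#include <altivec.h>

vector signed int
splat_minus_sixteen (void)
{
  /* vec_splat_s32 maps onto vspltisw; the literal must lie in
     -16..15, or the expander above reports an error.  */
  return vec_splat_s32 (-16);
}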
14605 static rtx
14606 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14608 rtx pat, scratch1, scratch2;
14609 tree arg0 = CALL_EXPR_ARG (exp, 0);
14610 rtx op0 = expand_normal (arg0);
14611 machine_mode tmode = insn_data[icode].operand[0].mode;
14612 machine_mode mode0 = insn_data[icode].operand[1].mode;
14614 /* If we have invalid arguments, bail out before generating bad rtl. */
14615 if (arg0 == error_mark_node)
14616 return const0_rtx;
14618 if (target == 0
14619 || GET_MODE (target) != tmode
14620 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14621 target = gen_reg_rtx (tmode);
14623 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14624 op0 = copy_to_mode_reg (mode0, op0);
14626 scratch1 = gen_reg_rtx (mode0);
14627 scratch2 = gen_reg_rtx (mode0);
14629 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14630 if (! pat)
14631 return 0;
14632 emit_insn (pat);
14634 return target;
14636 /* Expand a builtin that takes two operands. */
14637 static rtx
14638 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14640 rtx pat;
14641 tree arg0 = CALL_EXPR_ARG (exp, 0);
14642 tree arg1 = CALL_EXPR_ARG (exp, 1);
14643 rtx op0 = expand_normal (arg0);
14644 rtx op1 = expand_normal (arg1);
14645 machine_mode tmode = insn_data[icode].operand[0].mode;
14646 machine_mode mode0 = insn_data[icode].operand[1].mode;
14647 machine_mode mode1 = insn_data[icode].operand[2].mode;
14649 if (icode == CODE_FOR_nothing)
14650 /* Builtin not supported on this processor. */
14651 return 0;
14653 /* If we got invalid arguments bail out before generating bad rtl. */
14654 if (arg0 == error_mark_node || arg1 == error_mark_node)
14655 return const0_rtx;
14657 if (icode == CODE_FOR_altivec_vcfux
14658 || icode == CODE_FOR_altivec_vcfsx
14659 || icode == CODE_FOR_altivec_vctsxs
14660 || icode == CODE_FOR_altivec_vctuxs
14661 || icode == CODE_FOR_altivec_vspltb
14662 || icode == CODE_FOR_altivec_vsplth
14663 || icode == CODE_FOR_altivec_vspltw
14664 || icode == CODE_FOR_spe_evaddiw
14665 || icode == CODE_FOR_spe_evldd
14666 || icode == CODE_FOR_spe_evldh
14667 || icode == CODE_FOR_spe_evldw
14668 || icode == CODE_FOR_spe_evlhhesplat
14669 || icode == CODE_FOR_spe_evlhhossplat
14670 || icode == CODE_FOR_spe_evlhhousplat
14671 || icode == CODE_FOR_spe_evlwhe
14672 || icode == CODE_FOR_spe_evlwhos
14673 || icode == CODE_FOR_spe_evlwhou
14674 || icode == CODE_FOR_spe_evlwhsplat
14675 || icode == CODE_FOR_spe_evlwwsplat
14676 || icode == CODE_FOR_spe_evrlwi
14677 || icode == CODE_FOR_spe_evslwi
14678 || icode == CODE_FOR_spe_evsrwis
14679 || icode == CODE_FOR_spe_evsubifw
14680 || icode == CODE_FOR_spe_evsrwiu)
14682 /* Only allow 5-bit unsigned literals. */
14683 STRIP_NOPS (arg1);
14684 if (TREE_CODE (arg1) != INTEGER_CST
14685 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14687 error ("argument 2 must be a 5-bit unsigned literal");
14688 return CONST0_RTX (tmode);
14691 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14692 || icode == CODE_FOR_dfptstsfi_lt_dd
14693 || icode == CODE_FOR_dfptstsfi_gt_dd
14694 || icode == CODE_FOR_dfptstsfi_unordered_dd
14695 || icode == CODE_FOR_dfptstsfi_eq_td
14696 || icode == CODE_FOR_dfptstsfi_lt_td
14697 || icode == CODE_FOR_dfptstsfi_gt_td
14698 || icode == CODE_FOR_dfptstsfi_unordered_td)
14700 /* Only allow 6-bit unsigned literals. */
14701 STRIP_NOPS (arg0);
14702 if (TREE_CODE (arg0) != INTEGER_CST
14703 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14705 error ("argument 1 must be a 6-bit unsigned literal");
14706 return CONST0_RTX (tmode);
14709 else if (icode == CODE_FOR_xststdcdp
14710 || icode == CODE_FOR_xststdcsp
14711 || icode == CODE_FOR_xvtstdcdp
14712 || icode == CODE_FOR_xvtstdcsp)
14714 /* Only allow 7-bit unsigned literals. */
14715 STRIP_NOPS (arg1);
14716 if (TREE_CODE (arg1) != INTEGER_CST
14717 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14719 error ("argument 2 must be a 7-bit unsigned literal");
14720 return CONST0_RTX (tmode);
14724 if (target == 0
14725 || GET_MODE (target) != tmode
14726 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14727 target = gen_reg_rtx (tmode);
14729 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14730 op0 = copy_to_mode_reg (mode0, op0);
14731 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14732 op1 = copy_to_mode_reg (mode1, op1);
14734 pat = GEN_FCN (icode) (target, op0, op1);
14735 if (! pat)
14736 return 0;
14737 emit_insn (pat);
14739 return target;
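/* Illustrative sketch, not part of rs6000.c: one of the binary cases
   checked above.  vec_ctf maps onto vcfsx, whose scale operand must
   be a 5-bit unsigned literal (0..31).  Compile with -maltivec; the
   function name is hypothetical.  */
#include <altivec.h>

vector float
fixed_point_to_float (vector signed int v)
{
  /* Scale by 2**-1; a variable second argument would reach the
     "5-bit unsigned literal" error above.  */
  return vec_ctf (v, 1);
}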
14741 /* Expand an AltiVec predicate builtin; operand 0 selects how CR6 is tested. */
14742 static rtx
14743 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14745 rtx pat, scratch;
14746 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14747 tree arg0 = CALL_EXPR_ARG (exp, 1);
14748 tree arg1 = CALL_EXPR_ARG (exp, 2);
14749 rtx op0 = expand_normal (arg0);
14750 rtx op1 = expand_normal (arg1);
14751 machine_mode tmode = SImode;
14752 machine_mode mode0 = insn_data[icode].operand[1].mode;
14753 machine_mode mode1 = insn_data[icode].operand[2].mode;
14754 int cr6_form_int;
14756 if (TREE_CODE (cr6_form) != INTEGER_CST)
14758 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14759 return const0_rtx;
14761 else
14762 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14764 gcc_assert (mode0 == mode1);
14766 /* If we have invalid arguments, bail out before generating bad rtl. */
14767 if (arg0 == error_mark_node || arg1 == error_mark_node)
14768 return const0_rtx;
14770 if (target == 0
14771 || GET_MODE (target) != tmode
14772 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14773 target = gen_reg_rtx (tmode);
14775 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14776 op0 = copy_to_mode_reg (mode0, op0);
14777 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14778 op1 = copy_to_mode_reg (mode1, op1);
14780 /* Note that for many of the relevant operations (e.g. cmpne or
14781 cmpeq) with float or double operands, it makes more sense for the
14782 mode of the allocated scratch register to select a vector of
14783 integer. But the choice to copy the mode of operand 0 was made
14784 long ago and there are no plans to change it. */
14785 scratch = gen_reg_rtx (mode0);
14787 pat = GEN_FCN (icode) (scratch, op0, op1);
14788 if (! pat)
14789 return 0;
14790 emit_insn (pat);
14792 /* The vec_any* and vec_all* predicates use the same opcodes for two
14793 different operations, but the bits in CR6 will be different
14794 depending on what information we want. So we have to play tricks
14795 with CR6 to get the right bits out.
14797 If you think this is disgusting, look at the specs for the
14798 AltiVec predicates. */
14800 switch (cr6_form_int)
14802 case 0:
14803 emit_insn (gen_cr6_test_for_zero (target));
14804 break;
14805 case 1:
14806 emit_insn (gen_cr6_test_for_zero_reverse (target));
14807 break;
14808 case 2:
14809 emit_insn (gen_cr6_test_for_lt (target));
14810 break;
14811 case 3:
14812 emit_insn (gen_cr6_test_for_lt_reverse (target));
14813 break;
14814 default:
14815 error ("argument 1 of __builtin_altivec_predicate is out of range");
14816 break;
14819 return target;
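/* Illustrative sketch, not part of rs6000.c: the CR6 handling above
   is what backs the vec_all_* and vec_any_* interfaces.  One compare
   sets CR6; the cr6_form operand selects which bit (possibly
   inverted) becomes the scalar result.  Compile with -maltivec; the
   function name is hypothetical.  */
#include <altivec.h>

int
compare_both_ways (vector signed int a, vector signed int b)
{
  int all = vec_all_eq (a, b);  /* CR6 "all elements true" bit */
  int any = vec_any_eq (a, b);  /* complement of "no element true" */
  return (all << 1) | any;
}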
14822 static rtx
14823 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14825 rtx pat, addr;
14826 tree arg0 = CALL_EXPR_ARG (exp, 0);
14827 tree arg1 = CALL_EXPR_ARG (exp, 1);
14828 machine_mode tmode = insn_data[icode].operand[0].mode;
14829 machine_mode mode0 = Pmode;
14830 machine_mode mode1 = Pmode;
14831 rtx op0 = expand_normal (arg0);
14832 rtx op1 = expand_normal (arg1);
14834 if (icode == CODE_FOR_nothing)
14835 /* Builtin not supported on this processor. */
14836 return 0;
14838 /* If we got invalid arguments bail out before generating bad rtl. */
14839 if (arg0 == error_mark_node || arg1 == error_mark_node)
14840 return const0_rtx;
14842 if (target == 0
14843 || GET_MODE (target) != tmode
14844 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14845 target = gen_reg_rtx (tmode);
14847 op1 = copy_to_mode_reg (mode1, op1);
14849 if (op0 == const0_rtx)
14851 addr = gen_rtx_MEM (tmode, op1);
14853 else
14855 op0 = copy_to_mode_reg (mode0, op0);
14856 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14859 pat = GEN_FCN (icode) (target, addr);
14861 if (! pat)
14862 return 0;
14863 emit_insn (pat);
14865 return target;
14868 /* Return a constant vector for use as a little-endian permute control vector
14869 to reverse the order of elements of the given vector mode. */
14870 static rtx
14871 swap_selector_for_mode (machine_mode mode)
14873 /* These are little endian vectors, so their elements are reversed
14874 from what you would normally expect for a permute control vector. */
14875 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14876 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14877 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14878 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14879 unsigned int *swaparray, i;
14880 rtx perm[16];
14882 switch (mode)
14884 case V2DFmode:
14885 case V2DImode:
14886 swaparray = swap2;
14887 break;
14888 case V4SFmode:
14889 case V4SImode:
14890 swaparray = swap4;
14891 break;
14892 case V8HImode:
14893 swaparray = swap8;
14894 break;
14895 case V16QImode:
14896 swaparray = swap16;
14897 break;
14898 default:
14899 gcc_unreachable ();
14902 for (i = 0; i < 16; ++i)
14903 perm[i] = GEN_INT (swaparray[i]);
14905 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
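/* Illustrative sketch, not part of rs6000.c: the same idea with
   user-level intrinsics.  A vperm control vector can reverse the four
   word elements of a vector; the swap arrays above serve the related
   job of undoing the byte order of raw little-endian loads, so their
   constants differ from this one.  Compile with -maltivec; the
   function name is hypothetical.  */
#include <altivec.h>

vector signed int
reverse_words (vector signed int v)
{
  const vector unsigned char sel =
    { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 };
  return vec_perm (v, v, sel);
}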
14908 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
14909 with -maltivec=be specified. Issue the load followed by an element-
14910 reversing permute. */
14911 void
14912 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14914 rtx tmp = gen_reg_rtx (mode);
14915 rtx load = gen_rtx_SET (tmp, op1);
14916 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14917 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14918 rtx sel = swap_selector_for_mode (mode);
14919 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14921 gcc_assert (REG_P (op0));
14922 emit_insn (par);
14923 emit_insn (gen_rtx_SET (op0, vperm));
14926 /* Generate code for a "stvxl" built-in for a little endian target with
14927 -maltivec=be specified. Issue the store preceded by an element-reversing
14928 permute. */
14929 void
14930 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14932 rtx tmp = gen_reg_rtx (mode);
14933 rtx store = gen_rtx_SET (op0, tmp);
14934 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14935 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14936 rtx sel = swap_selector_for_mode (mode);
14937 rtx vperm;
14939 gcc_assert (REG_P (op1));
14940 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14941 emit_insn (gen_rtx_SET (tmp, vperm));
14942 emit_insn (par);
14945 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14946 specified. Issue the store preceded by an element-reversing permute. */
14947 void
14948 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14950 machine_mode inner_mode = GET_MODE_INNER (mode);
14951 rtx tmp = gen_reg_rtx (mode);
14952 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14953 rtx sel = swap_selector_for_mode (mode);
14954 rtx vperm;
14956 gcc_assert (REG_P (op1));
14957 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14958 emit_insn (gen_rtx_SET (tmp, vperm));
14959 emit_insn (gen_rtx_SET (op0, stvx));
14962 static rtx
14963 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14965 rtx pat, addr;
14966 tree arg0 = CALL_EXPR_ARG (exp, 0);
14967 tree arg1 = CALL_EXPR_ARG (exp, 1);
14968 machine_mode tmode = insn_data[icode].operand[0].mode;
14969 machine_mode mode0 = Pmode;
14970 machine_mode mode1 = Pmode;
14971 rtx op0 = expand_normal (arg0);
14972 rtx op1 = expand_normal (arg1);
14974 if (icode == CODE_FOR_nothing)
14975 /* Builtin not supported on this processor. */
14976 return 0;
14978 /* If we got invalid arguments bail out before generating bad rtl. */
14979 if (arg0 == error_mark_node || arg1 == error_mark_node)
14980 return const0_rtx;
14982 if (target == 0
14983 || GET_MODE (target) != tmode
14984 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14985 target = gen_reg_rtx (tmode);
14987 op1 = copy_to_mode_reg (mode1, op1);
14989 /* For LVX, express the RTL accurately by ANDing the address with -16.
14990 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14991 so the raw address is fine. */
14992 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14993 || icode == CODE_FOR_altivec_lvx_v2di_2op
14994 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14995 || icode == CODE_FOR_altivec_lvx_v4si_2op
14996 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14997 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14999 rtx rawaddr;
15000 if (op0 == const0_rtx)
15001 rawaddr = op1;
15002 else
15004 op0 = copy_to_mode_reg (mode0, op0);
15005 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
15007 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15008 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
15010 /* For -maltivec=be, emit the load and follow it up with a
15011 permute to swap the elements. */
15012 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15014 rtx temp = gen_reg_rtx (tmode);
15015 emit_insn (gen_rtx_SET (temp, addr));
15017 rtx sel = swap_selector_for_mode (tmode);
15018 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
15019 UNSPEC_VPERM);
15020 emit_insn (gen_rtx_SET (target, vperm));
15022 else
15023 emit_insn (gen_rtx_SET (target, addr));
15025 else
15027 if (op0 == const0_rtx)
15028 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
15029 else
15031 op0 = copy_to_mode_reg (mode0, op0);
15032 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
15033 gen_rtx_PLUS (Pmode, op1, op0));
15036 pat = GEN_FCN (icode) (target, addr);
15037 if (! pat)
15038 return 0;
15039 emit_insn (pat);
15042 return target;
15045 static rtx
15046 spe_expand_stv_builtin (enum insn_code icode, tree exp)
15048 tree arg0 = CALL_EXPR_ARG (exp, 0);
15049 tree arg1 = CALL_EXPR_ARG (exp, 1);
15050 tree arg2 = CALL_EXPR_ARG (exp, 2);
15051 rtx op0 = expand_normal (arg0);
15052 rtx op1 = expand_normal (arg1);
15053 rtx op2 = expand_normal (arg2);
15054 rtx pat;
15055 machine_mode mode0 = insn_data[icode].operand[0].mode;
15056 machine_mode mode1 = insn_data[icode].operand[1].mode;
15057 machine_mode mode2 = insn_data[icode].operand[2].mode;
15059 /* Invalid arguments; bail out before generating bad rtl. */
15060 if (arg0 == error_mark_node
15061 || arg1 == error_mark_node
15062 || arg2 == error_mark_node)
15063 return const0_rtx;
15065 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
15066 op0 = copy_to_mode_reg (mode2, op0);
15067 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
15068 op1 = copy_to_mode_reg (mode0, op1);
15069 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
15070 op2 = copy_to_mode_reg (mode1, op2);
15072 pat = GEN_FCN (icode) (op1, op2, op0);
15073 if (pat)
15074 emit_insn (pat);
15075 return NULL_RTX;
15078 static rtx
15079 paired_expand_stv_builtin (enum insn_code icode, tree exp)
15081 tree arg0 = CALL_EXPR_ARG (exp, 0);
15082 tree arg1 = CALL_EXPR_ARG (exp, 1);
15083 tree arg2 = CALL_EXPR_ARG (exp, 2);
15084 rtx op0 = expand_normal (arg0);
15085 rtx op1 = expand_normal (arg1);
15086 rtx op2 = expand_normal (arg2);
15087 rtx pat, addr;
15088 machine_mode tmode = insn_data[icode].operand[0].mode;
15089 machine_mode mode1 = Pmode;
15090 machine_mode mode2 = Pmode;
15092 /* Invalid arguments; bail out before generating bad rtl. */
15093 if (arg0 == error_mark_node
15094 || arg1 == error_mark_node
15095 || arg2 == error_mark_node)
15096 return const0_rtx;
15098 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
15099 op0 = copy_to_mode_reg (tmode, op0);
15101 op2 = copy_to_mode_reg (mode2, op2);
15103 if (op1 == const0_rtx)
15105 addr = gen_rtx_MEM (tmode, op2);
15107 else
15109 op1 = copy_to_mode_reg (mode1, op1);
15110 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
15113 pat = GEN_FCN (icode) (addr, op0);
15114 if (pat)
15115 emit_insn (pat);
15116 return NULL_RTX;
15119 static rtx
15120 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
15122 rtx pat;
15123 tree arg0 = CALL_EXPR_ARG (exp, 0);
15124 tree arg1 = CALL_EXPR_ARG (exp, 1);
15125 tree arg2 = CALL_EXPR_ARG (exp, 2);
15126 rtx op0 = expand_normal (arg0);
15127 rtx op1 = expand_normal (arg1);
15128 rtx op2 = expand_normal (arg2);
15129 machine_mode mode0 = insn_data[icode].operand[0].mode;
15130 machine_mode mode1 = insn_data[icode].operand[1].mode;
15131 machine_mode mode2 = insn_data[icode].operand[2].mode;
15133 if (icode == CODE_FOR_nothing)
15134 /* Builtin not supported on this processor. */
15135 return NULL_RTX;
15137 /* If we got invalid arguments bail out before generating bad rtl. */
15138 if (arg0 == error_mark_node
15139 || arg1 == error_mark_node
15140 || arg2 == error_mark_node)
15141 return NULL_RTX;
15143 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15144 op0 = copy_to_mode_reg (mode0, op0);
15145 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15146 op1 = copy_to_mode_reg (mode1, op1);
15147 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15148 op2 = copy_to_mode_reg (mode2, op2);
15150 pat = GEN_FCN (icode) (op0, op1, op2);
15151 if (pat)
15152 emit_insn (pat);
15154 return NULL_RTX;
15157 static rtx
15158 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
15160 tree arg0 = CALL_EXPR_ARG (exp, 0);
15161 tree arg1 = CALL_EXPR_ARG (exp, 1);
15162 tree arg2 = CALL_EXPR_ARG (exp, 2);
15163 rtx op0 = expand_normal (arg0);
15164 rtx op1 = expand_normal (arg1);
15165 rtx op2 = expand_normal (arg2);
15166 rtx pat, addr, rawaddr;
15167 machine_mode tmode = insn_data[icode].operand[0].mode;
15168 machine_mode smode = insn_data[icode].operand[1].mode;
15169 machine_mode mode1 = Pmode;
15170 machine_mode mode2 = Pmode;
15172 /* Invalid arguments; bail out before generating bad rtl. */
15173 if (arg0 == error_mark_node
15174 || arg1 == error_mark_node
15175 || arg2 == error_mark_node)
15176 return const0_rtx;
15178 op2 = copy_to_mode_reg (mode2, op2);
15180 /* For STVX, express the RTL accurately by ANDing the address with -16.
15181 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
15182 so the raw address is fine. */
15183 if (icode == CODE_FOR_altivec_stvx_v2df_2op
15184 || icode == CODE_FOR_altivec_stvx_v2di_2op
15185 || icode == CODE_FOR_altivec_stvx_v4sf_2op
15186 || icode == CODE_FOR_altivec_stvx_v4si_2op
15187 || icode == CODE_FOR_altivec_stvx_v8hi_2op
15188 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
15190 if (op1 == const0_rtx)
15191 rawaddr = op2;
15192 else
15194 op1 = copy_to_mode_reg (mode1, op1);
15195 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
15198 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15199 addr = gen_rtx_MEM (tmode, addr);
15201 op0 = copy_to_mode_reg (tmode, op0);
15203 /* For -maltivec=be, emit a permute to swap the elements, followed
15204 by the store. */
15205 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15207 rtx temp = gen_reg_rtx (tmode);
15208 rtx sel = swap_selector_for_mode (tmode);
15209 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
15210 UNSPEC_VPERM);
15211 emit_insn (gen_rtx_SET (temp, vperm));
15212 emit_insn (gen_rtx_SET (addr, temp));
15214 else
15215 emit_insn (gen_rtx_SET (addr, op0));
15217 else
15219 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
15220 op0 = copy_to_mode_reg (smode, op0);
15222 if (op1 == const0_rtx)
15223 addr = gen_rtx_MEM (tmode, op2);
15224 else
15226 op1 = copy_to_mode_reg (mode1, op1);
15227 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
15230 pat = GEN_FCN (icode) (addr, op0);
15231 if (pat)
15232 emit_insn (pat);
15235 return NULL_RTX;
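/* Illustrative sketch, not part of rs6000.c: the AND with -16 above
   mirrors architected lvx/stvx behavior, where the low four bits of
   the effective address are ignored.  Compile with -maltivec; the
   function name is hypothetical.  */
#include <altivec.h>

void
copy_vector (const signed int *src, signed int *dst)
{
  /* Both accesses use (address & ~15); a misaligned pointer is
     silently rounded down to its 16-byte block.  */
  vector signed int v = vec_ld (0, src);
  vec_st (v, 0, dst);
}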
15238 /* Return the appropriate SPR number associated with the given builtin. */
15239 static inline HOST_WIDE_INT
15240 htm_spr_num (enum rs6000_builtins code)
15242 if (code == HTM_BUILTIN_GET_TFHAR
15243 || code == HTM_BUILTIN_SET_TFHAR)
15244 return TFHAR_SPR;
15245 else if (code == HTM_BUILTIN_GET_TFIAR
15246 || code == HTM_BUILTIN_SET_TFIAR)
15247 return TFIAR_SPR;
15248 else if (code == HTM_BUILTIN_GET_TEXASR
15249 || code == HTM_BUILTIN_SET_TEXASR)
15250 return TEXASR_SPR;
15251 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
15252 || code == HTM_BUILTIN_SET_TEXASRU);
15253 return TEXASRU_SPR;
15256 /* Return the appropriate SPR regno associated with the given builtin. */
15257 static inline HOST_WIDE_INT
15258 htm_spr_regno (enum rs6000_builtins code)
15260 if (code == HTM_BUILTIN_GET_TFHAR
15261 || code == HTM_BUILTIN_SET_TFHAR)
15262 return TFHAR_REGNO;
15263 else if (code == HTM_BUILTIN_GET_TFIAR
15264 || code == HTM_BUILTIN_SET_TFIAR)
15265 return TFIAR_REGNO;
15266 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
15267 || code == HTM_BUILTIN_SET_TEXASR
15268 || code == HTM_BUILTIN_GET_TEXASRU
15269 || code == HTM_BUILTIN_SET_TEXASRU);
15270 return TEXASR_REGNO;
15273 /* Return the correct ICODE value depending on whether we are
15274 setting or reading the HTM SPRs. */
15275 static inline enum insn_code
15276 rs6000_htm_spr_icode (bool nonvoid)
15278 if (nonvoid)
15279 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
15280 else
15281 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
15284 /* Expand the HTM builtin in EXP and store the result in TARGET.
15285 Store true in *EXPANDEDP if we found a builtin to expand. */
15286 static rtx
15287 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
15289 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15290 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
15291 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15292 const struct builtin_description *d;
15293 size_t i;
15295 *expandedp = true;
15297 if (!TARGET_POWERPC64
15298 && (fcode == HTM_BUILTIN_TABORTDC
15299 || fcode == HTM_BUILTIN_TABORTDCI))
15301 size_t uns_fcode = (size_t)fcode;
15302 const char *name = rs6000_builtin_info[uns_fcode].name;
15303 error ("builtin %s is only valid in 64-bit mode", name);
15304 return const0_rtx;
15307 /* Expand the HTM builtins. */
15308 d = bdesc_htm;
15309 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15310 if (d->code == fcode)
15312 rtx op[MAX_HTM_OPERANDS], pat;
15313 int nopnds = 0;
15314 tree arg;
15315 call_expr_arg_iterator iter;
15316 unsigned attr = rs6000_builtin_info[fcode].attr;
15317 enum insn_code icode = d->icode;
15318 const struct insn_operand_data *insn_op;
15319 bool uses_spr = (attr & RS6000_BTC_SPR);
15320 rtx cr = NULL_RTX;
15322 if (uses_spr)
15323 icode = rs6000_htm_spr_icode (nonvoid);
15324 insn_op = &insn_data[icode].operand[0];
15326 if (nonvoid)
15328 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
15329 if (!target
15330 || GET_MODE (target) != tmode
15331 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
15332 target = gen_reg_rtx (tmode);
15333 if (uses_spr)
15334 op[nopnds++] = target;
15337 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
15339 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
15340 return const0_rtx;
15342 insn_op = &insn_data[icode].operand[nopnds];
15344 op[nopnds] = expand_normal (arg);
15346 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
15348 if (!strcmp (insn_op->constraint, "n"))
15350 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
15351 if (!CONST_INT_P (op[nopnds]))
15352 error ("argument %d must be an unsigned literal", arg_num);
15353 else
15354 error ("argument %d is an unsigned literal that is "
15355 "out of range", arg_num);
15356 return const0_rtx;
15358 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
15361 nopnds++;
15364 /* Handle the builtins for extended mnemonics. These accept
15365 no arguments, but map to builtins that take arguments. */
15366 switch (fcode)
15368 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
15369 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
15370 op[nopnds++] = GEN_INT (1);
15371 if (flag_checking)
15372 attr |= RS6000_BTC_UNARY;
15373 break;
15374 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
15375 op[nopnds++] = GEN_INT (0);
15376 if (flag_checking)
15377 attr |= RS6000_BTC_UNARY;
15378 break;
15379 default:
15380 break;
15383 /* If this builtin accesses SPRs, then pass in the appropriate
15384 SPR number and SPR regno as the last two operands. */
15385 if (uses_spr)
15387 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
15388 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
15389 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
15391 /* If this builtin accesses a CR, then pass in a scratch
15392 CR as the last operand. */
15393 else if (attr & RS6000_BTC_CR)
15394 { cr = gen_reg_rtx (CCmode);
15395 op[nopnds++] = cr;
15398 if (flag_checking)
15400 int expected_nopnds = 0;
15401 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
15402 expected_nopnds = 1;
15403 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
15404 expected_nopnds = 2;
15405 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
15406 expected_nopnds = 3;
15407 if (!(attr & RS6000_BTC_VOID))
15408 expected_nopnds += 1;
15409 if (uses_spr)
15410 expected_nopnds += 2;
15412 gcc_assert (nopnds == expected_nopnds
15413 && nopnds <= MAX_HTM_OPERANDS);
15416 switch (nopnds)
15418 case 1:
15419 pat = GEN_FCN (icode) (op[0]);
15420 break;
15421 case 2:
15422 pat = GEN_FCN (icode) (op[0], op[1]);
15423 break;
15424 case 3:
15425 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15426 break;
15427 case 4:
15428 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15429 break;
15430 default:
15431 gcc_unreachable ();
15433 if (!pat)
15434 return NULL_RTX;
15435 emit_insn (pat);
15437 if (attr & RS6000_BTC_CR)
15439 if (fcode == HTM_BUILTIN_TBEGIN)
15441 /* Emit code to set TARGET to true or false depending on
15442 whether the tbegin. instruction succeeded or failed to
15443 start a transaction. We do this by placing the 1's
15444 complement of CR's EQ bit into TARGET. */
15445 rtx scratch = gen_reg_rtx (SImode);
15446 emit_insn (gen_rtx_SET (scratch,
15447 gen_rtx_EQ (SImode, cr,
15448 const0_rtx)));
15449 emit_insn (gen_rtx_SET (target,
15450 gen_rtx_XOR (SImode, scratch,
15451 GEN_INT (1))));
15453 else
15455 /* Emit code to copy the 4-bit condition register field
15456 CR into the least significant end of register TARGET. */
15457 rtx scratch1 = gen_reg_rtx (SImode);
15458 rtx scratch2 = gen_reg_rtx (SImode);
15459 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
15460 emit_insn (gen_movcc (subreg, cr));
15461 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
15462 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
15466 if (nonvoid)
15467 return target;
15468 return const0_rtx;
15471 *expandedp = false;
15472 return NULL_RTX;
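/* Illustrative sketch, not part of rs6000.c: the tbegin. handling
   above (XOR of CR0's EQ image) is what makes __builtin_tbegin
   return nonzero when a transaction starts.  Assumes -mhtm and the
   documented HTM builtin signatures; names are hypothetical.  */
static int counter;

void
increment_transactionally (void)
{
  if (__builtin_tbegin (0))
    {
      counter++;              /* transactional path */
      __builtin_tend (0);     /* commit */
    }
  else
    counter++;                /* failure path; real code would take a
                                 lock or retry here */
}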
15475 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15477 static rtx
15478 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
15479 rtx target)
15481 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15482 if (fcode == RS6000_BUILTIN_CPU_INIT)
15483 return const0_rtx;
15485 if (target == 0 || GET_MODE (target) != SImode)
15486 target = gen_reg_rtx (SImode);
15488 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15489 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
15490 if (TREE_CODE (arg) != STRING_CST)
15492 error ("builtin %s only accepts a string argument",
15493 rs6000_builtin_info[(size_t) fcode].name);
15494 return const0_rtx;
15497 if (fcode == RS6000_BUILTIN_CPU_IS)
15499 const char *cpu = TREE_STRING_POINTER (arg);
15500 rtx cpuid = NULL_RTX;
15501 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
15502 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
15504 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15505 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
15506 break;
15508 if (cpuid == NULL_RTX)
15510 /* Invalid CPU argument. */
15511 error ("cpu %s is an invalid argument to builtin %s",
15512 cpu, rs6000_builtin_info[(size_t) fcode].name);
15513 return const0_rtx;
15516 rtx platform = gen_reg_rtx (SImode);
15517 rtx tcbmem = gen_const_mem (SImode,
15518 gen_rtx_PLUS (Pmode,
15519 gen_rtx_REG (Pmode, TLS_REGNUM),
15520 GEN_INT (TCB_PLATFORM_OFFSET)));
15521 emit_move_insn (platform, tcbmem);
15522 emit_insn (gen_eqsi3 (target, platform, cpuid));
15524 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
15526 const char *hwcap = TREE_STRING_POINTER (arg);
15527 rtx mask = NULL_RTX;
15528 int hwcap_offset;
15529 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
15530 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
15532 mask = GEN_INT (cpu_supports_info[i].mask);
15533 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
15534 break;
15536 if (mask == NULL_RTX)
15538 /* Invalid HWCAP argument. */
15539 error ("hwcap %s is an invalid argument to builtin %s",
15540 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15541 return const0_rtx;
15544 rtx tcb_hwcap = gen_reg_rtx (SImode);
15545 rtx tcbmem = gen_const_mem (SImode,
15546 gen_rtx_PLUS (Pmode,
15547 gen_rtx_REG (Pmode, TLS_REGNUM),
15548 GEN_INT (hwcap_offset)));
15549 emit_move_insn (tcb_hwcap, tcbmem);
15550 rtx scratch1 = gen_reg_rtx (SImode);
15551 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15552 rtx scratch2 = gen_reg_rtx (SImode);
15553 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15554 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15557 /* Record that we have expanded a CPU builtin, so that we can later
15558 emit a reference to the special symbol exported by LIBC to ensure we
15559 do not link against an old LIBC that doesn't support this feature. */
15560 cpu_builtin_p = true;
15562 #else
15563 /* For old LIBCs, always return FALSE. */
15564 emit_move_insn (target, GEN_INT (0));
15565 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15567 return target;
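/* Illustrative sketch, not part of rs6000.c: a user-level view of the
   TCB reads generated above.  "power9" and "vsx" are examples of the
   strings matched against cpu_is_info/cpu_supports_info; a glibc new
   enough to fill in the TCB fields is assumed (otherwise the #else
   branch above makes both tests return false).  */
#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();               /* a nop on powerpc, see above */
  if (__builtin_cpu_is ("power9"))
    puts ("running on a POWER9");
  if (__builtin_cpu_supports ("vsx"))
    puts ("VSX available");
  return 0;
}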
15569 /* Expand a builtin that takes three operands. */
15570 static rtx
15571 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15573 rtx pat;
15574 tree arg0 = CALL_EXPR_ARG (exp, 0);
15575 tree arg1 = CALL_EXPR_ARG (exp, 1);
15576 tree arg2 = CALL_EXPR_ARG (exp, 2);
15577 rtx op0 = expand_normal (arg0);
15578 rtx op1 = expand_normal (arg1);
15579 rtx op2 = expand_normal (arg2);
15580 machine_mode tmode = insn_data[icode].operand[0].mode;
15581 machine_mode mode0 = insn_data[icode].operand[1].mode;
15582 machine_mode mode1 = insn_data[icode].operand[2].mode;
15583 machine_mode mode2 = insn_data[icode].operand[3].mode;
15585 if (icode == CODE_FOR_nothing)
15586 /* Builtin not supported on this processor. */
15587 return 0;
15589 /* If we got invalid arguments bail out before generating bad rtl. */
15590 if (arg0 == error_mark_node
15591 || arg1 == error_mark_node
15592 || arg2 == error_mark_node)
15593 return const0_rtx;
15595 /* Check and prepare argument depending on the instruction code.
15597 Note that a switch statement instead of the sequence of tests
15598 would be incorrect as many of the CODE_FOR values could be
15599 CODE_FOR_nothing and that would yield multiple alternatives
15600 with identical values. We'd never reach here at runtime in
15601 this case. */
15602 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15603 || icode == CODE_FOR_altivec_vsldoi_v2df
15604 || icode == CODE_FOR_altivec_vsldoi_v4si
15605 || icode == CODE_FOR_altivec_vsldoi_v8hi
15606 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15608 /* Only allow 4-bit unsigned literals. */
15609 STRIP_NOPS (arg2);
15610 if (TREE_CODE (arg2) != INTEGER_CST
15611 || TREE_INT_CST_LOW (arg2) & ~0xf)
15613 error ("argument 3 must be a 4-bit unsigned literal");
15614 return CONST0_RTX (tmode);
15617 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15618 || icode == CODE_FOR_vsx_xxpermdi_v2di
15619 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15620 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15621 || icode == CODE_FOR_vsx_xxpermdi_v1ti
15622 || icode == CODE_FOR_vsx_xxpermdi_v4sf
15623 || icode == CODE_FOR_vsx_xxpermdi_v4si
15624 || icode == CODE_FOR_vsx_xxpermdi_v8hi
15625 || icode == CODE_FOR_vsx_xxpermdi_v16qi
15626 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15627 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15628 || icode == CODE_FOR_vsx_xxsldwi_v4si
15629 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15630 || icode == CODE_FOR_vsx_xxsldwi_v2di
15631 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15633 /* Only allow 2-bit unsigned literals. */
15634 STRIP_NOPS (arg2);
15635 if (TREE_CODE (arg2) != INTEGER_CST
15636 || TREE_INT_CST_LOW (arg2) & ~0x3)
15638 error ("argument 3 must be a 2-bit unsigned literal");
15639 return CONST0_RTX (tmode);
15642 else if (icode == CODE_FOR_vsx_set_v2df
15643 || icode == CODE_FOR_vsx_set_v2di
15644 || icode == CODE_FOR_bcdadd
15645 || icode == CODE_FOR_bcdadd_lt
15646 || icode == CODE_FOR_bcdadd_eq
15647 || icode == CODE_FOR_bcdadd_gt
15648 || icode == CODE_FOR_bcdsub
15649 || icode == CODE_FOR_bcdsub_lt
15650 || icode == CODE_FOR_bcdsub_eq
15651 || icode == CODE_FOR_bcdsub_gt)
15653 /* Only allow 1-bit unsigned literals. */
15654 STRIP_NOPS (arg2);
15655 if (TREE_CODE (arg2) != INTEGER_CST
15656 || TREE_INT_CST_LOW (arg2) & ~0x1)
15658 error ("argument 3 must be a 1-bit unsigned literal");
15659 return CONST0_RTX (tmode);
15662 else if (icode == CODE_FOR_dfp_ddedpd_dd
15663 || icode == CODE_FOR_dfp_ddedpd_td)
15665 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15666 STRIP_NOPS (arg0);
15667 if (TREE_CODE (arg0) != INTEGER_CST
15668 || TREE_INT_CST_LOW (arg0) & ~0x3)
15670 error ("argument 1 must be 0 or 2");
15671 return CONST0_RTX (tmode);
15674 else if (icode == CODE_FOR_dfp_denbcd_dd
15675 || icode == CODE_FOR_dfp_denbcd_td)
15677 /* Only allow 1-bit unsigned literals. */
15678 STRIP_NOPS (arg0);
15679 if (TREE_CODE (arg0) != INTEGER_CST
15680 || TREE_INT_CST_LOW (arg0) & ~0x1)
15682 error ("argument 1 must be a 1-bit unsigned literal");
15683 return CONST0_RTX (tmode);
15686 else if (icode == CODE_FOR_dfp_dscli_dd
15687 || icode == CODE_FOR_dfp_dscli_td
15688 || icode == CODE_FOR_dfp_dscri_dd
15689 || icode == CODE_FOR_dfp_dscri_td)
15691 /* Only allow 6-bit unsigned literals. */
15692 STRIP_NOPS (arg1);
15693 if (TREE_CODE (arg1) != INTEGER_CST
15694 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15696 error ("argument 2 must be a 6-bit unsigned literal");
15697 return CONST0_RTX (tmode);
15700 else if (icode == CODE_FOR_crypto_vshasigmaw
15701 || icode == CODE_FOR_crypto_vshasigmad)
15703 /* Check that the 2nd and 3rd arguments are integer constants in
15704 range, and prepare the arguments. */
15705 STRIP_NOPS (arg1);
15706 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15708 error ("argument 2 must be 0 or 1");
15709 return CONST0_RTX (tmode);
15712 STRIP_NOPS (arg2);
15713 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15715 error ("argument 3 must be in the range 0..15");
15716 return CONST0_RTX (tmode);
15720 if (target == 0
15721 || GET_MODE (target) != tmode
15722 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15723 target = gen_reg_rtx (tmode);
15725 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15726 op0 = copy_to_mode_reg (mode0, op0);
15727 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15728 op1 = copy_to_mode_reg (mode1, op1);
15729 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15730 op2 = copy_to_mode_reg (mode2, op2);
15732 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15733 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15734 else
15735 pat = GEN_FCN (icode) (target, op0, op1, op2);
15736 if (! pat)
15737 return 0;
15738 emit_insn (pat);
15740 return target;
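/* Illustrative sketch, not part of rs6000.c: one of the ternary
   literal checks above in user terms.  vec_sld maps onto vsldoi,
   whose shift count must be a 4-bit literal (0..15).  Compile with
   -maltivec; the function name is hypothetical.  */
#include <altivec.h>

vector signed int
shift_pair_left_3 (vector signed int a, vector signed int b)
{
  /* A variable third argument would reach the "4-bit unsigned
     literal" error above.  */
  return vec_sld (a, b, 3);
}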
15743 /* Expand the lvx builtins. */
15744 static rtx
15745 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15747 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15748 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15749 tree arg0;
15750 machine_mode tmode, mode0;
15751 rtx pat, op0;
15752 enum insn_code icode;
15754 switch (fcode)
15756 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15757 icode = CODE_FOR_vector_altivec_load_v16qi;
15758 break;
15759 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15760 icode = CODE_FOR_vector_altivec_load_v8hi;
15761 break;
15762 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15763 icode = CODE_FOR_vector_altivec_load_v4si;
15764 break;
15765 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15766 icode = CODE_FOR_vector_altivec_load_v4sf;
15767 break;
15768 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15769 icode = CODE_FOR_vector_altivec_load_v2df;
15770 break;
15771 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15772 icode = CODE_FOR_vector_altivec_load_v2di;
15773 break;
15774 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15775 icode = CODE_FOR_vector_altivec_load_v1ti;
15776 break;
15777 default:
15778 *expandedp = false;
15779 return NULL_RTX;
15782 *expandedp = true;
15784 arg0 = CALL_EXPR_ARG (exp, 0);
15785 op0 = expand_normal (arg0);
15786 tmode = insn_data[icode].operand[0].mode;
15787 mode0 = insn_data[icode].operand[1].mode;
15789 if (target == 0
15790 || GET_MODE (target) != tmode
15791 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15792 target = gen_reg_rtx (tmode);
15794 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15795 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15797 pat = GEN_FCN (icode) (target, op0);
15798 if (! pat)
15799 return 0;
15800 emit_insn (pat);
15801 return target;
15804 /* Expand the stvx builtins. */
15805 static rtx
15806 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15807 bool *expandedp)
15809 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15810 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15811 tree arg0, arg1;
15812 machine_mode mode0, mode1;
15813 rtx pat, op0, op1;
15814 enum insn_code icode;
15816 switch (fcode)
15818 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15819 icode = CODE_FOR_vector_altivec_store_v16qi;
15820 break;
15821 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15822 icode = CODE_FOR_vector_altivec_store_v8hi;
15823 break;
15824 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15825 icode = CODE_FOR_vector_altivec_store_v4si;
15826 break;
15827 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15828 icode = CODE_FOR_vector_altivec_store_v4sf;
15829 break;
15830 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15831 icode = CODE_FOR_vector_altivec_store_v2df;
15832 break;
15833 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15834 icode = CODE_FOR_vector_altivec_store_v2di;
15835 break;
15836 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15837 icode = CODE_FOR_vector_altivec_store_v1ti;
15838 break;
15839 default:
15840 *expandedp = false;
15841 return NULL_RTX;
15844 arg0 = CALL_EXPR_ARG (exp, 0);
15845 arg1 = CALL_EXPR_ARG (exp, 1);
15846 op0 = expand_normal (arg0);
15847 op1 = expand_normal (arg1);
15848 mode0 = insn_data[icode].operand[0].mode;
15849 mode1 = insn_data[icode].operand[1].mode;
15851 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15852 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15853 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15854 op1 = copy_to_mode_reg (mode1, op1);
15856 pat = GEN_FCN (icode) (op0, op1);
15857 if (pat)
15858 emit_insn (pat);
15860 *expandedp = true;
15861 return NULL_RTX;
15864 /* Expand the dst builtins. */
15865 static rtx
15866 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15867 bool *expandedp)
15869 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15870 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15871 tree arg0, arg1, arg2;
15872 machine_mode mode0, mode1;
15873 rtx pat, op0, op1, op2;
15874 const struct builtin_description *d;
15875 size_t i;
15877 *expandedp = false;
15879 /* Handle DST variants. */
15880 d = bdesc_dst;
15881 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15882 if (d->code == fcode)
15884 arg0 = CALL_EXPR_ARG (exp, 0);
15885 arg1 = CALL_EXPR_ARG (exp, 1);
15886 arg2 = CALL_EXPR_ARG (exp, 2);
15887 op0 = expand_normal (arg0);
15888 op1 = expand_normal (arg1);
15889 op2 = expand_normal (arg2);
15890 mode0 = insn_data[d->icode].operand[0].mode;
15891 mode1 = insn_data[d->icode].operand[1].mode;
15893 /* Invalid arguments, bail out before generating bad rtl. */
15894 if (arg0 == error_mark_node
15895 || arg1 == error_mark_node
15896 || arg2 == error_mark_node)
15897 return const0_rtx;
15899 *expandedp = true;
15900 STRIP_NOPS (arg2);
15901 if (TREE_CODE (arg2) != INTEGER_CST
15902 || TREE_INT_CST_LOW (arg2) & ~0x3)
15904 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15905 return const0_rtx;
15908 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15909 op0 = copy_to_mode_reg (Pmode, op0);
15910 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15911 op1 = copy_to_mode_reg (mode1, op1);
15913 pat = GEN_FCN (d->icode) (op0, op1, op2);
15914 if (pat != 0)
15915 emit_insn (pat);
15917 return NULL_RTX;
15920 return NULL_RTX;
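/* Illustrative sketch, not part of rs6000.c: the 2-bit literal
   validated above is the data-stream tag of the dst family.  The
   control-word encoding (block size, count, stride) follows the
   AltiVec PIM; the value below is illustrative only.  Compile with
   -maltivec; the function name is hypothetical.  */
#include <altivec.h>

void
prefetch_then_stop (const int *p)
{
  vec_dst (p, 0x10010100, 0);  /* start prefetch on stream tag 0 */
  /* ... consume the data ... */
  vec_dss (0);                 /* stop stream tag 0 */
}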
15923 /* Expand vec_init builtin. */
15924 static rtx
15925 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15927 machine_mode tmode = TYPE_MODE (type);
15928 machine_mode inner_mode = GET_MODE_INNER (tmode);
15929 int i, n_elt = GET_MODE_NUNITS (tmode);
15931 gcc_assert (VECTOR_MODE_P (tmode));
15932 gcc_assert (n_elt == call_expr_nargs (exp));
15934 if (!target || !register_operand (target, tmode))
15935 target = gen_reg_rtx (tmode);
15937 /* If we have a vector comprised of a single element, such as V1TImode, do
15938 the initialization directly. */
15939 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15941 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15942 emit_move_insn (target, gen_lowpart (tmode, x));
15944 else
15946 rtvec v = rtvec_alloc (n_elt);
15948 for (i = 0; i < n_elt; ++i)
15950 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15951 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15954 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15957 return target;
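/* Illustrative sketch, not part of rs6000.c: user code does not spell
   a vec_init builtin; an ordinary GNU C vector initializer is one way
   to reach rs6000_expand_vector_init, the same helper the expander
   above funnels into.  Compile with -maltivec; the function name is
   hypothetical.  */
#include <altivec.h>

vector signed int
make_vector (int a, int b, int c, int d)
{
  vector signed int v = { a, b, c, d };
  return v;
}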
15960 /* Return the integer constant in ARG. Constrain it to be in the range
15961 of the subparts of VEC_TYPE; issue an error if not. */
15963 static int
15964 get_element_number (tree vec_type, tree arg)
15966 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15968 if (!tree_fits_uhwi_p (arg)
15969 || (elt = tree_to_uhwi (arg), elt > max))
15971 error ("selector must be an integer constant in the range 0..%wi", max);
15972 return 0;
15975 return elt;
15978 /* Expand vec_set builtin. */
15979 static rtx
15980 altivec_expand_vec_set_builtin (tree exp)
15982 machine_mode tmode, mode1;
15983 tree arg0, arg1, arg2;
15984 int elt;
15985 rtx op0, op1;
15987 arg0 = CALL_EXPR_ARG (exp, 0);
15988 arg1 = CALL_EXPR_ARG (exp, 1);
15989 arg2 = CALL_EXPR_ARG (exp, 2);
15991 tmode = TYPE_MODE (TREE_TYPE (arg0));
15992 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15993 gcc_assert (VECTOR_MODE_P (tmode));
15995 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15996 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15997 elt = get_element_number (TREE_TYPE (arg0), arg2);
15999 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16000 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16002 op0 = force_reg (tmode, op0);
16003 op1 = force_reg (mode1, op1);
16005 rs6000_expand_vector_set (op0, op1, elt);
16007 return op0;
16010 /* Expand vec_ext builtin. */
16011 static rtx
16012 altivec_expand_vec_ext_builtin (tree exp, rtx target)
16014 machine_mode tmode, mode0;
16015 tree arg0, arg1;
16016 rtx op0;
16017 rtx op1;
16019 arg0 = CALL_EXPR_ARG (exp, 0);
16020 arg1 = CALL_EXPR_ARG (exp, 1);
16022 op0 = expand_normal (arg0);
16023 op1 = expand_normal (arg1);
16025 /* Call get_element_number to validate arg1 if it is a constant. */
16026 if (TREE_CODE (arg1) == INTEGER_CST)
16027 (void) get_element_number (TREE_TYPE (arg0), arg1);
16029 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16030 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16031 gcc_assert (VECTOR_MODE_P (mode0));
16033 op0 = force_reg (mode0, op0);
16035 if (optimize || !target || !register_operand (target, tmode))
16036 target = gen_reg_rtx (tmode);
16038 rs6000_expand_vector_extract (target, op0, op1);
16040 return target;
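/* Illustrative sketch, not part of rs6000.c: vec_extract from
   altivec.h maps onto the VEC_EXT expander above; a constant selector
   is range-checked by get_element_number.  Compile with -maltivec;
   the function name is hypothetical.  */
#include <altivec.h>

signed int
third_element (vector signed int v)
{
  /* The selector must be 0..3 for a V4SI vector, matching the
     TYPE_VECTOR_SUBPARTS check above.  */
  return vec_extract (v, 2);
}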
16043 /* Expand the builtin in EXP and store the result in TARGET. Store
16044 true in *EXPANDEDP if we found a builtin to expand. */
16045 static rtx
16046 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
16048 const struct builtin_description *d;
16049 size_t i;
16050 enum insn_code icode;
16051 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16052 tree arg0, arg1, arg2;
16053 rtx op0, pat;
16054 machine_mode tmode, mode0;
16055 enum rs6000_builtins fcode
16056 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16058 if (rs6000_overloaded_builtin_p (fcode))
16060 *expandedp = true;
16061 error ("unresolved overload for Altivec builtin %qF", fndecl);
16063 /* Given it is invalid, just generate a normal call. */
16064 return expand_call (exp, target, false);
16067 target = altivec_expand_ld_builtin (exp, target, expandedp);
16068 if (*expandedp)
16069 return target;
16071 target = altivec_expand_st_builtin (exp, target, expandedp);
16072 if (*expandedp)
16073 return target;
16075 target = altivec_expand_dst_builtin (exp, target, expandedp);
16076 if (*expandedp)
16077 return target;
16079 *expandedp = true;
16081 switch (fcode)
16083 case ALTIVEC_BUILTIN_STVX_V2DF:
16084 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
16085 case ALTIVEC_BUILTIN_STVX_V2DI:
16086 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
16087 case ALTIVEC_BUILTIN_STVX_V4SF:
16088 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
16089 case ALTIVEC_BUILTIN_STVX:
16090 case ALTIVEC_BUILTIN_STVX_V4SI:
16091 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
16092 case ALTIVEC_BUILTIN_STVX_V8HI:
16093 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
16094 case ALTIVEC_BUILTIN_STVX_V16QI:
16095 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
16096 case ALTIVEC_BUILTIN_STVEBX:
16097 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
16098 case ALTIVEC_BUILTIN_STVEHX:
16099 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
16100 case ALTIVEC_BUILTIN_STVEWX:
16101 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
16102 case ALTIVEC_BUILTIN_STVXL_V2DF:
16103 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
16104 case ALTIVEC_BUILTIN_STVXL_V2DI:
16105 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
16106 case ALTIVEC_BUILTIN_STVXL_V4SF:
16107 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
16108 case ALTIVEC_BUILTIN_STVXL:
16109 case ALTIVEC_BUILTIN_STVXL_V4SI:
16110 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
16111 case ALTIVEC_BUILTIN_STVXL_V8HI:
16112 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
16113 case ALTIVEC_BUILTIN_STVXL_V16QI:
16114 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
16116 case ALTIVEC_BUILTIN_STVLX:
16117 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
16118 case ALTIVEC_BUILTIN_STVLXL:
16119 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
16120 case ALTIVEC_BUILTIN_STVRX:
16121 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
16122 case ALTIVEC_BUILTIN_STVRXL:
16123 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
16125 case P9V_BUILTIN_STXVL:
16126 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
16128 case VSX_BUILTIN_STXVD2X_V1TI:
16129 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
16130 case VSX_BUILTIN_STXVD2X_V2DF:
16131 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
16132 case VSX_BUILTIN_STXVD2X_V2DI:
16133 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
16134 case VSX_BUILTIN_STXVW4X_V4SF:
16135 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
16136 case VSX_BUILTIN_STXVW4X_V4SI:
16137 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
16138 case VSX_BUILTIN_STXVW4X_V8HI:
16139 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
16140 case VSX_BUILTIN_STXVW4X_V16QI:
16141 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
16143 /* For the following on big endian, it's ok to use any appropriate
16144 unaligned-supporting store, so use a generic expander. For
16145 little-endian, the exact element-reversing instruction must
16146 be used. */
16147 case VSX_BUILTIN_ST_ELEMREV_V2DF:
16149 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
16150 : CODE_FOR_vsx_st_elemrev_v2df);
16151 return altivec_expand_stv_builtin (code, exp);
16153 case VSX_BUILTIN_ST_ELEMREV_V2DI:
16155 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
16156 : CODE_FOR_vsx_st_elemrev_v2di);
16157 return altivec_expand_stv_builtin (code, exp);
16159 case VSX_BUILTIN_ST_ELEMREV_V4SF:
16161 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
16162 : CODE_FOR_vsx_st_elemrev_v4sf);
16163 return altivec_expand_stv_builtin (code, exp);
16165 case VSX_BUILTIN_ST_ELEMREV_V4SI:
16167 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
16168 : CODE_FOR_vsx_st_elemrev_v4si);
16169 return altivec_expand_stv_builtin (code, exp);
16171 case VSX_BUILTIN_ST_ELEMREV_V8HI:
16173 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
16174 : CODE_FOR_vsx_st_elemrev_v8hi);
16175 return altivec_expand_stv_builtin (code, exp);
16177 case VSX_BUILTIN_ST_ELEMREV_V16QI:
16179 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
16180 : CODE_FOR_vsx_st_elemrev_v16qi);
16181 return altivec_expand_stv_builtin (code, exp);
16184 case ALTIVEC_BUILTIN_MFVSCR:
16185 icode = CODE_FOR_altivec_mfvscr;
16186 tmode = insn_data[icode].operand[0].mode;
16188 if (target == 0
16189 || GET_MODE (target) != tmode
16190 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16191 target = gen_reg_rtx (tmode);
16193 pat = GEN_FCN (icode) (target);
16194 if (! pat)
16195 return 0;
16196 emit_insn (pat);
16197 return target;
16199 case ALTIVEC_BUILTIN_MTVSCR:
16200 icode = CODE_FOR_altivec_mtvscr;
16201 arg0 = CALL_EXPR_ARG (exp, 0);
16202 op0 = expand_normal (arg0);
16203 mode0 = insn_data[icode].operand[0].mode;
16205 /* If we got invalid arguments bail out before generating bad rtl. */
16206 if (arg0 == error_mark_node)
16207 return const0_rtx;
16209 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16210 op0 = copy_to_mode_reg (mode0, op0);
16212 pat = GEN_FCN (icode) (op0);
16213 if (pat)
16214 emit_insn (pat);
16215 return NULL_RTX;
16217 case ALTIVEC_BUILTIN_DSSALL:
16218 emit_insn (gen_altivec_dssall ());
16219 return NULL_RTX;
16221 case ALTIVEC_BUILTIN_DSS:
16222 icode = CODE_FOR_altivec_dss;
16223 arg0 = CALL_EXPR_ARG (exp, 0);
16224 STRIP_NOPS (arg0);
16225 op0 = expand_normal (arg0);
16226 mode0 = insn_data[icode].operand[0].mode;
16228 /* If we got invalid arguments bail out before generating bad rtl. */
16229 if (arg0 == error_mark_node)
16230 return const0_rtx;
16232 if (TREE_CODE (arg0) != INTEGER_CST
16233 || TREE_INT_CST_LOW (arg0) & ~0x3)
16235 error ("argument to dss must be a 2-bit unsigned literal");
16236 return const0_rtx;
16239 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16240 op0 = copy_to_mode_reg (mode0, op0);
16242 emit_insn (gen_altivec_dss (op0));
16243 return NULL_RTX;
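/* Illustrative usage of the literal check above (assumes -maltivec;
   not compiler code):
     __builtin_altivec_dss (3);    => OK, 2-bit literal in 0..3
     __builtin_altivec_dss (4);    => rejected with the error above.  */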
16245 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
16246 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
16247 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
16248 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
16249 case VSX_BUILTIN_VEC_INIT_V2DF:
16250 case VSX_BUILTIN_VEC_INIT_V2DI:
16251 case VSX_BUILTIN_VEC_INIT_V1TI:
16252 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
16254 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
16255 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
16256 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
16257 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
16258 case VSX_BUILTIN_VEC_SET_V2DF:
16259 case VSX_BUILTIN_VEC_SET_V2DI:
16260 case VSX_BUILTIN_VEC_SET_V1TI:
16261 return altivec_expand_vec_set_builtin (exp);
16263 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
16264 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
16265 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
16266 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
16267 case VSX_BUILTIN_VEC_EXT_V2DF:
16268 case VSX_BUILTIN_VEC_EXT_V2DI:
16269 case VSX_BUILTIN_VEC_EXT_V1TI:
16270 return altivec_expand_vec_ext_builtin (exp, target);
16272 case P9V_BUILTIN_VEXTRACT4B:
16273 case P9V_BUILTIN_VEC_VEXTRACT4B:
16274 arg1 = CALL_EXPR_ARG (exp, 1);
16275 STRIP_NOPS (arg1);
16277 /* If the argument is invalid, generate a normal call. */
16278 if (arg1 == error_mark_node)
16279 return expand_call (exp, target, false);
16281 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
16283 error ("second argument to vec_vextract4b must be 0..12");
16284 return expand_call (exp, target, false);
16286 break;
16288 case P9V_BUILTIN_VINSERT4B:
16289 case P9V_BUILTIN_VINSERT4B_DI:
16290 case P9V_BUILTIN_VEC_VINSERT4B:
16291 arg2 = CALL_EXPR_ARG (exp, 2);
16292 STRIP_NOPS (arg2);
16295 /* If the argument is invalid, generate a normal call. */
16295 if (arg2 == error_mark_node)
16296 return expand_call (exp, target, false);
16298 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
16300 error ("third argument to vec_vinsert4b must be 0..12");
16301 return expand_call (exp, target, false);
16303 break;
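/* Illustrative usage of the range checks above (ISA 3.0; not compiler
   code): the byte index must be a literal so the selected word lies
   entirely within the 16-byte vector:
     x = vec_vextract4b (v, 4);    => OK, literal in 0..12
     x = vec_vextract4b (v, 13);   => error, expanded as a normal call.  */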
16305 default:
16306 break;
16307 /* Fall through. */
16310 /* Expand abs* operations. */
16311 d = bdesc_abs;
16312 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16313 if (d->code == fcode)
16314 return altivec_expand_abs_builtin (d->icode, exp, target);
16316 /* Expand the AltiVec predicates. */
16317 d = bdesc_altivec_preds;
16318 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16319 if (d->code == fcode)
16320 return altivec_expand_predicate_builtin (d->icode, exp, target);
16322 /* LV* are funky. We initialized them differently. */
16323 switch (fcode)
16325 case ALTIVEC_BUILTIN_LVSL:
16326 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
16327 exp, target, false);
16328 case ALTIVEC_BUILTIN_LVSR:
16329 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
16330 exp, target, false);
16331 case ALTIVEC_BUILTIN_LVEBX:
16332 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
16333 exp, target, false);
16334 case ALTIVEC_BUILTIN_LVEHX:
16335 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
16336 exp, target, false);
16337 case ALTIVEC_BUILTIN_LVEWX:
16338 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
16339 exp, target, false);
16340 case ALTIVEC_BUILTIN_LVXL_V2DF:
16341 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
16342 exp, target, false);
16343 case ALTIVEC_BUILTIN_LVXL_V2DI:
16344 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
16345 exp, target, false);
16346 case ALTIVEC_BUILTIN_LVXL_V4SF:
16347 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
16348 exp, target, false);
16349 case ALTIVEC_BUILTIN_LVXL:
16350 case ALTIVEC_BUILTIN_LVXL_V4SI:
16351 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
16352 exp, target, false);
16353 case ALTIVEC_BUILTIN_LVXL_V8HI:
16354 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
16355 exp, target, false);
16356 case ALTIVEC_BUILTIN_LVXL_V16QI:
16357 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
16358 exp, target, false);
16359 case ALTIVEC_BUILTIN_LVX_V2DF:
16360 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
16361 exp, target, false);
16362 case ALTIVEC_BUILTIN_LVX_V2DI:
16363 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
16364 exp, target, false);
16365 case ALTIVEC_BUILTIN_LVX_V4SF:
16366 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
16367 exp, target, false);
16368 case ALTIVEC_BUILTIN_LVX:
16369 case ALTIVEC_BUILTIN_LVX_V4SI:
16370 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
16371 exp, target, false);
16372 case ALTIVEC_BUILTIN_LVX_V8HI:
16373 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
16374 exp, target, false);
16375 case ALTIVEC_BUILTIN_LVX_V16QI:
16376 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
16377 exp, target, false);
16378 case ALTIVEC_BUILTIN_LVLX:
16379 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
16380 exp, target, true);
16381 case ALTIVEC_BUILTIN_LVLXL:
16382 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
16383 exp, target, true);
16384 case ALTIVEC_BUILTIN_LVRX:
16385 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
16386 exp, target, true);
16387 case ALTIVEC_BUILTIN_LVRXL:
16388 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
16389 exp, target, true);
16390 case VSX_BUILTIN_LXVD2X_V1TI:
16391 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
16392 exp, target, false);
16393 case VSX_BUILTIN_LXVD2X_V2DF:
16394 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
16395 exp, target, false);
16396 case VSX_BUILTIN_LXVD2X_V2DI:
16397 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
16398 exp, target, false);
16399 case VSX_BUILTIN_LXVW4X_V4SF:
16400 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
16401 exp, target, false);
16402 case VSX_BUILTIN_LXVW4X_V4SI:
16403 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
16404 exp, target, false);
16405 case VSX_BUILTIN_LXVW4X_V8HI:
16406 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
16407 exp, target, false);
16408 case VSX_BUILTIN_LXVW4X_V16QI:
16409 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
16410 exp, target, false);
16411 /* For the following, on big-endian it's OK to use any appropriate
16412 unaligned-supporting load, so use a generic expander. For
16413 little-endian, the exact element-reversing instruction must
16414 be used. */
16415 case VSX_BUILTIN_LD_ELEMREV_V2DF:
16417 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
16418 : CODE_FOR_vsx_ld_elemrev_v2df);
16419 return altivec_expand_lv_builtin (code, exp, target, false);
16421 case VSX_BUILTIN_LD_ELEMREV_V2DI:
16423 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
16424 : CODE_FOR_vsx_ld_elemrev_v2di);
16425 return altivec_expand_lv_builtin (code, exp, target, false);
16427 case VSX_BUILTIN_LD_ELEMREV_V4SF:
16429 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
16430 : CODE_FOR_vsx_ld_elemrev_v4sf);
16431 return altivec_expand_lv_builtin (code, exp, target, false);
16433 case VSX_BUILTIN_LD_ELEMREV_V4SI:
16435 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
16436 : CODE_FOR_vsx_ld_elemrev_v4si);
16437 return altivec_expand_lv_builtin (code, exp, target, false);
16439 case VSX_BUILTIN_LD_ELEMREV_V8HI:
16441 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
16442 : CODE_FOR_vsx_ld_elemrev_v8hi);
16443 return altivec_expand_lv_builtin (code, exp, target, false);
16445 case VSX_BUILTIN_LD_ELEMREV_V16QI:
16447 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
16448 : CODE_FOR_vsx_ld_elemrev_v16qi);
16449 return altivec_expand_lv_builtin (code, exp, target, false);
16451 break;
16452 default:
16453 break;
16454 /* Fall through. */
16457 *expandedp = false;
16458 return NULL_RTX;
16461 /* Expand the builtin in EXP and store the result in TARGET. Store
16462 true in *EXPANDEDP if we found a builtin to expand. */
16463 static rtx
16464 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
16466 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16467 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16468 const struct builtin_description *d;
16469 size_t i;
16471 *expandedp = true;
16473 switch (fcode)
16475 case PAIRED_BUILTIN_STX:
16476 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
16477 case PAIRED_BUILTIN_LX:
16478 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
16479 default:
16480 break;
16481 /* Fall through. */
16484 /* Expand the paired predicates. */
16485 d = bdesc_paired_preds;
16486 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
16487 if (d->code == fcode)
16488 return paired_expand_predicate_builtin (d->icode, exp, target);
16490 *expandedp = false;
16491 return NULL_RTX;
16494 /* Binops that need to be initialized manually, but can be expanded
16495 automagically by rs6000_expand_binop_builtin. */
16496 static const struct builtin_description bdesc_2arg_spe[] =
16498 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
16499 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
16500 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
16501 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
16502 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
16503 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
16504 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
16505 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
16506 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
16507 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
16508 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
16509 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
16510 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
16511 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
16512 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
16513 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
16514 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
16515 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
16516 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
16517 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
16518 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
16519 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
16522 /* Expand the builtin in EXP and store the result in TARGET. Store
16523 true in *EXPANDEDP if we found a builtin to expand.
16525 This expands the SPE builtins that are not simple unary and binary
16526 operations. */
16527 static rtx
16528 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
16530 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16531 tree arg1, arg0;
16532 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16533 enum insn_code icode;
16534 machine_mode tmode, mode0;
16535 rtx pat, op0;
16536 const struct builtin_description *d;
16537 size_t i;
16539 *expandedp = true;
16541 /* Syntax check for a 5-bit unsigned immediate. */
16542 switch (fcode)
16544 case SPE_BUILTIN_EVSTDD:
16545 case SPE_BUILTIN_EVSTDH:
16546 case SPE_BUILTIN_EVSTDW:
16547 case SPE_BUILTIN_EVSTWHE:
16548 case SPE_BUILTIN_EVSTWHO:
16549 case SPE_BUILTIN_EVSTWWE:
16550 case SPE_BUILTIN_EVSTWWO:
16551 arg1 = CALL_EXPR_ARG (exp, 2);
16552 if (TREE_CODE (arg1) != INTEGER_CST
16553 || TREE_INT_CST_LOW (arg1) & ~0x1f)
16555 error ("argument 2 must be a 5-bit unsigned literal");
16556 return const0_rtx;
16558 break;
16559 default:
16560 break;
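/* Illustrative constraint enforced by the check above (not compiler
   code): the literal offset operand of these stores fits in 5 bits:
     __builtin_spe_evstdd (v, p, 31);   => OK, 0..31
     __builtin_spe_evstdd (v, p, 32);   => rejected with the error above.  */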
16563 /* The evsplat*i instructions are not quite generic. */
16564 switch (fcode)
16566 case SPE_BUILTIN_EVSPLATFI:
16567 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
16568 exp, target);
16569 case SPE_BUILTIN_EVSPLATI:
16570 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
16571 exp, target);
16572 default:
16573 break;
16576 d = bdesc_2arg_spe;
16577 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
16578 if (d->code == fcode)
16579 return rs6000_expand_binop_builtin (d->icode, exp, target);
16581 d = bdesc_spe_predicates;
16582 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
16583 if (d->code == fcode)
16584 return spe_expand_predicate_builtin (d->icode, exp, target);
16586 d = bdesc_spe_evsel;
16587 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
16588 if (d->code == fcode)
16589 return spe_expand_evsel_builtin (d->icode, exp, target);
16591 switch (fcode)
16593 case SPE_BUILTIN_EVSTDDX:
16594 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16595 case SPE_BUILTIN_EVSTDHX:
16596 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16597 case SPE_BUILTIN_EVSTDWX:
16598 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16599 case SPE_BUILTIN_EVSTWHEX:
16600 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16601 case SPE_BUILTIN_EVSTWHOX:
16602 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16603 case SPE_BUILTIN_EVSTWWEX:
16604 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16605 case SPE_BUILTIN_EVSTWWOX:
16606 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16607 case SPE_BUILTIN_EVSTDD:
16608 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16609 case SPE_BUILTIN_EVSTDH:
16610 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16611 case SPE_BUILTIN_EVSTDW:
16612 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16613 case SPE_BUILTIN_EVSTWHE:
16614 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16615 case SPE_BUILTIN_EVSTWHO:
16616 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16617 case SPE_BUILTIN_EVSTWWE:
16618 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16619 case SPE_BUILTIN_EVSTWWO:
16620 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16621 case SPE_BUILTIN_MFSPEFSCR:
16622 icode = CODE_FOR_spe_mfspefscr;
16623 tmode = insn_data[icode].operand[0].mode;
16625 if (target == 0
16626 || GET_MODE (target) != tmode
16627 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16628 target = gen_reg_rtx (tmode);
16630 pat = GEN_FCN (icode) (target);
16631 if (! pat)
16632 return 0;
16633 emit_insn (pat);
16634 return target;
16635 case SPE_BUILTIN_MTSPEFSCR:
16636 icode = CODE_FOR_spe_mtspefscr;
16637 arg0 = CALL_EXPR_ARG (exp, 0);
16638 op0 = expand_normal (arg0);
16639 mode0 = insn_data[icode].operand[0].mode;
16641 if (arg0 == error_mark_node)
16642 return const0_rtx;
16644 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16645 op0 = copy_to_mode_reg (mode0, op0);
16647 pat = GEN_FCN (icode) (op0);
16648 if (pat)
16649 emit_insn (pat);
16650 return NULL_RTX;
16651 default:
16652 break;
16655 *expandedp = false;
16656 return NULL_RTX;
16659 static rtx
16660 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16662 rtx pat, scratch, tmp;
16663 tree form = CALL_EXPR_ARG (exp, 0);
16664 tree arg0 = CALL_EXPR_ARG (exp, 1);
16665 tree arg1 = CALL_EXPR_ARG (exp, 2);
16666 rtx op0 = expand_normal (arg0);
16667 rtx op1 = expand_normal (arg1);
16668 machine_mode mode0 = insn_data[icode].operand[1].mode;
16669 machine_mode mode1 = insn_data[icode].operand[2].mode;
16670 int form_int;
16671 enum rtx_code code;
16673 if (TREE_CODE (form) != INTEGER_CST)
16675 error ("argument 1 of __builtin_paired_predicate must be a constant");
16676 return const0_rtx;
16678 else
16679 form_int = TREE_INT_CST_LOW (form);
16681 gcc_assert (mode0 == mode1);
16683 if (arg0 == error_mark_node || arg1 == error_mark_node)
16684 return const0_rtx;
16686 if (target == 0
16687 || GET_MODE (target) != SImode
16688 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16689 target = gen_reg_rtx (SImode);
16690 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16691 op0 = copy_to_mode_reg (mode0, op0);
16692 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16693 op1 = copy_to_mode_reg (mode1, op1);
16695 scratch = gen_reg_rtx (CCFPmode);
16697 pat = GEN_FCN (icode) (scratch, op0, op1);
16698 if (!pat)
16699 return const0_rtx;
16701 emit_insn (pat);
16703 switch (form_int)
16705 /* LT bit. */
16706 case 0:
16707 code = LT;
16708 break;
16709 /* GT bit. */
16710 case 1:
16711 code = GT;
16712 break;
16713 /* EQ bit. */
16714 case 2:
16715 code = EQ;
16716 break;
16717 /* UN bit. */
16718 case 3:
16719 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16720 return target;
16721 default:
16722 error ("argument 1 of __builtin_paired_predicate is out of range");
16723 return const0_rtx;
16726 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16727 emit_move_insn (target, tmp);
16728 return target;
16731 static rtx
16732 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16734 rtx pat, scratch, tmp;
16735 tree form = CALL_EXPR_ARG (exp, 0);
16736 tree arg0 = CALL_EXPR_ARG (exp, 1);
16737 tree arg1 = CALL_EXPR_ARG (exp, 2);
16738 rtx op0 = expand_normal (arg0);
16739 rtx op1 = expand_normal (arg1);
16740 machine_mode mode0 = insn_data[icode].operand[1].mode;
16741 machine_mode mode1 = insn_data[icode].operand[2].mode;
16742 int form_int;
16743 enum rtx_code code;
16745 if (TREE_CODE (form) != INTEGER_CST)
16747 error ("argument 1 of __builtin_spe_predicate must be a constant");
16748 return const0_rtx;
16750 else
16751 form_int = TREE_INT_CST_LOW (form);
16753 gcc_assert (mode0 == mode1);
16755 if (arg0 == error_mark_node || arg1 == error_mark_node)
16756 return const0_rtx;
16758 if (target == 0
16759 || GET_MODE (target) != SImode
16760 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
16761 target = gen_reg_rtx (SImode);
16763 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16764 op0 = copy_to_mode_reg (mode0, op0);
16765 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16766 op1 = copy_to_mode_reg (mode1, op1);
16768 scratch = gen_reg_rtx (CCmode);
16770 pat = GEN_FCN (icode) (scratch, op0, op1);
16771 if (! pat)
16772 return const0_rtx;
16773 emit_insn (pat);
16775 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
16776 _lower_. We use one compare, but look in different bits of the
16777 CR for each variant.
16779 There are 2 elements in each SPE simd type (upper/lower). The CR
16780 bits are set as follows:
16782 BIT 0 | BIT 1 | BIT 2 | BIT 3
16783 U | L | (U | L) | (U & L)
16785 So, for an "all" relationship, BIT 3 would be set.
16786 For an "any" relationship, BIT 2 would be set. Etc.
16788 Following traditional nomenclature, these bits map to:
16790 BIT 0 | BIT 1 | BIT 2 | BIT 3
16791 LT | GT | EQ | OV
16793 Later, we will generate rtl to look in the OV, EQ, LT, or GT bit, as selected by the form argument.  */
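/* For example (illustrative only): form 1, the "any" variant, tests
   the EQ bit, so the predicate is true when the relation holds for at
   least one of the two elements; form 0, the "all" variant, tests the
   OV bit, requiring the relation to hold for both.  */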
16796 switch (form_int)
16798 /* All variant. OV bit. */
16799 case 0:
16800 /* We need to get to the OV bit, which is the ORDERED bit. We
16801 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
16802 that's ugly and will make validate_condition_mode die.
16803 So let's just use another pattern. */
16804 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16805 return target;
16806 /* Any variant. EQ bit. */
16807 case 1:
16808 code = EQ;
16809 break;
16810 /* Upper variant. LT bit. */
16811 case 2:
16812 code = LT;
16813 break;
16814 /* Lower variant. GT bit. */
16815 case 3:
16816 code = GT;
16817 break;
16818 default:
16819 error ("argument 1 of __builtin_spe_predicate is out of range");
16820 return const0_rtx;
16823 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16824 emit_move_insn (target, tmp);
16826 return target;
16829 /* The evsel builtins look like this:
16831 e = __builtin_spe_evsel_OP (a, b, c, d);
16833 and work like this:
16835 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
16836 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];  */
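/* A concrete instance (illustrative; assuming the signed greater-than
   variant __builtin_spe_evsel_gts):
     e = __builtin_spe_evsel_gts (a, b, c, d);
   selects each element of E from C where a > b holds element-wise,
   and from D otherwise.  */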
16839 static rtx
16840 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
16842 rtx pat, scratch;
16843 tree arg0 = CALL_EXPR_ARG (exp, 0);
16844 tree arg1 = CALL_EXPR_ARG (exp, 1);
16845 tree arg2 = CALL_EXPR_ARG (exp, 2);
16846 tree arg3 = CALL_EXPR_ARG (exp, 3);
16847 rtx op0 = expand_normal (arg0);
16848 rtx op1 = expand_normal (arg1);
16849 rtx op2 = expand_normal (arg2);
16850 rtx op3 = expand_normal (arg3);
16851 machine_mode mode0 = insn_data[icode].operand[1].mode;
16852 machine_mode mode1 = insn_data[icode].operand[2].mode;
16854 gcc_assert (mode0 == mode1);
16856 if (arg0 == error_mark_node || arg1 == error_mark_node
16857 || arg2 == error_mark_node || arg3 == error_mark_node)
16858 return const0_rtx;
16860 if (target == 0
16861 || GET_MODE (target) != mode0
16862 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
16863 target = gen_reg_rtx (mode0);
16865 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16866 op0 = copy_to_mode_reg (mode0, op0);
16867 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16868 op1 = copy_to_mode_reg (mode0, op1);
16869 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
16870 op2 = copy_to_mode_reg (mode0, op2);
16871 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
16872 op3 = copy_to_mode_reg (mode0, op3);
16874 /* Generate the compare. */
16875 scratch = gen_reg_rtx (CCmode);
16876 pat = GEN_FCN (icode) (scratch, op0, op1);
16877 if (! pat)
16878 return const0_rtx;
16879 emit_insn (pat);
16881 if (mode0 == V2SImode)
16882 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
16883 else
16884 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
16886 return target;
16889 /* Raise an error message for a builtin function that is called without the
16890 appropriate target options being set. */
16892 static void
16893 rs6000_invalid_builtin (enum rs6000_builtins fncode)
16895 size_t uns_fncode = (size_t)fncode;
16896 const char *name = rs6000_builtin_info[uns_fncode].name;
16897 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16899 gcc_assert (name != NULL);
16900 if ((fnmask & RS6000_BTM_CELL) != 0)
16901 error ("Builtin function %s is only valid for the cell processor", name);
16902 else if ((fnmask & RS6000_BTM_VSX) != 0)
16903 error ("Builtin function %s requires the -mvsx option", name);
16904 else if ((fnmask & RS6000_BTM_HTM) != 0)
16905 error ("Builtin function %s requires the -mhtm option", name);
16906 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16907 error ("Builtin function %s requires the -maltivec option", name);
16908 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16909 error ("Builtin function %s requires the -mpaired option", name);
16910 else if ((fnmask & RS6000_BTM_SPE) != 0)
16911 error ("Builtin function %s requires the -mspe option", name);
16912 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16913 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16914 error ("Builtin function %s requires the -mhard-dfp and"
16915 " -mpower8-vector options", name);
16916 else if ((fnmask & RS6000_BTM_DFP) != 0)
16917 error ("Builtin function %s requires the -mhard-dfp option", name);
16918 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16919 error ("Builtin function %s requires the -mpower8-vector option", name);
16920 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16921 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16922 error ("Builtin function %s requires the -mcpu=power9 and"
16923 " -m64 options", name);
16924 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16925 error ("Builtin function %s requires the -mcpu=power9 option", name);
16926 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16927 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16928 error ("Builtin function %s requires the -mcpu=power9 and"
16929 " -m64 options", name);
16930 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16931 error ("Builtin function %s requires the -mcpu=power9 option", name);
16932 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16933 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16934 error ("Builtin function %s requires the -mhard-float and"
16935 " -mlong-double-128 options", name);
16936 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16937 error ("Builtin function %s requires the -mhard-float option", name);
16938 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16939 error ("Builtin function %s requires the -mfloat128 option", name);
16940 else
16941 error ("Builtin function %s is not supported with the current options",
16942 name);
16945 /* Target hook for early folding of built-ins, shamelessly stolen
16946 from ia64.c. */
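/* For example (illustrative only), a call such as
     __float128 x = __builtin_infq ();
   is folded here into a REAL_CST, so no runtime code is emitted
   for it.  */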
16948 static tree
16949 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
16950 tree *args, bool ignore ATTRIBUTE_UNUSED)
16952 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16954 enum rs6000_builtins fn_code
16955 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16956 switch (fn_code)
16958 case RS6000_BUILTIN_NANQ:
16959 case RS6000_BUILTIN_NANSQ:
16961 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16962 const char *str = c_getstr (*args);
16963 int quiet = fn_code == RS6000_BUILTIN_NANQ;
16964 REAL_VALUE_TYPE real;
16966 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16967 return build_real (type, real);
16968 return NULL_TREE;
16970 case RS6000_BUILTIN_INFQ:
16971 case RS6000_BUILTIN_HUGE_VALQ:
16973 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16974 REAL_VALUE_TYPE inf;
16975 real_inf (&inf);
16976 return build_real (type, inf);
16978 default:
16979 break;
16982 #ifdef SUBTARGET_FOLD_BUILTIN
16983 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16984 #else
16985 return NULL_TREE;
16986 #endif
16989 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
16990 a constant, use rs6000_fold_builtin.) */
16992 bool
16993 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
16995 gimple *stmt = gsi_stmt (*gsi);
16996 tree fndecl = gimple_call_fndecl (stmt);
16997 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
16998 enum rs6000_builtins fn_code
16999 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
17000 tree arg0, arg1, lhs;
17002 switch (fn_code)
17004 /* Flavors of vec_add. We deliberately don't expand
17005 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
17006 TImode, resulting in much poorer code generation. */
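/* For example (illustrative only), this folding rewrites
     _1 = __builtin_altivec_vadduwm (a_2, b_3);
   into the generic GIMPLE statement
     _1 = a_2 + b_3;
   which the rest of the middle end can optimize freely.  */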
17007 case ALTIVEC_BUILTIN_VADDUBM:
17008 case ALTIVEC_BUILTIN_VADDUHM:
17009 case ALTIVEC_BUILTIN_VADDUWM:
17010 case P8V_BUILTIN_VADDUDM:
17011 case ALTIVEC_BUILTIN_VADDFP:
17012 case VSX_BUILTIN_XVADDDP:
17014 arg0 = gimple_call_arg (stmt, 0);
17015 arg1 = gimple_call_arg (stmt, 1);
17016 lhs = gimple_call_lhs (stmt);
17017 gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
17018 gimple_set_location (g, gimple_location (stmt));
17019 gsi_replace (gsi, g, true);
17020 return true;
17022 /* Flavors of vec_sub. We deliberately don't expand
17023 P8V_BUILTIN_VSUBUQM. */
17024 case ALTIVEC_BUILTIN_VSUBUBM:
17025 case ALTIVEC_BUILTIN_VSUBUHM:
17026 case ALTIVEC_BUILTIN_VSUBUWM:
17027 case P8V_BUILTIN_VSUBUDM:
17028 case ALTIVEC_BUILTIN_VSUBFP:
17029 case VSX_BUILTIN_XVSUBDP:
17031 arg0 = gimple_call_arg (stmt, 0);
17032 arg1 = gimple_call_arg (stmt, 1);
17033 lhs = gimple_call_lhs (stmt);
17034 gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
17035 gimple_set_location (g, gimple_location (stmt));
17036 gsi_replace (gsi, g, true);
17037 return true;
17039 /* Even element flavors of vec_mul (signed). */
17040 case ALTIVEC_BUILTIN_VMULESB:
17041 case ALTIVEC_BUILTIN_VMULESH:
17042 /* Even element flavors of vec_mul (unsigned). */
17043 case ALTIVEC_BUILTIN_VMULEUB:
17044 case ALTIVEC_BUILTIN_VMULEUH:
17046 arg0 = gimple_call_arg (stmt, 0);
17047 arg1 = gimple_call_arg (stmt, 1);
17048 lhs = gimple_call_lhs (stmt);
17049 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
17050 gimple_set_location (g, gimple_location (stmt));
17051 gsi_replace (gsi, g, true);
17052 return true;
17054 /* Odd element flavors of vec_mul (signed). */
17055 case ALTIVEC_BUILTIN_VMULOSB:
17056 case ALTIVEC_BUILTIN_VMULOSH:
17057 /* Odd element flavors of vec_mul (unsigned). */
17058 case ALTIVEC_BUILTIN_VMULOUB:
17059 case ALTIVEC_BUILTIN_VMULOUH:
17061 arg0 = gimple_call_arg (stmt, 0);
17062 arg1 = gimple_call_arg (stmt, 1);
17063 lhs = gimple_call_lhs (stmt);
17064 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
17065 gimple_set_location (g, gimple_location (stmt));
17066 gsi_replace (gsi, g, true);
17067 return true;
17070 default:
17071 break;
17074 return false;
17077 /* Expand an expression EXP that calls a built-in function,
17078 with result going to TARGET if that's convenient
17079 (and in mode MODE if that's convenient).
17080 SUBTARGET may be used as the target for computing one of EXP's operands.
17081 IGNORE is nonzero if the value is to be ignored. */
17083 static rtx
17084 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17085 machine_mode mode ATTRIBUTE_UNUSED,
17086 int ignore ATTRIBUTE_UNUSED)
17088 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17089 enum rs6000_builtins fcode
17090 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
17091 size_t uns_fcode = (size_t)fcode;
17092 const struct builtin_description *d;
17093 size_t i;
17094 rtx ret;
17095 bool success;
17096 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
17097 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
17099 if (TARGET_DEBUG_BUILTIN)
17101 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
17102 const char *name1 = rs6000_builtin_info[uns_fcode].name;
17103 const char *name2 = ((icode != CODE_FOR_nothing)
17104 ? get_insn_name ((int)icode)
17105 : "nothing");
17106 const char *name3;
17108 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
17110 default: name3 = "unknown"; break;
17111 case RS6000_BTC_SPECIAL: name3 = "special"; break;
17112 case RS6000_BTC_UNARY: name3 = "unary"; break;
17113 case RS6000_BTC_BINARY: name3 = "binary"; break;
17114 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
17115 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
17116 case RS6000_BTC_ABS: name3 = "abs"; break;
17117 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
17118 case RS6000_BTC_DST: name3 = "dst"; break;
17122 fprintf (stderr,
17123 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
17124 (name1) ? name1 : "---", fcode,
17125 (name2) ? name2 : "---", (int)icode,
17126 name3,
17127 func_valid_p ? "" : ", not valid");
17130 if (!func_valid_p)
17132 rs6000_invalid_builtin (fcode);
17134 /* Given it is invalid, just generate a normal call. */
17135 return expand_call (exp, target, ignore);
17138 switch (fcode)
17140 case RS6000_BUILTIN_RECIP:
17141 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
17143 case RS6000_BUILTIN_RECIPF:
17144 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
17146 case RS6000_BUILTIN_RSQRTF:
17147 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
17149 case RS6000_BUILTIN_RSQRT:
17150 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
17152 case POWER7_BUILTIN_BPERMD:
17153 return rs6000_expand_binop_builtin (((TARGET_64BIT)
17154 ? CODE_FOR_bpermd_di
17155 : CODE_FOR_bpermd_si), exp, target);
17157 case RS6000_BUILTIN_GET_TB:
17158 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
17159 target);
17161 case RS6000_BUILTIN_MFTB:
17162 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
17163 ? CODE_FOR_rs6000_mftb_di
17164 : CODE_FOR_rs6000_mftb_si),
17165 target);
17167 case RS6000_BUILTIN_MFFS:
17168 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
17170 case RS6000_BUILTIN_MTFSF:
17171 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
17173 case RS6000_BUILTIN_CPU_INIT:
17174 case RS6000_BUILTIN_CPU_IS:
17175 case RS6000_BUILTIN_CPU_SUPPORTS:
17176 return cpu_expand_builtin (fcode, exp, target);
17178 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
17179 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
17181 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
17182 : (int) CODE_FOR_altivec_lvsl_direct);
17183 machine_mode tmode = insn_data[icode].operand[0].mode;
17184 machine_mode mode = insn_data[icode].operand[1].mode;
17185 tree arg;
17186 rtx op, addr, pat;
17188 gcc_assert (TARGET_ALTIVEC);
17190 arg = CALL_EXPR_ARG (exp, 0);
17191 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
17192 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
17193 addr = memory_address (mode, op);
17194 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
17195 op = addr;
17196 else
17198 /* For the load case we need to negate the address. */
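/* Illustrative background (a sketch, not a specification): the mask
   produced here feeds a vperm that merges the two aligned quadwords
   surrounding an unaligned address; negating the address yields the
   complementary shift amount needed on the load side.  */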
17199 op = gen_reg_rtx (GET_MODE (addr));
17200 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
17202 op = gen_rtx_MEM (mode, op);
17204 if (target == 0
17205 || GET_MODE (target) != tmode
17206 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17207 target = gen_reg_rtx (tmode);
17209 pat = GEN_FCN (icode) (target, op);
17210 if (!pat)
17211 return 0;
17212 emit_insn (pat);
17214 return target;
17217 case ALTIVEC_BUILTIN_VCFUX:
17218 case ALTIVEC_BUILTIN_VCFSX:
17219 case ALTIVEC_BUILTIN_VCTUXS:
17220 case ALTIVEC_BUILTIN_VCTSXS:
17221 /* FIXME: There's got to be a nicer way to handle this case than
17222 constructing a new CALL_EXPR. */
17223 if (call_expr_nargs (exp) == 1)
17225 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
17226 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
17228 break;
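/* Illustrative effect (not compiler code): the one-argument form,
   e.g. for vec_ctf,
     vf = vec_ctf (vi);
   is handled exactly like the canonical two-argument form
     vf = vec_ctf (vi, 0);
   by rebuilding the call with an explicit zero scale factor.  */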
17230 default:
17231 break;
17234 if (TARGET_ALTIVEC)
17236 ret = altivec_expand_builtin (exp, target, &success);
17238 if (success)
17239 return ret;
17241 if (TARGET_SPE)
17243 ret = spe_expand_builtin (exp, target, &success);
17245 if (success)
17246 return ret;
17248 if (TARGET_PAIRED_FLOAT)
17250 ret = paired_expand_builtin (exp, target, &success);
17252 if (success)
17253 return ret;
17255 if (TARGET_HTM)
17257 ret = htm_expand_builtin (exp, target, &success);
17259 if (success)
17260 return ret;
17263 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
17264 /* RS6000_BTC_SPECIAL represents no-operand operators. */
17265 gcc_assert (attr == RS6000_BTC_UNARY
17266 || attr == RS6000_BTC_BINARY
17267 || attr == RS6000_BTC_TERNARY
17268 || attr == RS6000_BTC_SPECIAL);
17270 /* Handle simple unary operations. */
17271 d = bdesc_1arg;
17272 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17273 if (d->code == fcode)
17274 return rs6000_expand_unop_builtin (d->icode, exp, target);
17276 /* Handle simple binary operations. */
17277 d = bdesc_2arg;
17278 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17279 if (d->code == fcode)
17280 return rs6000_expand_binop_builtin (d->icode, exp, target);
17282 /* Handle simple ternary operations. */
17283 d = bdesc_3arg;
17284 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17285 if (d->code == fcode)
17286 return rs6000_expand_ternop_builtin (d->icode, exp, target);
17288 /* Handle simple no-argument operations. */
17289 d = bdesc_0arg;
17290 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17291 if (d->code == fcode)
17292 return rs6000_expand_zeroop_builtin (d->icode, target);
17294 gcc_unreachable ();
17297 /* Create a builtin vector type with a name, taking care not to give
17298 the canonical type a name. */
17300 static tree
17301 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
17303 tree result = build_vector_type (elt_type, num_elts);
17305 /* Copy so we don't give the canonical type a name. */
17306 result = build_variant_type_copy (result);
17308 add_builtin_type (name, result);
17310 return result;
17313 static void
17314 rs6000_init_builtins (void)
17316 tree tdecl;
17317 tree ftype;
17318 machine_mode mode;
17320 if (TARGET_DEBUG_BUILTIN)
17321 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
17322 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
17323 (TARGET_SPE) ? ", spe" : "",
17324 (TARGET_ALTIVEC) ? ", altivec" : "",
17325 (TARGET_VSX) ? ", vsx" : "");
17327 V2SI_type_node = build_vector_type (intSI_type_node, 2);
17328 V2SF_type_node = build_vector_type (float_type_node, 2);
17329 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
17330 : "__vector long long",
17331 intDI_type_node, 2);
17332 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
17333 V4HI_type_node = build_vector_type (intHI_type_node, 4);
17334 V4SI_type_node = rs6000_vector_type ("__vector signed int",
17335 intSI_type_node, 4);
17336 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
17337 V8HI_type_node = rs6000_vector_type ("__vector signed short",
17338 intHI_type_node, 8);
17339 V16QI_type_node = rs6000_vector_type ("__vector signed char",
17340 intQI_type_node, 16);
17342 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
17343 unsigned_intQI_type_node, 16);
17344 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
17345 unsigned_intHI_type_node, 8);
17346 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
17347 unsigned_intSI_type_node, 4);
17348 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17349 ? "__vector unsigned long"
17350 : "__vector unsigned long long",
17351 unsigned_intDI_type_node, 2);
17353 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
17354 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
17355 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
17356 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
17358 const_str_type_node
17359 = build_pointer_type (build_qualified_type (char_type_node,
17360 TYPE_QUAL_CONST));
17362 /* We use V1TI mode as a special container to hold __int128_t items that
17363 must live in VSX registers. */
17364 if (intTI_type_node)
17366 V1TI_type_node = rs6000_vector_type ("__vector __int128",
17367 intTI_type_node, 1);
17368 unsigned_V1TI_type_node
17369 = rs6000_vector_type ("__vector unsigned __int128",
17370 unsigned_intTI_type_node, 1);
17373 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
17374 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
17375 'vector unsigned short'. */
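/* For example (illustrative only), C++ overload resolution must be
   able to tell
     vec_sel (a, b, (vector __bool int) mask)
   apart from
     vec_sel (a, b, (vector unsigned int) mask),
   which is only possible when the __bool types are distinct copies.  */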
17377 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
17378 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17379 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
17380 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
17381 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17383 long_integer_type_internal_node = long_integer_type_node;
17384 long_unsigned_type_internal_node = long_unsigned_type_node;
17385 long_long_integer_type_internal_node = long_long_integer_type_node;
17386 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
17387 intQI_type_internal_node = intQI_type_node;
17388 uintQI_type_internal_node = unsigned_intQI_type_node;
17389 intHI_type_internal_node = intHI_type_node;
17390 uintHI_type_internal_node = unsigned_intHI_type_node;
17391 intSI_type_internal_node = intSI_type_node;
17392 uintSI_type_internal_node = unsigned_intSI_type_node;
17393 intDI_type_internal_node = intDI_type_node;
17394 uintDI_type_internal_node = unsigned_intDI_type_node;
17395 intTI_type_internal_node = intTI_type_node;
17396 uintTI_type_internal_node = unsigned_intTI_type_node;
17397 float_type_internal_node = float_type_node;
17398 double_type_internal_node = double_type_node;
17399 long_double_type_internal_node = long_double_type_node;
17400 dfloat64_type_internal_node = dfloat64_type_node;
17401 dfloat128_type_internal_node = dfloat128_type_node;
17402 void_type_internal_node = void_type_node;
17404 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
17405 IFmode is the IBM extended 128-bit format that is a pair of doubles.
17406 TFmode will be either IEEE 128-bit floating point or the IBM double-double
17407 format that uses a pair of doubles, depending on the switches and
17408 defaults.
17410 We do not enable the actual __float128 keyword unless the user explicitly
17411 asks for it, because the library support is not yet complete.
17413 If we don't support either 128-bit IBM double-double or IEEE 128-bit
17414 floating point, we need to make sure the type is non-zero, or else the
17415 self-test fails during bootstrap.
17417 We don't register a built-in type for __ibm128 if the type is the same as
17418 long double. Instead, rs6000_cpu_cpp_builtins #defines __ibm128 to
17419 long double. */
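/* Illustrative summary (a sketch; the exact modes depend on the
   switches and defaults described above):
     __ibm128    b;   => IFmode, IBM pair-of-doubles format
     __float128  f;   => KFmode, IEEE binary128 (with -mfloat128)
     long double l;   => TFmode, either format per the defaults.  */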
17420 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
17422 ibm128_float_type_node = make_node (REAL_TYPE);
17423 TYPE_PRECISION (ibm128_float_type_node) = 128;
17424 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
17425 layout_type (ibm128_float_type_node);
17427 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
17428 "__ibm128");
17430 else
17431 ibm128_float_type_node = long_double_type_node;
17433 if (TARGET_FLOAT128_KEYWORD)
17435 ieee128_float_type_node = float128_type_node;
17436 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17437 "__float128");
17440 else if (TARGET_FLOAT128_TYPE)
17442 ieee128_float_type_node = make_node (REAL_TYPE);
17443 TYPE_PRECISION (ieee128_float_type_node) = 128;
17444 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
17445 layout_type (ieee128_float_type_node);
17447 /* If we are not exporting the __float128/_Float128 keywords, we need a
17448 keyword to get the types created. Use __ieee128 as the dummy
17449 keyword. */
17450 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17451 "__ieee128");
17454 else
17455 ieee128_float_type_node = long_double_type_node;
17457 /* Initialize the modes for builtin_function_type, mapping a machine mode
17458 to its tree type node. */
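/* For example (illustrative only), builtin_function_type uses this
   table to map (V4SImode, unsigned) to unsigned_V4SI_type_node when
   constructing a builtin's prototype.  */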
17459 builtin_mode_to_type[QImode][0] = integer_type_node;
17460 builtin_mode_to_type[HImode][0] = integer_type_node;
17461 builtin_mode_to_type[SImode][0] = intSI_type_node;
17462 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
17463 builtin_mode_to_type[DImode][0] = intDI_type_node;
17464 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
17465 builtin_mode_to_type[TImode][0] = intTI_type_node;
17466 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
17467 builtin_mode_to_type[SFmode][0] = float_type_node;
17468 builtin_mode_to_type[DFmode][0] = double_type_node;
17469 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
17470 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
17471 builtin_mode_to_type[TFmode][0] = long_double_type_node;
17472 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
17473 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
17474 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
17475 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
17476 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
17477 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
17478 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
17479 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
17480 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
17481 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
17482 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
17483 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
17484 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
17485 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
17486 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
17487 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
17488 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
17490 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
17491 TYPE_NAME (bool_char_type_node) = tdecl;
17493 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
17494 TYPE_NAME (bool_short_type_node) = tdecl;
17496 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
17497 TYPE_NAME (bool_int_type_node) = tdecl;
17499 tdecl = add_builtin_type ("__pixel", pixel_type_node);
17500 TYPE_NAME (pixel_type_node) = tdecl;
17502 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
17503 bool_char_type_node, 16);
17504 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
17505 bool_short_type_node, 8);
17506 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
17507 bool_int_type_node, 4);
17508 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17509 ? "__vector __bool long"
17510 : "__vector __bool long long",
17511 bool_long_type_node, 2);
17512 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
17513 pixel_type_node, 8);
17515 /* Paired and SPE builtins are only available if the compiler was built
17516 with the corresponding options, so create those builtins only when the
17517 options are enabled. Create AltiVec and VSX builtins on machines with
17518 at least the general-purpose extensions (970 and newer) to allow the
17519 use of the target attribute. */
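/* For example (illustrative only), creating the AltiVec builtins here
   even without -maltivec is what lets a function declared with
     __attribute__ ((target ("altivec")))
   use them while the rest of the file is compiled without the
   option.  */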
17520 if (TARGET_PAIRED_FLOAT)
17521 paired_init_builtins ();
17522 if (TARGET_SPE)
17523 spe_init_builtins ();
17524 if (TARGET_EXTRA_BUILTINS)
17525 altivec_init_builtins ();
17526 if (TARGET_HTM)
17527 htm_init_builtins ();
17529 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
17530 rs6000_common_init_builtins ();
17532 ftype = build_function_type_list (ieee128_float_type_node,
17533 const_str_type_node, NULL_TREE);
17534 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
17535 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
17537 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
17538 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
17539 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
17541 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
17542 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
17543 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
17545 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
17546 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
17547 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
17549 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
17550 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
17551 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
17553 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
17554 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
17555 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
17557 mode = (TARGET_64BIT) ? DImode : SImode;
17558 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
17559 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
17560 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
17562 ftype = build_function_type_list (unsigned_intDI_type_node,
17563 NULL_TREE);
17564 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
17566 if (TARGET_64BIT)
17567 ftype = build_function_type_list (unsigned_intDI_type_node,
17568 NULL_TREE);
17569 else
17570 ftype = build_function_type_list (unsigned_intSI_type_node,
17571 NULL_TREE);
17572 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
17574 ftype = build_function_type_list (double_type_node, NULL_TREE);
17575 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17577 ftype = build_function_type_list (void_type_node,
17578 intSI_type_node, double_type_node,
17579 NULL_TREE);
17580 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17582 ftype = build_function_type_list (void_type_node, NULL_TREE);
17583 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17585 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17586 NULL_TREE);
17587 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17588 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
17590 /* AIX libm provides clog as __clog. */
17591 if (TARGET_XCOFF &&
17592 (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17593 set_user_assembler_name (tdecl, "__clog");
17595 #ifdef SUBTARGET_INIT_BUILTINS
17596 SUBTARGET_INIT_BUILTINS;
17597 #endif
17600 /* Returns the rs6000 builtin decl for CODE. */
17602 static tree
17603 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17605 HOST_WIDE_INT fnmask;
17607 if (code >= RS6000_BUILTIN_COUNT)
17608 return error_mark_node;
17610 fnmask = rs6000_builtin_info[code].mask;
17611 if ((fnmask & rs6000_builtin_mask) != fnmask)
17613 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17614 return error_mark_node;
17617 return rs6000_builtin_decls[code];
17620 static void
17621 spe_init_builtins (void)
17623 tree puint_type_node = build_pointer_type (unsigned_type_node);
17624 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
17625 const struct builtin_description *d;
17626 size_t i;
17627 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17629 tree v2si_ftype_4_v2si
17630 = build_function_type_list (opaque_V2SI_type_node,
17631 opaque_V2SI_type_node,
17632 opaque_V2SI_type_node,
17633 opaque_V2SI_type_node,
17634 opaque_V2SI_type_node,
17635 NULL_TREE);
17637 tree v2sf_ftype_4_v2sf
17638 = build_function_type_list (opaque_V2SF_type_node,
17639 opaque_V2SF_type_node,
17640 opaque_V2SF_type_node,
17641 opaque_V2SF_type_node,
17642 opaque_V2SF_type_node,
17643 NULL_TREE);
17645 tree int_ftype_int_v2si_v2si
17646 = build_function_type_list (integer_type_node,
17647 integer_type_node,
17648 opaque_V2SI_type_node,
17649 opaque_V2SI_type_node,
17650 NULL_TREE);
17652 tree int_ftype_int_v2sf_v2sf
17653 = build_function_type_list (integer_type_node,
17654 integer_type_node,
17655 opaque_V2SF_type_node,
17656 opaque_V2SF_type_node,
17657 NULL_TREE);
17659 tree void_ftype_v2si_puint_int
17660 = build_function_type_list (void_type_node,
17661 opaque_V2SI_type_node,
17662 puint_type_node,
17663 integer_type_node,
17664 NULL_TREE);
17666 tree void_ftype_v2si_puint_char
17667 = build_function_type_list (void_type_node,
17668 opaque_V2SI_type_node,
17669 puint_type_node,
17670 char_type_node,
17671 NULL_TREE);
17673 tree void_ftype_v2si_pv2si_int
17674 = build_function_type_list (void_type_node,
17675 opaque_V2SI_type_node,
17676 opaque_p_V2SI_type_node,
17677 integer_type_node,
17678 NULL_TREE);
17680 tree void_ftype_v2si_pv2si_char
17681 = build_function_type_list (void_type_node,
17682 opaque_V2SI_type_node,
17683 opaque_p_V2SI_type_node,
17684 char_type_node,
17685 NULL_TREE);
17687 tree void_ftype_int
17688 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17690 tree int_ftype_void
17691 = build_function_type_list (integer_type_node, NULL_TREE);
17693 tree v2si_ftype_pv2si_int
17694 = build_function_type_list (opaque_V2SI_type_node,
17695 opaque_p_V2SI_type_node,
17696 integer_type_node,
17697 NULL_TREE);
17699 tree v2si_ftype_puint_int
17700 = build_function_type_list (opaque_V2SI_type_node,
17701 puint_type_node,
17702 integer_type_node,
17703 NULL_TREE);
17705 tree v2si_ftype_pushort_int
17706 = build_function_type_list (opaque_V2SI_type_node,
17707 pushort_type_node,
17708 integer_type_node,
17709 NULL_TREE);
17711 tree v2si_ftype_signed_char
17712 = build_function_type_list (opaque_V2SI_type_node,
17713 signed_char_type_node,
17714 NULL_TREE);
17716 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
17718 /* Initialize irregular SPE builtins. */
17720 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
17721 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
17722 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
17723 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
17724 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
17725 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
17726 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
17727 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
17728 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
17729 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
17730 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
17731 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
17732 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
17733 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
17734 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
17735 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
17736 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
17737 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
17739 /* Loads. */
17740 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
17741 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
17742 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
17743 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
17744 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
17745 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
17746 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
17747 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
17748 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
17749 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
17750 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
17751 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
17752 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
17753 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
17754 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
17755 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
17756 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
17757 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
17758 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
17759 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
17760 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
17761 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
17763 /* Predicates. */
17764 d = bdesc_spe_predicates;
17765 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
17767 tree type;
17768 HOST_WIDE_INT mask = d->mask;
17770 if ((mask & builtin_mask) != mask)
17772 if (TARGET_DEBUG_BUILTIN)
17773 fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
17774 d->name);
17775 continue;
17778 /* Cannot define builtin if the instruction is disabled. */
17779 gcc_assert (d->icode != CODE_FOR_nothing);
17780 switch (insn_data[d->icode].operand[1].mode)
17782 case V2SImode:
17783 type = int_ftype_int_v2si_v2si;
17784 break;
17785 case V2SFmode:
17786 type = int_ftype_int_v2sf_v2sf;
17787 break;
17788 default:
17789 gcc_unreachable ();
17792 def_builtin (d->name, type, d->code);
17795 /* Evsel predicates. */
17796 d = bdesc_spe_evsel;
17797 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
17799 tree type;
17800 HOST_WIDE_INT mask = d->mask;
17802 if ((mask & builtin_mask) != mask)
17804 if (TARGET_DEBUG_BUILTIN)
17805 fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
17806 d->name);
17807 continue;
17810 /* Cannot define builtin if the instruction is disabled. */
17811 gcc_assert (d->icode != CODE_FOR_nothing);
17812 switch (insn_data[d->icode].operand[1].mode)
17814 case V2SImode:
17815 type = v2si_ftype_4_v2si;
17816 break;
17817 case V2SFmode:
17818 type = v2sf_ftype_4_v2sf;
17819 break;
17820 default:
17821 gcc_unreachable ();
17824 def_builtin (d->name, type, d->code);
17828 static void
17829 paired_init_builtins (void)
17831 const struct builtin_description *d;
17832 size_t i;
17833 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17835 tree int_ftype_int_v2sf_v2sf
17836 = build_function_type_list (integer_type_node,
17837 integer_type_node,
17838 V2SF_type_node,
17839 V2SF_type_node,
17840 NULL_TREE);
17841 tree pcfloat_type_node =
17842 build_pointer_type (build_qualified_type
17843 (float_type_node, TYPE_QUAL_CONST));
17845 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17846 long_integer_type_node,
17847 pcfloat_type_node,
17848 NULL_TREE);
17849 tree void_ftype_v2sf_long_pcfloat =
17850 build_function_type_list (void_type_node,
17851 V2SF_type_node,
17852 long_integer_type_node,
17853 pcfloat_type_node,
17854 NULL_TREE);
17857 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17858 PAIRED_BUILTIN_LX);
17861 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17862 PAIRED_BUILTIN_STX);
17864 /* Predicates. */
17865 d = bdesc_paired_preds;
17866 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17868 tree type;
17869 HOST_WIDE_INT mask = d->mask;
17871 if ((mask & builtin_mask) != mask)
17873 if (TARGET_DEBUG_BUILTIN)
17874 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
17875 d->name);
17876 continue;
17879 /* Cannot define builtin if the instruction is disabled. */
17880 gcc_assert (d->icode != CODE_FOR_nothing);
17882 if (TARGET_DEBUG_BUILTIN)
17883 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17884 (int)i, get_insn_name (d->icode), (int)d->icode,
17885 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17887 switch (insn_data[d->icode].operand[1].mode)
17889 case V2SFmode:
17890 type = int_ftype_int_v2sf_v2sf;
17891 break;
17892 default:
17893 gcc_unreachable ();
17896 def_builtin (d->name, type, d->code);
17900 static void
17901 altivec_init_builtins (void)
17903 const struct builtin_description *d;
17904 size_t i;
17905 tree ftype;
17906 tree decl;
17907 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17909 tree pvoid_type_node = build_pointer_type (void_type_node);
17911 tree pcvoid_type_node
17912 = build_pointer_type (build_qualified_type (void_type_node,
17913 TYPE_QUAL_CONST));
17915 tree int_ftype_opaque
17916 = build_function_type_list (integer_type_node,
17917 opaque_V4SI_type_node, NULL_TREE);
17918 tree opaque_ftype_opaque
17919 = build_function_type_list (integer_type_node, NULL_TREE);
17920 tree opaque_ftype_opaque_int
17921 = build_function_type_list (opaque_V4SI_type_node,
17922 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17923 tree opaque_ftype_opaque_opaque_int
17924 = build_function_type_list (opaque_V4SI_type_node,
17925 opaque_V4SI_type_node, opaque_V4SI_type_node,
17926 integer_type_node, NULL_TREE);
17927 tree opaque_ftype_opaque_opaque_opaque
17928 = build_function_type_list (opaque_V4SI_type_node,
17929 opaque_V4SI_type_node, opaque_V4SI_type_node,
17930 opaque_V4SI_type_node, NULL_TREE);
17931 tree opaque_ftype_opaque_opaque
17932 = build_function_type_list (opaque_V4SI_type_node,
17933 opaque_V4SI_type_node, opaque_V4SI_type_node,
17934 NULL_TREE);
17935 tree int_ftype_int_opaque_opaque
17936 = build_function_type_list (integer_type_node,
17937 integer_type_node, opaque_V4SI_type_node,
17938 opaque_V4SI_type_node, NULL_TREE);
17939 tree int_ftype_int_v4si_v4si
17940 = build_function_type_list (integer_type_node,
17941 integer_type_node, V4SI_type_node,
17942 V4SI_type_node, NULL_TREE);
17943 tree int_ftype_int_v2di_v2di
17944 = build_function_type_list (integer_type_node,
17945 integer_type_node, V2DI_type_node,
17946 V2DI_type_node, NULL_TREE);
17947 tree void_ftype_v4si
17948 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17949 tree v8hi_ftype_void
17950 = build_function_type_list (V8HI_type_node, NULL_TREE);
17951 tree void_ftype_void
17952 = build_function_type_list (void_type_node, NULL_TREE);
17953 tree void_ftype_int
17954 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17956 tree opaque_ftype_long_pcvoid
17957 = build_function_type_list (opaque_V4SI_type_node,
17958 long_integer_type_node, pcvoid_type_node,
17959 NULL_TREE);
17960 tree v16qi_ftype_long_pcvoid
17961 = build_function_type_list (V16QI_type_node,
17962 long_integer_type_node, pcvoid_type_node,
17963 NULL_TREE);
17964 tree v8hi_ftype_long_pcvoid
17965 = build_function_type_list (V8HI_type_node,
17966 long_integer_type_node, pcvoid_type_node,
17967 NULL_TREE);
17968 tree v4si_ftype_long_pcvoid
17969 = build_function_type_list (V4SI_type_node,
17970 long_integer_type_node, pcvoid_type_node,
17971 NULL_TREE);
17972 tree v4sf_ftype_long_pcvoid
17973 = build_function_type_list (V4SF_type_node,
17974 long_integer_type_node, pcvoid_type_node,
17975 NULL_TREE);
17976 tree v2df_ftype_long_pcvoid
17977 = build_function_type_list (V2DF_type_node,
17978 long_integer_type_node, pcvoid_type_node,
17979 NULL_TREE);
17980 tree v2di_ftype_long_pcvoid
17981 = build_function_type_list (V2DI_type_node,
17982 long_integer_type_node, pcvoid_type_node,
17983 NULL_TREE);
17985 tree void_ftype_opaque_long_pvoid
17986 = build_function_type_list (void_type_node,
17987 opaque_V4SI_type_node, long_integer_type_node,
17988 pvoid_type_node, NULL_TREE);
17989 tree void_ftype_v4si_long_pvoid
17990 = build_function_type_list (void_type_node,
17991 V4SI_type_node, long_integer_type_node,
17992 pvoid_type_node, NULL_TREE);
17993 tree void_ftype_v16qi_long_pvoid
17994 = build_function_type_list (void_type_node,
17995 V16QI_type_node, long_integer_type_node,
17996 pvoid_type_node, NULL_TREE);
17998 tree void_ftype_v16qi_pvoid_long
17999 = build_function_type_list (void_type_node,
18000 V16QI_type_node, pvoid_type_node,
18001 long_integer_type_node, NULL_TREE);
18003 tree void_ftype_v8hi_long_pvoid
18004 = build_function_type_list (void_type_node,
18005 V8HI_type_node, long_integer_type_node,
18006 pvoid_type_node, NULL_TREE);
18007 tree void_ftype_v4sf_long_pvoid
18008 = build_function_type_list (void_type_node,
18009 V4SF_type_node, long_integer_type_node,
18010 pvoid_type_node, NULL_TREE);
18011 tree void_ftype_v2df_long_pvoid
18012 = build_function_type_list (void_type_node,
18013 V2DF_type_node, long_integer_type_node,
18014 pvoid_type_node, NULL_TREE);
18015 tree void_ftype_v2di_long_pvoid
18016 = build_function_type_list (void_type_node,
18017 V2DI_type_node, long_integer_type_node,
18018 pvoid_type_node, NULL_TREE);
18019 tree int_ftype_int_v8hi_v8hi
18020 = build_function_type_list (integer_type_node,
18021 integer_type_node, V8HI_type_node,
18022 V8HI_type_node, NULL_TREE);
18023 tree int_ftype_int_v16qi_v16qi
18024 = build_function_type_list (integer_type_node,
18025 integer_type_node, V16QI_type_node,
18026 V16QI_type_node, NULL_TREE);
18027 tree int_ftype_int_v4sf_v4sf
18028 = build_function_type_list (integer_type_node,
18029 integer_type_node, V4SF_type_node,
18030 V4SF_type_node, NULL_TREE);
18031 tree int_ftype_int_v2df_v2df
18032 = build_function_type_list (integer_type_node,
18033 integer_type_node, V2DF_type_node,
18034 V2DF_type_node, NULL_TREE);
18035 tree v2di_ftype_v2di
18036 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18037 tree v4si_ftype_v4si
18038 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18039 tree v8hi_ftype_v8hi
18040 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18041 tree v16qi_ftype_v16qi
18042 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18043 tree v4sf_ftype_v4sf
18044 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18045 tree v2df_ftype_v2df
18046 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18047 tree void_ftype_pcvoid_int_int
18048 = build_function_type_list (void_type_node,
18049 pcvoid_type_node, integer_type_node,
18050 integer_type_node, NULL_TREE);
18052 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
18053 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
18054 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
18055 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
18056 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
18057 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
18058 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
18059 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
18060 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
18061 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
18062 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
18063 ALTIVEC_BUILTIN_LVXL_V2DF);
18064 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
18065 ALTIVEC_BUILTIN_LVXL_V2DI);
18066 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
18067 ALTIVEC_BUILTIN_LVXL_V4SF);
18068 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
18069 ALTIVEC_BUILTIN_LVXL_V4SI);
18070 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
18071 ALTIVEC_BUILTIN_LVXL_V8HI);
18072 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
18073 ALTIVEC_BUILTIN_LVXL_V16QI);
18074 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
18075 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
18076 ALTIVEC_BUILTIN_LVX_V2DF);
18077 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
18078 ALTIVEC_BUILTIN_LVX_V2DI);
18079 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
18080 ALTIVEC_BUILTIN_LVX_V4SF);
18081 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
18082 ALTIVEC_BUILTIN_LVX_V4SI);
18083 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
18084 ALTIVEC_BUILTIN_LVX_V8HI);
18085 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
18086 ALTIVEC_BUILTIN_LVX_V16QI);
18087 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
18088 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
18089 ALTIVEC_BUILTIN_STVX_V2DF);
18090 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
18091 ALTIVEC_BUILTIN_STVX_V2DI);
18092 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
18093 ALTIVEC_BUILTIN_STVX_V4SF);
18094 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
18095 ALTIVEC_BUILTIN_STVX_V4SI);
18096 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
18097 ALTIVEC_BUILTIN_STVX_V8HI);
18098 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
18099 ALTIVEC_BUILTIN_STVX_V16QI);
18100 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
18101 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
18102 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
18103 ALTIVEC_BUILTIN_STVXL_V2DF);
18104 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
18105 ALTIVEC_BUILTIN_STVXL_V2DI);
18106 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
18107 ALTIVEC_BUILTIN_STVXL_V4SF);
18108 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
18109 ALTIVEC_BUILTIN_STVXL_V4SI);
18110 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
18111 ALTIVEC_BUILTIN_STVXL_V8HI);
18112 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
18113 ALTIVEC_BUILTIN_STVXL_V16QI);
18114 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
18115 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
18116 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
18117 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
18118 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
18119 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
18120 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
18121 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
18122 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
18123 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
18124 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
18125 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
18126 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
18127 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
18128 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
18129 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
18131 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
18132 VSX_BUILTIN_LXVD2X_V2DF);
18133 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
18134 VSX_BUILTIN_LXVD2X_V2DI);
18135 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
18136 VSX_BUILTIN_LXVW4X_V4SF);
18137 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
18138 VSX_BUILTIN_LXVW4X_V4SI);
18139 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
18140 VSX_BUILTIN_LXVW4X_V8HI);
18141 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
18142 VSX_BUILTIN_LXVW4X_V16QI);
18143 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
18144 VSX_BUILTIN_STXVD2X_V2DF);
18145 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
18146 VSX_BUILTIN_STXVD2X_V2DI);
18147 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
18148 VSX_BUILTIN_STXVW4X_V4SF);
18149 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
18150 VSX_BUILTIN_STXVW4X_V4SI);
18151 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
18152 VSX_BUILTIN_STXVW4X_V8HI);
18153 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
18154 VSX_BUILTIN_STXVW4X_V16QI);
18156 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
18157 VSX_BUILTIN_LD_ELEMREV_V2DF);
18158 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
18159 VSX_BUILTIN_LD_ELEMREV_V2DI);
18160 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
18161 VSX_BUILTIN_LD_ELEMREV_V4SF);
18162 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
18163 VSX_BUILTIN_LD_ELEMREV_V4SI);
18164 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
18165 VSX_BUILTIN_ST_ELEMREV_V2DF);
18166 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
18167 VSX_BUILTIN_ST_ELEMREV_V2DI);
18168 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
18169 VSX_BUILTIN_ST_ELEMREV_V4SF);
18170 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
18171 VSX_BUILTIN_ST_ELEMREV_V4SI);
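/* The *_elemrev builtins load and store vector elements in array order
   regardless of endianness, which is what the vec_xl/vec_xst interfaces
   below require; the raw lxvd2x/lxvw4x builtins above instead expose the
   instructions' big-endian element numbering.  A sketch of the intended
   semantics on a little-endian target (illustrative only):

     v = __builtin_vsx_ld_elemrev_v4si (0, p);
     ==> v[i] == p[i] for i = 0..3  */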
18173 if (TARGET_P9_VECTOR)
18175 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
18176 VSX_BUILTIN_LD_ELEMREV_V8HI);
18177 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
18178 VSX_BUILTIN_LD_ELEMREV_V16QI);
18179 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
18180 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
18181 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
18182 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
18185 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
18186 VSX_BUILTIN_VEC_LD);
18187 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
18188 VSX_BUILTIN_VEC_ST);
18189 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
18190 VSX_BUILTIN_VEC_XL);
18191 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
18192 VSX_BUILTIN_VEC_XST);
18194 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
18195 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
18196 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
18198 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
18199 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
18200 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
18201 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
18202 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
18203 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
18204 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
18205 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
18206 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
18207 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
18208 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
18209 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
18211 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
18212 ALTIVEC_BUILTIN_VEC_ADDE);
18213 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
18214 ALTIVEC_BUILTIN_VEC_ADDEC);
18215 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
18216 ALTIVEC_BUILTIN_VEC_CMPNE);
18217 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
18218 ALTIVEC_BUILTIN_VEC_MUL);
18220 /* Cell builtins. */
18221 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
18222 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
18223 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
18224 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
18226 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
18227 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
18228 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
18229 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
18231 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
18232 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
18233 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
18234 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
18236 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
18237 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
18238 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
18239 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
18241 if (TARGET_P9_VECTOR)
18242 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
18243 P9V_BUILTIN_STXVL);
18245 /* Add the DST variants. */
18246 d = bdesc_dst;
18247 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
18249 HOST_WIDE_INT mask = d->mask;
18251 /* It is expected that these dst built-in functions may have
18252 d->icode equal to CODE_FOR_nothing. */
18253 if ((mask & builtin_mask) != mask)
18255 if (TARGET_DEBUG_BUILTIN)
18256 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
18257 d->name);
18258 continue;
18260 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
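/* These dst builtins are the data stream touch hints behind vec_dst and
   friends in <altivec.h>.  A sketch, assuming AltiVec; the control word
   packs block size, block count and stride as
   (size << 24) | (count << 16) | stride, per the vec_dst documentation:  */
#if 0
#include <altivec.h>

void
prefetch_then_stop (const float *p)
{
  vec_dst (p, (16 << 24) | (4 << 16) | 64, 0); /* start stream 0 */
  /* ... consume the prefetched data ... */
  vec_dss (0);                                 /* stop stream 0 */
}
#endif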
18263 /* Initialize the predicates. */
18264 d = bdesc_altivec_preds;
18265 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
18267 machine_mode mode1;
18268 tree type;
18269 HOST_WIDE_INT mask = d->mask;
18271 if ((mask & builtin_mask) != mask)
18273 if (TARGET_DEBUG_BUILTIN)
18274 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
18275 d->name);
18276 continue;
18279 if (rs6000_overloaded_builtin_p (d->code))
18280 mode1 = VOIDmode;
18281 else
18283 /* Cannot define builtin if the instruction is disabled. */
18284 gcc_assert (d->icode != CODE_FOR_nothing);
18285 mode1 = insn_data[d->icode].operand[1].mode;
18288 switch (mode1)
18290 case VOIDmode:
18291 type = int_ftype_int_opaque_opaque;
18292 break;
18293 case V2DImode:
18294 type = int_ftype_int_v2di_v2di;
18295 break;
18296 case V4SImode:
18297 type = int_ftype_int_v4si_v4si;
18298 break;
18299 case V8HImode:
18300 type = int_ftype_int_v8hi_v8hi;
18301 break;
18302 case V16QImode:
18303 type = int_ftype_int_v16qi_v16qi;
18304 break;
18305 case V4SFmode:
18306 type = int_ftype_int_v4sf_v4sf;
18307 break;
18308 case V2DFmode:
18309 type = int_ftype_int_v2df_v2df;
18310 break;
18311 default:
18312 gcc_unreachable ();
18315 def_builtin (d->name, type, d->code);
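/* The extra leading int in each predicate type above is the CR6 test
   selector.  Roughly, the <altivec.h> predicate macros expand as in this
   sketch (the __CR6_* codes come from altivec.h):

     vec_all_eq (a, b) ==> __builtin_altivec_vcmpequw_p (__CR6_LT, a, b)
     vec_any_eq (a, b) ==> __builtin_altivec_vcmpequw_p (__CR6_EQ_REV, a, b)  */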
18318 /* Initialize the abs* operators. */
18319 d = bdesc_abs;
18320 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
18322 machine_mode mode0;
18323 tree type;
18324 HOST_WIDE_INT mask = d->mask;
18326 if ((mask & builtin_mask) != mask)
18328 if (TARGET_DEBUG_BUILTIN)
18329 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
18330 d->name);
18331 continue;
18334 /* Cannot define builtin if the instruction is disabled. */
18335 gcc_assert (d->icode != CODE_FOR_nothing);
18336 mode0 = insn_data[d->icode].operand[0].mode;
18338 switch (mode0)
18340 case V2DImode:
18341 type = v2di_ftype_v2di;
18342 break;
18343 case V4SImode:
18344 type = v4si_ftype_v4si;
18345 break;
18346 case V8HImode:
18347 type = v8hi_ftype_v8hi;
18348 break;
18349 case V16QImode:
18350 type = v16qi_ftype_v16qi;
18351 break;
18352 case V4SFmode:
18353 type = v4sf_ftype_v4sf;
18354 break;
18355 case V2DFmode:
18356 type = v2df_ftype_v2df;
18357 break;
18358 default:
18359 gcc_unreachable ();
18362 def_builtin (d->name, type, d->code);
18365 /* Initialize target builtin that implements
18366 targetm.vectorize.builtin_mask_for_load. */
18368 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
18369 v16qi_ftype_long_pcvoid,
18370 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
18371 BUILT_IN_MD, NULL, NULL_TREE);
18372 TREE_READONLY (decl) = 1;
18373 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
18374 altivec_builtin_mask_for_load = decl;
18376 /* Access to the vec_init patterns. */
18377 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
18378 integer_type_node, integer_type_node,
18379 integer_type_node, NULL_TREE);
18380 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
18382 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
18383 short_integer_type_node,
18384 short_integer_type_node,
18385 short_integer_type_node,
18386 short_integer_type_node,
18387 short_integer_type_node,
18388 short_integer_type_node,
18389 short_integer_type_node, NULL_TREE);
18390 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
18392 ftype = build_function_type_list (V16QI_type_node, char_type_node,
18393 char_type_node, char_type_node,
18394 char_type_node, char_type_node,
18395 char_type_node, char_type_node,
18396 char_type_node, char_type_node,
18397 char_type_node, char_type_node,
18398 char_type_node, char_type_node,
18399 char_type_node, char_type_node,
18400 char_type_node, NULL_TREE);
18401 def_builtin ("__builtin_vec_init_v16qi", ftype,
18402 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
18404 ftype = build_function_type_list (V4SF_type_node, float_type_node,
18405 float_type_node, float_type_node,
18406 float_type_node, NULL_TREE);
18407 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
18409 /* VSX builtins. */
18410 ftype = build_function_type_list (V2DF_type_node, double_type_node,
18411 double_type_node, NULL_TREE);
18412 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
18414 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
18415 intDI_type_node, NULL_TREE);
18416 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
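/* The vec_init builtins above back vector constructors; a sketch,
   assuming AltiVec is enabled:  */
#if 0
vector signed int
make_v4si (int a, int b, int c, int d)
{
  /* Equivalent to __builtin_vec_init_v4si (a, b, c, d).  */
  return (vector signed int) { a, b, c, d };
}
#endif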
18418 /* Access to the vec_set patterns. */
18419 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18420 intSI_type_node,
18421 integer_type_node, NULL_TREE);
18422 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
18424 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18425 intHI_type_node,
18426 integer_type_node, NULL_TREE);
18427 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
18429 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18430 intQI_type_node,
18431 integer_type_node, NULL_TREE);
18432 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
18434 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18435 float_type_node,
18436 integer_type_node, NULL_TREE);
18437 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
18439 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
18440 double_type_node,
18441 integer_type_node, NULL_TREE);
18442 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
18444 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18445 intDI_type_node,
18446 integer_type_node, NULL_TREE);
18447 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
18449 /* Access to the vec_extract patterns. */
18450 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18451 integer_type_node, NULL_TREE);
18452 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
18454 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18455 integer_type_node, NULL_TREE);
18456 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
18458 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18459 integer_type_node, NULL_TREE);
18460 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
18462 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18463 integer_type_node, NULL_TREE);
18464 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
18466 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18467 integer_type_node, NULL_TREE);
18468 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
18470 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
18471 integer_type_node, NULL_TREE);
18472 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
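/* Likewise, the vec_set and vec_ext builtins back vec_insert and
   vec_extract; a sketch, assuming AltiVec:  */
#if 0
#include <altivec.h>

int
replace_then_read (vector signed int v, int x)
{
  v = vec_insert (x, v, 1);  /* __builtin_vec_set_v4si (v, x, 1) */
  return vec_extract (v, 3); /* __builtin_vec_ext_v4si (v, 3) */
}
#endif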
18475 if (V1TI_type_node)
18477 tree v1ti_ftype_long_pcvoid
18478 = build_function_type_list (V1TI_type_node,
18479 long_integer_type_node, pcvoid_type_node,
18480 NULL_TREE);
18481 tree void_ftype_v1ti_long_pvoid
18482 = build_function_type_list (void_type_node,
18483 V1TI_type_node, long_integer_type_node,
18484 pvoid_type_node, NULL_TREE);
18485 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
18486 VSX_BUILTIN_LXVD2X_V1TI);
18487 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
18488 VSX_BUILTIN_STXVD2X_V1TI);
18489 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
18490 NULL_TREE, NULL_TREE);
18491 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
18492 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
18493 intTI_type_node,
18494 integer_type_node, NULL_TREE);
18495 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
18496 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
18497 integer_type_node, NULL_TREE);
18498 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
18503 static void
18504 htm_init_builtins (void)
18506 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18507 const struct builtin_description *d;
18508 size_t i;
18510 d = bdesc_htm;
18511 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
18513 tree op[MAX_HTM_OPERANDS], type;
18514 HOST_WIDE_INT mask = d->mask;
18515 unsigned attr = rs6000_builtin_info[d->code].attr;
18516 bool void_func = (attr & RS6000_BTC_VOID);
18517 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
18518 int nopnds = 0;
18519 tree gpr_type_node;
18520 tree rettype;
18521 tree argtype;
18523 /* It is expected that these htm built-in functions may have
18524 d->icode equal to CODE_FOR_nothing. */
18526 if (TARGET_32BIT && TARGET_POWERPC64)
18527 gpr_type_node = long_long_unsigned_type_node;
18528 else
18529 gpr_type_node = long_unsigned_type_node;
18531 if (attr & RS6000_BTC_SPR)
18533 rettype = gpr_type_node;
18534 argtype = gpr_type_node;
18536 else if (d->code == HTM_BUILTIN_TABORTDC
18537 || d->code == HTM_BUILTIN_TABORTDCI)
18539 rettype = unsigned_type_node;
18540 argtype = gpr_type_node;
18542 else
18544 rettype = unsigned_type_node;
18545 argtype = unsigned_type_node;
18548 if ((mask & builtin_mask) != mask)
18550 if (TARGET_DEBUG_BUILTIN)
18551 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
18552 continue;
18555 if (d->name == 0)
18557 if (TARGET_DEBUG_BUILTIN)
18558 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
18559 (long unsigned) i);
18560 continue;
18563 op[nopnds++] = (void_func) ? void_type_node : rettype;
18565 if (attr_args == RS6000_BTC_UNARY)
18566 op[nopnds++] = argtype;
18567 else if (attr_args == RS6000_BTC_BINARY)
18569 op[nopnds++] = argtype;
18570 op[nopnds++] = argtype;
18572 else if (attr_args == RS6000_BTC_TERNARY)
18574 op[nopnds++] = argtype;
18575 op[nopnds++] = argtype;
18576 op[nopnds++] = argtype;
18579 switch (nopnds)
18581 case 1:
18582 type = build_function_type_list (op[0], NULL_TREE);
18583 break;
18584 case 2:
18585 type = build_function_type_list (op[0], op[1], NULL_TREE);
18586 break;
18587 case 3:
18588 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
18589 break;
18590 case 4:
18591 type = build_function_type_list (op[0], op[1], op[2], op[3],
18592 NULL_TREE);
18593 break;
18594 default:
18595 gcc_unreachable ();
18598 def_builtin (d->name, type, d->code);
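/* A usage sketch for the HTM builtins created here, following the
   documented GCC interface (assumes -mhtm); the atomic fallback path is
   this example's choice, not something the builtins mandate:  */
#if 0
#include <htmintrin.h>

int counter;

void
atomic_bump (void)
{
  if (__builtin_tbegin (0))  /* nonzero if the transaction started */
    {
      counter++;             /* transactional body */
      __builtin_tend (0);
    }
  else
    __atomic_fetch_add (&counter, 1, __ATOMIC_SEQ_CST);
}
#endif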
18602 /* Hash function for builtin functions with up to 3 arguments and a return
18603 type. */
18604 hashval_t
18605 builtin_hasher::hash (builtin_hash_struct *bh)
18607 unsigned ret = 0;
18608 int i;
18610 for (i = 0; i < 4; i++)
18612 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
18613 ret = (ret * 2) + bh->uns_p[i];
18616 return ret;
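/* A stand-alone sketch of the same combining step: the four
   (mode, unsignedness) pairs are packed into one mixed-radix value,
   with n_modes standing in for MAX_MACHINE_MODE:  */
#if 0
unsigned
hash4 (const unsigned mode[4], const unsigned uns[4], unsigned n_modes)
{
  unsigned ret = 0;
  for (int i = 0; i < 4; i++)
    ret = (ret * n_modes + mode[i]) * 2 + uns[i];
  return ret;
}
#endif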
18619 /* Compare builtin hash entries H1 and H2 for equivalence. */
18620 bool
18621 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
18623 return ((p1->mode[0] == p2->mode[0])
18624 && (p1->mode[1] == p2->mode[1])
18625 && (p1->mode[2] == p2->mode[2])
18626 && (p1->mode[3] == p2->mode[3])
18627 && (p1->uns_p[0] == p2->uns_p[0])
18628 && (p1->uns_p[1] == p2->uns_p[1])
18629 && (p1->uns_p[2] == p2->uns_p[2])
18630 && (p1->uns_p[3] == p2->uns_p[3]));
18633 /* Map types for builtin functions with an explicit return type and up to 3
18634 arguments.  Functions with fewer than 3 arguments use VOIDmode for the
18635 modes of the missing arguments.  */
18636 static tree
18637 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
18638 machine_mode mode_arg1, machine_mode mode_arg2,
18639 enum rs6000_builtins builtin, const char *name)
18641 struct builtin_hash_struct h;
18642 struct builtin_hash_struct *h2;
18643 int num_args = 3;
18644 int i;
18645 tree ret_type = NULL_TREE;
18646 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
18648 /* Create builtin_hash_table. */
18649 if (builtin_hash_table == NULL)
18650 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
18652 h.type = NULL_TREE;
18653 h.mode[0] = mode_ret;
18654 h.mode[1] = mode_arg0;
18655 h.mode[2] = mode_arg1;
18656 h.mode[3] = mode_arg2;
18657 h.uns_p[0] = 0;
18658 h.uns_p[1] = 0;
18659 h.uns_p[2] = 0;
18660 h.uns_p[3] = 0;
18662 /* If the builtin is a type that produces unsigned results or takes unsigned
18663 arguments, and it is returned as a decl for the vectorizer (such as
18664 widening multiplies, permute), make sure the arguments and return value
18665 are type correct. */
18666 switch (builtin)
18668 /* unsigned 1 argument functions. */
18669 case CRYPTO_BUILTIN_VSBOX:
18670 case P8V_BUILTIN_VGBBD:
18671 case MISC_BUILTIN_CDTBCD:
18672 case MISC_BUILTIN_CBCDTD:
18673 h.uns_p[0] = 1;
18674 h.uns_p[1] = 1;
18675 break;
18677 /* unsigned 2 argument functions. */
18678 case ALTIVEC_BUILTIN_VMULEUB:
18679 case ALTIVEC_BUILTIN_VMULEUH:
18680 case ALTIVEC_BUILTIN_VMULOUB:
18681 case ALTIVEC_BUILTIN_VMULOUH:
18682 case CRYPTO_BUILTIN_VCIPHER:
18683 case CRYPTO_BUILTIN_VCIPHERLAST:
18684 case CRYPTO_BUILTIN_VNCIPHER:
18685 case CRYPTO_BUILTIN_VNCIPHERLAST:
18686 case CRYPTO_BUILTIN_VPMSUMB:
18687 case CRYPTO_BUILTIN_VPMSUMH:
18688 case CRYPTO_BUILTIN_VPMSUMW:
18689 case CRYPTO_BUILTIN_VPMSUMD:
18690 case CRYPTO_BUILTIN_VPMSUM:
18691 case MISC_BUILTIN_ADDG6S:
18692 case MISC_BUILTIN_DIVWEU:
18693 case MISC_BUILTIN_DIVWEUO:
18694 case MISC_BUILTIN_DIVDEU:
18695 case MISC_BUILTIN_DIVDEUO:
18696 h.uns_p[0] = 1;
18697 h.uns_p[1] = 1;
18698 h.uns_p[2] = 1;
18699 break;
18701 /* unsigned 3 argument functions. */
18702 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
18703 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
18704 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
18705 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
18706 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
18707 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
18708 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
18709 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
18710 case VSX_BUILTIN_VPERM_16QI_UNS:
18711 case VSX_BUILTIN_VPERM_8HI_UNS:
18712 case VSX_BUILTIN_VPERM_4SI_UNS:
18713 case VSX_BUILTIN_VPERM_2DI_UNS:
18714 case VSX_BUILTIN_XXSEL_16QI_UNS:
18715 case VSX_BUILTIN_XXSEL_8HI_UNS:
18716 case VSX_BUILTIN_XXSEL_4SI_UNS:
18717 case VSX_BUILTIN_XXSEL_2DI_UNS:
18718 case CRYPTO_BUILTIN_VPERMXOR:
18719 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
18720 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
18721 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
18722 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
18723 case CRYPTO_BUILTIN_VSHASIGMAW:
18724 case CRYPTO_BUILTIN_VSHASIGMAD:
18725 case CRYPTO_BUILTIN_VSHASIGMA:
18726 h.uns_p[0] = 1;
18727 h.uns_p[1] = 1;
18728 h.uns_p[2] = 1;
18729 h.uns_p[3] = 1;
18730 break;
18732 /* signed permute functions with unsigned char mask. */
18733 case ALTIVEC_BUILTIN_VPERM_16QI:
18734 case ALTIVEC_BUILTIN_VPERM_8HI:
18735 case ALTIVEC_BUILTIN_VPERM_4SI:
18736 case ALTIVEC_BUILTIN_VPERM_4SF:
18737 case ALTIVEC_BUILTIN_VPERM_2DI:
18738 case ALTIVEC_BUILTIN_VPERM_2DF:
18739 case VSX_BUILTIN_VPERM_16QI:
18740 case VSX_BUILTIN_VPERM_8HI:
18741 case VSX_BUILTIN_VPERM_4SI:
18742 case VSX_BUILTIN_VPERM_4SF:
18743 case VSX_BUILTIN_VPERM_2DI:
18744 case VSX_BUILTIN_VPERM_2DF:
18745 h.uns_p[3] = 1;
18746 break;
18748 /* unsigned args, signed return. */
18749 case VSX_BUILTIN_XVCVUXDSP:
18750 case VSX_BUILTIN_XVCVUXDDP_UNS:
18751 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
18752 h.uns_p[1] = 1;
18753 break;
18755 /* signed args, unsigned return. */
18756 case VSX_BUILTIN_XVCVDPUXDS_UNS:
18757 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
18758 case MISC_BUILTIN_UNPACK_TD:
18759 case MISC_BUILTIN_UNPACK_V1TI:
18760 h.uns_p[0] = 1;
18761 break;
18763 /* unsigned arguments for 128-bit pack instructions. */
18764 case MISC_BUILTIN_PACK_TD:
18765 case MISC_BUILTIN_PACK_V1TI:
18766 h.uns_p[1] = 1;
18767 h.uns_p[2] = 1;
18768 break;
18770 default:
18771 break;
18774 /* Figure out how many args are present. */
18775 while (num_args > 0 && h.mode[num_args] == VOIDmode)
18776 num_args--;
18778 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
18779 if (!ret_type && h.uns_p[0])
18780 ret_type = builtin_mode_to_type[h.mode[0]][0];
18782 if (!ret_type)
18783 fatal_error (input_location,
18784 "internal error: builtin function %s had an unexpected "
18785 "return type %s", name, GET_MODE_NAME (h.mode[0]));
18787 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
18788 arg_type[i] = NULL_TREE;
18790 for (i = 0; i < num_args; i++)
18792 int m = (int) h.mode[i+1];
18793 int uns_p = h.uns_p[i+1];
18795 arg_type[i] = builtin_mode_to_type[m][uns_p];
18796 if (!arg_type[i] && uns_p)
18797 arg_type[i] = builtin_mode_to_type[m][0];
18799 if (!arg_type[i])
18800 fatal_error (input_location,
18801 "internal error: builtin function %s, argument %d "
18802 "had unexpected argument type %s", name, i,
18803 GET_MODE_NAME (m));
18806 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
18807 if (*found == NULL)
18809 h2 = ggc_alloc<builtin_hash_struct> ();
18810 *h2 = h;
18811 *found = h2;
18813 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
18814 arg_type[2], NULL_TREE);
18817 return (*found)->type;
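/* For example, a request for (V4SImode; V4SImode, V4SImode, VOIDmode)
   with no unsignedness overrides returns the cached tree for

     vector signed int f (vector signed int, vector signed int);

   whereas builtins listed in the unsigned cases above get the matching
   unsigned vector types instead.  */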
18820 static void
18821 rs6000_common_init_builtins (void)
18823 const struct builtin_description *d;
18824 size_t i;
18826 tree opaque_ftype_opaque = NULL_TREE;
18827 tree opaque_ftype_opaque_opaque = NULL_TREE;
18828 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
18829 tree v2si_ftype = NULL_TREE;
18830 tree v2si_ftype_qi = NULL_TREE;
18831 tree v2si_ftype_v2si_qi = NULL_TREE;
18832 tree v2si_ftype_int_qi = NULL_TREE;
18833 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18835 if (!TARGET_PAIRED_FLOAT)
18837 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18838 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18841 /* Paired and SPE builtins are only available if you build a compiler with
18842 the appropriate options, so only create those builtins with the
18843 appropriate compiler option. Create Altivec and VSX builtins on machines
18844 with at least the general purpose extensions (970 and newer) to allow the
18845 use of the target attribute.  */
18847 if (TARGET_EXTRA_BUILTINS)
18848 builtin_mask |= RS6000_BTM_COMMON;
18850 /* Add the ternary operators. */
18851 d = bdesc_3arg;
18852 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18854 tree type;
18855 HOST_WIDE_INT mask = d->mask;
18857 if ((mask & builtin_mask) != mask)
18859 if (TARGET_DEBUG_BUILTIN)
18860 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18861 continue;
18864 if (rs6000_overloaded_builtin_p (d->code))
18866 if (! (type = opaque_ftype_opaque_opaque_opaque))
18867 type = opaque_ftype_opaque_opaque_opaque
18868 = build_function_type_list (opaque_V4SI_type_node,
18869 opaque_V4SI_type_node,
18870 opaque_V4SI_type_node,
18871 opaque_V4SI_type_node,
18872 NULL_TREE);
18874 else
18876 enum insn_code icode = d->icode;
18877 if (d->name == 0)
18879 if (TARGET_DEBUG_BUILTIN)
18880 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
18881 (long unsigned)i);
18883 continue;
18886 if (icode == CODE_FOR_nothing)
18888 if (TARGET_DEBUG_BUILTIN)
18889 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18890 d->name);
18892 continue;
18895 type = builtin_function_type (insn_data[icode].operand[0].mode,
18896 insn_data[icode].operand[1].mode,
18897 insn_data[icode].operand[2].mode,
18898 insn_data[icode].operand[3].mode,
18899 d->code, d->name);
18902 def_builtin (d->name, type, d->code);
18905 /* Add the binary operators. */
18906 d = bdesc_2arg;
18907 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18909 machine_mode mode0, mode1, mode2;
18910 tree type;
18911 HOST_WIDE_INT mask = d->mask;
18913 if ((mask & builtin_mask) != mask)
18915 if (TARGET_DEBUG_BUILTIN)
18916 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18917 continue;
18920 if (rs6000_overloaded_builtin_p (d->code))
18922 if (! (type = opaque_ftype_opaque_opaque))
18923 type = opaque_ftype_opaque_opaque
18924 = build_function_type_list (opaque_V4SI_type_node,
18925 opaque_V4SI_type_node,
18926 opaque_V4SI_type_node,
18927 NULL_TREE);
18929 else
18931 enum insn_code icode = d->icode;
18932 if (d->name == 0)
18934 if (TARGET_DEBUG_BUILTIN)
18935 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
18936 (long unsigned)i);
18938 continue;
18941 if (icode == CODE_FOR_nothing)
18943 if (TARGET_DEBUG_BUILTIN)
18944 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18945 d->name);
18947 continue;
18950 mode0 = insn_data[icode].operand[0].mode;
18951 mode1 = insn_data[icode].operand[1].mode;
18952 mode2 = insn_data[icode].operand[2].mode;
18954 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18956 if (! (type = v2si_ftype_v2si_qi))
18957 type = v2si_ftype_v2si_qi
18958 = build_function_type_list (opaque_V2SI_type_node,
18959 opaque_V2SI_type_node,
18960 char_type_node,
18961 NULL_TREE);
18964 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18965 && mode2 == QImode)
18967 if (! (type = v2si_ftype_int_qi))
18968 type = v2si_ftype_int_qi
18969 = build_function_type_list (opaque_V2SI_type_node,
18970 integer_type_node,
18971 char_type_node,
18972 NULL_TREE);
18975 else
18976 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18977 d->code, d->name);
18980 def_builtin (d->name, type, d->code);
18983 /* Add the simple unary operators. */
18984 d = bdesc_1arg;
18985 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18987 machine_mode mode0, mode1;
18988 tree type;
18989 HOST_WIDE_INT mask = d->mask;
18991 if ((mask & builtin_mask) != mask)
18993 if (TARGET_DEBUG_BUILTIN)
18994 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18995 continue;
18998 if (rs6000_overloaded_builtin_p (d->code))
19000 if (! (type = opaque_ftype_opaque))
19001 type = opaque_ftype_opaque
19002 = build_function_type_list (opaque_V4SI_type_node,
19003 opaque_V4SI_type_node,
19004 NULL_TREE);
19006 else
19008 enum insn_code icode = d->icode;
19009 if (d->name == 0)
19011 if (TARGET_DEBUG_BUILTIN)
19012 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
19013 (long unsigned)i);
19015 continue;
19018 if (icode == CODE_FOR_nothing)
19020 if (TARGET_DEBUG_BUILTIN)
19021 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
19022 d->name);
19024 continue;
19027 mode0 = insn_data[icode].operand[0].mode;
19028 mode1 = insn_data[icode].operand[1].mode;
19030 if (mode0 == V2SImode && mode1 == QImode)
19032 if (! (type = v2si_ftype_qi))
19033 type = v2si_ftype_qi
19034 = build_function_type_list (opaque_V2SI_type_node,
19035 char_type_node,
19036 NULL_TREE);
19039 else
19040 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
19041 d->code, d->name);
19044 def_builtin (d->name, type, d->code);
19047 /* Add the simple no-argument operators. */
19048 d = bdesc_0arg;
19049 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
19051 machine_mode mode0;
19052 tree type;
19053 HOST_WIDE_INT mask = d->mask;
19055 if ((mask & builtin_mask) != mask)
19057 if (TARGET_DEBUG_BUILTIN)
19058 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
19059 continue;
19061 if (rs6000_overloaded_builtin_p (d->code))
19063 if (!opaque_ftype_opaque)
19064 opaque_ftype_opaque
19065 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
19066 type = opaque_ftype_opaque;
19068 else
19070 enum insn_code icode = d->icode;
19071 if (d->name == 0)
19073 if (TARGET_DEBUG_BUILTIN)
19074 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
19075 (long unsigned) i);
19076 continue;
19078 if (icode == CODE_FOR_nothing)
19080 if (TARGET_DEBUG_BUILTIN)
19081 fprintf (stderr,
19082 "rs6000_builtin, skip no-argument %s (no code)\n",
19083 d->name);
19084 continue;
19086 mode0 = insn_data[icode].operand[0].mode;
19087 if (mode0 == V2SImode)
19089 /* code for SPE */
19090 if (! (type = v2si_ftype))
19092 v2si_ftype
19093 = build_function_type_list (opaque_V2SI_type_node,
19094 NULL_TREE);
19095 type = v2si_ftype;
19098 else
19099 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
19100 d->code, d->name);
19102 def_builtin (d->name, type, d->code);
19106 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
19107 static void
19108 init_float128_ibm (machine_mode mode)
19110 if (!TARGET_XL_COMPAT)
19112 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
19113 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
19114 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
19115 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
19117 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
19119 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
19120 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
19121 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
19122 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
19123 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
19124 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
19125 set_optab_libfunc (le_optab, mode, "__gcc_qle");
19127 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
19128 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
19129 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
19130 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
19131 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
19132 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
19133 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
19134 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
19137 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
19138 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
19140 else
19142 set_optab_libfunc (add_optab, mode, "_xlqadd");
19143 set_optab_libfunc (sub_optab, mode, "_xlqsub");
19144 set_optab_libfunc (smul_optab, mode, "_xlqmul");
19145 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
19148 /* Add various conversions for IFmode to use the traditional TFmode
19149 names. */
19150 if (mode == IFmode)
19152 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
19153 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
19154 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
19155 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
19156 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
19157 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
19159 if (TARGET_POWERPC64)
19161 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
19162 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
19163 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
19164 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
19169 /* Set up IEEE 128-bit floating point routines. Use different names if the
19170 arguments can be passed in a vector register. The historical PowerPC
19171 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
19172 continue to use that if we aren't using vector registers to pass IEEE
19173 128-bit floating point. */
19175 static void
19176 init_float128_ieee (machine_mode mode)
19178 if (FLOAT128_VECTOR_P (mode))
19180 set_optab_libfunc (add_optab, mode, "__addkf3");
19181 set_optab_libfunc (sub_optab, mode, "__subkf3");
19182 set_optab_libfunc (neg_optab, mode, "__negkf2");
19183 set_optab_libfunc (smul_optab, mode, "__mulkf3");
19184 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
19185 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
19186 set_optab_libfunc (abs_optab, mode, "__abskf2");
19188 set_optab_libfunc (eq_optab, mode, "__eqkf2");
19189 set_optab_libfunc (ne_optab, mode, "__nekf2");
19190 set_optab_libfunc (gt_optab, mode, "__gtkf2");
19191 set_optab_libfunc (ge_optab, mode, "__gekf2");
19192 set_optab_libfunc (lt_optab, mode, "__ltkf2");
19193 set_optab_libfunc (le_optab, mode, "__lekf2");
19194 set_optab_libfunc (unord_optab, mode, "__unordkf2");
19196 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
19197 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
19198 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
19199 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
19201 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
19202 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19203 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
19205 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
19206 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19207 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
19209 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
19210 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
19211 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
19212 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
19213 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
19214 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
19216 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
19217 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
19218 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
19219 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
19221 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
19222 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
19223 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
19224 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
19226 if (TARGET_POWERPC64)
19228 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
19229 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
19230 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
19231 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
19235 else
19237 set_optab_libfunc (add_optab, mode, "_q_add");
19238 set_optab_libfunc (sub_optab, mode, "_q_sub");
19239 set_optab_libfunc (neg_optab, mode, "_q_neg");
19240 set_optab_libfunc (smul_optab, mode, "_q_mul");
19241 set_optab_libfunc (sdiv_optab, mode, "_q_div");
19242 if (TARGET_PPC_GPOPT)
19243 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
19245 set_optab_libfunc (eq_optab, mode, "_q_feq");
19246 set_optab_libfunc (ne_optab, mode, "_q_fne");
19247 set_optab_libfunc (gt_optab, mode, "_q_fgt");
19248 set_optab_libfunc (ge_optab, mode, "_q_fge");
19249 set_optab_libfunc (lt_optab, mode, "_q_flt");
19250 set_optab_libfunc (le_optab, mode, "_q_fle");
19252 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
19253 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
19254 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
19255 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
19256 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
19257 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
19258 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
19259 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
19263 static void
19264 rs6000_init_libfuncs (void)
19266 /* __float128 support. */
19267 if (TARGET_FLOAT128_TYPE)
19269 init_float128_ibm (IFmode);
19270 init_float128_ieee (KFmode);
19273 /* AIX/Darwin/64-bit Linux quad floating point routines. */
19274 if (TARGET_LONG_DOUBLE_128)
19276 if (!TARGET_IEEEQUAD)
19277 init_float128_ibm (TFmode);
19279 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
19280 else
19281 init_float128_ieee (TFmode);
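/* Once these names are registered, a __float128 operation with no inline
   expansion becomes a libcall.  A sketch, assuming -mfloat128 on a target
   without hardware IEEE 128-bit arithmetic, where KFmode values travel in
   vector registers and so use the __addkf3-style names from
   init_float128_ieee above:  */
#if 0
__float128
add_q (__float128 a, __float128 b)
{
  return a + b; /* emitted as a call to __addkf3 */
}
#endif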
19286 /* Expand a block clear operation, and return 1 if successful. Return 0
19287 if we should let the compiler generate normal code.
19289 operands[0] is the destination
19290 operands[1] is the length
19291 operands[3] is the alignment */
19293 int
19294 expand_block_clear (rtx operands[])
19296 rtx orig_dest = operands[0];
19297 rtx bytes_rtx = operands[1];
19298 rtx align_rtx = operands[3];
19299 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
19300 HOST_WIDE_INT align;
19301 HOST_WIDE_INT bytes;
19302 int offset;
19303 int clear_bytes;
19304 int clear_step;
19306 /* If this is not a fixed size clear, just call memset.  */
19307 if (! constp)
19308 return 0;
19310 /* The alignment must be a compile-time constant.  */
19311 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19312 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19314 /* Anything to clear? */
19315 bytes = INTVAL (bytes_rtx);
19316 if (bytes <= 0)
19317 return 1;
19319 /* Use the builtin memset after a point, to avoid huge code bloat.
19320 When optimize_size, avoid any significant code bloat; calling
19321 memset is about 4 instructions, so allow for one instruction to
19322 load zero and three to do clearing. */
19323 if (TARGET_ALTIVEC && align >= 128)
19324 clear_step = 16;
19325 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
19326 clear_step = 8;
19327 else if (TARGET_SPE && align >= 64)
19328 clear_step = 8;
19329 else
19330 clear_step = 4;
19332 if (optimize_size && bytes > 3 * clear_step)
19333 return 0;
19334 if (! optimize_size && bytes > 8 * clear_step)
19335 return 0;
19337 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
19339 machine_mode mode = BLKmode;
19340 rtx dest;
19342 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
19344 clear_bytes = 16;
19345 mode = V4SImode;
19347 else if (bytes >= 8 && TARGET_SPE && align >= 64)
19349 clear_bytes = 8;
19350 mode = V2SImode;
19352 else if (bytes >= 8 && TARGET_POWERPC64
19353 && (align >= 64 || !STRICT_ALIGNMENT))
19355 clear_bytes = 8;
19356 mode = DImode;
19357 if (offset == 0 && align < 64)
19359 rtx addr;
19361 /* If the address form is reg+offset with offset not a
19362 multiple of four, reload into reg indirect form here
19363 rather than waiting for reload. This way we get one
19364 reload, not one per store. */
19365 addr = XEXP (orig_dest, 0);
19366 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19367 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19368 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19370 addr = copy_addr_to_reg (addr);
19371 orig_dest = replace_equiv_address (orig_dest, addr);
19375 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19376 { /* clear 4 bytes */
19377 clear_bytes = 4;
19378 mode = SImode;
19380 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19381 { /* clear 2 bytes */
19382 clear_bytes = 2;
19383 mode = HImode;
19385 else /* clear 1 byte at a time */
19387 clear_bytes = 1;
19388 mode = QImode;
19391 dest = adjust_address (orig_dest, mode, offset);
19393 emit_move_insn (dest, CONST0_RTX (mode));
19396 return 1;
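/* A source-level sketch of the expansion above: with AltiVec and a
   destination known to be 16-byte aligned, a small fixed-size clear is
   emitted as a handful of zero stores rather than a memset call
   (assuming the size limits above are respected):  */
#if 0
#include <string.h>

void
clear32 (vector float *p) /* vector type implies 16-byte alignment */
{
  memset (p, 0, 32);      /* becomes two 16-byte V4SI stores of zero */
}
#endif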
19399 /* Emit a potentially record-form instruction, setting DST from SRC.
19400 If DOT is 0, that is all; otherwise, set CCREG to the result of the
19401 signed comparison of DST with zero. If DOT is 1, the generated RTL
19402 doesn't care about the DST result; if DOT is 2, it does. If CCREG
19403 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
19404 a separate COMPARE. */
19406 static void
19407 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
19409 if (dot == 0)
19411 emit_move_insn (dst, src);
19412 return;
19415 if (cc_reg_not_cr0_operand (ccreg, CCmode))
19417 emit_move_insn (dst, src);
19418 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
19419 return;
19422 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
19423 if (dot == 1)
19425 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
19426 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
19428 else
19430 rtx set = gen_rtx_SET (dst, src);
19431 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
19435 /* Figure out the correct instructions to generate to load data for
19436 block compare. MODE is used for the read from memory, and
19437 data is zero extended if REG is wider than MODE. If LE code
19438 is being generated, bswap loads are used.
19440 REG is the destination register to move the data into.
19441 MEM is the memory block being read.
19442 MODE is the mode of memory to use for the read. */
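/* For example, on a little-endian target an HImode read into a DImode
   REG emits a byte-reversing load (bswaphi2) into an HImode temporary
   and then a zero-extend of that temporary into REG. */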
19443 static void
19444 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
19446 switch (GET_MODE (reg))
19448 case DImode:
19449 switch (mode)
19451 case QImode:
19452 emit_insn (gen_zero_extendqidi2 (reg, mem));
19453 break;
19454 case HImode:
19456 rtx src = mem;
19457 if (!BYTES_BIG_ENDIAN)
19459 src = gen_reg_rtx (HImode);
19460 emit_insn (gen_bswaphi2 (src, mem));
19462 emit_insn (gen_zero_extendhidi2 (reg, src));
19463 break;
19465 case SImode:
19467 rtx src = mem;
19468 if (!BYTES_BIG_ENDIAN)
19470 src = gen_reg_rtx (SImode);
19471 emit_insn (gen_bswapsi2 (src, mem));
19473 emit_insn (gen_zero_extendsidi2 (reg, src));
19475 break;
19476 case DImode:
19477 if (!BYTES_BIG_ENDIAN)
19478 emit_insn (gen_bswapdi2 (reg, mem));
19479 else
19480 emit_insn (gen_movdi (reg, mem));
19481 break;
19482 default:
19483 gcc_unreachable ();
19485 break;
19487 case SImode:
19488 switch (mode)
19490 case QImode:
19491 emit_insn (gen_zero_extendqisi2 (reg, mem));
19492 break;
19493 case HImode:
19495 rtx src = mem;
19496 if (!BYTES_BIG_ENDIAN)
19498 src = gen_reg_rtx (HImode);
19499 emit_insn (gen_bswaphi2 (src, mem));
19501 emit_insn (gen_zero_extendhisi2 (reg, src));
19502 break;
19504 case SImode:
19505 if (!BYTES_BIG_ENDIAN)
19506 emit_insn (gen_bswapsi2 (reg, mem));
19507 else
19508 emit_insn (gen_movsi (reg, mem));
19509 break;
19510 case DImode:
19511 /* DImode is larger than the destination reg, so it is not expected. */
19512 gcc_unreachable ();
19513 break;
19514 default:
19515 gcc_unreachable ();
19517 break;
19518 default:
19519 gcc_unreachable ();
19520 break;
19524 /* Select the mode to be used for reading the next chunk of bytes
19525 in the compare.
19527 OFFSET is the current read offset from the beginning of the block.
19528 BYTES is the number of bytes remaining to be read.
19529 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
19530 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
19531 the largest allowable mode. */
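/* Worked example: with word_mode DImode and 3 bytes left at offset 5,
   the SImode overlap case below fires (offset >= 4 - 3), so we return
   SImode; the caller can then back the load up one byte so it overlaps
   the previous read instead of shifting unwanted bytes off. */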
19532 static machine_mode
19533 select_block_compare_mode (unsigned HOST_WIDE_INT offset,
19534 unsigned HOST_WIDE_INT bytes,
19535 unsigned HOST_WIDE_INT align, bool word_mode_ok)
19537 /* First see if we can do a whole load unit
19538 as that will be more efficient than a larger load + shift. */
19540 /* If big, use biggest chunk.
19541 If exactly chunk size, use that size.
19542 If remainder can be done in one piece with shifting, do that.
19543 Do largest chunk possible without violating alignment rules. */
19545 /* The most we can read without potential page crossing. */
19546 unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
19548 if (word_mode_ok && bytes >= UNITS_PER_WORD)
19549 return word_mode;
19550 else if (bytes == GET_MODE_SIZE (SImode))
19551 return SImode;
19552 else if (bytes == GET_MODE_SIZE (HImode))
19553 return HImode;
19554 else if (bytes == GET_MODE_SIZE (QImode))
19555 return QImode;
19556 else if (bytes < GET_MODE_SIZE (SImode)
19557 && offset >= GET_MODE_SIZE (SImode) - bytes)
19558 /* This matches the case where we have SImode and 3 bytes
19559 and offset >= 1 and permits us to move back one and overlap
19560 with the previous read, thus avoiding having to shift
19561 unwanted bytes off of the input. */
19562 return SImode;
19563 else if (word_mode_ok && bytes < UNITS_PER_WORD
19564 && offset >= UNITS_PER_WORD - bytes)
19565 /* Similarly, if we can use DImode it will get matched here and
19566 can do an overlapping read that ends at the end of the block. */
19567 return word_mode;
19568 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
19569 /* It is safe to do all remaining in one load of largest size,
19570 possibly with a shift to get rid of unwanted bytes. */
19571 return word_mode;
19572 else if (maxread >= GET_MODE_SIZE (SImode))
19573 /* It is safe to do all remaining in one SImode load,
19574 possibly with a shift to get rid of unwanted bytes. */
19575 return SImode;
19576 else if (bytes > GET_MODE_SIZE (SImode))
19577 return SImode;
19578 else if (bytes > GET_MODE_SIZE (HImode))
19579 return HImode;
19581 /* The final fallback is to do one byte at a time. */
19582 return QImode;
19585 /* Compute the alignment of pointer+OFFSET where the original alignment
19586 of pointer was BASE_ALIGN. */
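/* Note that offset & -offset isolates the lowest set bit of OFFSET;
   e.g. an 8-byte-aligned base at offset 12 yields min (8, 4) = 4. */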
19587 static unsigned HOST_WIDE_INT
19588 compute_current_alignment (unsigned HOST_WIDE_INT base_align,
19589 unsigned HOST_WIDE_INT offset)
19591 if (offset == 0)
19592 return base_align;
19593 return min (base_align, offset & -offset);
19596 /* Expand a block compare operation, and return true if successful.
19597 Return false if we should let the compiler generate normal code,
19598 probably a memcmp call.
19600 OPERANDS[0] is the target (result).
19601 OPERANDS[1] is the first source.
19602 OPERANDS[2] is the second source.
19603 OPERANDS[3] is the length.
19604 OPERANDS[4] is the alignment. */
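/* For example, a 16-byte compare of 8-byte-aligned blocks on a 64-bit
   P9 target should expand to two ld/ld/cmpld blocks, the first of which
   branches to the conversion code on inequality, followed by a setb to
   produce the SImode result. */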
19605 bool
19606 expand_block_compare (rtx operands[])
19608 rtx target = operands[0];
19609 rtx orig_src1 = operands[1];
19610 rtx orig_src2 = operands[2];
19611 rtx bytes_rtx = operands[3];
19612 rtx align_rtx = operands[4];
19613 HOST_WIDE_INT cmp_bytes = 0;
19614 rtx src1 = orig_src1;
19615 rtx src2 = orig_src2;
19617 /* This case is complicated to handle because the subtract
19618 with carry instructions do not generate the 64-bit
19619 carry and so we must emit code to calculate it ourselves.
19620 We choose not to implement this yet. */
19621 if (TARGET_32BIT && TARGET_POWERPC64)
19622 return false;
19624 /* If this is not a fixed size compare, just call memcmp. */
19625 if (!CONST_INT_P (bytes_rtx))
19626 return false;
19628 /* This must be a fixed size alignment. */
19629 if (!CONST_INT_P (align_rtx))
19630 return false;
19632 unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;
19634 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */
19635 if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
19636 || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
19637 return false;
19639 gcc_assert (GET_MODE (target) == SImode);
19641 /* Anything to compare? */
19642 unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
19643 if (bytes == 0)
19644 return true;
19646 /* The code generated for p7 and older is not faster than glibc
19647 memcmp if alignment is small and length is not short, so bail
19648 out to avoid those conditions. */
19649 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
19650 && ((base_align == 1 && bytes > 16)
19651 || (base_align == 2 && bytes > 32)))
19652 return false;
19654 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
19655 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
19656 /* P7/P8 code uses cond for subfc., but P9 uses
19657 it for cmpld, which needs CCUNSmode. */
19658 rtx cond;
19659 if (TARGET_P9_MISC)
19660 cond = gen_reg_rtx (CCUNSmode);
19661 else
19662 cond = gen_reg_rtx (CCmode);
19664 /* If we have an LE target without ldbrx and word_mode is DImode,
19665 then we must avoid using word_mode. */
19666 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
19667 && word_mode == DImode);
19669 /* Strategy phase. How many ops will this take and should we expand it? */
19671 unsigned HOST_WIDE_INT offset = 0;
19672 machine_mode load_mode =
19673 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
19674 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
19676 /* We don't want to generate too much code. */
19677 unsigned HOST_WIDE_INT max_bytes =
19678 load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit;
19679 if (!IN_RANGE (bytes, 1, max_bytes))
19680 return false;
19682 bool generate_6432_conversion = false;
19683 rtx convert_label = NULL;
19684 rtx final_label = NULL;
19686 /* Example of code generated for comparing 18 bytes with 1-byte alignment.
19687 Compiled with -fno-reorder-blocks for clarity.
19688 ldbrx 10,31,8
19689 ldbrx 9,7,8
19690 subfc. 9,9,10
19691 bne 0,.L6487
19692 addi 9,12,8
19693 addi 5,11,8
19694 ldbrx 10,0,9
19695 ldbrx 9,0,5
19696 subfc. 9,9,10
19697 bne 0,.L6487
19698 addi 9,12,16
19699 lhbrx 10,0,9
19700 addi 9,11,16
19701 lhbrx 9,0,9
19702 subf 9,9,10
19703 b .L6488
19704 .p2align 4,,15
19705 .L6487: #convert_label
19706 popcntd 9,9
19707 subfe 10,10,10
19708 or 9,9,10
19709 .L6488: #final_label
19710 extsw 10,9
19712 We start off with DImode for two blocks that jump to the DI->SI conversion
19713 if the difference is found there, then a final block of HImode that skips
19714 the DI->SI conversion. */
19716 while (bytes > 0)
19718 unsigned int align = compute_current_alignment (base_align, offset);
19719 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19720 load_mode = select_block_compare_mode (offset, bytes, align,
19721 word_mode_ok);
19722 else
19723 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
19724 load_mode_size = GET_MODE_SIZE (load_mode);
19725 if (bytes >= load_mode_size)
19726 cmp_bytes = load_mode_size;
19727 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19729 /* Move this load back so it doesn't go past the end.
19730 P8/P9 can do this efficiently. */
19731 unsigned int extra_bytes = load_mode_size - bytes;
19732 cmp_bytes = bytes;
19733 if (extra_bytes < offset)
19735 offset -= extra_bytes;
19736 cmp_bytes = load_mode_size;
19737 bytes = cmp_bytes;
19740 else
19741 /* P7 and earlier can't do the overlapping load trick fast,
19742 so this forces a non-overlapping load and a shift to get
19743 rid of the extra bytes. */
19744 cmp_bytes = bytes;
19746 src1 = adjust_address (orig_src1, load_mode, offset);
19747 src2 = adjust_address (orig_src2, load_mode, offset);
19749 if (!REG_P (XEXP (src1, 0)))
19751 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
19752 src1 = replace_equiv_address (src1, src1_reg);
19754 set_mem_size (src1, cmp_bytes);
19756 if (!REG_P (XEXP (src2, 0)))
19758 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
19759 src2 = replace_equiv_address (src2, src2_reg);
19761 set_mem_size (src2, cmp_bytes);
19763 do_load_for_compare (tmp_reg_src1, src1, load_mode);
19764 do_load_for_compare (tmp_reg_src2, src2, load_mode);
19766 if (cmp_bytes < load_mode_size)
19768 /* Shift unneeded bytes off. */
19769 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
19770 if (word_mode == DImode)
19772 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
19773 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
19775 else
19777 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
19778 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
19782 int remain = bytes - cmp_bytes;
19783 if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode))
19785 /* Target is larger than load size so we don't need to
19786 reduce result size. */
19788 /* We previously did a block that needed 64->32 conversion but
19789 the current block does not, so a label is needed to jump
19790 to the end. */
19791 if (generate_6432_conversion && !final_label)
19792 final_label = gen_label_rtx ();
19794 if (remain > 0)
19796 /* This is not the last block; branch to the end if the result
19797 of this subtract is not zero. */
19798 if (!final_label)
19799 final_label = gen_label_rtx ();
19800 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19801 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
19802 rtx cr = gen_reg_rtx (CCmode);
19803 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr);
19804 emit_insn (gen_movsi (target,
19805 gen_lowpart (SImode, tmp_reg_src2)));
19806 rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
19807 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19808 fin_ref, pc_rtx);
19809 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19810 JUMP_LABEL (j) = final_label;
19811 LABEL_NUSES (final_label) += 1;
19813 else
19815 if (word_mode == DImode)
19817 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19818 tmp_reg_src2));
19819 emit_insn (gen_movsi (target,
19820 gen_lowpart (SImode, tmp_reg_src2)));
19822 else
19823 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
19825 if (final_label)
19827 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19828 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
19829 JUMP_LABEL (j) = final_label;
19830 LABEL_NUSES (final_label) += 1;
19831 emit_barrier ();
19835 else
19837 /* Do we need a 64->32 conversion block? We need the 64->32
19838 conversion even if target size == load_mode size because
19839 the subtract generates one extra bit. */
19840 generate_6432_conversion = true;
19842 if (remain > 0)
19844 if (!convert_label)
19845 convert_label = gen_label_rtx ();
19847 /* Compare to zero and branch to convert_label if not zero. */
19848 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
19849 if (TARGET_P9_MISC)
19851 /* Generate a compare, and convert with a setb later. */
19852 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
19853 tmp_reg_src2);
19854 emit_insn (gen_rtx_SET (cond, cmp));
19856 else
19857 /* Generate a subfc. and use the longer
19858 sequence for conversion. */
19859 if (TARGET_64BIT)
19860 emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
19861 tmp_reg_src1, cond));
19862 else
19863 emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
19864 tmp_reg_src1, cond));
19865 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19866 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19867 cvt_ref, pc_rtx);
19868 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19869 JUMP_LABEL (j) = convert_label;
19870 LABEL_NUSES (convert_label) += 1;
19872 else
19874 /* Just do the subtract/compare. Since this is the last block,
19875 the convert code will be generated immediately following. */
19876 if (TARGET_P9_MISC)
19878 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
19879 tmp_reg_src2);
19880 emit_insn (gen_rtx_SET (cond, cmp));
19882 else
19883 if (TARGET_64BIT)
19884 emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2,
19885 tmp_reg_src1));
19886 else
19887 emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2,
19888 tmp_reg_src1));
19892 offset += cmp_bytes;
19893 bytes -= cmp_bytes;
19896 if (generate_6432_conversion)
19898 if (convert_label)
19899 emit_label (convert_label);
19901 /* We need to produce a DI result from the subtract, then convert to the SI target
19902 while maintaining <0 / ==0 / >0 properties. This sequence works:
19903 subfc L,A,B
19904 subfe H,H,H
19905 popcntd L,L
19906 rldimi L,H,6,0
19908 This is an alternate one Segher cooked up if somebody
19909 wants to expand this for something that doesn't have popcntd:
19910 subfc L,a,b
19911 subfe H,x,x
19912 addic t,L,-1
19913 subfe v,t,L
19914 or z,v,H
19916 And finally, p9 can just do this:
19917 cmpld A,B
19918 setb r */
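/* Why the first sequence works: subfc leaves CA = 1 exactly when the
   subtraction needs no borrow, so subfe H,H,H computes H = CA - 1,
   i.e. 0 for no borrow and -1 for borrow.  popcntd maps any nonzero
   low-part difference L to a small positive value, and combining that
   with H yields a result that is negative, zero, or positive exactly
   as required.  (The code below does the combine with an OR rather
   than the rldimi shown above.) */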
19920 if (TARGET_P9_MISC)
19922 emit_insn (gen_setb_unsigned (target, cond));
19924 else
19926 if (TARGET_64BIT)
19928 rtx tmp_reg_ca = gen_reg_rtx (DImode);
19929 emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
19930 emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2));
19931 emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca));
19932 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
19934 else
19936 rtx tmp_reg_ca = gen_reg_rtx (SImode);
19937 emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
19938 emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2));
19939 emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca));
19944 if (final_label)
19945 emit_label (final_label);
19947 gcc_assert (bytes == 0);
19948 return true;
19951 /* Generate alignment check and branch code to set up for
19952 strncmp when we don't have DI alignment.
19953 STRNCMP_LABEL is the label to branch if there is a page crossing.
19954 SRC is the string pointer to be examined.
19955 BYTES is the max number of bytes to compare. */
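/* E.g. with BYTES = 16 this branches to STRNCMP_LABEL whenever the low
   12 bits of the address are at least 4096 - 16 = 4080, i.e. whenever
   a 16-byte read starting there could (conservatively) cross into the
   next 4K page. */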
19956 static void
19957 expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
19959 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
19960 rtx src_check = copy_addr_to_reg (XEXP (src, 0));
19961 if (GET_MODE (src_check) == SImode)
19962 emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
19963 else
19964 emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
19965 rtx cond = gen_reg_rtx (CCmode);
19966 emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
19967 GEN_INT (4096 - bytes)));
19969 rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);
19971 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
19972 pc_rtx, lab_ref);
19973 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19974 JUMP_LABEL (j) = strncmp_label;
19975 LABEL_NUSES (strncmp_label) += 1;
19978 /* Expand a string compare operation with length, and return
19979 true if successful. Return false if we should let the
19980 compiler generate normal code, probably a strncmp call.
19982 OPERANDS[0] is the target (result).
19983 OPERANDS[1] is the first source.
19984 OPERANDS[2] is the second source.
19985 If NO_LENGTH is zero, then:
19986 OPERANDS[3] is the length.
19987 OPERANDS[4] is the alignment in bytes.
19988 If NO_LENGTH is nonzero, then:
19989 OPERANDS[3] is the alignment in bytes. */
19990 bool
19991 expand_strn_compare (rtx operands[], int no_length)
19993 rtx target = operands[0];
19994 rtx orig_src1 = operands[1];
19995 rtx orig_src2 = operands[2];
19996 rtx bytes_rtx, align_rtx;
19997 if (no_length)
19999 bytes_rtx = NULL;
20000 align_rtx = operands[3];
20002 else
20004 bytes_rtx = operands[3];
20005 align_rtx = operands[4];
20007 unsigned HOST_WIDE_INT cmp_bytes = 0;
20008 rtx src1 = orig_src1;
20009 rtx src2 = orig_src2;
20011 /* If we have a length, it must be constant. This simplifies things
20012 a bit as we don't have to generate code to check if we've exceeded
20013 the length. Later this could be expanded to handle the non-constant case. */
20014 if (!no_length && !CONST_INT_P (bytes_rtx))
20015 return false;
20017 /* This must be a fixed size alignment. */
20018 if (!CONST_INT_P (align_rtx))
20019 return false;
20021 unsigned int base_align = UINTVAL (align_rtx);
20022 int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
20023 int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
20025 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */
20026 if (SLOW_UNALIGNED_ACCESS (word_mode, align1)
20027 || SLOW_UNALIGNED_ACCESS (word_mode, align2))
20028 return false;
20030 gcc_assert (GET_MODE (target) == SImode);
20032 /* If we have an LE target without ldbrx and word_mode is DImode,
20033 then we must avoid using word_mode. */
20034 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20035 && word_mode == DImode);
20037 unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
20039 unsigned HOST_WIDE_INT offset = 0;
20040 unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
20041 unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
20042 if (no_length)
20044 /* Use this as a stand-in to determine the mode to use. */
20044 bytes = rs6000_string_compare_inline_limit * word_mode_size;
20045 else
20046 bytes = UINTVAL (bytes_rtx);
20048 machine_mode load_mode =
20049 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20050 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20051 compare_length = rs6000_string_compare_inline_limit * load_mode_size;
20053 /* If we have equality at the end of the last compare and we have not
20054 found the end of the string, we need to call strcmp/strncmp to
20055 compare the remainder. */
20056 bool equality_compare_rest = false;
20058 if (no_length)
20060 bytes = compare_length;
20061 equality_compare_rest = true;
20063 else
20065 if (bytes <= compare_length)
20066 compare_length = bytes;
20067 else
20068 equality_compare_rest = true;
20071 rtx result_reg = gen_reg_rtx (word_mode);
20072 rtx final_move_label = gen_label_rtx ();
20073 rtx final_label = gen_label_rtx ();
20074 rtx begin_compare_label = NULL;
20076 if (base_align < 8)
20078 /* Generate code that checks distance to 4k boundary for this case. */
20079 begin_compare_label = gen_label_rtx ();
20080 rtx strncmp_label = gen_label_rtx ();
20081 rtx jmp;
20083 /* Strncmp for power8 in glibc does this:
20084 rldicl r8,r3,0,52
20085 cmpldi cr7,r8,4096-16
20086 bgt cr7,L(pagecross) */
20088 /* Make sure that the length we use for the alignment test and
20089 the subsequent code generation are in agreement so we do not
20090 go past the length we tested for a 4k boundary crossing. */
20091 unsigned HOST_WIDE_INT align_test = compare_length;
20092 if (align_test < 8)
20094 align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
20095 base_align = align_test;
20097 else
20099 align_test = ROUND_UP (align_test, 8);
20100 base_align = 8;
20103 if (align1 < 8)
20104 expand_strncmp_align_check (strncmp_label, src1, align_test);
20105 if (align2 < 8)
20106 expand_strncmp_align_check (strncmp_label, src2, align_test);
20108 /* Now generate the following sequence:
20109 - branch to begin_compare
20110 - strncmp_label
20111 - call to strncmp
20112 - branch to final_label
20113 - begin_compare_label */
20115 rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
20116 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
20117 JUMP_LABEL (jmp) = begin_compare_label;
20118 LABEL_NUSES (begin_compare_label) += 1;
20119 emit_barrier ();
20121 emit_label (strncmp_label);
20123 if (!REG_P (XEXP (src1, 0)))
20125 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20126 src1 = replace_equiv_address (src1, src1_reg);
20129 if (!REG_P (XEXP (src2, 0)))
20131 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20132 src2 = replace_equiv_address (src2, src2_reg);
20135 if (no_length)
20137 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20138 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20139 target, LCT_NORMAL, GET_MODE (target), 2,
20140 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20141 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20143 else
20145 /* -m32 -mpowerpc64 results in word_mode being DImode even
20146 though otherwise it is 32-bit. The length arg to strncmp
20147 is a size_t which will be the same size as pointers. */
20148 rtx len_rtx;
20149 if (TARGET_64BIT)
20150 len_rtx = gen_reg_rtx (DImode);
20151 else
20152 len_rtx = gen_reg_rtx (SImode);
20154 emit_move_insn (len_rtx, bytes_rtx);
20156 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20157 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20158 target, LCT_NORMAL, GET_MODE (target), 3,
20159 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20160 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20161 len_rtx, GET_MODE (len_rtx));
20164 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20165 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20166 JUMP_LABEL (jmp) = final_label;
20167 LABEL_NUSES (final_label) += 1;
20168 emit_barrier ();
20169 emit_label (begin_compare_label);
20172 rtx cleanup_label = NULL;
20173 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20174 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20176 /* Generate sequence of ld/ldbrx, cmpb to compare out
20177 to the length specified. */
20178 unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
20179 while (bytes_to_compare > 0)
20181 /* Compare sequence:
20182 check each 8B with: ld/ld cmpd bne
20183 If equal, use rldicr/cmpb to check for zero byte.
20184 cleanup code at end:
20185 cmpb get byte that differs
20186 cmpb look for zero byte
20187 orc combine
20188 cntlzd get bit of first zero/diff byte
20189 subfic convert for rldcl use
20190 rldcl rldcl extract diff/zero byte
20191 subf subtract for final result
20193 The last compare can branch around the cleanup code if the
20194 result is zero because the strings are exactly equal. */
20195 unsigned int align = compute_current_alignment (base_align, offset);
20196 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20197 load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
20198 word_mode_ok);
20199 else
20200 load_mode = select_block_compare_mode (0, bytes_to_compare, align,
20201 word_mode_ok);
20202 load_mode_size = GET_MODE_SIZE (load_mode);
20203 if (bytes_to_compare >= load_mode_size)
20204 cmp_bytes = load_mode_size;
20205 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20207 /* Move this load back so it doesn't go past the end.
20208 P8/P9 can do this efficiently. */
20209 unsigned int extra_bytes = load_mode_size - bytes_to_compare;
20210 cmp_bytes = bytes_to_compare;
20211 if (extra_bytes < offset)
20213 offset -= extra_bytes;
20214 cmp_bytes = load_mode_size;
20215 bytes_to_compare = cmp_bytes;
20218 else
20219 /* P7 and earlier can't do the overlapping load trick fast,
20220 so this forces a non-overlapping load and a shift to get
20221 rid of the extra bytes. */
20222 cmp_bytes = bytes_to_compare;
20224 src1 = adjust_address (orig_src1, load_mode, offset);
20225 src2 = adjust_address (orig_src2, load_mode, offset);
20227 if (!REG_P (XEXP (src1, 0)))
20229 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20230 src1 = replace_equiv_address (src1, src1_reg);
20232 set_mem_size (src1, cmp_bytes);
20234 if (!REG_P (XEXP (src2, 0)))
20236 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20237 src2 = replace_equiv_address (src2, src2_reg);
20239 set_mem_size (src2, cmp_bytes);
20241 do_load_for_compare (tmp_reg_src1, src1, load_mode);
20242 do_load_for_compare (tmp_reg_src2, src2, load_mode);
20244 /* We must always left-align the data we read, and
20245 clear any bytes to the right that are beyond the string.
20246 Otherwise the cmpb sequence won't produce the correct
20247 results. The beginning of the compare will be done
20248 with word_mode so will not have any extra shifts or
20249 clear-right operations. */
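/* For example, an HImode chunk loaded into a DImode word is rotated
   left by 48 bits so its two bytes sit at the top of the register; if
   only one of those bytes is valid (cmp_bytes == 1), the clear-right
   mask below then zeroes the low 56 bits. */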
20251 if (load_mode_size < word_mode_size)
20253 /* Rotate left first. */
20254 rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
20255 if (word_mode == DImode)
20257 emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
20258 emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
20260 else
20262 emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20263 emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20267 if (cmp_bytes < word_mode_size)
20269 /* Now clear right. This plus the rotate can be
20270 turned into a rldicr instruction. */
20271 HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20272 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20273 if (word_mode == DImode)
20275 emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20276 emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20278 else
20280 emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20281 emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20285 /* Cases to handle. A and B are chunks of the two strings.
20286 1: Not end of comparison:
20287 A != B: branch to cleanup code to compute result.
20288 A == B: check for 0 byte, next block if not found.
20289 2: End of the inline comparison:
20290 A != B: branch to cleanup code to compute result.
20291 A == B: check for 0 byte, call strcmp/strncmp
20292 3: We have compared the requested N bytes:
20293 A == B: branch to result 0.
20294 A != B: cleanup code to compute result. */
20296 unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
20298 rtx dst_label;
20299 if (remain > 0 || equality_compare_rest)
20301 /* Branch to cleanup code, otherwise fall through to do
20302 more compares. */
20303 if (!cleanup_label)
20304 cleanup_label = gen_label_rtx ();
20305 dst_label = cleanup_label;
20307 else
20308 /* Branch to end and produce result of 0. */
20309 dst_label = final_move_label;
20311 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
20312 rtx cond = gen_reg_rtx (CCmode);
20314 /* Always produce the 0 result; it is needed if
20315 cmpb finds a 0 byte in this chunk. */
20316 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20317 rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
20319 rtx cmp_rtx;
20320 if (remain == 0 && !equality_compare_rest)
20321 cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
20322 else
20323 cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20325 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20326 lab_ref, pc_rtx);
20327 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20328 JUMP_LABEL (j) = dst_label;
20329 LABEL_NUSES (dst_label) += 1;
20331 if (remain > 0 || equality_compare_rest)
20333 /* Generate a cmpb to test for a 0 byte and branch
20334 to final result if found. */
20335 rtx cmpb_zero = gen_reg_rtx (word_mode);
20336 rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
20337 rtx condz = gen_reg_rtx (CCmode);
20338 rtx zero_reg = gen_reg_rtx (word_mode);
20339 if (word_mode == SImode)
20341 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20342 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20343 if (cmp_bytes < word_mode_size)
20345 /* Don't want to look at zero bytes past end. */
20346 HOST_WIDE_INT mb =
20347 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20348 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20349 emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
20352 else
20354 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20355 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20356 if (cmp_bytes < word_mode_size)
20358 /* Don't want to look at zero bytes past end. */
20359 HOST_WIDE_INT mb =
20360 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20361 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20362 emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
20366 emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
20367 rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
20368 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
20369 lab_ref_fin, pc_rtx);
20370 rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20371 JUMP_LABEL (j2) = final_move_label;
20372 LABEL_NUSES (final_move_label) += 1;
20376 offset += cmp_bytes;
20377 bytes_to_compare -= cmp_bytes;
20380 if (equality_compare_rest)
20382 /* Update pointers past what has been compared already. */
20383 src1 = adjust_address (orig_src1, load_mode, offset);
20384 src2 = adjust_address (orig_src2, load_mode, offset);
20386 if (!REG_P (XEXP (src1, 0)))
20388 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20389 src1 = replace_equiv_address (src1, src1_reg);
20391 set_mem_size (src1, cmp_bytes);
20393 if (!REG_P (XEXP (src2, 0)))
20395 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20396 src2 = replace_equiv_address (src2, src2_reg);
20398 set_mem_size (src2, cmp_bytes);
20400 /* Construct call to strcmp/strncmp to compare the rest of the string. */
20401 if (no_length)
20403 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20404 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20405 target, LCT_NORMAL, GET_MODE (target), 2,
20406 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20407 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20409 else
20411 rtx len_rtx;
20412 if (TARGET_64BIT)
20413 len_rtx = gen_reg_rtx (DImode);
20414 else
20415 len_rtx = gen_reg_rtx (SImode);
20417 emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
20418 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20419 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20420 target, LCT_NORMAL, GET_MODE (target), 3,
20421 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20422 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20423 len_rtx, GET_MODE (len_rtx));
20426 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20427 rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20428 JUMP_LABEL (jmp) = final_label;
20429 LABEL_NUSES (final_label) += 1;
20430 emit_barrier ();
20433 if (cleanup_label)
20434 emit_label (cleanup_label);
20436 /* Generate the final sequence that identifies the differing
20437 byte and generates the final result, taking into account
20438 zero bytes:
20440 cmpb cmpb_result1, src1, src2
20441 cmpb cmpb_result2, src1, zero
20442 orc cmpb_result1, cmpb_result1, cmpb_result2
20443 cntlzd get bit of first zero/diff byte
20444 addi convert for rldcl use
20445 rldcl rldcl extract diff/zero byte
20446 subf subtract for final result
20447 */
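/* Worked through: cmpb_diff has 0xff in each byte where the operands
   match, so its complement marks the differing bytes, and OR-ing in
   cmpb_zero also marks the first zero byte.  cntlzd of that is then
   8 * (index of the first interesting byte, counted from the left),
   and adding 8 gives a rotate-left amount that lands exactly that byte
   in the low 8 bits of each operand, whose difference is the result. */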
20449 rtx cmpb_diff = gen_reg_rtx (word_mode);
20450 rtx cmpb_zero = gen_reg_rtx (word_mode);
20451 rtx rot_amt = gen_reg_rtx (word_mode);
20452 rtx zero_reg = gen_reg_rtx (word_mode);
20454 rtx rot1_1 = gen_reg_rtx (word_mode);
20455 rtx rot1_2 = gen_reg_rtx (word_mode);
20456 rtx rot2_1 = gen_reg_rtx (word_mode);
20457 rtx rot2_2 = gen_reg_rtx (word_mode);
20459 if (word_mode == SImode)
20461 emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20462 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20463 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20464 emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
20465 emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20466 emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
20467 emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
20468 emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
20469 gen_lowpart (SImode, rot_amt)));
20470 emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20471 emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
20472 gen_lowpart (SImode, rot_amt)));
20473 emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20474 emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
20476 else
20478 emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20479 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20480 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20481 emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
20482 emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20483 emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
20484 emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
20485 emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
20486 gen_lowpart (SImode, rot_amt)));
20487 emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20488 emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
20489 gen_lowpart (SImode, rot_amt)));
20490 emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20491 emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
20494 emit_label (final_move_label);
20495 emit_insn (gen_movsi (target,
20496 gen_lowpart (SImode, result_reg)));
20497 emit_label (final_label);
20498 return true;
20501 /* Expand a block move operation, and return 1 if successful. Return 0
20502 if we should let the compiler generate normal code.
20504 operands[0] is the destination
20505 operands[1] is the source
20506 operands[2] is the length
20507 operands[3] is the alignment */
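/* For example, moving 6 bytes with 4-byte alignment (assuming the
   string instructions are not in use) should expand to an SImode
   load/store pair plus an HImode pair, with both loads emitted ahead
   of the buffered stores so they can overlap. */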
20509 #define MAX_MOVE_REG 4
20511 int
20512 expand_block_move (rtx operands[])
20514 rtx orig_dest = operands[0];
20515 rtx orig_src = operands[1];
20516 rtx bytes_rtx = operands[2];
20517 rtx align_rtx = operands[3];
20518 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
20519 int align;
20520 int bytes;
20521 int offset;
20522 int move_bytes;
20523 rtx stores[MAX_MOVE_REG];
20524 int num_reg = 0;
20526 /* If this is not a fixed size move, just call memcpy */
20527 if (! constp)
20528 return 0;
20530 /* This must be a fixed size alignment */
20531 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
20532 align = INTVAL (align_rtx) * BITS_PER_UNIT;
20534 /* Anything to move? */
20535 bytes = INTVAL (bytes_rtx);
20536 if (bytes <= 0)
20537 return 1;
20539 if (bytes > rs6000_block_move_inline_limit)
20540 return 0;
20542 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
20544 union {
20545 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
20546 rtx (*mov) (rtx, rtx);
20547 } gen_func;
20548 machine_mode mode = BLKmode;
20549 rtx src, dest;
20551 /* Altivec first, since it will be faster than a string move
20552 when it applies, and usually not significantly larger. */
20553 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
20555 move_bytes = 16;
20556 mode = V4SImode;
20557 gen_func.mov = gen_movv4si;
20559 else if (TARGET_SPE && bytes >= 8 && align >= 64)
20561 move_bytes = 8;
20562 mode = V2SImode;
20563 gen_func.mov = gen_movv2si;
20565 else if (TARGET_STRING
20566 && bytes > 24 /* move up to 32 bytes at a time */
20567 && ! fixed_regs[5]
20568 && ! fixed_regs[6]
20569 && ! fixed_regs[7]
20570 && ! fixed_regs[8]
20571 && ! fixed_regs[9]
20572 && ! fixed_regs[10]
20573 && ! fixed_regs[11]
20574 && ! fixed_regs[12])
20576 move_bytes = (bytes > 32) ? 32 : bytes;
20577 gen_func.movmemsi = gen_movmemsi_8reg;
20579 else if (TARGET_STRING
20580 && bytes > 16 /* move up to 24 bytes at a time */
20581 && ! fixed_regs[5]
20582 && ! fixed_regs[6]
20583 && ! fixed_regs[7]
20584 && ! fixed_regs[8]
20585 && ! fixed_regs[9]
20586 && ! fixed_regs[10])
20588 move_bytes = (bytes > 24) ? 24 : bytes;
20589 gen_func.movmemsi = gen_movmemsi_6reg;
20591 else if (TARGET_STRING
20592 && bytes > 8 /* move up to 16 bytes at a time */
20593 && ! fixed_regs[5]
20594 && ! fixed_regs[6]
20595 && ! fixed_regs[7]
20596 && ! fixed_regs[8])
20598 move_bytes = (bytes > 16) ? 16 : bytes;
20599 gen_func.movmemsi = gen_movmemsi_4reg;
20601 else if (bytes >= 8 && TARGET_POWERPC64
20602 && (align >= 64 || !STRICT_ALIGNMENT))
20604 move_bytes = 8;
20605 mode = DImode;
20606 gen_func.mov = gen_movdi;
20607 if (offset == 0 && align < 64)
20609 rtx addr;
20611 /* If the address form is reg+offset with offset not a
20612 multiple of four, reload into reg indirect form here
20613 rather than waiting for reload. This way we get one
20614 reload, not one per load and/or store. */
20615 addr = XEXP (orig_dest, 0);
20616 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
20617 && GET_CODE (XEXP (addr, 1)) == CONST_INT
20618 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
20620 addr = copy_addr_to_reg (addr);
20621 orig_dest = replace_equiv_address (orig_dest, addr);
20623 addr = XEXP (orig_src, 0);
20624 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
20625 && GET_CODE (XEXP (addr, 1)) == CONST_INT
20626 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
20628 addr = copy_addr_to_reg (addr);
20629 orig_src = replace_equiv_address (orig_src, addr);
20633 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
20634 { /* move up to 8 bytes at a time */
20635 move_bytes = (bytes > 8) ? 8 : bytes;
20636 gen_func.movmemsi = gen_movmemsi_2reg;
20638 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
20639 { /* move 4 bytes */
20640 move_bytes = 4;
20641 mode = SImode;
20642 gen_func.mov = gen_movsi;
20644 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
20645 { /* move 2 bytes */
20646 move_bytes = 2;
20647 mode = HImode;
20648 gen_func.mov = gen_movhi;
20650 else if (TARGET_STRING && bytes > 1)
20651 { /* move up to 4 bytes at a time */
20652 move_bytes = (bytes > 4) ? 4 : bytes;
20653 gen_func.movmemsi = gen_movmemsi_1reg;
20655 else /* move 1 byte at a time */
20657 move_bytes = 1;
20658 mode = QImode;
20659 gen_func.mov = gen_movqi;
20662 src = adjust_address (orig_src, mode, offset);
20663 dest = adjust_address (orig_dest, mode, offset);
20665 if (mode != BLKmode)
20667 rtx tmp_reg = gen_reg_rtx (mode);
20669 emit_insn ((*gen_func.mov) (tmp_reg, src));
20670 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
20673 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
20675 int i;
20676 for (i = 0; i < num_reg; i++)
20677 emit_insn (stores[i]);
20678 num_reg = 0;
20681 if (mode == BLKmode)
20683 /* Move the address into scratch registers. The movmemsi
20684 patterns require zero offset. */
20685 if (!REG_P (XEXP (src, 0)))
20687 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
20688 src = replace_equiv_address (src, src_reg);
20690 set_mem_size (src, move_bytes);
20692 if (!REG_P (XEXP (dest, 0)))
20694 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
20695 dest = replace_equiv_address (dest, dest_reg);
20697 set_mem_size (dest, move_bytes);
20699 emit_insn ((*gen_func.movmemsi) (dest, src,
20700 GEN_INT (move_bytes & 31),
20701 align_rtx));
20705 return 1;
20709 /* Return a string to perform a load_multiple operation.
20710 operands[0] is the vector.
20711 operands[1] is the source address.
20712 operands[2] is the first destination register. */
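/* The subtlety here: if the address register is also one of the
   destination registers, a naive in-order sequence would clobber the
   address before the remaining words were loaded, so that register
   must always be loaded last; the cases below arrange exactly that. */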
20714 const char *
20715 rs6000_output_load_multiple (rtx operands[3])
20717 /* We have to handle the case where the pseudo used to contain the address
20718 is assigned to one of the output registers. */
20719 int i, j;
20720 int words = XVECLEN (operands[0], 0);
20721 rtx xop[10];
20723 if (XVECLEN (operands[0], 0) == 1)
20724 return "lwz %2,0(%1)";
20726 for (i = 0; i < words; i++)
20727 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
20729 if (i == words-1)
20731 xop[0] = GEN_INT (4 * (words-1));
20732 xop[1] = operands[1];
20733 xop[2] = operands[2];
20734 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
20735 return "";
20737 else if (i == 0)
20739 xop[0] = GEN_INT (4 * (words-1));
20740 xop[1] = operands[1];
20741 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
20742 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
20743 return "";
20745 else
20747 for (j = 0; j < words; j++)
20748 if (j != i)
20750 xop[0] = GEN_INT (j * 4);
20751 xop[1] = operands[1];
20752 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
20753 output_asm_insn ("lwz %2,%0(%1)", xop);
20755 xop[0] = GEN_INT (i * 4);
20756 xop[1] = operands[1];
20757 output_asm_insn ("lwz %1,%0(%1)", xop);
20758 return "";
20762 return "lswi %2,%1,%N0";
20766 /* A validation routine: say whether CODE, a condition code, and MODE
20767 match. The other alternatives either don't make sense or should
20768 never be generated. */
20770 void
20771 validate_condition_mode (enum rtx_code code, machine_mode mode)
20773 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
20774 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
20775 && GET_MODE_CLASS (mode) == MODE_CC);
20777 /* These don't make sense. */
20778 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
20779 || mode != CCUNSmode);
20781 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
20782 || mode == CCUNSmode);
20784 gcc_assert (mode == CCFPmode
20785 || (code != ORDERED && code != UNORDERED
20786 && code != UNEQ && code != LTGT
20787 && code != UNGT && code != UNLT
20788 && code != UNGE && code != UNLE));
20790 /* These should never be generated except for
20791 flag_finite_math_only. */
20792 gcc_assert (mode != CCFPmode
20793 || flag_finite_math_only
20794 || (code != LE && code != GE
20795 && code != UNEQ && code != LTGT
20796 && code != UNGT && code != UNLT));
20798 /* These are invalid; the information is not there. */
20799 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
20803 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
20804 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
20805 not zero, store there the bit offset (counted from the right) where
20806 the single stretch of 1 bits begins; and similarly for B, the bit
20807 offset where it ends. */
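/* Worked example: for the SImode mask 0x00ffff00, the lowest set bit
   gives ne = 8, and val plus that bit is 0x01000000, so nb = 24 - 1
   = 23; the single run of ones spans bits 8..23 and the mask is
   valid. */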
20809 bool
20810 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
20812 unsigned HOST_WIDE_INT val = INTVAL (mask);
20813 unsigned HOST_WIDE_INT bit;
20814 int nb, ne;
20815 int n = GET_MODE_PRECISION (mode);
20817 if (mode != DImode && mode != SImode)
20818 return false;
20820 if (INTVAL (mask) >= 0)
20822 bit = val & -val;
20823 ne = exact_log2 (bit);
20824 nb = exact_log2 (val + bit);
20826 else if (val + 1 == 0)
20828 nb = n;
20829 ne = 0;
20831 else if (val & 1)
20833 val = ~val;
20834 bit = val & -val;
20835 nb = exact_log2 (bit);
20836 ne = exact_log2 (val + bit);
20838 else
20840 bit = val & -val;
20841 ne = exact_log2 (bit);
20842 if (val + bit == 0)
20843 nb = n;
20844 else
20845 nb = 0;
20848 nb--;
20850 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
20851 return false;
20853 if (b)
20854 *b = nb;
20855 if (e)
20856 *e = ne;
20858 return true;
20861 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
20862 or rldicr instruction, to implement an AND with it in mode MODE. */
20864 bool
20865 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
20867 int nb, ne;
20869 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
20870 return false;
20872 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
20873 does not wrap. */
20874 if (mode == DImode)
20875 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
20877 /* For SImode, rlwinm can do everything. */
20878 if (mode == SImode)
20879 return (nb < 32 && ne < 32);
20881 return false;
20884 /* Return the instruction template for an AND with mask in mode MODE, with
20885 operands OPERANDS. If DOT is true, make it a record-form instruction. */
20887 const char *
20888 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
20890 int nb, ne;
20892 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
20893 gcc_unreachable ();
20895 if (mode == DImode && ne == 0)
20897 operands[3] = GEN_INT (63 - nb);
20898 if (dot)
20899 return "rldicl. %0,%1,0,%3";
20900 return "rldicl %0,%1,0,%3";
20903 if (mode == DImode && nb == 63)
20905 operands[3] = GEN_INT (63 - ne);
20906 if (dot)
20907 return "rldicr. %0,%1,0,%3";
20908 return "rldicr %0,%1,0,%3";
20911 if (nb < 32 && ne < 32)
20913 operands[3] = GEN_INT (31 - nb);
20914 operands[4] = GEN_INT (31 - ne);
20915 if (dot)
20916 return "rlwinm. %0,%1,0,%3,%4";
20917 return "rlwinm %0,%1,0,%3,%4";
20920 gcc_unreachable ();
20923 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
20924 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
20925 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
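/* E.g. (x << 2) & 0xfffffffc in SImode: the mask gives nb = 31 and
   ne = 2, the shift gives sh = 2, so the ASHIFT test below (ne >= sh)
   passes and the whole thing is a single rlwinm (an slwi). */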
20927 bool
20928 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
20930 int nb, ne;
20932 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
20933 return false;
20935 int n = GET_MODE_PRECISION (mode);
20936 int sh = -1;
20938 if (CONST_INT_P (XEXP (shift, 1)))
20940 sh = INTVAL (XEXP (shift, 1));
20941 if (sh < 0 || sh >= n)
20942 return false;
20945 rtx_code code = GET_CODE (shift);
20947 /* Convert any shift by 0 to a rotate, to simplify the code below. */
20948 if (sh == 0)
20949 code = ROTATE;
20951 /* Convert rotate to simple shift if we can, to make analysis simpler. */
20952 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
20953 code = ASHIFT;
20954 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
20956 code = LSHIFTRT;
20957 sh = n - sh;
20960 /* DImode rotates need rld*. */
20961 if (mode == DImode && code == ROTATE)
20962 return (nb == 63 || ne == 0 || ne == sh);
20964 /* SImode rotates need rlw*. */
20965 if (mode == SImode && code == ROTATE)
20966 return (nb < 32 && ne < 32 && sh < 32);
20968 /* Wrap-around masks are only okay for rotates. */
20969 if (ne > nb)
20970 return false;
20972 /* Variable shifts are only okay for rotates. */
20973 if (sh < 0)
20974 return false;
20976 /* Don't allow ASHIFT if the mask is wrong for that. */
20977 if (code == ASHIFT && ne < sh)
20978 return false;
20980 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
20981 if the mask is wrong for that. */
20982 if (nb < 32 && ne < 32 && sh < 32
20983 && !(code == LSHIFTRT && nb >= 32 - sh))
20984 return true;
20986 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
20987 if the mask is wrong for that. */
20988 if (code == LSHIFTRT)
20989 sh = 64 - sh;
20990 if (nb == 63 || ne == 0 || ne == sh)
20991 return !(code == LSHIFTRT && nb >= sh);
20993 return false;
20996 /* Return the instruction template for a shift with mask in mode MODE, with
20997 operands OPERANDS. If DOT is true, make it a record-form instruction. */
20999 const char *
21000 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
21002 int nb, ne;
21004 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21005 gcc_unreachable ();
21007 if (mode == DImode && ne == 0)
21009 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21010 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
21011 operands[3] = GEN_INT (63 - nb);
21012 if (dot)
21013 return "rld%I2cl. %0,%1,%2,%3";
21014 return "rld%I2cl %0,%1,%2,%3";
21017 if (mode == DImode && nb == 63)
21019 operands[3] = GEN_INT (63 - ne);
21020 if (dot)
21021 return "rld%I2cr. %0,%1,%2,%3";
21022 return "rld%I2cr %0,%1,%2,%3";
21025 if (mode == DImode
21026 && GET_CODE (operands[4]) != LSHIFTRT
21027 && CONST_INT_P (operands[2])
21028 && ne == INTVAL (operands[2]))
21030 operands[3] = GEN_INT (63 - nb);
21031 if (dot)
21032 return "rld%I2c. %0,%1,%2,%3";
21033 return "rld%I2c %0,%1,%2,%3";
21036 if (nb < 32 && ne < 32)
21038 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21039 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21040 operands[3] = GEN_INT (31 - nb);
21041 operands[4] = GEN_INT (31 - ne);
21042 /* This insn can also be a 64-bit rotate with mask that really makes
21043 it just a shift right (with mask); the %h below are to adjust for
21044 that situation (shift count is >= 32 in that case). */
21045 if (dot)
21046 return "rlw%I2nm. %0,%1,%h2,%3,%4";
21047 return "rlw%I2nm %0,%1,%h2,%3,%4";
21050 gcc_unreachable ();
21053 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
21054 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
21055 ASHIFT, or LSHIFTRT) in mode MODE. */
21057 bool
21058 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
21060 int nb, ne;
21062 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21063 return false;
21065 int n = GET_MODE_PRECISION (mode);
21067 int sh = INTVAL (XEXP (shift, 1));
21068 if (sh < 0 || sh >= n)
21069 return false;
21071 rtx_code code = GET_CODE (shift);
21073 /* Convert any shift by 0 to a rotate, to simplify the code below. */
21074 if (sh == 0)
21075 code = ROTATE;
21077 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21078 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21079 code = ASHIFT;
21080 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21082 code = LSHIFTRT;
21083 sh = n - sh;
21086 /* DImode rotates need rldimi. */
21087 if (mode == DImode && code == ROTATE)
21088 return (ne == sh);
21090 /* SImode rotates need rlwimi. */
21091 if (mode == SImode && code == ROTATE)
21092 return (nb < 32 && ne < 32 && sh < 32);
21094 /* Wrap-around masks are only okay for rotates. */
21095 if (ne > nb)
21096 return false;
21098 /* Don't allow ASHIFT if the mask is wrong for that. */
21099 if (code == ASHIFT && ne < sh)
21100 return false;
21102 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
21103 if the mask is wrong for that. */
21104 if (nb < 32 && ne < 32 && sh < 32
21105 && !(code == LSHIFTRT && nb >= 32 - sh))
21106 return true;
21108 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
21109 if the mask is wrong for that. */
21110 if (code == LSHIFTRT)
21111 sh = 64 - sh;
21112 if (ne == sh)
21113 return !(code == LSHIFTRT && nb >= sh);
21115 return false;
21118 /* Return the instruction template for an insert with mask in mode MODE, with
21119 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21121 const char *
21122 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
21124 int nb, ne;
21126 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21127 gcc_unreachable ();
21129 /* Prefer rldimi because rlwimi is cracked. */
21130 if (TARGET_POWERPC64
21131 && (!dot || mode == DImode)
21132 && GET_CODE (operands[4]) != LSHIFTRT
21133 && ne == INTVAL (operands[2]))
21135 operands[3] = GEN_INT (63 - nb);
21136 if (dot)
21137 return "rldimi. %0,%1,%2,%3";
21138 return "rldimi %0,%1,%2,%3";
21141 if (nb < 32 && ne < 32)
21143 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21144 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21145 operands[3] = GEN_INT (31 - nb);
21146 operands[4] = GEN_INT (31 - ne);
21147 if (dot)
21148 return "rlwimi. %0,%1,%2,%3,%4";
21149 return "rlwimi %0,%1,%2,%3,%4";
21152 gcc_unreachable ();
21155 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
21156 using two machine instructions. */
21158 bool
21159 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
21161 /* There are two kinds of AND we can handle with two insns:
21162 1) those we can do with two rl* insns;
21163 2) ori[s];xori[s].
21165 We do not handle that last case yet. */
21167 /* If there is just one stretch of ones, we can do it. */
21168 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
21169 return true;
21171 /* Otherwise, fill in the lowest "hole"; if we can do the result with
21172 one insn, we can do the whole thing with two. */
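/* Worked example: val = 0xff0ff has a single hole.  bit1 = 0x1 (lowest
   one), bit2 = 0x100 (start of the hole), bit3 = 0x1000 (first one
   above the hole), so val + bit3 - bit2 = 0xfffff fills the hole; that
   is one run of ones, hence a valid one-insn mask, and the whole AND
   takes two insns. */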
21173 unsigned HOST_WIDE_INT val = INTVAL (c);
21174 unsigned HOST_WIDE_INT bit1 = val & -val;
21175 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21176 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21177 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21178 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
21181 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
21182 If EXPAND is true, split rotate-and-mask instructions we generate to
21183 their constituent parts as well (this is used during expand); if DOT
21184 is 1, make the last insn a record-form instruction clobbering the
21185 destination GPR and setting the CC reg (from operands[3]); if 2, set
21186 that GPR as well as the CC reg. */
21188 void
21189 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
21191 gcc_assert (!(expand && dot));
21193 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
21195 /* If it is one stretch of ones, it is DImode; shift left, mask, then
21196 shift right. This generates better code than doing the masks without
21197 shifts, or shifting first right and then left. */
21198 int nb, ne;
21199 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
21201 gcc_assert (mode == DImode);
21203 int shift = 63 - nb;
21204 if (expand)
21206 rtx tmp1 = gen_reg_rtx (DImode);
21207 rtx tmp2 = gen_reg_rtx (DImode);
21208 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
21209 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
21210 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
21212 else
21214 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
21215 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
21216 emit_move_insn (operands[0], tmp);
21217 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
21218 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21220 return;
21223 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
21224 that does the rest. */
21225 unsigned HOST_WIDE_INT bit1 = val & -val;
21226 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21227 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21228 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21230 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
21231 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
21233 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
21235 /* Two "no-rotate"-and-mask instructions, for SImode. */
21236 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
21238 gcc_assert (mode == SImode);
21240 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21241 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
21242 emit_move_insn (reg, tmp);
21243 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21244 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21245 return;
21248 gcc_assert (mode == DImode);
21250 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
21251 insns; we have to do the first in SImode, because it wraps. */
21252 if (mask2 <= 0xffffffff
21253 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
21255 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21256 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
21257 GEN_INT (mask1));
21258 rtx reg_low = gen_lowpart (SImode, reg);
21259 emit_move_insn (reg_low, tmp);
21260 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21261 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21262 return;
21265 /* Two rld* insns: rotate, clear the hole in the middle (which now is
21266 at the top end), rotate back and clear the other hole. */
21267 int right = exact_log2 (bit3);
21268 int left = 64 - right;
21270 /* Rotate the mask too. */
21271 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
21273 if (expand)
21275 rtx tmp1 = gen_reg_rtx (DImode);
21276 rtx tmp2 = gen_reg_rtx (DImode);
21277 rtx tmp3 = gen_reg_rtx (DImode);
21278 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
21279 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
21280 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
21281 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
21283 else
21285 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
21286 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
21287 emit_move_insn (operands[0], tmp);
21288 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
21289 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
21290 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
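/* A standalone sketch (illustrative only, kept under #if 0) of the
   decomposition above: MASK2 is VAL with its lowest hole filled, MASK1
   keeps every bit except the filled ones, so (x & mask1) & mask2 equals
   x & val, while each mask is a single (possibly wrapped) run of ones
   and therefore one rotate-and-mask insn.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t val = 0xf0f0, x = 0x123456789abcdef0ULL;
  uint64_t bit1 = val & -val;
  uint64_t bit2 = (val + bit1) & ~val;
  uint64_t val1 = (val + bit1) & val;
  uint64_t bit3 = val1 & -val1;
  uint64_t mask1 = -bit3 + bit2 - 1;   /* all bits outside [bit2, bit3) */
  uint64_t mask2 = val + bit3 - bit2;  /* val with the hole filled */
  assert (((x & mask1) & mask2) == (x & val));
  return 0;
}
#endif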
21294 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
21295 for lfq and stfq insns iff the registers are hard registers. */
21297 int
21298 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
21300 /* We might have been passed a SUBREG. */
21301 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
21302 return 0;
21304 /* We might have been passed non-floating-point registers. */
21305 if (!FP_REGNO_P (REGNO (reg1))
21306 || !FP_REGNO_P (REGNO (reg2)))
21307 return 0;
21309 return (REGNO (reg1) == REGNO (reg2) - 1);
21312 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
21313 addr1 and addr2 must be in consecutive memory locations
21314 (addr2 == addr1 + 8). */
21316 int
21317 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
21319 rtx addr1, addr2;
21320 unsigned int reg1, reg2;
21321 int offset1, offset2;
21323 /* The mems cannot be volatile. */
21324 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
21325 return 0;
21327 addr1 = XEXP (mem1, 0);
21328 addr2 = XEXP (mem2, 0);
21330 /* Extract an offset (if used) from the first addr. */
21331 if (GET_CODE (addr1) == PLUS)
21333 /* If not a REG, return zero. */
21334 if (GET_CODE (XEXP (addr1, 0)) != REG)
21335 return 0;
21336 else
21338 reg1 = REGNO (XEXP (addr1, 0));
21339 /* The offset must be constant! */
21340 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
21341 return 0;
21342 offset1 = INTVAL (XEXP (addr1, 1));
21345 else if (GET_CODE (addr1) != REG)
21346 return 0;
21347 else
21349 reg1 = REGNO (addr1);
21350 /* This was a simple (mem (reg)) expression. Offset is 0. */
21351 offset1 = 0;
21354 /* And now for the second addr. */
21355 if (GET_CODE (addr2) == PLUS)
21357 /* If not a REG, return zero. */
21358 if (GET_CODE (XEXP (addr2, 0)) != REG)
21359 return 0;
21360 else
21362 reg2 = REGNO (XEXP (addr2, 0));
21363 /* The offset must be constant. */
21364 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
21365 return 0;
21366 offset2 = INTVAL (XEXP (addr2, 1));
21369 else if (GET_CODE (addr2) != REG)
21370 return 0;
21371 else
21373 reg2 = REGNO (addr2);
21374 /* This was a simple (mem (reg)) expression. Offset is 0. */
21375 offset2 = 0;
21378 /* Both of these must have the same base register. */
21379 if (reg1 != reg2)
21380 return 0;
21382 /* The offset for the second addr must be 8 more than the first addr. */
21383 if (offset2 != offset1 + 8)
21384 return 0;
21386 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
21387 instructions. */
21388 return 1;
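/* A simplified standalone model (illustrative only; the struct and
   function names are invented for the demo) of the two checks above:
   lfq/stfq want consecutive FPRs and two mems at BASE+OFS and
   BASE+OFS+8.  */
#if 0
#include <stdio.h>

struct addr { int base_reg; long offset; };

static int
addrs_ok_for_quad (struct addr a1, struct addr a2)
{
  return a1.base_reg == a2.base_reg && a2.offset == a1.offset + 8;
}

int
main (void)
{
  struct addr a = { 1, 16 }, b = { 1, 24 }, c = { 2, 24 };
  /* Prints "1 0": B is adjacent to A, C uses a different base.  */
  printf ("%d %d\n", addrs_ok_for_quad (a, b), addrs_ok_for_quad (a, c));
  return 0;
}
#endif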
21392 rtx
21393 rs6000_secondary_memory_needed_rtx (machine_mode mode)
21395 static bool eliminated = false;
21396 rtx ret;
21398 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
21399 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21400 else
21402 rtx mem = cfun->machine->sdmode_stack_slot;
21403 gcc_assert (mem != NULL_RTX);
21405 if (!eliminated)
21407 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
21408 cfun->machine->sdmode_stack_slot = mem;
21409 eliminated = true;
21411 ret = mem;
21414 if (TARGET_DEBUG_ADDR)
21416 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
21417 GET_MODE_NAME (mode));
21418 if (!ret)
21419 fprintf (stderr, "\tNULL_RTX\n");
21420 else
21421 debug_rtx (ret);
21424 return ret;
21427 /* Return the mode to be used for memory when a secondary memory
21428 location is needed. For SDmode values we need to use DDmode; in
21429 all other cases we can use the same mode. */
21430 machine_mode
21431 rs6000_secondary_memory_needed_mode (machine_mode mode)
21433 if (lra_in_progress && mode == SDmode)
21434 return DDmode;
21435 return mode;
21438 static tree
21439 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
21441 /* Don't walk into types. */
21442 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
21444 *walk_subtrees = 0;
21445 return NULL_TREE;
21448 switch (TREE_CODE (*tp))
21450 case VAR_DECL:
21451 case PARM_DECL:
21452 case FIELD_DECL:
21453 case RESULT_DECL:
21454 case SSA_NAME:
21455 case REAL_CST:
21456 case MEM_REF:
21457 case VIEW_CONVERT_EXPR:
21458 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
21459 return *tp;
21460 break;
21461 default:
21462 break;
21465 return NULL_TREE;
21468 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
21469 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
21470 only work on the traditional altivec registers, note if an altivec register
21471 was chosen. */
21473 static enum rs6000_reg_type
21474 register_to_reg_type (rtx reg, bool *is_altivec)
21476 HOST_WIDE_INT regno;
21477 enum reg_class rclass;
21479 if (GET_CODE (reg) == SUBREG)
21480 reg = SUBREG_REG (reg);
21482 if (!REG_P (reg))
21483 return NO_REG_TYPE;
21485 regno = REGNO (reg);
21486 if (regno >= FIRST_PSEUDO_REGISTER)
21488 if (!lra_in_progress && !reload_in_progress && !reload_completed)
21489 return PSEUDO_REG_TYPE;
21491 regno = true_regnum (reg);
21492 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
21493 return PSEUDO_REG_TYPE;
21496 gcc_assert (regno >= 0);
21498 if (is_altivec && ALTIVEC_REGNO_P (regno))
21499 *is_altivec = true;
21501 rclass = rs6000_regno_regclass[regno];
21502 return reg_class_to_reg_type[(int)rclass];
21505 /* Helper function to return the cost of adding a TOC entry address. */
21507 static inline int
21508 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
21510 int ret;
21512 if (TARGET_CMODEL != CMODEL_SMALL)
21513 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
21515 else
21516 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
21518 return ret;
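/* A toy mirror (illustrative only; the parameter names are invented) of
   the cost table above: outside the small code model the TOC address
   costs one or two extra insns depending on whether the register class
   does offset addressing; with -mminimal-toc the sequence is longer.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static int
toc_entry_cost (bool cmodel_small, bool minimal_toc, bool offset_ok)
{
  if (!cmodel_small)
    return offset_ok ? 2 : 1;
  return minimal_toc ? 6 : 3;
}

int
main (void)
{
  /* Prints "1 2 3 6".  */
  printf ("%d %d %d %d\n",
          toc_entry_cost (false, false, false),
          toc_entry_cost (false, false, true),
          toc_entry_cost (true, false, false),
          toc_entry_cost (true, true, false));
  return 0;
}
#endif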
21521 /* Helper function for rs6000_secondary_reload to determine whether the memory
21522 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
21523 needs reloading. Return negative if the memory is not handled by the memory
21524 helper functions (so a different reload method should be tried), 0 if no
21525 additional instructions are needed, and positive to give the extra cost of
21526 the memory access. */
21528 static int
21529 rs6000_secondary_reload_memory (rtx addr,
21530 enum reg_class rclass,
21531 machine_mode mode)
21533 int extra_cost = 0;
21534 rtx reg, and_arg, plus_arg0, plus_arg1;
21535 addr_mask_type addr_mask;
21536 const char *type = NULL;
21537 const char *fail_msg = NULL;
21539 if (GPR_REG_CLASS_P (rclass))
21540 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21542 else if (rclass == FLOAT_REGS)
21543 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21545 else if (rclass == ALTIVEC_REGS)
21546 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21548 /* For the combined VSX_REGS, turn off Altivec AND -16. */
21549 else if (rclass == VSX_REGS)
21550 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
21551 & ~RELOAD_REG_AND_M16);
21553 /* If the register allocator hasn't made up its mind yet on the register
21554 class to use, settle on defaults.
21555 else if (rclass == NO_REGS)
21557 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
21558 & ~RELOAD_REG_AND_M16);
21560 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
21561 addr_mask &= ~(RELOAD_REG_INDEXED
21562 | RELOAD_REG_PRE_INCDEC
21563 | RELOAD_REG_PRE_MODIFY);
21566 else
21567 addr_mask = 0;
21569 /* If the register isn't valid in this register class, just return now. */
21570 if ((addr_mask & RELOAD_REG_VALID) == 0)
21572 if (TARGET_DEBUG_ADDR)
21574 fprintf (stderr,
21575 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21576 "not valid in class\n",
21577 GET_MODE_NAME (mode), reg_class_names[rclass]);
21578 debug_rtx (addr);
21581 return -1;
21584 switch (GET_CODE (addr))
21586 /* Does the register class support auto update forms for this mode? We
21587 don't need a scratch register, since the powerpc only supports
21588 PRE_INC, PRE_DEC, and PRE_MODIFY. */
21589 case PRE_INC:
21590 case PRE_DEC:
21591 reg = XEXP (addr, 0);
21592 if (!base_reg_operand (addr, GET_MODE (reg)))
21594 fail_msg = "no base register #1";
21595 extra_cost = -1;
21598 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21600 extra_cost = 1;
21601 type = "update";
21603 break;
21605 case PRE_MODIFY:
21606 reg = XEXP (addr, 0);
21607 plus_arg1 = XEXP (addr, 1);
21608 if (!base_reg_operand (reg, GET_MODE (reg))
21609 || GET_CODE (plus_arg1) != PLUS
21610 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
21612 fail_msg = "bad PRE_MODIFY";
21613 extra_cost = -1;
21616 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
21618 extra_cost = 1;
21619 type = "update";
21621 break;
21623 /* Do we need to simulate AND -16 to clear the bottom address bits used
21624 in VMX load/stores? Only allow the AND for vector sizes. */
21625 case AND:
21626 and_arg = XEXP (addr, 0);
21627 if (GET_MODE_SIZE (mode) != 16
21628 || GET_CODE (XEXP (addr, 1)) != CONST_INT
21629 || INTVAL (XEXP (addr, 1)) != -16)
21631 fail_msg = "bad Altivec AND #1";
21632 extra_cost = -1;
21635 if (rclass != ALTIVEC_REGS)
21637 if (legitimate_indirect_address_p (and_arg, false))
21638 extra_cost = 1;
21640 else if (legitimate_indexed_address_p (and_arg, false))
21641 extra_cost = 2;
21643 else
21645 fail_msg = "bad Altivec AND #2";
21646 extra_cost = -1;
21649 type = "and";
21651 break;
21653 /* If this is an indirect address, make sure it is a base register. */
21654 case REG:
21655 case SUBREG:
21656 if (!legitimate_indirect_address_p (addr, false))
21658 extra_cost = 1;
21659 type = "move";
21661 break;
21663 /* If this is an indexed address, make sure the register class can handle
21664 indexed addresses for this mode. */
21665 case PLUS:
21666 plus_arg0 = XEXP (addr, 0);
21667 plus_arg1 = XEXP (addr, 1);
21669 /* (plus (plus (reg) (constant)) (constant)) is generated during
21670 push_reload processing, so handle it now. */
21671 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
21673 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21675 extra_cost = 1;
21676 type = "offset";
21680 /* (plus (plus (reg) (constant)) (reg)) is also generated during
21681 push_reload processing, so handle it now. */
21682 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
21684 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21686 extra_cost = 1;
21687 type = "indexed #2";
21691 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
21693 fail_msg = "no base register #2";
21694 extra_cost = -1;
21697 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
21699 if ((addr_mask & RELOAD_REG_INDEXED) == 0
21700 || !legitimate_indexed_address_p (addr, false))
21702 extra_cost = 1;
21703 type = "indexed";
21707 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
21708 && CONST_INT_P (plus_arg1))
21710 if (!quad_address_offset_p (INTVAL (plus_arg1)))
21712 extra_cost = 1;
21713 type = "vector d-form offset";
21717 /* Make sure the register class can handle offset addresses. */
21718 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
21720 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21722 extra_cost = 1;
21723 type = "offset #2";
21727 else
21729 fail_msg = "bad PLUS";
21730 extra_cost = -1;
21733 break;
21735 case LO_SUM:
21736 /* Quad offsets are restricted and can't handle normal addresses. */
21737 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
21739 extra_cost = -1;
21740 type = "vector d-form lo_sum";
21743 else if (!legitimate_lo_sum_address_p (mode, addr, false))
21745 fail_msg = "bad LO_SUM";
21746 extra_cost = -1;
21749 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21751 extra_cost = 1;
21752 type = "lo_sum";
21754 break;
21756 /* Static addresses need to create a TOC entry. */
21757 case CONST:
21758 case SYMBOL_REF:
21759 case LABEL_REF:
21760 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
21762 extra_cost = -1;
21763 type = "vector d-form lo_sum #2";
21766 else
21768 type = "address";
21769 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
21771 break;
21773 /* TOC references look like offsetable memory. */
21774 case UNSPEC:
21775 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
21777 fail_msg = "bad UNSPEC";
21778 extra_cost = -1;
21781 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
21783 extra_cost = -1;
21784 type = "vector d-form lo_sum #3";
21787 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21789 extra_cost = 1;
21790 type = "toc reference";
21792 break;
21794 default:
21796 fail_msg = "bad address";
21797 extra_cost = -1;
21801 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
21803 if (extra_cost < 0)
21804 fprintf (stderr,
21805 "rs6000_secondary_reload_memory error: mode = %s, "
21806 "class = %s, addr_mask = '%s', %s\n",
21807 GET_MODE_NAME (mode),
21808 reg_class_names[rclass],
21809 rs6000_debug_addr_mask (addr_mask, false),
21810 (fail_msg != NULL) ? fail_msg : "<bad address>");
21812 else
21813 fprintf (stderr,
21814 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21815 "addr_mask = '%s', extra cost = %d, %s\n",
21816 GET_MODE_NAME (mode),
21817 reg_class_names[rclass],
21818 rs6000_debug_addr_mask (addr_mask, false),
21819 extra_cost,
21820 (type) ? type : "<none>");
21822 debug_rtx (addr);
21825 return extra_cost;
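/* A toy model (illustrative only; the flag values and names below are
   simplified stand-ins for the real addr_mask bits) of how the routine
   above turns an address shape into a reload cost: a shape the register
   class supports costs nothing extra, a fixable one costs an insn, and
   an invalid class is rejected outright.  */
#if 0
#include <stdio.h>

enum { VALID = 1, INDEXED = 2, OFFSET = 4, PRE_INCDEC = 8 };

static int
cost_for_pre_inc (unsigned mask)
{
  if (!(mask & VALID))
    return -1;                         /* mode invalid in this class */
  return (mask & PRE_INCDEC) ? 0 : 1;  /* explicit add if no update form */
}

int
main (void)
{
  /* Prints "0 1 -1".  */
  printf ("%d %d %d\n", cost_for_pre_inc (VALID | PRE_INCDEC),
          cost_for_pre_inc (VALID | OFFSET), cost_for_pre_inc (OFFSET));
  return 0;
}
#endif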
21828 /* Helper function for rs6000_secondary_reload to return true if a move to a
21829 different register class is really a simple move. */
21831 static bool
21832 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
21833 enum rs6000_reg_type from_type,
21834 machine_mode mode)
21836 int size = GET_MODE_SIZE (mode);
21838 /* Add support for various direct moves available. In this function, we only
21839 look at cases where we don't need any extra registers, and one or more
21840 simple move insns are issued. Originally small integers are not allowed
21841 in FPR/VSX registers. Single precision binary floating is not a simple
21842 move because we need to convert to the single precision memory layout.
21843 The 4-byte SDmode can be moved. TDmode values are disallowed since they
21844 need special direct move handling, which we do not support yet. */
21845 if (TARGET_DIRECT_MOVE
21846 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21847 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
21849 if (TARGET_POWERPC64)
21851 /* ISA 2.07: MTVSRD or MVFVSRD. */
21852 if (size == 8)
21853 return true;
21855 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
21856 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
21857 return true;
21860 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
21861 if (TARGET_VSX_SMALL_INTEGER)
21863 if (mode == SImode)
21864 return true;
21866 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
21867 return true;
21870 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
21871 if (mode == SDmode)
21872 return true;
21875 /* Power6+: MFTGPR or MFFGPR. */
21876 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
21877 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
21878 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
21879 return true;
21881 /* Move to/from SPR. */
21882 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
21883 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
21884 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
21885 return true;
21887 return false;
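/* A hedged sketch (illustrative only; it ignores the TDmode and
   small-integer special cases and uses invented names) of the
   availability table above for 64-bit GPR<->VSX direct moves.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static bool
simple_gpr_vsx_move_p (int size, bool isa3_0)
{
  if (size == 8)
    return true;             /* ISA 2.07: mtvsrd / mfvsrd */
  if (size == 16 && isa3_0)
    return true;             /* ISA 3.0: mtvsrdd or mfvsrd + mfvsrld */
  return false;
}

int
main (void)
{
  /* Prints "1 0 1".  */
  printf ("%d %d %d\n", simple_gpr_vsx_move_p (8, false),
          simple_gpr_vsx_move_p (16, false),
          simple_gpr_vsx_move_p (16, true));
  return 0;
}
#endif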
21890 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
21891 special direct moves that involve allocating an extra register; return true
21892 and fill in SRI with the insn code and cost if such a helper exists, or
21893 false if not. */
21895 static bool
21896 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
21897 enum rs6000_reg_type from_type,
21898 machine_mode mode,
21899 secondary_reload_info *sri,
21900 bool altivec_p)
21902 bool ret = false;
21903 enum insn_code icode = CODE_FOR_nothing;
21904 int cost = 0;
21905 int size = GET_MODE_SIZE (mode);
21907 if (TARGET_POWERPC64 && size == 16)
21909 /* Handle moving 128-bit values from GPRs to VSX registers on
21910 ISA 2.07 (power8, power9) when running in 64-bit mode using
21911 XXPERMDI to glue the two 64-bit values back together. */
21912 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
21914 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
21915 icode = reg_addr[mode].reload_vsx_gpr;
21918 /* Handle moving 128-bit values from VSX registers to GPRs on
21919 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
21920 bottom 64-bit value. */
21921 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21923 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
21924 icode = reg_addr[mode].reload_gpr_vsx;
21928 else if (TARGET_POWERPC64 && mode == SFmode)
21930 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21932 cost = 3; /* xscvdpspn, mfvsrd, and. */
21933 icode = reg_addr[mode].reload_gpr_vsx;
21936 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
21938 cost = 2; /* mtvsrz, xscvspdpn. */
21939 icode = reg_addr[mode].reload_vsx_gpr;
21943 else if (!TARGET_POWERPC64 && size == 8)
21945 /* Handle moving 64-bit values from GPRs to floating point registers on
21946 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
21947 32-bit values back together. Altivec register classes must be handled
21948 specially since a different instruction is used, and the secondary
21949 reload support requires a single instruction class in the scratch
21950 register constraint. However, right now TFmode is not allowed in
21951 Altivec registers, so the pattern will never match. */
21952 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
21954 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
21955 icode = reg_addr[mode].reload_fpr_gpr;
21959 if (icode != CODE_FOR_nothing)
21961 ret = true;
21962 if (sri)
21964 sri->icode = icode;
21965 sri->extra_cost = cost;
21969 return ret;
21972 /* Return whether a move between two register classes can be done either
21973 directly (simple move) or via a pattern that uses a single extra temporary
21974 (using ISA 2.07's direct move in this case). */
21976 static bool
21977 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
21978 enum rs6000_reg_type from_type,
21979 machine_mode mode,
21980 secondary_reload_info *sri,
21981 bool altivec_p)
21983 /* Fall back to load/store reloads if either type is not a register. */
21984 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
21985 return false;
21987 /* If we haven't allocated registers yet, assume the move can be done for the
21988 standard register types. */
21989 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
21990 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
21991 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
21992 return true;
21994 /* A move within the same set of registers is a simple move for
21995 registers. */
21996 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
21997 return true;
21999 /* Check whether a simple move can be done directly. */
22000 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
22002 if (sri)
22004 sri->icode = CODE_FOR_nothing;
22005 sri->extra_cost = 0;
22007 return true;
22010 /* Now check if we can do it in a few steps. */
22011 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
22012 altivec_p);
22015 /* Inform reload about cases where moving X with a mode MODE to a register in
22016 RCLASS requires an extra scratch or immediate register. Return the class
22017 needed for the immediate register.
22019 For VSX and Altivec, we may need a register to convert sp+offset into
22020 reg+sp.
22022 For misaligned 64-bit gpr loads and stores we need a register to
22023 convert an offset address to indirect. */
22025 static reg_class_t
22026 rs6000_secondary_reload (bool in_p,
22027 rtx x,
22028 reg_class_t rclass_i,
22029 machine_mode mode,
22030 secondary_reload_info *sri)
22032 enum reg_class rclass = (enum reg_class) rclass_i;
22033 reg_class_t ret = ALL_REGS;
22034 enum insn_code icode;
22035 bool default_p = false;
22036 bool done_p = false;
22038 /* Allow subreg of memory before/during reload. */
22039 bool memory_p = (MEM_P (x)
22040 || (!reload_completed && GET_CODE (x) == SUBREG
22041 && MEM_P (SUBREG_REG (x))));
22043 sri->icode = CODE_FOR_nothing;
22044 sri->t_icode = CODE_FOR_nothing;
22045 sri->extra_cost = 0;
22046 icode = ((in_p)
22047 ? reg_addr[mode].reload_load
22048 : reg_addr[mode].reload_store);
22050 if (REG_P (x) || register_operand (x, mode))
22052 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
22053 bool altivec_p = (rclass == ALTIVEC_REGS);
22054 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
22056 if (!in_p)
22057 std::swap (to_type, from_type);
22059 /* Can we do a direct move of some sort? */
22060 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
22061 altivec_p))
22063 icode = (enum insn_code)sri->icode;
22064 default_p = false;
22065 done_p = true;
22066 ret = NO_REGS;
22070 /* Make sure 0.0 is not reloaded or forced into memory. */
22071 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
22073 ret = NO_REGS;
22074 default_p = false;
22075 done_p = true;
22078 /* If this is a scalar floating point value and we want to load it into the
22079 traditional Altivec registers, do it via a move via a traditional floating
22080 point register, unless we have D-form addressing. Also make sure that
22081 non-zero constants use a FPR. */
22082 if (!done_p && reg_addr[mode].scalar_in_vmx_p
22083 && !mode_supports_vmx_dform (mode)
22084 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
22085 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
22087 ret = FLOAT_REGS;
22088 default_p = false;
22089 done_p = true;
22092 /* Handle reload of load/stores if we have reload helper functions. */
22093 if (!done_p && icode != CODE_FOR_nothing && memory_p)
22095 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
22096 mode);
22098 if (extra_cost >= 0)
22100 done_p = true;
22101 ret = NO_REGS;
22102 if (extra_cost > 0)
22104 sri->extra_cost = extra_cost;
22105 sri->icode = icode;
22110 /* Handle unaligned loads and stores of integer registers. */
22111 if (!done_p && TARGET_POWERPC64
22112 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22113 && memory_p
22114 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
22116 rtx addr = XEXP (x, 0);
22117 rtx off = address_offset (addr);
22119 if (off != NULL_RTX)
22121 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22122 unsigned HOST_WIDE_INT offset = INTVAL (off);
22124 /* We need a secondary reload when our legitimate_address_p
22125 says the address is good (as otherwise the entire address
22126 will be reloaded), and the offset is not a multiple of
22127 four or we have an address wrap. Address wrap will only
22128 occur for LO_SUMs since legitimate_offset_address_p
22129 rejects addresses for 16-byte mems that will wrap. */
22130 if (GET_CODE (addr) == LO_SUM
22131 ? (1 /* legitimate_address_p allows any offset for lo_sum */
22132 && ((offset & 3) != 0
22133 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
22134 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
22135 && (offset & 3) != 0))
22137 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
22138 if (in_p)
22139 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
22140 : CODE_FOR_reload_di_load);
22141 else
22142 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
22143 : CODE_FOR_reload_di_store);
22144 sri->extra_cost = 2;
22145 ret = NO_REGS;
22146 done_p = true;
22148 else
22149 default_p = true;
22151 else
22152 default_p = true;
22155 if (!done_p && !TARGET_POWERPC64
22156 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22157 && memory_p
22158 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
22160 rtx addr = XEXP (x, 0);
22161 rtx off = address_offset (addr);
22163 if (off != NULL_RTX)
22165 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22166 unsigned HOST_WIDE_INT offset = INTVAL (off);
22168 /* We need a secondary reload when our legitimate_address_p
22169 says the address is good (as otherwise the entire address
22170 will be reloaded), and we have a wrap.
22172 legitimate_lo_sum_address_p allows LO_SUM addresses to
22173 have any offset so test for wrap in the low 16 bits.
22175 legitimate_offset_address_p checks for the range
22176 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
22177 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
22178 [0x7ff4,0x7fff] respectively, so test for the
22179 intersection of these ranges, [0x7ffc,0x7fff] and
22180 [0x7ff4,0x7ff7] respectively.
22182 Note that the address we see here may have been
22183 manipulated by legitimize_reload_address. */
22184 if (GET_CODE (addr) == LO_SUM
22185 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
22186 : offset - (0x8000 - extra) < UNITS_PER_WORD)
22188 if (in_p)
22189 sri->icode = CODE_FOR_reload_si_load;
22190 else
22191 sri->icode = CODE_FOR_reload_si_store;
22192 sri->extra_cost = 2;
22193 ret = NO_REGS;
22194 done_p = true;
22196 else
22197 default_p = true;
22199 else
22200 default_p = true;
22203 if (!done_p)
22204 default_p = true;
22206 if (default_p)
22207 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
22209 gcc_assert (ret != ALL_REGS);
22211 if (TARGET_DEBUG_ADDR)
22213 fprintf (stderr,
22214 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
22215 "mode = %s",
22216 reg_class_names[ret],
22217 in_p ? "true" : "false",
22218 reg_class_names[rclass],
22219 GET_MODE_NAME (mode));
22221 if (reload_completed)
22222 fputs (", after reload", stderr);
22224 if (!done_p)
22225 fputs (", done_p not set", stderr);
22227 if (default_p)
22228 fputs (", default secondary reload", stderr);
22230 if (sri->icode != CODE_FOR_nothing)
22231 fprintf (stderr, ", reload func = %s, extra cost = %d",
22232 insn_data[sri->icode].name, sri->extra_cost);
22234 else if (sri->extra_cost > 0)
22235 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
22237 fputs ("\n", stderr);
22238 debug_rtx (x);
22241 return ret;
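/* A standalone check (illustrative only; the function name is invented)
   of the wrap test used above: a D-form displacement is sign-extended
   from 16 bits, so an access whose last word starts at OFFSET+EXTRA
   wraps exactly when the biased low 16 bits reach 0x10000 - EXTRA.  */
#if 0
#include <stdio.h>

static int
lo_sum_offset_wraps (unsigned long long offset, unsigned extra)
{
  return ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra;
}

int
main (void)
{
  /* Prints "1 0": 0x7ffc + 8 crosses 0x7fff, 0x7ff0 + 8 does not.  */
  printf ("%d %d\n", lo_sum_offset_wraps (0x7ffc, 8),
          lo_sum_offset_wraps (0x7ff0, 8));
  return 0;
}
#endif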
22244 /* Better tracing for rs6000_secondary_reload_inner. */
22246 static void
22247 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
22248 bool store_p)
22250 rtx set, clobber;
22252 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
22254 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
22255 store_p ? "store" : "load");
22257 if (store_p)
22258 set = gen_rtx_SET (mem, reg);
22259 else
22260 set = gen_rtx_SET (reg, mem);
22262 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
22263 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
22266 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
22267 ATTRIBUTE_NORETURN;
22269 static void
22270 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
22271 bool store_p)
22273 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
22274 gcc_unreachable ();
22277 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
22278 reload helper functions. These were identified in
22279 rs6000_secondary_reload_memory, and if reload decided to use the secondary
22280 reload, it calls the insns:
22281 reload_<RELOAD:mode>_<P:mptrsize>_store
22282 reload_<RELOAD:mode>_<P:mptrsize>_load
22284 which in turn calls this function, to do whatever is necessary to create
22285 valid addresses. */
22287 void
22288 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
22290 int regno = true_regnum (reg);
22291 machine_mode mode = GET_MODE (reg);
22292 addr_mask_type addr_mask;
22293 rtx addr;
22294 rtx new_addr;
22295 rtx op_reg, op0, op1;
22296 rtx and_op;
22297 rtx cc_clobber;
22298 rtvec rv;
22300 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
22301 || !base_reg_operand (scratch, GET_MODE (scratch)))
22302 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22304 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
22305 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
22307 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
22308 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
22310 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
22311 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
22313 else
22314 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22316 /* Make sure the mode is valid in this register class. */
22317 if ((addr_mask & RELOAD_REG_VALID) == 0)
22318 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22320 if (TARGET_DEBUG_ADDR)
22321 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
22323 new_addr = addr = XEXP (mem, 0);
22324 switch (GET_CODE (addr))
22326 /* Does the register class support auto update forms for this mode? If
22327 not, do the update now. We don't need a scratch register, since the
22328 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
22329 case PRE_INC:
22330 case PRE_DEC:
22331 op_reg = XEXP (addr, 0);
22332 if (!base_reg_operand (op_reg, Pmode))
22333 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22335 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
22337 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
22338 new_addr = op_reg;
22340 break;
22342 case PRE_MODIFY:
22343 op0 = XEXP (addr, 0);
22344 op1 = XEXP (addr, 1);
22345 if (!base_reg_operand (op0, Pmode)
22346 || GET_CODE (op1) != PLUS
22347 || !rtx_equal_p (op0, XEXP (op1, 0)))
22348 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22350 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22352 emit_insn (gen_rtx_SET (op0, op1));
22353 new_addr = reg;
22355 break;
22357 /* Do we need to simulate AND -16 to clear the bottom address bits used
22358 in VMX load/stores? */
22359 case AND:
22360 op0 = XEXP (addr, 0);
22361 op1 = XEXP (addr, 1);
22362 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
22364 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
22365 op_reg = op0;
22367 else if (GET_CODE (op1) == PLUS)
22369 emit_insn (gen_rtx_SET (scratch, op1));
22370 op_reg = scratch;
22373 else
22374 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22376 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
22377 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
22378 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
22379 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
22380 new_addr = scratch;
22382 break;
22384 /* If this is an indirect address, make sure it is a base register. */
22385 case REG:
22386 case SUBREG:
22387 if (!base_reg_operand (addr, GET_MODE (addr)))
22389 emit_insn (gen_rtx_SET (scratch, addr));
22390 new_addr = scratch;
22392 break;
22394 /* If this is an indexed address, make sure the register class can handle
22395 indexed addresses for this mode. */
22396 case PLUS:
22397 op0 = XEXP (addr, 0);
22398 op1 = XEXP (addr, 1);
22399 if (!base_reg_operand (op0, Pmode))
22400 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22402 else if (int_reg_operand (op1, Pmode))
22404 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22406 emit_insn (gen_rtx_SET (scratch, addr));
22407 new_addr = scratch;
22411 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
22413 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
22414 || !quad_address_p (addr, mode, false))
22416 emit_insn (gen_rtx_SET (scratch, addr));
22417 new_addr = scratch;
22421 /* Make sure the register class can handle offset addresses. */
22422 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22424 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22426 emit_insn (gen_rtx_SET (scratch, addr));
22427 new_addr = scratch;
22431 else
22432 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22434 break;
22436 case LO_SUM:
22437 op0 = XEXP (addr, 0);
22438 op1 = XEXP (addr, 1);
22439 if (!base_reg_operand (op0, Pmode))
22440 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22442 else if (int_reg_operand (op1, Pmode))
22444 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22446 emit_insn (gen_rtx_SET (scratch, addr));
22447 new_addr = scratch;
22451 /* Quad offsets are restricted and can't handle normal addresses. */
22452 else if (mode_supports_vsx_dform_quad (mode))
22454 emit_insn (gen_rtx_SET (scratch, addr));
22455 new_addr = scratch;
22458 /* Make sure the register class can handle offset addresses. */
22459 else if (legitimate_lo_sum_address_p (mode, addr, false))
22461 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22463 emit_insn (gen_rtx_SET (scratch, addr));
22464 new_addr = scratch;
22468 else
22469 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22471 break;
22473 case SYMBOL_REF:
22474 case CONST:
22475 case LABEL_REF:
22476 rs6000_emit_move (scratch, addr, Pmode);
22477 new_addr = scratch;
22478 break;
22480 default:
22481 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22484 /* Adjust the address if it changed. */
22485 if (addr != new_addr)
22487 mem = replace_equiv_address_nv (mem, new_addr);
22488 if (TARGET_DEBUG_ADDR)
22489 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
22492 /* Now create the move. */
22493 if (store_p)
22494 emit_insn (gen_rtx_SET (mem, reg));
22495 else
22496 emit_insn (gen_rtx_SET (reg, mem));
22498 return;
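/* A quick standalone reminder (illustrative only) of what the simulated
   AND -16 above computes: lvx/stvx ignore the low four address bits,
   which is the same as masking the effective address with -16.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uintptr_t addr = 0x1003f;
  /* Prints "0x10030": the address rounded down to 16 bytes.  */
  printf ("0x%lx\n", (unsigned long) (addr & (uintptr_t) -16));
  return 0;
}
#endif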
22501 /* Convert reloads involving 64-bit gprs and misaligned offset
22502 addressing, or multiple 32-bit gprs and offsets that are too large,
22503 to use indirect addressing. */
22505 void
22506 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
22508 int regno = true_regnum (reg);
22509 enum reg_class rclass;
22510 rtx addr;
22511 rtx scratch_or_premodify = scratch;
22513 if (TARGET_DEBUG_ADDR)
22515 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
22516 store_p ? "store" : "load");
22517 fprintf (stderr, "reg:\n");
22518 debug_rtx (reg);
22519 fprintf (stderr, "mem:\n");
22520 debug_rtx (mem);
22521 fprintf (stderr, "scratch:\n");
22522 debug_rtx (scratch);
22525 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
22526 gcc_assert (GET_CODE (mem) == MEM);
22527 rclass = REGNO_REG_CLASS (regno);
22528 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
22529 addr = XEXP (mem, 0);
22531 if (GET_CODE (addr) == PRE_MODIFY)
22533 gcc_assert (REG_P (XEXP (addr, 0))
22534 && GET_CODE (XEXP (addr, 1)) == PLUS
22535 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
22536 scratch_or_premodify = XEXP (addr, 0);
22537 if (!HARD_REGISTER_P (scratch_or_premodify))
22538 /* If we have a pseudo here then reload will have arranged
22539 to have it replaced, but only in the original insn.
22540 Use the replacement here too. */
22541 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
22543 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
22544 expressions from the original insn, without unsharing them.
22545 Any RTL that points into the original insn will of course
22546 have register replacements applied. That is why we don't
22547 need to look for replacements under the PLUS. */
22548 addr = XEXP (addr, 1);
22550 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
22552 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
22554 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
22556 /* Now create the move. */
22557 if (store_p)
22558 emit_insn (gen_rtx_SET (mem, reg));
22559 else
22560 emit_insn (gen_rtx_SET (reg, mem));
22562 return;
22565 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
22566 this function has any SDmode references. If we are on a power7 or later, we
22567 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
22568 can load/store the value. */
22570 static void
22571 rs6000_alloc_sdmode_stack_slot (void)
22573 tree t;
22574 basic_block bb;
22575 gimple_stmt_iterator gsi;
22577 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
22578 /* We use a different approach for dealing with the secondary
22579 memory in LRA. */
22580 if (ira_use_lra_p)
22581 return;
22583 if (TARGET_NO_SDMODE_STACK)
22584 return;
22586 FOR_EACH_BB_FN (bb, cfun)
22587 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
22589 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
22590 if (ret)
22592 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22593 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22594 SDmode, 0);
22595 return;
22599 /* Check for any SDmode parameters of the function. */
22600 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
22602 if (TREE_TYPE (t) == error_mark_node)
22603 continue;
22605 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
22606 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
22608 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22609 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22610 SDmode, 0);
22611 return;
22616 static void
22617 rs6000_instantiate_decls (void)
22619 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
22620 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
22623 /* Given an rtx X being reloaded into a reg required to be
22624 in class CLASS, return the class of reg to actually use.
22625 In general this is just CLASS; but on some machines
22626 in some cases it is preferable to use a more restrictive class.
22628 On the RS/6000, we have to return NO_REGS when we want to reload a
22629 floating-point CONST_DOUBLE to force it to be copied to memory.
22631 We also don't want to reload integer values into floating-point
22632 registers if we can at all help it. In fact, this can
22633 cause reload to die, if it tries to generate a reload of CTR
22634 into a FP register and discovers it doesn't have the memory location
22635 required.
22637 ??? Would it be a good idea to have reload do the converse, that is
22638 try to reload floating modes into FP registers if possible?
22641 static enum reg_class
22642 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
22644 machine_mode mode = GET_MODE (x);
22645 bool is_constant = CONSTANT_P (x);
22647 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
22648 reload class for it. */
22649 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22650 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
22651 return NO_REGS;
22653 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
22654 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
22655 return NO_REGS;
22657 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
22658 the reloading of address expressions using PLUS into floating point
22659 registers. */
22660 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
22662 if (is_constant)
22664 /* Zero is always allowed in all VSX registers. */
22665 if (x == CONST0_RTX (mode))
22666 return rclass;
22668 /* If this is a vector constant that can be formed with a few Altivec
22669 instructions, we want altivec registers. */
22670 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
22671 return ALTIVEC_REGS;
22673 /* If this is an integer constant that can easily be loaded into
22674 vector registers, allow it. */
22675 if (CONST_INT_P (x))
22677 HOST_WIDE_INT value = INTVAL (x);
22679 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
22680 2.06 can generate it in the Altivec registers with
22681 VSPLTI<x>. */
22682 if (value == -1)
22684 if (TARGET_P8_VECTOR)
22685 return rclass;
22686 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22687 return ALTIVEC_REGS;
22688 else
22689 return NO_REGS;
22692 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
22693 a sign extend in the Altivec registers. */
22694 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
22695 && TARGET_VSX_SMALL_INTEGER
22696 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
22697 return ALTIVEC_REGS;
22700 /* Force constant to memory. */
22701 return NO_REGS;
22704 /* D-form addressing can easily reload the value. */
22705 if (mode_supports_vmx_dform (mode)
22706 || mode_supports_vsx_dform_quad (mode))
22707 return rclass;
22709 /* If this is a scalar floating point value and we don't have D-form
22710 addressing, prefer the traditional floating point registers so that we
22711 can use D-form (register+offset) addressing. */
22712 if (rclass == VSX_REGS
22713 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
22714 return FLOAT_REGS;
22716 /* Prefer the Altivec registers if Altivec is handling the vector
22717 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
22718 loads. */
22719 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
22720 || mode == V1TImode)
22721 return ALTIVEC_REGS;
22723 return rclass;
22726 if (is_constant || GET_CODE (x) == PLUS)
22728 if (reg_class_subset_p (GENERAL_REGS, rclass))
22729 return GENERAL_REGS;
22730 if (reg_class_subset_p (BASE_REGS, rclass))
22731 return BASE_REGS;
22732 return NO_REGS;
22735 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
22736 return GENERAL_REGS;
22738 return rclass;
22741 /* Debug version of rs6000_preferred_reload_class. */
22742 static enum reg_class
22743 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
22745 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
22747 fprintf (stderr,
22748 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
22749 "mode = %s, x:\n",
22750 reg_class_names[ret], reg_class_names[rclass],
22751 GET_MODE_NAME (GET_MODE (x)));
22752 debug_rtx (x);
22754 return ret;
22757 /* If we are copying between FP or AltiVec registers and anything else, we need
22758 a memory location. The exception is when we are targeting ppc64 and the
22759 fpr/gpr direct-move instructions are available. Also, under VSX, you
22760 can copy vector registers from the FP register set to the Altivec register
22761 set and vice versa. */
22763 static bool
22764 rs6000_secondary_memory_needed (enum reg_class from_class,
22765 enum reg_class to_class,
22766 machine_mode mode)
22768 enum rs6000_reg_type from_type, to_type;
22769 bool altivec_p = ((from_class == ALTIVEC_REGS)
22770 || (to_class == ALTIVEC_REGS));
22772 /* If a simple/direct move is available, we don't need secondary memory. */
22773 from_type = reg_class_to_reg_type[(int)from_class];
22774 to_type = reg_class_to_reg_type[(int)to_class];
22776 if (rs6000_secondary_reload_move (to_type, from_type, mode,
22777 (secondary_reload_info *)0, altivec_p))
22778 return false;
22780 /* If we have a floating point or vector register class, we need to use
22781 memory to transfer the data. */
22782 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
22783 return true;
22785 return false;
22788 /* Debug version of rs6000_secondary_memory_needed. */
22789 static bool
22790 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
22791 enum reg_class to_class,
22792 machine_mode mode)
22794 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
22796 fprintf (stderr,
22797 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
22798 "to_class = %s, mode = %s\n",
22799 ret ? "true" : "false",
22800 reg_class_names[from_class],
22801 reg_class_names[to_class],
22802 GET_MODE_NAME (mode));
22804 return ret;
22807 /* Return the register class of a scratch register needed to copy IN into
22808 or out of a register in RCLASS in MODE. If it can be done directly,
22809 NO_REGS is returned. */
22811 static enum reg_class
22812 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
22813 rtx in)
22815 int regno;
22817 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
22818 #if TARGET_MACHO
22819 && MACHOPIC_INDIRECT
22820 #endif
22823 /* We cannot copy a symbolic operand directly into anything
22824 other than BASE_REGS for TARGET_ELF. So indicate that a
22825 register from BASE_REGS is needed as an intermediate
22826 register.
22828 On Darwin, pic addresses require a load from memory, which
22829 needs a base register. */
22830 if (rclass != BASE_REGS
22831 && (GET_CODE (in) == SYMBOL_REF
22832 || GET_CODE (in) == HIGH
22833 || GET_CODE (in) == LABEL_REF
22834 || GET_CODE (in) == CONST))
22835 return BASE_REGS;
22838 if (GET_CODE (in) == REG)
22840 regno = REGNO (in);
22841 if (regno >= FIRST_PSEUDO_REGISTER)
22843 regno = true_regnum (in);
22844 if (regno >= FIRST_PSEUDO_REGISTER)
22845 regno = -1;
22848 else if (GET_CODE (in) == SUBREG)
22850 regno = true_regnum (in);
22851 if (regno >= FIRST_PSEUDO_REGISTER)
22852 regno = -1;
22854 else
22855 regno = -1;
22857 /* If we have VSX register moves, prefer moving scalar values between
22858 Altivec registers and GPR by going via an FPR (and then via memory)
22859 instead of reloading the secondary memory address for Altivec moves. */
22860 if (TARGET_VSX
22861 && GET_MODE_SIZE (mode) < 16
22862 && !mode_supports_vmx_dform (mode)
22863 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
22864 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
22865 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
22866 && (regno >= 0 && INT_REGNO_P (regno)))))
22867 return FLOAT_REGS;
22869 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
22870 into anything. */
22871 if (rclass == GENERAL_REGS || rclass == BASE_REGS
22872 || (regno >= 0 && INT_REGNO_P (regno)))
22873 return NO_REGS;
22875 /* Constants, memory, and VSX registers can go into VSX registers (both the
22876 traditional floating point and the altivec registers). */
22877 if (rclass == VSX_REGS
22878 && (regno == -1 || VSX_REGNO_P (regno)))
22879 return NO_REGS;
22881 /* Constants, memory, and FP registers can go into FP registers. */
22882 if ((regno == -1 || FP_REGNO_P (regno))
22883 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
22884 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
22886 /* Memory and AltiVec registers can go into AltiVec registers. */
22887 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
22888 && rclass == ALTIVEC_REGS)
22889 return NO_REGS;
22891 /* We can copy among the CR registers. */
22892 if ((rclass == CR_REGS || rclass == CR0_REGS)
22893 && regno >= 0 && CR_REGNO_P (regno))
22894 return NO_REGS;
22896 /* Otherwise, we need GENERAL_REGS. */
22897 return GENERAL_REGS;
22900 /* Debug version of rs6000_secondary_reload_class. */
22901 static enum reg_class
22902 rs6000_debug_secondary_reload_class (enum reg_class rclass,
22903 machine_mode mode, rtx in)
22905 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
22906 fprintf (stderr,
22907 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
22908 "mode = %s, input rtx:\n",
22909 reg_class_names[ret], reg_class_names[rclass],
22910 GET_MODE_NAME (mode));
22911 debug_rtx (in);
22913 return ret;
22916 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
22918 static bool
22919 rs6000_cannot_change_mode_class (machine_mode from,
22920 machine_mode to,
22921 enum reg_class rclass)
22923 unsigned from_size = GET_MODE_SIZE (from);
22924 unsigned to_size = GET_MODE_SIZE (to);
22926 if (from_size != to_size)
22928 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
22930 if (reg_classes_intersect_p (xclass, rclass))
22932 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
22933 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
22934 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
22935 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
22937 /* Don't allow 64-bit types to overlap with 128-bit types that take a
22938 single register under VSX because the scalar part of the register
22939 is in the upper 64-bits, and not the lower 64-bits. Types like
22940 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
22941 IEEE floating point can't overlap, and neither can small
22942 values. */
22944 if (to_float128_vector_p && from_float128_vector_p)
22945 return false;
22947 else if (to_float128_vector_p || from_float128_vector_p)
22948 return true;
22950 /* TDmode in floating-mode registers must always go into a register
22951 pair with the most significant word in the even-numbered register
22952 to match ISA requirements. In little-endian mode, this does not
22953 match subreg numbering, so we cannot allow subregs. */
22954 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
22955 return true;
22957 if (from_size < 8 || to_size < 8)
22958 return true;
22960 if (from_size == 8 && (8 * to_nregs) != to_size)
22961 return true;
22963 if (to_size == 8 && (8 * from_nregs) != from_size)
22964 return true;
22966 return false;
22968 else
22969 return false;
22972 if (TARGET_E500_DOUBLE
22973 && ((((to) == DFmode) + ((from) == DFmode)) == 1
22974 || (((to) == TFmode) + ((from) == TFmode)) == 1
22975 || (((to) == IFmode) + ((from) == IFmode)) == 1
22976 || (((to) == KFmode) + ((from) == KFmode)) == 1
22977 || (((to) == DDmode) + ((from) == DDmode)) == 1
22978 || (((to) == TDmode) + ((from) == TDmode)) == 1
22979 || (((to) == DImode) + ((from) == DImode)) == 1))
22980 return true;
22982 /* Since the VSX register set includes traditional floating point registers
22983 and altivec registers, just check for the size being different instead of
22984 trying to check whether the modes are vector modes. Otherwise it won't
22985 allow say DF and DI to change classes. For types like TFmode and TDmode
22986 that take 2 64-bit registers, rather than a single 128-bit register, don't
22987 allow subregs of those types to other 128 bit types. */
22988 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
22990 unsigned num_regs = (from_size + 15) / 16;
22991 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
22992 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
22993 return true;
22995 return (from_size != 8 && from_size != 16);
22998 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
22999 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
23000 return true;
23002 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
23003 && reg_classes_intersect_p (GENERAL_REGS, rclass))
23004 return true;
23006 return false;
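/* A toy model (illustrative only; the function names are invented) of
   the VSX size arithmetic above: values round up to 16-byte registers,
   and mode changes are rejected unless the value is exactly one 8-byte
   or one 16-byte register.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static unsigned
regs_needed_128 (unsigned size_bytes)
{
  return (size_bytes + 15) / 16;   /* round up to 128-bit registers */
}

static bool
vsx_mode_change_rejected_p (unsigned from_size)
{
  return from_size != 8 && from_size != 16;
}

int
main (void)
{
  /* Prints "1 2" then "1 0 0".  */
  printf ("%u %u\n", regs_needed_128 (16), regs_needed_128 (17));
  printf ("%d %d %d\n", vsx_mode_change_rejected_p (4),
          vsx_mode_change_rejected_p (8), vsx_mode_change_rejected_p (16));
  return 0;
}
#endif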
23009 /* Debug version of rs6000_cannot_change_mode_class. */
23010 static bool
23011 rs6000_debug_cannot_change_mode_class (machine_mode from,
23012 machine_mode to,
23013 enum reg_class rclass)
23015 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
23017 fprintf (stderr,
23018 "rs6000_cannot_change_mode_class, return %s, from = %s, "
23019 "to = %s, rclass = %s\n",
23020 ret ? "true" : "false",
23021 GET_MODE_NAME (from), GET_MODE_NAME (to),
23022 reg_class_names[rclass]);
23024 return ret;
23027 /* Return a string to do a move operation of 128 bits of data. */
23029 const char *
23030 rs6000_output_move_128bit (rtx operands[])
23032 rtx dest = operands[0];
23033 rtx src = operands[1];
23034 machine_mode mode = GET_MODE (dest);
23035 int dest_regno;
23036 int src_regno;
23037 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
23038 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
23040 if (REG_P (dest))
23042 dest_regno = REGNO (dest);
23043 dest_gpr_p = INT_REGNO_P (dest_regno);
23044 dest_fp_p = FP_REGNO_P (dest_regno);
23045 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
23046 dest_vsx_p = dest_fp_p | dest_vmx_p;
23048 else
23050 dest_regno = -1;
23051 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
23054 if (REG_P (src))
23056 src_regno = REGNO (src);
23057 src_gpr_p = INT_REGNO_P (src_regno);
23058 src_fp_p = FP_REGNO_P (src_regno);
23059 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
23060 src_vsx_p = src_fp_p | src_vmx_p;
23062 else
23064 src_regno = -1;
23065 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
23068 /* Register moves. */
23069 if (dest_regno >= 0 && src_regno >= 0)
23071 if (dest_gpr_p)
23073 if (src_gpr_p)
23074 return "#";
23076 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
23077 return (WORDS_BIG_ENDIAN
23078 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
23079 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
23081 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
23082 return "#";
23085 else if (TARGET_VSX && dest_vsx_p)
23087 if (src_vsx_p)
23088 return "xxlor %x0,%x1,%x1";
23090 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
23091 return (WORDS_BIG_ENDIAN
23092 ? "mtvsrdd %x0,%1,%L1"
23093 : "mtvsrdd %x0,%L1,%1");
23095 else if (TARGET_DIRECT_MOVE && src_gpr_p)
23096 return "#";
23099 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
23100 return "vor %0,%1,%1";
23102 else if (dest_fp_p && src_fp_p)
23103 return "#";
23106 /* Loads. */
23107 else if (dest_regno >= 0 && MEM_P (src))
23109 if (dest_gpr_p)
23111 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23112 return "lq %0,%1";
23113 else
23114 return "#";
23117 else if (TARGET_ALTIVEC && dest_vmx_p
23118 && altivec_indexed_or_indirect_operand (src, mode))
23119 return "lvx %0,%y1";
23121 else if (TARGET_VSX && dest_vsx_p)
23123 if (mode_supports_vsx_dform_quad (mode)
23124 && quad_address_p (XEXP (src, 0), mode, true))
23125 return "lxv %x0,%1";
23127 else if (TARGET_P9_VECTOR)
23128 return "lxvx %x0,%y1";
23130 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23131 return "lxvw4x %x0,%y1";
23133 else
23134 return "lxvd2x %x0,%y1";
23137 else if (TARGET_ALTIVEC && dest_vmx_p)
23138 return "lvx %0,%y1";
23140 else if (dest_fp_p)
23141 return "#";
23144 /* Stores. */
23145 else if (src_regno >= 0 && MEM_P (dest))
23147 if (src_gpr_p)
23149 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23150 return "stq %1,%0";
23151 else
23152 return "#";
23155 else if (TARGET_ALTIVEC && src_vmx_p
23156 && altivec_indexed_or_indirect_operand (dest, mode))
23157 return "stvx %1,%y0";
23159 else if (TARGET_VSX && src_vsx_p)
23161 if (mode_supports_vsx_dform_quad (mode)
23162 && quad_address_p (XEXP (dest, 0), mode, true))
23163 return "stxv %x1,%0";
23165 else if (TARGET_P9_VECTOR)
23166 return "stxvx %x1,%y0";
23168 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23169 return "stxvw4x %x1,%y0";
23171 else
23172 return "stxvd2x %x1,%y0";
23175 else if (TARGET_ALTIVEC && src_vmx_p)
23176 return "stvx %1,%y0";
23178 else if (src_fp_p)
23179 return "#";
23182 /* Constants. */
23183 else if (dest_regno >= 0
23184 && (GET_CODE (src) == CONST_INT
23185 || GET_CODE (src) == CONST_WIDE_INT
23186 || GET_CODE (src) == CONST_DOUBLE
23187 || GET_CODE (src) == CONST_VECTOR))
23189 if (dest_gpr_p)
23190 return "#";
23192 else if ((dest_vmx_p && TARGET_ALTIVEC)
23193 || (dest_vsx_p && TARGET_VSX))
23194 return output_vec_const_move (operands);
23197 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
23200 /* Validate a 128-bit move. */
23201 bool
23202 rs6000_move_128bit_ok_p (rtx operands[])
23204 machine_mode mode = GET_MODE (operands[0]);
23205 return (gpc_reg_operand (operands[0], mode)
23206 || gpc_reg_operand (operands[1], mode));
23209 /* Return true if a 128-bit move needs to be split. */
23210 bool
23211 rs6000_split_128bit_ok_p (rtx operands[])
23213 if (!reload_completed)
23214 return false;
23216 if (!gpr_or_gpr_p (operands[0], operands[1]))
23217 return false;
23219 if (quad_load_store_p (operands[0], operands[1]))
23220 return false;
23222 return true;
23226 /* Given a comparison operation, return the bit number in CCR to test. We
23227 know this is a valid comparison.
23229 SCC_P is 1 if this is for an scc. That means that %D will have been
23230 used instead of %C, so the bits will be in different places.
23232 Return -1 if OP isn't a valid comparison for some reason. */
23235 ccr_bit (rtx op, int scc_p)
23237 enum rtx_code code = GET_CODE (op);
23238 machine_mode cc_mode;
23239 int cc_regnum;
23240 int base_bit;
23241 rtx reg;
23243 if (!COMPARISON_P (op))
23244 return -1;
23246 reg = XEXP (op, 0);
23248 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
23250 cc_mode = GET_MODE (reg);
23251 cc_regnum = REGNO (reg);
23252 base_bit = 4 * (cc_regnum - CR0_REGNO);
23254 validate_condition_mode (code, cc_mode);
23256 /* When generating a sCOND operation, only positive conditions are
23257 allowed. */
23258 gcc_assert (!scc_p
23259 || code == EQ || code == GT || code == LT || code == UNORDERED
23260 || code == GTU || code == LTU);
23262 switch (code)
23264 case NE:
23265 return scc_p ? base_bit + 3 : base_bit + 2;
23266 case EQ:
23267 return base_bit + 2;
23268 case GT: case GTU: case UNLE:
23269 return base_bit + 1;
23270 case LT: case LTU: case UNGE:
23271 return base_bit;
23272 case ORDERED: case UNORDERED:
23273 return base_bit + 3;
23275 case GE: case GEU:
23276 /* If scc, we will have done a cror to put the bit in the
23277 unordered position. So test that bit. For integer, this is ! LT
23278 unless this is an scc insn. */
23279 return scc_p ? base_bit + 3 : base_bit;
23281 case LE: case LEU:
23282 return scc_p ? base_bit + 3 : base_bit + 1;
23284 default:
23285 gcc_unreachable ();
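/* Editor's note: an illustrative worked example, not part of the original
   source.  For a comparison whose result lives in CR field 2, base_bit is
   4 * (CR2 - CR0) = 8, so ccr_bit returns:

     LT/LTU/UNGE -> 8    GT/GTU/UNLE       -> 9
     EQ          -> 10   ORDERED/UNORDERED -> 11

   e.g. "bc 12,10,label" branches when the EQ bit of cr2 is set.  */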
23289 /* Return the GOT register. */
23292 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
23294 /* The second flow pass currently (June 1999) can't update
23295 regs_ever_live without disturbing other parts of the compiler, so
23296 update it here to make the prolog/epilogue code happy. */
23297 if (!can_create_pseudo_p ()
23298 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
23299 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
23301 crtl->uses_pic_offset_table = 1;
23303 return pic_offset_table_rtx;
23306 static rs6000_stack_t stack_info;
23308 /* Function to init struct machine_function.
23309 This will be called, via a pointer variable,
23310 from push_function_context. */
23312 static struct machine_function *
23313 rs6000_init_machine_status (void)
23315 stack_info.reload_completed = 0;
23316 return ggc_cleared_alloc<machine_function> ();
23319 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
23321 /* Write out a function code label. */
23323 void
23324 rs6000_output_function_entry (FILE *file, const char *fname)
23326 if (fname[0] != '.')
23328 switch (DEFAULT_ABI)
23330 default:
23331 gcc_unreachable ();
23333 case ABI_AIX:
23334 if (DOT_SYMBOLS)
23335 putc ('.', file);
23336 else
23337 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
23338 break;
23340 case ABI_ELFv2:
23341 case ABI_V4:
23342 case ABI_DARWIN:
23343 break;
23347 RS6000_OUTPUT_BASENAME (file, fname);
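/* Editor's note: illustrative only, not part of the original source.
   For a function "foo", the cases above emit:

     ABI_AIX with dot symbols  -> ".foo"        (the traditional entry symbol)
     ABI_AIX, no dot symbols   -> an "L.foo"-style local entry label
     ABI_ELFv2/ABI_V4/Darwin   -> "foo"         (no prefix needed)  */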
23350 /* Print an operand. Recognize special options, documented below. */
23352 #if TARGET_ELF
23353 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
23354 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
23355 #else
23356 #define SMALL_DATA_RELOC "sda21"
23357 #define SMALL_DATA_REG 0
23358 #endif
23360 void
23361 print_operand (FILE *file, rtx x, int code)
23363 int i;
23364 unsigned HOST_WIDE_INT uval;
23366 switch (code)
23368 /* %a is output_address. */
23370 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
23371 output_operand. */
23373 case 'D':
23374 /* Like 'J' but get to the GT bit only. */
23375 gcc_assert (REG_P (x));
23377 /* Bit 1 is GT bit. */
23378 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
23380 /* Add one for shift count in rlinm for scc. */
23381 fprintf (file, "%d", i + 1);
23382 return;
23384 case 'e':
23385 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
23386 if (! INT_P (x))
23388 output_operand_lossage ("invalid %%e value");
23389 return;
23392 uval = INTVAL (x);
23393 if ((uval & 0xffff) == 0 && uval != 0)
23394 putc ('s', file);
23395 return;
23397 case 'E':
23398 /* X is a CR register. Print the number of the EQ bit of the CR. */
23399 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23400 output_operand_lossage ("invalid %%E value");
23401 else
23402 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
23403 return;
23405 case 'f':
23406 /* X is a CR register. Print the shift count needed to move it
23407 to the high-order four bits. */
23408 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23409 output_operand_lossage ("invalid %%f value");
23410 else
23411 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
23412 return;
23414 case 'F':
23415 /* Similar, but print the count for the rotate in the opposite
23416 direction. */
23417 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23418 output_operand_lossage ("invalid %%F value");
23419 else
23420 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
23421 return;
23423 case 'G':
23424 /* X is a constant integer. If it is negative, print "m",
23425 otherwise print "z". This is to make an aze or ame insn. */
23426 if (GET_CODE (x) != CONST_INT)
23427 output_operand_lossage ("invalid %%G value");
23428 else if (INTVAL (x) >= 0)
23429 putc ('z', file);
23430 else
23431 putc ('m', file);
23432 return;
23434 case 'h':
23435 /* If constant, output low-order five bits. Otherwise, write
23436 normally. */
23437 if (INT_P (x))
23438 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
23439 else
23440 print_operand (file, x, 0);
23441 return;
23443 case 'H':
23444 /* If constant, output low-order six bits. Otherwise, write
23445 normally. */
23446 if (INT_P (x))
23447 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
23448 else
23449 print_operand (file, x, 0);
23450 return;
23452 case 'I':
23453 /* Print `i' if this is a constant, else nothing. */
23454 if (INT_P (x))
23455 putc ('i', file);
23456 return;
23458 case 'j':
23459 /* Write the bit number in CCR for jump. */
23460 i = ccr_bit (x, 0);
23461 if (i == -1)
23462 output_operand_lossage ("invalid %%j code");
23463 else
23464 fprintf (file, "%d", i);
23465 return;
23467 case 'J':
23468 /* Similar, but add one for shift count in rlinm for scc and pass
23469 scc flag to `ccr_bit'. */
23470 i = ccr_bit (x, 1);
23471 if (i == -1)
23472 output_operand_lossage ("invalid %%J code");
23473 else
23474 /* If we want bit 31, write a shift count of zero, not 32. */
23475 fprintf (file, "%d", i == 31 ? 0 : i + 1);
23476 return;
23478 case 'k':
23479 /* X must be a constant. Write the 1's complement of the
23480 constant. */
23481 if (! INT_P (x))
23482 output_operand_lossage ("invalid %%k value");
23483 else
23484 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
23485 return;
23487 case 'K':
23488 /* X must be a symbolic constant on ELF. Write an
23489 expression suitable for an 'addi' that adds in the low 16
23490 bits of the MEM. */
23491 if (GET_CODE (x) == CONST)
23493 if (GET_CODE (XEXP (x, 0)) != PLUS
23494 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
23495 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
23496 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
23497 output_operand_lossage ("invalid %%K value");
23499 print_operand_address (file, x);
23500 fputs ("@l", file);
23501 return;
23503 /* %l is output_asm_label. */
23505 case 'L':
23506 /* Write second word of DImode or DFmode reference. Works on register
23507 or non-indexed memory only. */
23508 if (REG_P (x))
23509 fputs (reg_names[REGNO (x) + 1], file);
23510 else if (MEM_P (x))
23512 machine_mode mode = GET_MODE (x);
23513 /* Handle possible auto-increment. Since it is pre-increment and
23514 we have already done it, we can just use an offset of one word. */
23515 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23516 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23517 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23518 UNITS_PER_WORD));
23519 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23520 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23521 UNITS_PER_WORD));
23522 else
23523 output_address (mode, XEXP (adjust_address_nv (x, SImode,
23524 UNITS_PER_WORD),
23525 0));
23527 if (small_data_operand (x, GET_MODE (x)))
23528 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23529 reg_names[SMALL_DATA_REG]);
23531 return;
23533 case 'N':
23534 /* Write the number of elements in the vector times 4. */
23535 if (GET_CODE (x) != PARALLEL)
23536 output_operand_lossage ("invalid %%N value");
23537 else
23538 fprintf (file, "%d", XVECLEN (x, 0) * 4);
23539 return;
23541 case 'O':
23542 /* Similar, but subtract 1 first. */
23543 if (GET_CODE (x) != PARALLEL)
23544 output_operand_lossage ("invalid %%O value");
23545 else
23546 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
23547 return;
23549 case 'p':
23550 /* X is a CONST_INT that is a power of two. Output the logarithm. */
23551 if (! INT_P (x)
23552 || INTVAL (x) < 0
23553 || (i = exact_log2 (INTVAL (x))) < 0)
23554 output_operand_lossage ("invalid %%p value");
23555 else
23556 fprintf (file, "%d", i);
23557 return;
23559 case 'P':
23560 /* The operand must be an indirect memory reference. The result
23561 is the register name. */
23562 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
23563 || REGNO (XEXP (x, 0)) >= 32)
23564 output_operand_lossage ("invalid %%P value");
23565 else
23566 fputs (reg_names[REGNO (XEXP (x, 0))], file);
23567 return;
23569 case 'q':
23570 /* This outputs the logical code corresponding to a boolean
23571 expression. The expression may have one or both operands
23572 negated (if one, only the first one). For condition register
23573 logical operations, it will also treat the negated
23574 CR codes as NOTs, but not handle NOTs of them. */
23576 const char *const *t = 0;
23577 const char *s;
23578 enum rtx_code code = GET_CODE (x);
23579 static const char * const tbl[3][3] = {
23580 { "and", "andc", "nor" },
23581 { "or", "orc", "nand" },
23582 { "xor", "eqv", "xor" } };
23584 if (code == AND)
23585 t = tbl[0];
23586 else if (code == IOR)
23587 t = tbl[1];
23588 else if (code == XOR)
23589 t = tbl[2];
23590 else
23591 output_operand_lossage ("invalid %%q value");
23593 if (GET_CODE (XEXP (x, 0)) != NOT)
23594 s = t[0];
23595 else
23597 if (GET_CODE (XEXP (x, 1)) == NOT)
23598 s = t[2];
23599 else
23600 s = t[1];
23603 fputs (s, file);
23605 return;
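/* Editor's note: illustrative examples for the %q table above, not part
   of the original source.  Per the lookup logic:

     (and X Y)             -> "and"     (and (not X) Y)       -> "andc"
     (and (not X) (not Y)) -> "nor"     (ior (not X) Y)       -> "orc"
     (ior (not X) (not Y)) -> "nand"    (xor (not X) (not Y)) -> "xor"

   The last entry follows from ~X ^ ~Y == X ^ Y.  */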
23607 case 'Q':
23608 if (! TARGET_MFCRF)
23609 return;
23610 fputc (',', file);
23611 /* FALLTHRU */
23613 case 'R':
23614 /* X is a CR register. Print the mask for `mtcrf'. */
23615 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23616 output_operand_lossage ("invalid %%R value");
23617 else
23618 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
23619 return;
23621 case 's':
23622 /* Low 5 bits of 32 - value */
23623 if (! INT_P (x))
23624 output_operand_lossage ("invalid %%s value");
23625 else
23626 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
23627 return;
23629 case 't':
23630 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
23631 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
23633 /* Bit 3 is OV bit. */
23634 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
23636 /* If we want bit 31, write a shift count of zero, not 32. */
23637 fprintf (file, "%d", i == 31 ? 0 : i + 1);
23638 return;
23640 case 'T':
23641 /* Print the symbolic name of a branch target register. */
23642 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
23643 && REGNO (x) != CTR_REGNO))
23644 output_operand_lossage ("invalid %%T value");
23645 else if (REGNO (x) == LR_REGNO)
23646 fputs ("lr", file);
23647 else
23648 fputs ("ctr", file);
23649 return;
23651 case 'u':
23652 /* High-order or low-order 16 bits of constant, whichever is non-zero,
23653 for use in unsigned operand. */
23654 if (! INT_P (x))
23656 output_operand_lossage ("invalid %%u value");
23657 return;
23660 uval = INTVAL (x);
23661 if ((uval & 0xffff) == 0)
23662 uval >>= 16;
23664 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
23665 return;
23667 case 'v':
23668 /* High-order 16 bits of constant for use in signed operand. */
23669 if (! INT_P (x))
23670 output_operand_lossage ("invalid %%v value");
23671 else
23672 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
23673 (INTVAL (x) >> 16) & 0xffff);
23674 return;
23676 case 'U':
23677 /* Print `u' if this has an auto-increment or auto-decrement. */
23678 if (MEM_P (x)
23679 && (GET_CODE (XEXP (x, 0)) == PRE_INC
23680 || GET_CODE (XEXP (x, 0)) == PRE_DEC
23681 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
23682 putc ('u', file);
23683 return;
23685 case 'V':
23686 /* Print the trap code for this operand. */
23687 switch (GET_CODE (x))
23689 case EQ:
23690 fputs ("eq", file); /* 4 */
23691 break;
23692 case NE:
23693 fputs ("ne", file); /* 24 */
23694 break;
23695 case LT:
23696 fputs ("lt", file); /* 16 */
23697 break;
23698 case LE:
23699 fputs ("le", file); /* 20 */
23700 break;
23701 case GT:
23702 fputs ("gt", file); /* 8 */
23703 break;
23704 case GE:
23705 fputs ("ge", file); /* 12 */
23706 break;
23707 case LTU:
23708 fputs ("llt", file); /* 2 */
23709 break;
23710 case LEU:
23711 fputs ("lle", file); /* 6 */
23712 break;
23713 case GTU:
23714 fputs ("lgt", file); /* 1 */
23715 break;
23716 case GEU:
23717 fputs ("lge", file); /* 5 */
23718 break;
23719 default:
23720 gcc_unreachable ();
23722 break;
23724 case 'w':
23725 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
23726 normally. */
23727 if (INT_P (x))
23728 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
23729 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
23730 else
23731 print_operand (file, x, 0);
23732 return;
23734 case 'x':
23735 /* X is a FPR or Altivec register used in a VSX context. */
23736 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
23737 output_operand_lossage ("invalid %%x value");
23738 else
23740 int reg = REGNO (x);
23741 int vsx_reg = (FP_REGNO_P (reg)
23742 ? reg - 32
23743 : reg - FIRST_ALTIVEC_REGNO + 32);
23745 #ifdef TARGET_REGNAMES
23746 if (TARGET_REGNAMES)
23747 fprintf (file, "%%vs%d", vsx_reg);
23748 else
23749 #endif
23750 fprintf (file, "%d", vsx_reg);
23752 return;
23754 case 'X':
23755 if (MEM_P (x)
23756 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
23757 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
23758 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
23759 putc ('x', file);
23760 return;
23762 case 'Y':
23763 /* Like 'L', for third word of TImode/PTImode */
23764 if (REG_P (x))
23765 fputs (reg_names[REGNO (x) + 2], file);
23766 else if (MEM_P (x))
23768 machine_mode mode = GET_MODE (x);
23769 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23770 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23771 output_address (mode, plus_constant (Pmode,
23772 XEXP (XEXP (x, 0), 0), 8));
23773 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23774 output_address (mode, plus_constant (Pmode,
23775 XEXP (XEXP (x, 0), 0), 8));
23776 else
23777 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
23778 if (small_data_operand (x, GET_MODE (x)))
23779 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23780 reg_names[SMALL_DATA_REG]);
23782 return;
23784 case 'z':
23785 /* X is a SYMBOL_REF. Write out the name preceded by a
23786 period and without any trailing data in brackets. Used for function
23787 names. If we are configured for System V (or the embedded ABI) on
23788 the PowerPC, do not emit the period, since those systems do not use
23789 TOCs and the like. */
23790 gcc_assert (GET_CODE (x) == SYMBOL_REF);
23792 /* For macho, check to see if we need a stub. */
23793 if (TARGET_MACHO)
23795 const char *name = XSTR (x, 0);
23796 #if TARGET_MACHO
23797 if (darwin_emit_branch_islands
23798 && MACHOPIC_INDIRECT
23799 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
23800 name = machopic_indirection_name (x, /*stub_p=*/true);
23801 #endif
23802 assemble_name (file, name);
23804 else if (!DOT_SYMBOLS)
23805 assemble_name (file, XSTR (x, 0));
23806 else
23807 rs6000_output_function_entry (file, XSTR (x, 0));
23808 return;
23810 case 'Z':
23811 /* Like 'L', for last word of TImode/PTImode. */
23812 if (REG_P (x))
23813 fputs (reg_names[REGNO (x) + 3], file);
23814 else if (MEM_P (x))
23816 machine_mode mode = GET_MODE (x);
23817 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23818 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23819 output_address (mode, plus_constant (Pmode,
23820 XEXP (XEXP (x, 0), 0), 12));
23821 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23822 output_address (mode, plus_constant (Pmode,
23823 XEXP (XEXP (x, 0), 0), 12));
23824 else
23825 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
23826 if (small_data_operand (x, GET_MODE (x)))
23827 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23828 reg_names[SMALL_DATA_REG]);
23830 return;
23832 /* Print AltiVec or SPE memory operand. */
23833 case 'y':
23835 rtx tmp;
23837 gcc_assert (MEM_P (x));
23839 tmp = XEXP (x, 0);
23841 /* Ugly hack because %y is overloaded. */
23842 if ((TARGET_SPE || TARGET_E500_DOUBLE)
23843 && (GET_MODE_SIZE (GET_MODE (x)) == 8
23844 || FLOAT128_2REG_P (GET_MODE (x))
23845 || GET_MODE (x) == TImode
23846 || GET_MODE (x) == PTImode))
23848 /* Handle [reg]. */
23849 if (REG_P (tmp))
23851 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
23852 break;
23854 /* Handle [reg+UIMM]. */
23855 else if (GET_CODE (tmp) == PLUS
23856 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
23858 int offset;
23860 gcc_assert (REG_P (XEXP (tmp, 0)));
23862 offset = INTVAL (XEXP (tmp, 1));
23863 fprintf (file, "%d(%s)", offset, reg_names[REGNO (XEXP (tmp, 0))]);
23864 break;
23867 /* Fall through. Must be [reg+reg]. */
23869 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
23870 && GET_CODE (tmp) == AND
23871 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
23872 && INTVAL (XEXP (tmp, 1)) == -16)
23873 tmp = XEXP (tmp, 0);
23874 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
23875 && GET_CODE (tmp) == PRE_MODIFY)
23876 tmp = XEXP (tmp, 1);
23877 if (REG_P (tmp))
23878 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
23879 else
23881 if (GET_CODE (tmp) != PLUS
23882 || !REG_P (XEXP (tmp, 0))
23883 || !REG_P (XEXP (tmp, 1)))
23885 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
23886 break;
23889 if (REGNO (XEXP (tmp, 0)) == 0)
23890 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
23891 reg_names[ REGNO (XEXP (tmp, 0)) ]);
23892 else
23893 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
23894 reg_names[ REGNO (XEXP (tmp, 1)) ]);
23896 break;
23899 case 0:
23900 if (REG_P (x))
23901 fprintf (file, "%s", reg_names[REGNO (x)]);
23902 else if (MEM_P (x))
23904 /* We need to handle PRE_INC and PRE_DEC here, since we need to
23905 know the width from the mode. */
23906 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
23907 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
23908 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
23909 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
23910 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
23911 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
23912 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23913 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
23914 else
23915 output_address (GET_MODE (x), XEXP (x, 0));
23917 else
23919 if (toc_relative_expr_p (x, false))
23920 /* This hack along with a corresponding hack in
23921 rs6000_output_addr_const_extra arranges to output addends
23922 where the assembler expects to find them. eg.
23923 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
23924 without this hack would be output as "x@toc+4". We
23925 want "x+4@toc". */
23926 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
23927 else
23928 output_addr_const (file, x);
23930 return;
23932 case '&':
23933 if (const char *name = get_some_local_dynamic_name ())
23934 assemble_name (file, name);
23935 else
23936 output_operand_lossage ("'%%&' used without any "
23937 "local dynamic TLS references");
23938 return;
23940 default:
23941 output_operand_lossage ("invalid %%xn code");
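/* Editor's note: an illustrative sketch of how the codes above are used,
   not part of the original source.  Given operands[0] = (reg:SI 3) and
   operands[1] = (const_int 5), an insn template like

     "addi %0,%0,%w1"

   prints "addi 3,3,5" (or "addi %r3,%r3,5" under TARGET_REGNAMES), since
   case 0 prints the register name and %w prints the sign-extended low 16
   bits of the constant.  */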
23945 /* Print the address of an operand. */
23947 void
23948 print_operand_address (FILE *file, rtx x)
23950 if (REG_P (x))
23951 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
23952 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
23953 || GET_CODE (x) == LABEL_REF)
23955 output_addr_const (file, x);
23956 if (small_data_operand (x, GET_MODE (x)))
23957 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23958 reg_names[SMALL_DATA_REG]);
23959 else
23960 gcc_assert (!TARGET_TOC);
23962 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
23963 && REG_P (XEXP (x, 1)))
23965 if (REGNO (XEXP (x, 0)) == 0)
23966 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
23967 reg_names[ REGNO (XEXP (x, 0)) ]);
23968 else
23969 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
23970 reg_names[ REGNO (XEXP (x, 1)) ]);
23972 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
23973 && GET_CODE (XEXP (x, 1)) == CONST_INT)
23974 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
23975 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
23976 #if TARGET_MACHO
23977 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
23978 && CONSTANT_P (XEXP (x, 1)))
23980 fprintf (file, "lo16(");
23981 output_addr_const (file, XEXP (x, 1));
23982 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
23984 #endif
23985 #if TARGET_ELF
23986 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
23987 && CONSTANT_P (XEXP (x, 1)))
23989 output_addr_const (file, XEXP (x, 1));
23990 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
23992 #endif
23993 else if (toc_relative_expr_p (x, false))
23995 /* This hack along with a corresponding hack in
23996 rs6000_output_addr_const_extra arranges to output addends
23997 where the assembler expects to find them. eg.
23998 (lo_sum (reg 9)
23999 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
24000 without this hack would be output as "x@toc+8@l(9)". We
24001 want "x+8@toc@l(9)". */
24002 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24003 if (GET_CODE (x) == LO_SUM)
24004 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
24005 else
24006 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
24008 else
24009 gcc_unreachable ();
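/* Editor's note: illustrative examples for print_operand_address, not
   part of the original source:

     (reg 9)                       -> "0(9)"
     (plus (reg 9) (const_int 8))  -> "8(9)"
     (plus (reg 9) (reg 10))       -> "9,10"
     (lo_sum (reg 9) (symbol_ref)) -> "sym@l(9)" on ELF

   The r0-first PLUS case is swapped because r0 reads as zero when used
   as the base register of an indexed form.  */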
24012 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
24014 static bool
24015 rs6000_output_addr_const_extra (FILE *file, rtx x)
24017 if (GET_CODE (x) == UNSPEC)
24018 switch (XINT (x, 1))
24020 case UNSPEC_TOCREL:
24021 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
24022 && REG_P (XVECEXP (x, 0, 1))
24023 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
24024 output_addr_const (file, XVECEXP (x, 0, 0));
24025 if (x == tocrel_base && tocrel_offset != const0_rtx)
24027 if (INTVAL (tocrel_offset) >= 0)
24028 fprintf (file, "+");
24029 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
24031 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
24033 putc ('-', file);
24034 assemble_name (file, toc_label_name);
24035 need_toc_init = 1;
24037 else if (TARGET_ELF)
24038 fputs ("@toc", file);
24039 return true;
24041 #if TARGET_MACHO
24042 case UNSPEC_MACHOPIC_OFFSET:
24043 output_addr_const (file, XVECEXP (x, 0, 0));
24044 putc ('-', file);
24045 machopic_output_function_base_name (file);
24046 return true;
24047 #endif
24049 return false;
24052 /* Target hook for assembling integer objects. The PowerPC version has
24053 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
24054 is defined. It also needs to handle DI-mode objects on 64-bit
24055 targets. */
24057 static bool
24058 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
24060 #ifdef RELOCATABLE_NEEDS_FIXUP
24061 /* Special handling for SI values. */
24062 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
24064 static int recurse = 0;
24066 /* For -mrelocatable, we mark all addresses that need to be fixed up in
24067 the .fixup section. Since the TOC section is already relocated, we
24068 don't need to mark it here. We used to skip the text section, but it
24069 should never be valid for relocated addresses to be placed in the text
24070 section. */
24071 if (DEFAULT_ABI == ABI_V4
24072 && (TARGET_RELOCATABLE || flag_pic > 1)
24073 && in_section != toc_section
24074 && !recurse
24075 && !CONST_SCALAR_INT_P (x)
24076 && CONSTANT_P (x))
24078 char buf[256];
24080 recurse = 1;
24081 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
24082 fixuplabelno++;
24083 ASM_OUTPUT_LABEL (asm_out_file, buf);
24084 fprintf (asm_out_file, "\t.long\t(");
24085 output_addr_const (asm_out_file, x);
24086 fprintf (asm_out_file, ")@fixup\n");
24087 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
24088 ASM_OUTPUT_ALIGN (asm_out_file, 2);
24089 fprintf (asm_out_file, "\t.long\t");
24090 assemble_name (asm_out_file, buf);
24091 fprintf (asm_out_file, "\n\t.previous\n");
24092 recurse = 0;
24093 return true;
24095 /* Remove initial .'s to turn a -mcall-aixdesc function
24096 address into the address of the descriptor, not the function
24097 itself. */
24098 else if (GET_CODE (x) == SYMBOL_REF
24099 && XSTR (x, 0)[0] == '.'
24100 && DEFAULT_ABI == ABI_AIX)
24102 const char *name = XSTR (x, 0);
24103 while (*name == '.')
24104 name++;
24106 fprintf (asm_out_file, "\t.long\t%s\n", name);
24107 return true;
24110 #endif /* RELOCATABLE_NEEDS_FIXUP */
24111 return default_assemble_integer (x, size, aligned_p);
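/* Editor's note: an illustrative sketch of the -mrelocatable fixup output
   above, not part of the original source.  For a relocatable address X,
   the emitted assembly looks roughly like:

     .LCP1:
             .long   (X)@fixup
             .section ".fixup","aw"
             .align 2
             .long   .LCP1
             .previous

   so the .fixup section collects the addresses the loader must relocate.  */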
24114 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
24115 /* Emit an assembler directive to set symbol visibility for DECL to
24116 VISIBILITY_TYPE. */
24118 static void
24119 rs6000_assemble_visibility (tree decl, int vis)
24121 if (TARGET_XCOFF)
24122 return;
24124 /* Functions need to have their entry point symbol visibility set as
24125 well as their descriptor symbol visibility. */
24126 if (DEFAULT_ABI == ABI_AIX
24127 && DOT_SYMBOLS
24128 && TREE_CODE (decl) == FUNCTION_DECL)
24130 static const char * const visibility_types[] = {
24131 NULL, "protected", "hidden", "internal"
24134 const char *name, *type;
24136 name = ((* targetm.strip_name_encoding)
24137 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
24138 type = visibility_types[vis];
24140 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
24141 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
24143 else
24144 default_assemble_visibility (decl, vis);
24146 #endif
24148 enum rtx_code
24149 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
24151 /* Reversal of FP compares takes care -- an ordered compare
24152 becomes an unordered compare and vice versa. */
24153 if (mode == CCFPmode
24154 && (!flag_finite_math_only
24155 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
24156 || code == UNEQ || code == LTGT))
24157 return reverse_condition_maybe_unordered (code);
24158 else
24159 return reverse_condition (code);
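/* Editor's note: illustrative only, not part of the original source.
   Reversing GE on a CCFPmode compare (without -ffinite-math-only) yields
   UNLT, because !(a >= b) must also hold when a and b are unordered; on
   an integer CCmode compare the plain reversal GE -> LT suffices.  */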
24162 /* Generate a compare for CODE. Return a brand-new rtx that
24163 represents the result of the compare. */
24165 static rtx
24166 rs6000_generate_compare (rtx cmp, machine_mode mode)
24168 machine_mode comp_mode;
24169 rtx compare_result;
24170 enum rtx_code code = GET_CODE (cmp);
24171 rtx op0 = XEXP (cmp, 0);
24172 rtx op1 = XEXP (cmp, 1);
24174 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24175 comp_mode = CCmode;
24176 else if (FLOAT_MODE_P (mode))
24177 comp_mode = CCFPmode;
24178 else if (code == GTU || code == LTU
24179 || code == GEU || code == LEU)
24180 comp_mode = CCUNSmode;
24181 else if ((code == EQ || code == NE)
24182 && unsigned_reg_p (op0)
24183 && (unsigned_reg_p (op1)
24184 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
24185 /* These are unsigned values, perhaps there will be a later
24186 ordering compare that can be shared with this one. */
24187 comp_mode = CCUNSmode;
24188 else
24189 comp_mode = CCmode;
24191 /* If we have an unsigned compare, make sure we don't have a signed value as
24192 an immediate. */
24193 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
24194 && INTVAL (op1) < 0)
24196 op0 = copy_rtx_if_shared (op0);
24197 op1 = force_reg (GET_MODE (op0), op1);
24198 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
24201 /* First, the compare. */
24202 compare_result = gen_reg_rtx (comp_mode);
24204 /* E500 FP compare instructions on the GPRs. Yuck! */
24205 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
24206 && FLOAT_MODE_P (mode))
24208 rtx cmp, or_result, compare_result2;
24209 machine_mode op_mode = GET_MODE (op0);
24210 bool reverse_p;
24212 if (op_mode == VOIDmode)
24213 op_mode = GET_MODE (op1);
24215 /* First reverse the condition codes that aren't directly supported. */
24216 switch (code)
24218 case NE:
24219 case UNLT:
24220 case UNLE:
24221 case UNGT:
24222 case UNGE:
24223 code = reverse_condition_maybe_unordered (code);
24224 reverse_p = true;
24225 break;
24227 case EQ:
24228 case LT:
24229 case LE:
24230 case GT:
24231 case GE:
24232 reverse_p = false;
24233 break;
24235 default:
24236 gcc_unreachable ();
24239 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
24240 This explains the following mess. */
24242 switch (code)
24244 case EQ:
24245 switch (op_mode)
24247 case SFmode:
24248 cmp = (flag_finite_math_only && !flag_trapping_math)
24249 ? gen_tstsfeq_gpr (compare_result, op0, op1)
24250 : gen_cmpsfeq_gpr (compare_result, op0, op1);
24251 break;
24253 case DFmode:
24254 cmp = (flag_finite_math_only && !flag_trapping_math)
24255 ? gen_tstdfeq_gpr (compare_result, op0, op1)
24256 : gen_cmpdfeq_gpr (compare_result, op0, op1);
24257 break;
24259 case TFmode:
24260 case IFmode:
24261 case KFmode:
24262 cmp = (flag_finite_math_only && !flag_trapping_math)
24263 ? gen_tsttfeq_gpr (compare_result, op0, op1)
24264 : gen_cmptfeq_gpr (compare_result, op0, op1);
24265 break;
24267 default:
24268 gcc_unreachable ();
24270 break;
24272 case GT:
24273 case GE:
24274 switch (op_mode)
24276 case SFmode:
24277 cmp = (flag_finite_math_only && !flag_trapping_math)
24278 ? gen_tstsfgt_gpr (compare_result, op0, op1)
24279 : gen_cmpsfgt_gpr (compare_result, op0, op1);
24280 break;
24282 case DFmode:
24283 cmp = (flag_finite_math_only && !flag_trapping_math)
24284 ? gen_tstdfgt_gpr (compare_result, op0, op1)
24285 : gen_cmpdfgt_gpr (compare_result, op0, op1);
24286 break;
24288 case TFmode:
24289 case IFmode:
24290 case KFmode:
24291 cmp = (flag_finite_math_only && !flag_trapping_math)
24292 ? gen_tsttfgt_gpr (compare_result, op0, op1)
24293 : gen_cmptfgt_gpr (compare_result, op0, op1);
24294 break;
24296 default:
24297 gcc_unreachable ();
24299 break;
24301 case LT:
24302 case LE:
24303 switch (op_mode)
24305 case SFmode:
24306 cmp = (flag_finite_math_only && !flag_trapping_math)
24307 ? gen_tstsflt_gpr (compare_result, op0, op1)
24308 : gen_cmpsflt_gpr (compare_result, op0, op1);
24309 break;
24311 case DFmode:
24312 cmp = (flag_finite_math_only && !flag_trapping_math)
24313 ? gen_tstdflt_gpr (compare_result, op0, op1)
24314 : gen_cmpdflt_gpr (compare_result, op0, op1);
24315 break;
24317 case TFmode:
24318 case IFmode:
24319 case KFmode:
24320 cmp = (flag_finite_math_only && !flag_trapping_math)
24321 ? gen_tsttflt_gpr (compare_result, op0, op1)
24322 : gen_cmptflt_gpr (compare_result, op0, op1);
24323 break;
24325 default:
24326 gcc_unreachable ();
24328 break;
24330 default:
24331 gcc_unreachable ();
24334 /* Synthesize LE and GE from LT/GT || EQ. */
24335 if (code == LE || code == GE)
24337 emit_insn (cmp);
24339 compare_result2 = gen_reg_rtx (CCFPmode);
24341 /* Do the EQ. */
24342 switch (op_mode)
24344 case SFmode:
24345 cmp = (flag_finite_math_only && !flag_trapping_math)
24346 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
24347 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
24348 break;
24350 case DFmode:
24351 cmp = (flag_finite_math_only && !flag_trapping_math)
24352 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
24353 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
24354 break;
24356 case TFmode:
24357 case IFmode:
24358 case KFmode:
24359 cmp = (flag_finite_math_only && !flag_trapping_math)
24360 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
24361 : gen_cmptfeq_gpr (compare_result2, op0, op1);
24362 break;
24364 default:
24365 gcc_unreachable ();
24368 emit_insn (cmp);
24370 /* OR them together. */
24371 or_result = gen_reg_rtx (CCFPmode);
24372 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
24373 compare_result2);
24374 compare_result = or_result;
24377 code = reverse_p ? NE : EQ;
24379 emit_insn (cmp);
24382 /* IEEE 128-bit support in VSX registers when we do not have hardware
24383 support. */
24384 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24386 rtx libfunc = NULL_RTX;
24387 bool check_nan = false;
24388 rtx dest;
24390 switch (code)
24392 case EQ:
24393 case NE:
24394 libfunc = optab_libfunc (eq_optab, mode);
24395 break;
24397 case GT:
24398 case GE:
24399 libfunc = optab_libfunc (ge_optab, mode);
24400 break;
24402 case LT:
24403 case LE:
24404 libfunc = optab_libfunc (le_optab, mode);
24405 break;
24407 case UNORDERED:
24408 case ORDERED:
24409 libfunc = optab_libfunc (unord_optab, mode);
24410 code = (code == UNORDERED) ? NE : EQ;
24411 break;
24413 case UNGE:
24414 case UNGT:
24415 check_nan = true;
24416 libfunc = optab_libfunc (ge_optab, mode);
24417 code = (code == UNGE) ? GE : GT;
24418 break;
24420 case UNLE:
24421 case UNLT:
24422 check_nan = true;
24423 libfunc = optab_libfunc (le_optab, mode);
24424 code = (code == UNLE) ? LE : LT;
24425 break;
24427 case UNEQ:
24428 case LTGT:
24429 check_nan = true;
24430 libfunc = optab_libfunc (eq_optab, mode);
24431 code = (code == UNEQ) ? EQ : NE;
24432 break;
24434 default:
24435 gcc_unreachable ();
24438 gcc_assert (libfunc);
24440 if (!check_nan)
24441 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24442 SImode, 2, op0, mode, op1, mode);
24444 /* The library signals an exception for signalling NaNs, so we need to
24445 handle isgreater, etc. by first checking isordered. */
24446 else
24448 rtx ne_rtx, normal_dest, unord_dest;
24449 rtx unord_func = optab_libfunc (unord_optab, mode);
24450 rtx join_label = gen_label_rtx ();
24451 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
24452 rtx unord_cmp = gen_reg_rtx (comp_mode);
24455 /* Test for either value being a NaN. */
24456 gcc_assert (unord_func);
24457 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
24458 SImode, 2, op0, mode, op1,
24459 mode);
24461 /* Set the result to one (true) if either value is a NaN, and jump
24462 to the join label. */
24463 dest = gen_reg_rtx (SImode);
24464 emit_move_insn (dest, const1_rtx);
24465 emit_insn (gen_rtx_SET (unord_cmp,
24466 gen_rtx_COMPARE (comp_mode, unord_dest,
24467 const0_rtx)));
24469 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
24470 emit_jump_insn (gen_rtx_SET (pc_rtx,
24471 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
24472 join_ref,
24473 pc_rtx)));
24475 /* Do the normal comparison, knowing that the values are not
24476 NaNs. */
24477 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24478 SImode, 2, op0, mode, op1,
24479 mode);
24481 emit_insn (gen_cstoresi4 (dest,
24482 gen_rtx_fmt_ee (code, SImode, normal_dest,
24483 const0_rtx),
24484 normal_dest, const0_rtx));
24486 /* Join NaN and non-NaN paths. Compare dest against 0. */
24487 emit_label (join_label);
24488 code = NE;
24491 emit_insn (gen_rtx_SET (compare_result,
24492 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
24495 else
24497 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
24498 CLOBBERs to match cmptf_internal2 pattern. */
24499 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
24500 && FLOAT128_IBM_P (GET_MODE (op0))
24501 && TARGET_HARD_FLOAT && TARGET_FPRS)
24502 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24503 gen_rtvec (10,
24504 gen_rtx_SET (compare_result,
24505 gen_rtx_COMPARE (comp_mode, op0, op1)),
24506 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24507 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24508 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24509 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24510 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24511 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24512 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24513 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24514 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
24515 else if (GET_CODE (op1) == UNSPEC
24516 && XINT (op1, 1) == UNSPEC_SP_TEST)
24518 rtx op1b = XVECEXP (op1, 0, 0);
24519 comp_mode = CCEQmode;
24520 compare_result = gen_reg_rtx (CCEQmode);
24521 if (TARGET_64BIT)
24522 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
24523 else
24524 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
24526 else
24527 emit_insn (gen_rtx_SET (compare_result,
24528 gen_rtx_COMPARE (comp_mode, op0, op1)));
24531 /* Some kinds of FP comparisons need an OR operation;
24532 under flag_finite_math_only we don't bother. */
24533 if (FLOAT_MODE_P (mode)
24534 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
24535 && !flag_finite_math_only
24536 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
24537 && (code == LE || code == GE
24538 || code == UNEQ || code == LTGT
24539 || code == UNGT || code == UNLT))
24541 enum rtx_code or1, or2;
24542 rtx or1_rtx, or2_rtx, compare2_rtx;
24543 rtx or_result = gen_reg_rtx (CCEQmode);
24545 switch (code)
24547 case LE: or1 = LT; or2 = EQ; break;
24548 case GE: or1 = GT; or2 = EQ; break;
24549 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
24550 case LTGT: or1 = LT; or2 = GT; break;
24551 case UNGT: or1 = UNORDERED; or2 = GT; break;
24552 case UNLT: or1 = UNORDERED; or2 = LT; break;
24553 default: gcc_unreachable ();
24555 validate_condition_mode (or1, comp_mode);
24556 validate_condition_mode (or2, comp_mode);
24557 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
24558 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
24559 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
24560 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
24561 const_true_rtx);
24562 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
24564 compare_result = or_result;
24565 code = EQ;
24568 validate_condition_mode (code, GET_MODE (compare_result));
24570 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
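/* Editor's note: an illustrative sketch of the LE/GE synthesis above,
   not part of the original source.  For a DFmode "a <= b" this path
   typically assembles to something like:

     fcmpu  cr0,f1,f2      # sets LT/GT/EQ/UN in cr0
     cror   2,0,2          # EQ |= LT, i.e. LE lands in the EQ bit

   after which the caller only tests the EQ bit of cr0.  */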
24574 /* Return the diagnostic message string if the binary operation OP is
24575 not permitted on TYPE1 and TYPE2, NULL otherwise. */
24577 static const char*
24578 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
24579 const_tree type1,
24580 const_tree type2)
24582 machine_mode mode1 = TYPE_MODE (type1);
24583 machine_mode mode2 = TYPE_MODE (type2);
24585 /* For complex modes, use the inner type. */
24586 if (COMPLEX_MODE_P (mode1))
24587 mode1 = GET_MODE_INNER (mode1);
24589 if (COMPLEX_MODE_P (mode2))
24590 mode2 = GET_MODE_INNER (mode2);
24592 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
24593 double to intermix unless -mfloat128-convert. */
24594 if (mode1 == mode2)
24595 return NULL;
24597 if (!TARGET_FLOAT128_CVT)
24599 if ((mode1 == KFmode && mode2 == IFmode)
24600 || (mode1 == IFmode && mode2 == KFmode))
24601 return N_("__float128 and __ibm128 cannot be used in the same "
24602 "expression");
24604 if (TARGET_IEEEQUAD
24605 && ((mode1 == IFmode && mode2 == TFmode)
24606 || (mode1 == TFmode && mode2 == IFmode)))
24607 return N_("__ibm128 and long double cannot be used in the same "
24608 "expression");
24610 if (!TARGET_IEEEQUAD
24611 && ((mode1 == KFmode && mode2 == TFmode)
24612 || (mode1 == TFmode && mode2 == KFmode)))
24613 return N_("__float128 and long double cannot be used in the same "
24614 "expression");
24617 return NULL;
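/* Editor's note: a minimal user-level example of the diagnostic above,
   not part of the original source (assumes a target providing both
   types).  */
#if 0
__float128 f128;
__ibm128   i128;
/* Without -mfloat128-convert, "f128 + i128" is rejected with
   "__float128 and __ibm128 cannot be used in the same expression".  */
#endif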
24621 /* Expand floating point conversion to/from __float128 and __ibm128. */
24623 void
24624 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
24626 machine_mode dest_mode = GET_MODE (dest);
24627 machine_mode src_mode = GET_MODE (src);
24628 convert_optab cvt = unknown_optab;
24629 bool do_move = false;
24630 rtx libfunc = NULL_RTX;
24631 rtx dest2;
24632 typedef rtx (*rtx_2func_t) (rtx, rtx);
24633 rtx_2func_t hw_convert = (rtx_2func_t)0;
24634 size_t kf_or_tf;
24636 struct hw_conv_t {
24637 rtx_2func_t from_df;
24638 rtx_2func_t from_sf;
24639 rtx_2func_t from_si_sign;
24640 rtx_2func_t from_si_uns;
24641 rtx_2func_t from_di_sign;
24642 rtx_2func_t from_di_uns;
24643 rtx_2func_t to_df;
24644 rtx_2func_t to_sf;
24645 rtx_2func_t to_si_sign;
24646 rtx_2func_t to_si_uns;
24647 rtx_2func_t to_di_sign;
24648 rtx_2func_t to_di_uns;
24649 } hw_conversions[2] = {
24650 /* conversions to/from KFmode */
24652 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
24653 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
24654 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
24655 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
24656 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
24657 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
24658 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
24659 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
24660 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
24661 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
24662 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
24663 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
24666 /* conversions to/from TFmode */
24668 gen_extenddftf2_hw, /* TFmode <- DFmode. */
24669 gen_extendsftf2_hw, /* TFmode <- SFmode. */
24670 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
24671 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
24672 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
24673 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
24674 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
24675 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
24676 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
24677 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
24678 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
24679 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
24683 if (dest_mode == src_mode)
24684 gcc_unreachable ();
24686 /* Eliminate memory operations. */
24687 if (MEM_P (src))
24688 src = force_reg (src_mode, src);
24690 if (MEM_P (dest))
24692 rtx tmp = gen_reg_rtx (dest_mode);
24693 rs6000_expand_float128_convert (tmp, src, unsigned_p);
24694 rs6000_emit_move (dest, tmp, dest_mode);
24695 return;
24698 /* Convert to IEEE 128-bit floating point. */
24699 if (FLOAT128_IEEE_P (dest_mode))
24701 if (dest_mode == KFmode)
24702 kf_or_tf = 0;
24703 else if (dest_mode == TFmode)
24704 kf_or_tf = 1;
24705 else
24706 gcc_unreachable ();
24708 switch (src_mode)
24710 case DFmode:
24711 cvt = sext_optab;
24712 hw_convert = hw_conversions[kf_or_tf].from_df;
24713 break;
24715 case SFmode:
24716 cvt = sext_optab;
24717 hw_convert = hw_conversions[kf_or_tf].from_sf;
24718 break;
24720 case KFmode:
24721 case IFmode:
24722 case TFmode:
24723 if (FLOAT128_IBM_P (src_mode))
24724 cvt = sext_optab;
24725 else
24726 do_move = true;
24727 break;
24729 case SImode:
24730 if (unsigned_p)
24732 cvt = ufloat_optab;
24733 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
24735 else
24737 cvt = sfloat_optab;
24738 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
24740 break;
24742 case DImode:
24743 if (unsigned_p)
24745 cvt = ufloat_optab;
24746 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
24748 else
24750 cvt = sfloat_optab;
24751 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
24753 break;
24755 default:
24756 gcc_unreachable ();
24760 /* Convert from IEEE 128-bit floating point. */
24761 else if (FLOAT128_IEEE_P (src_mode))
24763 if (src_mode == KFmode)
24764 kf_or_tf = 0;
24765 else if (src_mode == TFmode)
24766 kf_or_tf = 1;
24767 else
24768 gcc_unreachable ();
24770 switch (dest_mode)
24772 case DFmode:
24773 cvt = trunc_optab;
24774 hw_convert = hw_conversions[kf_or_tf].to_df;
24775 break;
24777 case SFmode:
24778 cvt = trunc_optab;
24779 hw_convert = hw_conversions[kf_or_tf].to_sf;
24780 break;
24782 case KFmode:
24783 case IFmode:
24784 case TFmode:
24785 if (FLOAT128_IBM_P (dest_mode))
24786 cvt = trunc_optab;
24787 else
24788 do_move = true;
24789 break;
24791 case SImode:
24792 if (unsigned_p)
24794 cvt = ufix_optab;
24795 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
24797 else
24799 cvt = sfix_optab;
24800 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
24802 break;
24804 case DImode:
24805 if (unsigned_p)
24807 cvt = ufix_optab;
24808 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
24810 else
24812 cvt = sfix_optab;
24813 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
24815 break;
24817 default:
24818 gcc_unreachable ();
24822 /* Both IBM format. */
24823 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
24824 do_move = true;
24826 else
24827 gcc_unreachable ();
24829 /* Handle conversion between TFmode/KFmode. */
24830 if (do_move)
24831 emit_move_insn (dest, gen_lowpart (dest_mode, src));
24833 /* Handle conversion if we have hardware support. */
24834 else if (TARGET_FLOAT128_HW && hw_convert)
24835 emit_insn ((hw_convert) (dest, src));
24837 /* Call an external function to do the conversion. */
24838 else if (cvt != unknown_optab)
24840 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
24841 gcc_assert (libfunc != NULL_RTX);
24843 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
24844 src_mode);
24846 gcc_assert (dest2 != NULL_RTX);
24847 if (!rtx_equal_p (dest, dest2))
24848 emit_move_insn (dest, dest2);
24851 else
24852 gcc_unreachable ();
24854 return;
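/* Editor's note: an illustrative trace of the dispatch above, not part
   of the original source.  Converting DFmode -> KFmode selects
   cvt = sext_optab and hw_convert = gen_extenddfkf2_hw; with
   TARGET_FLOAT128_HW (power9) the single hardware insn is emitted,
   otherwise the sext_optab libcall (typically __extenddfkf2 in libgcc)
   is used.  */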
24858 /* Emit the RTL for an sISEL pattern. */
24860 void
24861 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
24863 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
24866 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
24867 can be used as that dest register. Return the dest register. */
24870 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
24872 if (op2 == const0_rtx)
24873 return op1;
24875 if (GET_CODE (scratch) == SCRATCH)
24876 scratch = gen_reg_rtx (mode);
24878 if (logical_operand (op2, mode))
24879 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
24880 else
24881 emit_insn (gen_rtx_SET (scratch,
24882 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
24884 return scratch;
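/* Editor's note: illustrative only, not part of the original source.
   For op1 = r3 and op2 = 5 (a logical_operand), the XOR form emits
   "xori 9,3,5"; the scratch r9 is then zero exactly when r3 == 5, so
   the caller can finish the sCC sequence by testing r9 against 0.  */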
24887 void
24888 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
24890 rtx condition_rtx;
24891 machine_mode op_mode;
24892 enum rtx_code cond_code;
24893 rtx result = operands[0];
24895 condition_rtx = rs6000_generate_compare (operands[1], mode);
24896 cond_code = GET_CODE (condition_rtx);
24898 if (FLOAT_MODE_P (mode)
24899 && !TARGET_FPRS && TARGET_HARD_FLOAT)
24901 rtx t;
24903 PUT_MODE (condition_rtx, SImode);
24904 t = XEXP (condition_rtx, 0);
24906 gcc_assert (cond_code == NE || cond_code == EQ);
24908 if (cond_code == NE)
24909 emit_insn (gen_e500_flip_gt_bit (t, t));
24911 emit_insn (gen_move_from_CR_gt_bit (result, t));
24912 return;
24915 if (cond_code == NE
24916 || cond_code == GE || cond_code == LE
24917 || cond_code == GEU || cond_code == LEU
24918 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
24920 rtx not_result = gen_reg_rtx (CCEQmode);
24921 rtx not_op, rev_cond_rtx;
24922 machine_mode cc_mode;
24924 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
24926 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
24927 SImode, XEXP (condition_rtx, 0), const0_rtx);
24928 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
24929 emit_insn (gen_rtx_SET (not_result, not_op));
24930 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
24933 op_mode = GET_MODE (XEXP (operands[1], 0));
24934 if (op_mode == VOIDmode)
24935 op_mode = GET_MODE (XEXP (operands[1], 1));
24937 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
24939 PUT_MODE (condition_rtx, DImode);
24940 convert_move (result, condition_rtx, 0);
24942 else
24944 PUT_MODE (condition_rtx, SImode);
24945 emit_insn (gen_rtx_SET (result, condition_rtx));
24949 /* Emit a conditional branch: OPERANDS[0] is the comparison and OPERANDS[3] the target label. */
24951 void
24952 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
24954 rtx condition_rtx, loc_ref;
24956 condition_rtx = rs6000_generate_compare (operands[0], mode);
24957 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
24958 emit_jump_insn (gen_rtx_SET (pc_rtx,
24959 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
24960 loc_ref, pc_rtx)));
24963 /* Return the string to output a conditional branch to LABEL, which is
24964 the operand template of the label, or NULL if the branch is really a
24965 conditional return.
24967 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
24968 condition code register and its mode specifies what kind of
24969 comparison we made.
24971 REVERSED is nonzero if we should reverse the sense of the comparison.
24973 INSN is the insn. */
24975 char *
24976 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
24978 static char string[64];
24979 enum rtx_code code = GET_CODE (op);
24980 rtx cc_reg = XEXP (op, 0);
24981 machine_mode mode = GET_MODE (cc_reg);
24982 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
24983 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
24984 int really_reversed = reversed ^ need_longbranch;
24985 char *s = string;
24986 const char *ccode;
24987 const char *pred;
24988 rtx note;
24990 validate_condition_mode (code, mode);
24992 /* Work out which way this really branches. We could always use
24993 reverse_condition_maybe_unordered here, but distinguishing the
24994 ordered cases makes the resulting assembler clearer. */
24995 if (really_reversed)
24997 /* Reversal of FP compares takes care -- an ordered compare
24998 becomes an unordered compare and vice versa. */
24999 if (mode == CCFPmode)
25000 code = reverse_condition_maybe_unordered (code);
25001 else
25002 code = reverse_condition (code);
25005 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
25007 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
25008 to the GT bit. */
25009 switch (code)
25011 case EQ:
25012 /* Opposite of GT. */
25013 code = GT;
25014 break;
25016 case NE:
25017 code = UNLE;
25018 break;
25020 default:
25021 gcc_unreachable ();
25025 switch (code)
25027 /* Not all of these are actually distinct opcodes, but
25028 we distinguish them for clarity of the resulting assembler. */
25029 case NE: case LTGT:
25030 ccode = "ne"; break;
25031 case EQ: case UNEQ:
25032 ccode = "eq"; break;
25033 case GE: case GEU:
25034 ccode = "ge"; break;
25035 case GT: case GTU: case UNGT:
25036 ccode = "gt"; break;
25037 case LE: case LEU:
25038 ccode = "le"; break;
25039 case LT: case LTU: case UNLT:
25040 ccode = "lt"; break;
25041 case UNORDERED: ccode = "un"; break;
25042 case ORDERED: ccode = "nu"; break;
25043 case UNGE: ccode = "nl"; break;
25044 case UNLE: ccode = "ng"; break;
25045 default:
25046 gcc_unreachable ();
25049 /* Maybe we have a guess as to how likely the branch is. */
25050 pred = "";
25051 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
25052 if (note != NULL_RTX)
25054 /* PROB is the difference from 50%. */
25055 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
25057 /* Only hint for highly probable/improbable branches on newer cpus when
25058 we have real profile data, as static prediction overrides processor
25059 dynamic prediction. For older cpus we may as well always hint, but
25060 assume not taken for branches that are very close to 50% as a
25061 mispredicted taken branch is more expensive than a
25062 mispredicted not-taken branch. */
25063 if (rs6000_always_hint
25064 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
25065 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
25066 && br_prob_note_reliable_p (note)))
25068 if (abs (prob) > REG_BR_PROB_BASE / 20
25069 && ((prob > 0) ^ need_longbranch))
25070 pred = "+";
25071 else
25072 pred = "-";
25076 if (label == NULL)
25077 s += sprintf (s, "b%slr%s ", ccode, pred);
25078 else
25079 s += sprintf (s, "b%s%s ", ccode, pred);
25081 /* We need to escape any '%' characters in the reg_names string.
25082 Assume they'd only be the first character.... */
25083 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
25084 *s++ = '%';
25085 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
25087 if (label != NULL)
25089 /* If the branch distance was too far, we may have to use an
25090 unconditional branch to go the distance. */
25091 if (need_longbranch)
25092 s += sprintf (s, ",$+8\n\tb %s", label);
25093 else
25094 s += sprintf (s, ",%s", label);
25097 return string;
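/* Editor's note: illustrative outputs of output_cbranch, not part of the
   original source.  For an EQ test in cr0 these look like:

     "beq+ 0,.L5"             # short branch, predicted taken
     "bne 0,$+8\n\tb .L5"     # long form: reversed test skips over a b
     "beqlr 0"                # label == NULL, i.e. a conditional return

   (with -mregnames the CR field prints as "%cr0").  */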
25100 /* Return the string to flip the GT bit on a CR. */
25101 char *
25102 output_e500_flip_gt_bit (rtx dst, rtx src)
25104 static char string[64];
25105 int a, b;
25107 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
25108 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
25110 /* GT bit. */
25111 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
25112 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
25114 sprintf (string, "crnot %d,%d", a, b);
25115 return string;
25118 /* Return insn for VSX or Altivec comparisons. */
25120 static rtx
25121 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
25123 rtx mask;
25124 machine_mode mode = GET_MODE (op0);
25126 switch (code)
25128 default:
25129 break;
25131 case GE:
25132 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
25133 return NULL_RTX;
25134 /* FALLTHRU */
25136 case EQ:
25137 case GT:
25138 case GTU:
25139 case ORDERED:
25140 case UNORDERED:
25141 case UNEQ:
25142 case LTGT:
25143 mask = gen_reg_rtx (mode);
25144 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
25145 return mask;
25148 return NULL_RTX;
25151 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
25152 DMODE is expected destination mode. This is a recursive function. */
25154 static rtx
25155 rs6000_emit_vector_compare (enum rtx_code rcode,
25156 rtx op0, rtx op1,
25157 machine_mode dmode)
25159 rtx mask;
25160 bool swap_operands = false;
25161 bool try_again = false;
25163 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
25164 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
25166 /* See if the comparison works as is. */
25167 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25168 if (mask)
25169 return mask;
25171 switch (rcode)
25173 case LT:
25174 rcode = GT;
25175 swap_operands = true;
25176 try_again = true;
25177 break;
25178 case LTU:
25179 rcode = GTU;
25180 swap_operands = true;
25181 try_again = true;
25182 break;
25183 case NE:
25184 case UNLE:
25185 case UNLT:
25186 case UNGE:
25187 case UNGT:
25188 /* Invert condition and try again.
25189 e.g., A != B becomes ~(A==B). */
25191 enum rtx_code rev_code;
25192 enum insn_code nor_code;
25193 rtx mask2;
25195 rev_code = reverse_condition_maybe_unordered (rcode);
25196 if (rev_code == UNKNOWN)
25197 return NULL_RTX;
25199 nor_code = optab_handler (one_cmpl_optab, dmode);
25200 if (nor_code == CODE_FOR_nothing)
25201 return NULL_RTX;
25203 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
25204 if (!mask2)
25205 return NULL_RTX;
25207 mask = gen_reg_rtx (dmode);
25208 emit_insn (GEN_FCN (nor_code) (mask, mask2));
25209 return mask;
25211 break;
25212 case GE:
25213 case GEU:
25214 case LE:
25215 case LEU:
25216 /* Try GT/GTU/LT/LTU OR EQ */
25218 rtx c_rtx, eq_rtx;
25219 enum insn_code ior_code;
25220 enum rtx_code new_code;
25222 switch (rcode)
25224 case GE:
25225 new_code = GT;
25226 break;
25228 case GEU:
25229 new_code = GTU;
25230 break;
25232 case LE:
25233 new_code = LT;
25234 break;
25236 case LEU:
25237 new_code = LTU;
25238 break;
25240 default:
25241 gcc_unreachable ();
25244 ior_code = optab_handler (ior_optab, dmode);
25245 if (ior_code == CODE_FOR_nothing)
25246 return NULL_RTX;
25248 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
25249 if (!c_rtx)
25250 return NULL_RTX;
25252 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
25253 if (!eq_rtx)
25254 return NULL_RTX;
25256 mask = gen_reg_rtx (dmode);
25257 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
25258 return mask;
25260 break;
25261 default:
25262 return NULL_RTX;
25265 if (try_again)
25267 if (swap_operands)
25268 std::swap (op0, op1);
25270 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25271 if (mask)
25272 return mask;
25275 /* You only get two chances. */
25276 return NULL_RTX;
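/* Editor's note: an illustrative expansion, not part of the original
   source.  A V4SI "a <= b" has no direct instruction, so the LE case
   above builds LT(a,b) | EQ(a,b); LT itself is retried as GT with the
   operands swapped, giving roughly:

     vcmpgtsw m1,v_b,v_a    # a < b
     vcmpequw m2,v_a,v_b    # a == b
     vor      m,m1,m2       # a <= b  */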
25279 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
25280 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
25281 operands for the relation operation COND. */
25284 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
25285 rtx cond, rtx cc_op0, rtx cc_op1)
25287 machine_mode dest_mode = GET_MODE (dest);
25288 machine_mode mask_mode = GET_MODE (cc_op0);
25289 enum rtx_code rcode = GET_CODE (cond);
25290 machine_mode cc_mode = CCmode;
25291 rtx mask;
25292 rtx cond2;
25293 bool invert_move = false;
25295 if (VECTOR_UNIT_NONE_P (dest_mode))
25296 return 0;
25298 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
25299 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
25301 switch (rcode)
25303 /* Swap operands if we can, and fall back to doing the operation as
25304 specified, and doing a NOR to invert the test. */
25305 case NE:
25306 case UNLE:
25307 case UNLT:
25308 case UNGE:
25309 case UNGT:
25310 /* Invert condition and try again.
25311 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
25312 invert_move = true;
25313 rcode = reverse_condition_maybe_unordered (rcode);
25314 if (rcode == UNKNOWN)
25315 return 0;
25316 break;
25318 case GE:
25319 case LE:
25320 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
25322 /* Invert condition to avoid compound test. */
25323 invert_move = true;
25324 rcode = reverse_condition (rcode);
25326 break;
25328 case GTU:
25329 case GEU:
25330 case LTU:
25331 case LEU:
25332 /* Mark unsigned tests with CCUNSmode. */
25333 cc_mode = CCUNSmode;
25335 /* Invert condition to avoid compound test if necessary. */
25336 if (rcode == GEU || rcode == LEU)
25338 invert_move = true;
25339 rcode = reverse_condition (rcode);
25341 break;
25343 default:
25344 break;
25347 /* Get the vector mask for the given relational operations. */
25348 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
25350 if (!mask)
25351 return 0;
25353 if (invert_move)
25354 std::swap (op_true, op_false);
25356 /* Optimize, knowing the comparison mask generates -1/0 per element. */
25357 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
25358 && (GET_CODE (op_true) == CONST_VECTOR
25359 || GET_CODE (op_false) == CONST_VECTOR))
25361 rtx constant_0 = CONST0_RTX (dest_mode);
25362 rtx constant_m1 = CONSTM1_RTX (dest_mode);
25364 if (op_true == constant_m1 && op_false == constant_0)
25366 emit_move_insn (dest, mask);
25367 return 1;
25370 else if (op_true == constant_0 && op_false == constant_m1)
25372 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
25373 return 1;
25376 /* If we can't use the vector comparison directly, perhaps we can use
25377 the mask for the true or false fields, instead of loading up a
25378 constant. */
25379 if (op_true == constant_m1)
25380 op_true = mask;
25382 if (op_false == constant_0)
25383 op_false = mask;
25386 if (!REG_P (op_true) && !SUBREG_P (op_true))
25387 op_true = force_reg (dest_mode, op_true);
25389 if (!REG_P (op_false) && !SUBREG_P (op_false))
25390 op_false = force_reg (dest_mode, op_false);
25392 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
25393 CONST0_RTX (dest_mode));
25394 emit_insn (gen_rtx_SET (dest,
25395 gen_rtx_IF_THEN_ELSE (dest_mode,
25396 cond2,
25397 op_true,
25398 op_false)));
25399 return 1;
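/* Illustrative sketch (assuming V4SI operands; not a verbatim dump):
   a source loop such as

     for (i = 0; i < 4; i++)
       a[i] = (b[i] > c[i]) ? d[i] : e[i];

   would go through this function roughly as

     vcmpgtsw mask, b, c
     vsel     a, e, d, mask

   where the final IF_THEN_ELSE against the -1/0 mask is what the
   vsel/xxsel patterns in the machine description match.  */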
25402 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
25403 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
25404 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return
25405 0 if the hardware has no such operation. */
25407 static int
25408 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25410 enum rtx_code code = GET_CODE (op);
25411 rtx op0 = XEXP (op, 0);
25412 rtx op1 = XEXP (op, 1);
25413 machine_mode compare_mode = GET_MODE (op0);
25414 machine_mode result_mode = GET_MODE (dest);
25415 bool max_p = false;
25417 if (result_mode != compare_mode)
25418 return 0;
25420 if (code == GE || code == GT)
25421 max_p = true;
25422 else if (code == LE || code == LT)
25423 max_p = false;
25424 else
25425 return 0;
25427 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
25430 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
25431 max_p = !max_p;
25433 else
25434 return 0;
25436 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
25437 return 1;
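/* Illustrative example (assumes -mcpu=power9): a source expression

     double m = (x >= y) ? x : y;

   satisfies the first rtx_equal_p test above with max_p set, so a
   single xsmaxcdp is emitted in place of a compare and branch.  */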
25440 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
25441 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
25442 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
25443 zero/false. Return 0 if the hardware has no such operation. */
25445 static int
25446 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25448 enum rtx_code code = GET_CODE (op);
25449 rtx op0 = XEXP (op, 0);
25450 rtx op1 = XEXP (op, 1);
25451 machine_mode result_mode = GET_MODE (dest);
25452 rtx compare_rtx;
25453 rtx cmove_rtx;
25454 rtx clobber_rtx;
25456 if (!can_create_pseudo_p ())
25457 return 0;
25459 switch (code)
25461 case EQ:
25462 case GE:
25463 case GT:
25464 break;
25466 case NE:
25467 case LT:
25468 case LE:
25469 code = swap_condition (code);
25470 std::swap (op0, op1);
25471 break;
25473 default:
25474 return 0;
25477 /* Generate: [(parallel [(set (dest)
25478 (if_then_else (op (cmp1) (cmp2))
25479 (true)
25480 (false)))
25481 (clobber (scratch))])]. */
25483 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
25484 cmove_rtx = gen_rtx_SET (dest,
25485 gen_rtx_IF_THEN_ELSE (result_mode,
25486 compare_rtx,
25487 true_cond,
25488 false_cond));
25490 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
25491 emit_insn (gen_rtx_PARALLEL (VOIDmode,
25492 gen_rtvec (2, cmove_rtx, clobber_rtx)));
25494 return 1;
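/* Illustrative example (a sketch; the split into machine insns is
   assumed here rather than quoted from the .md file): for

     double r = (x < y) ? a : b;

   the LT is turned into GT with x and y exchanged by the swap above,
   and the PARALLEL presumably ends up as an xscmpgtdp computing a
   mask into the V2DImode scratch, followed by an xxsel on it.  */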
25497 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
25498 operands of the last comparison is nonzero/true, FALSE_COND if it
25499 is zero/false. Return 0 if the hardware has no such operation. */
25502 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25504 enum rtx_code code = GET_CODE (op);
25505 rtx op0 = XEXP (op, 0);
25506 rtx op1 = XEXP (op, 1);
25507 machine_mode compare_mode = GET_MODE (op0);
25508 machine_mode result_mode = GET_MODE (dest);
25509 rtx temp;
25510 bool is_against_zero;
25512 /* These modes should always match. */
25513 if (GET_MODE (op1) != compare_mode
25514 /* In the isel case however, we can use a compare immediate, so
25515 op1 may be a small constant. */
25516 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
25517 return 0;
25518 if (GET_MODE (true_cond) != result_mode)
25519 return 0;
25520 if (GET_MODE (false_cond) != result_mode)
25521 return 0;
25523 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
25524 if (TARGET_P9_MINMAX
25525 && (compare_mode == SFmode || compare_mode == DFmode)
25526 && (result_mode == SFmode || result_mode == DFmode))
25528 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
25529 return 1;
25531 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
25532 return 1;
25535 /* Don't allow using floating point comparisons for integer results for
25536 now. */
25537 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
25538 return 0;
25540 /* First, work out if the hardware can do this at all, or
25541 if it's too slow.... */
25542 if (!FLOAT_MODE_P (compare_mode))
25544 if (TARGET_ISEL)
25545 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
25546 return 0;
25548 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
25549 && SCALAR_FLOAT_MODE_P (compare_mode))
25550 return 0;
25552 is_against_zero = op1 == CONST0_RTX (compare_mode);
25554 /* A floating-point subtract might overflow, underflow, or produce
25555 an inexact result, thus changing the floating-point flags, so it
25556 can't be generated if we care about that. It's safe if one side
25557 of the construct is zero, since then no subtract will be
25558 generated. */
25559 if (SCALAR_FLOAT_MODE_P (compare_mode)
25560 && flag_trapping_math && ! is_against_zero)
25561 return 0;
25563 /* Eliminate half of the comparisons by switching operands, this
25564 makes the remaining code simpler. */
25565 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
25566 || code == LTGT || code == LT || code == UNLE)
25568 code = reverse_condition_maybe_unordered (code);
25569 temp = true_cond;
25570 true_cond = false_cond;
25571 false_cond = temp;
25574 /* UNEQ and LTGT take four instructions for a comparison with zero,
25575 it'll probably be faster to use a branch here too. */
25576 if (code == UNEQ && HONOR_NANS (compare_mode))
25577 return 0;
25579 /* We're going to try to implement comparisons by performing
25580 a subtract, then comparing against zero. Unfortunately,
25581 Inf - Inf is NaN which is not zero, and so if we don't
25582 know that the operand is finite and the comparison
25583 would treat EQ differently from UNORDERED, we can't do it. */
25584 if (HONOR_INFINITIES (compare_mode)
25585 && code != GT && code != UNGE
25586 && (GET_CODE (op1) != CONST_DOUBLE
25587 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
25588 /* Constructs of the form (a OP b ? a : b) are safe. */
25589 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
25590 || (! rtx_equal_p (op0, true_cond)
25591 && ! rtx_equal_p (op1, true_cond))))
25592 return 0;
25594 /* At this point we know we can use fsel. */
25596 /* Reduce the comparison to a comparison against zero. */
25597 if (! is_against_zero)
25599 temp = gen_reg_rtx (compare_mode);
25600 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
25601 op0 = temp;
25602 op1 = CONST0_RTX (compare_mode);
25605 /* If we don't care about NaNs we can reduce some of the comparisons
25606 down to faster ones. */
25607 if (! HONOR_NANS (compare_mode))
25608 switch (code)
25610 case GT:
25611 code = LE;
25612 temp = true_cond;
25613 true_cond = false_cond;
25614 false_cond = temp;
25615 break;
25616 case UNGE:
25617 code = GE;
25618 break;
25619 case UNEQ:
25620 code = EQ;
25621 break;
25622 default:
25623 break;
25626 /* Now, reduce everything down to a GE. */
25627 switch (code)
25629 case GE:
25630 break;
25632 case LE:
25633 temp = gen_reg_rtx (compare_mode);
25634 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
25635 op0 = temp;
25636 break;
25638 case ORDERED:
25639 temp = gen_reg_rtx (compare_mode);
25640 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
25641 op0 = temp;
25642 break;
25644 case EQ:
25645 temp = gen_reg_rtx (compare_mode);
25646 emit_insn (gen_rtx_SET (temp,
25647 gen_rtx_NEG (compare_mode,
25648 gen_rtx_ABS (compare_mode, op0))));
25649 op0 = temp;
25650 break;
25652 case UNGE:
25653 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
25654 temp = gen_reg_rtx (result_mode);
25655 emit_insn (gen_rtx_SET (temp,
25656 gen_rtx_IF_THEN_ELSE (result_mode,
25657 gen_rtx_GE (VOIDmode,
25658 op0, op1),
25659 true_cond, false_cond)));
25660 false_cond = true_cond;
25661 true_cond = temp;
25663 temp = gen_reg_rtx (compare_mode);
25664 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
25665 op0 = temp;
25666 break;
25668 case GT:
25669 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
25670 temp = gen_reg_rtx (result_mode);
25671 emit_insn (gen_rtx_SET (temp,
25672 gen_rtx_IF_THEN_ELSE (result_mode,
25673 gen_rtx_GE (VOIDmode,
25674 op0, op1),
25675 true_cond, false_cond)));
25676 true_cond = false_cond;
25677 false_cond = temp;
25679 temp = gen_reg_rtx (compare_mode);
25680 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
25681 op0 = temp;
25682 break;
25684 default:
25685 gcc_unreachable ();
25688 emit_insn (gen_rtx_SET (dest,
25689 gen_rtx_IF_THEN_ELSE (result_mode,
25690 gen_rtx_GE (VOIDmode,
25691 op0, op1),
25692 true_cond, false_cond)));
25693 return 1;
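/* A worked note on the GE reduction above (illustrative only): once
   the comparison is against zero, LE uses  op0 <= 0  <->  -op0 >= 0,
   ORDERED uses  |op0| >= 0  (true unless op0 is a NaN), and EQ uses
   op0 == 0  <->  -|op0| >= 0, since -|x| >= 0 holds exactly for
   x == 0.  The final fsel-style IF_THEN_ELSE then implements
   dest = (op0 >= 0) ? true_cond : false_cond.  */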
25696 /* Same as above, but for ints (isel). */
25698 static int
25699 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25701 rtx condition_rtx, cr;
25702 machine_mode mode = GET_MODE (dest);
25703 enum rtx_code cond_code;
25704 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
25705 bool signedp;
25707 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
25708 return 0;
25710 /* We still have to do the compare, because isel doesn't do a
25711 compare, it just looks at the CRx bits set by a previous compare
25712 instruction. */
25713 condition_rtx = rs6000_generate_compare (op, mode);
25714 cond_code = GET_CODE (condition_rtx);
25715 cr = XEXP (condition_rtx, 0);
25716 signedp = GET_MODE (cr) == CCmode;
25718 isel_func = (mode == SImode
25719 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
25720 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
25722 switch (cond_code)
25724 case LT: case GT: case LTU: case GTU: case EQ:
25725 /* isel handles these directly. */
25726 break;
25728 default:
25729 /* We need to swap the sense of the comparison. */
25731 std::swap (false_cond, true_cond);
25732 PUT_CODE (condition_rtx, reverse_condition (cond_code));
25734 break;
25737 false_cond = force_reg (mode, false_cond);
25738 if (true_cond != const0_rtx)
25739 true_cond = force_reg (mode, true_cond);
25741 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
25743 return 1;
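/* Illustrative sketch (not from the original source): for

     int r = (a < b) ? x : y;

   an isel-capable target gets roughly

     cmpw cr0, a, b
     isel r, x, y, 0       # CR0.LT set -> r = x, else r = y

   matching the comment above: isel performs no comparison itself,
   it merely selects on a CR bit set by the preceding compare.  */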
25746 const char *
25747 output_isel (rtx *operands)
25749 enum rtx_code code;
25751 code = GET_CODE (operands[1]);
25753 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
25755 gcc_assert (GET_CODE (operands[2]) == REG
25756 && GET_CODE (operands[3]) == REG);
25757 PUT_CODE (operands[1], reverse_condition (code));
25758 return "isel %0,%3,%2,%j1";
25761 return "isel %0,%2,%3,%j1";
25764 void
25765 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
25767 machine_mode mode = GET_MODE (op0);
25768 enum rtx_code c;
25769 rtx target;
25771 /* VSX/altivec have direct min/max insns. */
25772 if ((code == SMAX || code == SMIN)
25773 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
25774 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
25776 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
25777 return;
25780 if (code == SMAX || code == SMIN)
25781 c = GE;
25782 else
25783 c = GEU;
25785 if (code == SMAX || code == UMAX)
25786 target = emit_conditional_move (dest, c, op0, op1, mode,
25787 op0, op1, mode, 0);
25788 else
25789 target = emit_conditional_move (dest, c, op0, op1, mode,
25790 op1, op0, mode, 0);
25791 gcc_assert (target);
25792 if (target != dest)
25793 emit_move_insn (dest, target);
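/* Illustrative note: when no direct vector/VSX min-max insn applies,
   the fallback above reduces e.g. SMAX to the conditional move
   dest = (op0 >= op1) ? op0 : op1 via emit_conditional_move, which
   may in turn be implemented with fsel or isel.  */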
25796 /* Split a signbit operation on 64-bit machines with direct move. Also allow
25797 for the value to come from memory or if it is already loaded into a GPR. */
25799 void
25800 rs6000_split_signbit (rtx dest, rtx src)
25802 machine_mode d_mode = GET_MODE (dest);
25803 machine_mode s_mode = GET_MODE (src);
25804 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
25805 rtx shift_reg = dest_di;
25807 gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
25809 if (MEM_P (src))
25811 rtx mem = (WORDS_BIG_ENDIAN
25812 ? adjust_address (src, DImode, 0)
25813 : adjust_address (src, DImode, 8));
25814 emit_insn (gen_rtx_SET (dest_di, mem));
25817 else
25819 unsigned int r = reg_or_subregno (src);
25821 if (INT_REGNO_P (r))
25822 shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
25824 else
25826 /* Generate the special mfvsrd instruction to get it in a GPR. */
25827 gcc_assert (VSX_REGNO_P (r));
25828 if (s_mode == KFmode)
25829 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
25830 else
25831 emit_insn (gen_signbittf2_dm2 (dest_di, src));
25835 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
25836 return;
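/* A worked example of the split (illustrative): for an IEEE 128-bit
   value in memory on a little-endian target, the sign bit lives in
   the doubleword at offset 8, so that DImode word is loaded and
   then  srdi dest, dest, 63  leaves the sign bit as 0 or 1.  */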
25839 /* A subroutine of the atomic operation splitters. Jump to LABEL if
25840 COND is true. Mark the jump as unlikely to be taken. */
25842 static void
25843 emit_unlikely_jump (rtx cond, rtx label)
25845 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
25846 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
25847 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
25848 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
25851 /* A subroutine of the atomic operation splitters. Emit a load-locked
25852 instruction in MODE. For QI/HImode, possibly use a pattern that includes
25853 the zero_extend operation. */
25855 static void
25856 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
25858 rtx (*fn) (rtx, rtx) = NULL;
25860 switch (mode)
25862 case QImode:
25863 fn = gen_load_lockedqi;
25864 break;
25865 case HImode:
25866 fn = gen_load_lockedhi;
25867 break;
25868 case SImode:
25869 if (GET_MODE (mem) == QImode)
25870 fn = gen_load_lockedqi_si;
25871 else if (GET_MODE (mem) == HImode)
25872 fn = gen_load_lockedhi_si;
25873 else
25874 fn = gen_load_lockedsi;
25875 break;
25876 case DImode:
25877 fn = gen_load_lockeddi;
25878 break;
25879 case TImode:
25880 fn = gen_load_lockedti;
25881 break;
25882 default:
25883 gcc_unreachable ();
25885 emit_insn (fn (reg, mem));
25888 /* A subroutine of the atomic operation splitters. Emit a store-conditional
25889 instruction in MODE. */
25891 static void
25892 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
25894 rtx (*fn) (rtx, rtx, rtx) = NULL;
25896 switch (mode)
25898 case QImode:
25899 fn = gen_store_conditionalqi;
25900 break;
25901 case HImode:
25902 fn = gen_store_conditionalhi;
25903 break;
25904 case SImode:
25905 fn = gen_store_conditionalsi;
25906 break;
25907 case DImode:
25908 fn = gen_store_conditionaldi;
25909 break;
25910 case TImode:
25911 fn = gen_store_conditionalti;
25912 break;
25913 default:
25914 gcc_unreachable ();
25917 /* Emit sync before stwcx. to address PPC405 Erratum. */
25918 if (PPC405_ERRATUM77)
25919 emit_insn (gen_hwsync ());
25921 emit_insn (fn (res, mem, val));
25924 /* Expand barriers before and after a load_locked/store_cond sequence. */
25926 static rtx
25927 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
25929 rtx addr = XEXP (mem, 0);
25930 int strict_p = (reload_in_progress || reload_completed);
25932 if (!legitimate_indirect_address_p (addr, strict_p)
25933 && !legitimate_indexed_address_p (addr, strict_p))
25935 addr = force_reg (Pmode, addr);
25936 mem = replace_equiv_address_nv (mem, addr);
25939 switch (model)
25941 case MEMMODEL_RELAXED:
25942 case MEMMODEL_CONSUME:
25943 case MEMMODEL_ACQUIRE:
25944 break;
25945 case MEMMODEL_RELEASE:
25946 case MEMMODEL_ACQ_REL:
25947 emit_insn (gen_lwsync ());
25948 break;
25949 case MEMMODEL_SEQ_CST:
25950 emit_insn (gen_hwsync ());
25951 break;
25952 default:
25953 gcc_unreachable ();
25955 return mem;
25958 static void
25959 rs6000_post_atomic_barrier (enum memmodel model)
25961 switch (model)
25963 case MEMMODEL_RELAXED:
25964 case MEMMODEL_CONSUME:
25965 case MEMMODEL_RELEASE:
25966 break;
25967 case MEMMODEL_ACQUIRE:
25968 case MEMMODEL_ACQ_REL:
25969 case MEMMODEL_SEQ_CST:
25970 emit_insn (gen_isync ());
25971 break;
25972 default:
25973 gcc_unreachable ();
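/* Illustrative summary of the barrier placement in the two functions
   above (read directly off the switch statements, repeated here for
   reference):

     model              before ll/sc    after ll/sc
     relaxed/consume    (none)          (none)
     acquire            (none)          isync
     release            lwsync          (none)
     acq_rel            lwsync          isync
     seq_cst            hwsync          isync                      */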
25977 /* A subroutine of the various atomic expanders. For sub-word operations,
25978 we must adjust things to operate on SImode. Given the original MEM,
25979 return a new aligned memory. Also build and return the quantities by
25980 which to shift and mask. */
25982 static rtx
25983 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
25985 rtx addr, align, shift, mask, mem;
25986 HOST_WIDE_INT shift_mask;
25987 machine_mode mode = GET_MODE (orig_mem);
25989 /* For smaller modes, we have to implement this via SImode. */
25990 shift_mask = (mode == QImode ? 0x18 : 0x10);
25992 addr = XEXP (orig_mem, 0);
25993 addr = force_reg (GET_MODE (addr), addr);
25995 /* Aligned memory containing subword. Generate a new memory. We
25996 do not want any of the existing MEM_ATTR data, as we're now
25997 accessing memory outside the original object. */
25998 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
25999 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26000 mem = gen_rtx_MEM (SImode, align);
26001 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
26002 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
26003 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
26005 /* Shift amount for subword relative to aligned word. */
26006 shift = gen_reg_rtx (SImode);
26007 addr = gen_lowpart (SImode, addr);
26008 rtx tmp = gen_reg_rtx (SImode);
26009 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
26010 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
26011 if (BYTES_BIG_ENDIAN)
26012 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
26013 shift, 1, OPTAB_LIB_WIDEN);
26014 *pshift = shift;
26016 /* Mask for insertion. */
26017 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
26018 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
26019 *pmask = mask;
26021 return mem;
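/* A worked example (illustrative, big-endian, QImode): for a byte at
   address 0x1003,

     align = 0x1003 & -4          = 0x1000
     shift = (0x1003 << 3) & 0x18 = 24,  then  24 ^ 0x18 = 0
     mask  = 0xff << 0            = 0x000000ff

   i.e. the byte sits in the least-significant bits of the aligned
   big-endian word, which is what the XOR correction accounts for.  */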
26024 /* A subroutine of the various atomic expanders. For sub-word operands,
26025 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
26027 static rtx
26028 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
26030 rtx x;
26032 x = gen_reg_rtx (SImode);
26033 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
26034 gen_rtx_NOT (SImode, mask),
26035 oldval)));
26037 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
26039 return x;
26042 /* A subroutine of the various atomic expanders. For sub-word operands,
26043 extract WIDE to NARROW via SHIFT. */
26045 static void
26046 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
26048 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
26049 wide, 1, OPTAB_LIB_WIDEN);
26050 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
26053 /* Expand an atomic compare and swap operation. */
26055 void
26056 rs6000_expand_atomic_compare_and_swap (rtx operands[])
26058 rtx boolval, retval, mem, oldval, newval, cond;
26059 rtx label1, label2, x, mask, shift;
26060 machine_mode mode, orig_mode;
26061 enum memmodel mod_s, mod_f;
26062 bool is_weak;
26064 boolval = operands[0];
26065 retval = operands[1];
26066 mem = operands[2];
26067 oldval = operands[3];
26068 newval = operands[4];
26069 is_weak = (INTVAL (operands[5]) != 0);
26070 mod_s = memmodel_base (INTVAL (operands[6]));
26071 mod_f = memmodel_base (INTVAL (operands[7]));
26072 orig_mode = mode = GET_MODE (mem);
26074 mask = shift = NULL_RTX;
26075 if (mode == QImode || mode == HImode)
26077 /* Before power8, we didn't have access to lbarx/lharx, so generate a
26078 lwarx plus shift/mask operations. With power8, we need to do the
26079 comparison in SImode, but the store is still done in QI/HImode. */
26080 oldval = convert_modes (SImode, mode, oldval, 1);
26082 if (!TARGET_SYNC_HI_QI)
26084 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26086 /* Shift and mask OLDVAL into position within the word. */
26087 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
26088 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26090 /* Shift and mask NEWVAL into position within the word. */
26091 newval = convert_modes (SImode, mode, newval, 1);
26092 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
26093 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26096 /* Prepare to adjust the return value. */
26097 retval = gen_reg_rtx (SImode);
26098 mode = SImode;
26100 else if (reg_overlap_mentioned_p (retval, oldval))
26101 oldval = copy_to_reg (oldval);
26103 if (mode != TImode && !reg_or_short_operand (oldval, mode))
26104 oldval = copy_to_mode_reg (mode, oldval);
26106 if (reg_overlap_mentioned_p (retval, newval))
26107 newval = copy_to_reg (newval);
26109 mem = rs6000_pre_atomic_barrier (mem, mod_s);
26111 label1 = NULL_RTX;
26112 if (!is_weak)
26114 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26115 emit_label (XEXP (label1, 0));
26117 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26119 emit_load_locked (mode, retval, mem);
26121 x = retval;
26122 if (mask)
26123 x = expand_simple_binop (SImode, AND, retval, mask,
26124 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26126 cond = gen_reg_rtx (CCmode);
26127 /* If we have TImode, synthesize a comparison. */
26128 if (mode != TImode)
26129 x = gen_rtx_COMPARE (CCmode, x, oldval);
26130 else
26132 rtx xor1_result = gen_reg_rtx (DImode);
26133 rtx xor2_result = gen_reg_rtx (DImode);
26134 rtx or_result = gen_reg_rtx (DImode);
26135 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
26136 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
26137 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
26138 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
26140 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
26141 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
26142 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
26143 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
26146 emit_insn (gen_rtx_SET (cond, x));
26148 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26149 emit_unlikely_jump (x, label2);
26151 x = newval;
26152 if (mask)
26153 x = rs6000_mask_atomic_subword (retval, newval, mask);
26155 emit_store_conditional (orig_mode, cond, mem, x);
26157 if (!is_weak)
26159 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26160 emit_unlikely_jump (x, label1);
26163 if (!is_mm_relaxed (mod_f))
26164 emit_label (XEXP (label2, 0));
26166 rs6000_post_atomic_barrier (mod_s);
26168 if (is_mm_relaxed (mod_f))
26169 emit_label (XEXP (label2, 0));
26171 if (shift)
26172 rs6000_finish_atomic_subword (operands[1], retval, shift);
26173 else if (mode != GET_MODE (operands[1]))
26174 convert_move (operands[1], retval, 1);
26176 /* In all cases, CR0 contains EQ on success, and NE on failure. */
26177 x = gen_rtx_EQ (SImode, cond, const0_rtx);
26178 emit_insn (gen_rtx_SET (boolval, x));
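/* Illustrative shape of the emitted code for a strong SImode
   compare-and-swap with SEQ_CST ordering (a sketch, not a verbatim
   dump):

        hwsync
     1: lwarx   r, 0, mem
        cmpw    cr0, r, oldval
        bne-    cr0, 2f
        stwcx.  newval, 0, mem
        bne-    cr0, 1b
     2: isync

   leaving CR0 holding EQ on success and NE on failure, which the
   final SET of BOOLVAL turns into the boolean result.  */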
26181 /* Expand an atomic exchange operation. */
26183 void
26184 rs6000_expand_atomic_exchange (rtx operands[])
26186 rtx retval, mem, val, cond;
26187 machine_mode mode;
26188 enum memmodel model;
26189 rtx label, x, mask, shift;
26191 retval = operands[0];
26192 mem = operands[1];
26193 val = operands[2];
26194 model = memmodel_base (INTVAL (operands[3]));
26195 mode = GET_MODE (mem);
26197 mask = shift = NULL_RTX;
26198 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
26200 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26202 /* Shift and mask VAL into position within the word. */
26203 val = convert_modes (SImode, mode, val, 1);
26204 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26205 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26207 /* Prepare to adjust the return value. */
26208 retval = gen_reg_rtx (SImode);
26209 mode = SImode;
26212 mem = rs6000_pre_atomic_barrier (mem, model);
26214 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26215 emit_label (XEXP (label, 0));
26217 emit_load_locked (mode, retval, mem);
26219 x = val;
26220 if (mask)
26221 x = rs6000_mask_atomic_subword (retval, val, mask);
26223 cond = gen_reg_rtx (CCmode);
26224 emit_store_conditional (mode, cond, mem, x);
26226 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26227 emit_unlikely_jump (x, label);
26229 rs6000_post_atomic_barrier (model);
26231 if (shift)
26232 rs6000_finish_atomic_subword (operands[0], retval, shift);
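/* Illustrative shape of a full-word exchange (sketch only):

     1: lwarx   r, 0, mem
        stwcx.  val, 0, mem
        bne-    cr0, 1b

   bracketed by whatever barriers the memory model requires, with
   the previous contents of MEM left in R.  */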
26235 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
26236 to perform. MEM is the memory on which to operate. VAL is the second
26237 operand of the binary operator. BEFORE and AFTER are optional locations to
26238 return the value of MEM either before or after the operation. MODEL_RTX
26239 is a CONST_INT containing the memory model to use. */
26241 void
26242 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
26243 rtx orig_before, rtx orig_after, rtx model_rtx)
26245 enum memmodel model = memmodel_base (INTVAL (model_rtx));
26246 machine_mode mode = GET_MODE (mem);
26247 machine_mode store_mode = mode;
26248 rtx label, x, cond, mask, shift;
26249 rtx before = orig_before, after = orig_after;
26251 mask = shift = NULL_RTX;
26252 /* On power8, we want to use SImode for the operation. On earlier systems,
26253 do the operation on the containing SImode word and shift/mask to get the
26254 proper byte or halfword. */
26255 if (mode == QImode || mode == HImode)
26257 if (TARGET_SYNC_HI_QI)
26259 val = convert_modes (SImode, mode, val, 1);
26261 /* Prepare to adjust the return value. */
26262 before = gen_reg_rtx (SImode);
26263 if (after)
26264 after = gen_reg_rtx (SImode);
26265 mode = SImode;
26267 else
26269 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26271 /* Shift and mask VAL into position within the word. */
26272 val = convert_modes (SImode, mode, val, 1);
26273 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26274 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26276 switch (code)
26278 case IOR:
26279 case XOR:
26280 /* We've already zero-extended VAL. That is sufficient to
26281 make certain that it does not affect other bits. */
26282 mask = NULL;
26283 break;
26285 case AND:
26286 /* If we make certain that all of the other bits in VAL are
26287 set, that will be sufficient to not affect other bits. */
26288 x = gen_rtx_NOT (SImode, mask);
26289 x = gen_rtx_IOR (SImode, x, val);
26290 emit_insn (gen_rtx_SET (val, x));
26291 mask = NULL;
26292 break;
26294 case NOT:
26295 case PLUS:
26296 case MINUS:
26297 /* These will all affect bits outside the field and need
26298 adjustment via MASK within the loop. */
26299 break;
26301 default:
26302 gcc_unreachable ();
26305 /* Prepare to adjust the return value. */
26306 before = gen_reg_rtx (SImode);
26307 if (after)
26308 after = gen_reg_rtx (SImode);
26309 store_mode = mode = SImode;
26313 mem = rs6000_pre_atomic_barrier (mem, model);
26315 label = gen_label_rtx ();
26316 emit_label (label);
26317 label = gen_rtx_LABEL_REF (VOIDmode, label);
26319 if (before == NULL_RTX)
26320 before = gen_reg_rtx (mode);
26322 emit_load_locked (mode, before, mem);
26324 if (code == NOT)
26326 x = expand_simple_binop (mode, AND, before, val,
26327 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26328 after = expand_simple_unop (mode, NOT, x, after, 1);
26330 else
26332 after = expand_simple_binop (mode, code, before, val,
26333 after, 1, OPTAB_LIB_WIDEN);
26336 x = after;
26337 if (mask)
26339 x = expand_simple_binop (SImode, AND, after, mask,
26340 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26341 x = rs6000_mask_atomic_subword (before, x, mask);
26343 else if (store_mode != mode)
26344 x = convert_modes (store_mode, mode, x, 1);
26346 cond = gen_reg_rtx (CCmode);
26347 emit_store_conditional (store_mode, cond, mem, x);
26349 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26350 emit_unlikely_jump (x, label);
26352 rs6000_post_atomic_barrier (model);
26354 if (shift)
26356 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
26357 then do the calculations in a SImode register. */
26358 if (orig_before)
26359 rs6000_finish_atomic_subword (orig_before, before, shift);
26360 if (orig_after)
26361 rs6000_finish_atomic_subword (orig_after, after, shift);
26363 else if (store_mode != mode)
26365 /* QImode/HImode on machines with lbarx/lharx where we do the native
26366 operation and then do the calculations in a SImode register. */
26367 if (orig_before)
26368 convert_move (orig_before, before, 1);
26369 if (orig_after)
26370 convert_move (orig_after, after, 1);
26372 else if (orig_after && after != orig_after)
26373 emit_move_insn (orig_after, after);
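/* Illustrative shape of a full-word atomic fetch-and-add (sketch
   only):

     1: lwarx   before, 0, mem
        add     after, before, val
        stwcx.  after, 0, mem
        bne-    cr0, 1b

   For code == NOT the body is an AND followed by a one's complement,
   i.e. an atomic nand, matching the special case above.  */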
26376 /* Emit instructions to move SRC to DST. Called by splitters for
26377 multi-register moves. It will emit at most one instruction for
26378 each register that is accessed; that is, it won't emit li/lis pairs
26379 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26380 register. */
26382 void
26383 rs6000_split_multireg_move (rtx dst, rtx src)
26385 /* The register number of the first register being moved. */
26386 int reg;
26387 /* The mode that is to be moved. */
26388 machine_mode mode;
26389 /* The mode that the move is being done in, and its size. */
26390 machine_mode reg_mode;
26391 int reg_mode_size;
26392 /* The number of registers that will be moved. */
26393 int nregs;
26395 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26396 mode = GET_MODE (dst);
26397 nregs = hard_regno_nregs[reg][mode];
26398 if (FP_REGNO_P (reg))
26399 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26400 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
26401 else if (ALTIVEC_REGNO_P (reg))
26402 reg_mode = V16QImode;
26403 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
26404 reg_mode = DFmode;
26405 else
26406 reg_mode = word_mode;
26407 reg_mode_size = GET_MODE_SIZE (reg_mode);
26409 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26411 /* TDmode residing in FP registers is special, since the ISA requires that
26412 the lower-numbered word of a register pair is always the most significant
26413 word, even in little-endian mode. This does not match the usual subreg
26414 semantics, so we cannot use simplify_gen_subreg in those cases. Access
26415 the appropriate constituent registers "by hand" in little-endian mode.
26417 Note we do not need to check for destructive overlap here since TDmode
26418 can only reside in even/odd register pairs. */
26419 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26421 rtx p_src, p_dst;
26422 int i;
26424 for (i = 0; i < nregs; i++)
26426 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
26427 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
26428 else
26429 p_src = simplify_gen_subreg (reg_mode, src, mode,
26430 i * reg_mode_size);
26432 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26433 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26434 else
26435 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26436 i * reg_mode_size);
26438 emit_insn (gen_rtx_SET (p_dst, p_src));
26441 return;
26444 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
26446 /* Move register range backwards, if we might have destructive
26447 overlap. */
26448 int i;
26449 for (i = nregs - 1; i >= 0; i--)
26450 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26451 i * reg_mode_size),
26452 simplify_gen_subreg (reg_mode, src, mode,
26453 i * reg_mode_size)));
26455 else
26457 int i;
26458 int j = -1;
26459 bool used_update = false;
26460 rtx restore_basereg = NULL_RTX;
26462 if (MEM_P (src) && INT_REGNO_P (reg))
26464 rtx breg;
26466 if (GET_CODE (XEXP (src, 0)) == PRE_INC
26467 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
26469 rtx delta_rtx;
26470 breg = XEXP (XEXP (src, 0), 0);
26471 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
26472 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
26473 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
26474 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26475 src = replace_equiv_address (src, breg);
26477 else if (! rs6000_offsettable_memref_p (src, reg_mode))
26479 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
26481 rtx basereg = XEXP (XEXP (src, 0), 0);
26482 if (TARGET_UPDATE)
26484 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
26485 emit_insn (gen_rtx_SET (ndst,
26486 gen_rtx_MEM (reg_mode,
26487 XEXP (src, 0))));
26488 used_update = true;
26490 else
26491 emit_insn (gen_rtx_SET (basereg,
26492 XEXP (XEXP (src, 0), 1)));
26493 src = replace_equiv_address (src, basereg);
26495 else
26497 rtx basereg = gen_rtx_REG (Pmode, reg);
26498 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
26499 src = replace_equiv_address (src, basereg);
26503 breg = XEXP (src, 0);
26504 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
26505 breg = XEXP (breg, 0);
26507 /* If the base register we are using to address memory is
26508 also a destination reg, then change that register last. */
26509 if (REG_P (breg)
26510 && REGNO (breg) >= REGNO (dst)
26511 && REGNO (breg) < REGNO (dst) + nregs)
26512 j = REGNO (breg) - REGNO (dst);
26514 else if (MEM_P (dst) && INT_REGNO_P (reg))
26516 rtx breg;
26518 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
26519 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
26521 rtx delta_rtx;
26522 breg = XEXP (XEXP (dst, 0), 0);
26523 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
26524 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
26525 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
26527 /* We have to update the breg before doing the store.
26528 Use store with update, if available. */
26530 if (TARGET_UPDATE)
26532 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26533 emit_insn (TARGET_32BIT
26534 ? (TARGET_POWERPC64
26535 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
26536 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
26537 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
26538 used_update = true;
26540 else
26541 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26542 dst = replace_equiv_address (dst, breg);
26544 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
26545 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
26547 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
26549 rtx basereg = XEXP (XEXP (dst, 0), 0);
26550 if (TARGET_UPDATE)
26552 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26553 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
26554 XEXP (dst, 0)),
26555 nsrc));
26556 used_update = true;
26558 else
26559 emit_insn (gen_rtx_SET (basereg,
26560 XEXP (XEXP (dst, 0), 1)));
26561 dst = replace_equiv_address (dst, basereg);
26563 else
26565 rtx basereg = XEXP (XEXP (dst, 0), 0);
26566 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
26567 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
26568 && REG_P (basereg)
26569 && REG_P (offsetreg)
26570 && REGNO (basereg) != REGNO (offsetreg));
26571 if (REGNO (basereg) == 0)
26573 rtx tmp = offsetreg;
26574 offsetreg = basereg;
26575 basereg = tmp;
26577 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
26578 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
26579 dst = replace_equiv_address (dst, basereg);
26582 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
26583 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
26586 for (i = 0; i < nregs; i++)
26588 /* Calculate index to next subword. */
26589 ++j;
26590 if (j == nregs)
26591 j = 0;
26593 /* If compiler already emitted move of first word by
26594 store with update, no need to do anything. */
26595 if (j == 0 && used_update)
26596 continue;
26598 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26599 j * reg_mode_size),
26600 simplify_gen_subreg (reg_mode, src, mode,
26601 j * reg_mode_size)));
26603 if (restore_basereg != NULL_RTX)
26604 emit_insn (restore_basereg);
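/* A worked overlap example (illustrative): copying a TImode value
   from r3:r4 to r4:r5 must move r4->r5 before r3->r4; a forward
   walk would clobber the still-live high word first, which is why
   the loop runs backwards whenever REGNO (src) < REGNO (dst).  */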
26609 /* This page contains routines that are used to determine what the
26610 function prologue and epilogue code will do and write them out. */
26612 static inline bool
26613 save_reg_p (int r)
26615 return !call_used_regs[r] && df_regs_ever_live_p (r);
26618 /* Determine whether the gp REG is really used. */
26620 static bool
26621 rs6000_reg_live_or_pic_offset_p (int reg)
26623 /* We need to mark the PIC offset register live for the same conditions
26624 as it is set up, or otherwise it won't be saved before we clobber it. */
26626 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
26628 if (TARGET_TOC && TARGET_MINIMAL_TOC
26629 && (crtl->calls_eh_return
26630 || df_regs_ever_live_p (reg)
26631 || !constant_pool_empty_p ()))
26632 return true;
26634 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
26635 && flag_pic)
26636 return true;
26639 /* If the function calls eh_return, claim used all the registers that would
26640 be checked for liveness otherwise. */
26642 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
26643 && !call_used_regs[reg]);
26646 /* Return the first fixed-point register that is required to be
26647 saved. 32 if none. */
26650 first_reg_to_save (void)
26652 int first_reg;
26654 /* Find lowest numbered live register. */
26655 for (first_reg = 13; first_reg <= 31; first_reg++)
26656 if (save_reg_p (first_reg))
26657 break;
26659 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
26660 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
26661 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
26662 || (TARGET_TOC && TARGET_MINIMAL_TOC))
26663 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
26664 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
26666 #if TARGET_MACHO
26667 if (flag_pic
26668 && crtl->uses_pic_offset_table
26669 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
26670 return RS6000_PIC_OFFSET_TABLE_REGNUM;
26671 #endif
26673 return first_reg;
26676 /* Similar, for FP regs. */
26679 first_fp_reg_to_save (void)
26681 int first_reg;
26683 /* Find lowest numbered live register. */
26684 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
26685 if (save_reg_p (first_reg))
26686 break;
26688 return first_reg;
26691 /* Similar, for AltiVec regs. */
26693 static int
26694 first_altivec_reg_to_save (void)
26696 int i;
26698 /* Stack frame remains as is unless we are in AltiVec ABI. */
26699 if (! TARGET_ALTIVEC_ABI)
26700 return LAST_ALTIVEC_REGNO + 1;
26702 /* On Darwin, the unwind routines are compiled without
26703 TARGET_ALTIVEC, and use save_world to save/restore the
26704 altivec registers when necessary. */
26705 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
26706 && ! TARGET_ALTIVEC)
26707 return FIRST_ALTIVEC_REGNO + 20;
26709 /* Find lowest numbered live register. */
26710 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
26711 if (save_reg_p (i))
26712 break;
26714 return i;
26717 /* Return a 32-bit mask of the AltiVec registers we need to set in
26718 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
26719 the 32-bit word is 0. */
26721 static unsigned int
26722 compute_vrsave_mask (void)
26724 unsigned int i, mask = 0;
26726 /* On Darwin, the unwind routines are compiled without
26727 TARGET_ALTIVEC, and use save_world to save/restore the
26728 call-saved altivec registers when necessary. */
26729 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
26730 && ! TARGET_ALTIVEC)
26731 mask |= 0xFFF;
26733 /* First, find out if we use _any_ altivec registers. */
26734 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
26735 if (df_regs_ever_live_p (i))
26736 mask |= ALTIVEC_REG_BIT (i);
26738 if (mask == 0)
26739 return mask;
26741 /* Next, remove the argument registers from the set. These must
26742 be in the VRSAVE mask set by the caller, so we don't need to add
26743 them in again. More importantly, the mask we compute here is
26744 used to generate CLOBBERs in the set_vrsave insn, and we do not
26745 wish the argument registers to die. */
26746 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
26747 mask &= ~ALTIVEC_REG_BIT (i);
26749 /* Similarly, remove the return value from the set. */
26751 bool yes = false;
26752 diddle_return_value (is_altivec_return_reg, &yes);
26753 if (yes)
26754 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
26757 return mask;
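/* A worked example (illustrative, assuming ALTIVEC_REG_BIT follows
   the MSB-is-V0 convention documented above): if only V20 is live
   and survives the argument/return-value pruning, the function
   returns 1 << (31 - 20) = 0x00000800.  */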
26760 /* For a very restricted set of circumstances, we can cut down the
26761 size of prologues/epilogues by calling our own save/restore-the-world
26762 routines. */
26764 static void
26765 compute_save_world_info (rs6000_stack_t *info)
26767 info->world_save_p = 1;
26768 info->world_save_p
26769 = (WORLD_SAVE_P (info)
26770 && DEFAULT_ABI == ABI_DARWIN
26771 && !cfun->has_nonlocal_label
26772 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
26773 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
26774 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
26775 && info->cr_save_p);
26777 /* This will not work in conjunction with sibcalls. Make sure there
26778 are none. (This check is expensive, but seldom executed.) */
26779 if (WORLD_SAVE_P (info))
26781 rtx_insn *insn;
26782 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
26783 if (CALL_P (insn) && SIBLING_CALL_P (insn))
26785 info->world_save_p = 0;
26786 break;
26790 if (WORLD_SAVE_P (info))
26792 /* Even if we're not touching VRsave, make sure there's room on the
26793 stack for it, if it looks like we're calling SAVE_WORLD, which
26794 will attempt to save it. */
26795 info->vrsave_size = 4;
26797 /* If we are going to save the world, we need to save the link register too. */
26798 info->lr_save_p = 1;
26800 /* "Save" the VRsave register too if we're saving the world. */
26801 if (info->vrsave_mask == 0)
26802 info->vrsave_mask = compute_vrsave_mask ();
26804 /* Because the Darwin register save/restore routines only handle
26805 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
26806 check. */
26807 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
26808 && (info->first_altivec_reg_save
26809 >= FIRST_SAVED_ALTIVEC_REGNO));
26812 return;
26816 static void
26817 is_altivec_return_reg (rtx reg, void *xyes)
26819 bool *yes = (bool *) xyes;
26820 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
26821 *yes = true;
26825 /* Return whether REG is a global user reg or has been specified by
26826 -ffixed-REG. We should not restore these, and so cannot use
26827 lmw or out-of-line restore functions if there are any. We also
26828 can't save them (well, emit frame notes for them), because frame
26829 unwinding during exception handling will restore saved registers. */
26831 static bool
26832 fixed_reg_p (int reg)
26834 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
26835 backend sets it, overriding anything the user might have given. */
26836 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
26837 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
26838 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
26839 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
26840 return false;
26842 return fixed_regs[reg];
26845 /* Determine the strategy for savings/restoring registers. */
26847 enum {
26848 SAVE_MULTIPLE = 0x1,
26849 SAVE_INLINE_GPRS = 0x2,
26850 SAVE_INLINE_FPRS = 0x4,
26851 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
26852 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
26853 SAVE_INLINE_VRS = 0x20,
26854 REST_MULTIPLE = 0x100,
26855 REST_INLINE_GPRS = 0x200,
26856 REST_INLINE_FPRS = 0x400,
26857 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
26858 REST_INLINE_VRS = 0x1000
26861 static int
26862 rs6000_savres_strategy (rs6000_stack_t *info,
26863 bool using_static_chain_p)
26865 int strategy = 0;
26867 /* Select between in-line and out-of-line save and restore of regs.
26868 First, all the obvious cases where we don't use out-of-line. */
26869 if (crtl->calls_eh_return
26870 || cfun->machine->ra_need_lr)
26871 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
26872 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
26873 | SAVE_INLINE_VRS | REST_INLINE_VRS);
26875 if (info->first_gp_reg_save == 32)
26876 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26878 if (info->first_fp_reg_save == 64
26879 /* The out-of-line FP routines use double-precision stores;
26880 we can't use those routines if we don't have such stores. */
26881 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
26882 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
26884 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
26885 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26887 /* Define cutoff for using out-of-line functions to save registers. */
26888 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
26890 if (!optimize_size)
26892 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
26893 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26894 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26896 else
26898 /* Prefer out-of-line restore if it will exit. */
26899 if (info->first_fp_reg_save > 61)
26900 strategy |= SAVE_INLINE_FPRS;
26901 if (info->first_gp_reg_save > 29)
26903 if (info->first_fp_reg_save == 64)
26904 strategy |= SAVE_INLINE_GPRS;
26905 else
26906 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26908 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
26909 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26912 else if (DEFAULT_ABI == ABI_DARWIN)
26914 if (info->first_fp_reg_save > 60)
26915 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
26916 if (info->first_gp_reg_save > 29)
26917 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26918 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26920 else
26922 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26923 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
26924 || info->first_fp_reg_save > 61)
26925 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
26926 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26927 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26930 /* Don't bother to try to save things out-of-line if r11 is occupied
26931 by the static chain. It would require too much fiddling and the
26932 static chain is rarely used anyway. FPRs are saved w.r.t the stack
26933 pointer on Darwin, and AIX uses r1 or r12. */
26934 if (using_static_chain_p
26935 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
26936 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
26937 | SAVE_INLINE_GPRS
26938 | SAVE_INLINE_VRS);
26940 /* Saving CR interferes with the exit routines used on the SPE, so
26941 just punt here. */
26942 if (TARGET_SPE_ABI
26943 && info->spe_64bit_regs_used
26944 && info->cr_save_p)
26945 strategy |= REST_INLINE_GPRS;
26947 /* We can only use the out-of-line routines to restore fprs if we've
26948 saved all the registers from first_fp_reg_save in the prologue.
26949 Otherwise, we risk loading garbage. Of course, if we have saved
26950 out-of-line then we know we haven't skipped any fprs. */
26951 if ((strategy & SAVE_INLINE_FPRS)
26952 && !(strategy & REST_INLINE_FPRS))
26954 int i;
26956 for (i = info->first_fp_reg_save; i < 64; i++)
26957 if (fixed_regs[i] || !save_reg_p (i))
26959 strategy |= REST_INLINE_FPRS;
26960 break;
26964 /* Similarly, for altivec regs. */
26965 if ((strategy & SAVE_INLINE_VRS)
26966 && !(strategy & REST_INLINE_VRS))
26968 int i;
26970 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
26971 if (fixed_regs[i] || !save_reg_p (i))
26973 strategy |= REST_INLINE_VRS;
26974 break;
26978 /* info->lr_save_p isn't yet set if the only reason lr needs to be
26979 saved is an out-of-line save or restore. Set up the value for
26980 the next test (excluding out-of-line gprs). */
26981 bool lr_save_p = (info->lr_save_p
26982 || !(strategy & SAVE_INLINE_FPRS)
26983 || !(strategy & SAVE_INLINE_VRS)
26984 || !(strategy & REST_INLINE_FPRS)
26985 || !(strategy & REST_INLINE_VRS));
26987 if (TARGET_MULTIPLE
26988 && !TARGET_POWERPC64
26989 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
26990 && info->first_gp_reg_save < 31
26991 && !(flag_shrink_wrap
26992 && flag_shrink_wrap_separate
26993 && optimize_function_for_speed_p (cfun)))
26995 /* Prefer store multiple for saves over out-of-line routines,
26996 since the store-multiple instruction will always be smaller. */
26997 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
26999 /* The situation is more complicated with load multiple. We'd
27000 prefer to use the out-of-line routines for restores, since the
27001 "exit" out-of-line routines can handle the restore of LR and the
27002 frame teardown. However, it doesn't make sense to use the
27003 out-of-line routine if that is the only reason we'd need to save
27004 LR, and we can't use the "exit" out-of-line gpr restore if we
27005 have saved some fprs; in those cases it is advantageous to use
27006 load multiple when available. */
27007 if (info->first_fp_reg_save != 64 || !lr_save_p)
27008 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
27011 /* Using the "exit" out-of-line routine does not improve code size
27012 if using it would require lr to be saved and if only saving one
27013 or two gprs. */
27014 else if (!lr_save_p && info->first_gp_reg_save > 29)
27015 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27017 /* We can only use load multiple or the out-of-line routines to
27018 restore gprs if we've saved all the registers from
27019 first_gp_reg_save. Otherwise, we risk loading garbage.
27020 Of course, if we have saved out-of-line or used stmw then we know
27021 we haven't skipped any gprs. */
27022 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
27023 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
27025 int i;
27027 for (i = info->first_gp_reg_save; i < 32; i++)
27028 if (fixed_reg_p (i) || !save_reg_p (i))
27030 strategy |= REST_INLINE_GPRS;
27031 strategy &= ~REST_MULTIPLE;
27032 break;
27036 if (TARGET_ELF && TARGET_64BIT)
27038 if (!(strategy & SAVE_INLINE_FPRS))
27039 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27040 else if (!(strategy & SAVE_INLINE_GPRS)
27041 && info->first_fp_reg_save == 64)
27042 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
27044 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
27045 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
27047 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
27048 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27050 return strategy;
27053 /* Calculate the stack information for the current function. This is
27054 complicated by having two separate calling sequences, the AIX calling
27055 sequence and the V.4 calling sequence.
27057 AIX (and Darwin/Mac OS X) stack frames look like:
27058 32-bit 64-bit
27059 SP----> +---------------------------------------+
27060 | back chain to caller | 0 0
27061 +---------------------------------------+
27062 | saved CR | 4 8 (8-11)
27063 +---------------------------------------+
27064 | saved LR | 8 16
27065 +---------------------------------------+
27066 | reserved for compilers | 12 24
27067 +---------------------------------------+
27068 | reserved for binders | 16 32
27069 +---------------------------------------+
27070 | saved TOC pointer | 20 40
27071 +---------------------------------------+
27072 | Parameter save area (+padding*) (P) | 24 48
27073 +---------------------------------------+
27074 | Alloca space (A) | 24+P etc.
27075 +---------------------------------------+
27076 | Local variable space (L) | 24+P+A
27077 +---------------------------------------+
27078 | Float/int conversion temporary (X) | 24+P+A+L
27079 +---------------------------------------+
27080 | Save area for AltiVec registers (W) | 24+P+A+L+X
27081 +---------------------------------------+
27082 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
27083 +---------------------------------------+
27084 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
27085 +---------------------------------------+
27086 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
27087 +---------------------------------------+
27088 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
27089 +---------------------------------------+
27090 old SP->| back chain to caller's caller |
27091 +---------------------------------------+
27093 * If the alloca area is present, the parameter save area is
27094 padded so that the alloca area starts 16-byte aligned.
27096 The required alignment for AIX configurations is two words (i.e., 8
27097 or 16 bytes).
27099 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
27101 SP----> +---------------------------------------+
27102 | Back chain to caller | 0
27103 +---------------------------------------+
27104 | Save area for CR | 8
27105 +---------------------------------------+
27106 | Saved LR | 16
27107 +---------------------------------------+
27108 | Saved TOC pointer | 24
27109 +---------------------------------------+
27110 | Parameter save area (+padding*) (P) | 32
27111 +---------------------------------------+
27112 | Alloca space (A) | 32+P
27113 +---------------------------------------+
27114 | Local variable space (L) | 32+P+A
27115 +---------------------------------------+
27116 | Save area for AltiVec registers (W) | 32+P+A+L
27117 +---------------------------------------+
27118 | AltiVec alignment padding (Y) | 32+P+A+L+W
27119 +---------------------------------------+
27120 | Save area for GP registers (G) | 32+P+A+L+W+Y
27121 +---------------------------------------+
27122 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
27123 +---------------------------------------+
27124 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
27125 +---------------------------------------+
27127 * If the alloca area is present, the parameter save area is
27128 padded so that the alloca area starts 16-byte aligned.
27130 V.4 stack frames look like:
27132 SP----> +---------------------------------------+
27133 | back chain to caller | 0
27134 +---------------------------------------+
27135 | caller's saved LR | 4
27136 +---------------------------------------+
27137 | Parameter save area (+padding*) (P) | 8
27138 +---------------------------------------+
27139 | Alloca space (A) | 8+P
27140 +---------------------------------------+
27141 | Varargs save area (V) | 8+P+A
27142 +---------------------------------------+
27143 | Local variable space (L) | 8+P+A+V
27144 +---------------------------------------+
27145 | Float/int conversion temporary (X) | 8+P+A+V+L
27146 +---------------------------------------+
27147 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
27148 +---------------------------------------+
27149 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
27150 +---------------------------------------+
27151 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
27152 +---------------------------------------+
27153 | SPE: area for 64-bit GP registers |
27154 +---------------------------------------+
27155 | SPE alignment padding |
27156 +---------------------------------------+
27157 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
27158 +---------------------------------------+
27159 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
27160 +---------------------------------------+
27161 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
27162 +---------------------------------------+
27163 old SP->| back chain to caller's caller |
27164 +---------------------------------------+
27166 * If the alloca area is present and the required alignment is
27167 16 bytes, the parameter save area is padded so that the
27168 alloca area starts 16-byte aligned.
27170 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
27171 given. (But note below and in sysv4.h that we require only 8 and
27172 may round up the size of our stack frame anyway. The historical
27173 reason is that early versions of powerpc-linux didn't properly
27174 align the stack at program startup. A happy side effect is that
27175 -mno-eabi libraries can be used with -meabi programs.)
27177 The EABI configuration defaults to the V.4 layout. However,
27178 the stack alignment requirements may differ. If -mno-eabi is not
27179 given, the required stack alignment is 8 bytes; if -mno-eabi is
27180 given, the required alignment is 16 bytes. (But see V.4 comment
27181 above.) */
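/* A worked example of the ELFv2 layout above, with assumed sizes: a
   64-bit function that saves r30-r31 (G = 16), keeps 16 bytes of
   locals (L = 16), and needs no parameter save area, alloca space,
   vector save area or padding (P = A = W = Y = F = 0) occupies
   32 + 16 + 16 = 64 bytes. That is already a multiple of the
   required 16-byte alignment, so the old SP sits 64 bytes above the
   new SP and the back chain is stored there. */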
27183 #ifndef ABI_STACK_BOUNDARY
27184 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
27185 #endif
27187 static rs6000_stack_t *
27188 rs6000_stack_info (void)
27190 /* We should never be called for thunks; we are not set up for that. */
27191 gcc_assert (!cfun->is_thunk);
27193 rs6000_stack_t *info = &stack_info;
27194 int reg_size = TARGET_32BIT ? 4 : 8;
27195 int ehrd_size;
27196 int ehcr_size;
27197 int save_align;
27198 int first_gp;
27199 HOST_WIDE_INT non_fixed_size;
27200 bool using_static_chain_p;
27202 if (reload_completed && info->reload_completed)
27203 return info;
27205 memset (info, 0, sizeof (*info));
27206 info->reload_completed = reload_completed;
27208 if (TARGET_SPE)
27210 /* Cache value so we don't rescan instruction chain over and over. */
27211 if (cfun->machine->spe_insn_chain_scanned_p == 0)
27212 cfun->machine->spe_insn_chain_scanned_p
27213 = spe_func_has_64bit_regs_p () + 1;
27214 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
27217 /* Select which calling sequence. */
27218 info->abi = DEFAULT_ABI;
27220 /* Calculate which registers need to be saved & save area size. */
27221 info->first_gp_reg_save = first_reg_to_save ();
27222 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
27223 even if it currently looks like we won't. Reload may need it to
27224 get at a constant; if so, it will have already created a constant
27225 pool entry for it. */
27226 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
27227 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27228 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27229 && crtl->uses_const_pool
27230 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
27231 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
27232 else
27233 first_gp = info->first_gp_reg_save;
27235 info->gp_size = reg_size * (32 - first_gp);
27237 /* For the SPE, we have an additional upper 32-bits on each GPR.
27238 Ideally we should save the entire 64-bits only when the upper
27239 half is used in SIMD instructions. Since we only record
27240 registers live (not the size they are used in), this proves
27241 difficult because we'd have to traverse the instruction chain at
27242 the right time, taking reload into account. This is a real pain,
27243 so we opt to save all the GPRs in 64-bits whenever even one
27244 register gets used in 64-bits. Otherwise, all the registers in
27245 the frame get saved in 32-bits.
27247 So: when we save all GPRs (except the SP) in 64-bits, the
27248 traditional GP save area will be empty. */
27249 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27250 info->gp_size = 0;
27252 info->first_fp_reg_save = first_fp_reg_to_save ();
27253 info->fp_size = 8 * (64 - info->first_fp_reg_save);
27255 info->first_altivec_reg_save = first_altivec_reg_to_save ();
27256 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
27257 - info->first_altivec_reg_save);
27259 /* Does this function call anything? */
27260 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
27262 /* Determine if we need to save the condition code registers. */
27263 if (save_reg_p (CR2_REGNO)
27264 || save_reg_p (CR3_REGNO)
27265 || save_reg_p (CR4_REGNO))
27267 info->cr_save_p = 1;
27268 if (DEFAULT_ABI == ABI_V4)
27269 info->cr_size = reg_size;
27272 /* If the current function calls __builtin_eh_return, then we need
27273 to allocate stack space for registers that will hold data for
27274 the exception handler. */
27275 if (crtl->calls_eh_return)
27277 unsigned int i;
27278 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
27279 continue;
27281 /* SPE saves EH registers in 64-bits. */
27282 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
27283 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
27285 else
27286 ehrd_size = 0;
27288 /* In the ELFv2 ABI, we also need to allocate space for separate
27289 CR field save areas if the function calls __builtin_eh_return. */
27290 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27292 /* This hard-codes that we have three call-saved CR fields. */
27293 ehcr_size = 3 * reg_size;
27294 /* We do *not* use the regular CR save mechanism. */
27295 info->cr_save_p = 0;
27297 else
27298 ehcr_size = 0;
27300 /* Determine various sizes. */
27301 info->reg_size = reg_size;
27302 info->fixed_size = RS6000_SAVE_AREA;
27303 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
27304 if (cfun->calls_alloca)
27305 info->parm_size =
27306 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
27307 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
27308 else
27309 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
27310 TARGET_ALTIVEC ? 16 : 8);
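/* RS6000_ALIGN rounds its first operand up to a multiple of the
   second; e.g. (figures assumed) 40 bytes of outgoing arguments
   become a parm_size of 48 when AltiVec requires 16-byte
   alignment. */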
27311 if (FRAME_GROWS_DOWNWARD)
27312 info->vars_size
27313 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
27314 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
27315 - (info->fixed_size + info->vars_size + info->parm_size);
27317 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27318 info->spe_gp_size = 8 * (32 - first_gp);
27320 if (TARGET_ALTIVEC_ABI)
27321 info->vrsave_mask = compute_vrsave_mask ();
27323 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
27324 info->vrsave_size = 4;
27326 compute_save_world_info (info);
27328 /* Calculate the offsets. */
27329 switch (DEFAULT_ABI)
27331 case ABI_NONE:
27332 default:
27333 gcc_unreachable ();
27335 case ABI_AIX:
27336 case ABI_ELFv2:
27337 case ABI_DARWIN:
27338 info->fp_save_offset = -info->fp_size;
27339 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27341 if (TARGET_ALTIVEC_ABI)
27343 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
27345 /* Align stack so vector save area is on a quadword boundary.
27346 The padding goes above the vectors. */
27347 if (info->altivec_size != 0)
27348 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
27350 info->altivec_save_offset = info->vrsave_save_offset
27351 - info->altivec_padding_size
27352 - info->altivec_size;
27353 gcc_assert (info->altivec_size == 0
27354 || info->altivec_save_offset % 16 == 0);
27356 /* Adjust for AltiVec case. */
27357 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
27359 else
27360 info->ehrd_offset = info->gp_save_offset - ehrd_size;
27362 info->ehcr_offset = info->ehrd_offset - ehcr_size;
27363 info->cr_save_offset = reg_size; /* first word when 64-bit. */
27364 info->lr_save_offset = 2*reg_size;
27365 break;
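/* For concreteness (sizes assumed): saving f30-f31 and r29-r31 in
   64-bit mode gives fp_size == 16 and gp_size == 24, so the FP save
   area sits at offset -16 and the GP save area at -40 below the top
   of the frame. lr_save_offset (16) and cr_save_offset (8) are
   positive offsets above the frame top, i.e. the header slots the
   caller set aside, as in the diagrams above. */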
27367 case ABI_V4:
27368 info->fp_save_offset = -info->fp_size;
27369 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27370 info->cr_save_offset = info->gp_save_offset - info->cr_size;
27372 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27374 /* Align stack so SPE GPR save area is aligned on a
27375 double-word boundary. */
27376 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
27377 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
27378 else
27379 info->spe_padding_size = 0;
27381 info->spe_gp_save_offset = info->cr_save_offset
27382 - info->spe_padding_size
27383 - info->spe_gp_size;
27385 /* Adjust for SPE case. */
27386 info->ehrd_offset = info->spe_gp_save_offset;
27388 else if (TARGET_ALTIVEC_ABI)
27390 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
27392 /* Align stack so vector save area is on a quadword boundary. */
27393 if (info->altivec_size != 0)
27394 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
27396 info->altivec_save_offset = info->vrsave_save_offset
27397 - info->altivec_padding_size
27398 - info->altivec_size;
27400 /* Adjust for AltiVec case. */
27401 info->ehrd_offset = info->altivec_save_offset;
27403 else
27404 info->ehrd_offset = info->cr_save_offset;
27406 info->ehrd_offset -= ehrd_size;
27407 info->lr_save_offset = reg_size;
27410 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
27411 info->save_size = RS6000_ALIGN (info->fp_size
27412 + info->gp_size
27413 + info->altivec_size
27414 + info->altivec_padding_size
27415 + info->spe_gp_size
27416 + info->spe_padding_size
27417 + ehrd_size
27418 + ehcr_size
27419 + info->cr_size
27420 + info->vrsave_size,
27421 save_align);
27423 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
27425 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
27426 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
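/* For example (figures assumed): vars_size 32, parm_size 0 and
   save_size 64 on top of a fixed_size of 32 give a non_fixed_size
   of 96 and a total_size of 128, which is already a multiple of
   ABI_STACK_BOUNDARY / BITS_PER_UNIT (16 bytes on 64-bit
   targets). */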
27428 /* Determine if we need to save the link register. */
27429 if (info->calls_p
27430 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27431 && crtl->profile
27432 && !TARGET_PROFILE_KERNEL)
27433 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
27434 #ifdef TARGET_RELOCATABLE
27435 || (DEFAULT_ABI == ABI_V4
27436 && (TARGET_RELOCATABLE || flag_pic > 1)
27437 && !constant_pool_empty_p ())
27438 #endif
27439 || rs6000_ra_ever_killed ())
27440 info->lr_save_p = 1;
27442 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27443 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27444 && call_used_regs[STATIC_CHAIN_REGNUM]);
27445 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
27447 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
27448 || !(info->savres_strategy & SAVE_INLINE_FPRS)
27449 || !(info->savres_strategy & SAVE_INLINE_VRS)
27450 || !(info->savres_strategy & REST_INLINE_GPRS)
27451 || !(info->savres_strategy & REST_INLINE_FPRS)
27452 || !(info->savres_strategy & REST_INLINE_VRS))
27453 info->lr_save_p = 1;
27455 if (info->lr_save_p)
27456 df_set_regs_ever_live (LR_REGNO, true);
27458 /* Determine if we need to allocate any stack frame:
27460 For AIX we need to push the stack if a frame pointer is needed
27461 (because the stack might be dynamically adjusted), if we are
27462 debugging, if we make calls, or if the sum of fp_save, gp_save,
27463 and local variables is more than the space needed to save all
27464 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
27465 + 18*8 = 288 (GPR13 reserved).
27467 For V.4 we don't have the stack cushion that AIX uses, but assume
27468 that the debugger can handle stackless frames. */
27470 if (info->calls_p)
27471 info->push_p = 1;
27473 else if (DEFAULT_ABI == ABI_V4)
27474 info->push_p = non_fixed_size != 0;
27476 else if (frame_pointer_needed)
27477 info->push_p = 1;
27479 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
27480 info->push_p = 1;
27482 else
27483 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
27485 return info;
27488 /* Return true if the current function uses any GPRs in 64-bit SIMD
27489 mode. */
27491 static bool
27492 spe_func_has_64bit_regs_p (void)
27494 rtx_insn *insns, *insn;
27496 /* Functions that save and restore all the call-saved registers will
27497 need to save/restore the registers in 64-bits. */
27498 if (crtl->calls_eh_return
27499 || cfun->calls_setjmp
27500 || crtl->has_nonlocal_goto)
27501 return true;
27503 insns = get_insns ();
27505 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
27507 if (INSN_P (insn))
27509 rtx i;
27511 /* FIXME: This should be implemented with attributes...
27513 (set_attr "spe64" "true")....then,
27514 if (get_spe64(insn)) return true;
27516 It's the only reliable way to do the stuff below. */
27518 i = PATTERN (insn);
27519 if (GET_CODE (i) == SET)
27521 machine_mode mode = GET_MODE (SET_SRC (i));
27523 if (SPE_VECTOR_MODE (mode))
27524 return true;
27525 if (TARGET_E500_DOUBLE
27526 && (mode == DFmode || FLOAT128_2REG_P (mode)))
27527 return true;
27532 return false;
27535 static void
27536 debug_stack_info (rs6000_stack_t *info)
27538 const char *abi_string;
27540 if (! info)
27541 info = rs6000_stack_info ();
27543 fprintf (stderr, "\nStack information for function %s:\n",
27544 ((current_function_decl && DECL_NAME (current_function_decl))
27545 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
27546 : "<unknown>"));
27548 switch (info->abi)
27550 default: abi_string = "Unknown"; break;
27551 case ABI_NONE: abi_string = "NONE"; break;
27552 case ABI_AIX: abi_string = "AIX"; break;
27553 case ABI_ELFv2: abi_string = "ELFv2"; break;
27554 case ABI_DARWIN: abi_string = "Darwin"; break;
27555 case ABI_V4: abi_string = "V.4"; break;
27558 fprintf (stderr, "\tABI = %5s\n", abi_string);
27560 if (TARGET_ALTIVEC_ABI)
27561 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
27563 if (TARGET_SPE_ABI)
27564 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
27566 if (info->first_gp_reg_save != 32)
27567 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
27569 if (info->first_fp_reg_save != 64)
27570 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
27572 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
27573 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
27574 info->first_altivec_reg_save);
27576 if (info->lr_save_p)
27577 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
27579 if (info->cr_save_p)
27580 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
27582 if (info->vrsave_mask)
27583 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
27585 if (info->push_p)
27586 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
27588 if (info->calls_p)
27589 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
27591 if (info->gp_size)
27592 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
27594 if (info->fp_size)
27595 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
27597 if (info->altivec_size)
27598 fprintf (stderr, "\taltivec_save_offset = %5d\n",
27599 info->altivec_save_offset);
27601 if (info->spe_gp_size)
27602 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
27603 info->spe_gp_save_offset);
27605 if (info->vrsave_size)
27606 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
27607 info->vrsave_save_offset);
27609 if (info->lr_save_p)
27610 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
27612 if (info->cr_save_p)
27613 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
27615 if (info->varargs_save_offset)
27616 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
27618 if (info->total_size)
27619 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
27620 info->total_size);
27622 if (info->vars_size)
27623 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
27624 info->vars_size);
27626 if (info->parm_size)
27627 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
27629 if (info->fixed_size)
27630 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
27632 if (info->gp_size)
27633 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
27635 if (info->spe_gp_size)
27636 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
27638 if (info->fp_size)
27639 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
27641 if (info->altivec_size)
27642 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
27644 if (info->vrsave_size)
27645 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
27647 if (info->altivec_padding_size)
27648 fprintf (stderr, "\taltivec_padding_size= %5d\n",
27649 info->altivec_padding_size);
27651 if (info->spe_padding_size)
27652 fprintf (stderr, "\tspe_padding_size = %5d\n",
27653 info->spe_padding_size);
27655 if (info->cr_size)
27656 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
27658 if (info->save_size)
27659 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
27661 if (info->reg_size != 4)
27662 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
27664 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
27666 fprintf (stderr, "\n");
27669 rtx
27670 rs6000_return_addr (int count, rtx frame)
27672 /* Currently we don't optimize very well between prologue and body
27673 code, and for PIC code the generated code can be quite bad, so
27674 don't try to be too clever here. */
27675 if (count != 0
27676 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
27678 cfun->machine->ra_needs_full_frame = 1;
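/* The expression built below reads *(*frame + RETURN_ADDRESS_OFFSET):
   chase FRAME's back chain to the caller's stack pointer, then load
   the return address from that frame's LR save slot. */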
27680 return
27681 gen_rtx_MEM
27682 (Pmode,
27683 memory_address
27684 (Pmode,
27685 plus_constant (Pmode,
27686 copy_to_reg
27687 (gen_rtx_MEM (Pmode,
27688 memory_address (Pmode, frame))),
27689 RETURN_ADDRESS_OFFSET)));
27692 cfun->machine->ra_need_lr = 1;
27693 return get_hard_reg_initial_val (Pmode, LR_REGNO);
27696 /* Say whether a function is a candidate for sibcall handling or not. */
27698 static bool
27699 rs6000_function_ok_for_sibcall (tree decl, tree exp)
27701 tree fntype;
27703 if (decl)
27704 fntype = TREE_TYPE (decl);
27705 else
27706 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
27708 /* We can't do it if the called function has more vector parameters
27709 than the current function; there's nowhere to put the VRsave code. */
27710 if (TARGET_ALTIVEC_ABI
27711 && TARGET_ALTIVEC_VRSAVE
27712 && !(decl && decl == current_function_decl))
27714 function_args_iterator args_iter;
27715 tree type;
27716 int nvreg = 0;
27718 /* Functions with vector parameters are required to have a
27719 prototype, so the argument type info must be available
27720 here. */
27721 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
27722 if (TREE_CODE (type) == VECTOR_TYPE
27723 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
27724 nvreg++;
27726 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
27727 if (TREE_CODE (type) == VECTOR_TYPE
27728 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
27729 nvreg--;
27731 if (nvreg > 0)
27732 return false;
27735 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
27736 functions, because the callee may have a different TOC pointer from
27737 the caller and there's no way to ensure we restore the TOC when
27738 we return. With the secure-plt SYSV ABI we can't make non-local
27739 calls when -fpic/PIC because the PLT call stubs use r30. */
27740 if (DEFAULT_ABI == ABI_DARWIN
27741 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27742 && decl
27743 && !DECL_EXTERNAL (decl)
27744 && !DECL_WEAK (decl)
27745 && (*targetm.binds_local_p) (decl))
27746 || (DEFAULT_ABI == ABI_V4
27747 && (!TARGET_SECURE_PLT
27748 || !flag_pic
27749 || (decl
27750 && (*targetm.binds_local_p) (decl)))))
27752 tree attr_list = TYPE_ATTRIBUTES (fntype);
27754 if (!lookup_attribute ("longcall", attr_list)
27755 || lookup_attribute ("shortcall", attr_list))
27756 return true;
27759 return false;
27762 static int
27763 rs6000_ra_ever_killed (void)
27765 rtx_insn *top;
27766 rtx reg;
27767 rtx_insn *insn;
27769 if (cfun->is_thunk)
27770 return 0;
27772 if (cfun->machine->lr_save_state)
27773 return cfun->machine->lr_save_state - 1;
27775 /* regs_ever_live has LR marked as used if any sibcalls are present,
27776 but this should not force saving and restoring in the
27777 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
27778 clobbers LR, so that is inappropriate. */
27780 /* Also, the prologue can generate a store into LR that
27781 doesn't really count, like this:
27783 move LR->R0
27784 bcl to set PIC register
27785 move LR->R31
27786 move R0->LR
27788 When we're called from the epilogue, we need to avoid counting
27789 this as a store. */
27791 push_topmost_sequence ();
27792 top = get_insns ();
27793 pop_topmost_sequence ();
27794 reg = gen_rtx_REG (Pmode, LR_REGNO);
27796 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
27798 if (INSN_P (insn))
27800 if (CALL_P (insn))
27802 if (!SIBLING_CALL_P (insn))
27803 return 1;
27805 else if (find_regno_note (insn, REG_INC, LR_REGNO))
27806 return 1;
27807 else if (set_of (reg, insn) != NULL_RTX
27808 && !prologue_epilogue_contains (insn))
27809 return 1;
27812 return 0;
27815 /* Emit instructions needed to load the TOC register.
27816 This is needed only when TARGET_TOC and TARGET_MINIMAL_TOC are set
27817 and there is a constant pool, or for SVR4 -fpic. */
27819 void
27820 rs6000_emit_load_toc_table (int fromprolog)
27822 rtx dest;
27823 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27825 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
27827 char buf[30];
27828 rtx lab, tmp1, tmp2, got;
27830 lab = gen_label_rtx ();
27831 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
27832 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
27833 if (flag_pic == 2)
27835 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
27836 need_toc_init = 1;
27838 else
27839 got = rs6000_got_sym ();
27840 tmp1 = tmp2 = dest;
27841 if (!fromprolog)
27843 tmp1 = gen_reg_rtx (Pmode);
27844 tmp2 = gen_reg_rtx (Pmode);
27846 emit_insn (gen_load_toc_v4_PIC_1 (lab));
27847 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
27848 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
27849 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
27851 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
27853 emit_insn (gen_load_toc_v4_pic_si ());
27854 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
27856 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
27858 char buf[30];
27859 rtx temp0 = (fromprolog
27860 ? gen_rtx_REG (Pmode, 0)
27861 : gen_reg_rtx (Pmode));
27863 if (fromprolog)
27865 rtx symF, symL;
27867 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27868 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
27870 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27871 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
27873 emit_insn (gen_load_toc_v4_PIC_1 (symF));
27874 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
27875 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
27877 else
27879 rtx tocsym, lab;
27881 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
27882 need_toc_init = 1;
27883 lab = gen_label_rtx ();
27884 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
27885 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
27886 if (TARGET_LINK_STACK)
27887 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
27888 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
27890 emit_insn (gen_addsi3 (dest, temp0, dest));
27892 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
27894 /* This is for AIX code running in non-PIC ELF32. */
27895 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
27897 need_toc_init = 1;
27898 emit_insn (gen_elf_high (dest, realsym));
27899 emit_insn (gen_elf_low (dest, dest, realsym));
27901 else
27903 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27905 if (TARGET_32BIT)
27906 emit_insn (gen_load_toc_aix_si (dest));
27907 else
27908 emit_insn (gen_load_toc_aix_di (dest));
27912 /* Emit instructions to restore the link register after determining where
27913 its value has been stored. */
27915 void
27916 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
27918 rs6000_stack_t *info = rs6000_stack_info ();
27919 rtx operands[2];
27921 operands[0] = source;
27922 operands[1] = scratch;
27924 if (info->lr_save_p)
27926 rtx frame_rtx = stack_pointer_rtx;
27927 HOST_WIDE_INT sp_offset = 0;
27928 rtx tmp;
27930 if (frame_pointer_needed
27931 || cfun->calls_alloca
27932 || info->total_size > 32767)
27934 tmp = gen_frame_mem (Pmode, frame_rtx);
27935 emit_move_insn (operands[1], tmp);
27936 frame_rtx = operands[1];
27938 else if (info->push_p)
27939 sp_offset = info->total_size;
27941 tmp = plus_constant (Pmode, frame_rtx,
27942 info->lr_save_offset + sp_offset);
27943 tmp = gen_frame_mem (Pmode, tmp);
27944 emit_move_insn (tmp, operands[0]);
27946 else
27947 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
27949 /* Freeze lr_save_p. We've just emitted rtl that depends on the
27950 state of lr_save_p so any change from here on would be a bug. In
27951 particular, stop rs6000_ra_ever_killed from considering the SET
27952 of lr we may have added just above. */
27953 cfun->machine->lr_save_state = info->lr_save_p + 1;
27956 static GTY(()) alias_set_type set = -1;
27958 alias_set_type
27959 get_TOC_alias_set (void)
27961 if (set == -1)
27962 set = new_alias_set ();
27963 return set;
27966 /* This returns nonzero if the current function uses the TOC. This is
27967 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
27968 is generated by the ABI_V4 load_toc_* patterns. */
27969 #if TARGET_ELF
27970 static int
27971 uses_TOC (void)
27973 rtx_insn *insn;
27975 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27976 if (INSN_P (insn))
27978 rtx pat = PATTERN (insn);
27979 int i;
27981 if (GET_CODE (pat) == PARALLEL)
27982 for (i = 0; i < XVECLEN (pat, 0); i++)
27984 rtx sub = XVECEXP (pat, 0, i);
27985 if (GET_CODE (sub) == USE)
27987 sub = XEXP (sub, 0);
27988 if (GET_CODE (sub) == UNSPEC
27989 && XINT (sub, 1) == UNSPEC_TOC)
27990 return 1;
27994 return 0;
27996 #endif
27998 rtx
27999 create_TOC_reference (rtx symbol, rtx largetoc_reg)
28001 rtx tocrel, tocreg, hi;
28003 if (TARGET_DEBUG_ADDR)
28005 if (GET_CODE (symbol) == SYMBOL_REF)
28006 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
28007 XSTR (symbol, 0));
28008 else
28010 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
28011 GET_RTX_NAME (GET_CODE (symbol)));
28012 debug_rtx (symbol);
28016 if (!can_create_pseudo_p ())
28017 df_set_regs_ever_live (TOC_REGISTER, true);
28019 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
28020 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
28021 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
28022 return tocrel;
28024 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
28025 if (largetoc_reg != NULL)
28027 emit_move_insn (largetoc_reg, hi);
28028 hi = largetoc_reg;
28030 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
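/* With -mcmodel=small (or while pseudos are still available) the bare
   UNSPEC_TOCREL above suffices for a single TOC-relative reference;
   for the larger code models the HIGH/LO_SUM pair built here is what
   later expands into the usual addis-from-r2 plus low-part
   sequence. */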
28033 /* Issue assembly directives that create a reference to the given DWARF
28034 FRAME_TABLE_LABEL from the current function section. */
28035 void
28036 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
28038 fprintf (asm_out_file, "\t.ref %s\n",
28039 (* targetm.strip_name_encoding) (frame_table_label));
28042 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
28043 and the change to the stack pointer. */
28045 static void
28046 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
28048 rtvec p;
28049 int i;
28050 rtx regs[3];
28052 i = 0;
28053 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28054 if (hard_frame_needed)
28055 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
28056 if (!(REGNO (fp) == STACK_POINTER_REGNUM
28057 || (hard_frame_needed
28058 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
28059 regs[i++] = fp;
28061 p = rtvec_alloc (i);
28062 while (--i >= 0)
28064 rtx mem = gen_frame_mem (BLKmode, regs[i]);
28065 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
28068 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
28071 /* Emit the correct code for allocating stack space, as insns.
28072 If COPY_REG, make sure a copy of the old frame is left in it.
28073 The generated code may use hard register 0 as a temporary. */
28075 static rtx_insn *
28076 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
28078 rtx_insn *insn;
28079 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28080 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
28081 rtx todec = gen_int_mode (-size, Pmode);
28082 rtx par, set, mem;
28084 if (INTVAL (todec) != -size)
28086 warning (0, "stack frame too large");
28087 emit_insn (gen_trap ());
28088 return 0;
28091 if (crtl->limit_stack)
28093 if (REG_P (stack_limit_rtx)
28094 && REGNO (stack_limit_rtx) > 1
28095 && REGNO (stack_limit_rtx) <= 31)
28097 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
28098 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28099 const0_rtx));
28101 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
28102 && TARGET_32BIT
28103 && DEFAULT_ABI == ABI_V4
28104 && !flag_pic)
28106 rtx toload = gen_rtx_CONST (VOIDmode,
28107 gen_rtx_PLUS (Pmode,
28108 stack_limit_rtx,
28109 GEN_INT (size)));
28111 emit_insn (gen_elf_high (tmp_reg, toload));
28112 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
28113 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28114 const0_rtx));
28116 else
28117 warning (0, "stack limit expression is not supported");
28120 if (copy_reg)
28122 if (copy_off != 0)
28123 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
28124 else
28125 emit_move_insn (copy_reg, stack_reg);
28128 if (size > 32767)
28130 /* Need a note here so that try_split doesn't get confused. */
28131 if (get_last_insn () == NULL_RTX)
28132 emit_note (NOTE_INSN_DELETED);
28133 insn = emit_move_insn (tmp_reg, todec);
28134 try_split (PATTERN (insn), insn, 0);
28135 todec = tmp_reg;
28138 insn = emit_insn (TARGET_32BIT
28139 ? gen_movsi_update_stack (stack_reg, stack_reg,
28140 todec, stack_reg)
28141 : gen_movdi_di_update_stack (stack_reg, stack_reg,
28142 todec, stack_reg));
28143 /* Since we didn't use gen_frame_mem to generate the MEM, grab
28144 it now and set the alias set/attributes. The above gen_*_update
28145 calls will generate a PARALLEL with the MEM set being the first
28146 operation. */
28147 par = PATTERN (insn);
28148 gcc_assert (GET_CODE (par) == PARALLEL);
28149 set = XVECEXP (par, 0, 0);
28150 gcc_assert (GET_CODE (set) == SET);
28151 mem = SET_DEST (set);
28152 gcc_assert (MEM_P (mem));
28153 MEM_NOTRAP_P (mem) = 1;
28154 set_mem_alias_set (mem, get_frame_alias_set ());
28156 RTX_FRAME_RELATED_P (insn) = 1;
28157 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28158 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
28159 GEN_INT (-size))));
28160 return insn;
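/* The update-form move emitted above is what becomes the familiar
   stwu/stdu r1,-SIZE(r1) (or the indexed form when SIZE doesn't fit
   in 16 bits): one instruction stores the back chain and decrements
   the stack pointer atomically, as the ABI expects. */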
28163 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
28165 #if PROBE_INTERVAL > 32768
28166 #error Cannot use indexed addressing mode for stack probing
28167 #endif
28169 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
28170 inclusive. These are offsets from the current stack pointer. */
28172 static void
28173 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
28175 /* See if we have a constant small number of probes to generate. If so,
28176 that's the easy case. */
28177 if (first + size <= 32768)
28179 HOST_WIDE_INT i;
28181 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
28182 it exceeds SIZE. If only one probe is needed, this will not
28183 generate any code. Then probe at FIRST + SIZE. */
28184 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
28185 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28186 -(first + i)));
28188 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28189 -(first + size)));
28192 /* Otherwise, do the same as above, but in a loop. Note that we must be
28193 extra careful with variables wrapping around because we might be at
28194 the very top (or the very bottom) of the address space and we have
28195 to be able to handle this case properly; in particular, we use an
28196 equality test for the loop condition. */
28197 else
28199 HOST_WIDE_INT rounded_size;
28200 rtx r12 = gen_rtx_REG (Pmode, 12);
28201 rtx r0 = gen_rtx_REG (Pmode, 0);
28203 /* Sanity check for the addressing mode we're going to use. */
28204 gcc_assert (first <= 32768);
28206 /* Step 1: round SIZE to the previous multiple of the interval. */
28208 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
28211 /* Step 2: compute initial and final value of the loop counter. */
28213 /* TEST_ADDR = SP + FIRST. */
28214 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
28215 -first)));
28217 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
28218 if (rounded_size > 32768)
28220 emit_move_insn (r0, GEN_INT (-rounded_size));
28221 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
28223 else
28224 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
28225 -rounded_size)));
28228 /* Step 3: the loop
28230 do
28232 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
28233 probe at TEST_ADDR
28235 while (TEST_ADDR != LAST_ADDR)
28237 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
28238 until it is equal to ROUNDED_SIZE. */
28240 if (TARGET_64BIT)
28241 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
28242 else
28243 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
28246 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
28247 that SIZE is equal to ROUNDED_SIZE. */
28249 if (size != rounded_size)
28250 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
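/* A worked example, assuming the default STACK_CHECK_PROBE_INTERVAL_EXP
   of 12 (PROBE_INTERVAL == 4096): first == 16384 and size == 12288
   fall into the constant case above and emit probes at SP-20480,
   SP-24576 and SP-28672. */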
28254 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
28255 absolute addresses. */
28257 const char *
28258 output_probe_stack_range (rtx reg1, rtx reg2)
28260 static int labelno = 0;
28261 char loop_lab[32];
28262 rtx xops[2];
28264 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
28266 /* Loop. */
28267 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
28269 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
28270 xops[0] = reg1;
28271 xops[1] = GEN_INT (-PROBE_INTERVAL);
28272 output_asm_insn ("addi %0,%0,%1", xops);
28274 /* Probe at TEST_ADDR. */
28275 xops[1] = gen_rtx_REG (Pmode, 0);
28276 output_asm_insn ("stw %1,0(%0)", xops);
28278 /* Test if TEST_ADDR == LAST_ADDR. */
28279 xops[1] = reg2;
28280 if (TARGET_64BIT)
28281 output_asm_insn ("cmpd 0,%0,%1", xops);
28282 else
28283 output_asm_insn ("cmpw 0,%0,%1", xops);
28285 /* Branch. */
28286 fputs ("\tbne 0,", asm_out_file);
28287 assemble_name_raw (asm_out_file, loop_lab);
28288 fputc ('\n', asm_out_file);
28290 return "";
28293 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
28294 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
28295 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
28296 deduce these equivalences by itself so it wasn't necessary to hold
28297 its hand so much. Don't be tempted to always supply d2_f_d_e with
28298 the actual CFA register, i.e., r31 when we are using a hard frame
28299 pointer. That fails when saving regs off r1, and sched moves the
28300 r31 setup past the reg saves. */
28302 static rtx_insn *
28303 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
28304 rtx reg2, rtx repl2)
28306 rtx repl;
28308 if (REGNO (reg) == STACK_POINTER_REGNUM)
28310 gcc_checking_assert (val == 0);
28311 repl = NULL_RTX;
28313 else
28314 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28315 GEN_INT (val));
28317 rtx pat = PATTERN (insn);
28318 if (!repl && !reg2)
28320 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
28321 if (GET_CODE (pat) == PARALLEL)
28322 for (int i = 0; i < XVECLEN (pat, 0); i++)
28323 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28325 rtx set = XVECEXP (pat, 0, i);
28327 /* If this PARALLEL has been emitted for out-of-line
28328 register save functions, or store multiple, then omit
28329 eh_frame info for any user-defined global regs. If
28330 eh_frame info is supplied, frame unwinding will
28331 restore a user reg. */
28332 if (!REG_P (SET_SRC (set))
28333 || !fixed_reg_p (REGNO (SET_SRC (set))))
28334 RTX_FRAME_RELATED_P (set) = 1;
28336 RTX_FRAME_RELATED_P (insn) = 1;
28337 return insn;
28340 /* We expect that 'pat' is either a SET or a PARALLEL containing
28341 SETs (and possibly other stuff). In a PARALLEL, all the SETs
28342 are important so they all have to be marked RTX_FRAME_RELATED_P.
28343 Call simplify_replace_rtx on the SETs rather than the whole insn
28344 so as to leave the other stuff alone (for example USE of r12). */
28346 set_used_flags (pat);
28347 if (GET_CODE (pat) == SET)
28349 if (repl)
28350 pat = simplify_replace_rtx (pat, reg, repl);
28351 if (reg2)
28352 pat = simplify_replace_rtx (pat, reg2, repl2);
28354 else if (GET_CODE (pat) == PARALLEL)
28356 pat = shallow_copy_rtx (pat);
28357 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
28359 for (int i = 0; i < XVECLEN (pat, 0); i++)
28360 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28362 rtx set = XVECEXP (pat, 0, i);
28364 if (repl)
28365 set = simplify_replace_rtx (set, reg, repl);
28366 if (reg2)
28367 set = simplify_replace_rtx (set, reg2, repl2);
28368 XVECEXP (pat, 0, i) = set;
28370 /* Omit eh_frame info for any user-defined global regs. */
28371 if (!REG_P (SET_SRC (set))
28372 || !fixed_reg_p (REGNO (SET_SRC (set))))
28373 RTX_FRAME_RELATED_P (set) = 1;
28376 else
28377 gcc_unreachable ();
28379 RTX_FRAME_RELATED_P (insn) = 1;
28380 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
28382 return insn;
28385 /* Returns an insn that has a vrsave set operation with the
28386 appropriate CLOBBERs. */
28388 static rtx
28389 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
28391 int nclobs, i;
28392 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
28393 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28395 clobs[0]
28396 = gen_rtx_SET (vrsave,
28397 gen_rtx_UNSPEC_VOLATILE (SImode,
28398 gen_rtvec (2, reg, vrsave),
28399 UNSPECV_SET_VRSAVE));
28401 nclobs = 1;
28403 /* We need to clobber the registers in the mask so the scheduler
28404 does not move sets to VRSAVE before sets of AltiVec registers.
28406 However, if the function receives nonlocal gotos, reload will set
28407 all call saved registers live. We will end up with:
28409 (set (reg 999) (mem))
28410 (parallel [ (set (reg vrsave) (unspec blah))
28411 (clobber (reg 999))])
28413 The clobber will cause the store into reg 999 to be dead, and
28414 flow will attempt to delete an epilogue insn. In this case, we
28415 need an unspec use/set of the register. */
28417 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
28418 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28420 if (!epiloguep || call_used_regs [i])
28421 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
28422 gen_rtx_REG (V4SImode, i));
28423 else
28425 rtx reg = gen_rtx_REG (V4SImode, i);
28427 clobs[nclobs++]
28428 = gen_rtx_SET (reg,
28429 gen_rtx_UNSPEC (V4SImode,
28430 gen_rtvec (1, reg), 27));
28434 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
28436 for (i = 0; i < nclobs; ++i)
28437 XVECEXP (insn, 0, i) = clobs[i];
28439 return insn;
28442 static rtx
28443 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
28445 rtx addr, mem;
28447 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
28448 mem = gen_frame_mem (GET_MODE (reg), addr);
28449 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
28452 static rtx
28453 gen_frame_load (rtx reg, rtx frame_reg, int offset)
28455 return gen_frame_set (reg, frame_reg, offset, false);
28458 static rtx
28459 gen_frame_store (rtx reg, rtx frame_reg, int offset)
28461 return gen_frame_set (reg, frame_reg, offset, true);
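/* E.g. gen_frame_store (gen_rtx_REG (Pmode, 30), sp, 48), with sp
   the stack pointer rtx, builds
   (set (mem:DI (plus:DI sp (const_int 48))) (reg:DI 30)) on a
   64-bit target; gen_frame_load builds the mirror-image restore. */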
28464 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
28465 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
28467 static rtx_insn *
28468 emit_frame_save (rtx frame_reg, machine_mode mode,
28469 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
28471 rtx reg;
28473 /* Cases that would need register indexed addressing must not occur here. */
28474 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
28475 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
28476 || (TARGET_E500_DOUBLE && mode == DFmode)
28477 || (TARGET_SPE_ABI
28478 && SPE_VECTOR_MODE (mode)
28479 && !SPE_CONST_OFFSET_OK (offset))));
28481 reg = gen_rtx_REG (mode, regno);
28482 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
28483 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
28484 NULL_RTX, NULL_RTX);
28487 /* Emit an offset memory reference suitable for a frame store, while
28488 converting to a valid addressing mode. */
28490 static rtx
28491 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
28493 rtx int_rtx, offset_rtx;
28495 int_rtx = GEN_INT (offset);
28497 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
28498 || (TARGET_E500_DOUBLE && mode == DFmode))
28500 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
28501 emit_move_insn (offset_rtx, int_rtx);
28503 else
28504 offset_rtx = int_rtx;
28506 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
28509 #ifndef TARGET_FIX_AND_CONTINUE
28510 #define TARGET_FIX_AND_CONTINUE 0
28511 #endif
28513 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
28514 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
28515 #define LAST_SAVRES_REGISTER 31
28516 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
28518 enum {
28519 SAVRES_LR = 0x1,
28520 SAVRES_SAVE = 0x2,
28521 SAVRES_REG = 0x0c,
28522 SAVRES_GPR = 0,
28523 SAVRES_FPR = 4,
28524 SAVRES_VR = 8
28527 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
28529 /* Temporary holding space for an out-of-line register save/restore
28530 routine name. */
28531 static char savres_routine_name[30];
28533 /* Return the name for an out-of-line register save/restore routine.
28534 SEL selects the register class and whether we save or restore. */
28536 static char *
28537 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
28539 const char *prefix = "";
28540 const char *suffix = "";
28542 /* Different targets are supposed to define
28543 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
28544 routine name could be defined with:
28546 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
28548 This is a nice idea in theory, but in reality, things are
28549 complicated in several ways:
28551 - ELF targets have save/restore routines for GPRs.
28553 - SPE targets use different prefixes for 32/64-bit registers, and
28554 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
28556 - PPC64 ELF targets have routines for save/restore of GPRs that
28557 differ in what they do with the link register, so having a set
28558 prefix doesn't work. (We only use one of the save routines at
28559 the moment, though.)
28561 - PPC32 elf targets have "exit" versions of the restore routines
28562 that restore the link register and can save some extra space.
28563 These require an extra suffix. (There are also "tail" versions
28564 of the restore routines and "GOT" versions of the save routines,
28565 but we don't generate those at present. Same problems apply,
28566 though.)
28568 We deal with all this by synthesizing our own prefix/suffix and
28569 using that for the simple sprintf call shown above. */
28570 if (TARGET_SPE)
28572 /* No floating point saves on the SPE. */
28573 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
28575 if ((sel & SAVRES_SAVE))
28576 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
28577 else
28578 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
28580 if ((sel & SAVRES_LR))
28581 suffix = "_x";
28583 else if (DEFAULT_ABI == ABI_V4)
28585 if (TARGET_64BIT)
28586 goto aix_names;
28588 if ((sel & SAVRES_REG) == SAVRES_GPR)
28589 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
28590 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28591 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
28592 else if ((sel & SAVRES_REG) == SAVRES_VR)
28593 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28594 else
28595 abort ();
28597 if ((sel & SAVRES_LR))
28598 suffix = "_x";
28600 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28602 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
28603 /* No out-of-line save/restore routines for GPRs on AIX. */
28604 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
28605 #endif
28607 aix_names:
28608 if ((sel & SAVRES_REG) == SAVRES_GPR)
28609 prefix = ((sel & SAVRES_SAVE)
28610 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
28611 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
28612 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28614 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
28615 if ((sel & SAVRES_LR))
28616 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
28617 else
28618 #endif
28620 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
28621 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
28624 else if ((sel & SAVRES_REG) == SAVRES_VR)
28625 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28626 else
28627 abort ();
28630 if (DEFAULT_ABI == ABI_DARWIN)
28632 /* The Darwin approach is (slightly) different, in order to be
28633 compatible with code generated by the system toolchain. There is a
28634 single symbol for the start of save sequence, and the code here
28635 embeds an offset into that code on the basis of the first register
28636 to be saved. */
28637 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
28638 if ((sel & SAVRES_REG) == SAVRES_GPR)
28639 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
28640 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
28641 (regno - 13) * 4, prefix, regno);
28642 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28643 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
28644 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
28645 else if ((sel & SAVRES_REG) == SAVRES_VR)
28646 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
28647 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
28648 else
28649 abort ();
28651 else
28652 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
28654 return savres_routine_name;
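/* Sample results: on ELFv2, a GPR save from r29 with the LR variant
   yields "_savegpr0_29"; on 32-bit SVR4, an FPR restore from f14 via
   the exit variant yields "_restfpr_14_x". */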
28657 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
28658 SEL selects the register class and whether we save or restore. */
28660 static rtx
28661 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
28663 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
28664 ? info->first_gp_reg_save
28665 : (sel & SAVRES_REG) == SAVRES_FPR
28666 ? info->first_fp_reg_save - 32
28667 : (sel & SAVRES_REG) == SAVRES_VR
28668 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
28669 : -1);
28670 rtx sym;
28671 int select = sel;
28673 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
28674 versions of the gpr routines. */
28675 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
28676 && info->spe_64bit_regs_used)
28677 select ^= SAVRES_FPR ^ SAVRES_GPR;
28679 /* Don't generate bogus routine names. */
28680 gcc_assert (FIRST_SAVRES_REGISTER <= regno
28681 && regno <= LAST_SAVRES_REGISTER
28682 && select >= 0 && select <= 12);
28684 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
28686 if (sym == NULL)
28688 char *name;
28690 name = rs6000_savres_routine_name (info, regno, sel);
28692 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
28693 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
28694 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
28697 return sym;
28700 /* Emit a sequence of insns, including a stack tie if needed, for
28701 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
28702 reset the stack pointer, but move the base of the frame into
28703 reg UPDT_REGNO for use by out-of-line register restore routines. */
28705 static rtx
28706 rs6000_emit_stack_reset (rs6000_stack_t *info,
28707 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
28708 unsigned updt_regno)
28710 /* If there is nothing to do, don't do anything. */
28711 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
28712 return NULL_RTX;
28714 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
28716 /* This blockage is needed so that sched doesn't decide to move
28717 the sp change before the register restores. */
28718 if (DEFAULT_ABI == ABI_V4
28719 || (TARGET_SPE_ABI
28720 && info->spe_64bit_regs_used != 0
28721 && info->first_gp_reg_save != 32))
28722 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
28723 GEN_INT (frame_off)));
28725 /* If we are restoring registers out-of-line, we will be using the
28726 "exit" variants of the restore routines, which will reset the
28727 stack for us. But we do need to point updt_reg into the
28728 right place for those routines. */
28729 if (frame_off != 0)
28730 return emit_insn (gen_add3_insn (updt_reg_rtx,
28731 frame_reg_rtx, GEN_INT (frame_off)));
28732 else
28733 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
28738 /* Return the register number used as a pointer by out-of-line
28739 save/restore functions. */
28741 static inline unsigned
28742 ptr_regno_for_savres (int sel)
28744 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28745 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
28746 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
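/* In other words: r1 for AIX/ELFv2 FPR routines and their LR
   variants, r12 for the other AIX/ELFv2 routines, r1 for Darwin FPR
   routines, and r11 everywhere else. */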
28749 /* Construct a parallel rtx describing the effect of a call to an
28750 out-of-line register save/restore routine, and emit the insn
28751 or jump_insn as appropriate. */
28753 static rtx_insn *
28754 rs6000_emit_savres_rtx (rs6000_stack_t *info,
28755 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
28756 machine_mode reg_mode, int sel)
28758 int i;
28759 int offset, start_reg, end_reg, n_regs, use_reg;
28760 int reg_size = GET_MODE_SIZE (reg_mode);
28761 rtx sym;
28762 rtvec p;
28763 rtx par;
28764 rtx_insn *insn;
28766 offset = 0;
28767 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
28768 ? info->first_gp_reg_save
28769 : (sel & SAVRES_REG) == SAVRES_FPR
28770 ? info->first_fp_reg_save
28771 : (sel & SAVRES_REG) == SAVRES_VR
28772 ? info->first_altivec_reg_save
28773 : -1);
28774 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
28775 ? 32
28776 : (sel & SAVRES_REG) == SAVRES_FPR
28777 ? 64
28778 : (sel & SAVRES_REG) == SAVRES_VR
28779 ? LAST_ALTIVEC_REGNO + 1
28780 : -1);
28781 n_regs = end_reg - start_reg;
28782 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
28783 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
28784 + n_regs);
28786 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
28787 RTVEC_ELT (p, offset++) = ret_rtx;
28789 RTVEC_ELT (p, offset++)
28790 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
28792 sym = rs6000_savres_routine_sym (info, sel);
28793 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
28795 use_reg = ptr_regno_for_savres (sel);
28796 if ((sel & SAVRES_REG) == SAVRES_VR)
28798 /* Vector regs are saved/restored using [reg+reg] addressing. */
28799 RTVEC_ELT (p, offset++)
28800 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
28801 RTVEC_ELT (p, offset++)
28802 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
28804 else
28805 RTVEC_ELT (p, offset++)
28806 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
28808 for (i = 0; i < end_reg - start_reg; i++)
28809 RTVEC_ELT (p, i + offset)
28810 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
28811 frame_reg_rtx, save_area_offset + reg_size * i,
28812 (sel & SAVRES_SAVE) != 0);
28814 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
28815 RTVEC_ELT (p, i + offset)
28816 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
28818 par = gen_rtx_PARALLEL (VOIDmode, p);
28820 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
28822 insn = emit_jump_insn (par);
28823 JUMP_LABEL (insn) = ret_rtx;
28825 else
28826 insn = emit_insn (par);
28827 return insn;
28830 /* Emit code to store CR fields that need to be saved into REG. */
28832 static void
28833 rs6000_emit_move_from_cr (rtx reg)
28835 /* Only the ELFv2 ABI allows storing a subset of the CR fields. */
28836 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
28838 int i, cr_reg[8], count = 0;
28840 /* Collect CR fields that must be saved. */
28841 for (i = 0; i < 8; i++)
28842 if (save_reg_p (CR0_REGNO + i))
28843 cr_reg[count++] = i;
28845 /* If it's just a single one, use mfcrf. */
28846 if (count == 1)
28848 rtvec p = rtvec_alloc (1);
28849 rtvec r = rtvec_alloc (2);
28850 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
28851 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
28852 RTVEC_ELT (p, 0)
28853 = gen_rtx_SET (reg,
28854 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
28856 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28857 return;
28860 /* ??? It might be better to handle count == 2 / 3 cases here
28861 as well, using logical operations to combine the values. */
28864 emit_insn (gen_movesi_from_cr (reg));
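/* mfcrf can copy a single CR field, which is why the count == 1 case
   above is cheap; the movesi_from_cr fallback is a full mfcr that
   copies all eight fields at once. */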
28867 /* Return whether the split-stack arg pointer (r12) is used. */
28869 static bool
28870 split_stack_arg_pointer_used_p (void)
28872 /* If the pseudo holding the arg pointer is no longer a pseudo,
28873 then the arg pointer is used. */
28874 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
28875 && (!REG_P (cfun->machine->split_stack_arg_pointer)
28876 || (REGNO (cfun->machine->split_stack_arg_pointer)
28877 < FIRST_PSEUDO_REGISTER)))
28878 return true;
28880 /* Unfortunately we also need to do some code scanning, since
28881 r12 may have been substituted for the pseudo. */
28882 rtx_insn *insn;
28883 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
28884 FOR_BB_INSNS (bb, insn)
28885 if (NONDEBUG_INSN_P (insn))
28887 /* A call destroys r12. */
28888 if (CALL_P (insn))
28889 return false;
28891 df_ref use;
28892 FOR_EACH_INSN_USE (use, insn)
28894 rtx x = DF_REF_REG (use);
28895 if (REG_P (x) && REGNO (x) == 12)
28896 return true;
28898 df_ref def;
28899 FOR_EACH_INSN_DEF (def, insn)
28901 rtx x = DF_REF_REG (def);
28902 if (REG_P (x) && REGNO (x) == 12)
28903 return false;
28906 return bitmap_bit_p (DF_LR_OUT (bb), 12);
28909 /* Return whether we need to emit an ELFv2 global entry point prologue. */
28911 static bool
28912 rs6000_global_entry_point_needed_p (void)
28914 /* Only needed for the ELFv2 ABI. */
28915 if (DEFAULT_ABI != ABI_ELFv2)
28916 return false;
28918 /* With -msingle-pic-base, we assume the whole program shares the same
28919 TOC, so no global entry point prologues are needed anywhere. */
28920 if (TARGET_SINGLE_PIC_BASE)
28921 return false;
28923 /* Ensure we have a global entry point for thunks. ??? We could
28924 avoid that if the target routine doesn't need a global entry point,
28925 but we do not know whether this is the case at this point. */
28926 if (cfun->is_thunk)
28927 return true;
28929 /* For regular functions, rs6000_emit_prologue sets this flag if the
28930 routine ever uses the TOC pointer. */
28931 return cfun->machine->r2_setup_needed;
28934 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
28935 static sbitmap
28936 rs6000_get_separate_components (void)
28938 rs6000_stack_t *info = rs6000_stack_info ();
28940 if (WORLD_SAVE_P (info))
28941 return NULL;
28943 if (TARGET_SPE_ABI)
28944 return NULL;
28946 sbitmap components = sbitmap_alloc (32);
28947 bitmap_clear (components);
28949 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
28950 && !(info->savres_strategy & REST_MULTIPLE));
28952 /* The GPRs we need saved to the frame. */
28953 if ((info->savres_strategy & SAVE_INLINE_GPRS)
28954 && (info->savres_strategy & REST_INLINE_GPRS))
28956 int reg_size = TARGET_32BIT ? 4 : 8;
28957 int offset = info->gp_save_offset;
28958 if (info->push_p)
28959 offset += info->total_size;
28961 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
28963 if (IN_RANGE (offset, -0x8000, 0x7fff)
28964 && rs6000_reg_live_or_pic_offset_p (regno))
28965 bitmap_set_bit (components, regno);
28967 offset += reg_size;
28971 /* Don't mess with the hard frame pointer. */
28972 if (frame_pointer_needed)
28973 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
28975 /* Don't mess with the fixed TOC register. */
28976 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
28977 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
28978 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
28979 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
28981 /* Optimize LR save and restore if we can. This is component 0. Any
28982 out-of-line register save/restore routines need LR. */
28983 if (info->lr_save_p
28984 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
28985 && (info->savres_strategy & SAVE_INLINE_GPRS)
28986 && (info->savres_strategy & REST_INLINE_GPRS)
28987 && (info->savres_strategy & SAVE_INLINE_FPRS)
28988 && (info->savres_strategy & REST_INLINE_FPRS)
28989 && (info->savres_strategy & SAVE_INLINE_VRS)
28990 && (info->savres_strategy & REST_INLINE_VRS))
28992 int offset = info->lr_save_offset;
28993 if (info->push_p)
28994 offset += info->total_size;
28995 if (IN_RANGE (offset, -0x8000, 0x7fff))
28996 bitmap_set_bit (components, 0);
28999 return components;
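/* To summarize the component numbering used by these hooks: bit 0 of
   the sbitmap stands for the LR save/restore, and bit N, for
   first_gp_reg_save <= N < 32, stands for the save/restore of GPR N.
   The IN_RANGE (offset, -0x8000, 0x7fff) test above keeps a slot only
   if it is reachable with a signed 16-bit D-form displacement.  */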
29002 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
29003 static sbitmap
29004 rs6000_components_for_bb (basic_block bb)
29006 rs6000_stack_t *info = rs6000_stack_info ();
29008 bitmap in = DF_LIVE_IN (bb);
29009 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
29010 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
29012 sbitmap components = sbitmap_alloc (32);
29013 bitmap_clear (components);
29015 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
29016 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29017 if (bitmap_bit_p (in, regno)
29018 || bitmap_bit_p (gen, regno)
29019 || bitmap_bit_p (kill, regno))
29020 bitmap_set_bit (components, regno);
29022 /* Like the GPRs above, LR needs to be saved around a bb if it is in the IN, GEN, or KILL set for that bb. */
29023 if (bitmap_bit_p (in, LR_REGNO)
29024 || bitmap_bit_p (gen, LR_REGNO)
29025 || bitmap_bit_p (kill, LR_REGNO))
29026 bitmap_set_bit (components, 0);
29028 return components;
29031 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
29032 static void
29033 rs6000_disqualify_components (sbitmap components, edge e,
29034 sbitmap edge_components, bool /*is_prologue*/)
29036 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
29037 live where we want to place that code. */
29038 if (bitmap_bit_p (edge_components, 0)
29039 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
29041 if (dump_file)
29042 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
29043 "on entry to bb %d\n", e->dest->index);
29044 bitmap_clear_bit (components, 0);
29048 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
29049 static void
29050 rs6000_emit_prologue_components (sbitmap components)
29052 rs6000_stack_t *info = rs6000_stack_info ();
29053 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29054 ? HARD_FRAME_POINTER_REGNUM
29055 : STACK_POINTER_REGNUM);
29056 int reg_size = TARGET_32BIT ? 4 : 8;
29058 /* Prologue for LR. */
29059 if (bitmap_bit_p (components, 0))
29061 rtx reg = gen_rtx_REG (Pmode, 0);
29062 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
29063 RTX_FRAME_RELATED_P (insn) = 1;
29064 add_reg_note (insn, REG_CFA_REGISTER, NULL);
29066 int offset = info->lr_save_offset;
29067 if (info->push_p)
29068 offset += info->total_size;
29070 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29071 RTX_FRAME_RELATED_P (insn) = 1;
29072 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29073 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
29074 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
29077 /* Prologue for the GPRs. */
29078 int offset = info->gp_save_offset;
29079 if (info->push_p)
29080 offset += info->total_size;
29082 for (int i = info->first_gp_reg_save; i < 32; i++)
29084 if (bitmap_bit_p (components, i))
29086 rtx reg = gen_rtx_REG (Pmode, i);
29087 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29088 RTX_FRAME_RELATED_P (insn) = 1;
29089 rtx set = copy_rtx (single_set (insn));
29090 add_reg_note (insn, REG_CFA_OFFSET, set);
29093 offset += reg_size;
29097 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29098 static void
29099 rs6000_emit_epilogue_components (sbitmap components)
29101 rs6000_stack_t *info = rs6000_stack_info ();
29102 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29103 ? HARD_FRAME_POINTER_REGNUM
29104 : STACK_POINTER_REGNUM);
29105 int reg_size = TARGET_32BIT ? 4 : 8;
29107 /* Epilogue for the GPRs. */
29108 int offset = info->gp_save_offset;
29109 if (info->push_p)
29110 offset += info->total_size;
29112 for (int i = info->first_gp_reg_save; i < 32; i++)
29114 if (bitmap_bit_p (components, i))
29116 rtx reg = gen_rtx_REG (Pmode, i);
29117 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29118 RTX_FRAME_RELATED_P (insn) = 1;
29119 add_reg_note (insn, REG_CFA_RESTORE, reg);
29122 offset += reg_size;
29125 /* Epilogue for LR. */
29126 if (bitmap_bit_p (components, 0))
29128 int offset = info->lr_save_offset;
29129 if (info->push_p)
29130 offset += info->total_size;
29132 rtx reg = gen_rtx_REG (Pmode, 0);
29133 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29135 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29136 insn = emit_move_insn (lr, reg);
29137 RTX_FRAME_RELATED_P (insn) = 1;
29138 add_reg_note (insn, REG_CFA_RESTORE, lr);
29142 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29143 static void
29144 rs6000_set_handled_components (sbitmap components)
29146 rs6000_stack_t *info = rs6000_stack_info ();
29148 for (int i = info->first_gp_reg_save; i < 32; i++)
29149 if (bitmap_bit_p (components, i))
29150 cfun->machine->gpr_is_wrapped_separately[i] = true;
29152 if (bitmap_bit_p (components, 0))
29153 cfun->machine->lr_is_wrapped_separately = true;
29156 /* Emit function prologue as insns. */
29158 void
29159 rs6000_emit_prologue (void)
29161 rs6000_stack_t *info = rs6000_stack_info ();
29162 machine_mode reg_mode = Pmode;
29163 int reg_size = TARGET_32BIT ? 4 : 8;
29164 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29165 rtx frame_reg_rtx = sp_reg_rtx;
29166 unsigned int cr_save_regno;
29167 rtx cr_save_rtx = NULL_RTX;
29168 rtx_insn *insn;
29169 int strategy;
29170 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
29171 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
29172 && call_used_regs[STATIC_CHAIN_REGNUM]);
29173 int using_split_stack = (flag_split_stack
29174 && (lookup_attribute ("no_split_stack",
29175 DECL_ATTRIBUTES (cfun->decl))
29176 == NULL));
29178 /* Offset to top of frame for frame_reg and sp respectively. */
29179 HOST_WIDE_INT frame_off = 0;
29180 HOST_WIDE_INT sp_off = 0;
29181 /* sp_adjust is the stack adjusting instruction, tracked so that the
29182 insn setting up the split-stack arg pointer can be emitted just
29183 prior to it, when r12 is not used here for other purposes. */
29184 rtx_insn *sp_adjust = 0;
29186 #if CHECKING_P
29187 /* Track and check usage of r0, r11, r12. */
29188 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
29189 #define START_USE(R) do \
29190 { \
29191 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29192 reg_inuse |= 1 << (R); \
29193 } while (0)
29194 #define END_USE(R) do \
29195 { \
29196 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
29197 reg_inuse &= ~(1 << (R)); \
29198 } while (0)
29199 #define NOT_INUSE(R) do \
29200 { \
29201 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29202 } while (0)
29203 #else
29204 #define START_USE(R) do {} while (0)
29205 #define END_USE(R) do {} while (0)
29206 #define NOT_INUSE(R) do {} while (0)
29207 #endif
29209 if (DEFAULT_ABI == ABI_ELFv2
29210 && !TARGET_SINGLE_PIC_BASE)
29212 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
29214 /* With -mminimal-toc we may generate an extra use of r2 below. */
29215 if (TARGET_TOC && TARGET_MINIMAL_TOC
29216 && !constant_pool_empty_p ())
29217 cfun->machine->r2_setup_needed = true;
29221 if (flag_stack_usage_info)
29222 current_function_static_stack_size = info->total_size;
29224 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
29226 HOST_WIDE_INT size = info->total_size;
29228 if (crtl->is_leaf && !cfun->calls_alloca)
29230 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
29231 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
29232 size - STACK_CHECK_PROTECT);
29234 else if (size > 0)
29235 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
29238 if (TARGET_FIX_AND_CONTINUE)
29240 /* gdb on darwin arranges to forward a function from the old
29241 address by modifying the first 5 instructions of the function
29242 to branch to the overriding function. This is necessary to
29243 permit function pointers that point to the old function to
29244 actually forward to the new function. */
29245 emit_insn (gen_nop ());
29246 emit_insn (gen_nop ());
29247 emit_insn (gen_nop ());
29248 emit_insn (gen_nop ());
29249 emit_insn (gen_nop ());
29252 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29254 reg_mode = V2SImode;
29255 reg_size = 8;
29258 /* Handle world saves specially here. */
29259 if (WORLD_SAVE_P (info))
29261 int i, j, sz;
29262 rtx treg;
29263 rtvec p;
29264 rtx reg0;
29266 /* save_world expects lr in r0. */
29267 reg0 = gen_rtx_REG (Pmode, 0);
29268 if (info->lr_save_p)
29270 insn = emit_move_insn (reg0,
29271 gen_rtx_REG (Pmode, LR_REGNO));
29272 RTX_FRAME_RELATED_P (insn) = 1;
29275 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
29276 assumptions about the offsets of various bits of the stack
29277 frame. */
29278 gcc_assert (info->gp_save_offset == -220
29279 && info->fp_save_offset == -144
29280 && info->lr_save_offset == 8
29281 && info->cr_save_offset == 4
29282 && info->push_p
29283 && info->lr_save_p
29284 && (!crtl->calls_eh_return
29285 || info->ehrd_offset == -432)
29286 && info->vrsave_save_offset == -224
29287 && info->altivec_save_offset == -416);
29289 treg = gen_rtx_REG (SImode, 11);
29290 emit_move_insn (treg, GEN_INT (-info->total_size));
29292 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
29293 in R11. It also clobbers R12, so beware! */
29295 /* Preserve CR2 for save_world prologues.  */
29296 sz = 5;
29297 sz += 32 - info->first_gp_reg_save;
29298 sz += 64 - info->first_fp_reg_save;
29299 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
29300 p = rtvec_alloc (sz);
29301 j = 0;
29302 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
29303 gen_rtx_REG (SImode,
29304 LR_REGNO));
29305 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
29306 gen_rtx_SYMBOL_REF (Pmode,
29307 "*save_world"));
29308 /* We do floats first so that the instruction pattern matches
29309 properly. */
29310 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29311 RTVEC_ELT (p, j++)
29312 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29313 ? DFmode : SFmode,
29314 info->first_fp_reg_save + i),
29315 frame_reg_rtx,
29316 info->fp_save_offset + frame_off + 8 * i);
29317 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29318 RTVEC_ELT (p, j++)
29319 = gen_frame_store (gen_rtx_REG (V4SImode,
29320 info->first_altivec_reg_save + i),
29321 frame_reg_rtx,
29322 info->altivec_save_offset + frame_off + 16 * i);
29323 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29324 RTVEC_ELT (p, j++)
29325 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29326 frame_reg_rtx,
29327 info->gp_save_offset + frame_off + reg_size * i);
29329 /* CR register traditionally saved as CR2. */
29330 RTVEC_ELT (p, j++)
29331 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
29332 frame_reg_rtx, info->cr_save_offset + frame_off);
29333 /* Record the store of R0, which holds the old LR, to the LR save slot. */
29334 if (info->lr_save_p)
29335 RTVEC_ELT (p, j++)
29336 = gen_frame_store (reg0,
29337 frame_reg_rtx, info->lr_save_offset + frame_off);
29338 /* Explain what happens to the stack pointer. */
29340 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
29341 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
29344 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29345 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29346 treg, GEN_INT (-info->total_size));
29347 sp_off = frame_off = info->total_size;
29350 strategy = info->savres_strategy;
29352 /* For V.4, update stack before we do any saving and set back pointer. */
29353 if (! WORLD_SAVE_P (info)
29354 && info->push_p
29355 && (DEFAULT_ABI == ABI_V4
29356 || crtl->calls_eh_return))
29358 bool need_r11 = (TARGET_SPE
29359 ? (!(strategy & SAVE_INLINE_GPRS)
29360 && info->spe_64bit_regs_used == 0)
29361 : (!(strategy & SAVE_INLINE_FPRS)
29362 || !(strategy & SAVE_INLINE_GPRS)
29363 || !(strategy & SAVE_INLINE_VRS)));
29364 int ptr_regno = -1;
29365 rtx ptr_reg = NULL_RTX;
29366 int ptr_off = 0;
29368 if (info->total_size < 32767)
29369 frame_off = info->total_size;
29370 else if (need_r11)
29371 ptr_regno = 11;
29372 else if (info->cr_save_p
29373 || info->lr_save_p
29374 || info->first_fp_reg_save < 64
29375 || info->first_gp_reg_save < 32
29376 || info->altivec_size != 0
29377 || info->vrsave_size != 0
29378 || crtl->calls_eh_return)
29379 ptr_regno = 12;
29380 else
29382 /* The prologue won't be saving any regs so there is no need
29383 to set up a frame register to access any frame save area.
29384 We also won't be using frame_off anywhere below, but set
29385 the correct value anyway to protect against future
29386 changes to this function. */
29387 frame_off = info->total_size;
29389 if (ptr_regno != -1)
29391 /* Set up the frame offset to that needed by the first
29392 out-of-line save function. */
29393 START_USE (ptr_regno);
29394 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29395 frame_reg_rtx = ptr_reg;
29396 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
29397 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
29398 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
29399 ptr_off = info->gp_save_offset + info->gp_size;
29400 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
29401 ptr_off = info->altivec_save_offset + info->altivec_size;
29402 frame_off = -ptr_off;
29404 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
29405 ptr_reg, ptr_off);
29406 if (REGNO (frame_reg_rtx) == 12)
29407 sp_adjust = 0;
29408 sp_off = info->total_size;
29409 if (frame_reg_rtx != sp_reg_rtx)
29410 rs6000_emit_stack_tie (frame_reg_rtx, false);
29413 /* If we use the link register, get it into r0. */
29414 if (!WORLD_SAVE_P (info) && info->lr_save_p
29415 && !cfun->machine->lr_is_wrapped_separately)
29417 rtx addr, reg, mem;
29419 reg = gen_rtx_REG (Pmode, 0);
29420 START_USE (0);
29421 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
29422 RTX_FRAME_RELATED_P (insn) = 1;
29424 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
29425 | SAVE_NOINLINE_FPRS_SAVES_LR)))
29427 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
29428 GEN_INT (info->lr_save_offset + frame_off));
29429 mem = gen_rtx_MEM (Pmode, addr);
29430 /* This should not be in rs6000_sr_alias_set, because of
29431 __builtin_return_address. */
29433 insn = emit_move_insn (mem, reg);
29434 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29435 NULL_RTX, NULL_RTX);
29436 END_USE (0);
29440 /* If we need to save CR, put it into r12 or r11.  Choose r12 except when
29441 r12 will be needed by the out-of-line gpr save. */
29442 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29443 && !(strategy & (SAVE_INLINE_GPRS
29444 | SAVE_NOINLINE_GPRS_SAVES_LR))
29445 ? 11 : 12);
29446 if (!WORLD_SAVE_P (info)
29447 && info->cr_save_p
29448 && REGNO (frame_reg_rtx) != cr_save_regno
29449 && !(using_static_chain_p && cr_save_regno == 11)
29450 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
29452 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
29453 START_USE (cr_save_regno);
29454 rs6000_emit_move_from_cr (cr_save_rtx);
29457 /* Do any required saving of fpr's.  If only one or two to save, do
29458 it ourselves.  Otherwise, call an out-of-line save function. */
29459 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
29461 int i;
29462 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29463 if (save_reg_p (info->first_fp_reg_save + i))
29464 emit_frame_save (frame_reg_rtx,
29465 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29466 ? DFmode : SFmode),
29467 info->first_fp_reg_save + i,
29468 info->fp_save_offset + frame_off + 8 * i,
29469 sp_off - frame_off);
29471 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
29473 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
29474 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
29475 unsigned ptr_regno = ptr_regno_for_savres (sel);
29476 rtx ptr_reg = frame_reg_rtx;
29478 if (REGNO (frame_reg_rtx) == ptr_regno)
29479 gcc_checking_assert (frame_off == 0);
29480 else
29482 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29483 NOT_INUSE (ptr_regno);
29484 emit_insn (gen_add3_insn (ptr_reg,
29485 frame_reg_rtx, GEN_INT (frame_off)));
29487 insn = rs6000_emit_savres_rtx (info, ptr_reg,
29488 info->fp_save_offset,
29489 info->lr_save_offset,
29490 DFmode, sel);
29491 rs6000_frame_related (insn, ptr_reg, sp_off,
29492 NULL_RTX, NULL_RTX);
29493 if (lr)
29494 END_USE (0);
29497 /* Save GPRs. This is done as a PARALLEL if we are using
29498 the store-multiple instructions. */
29499 if (!WORLD_SAVE_P (info)
29500 && TARGET_SPE_ABI
29501 && info->spe_64bit_regs_used != 0
29502 && info->first_gp_reg_save != 32)
29504 int i;
29505 rtx spe_save_area_ptr;
29506 HOST_WIDE_INT save_off;
29507 int ool_adjust = 0;
29509 /* Determine whether we can address all of the registers that need
29510 to be saved with an offset from frame_reg_rtx that fits in
29511 the small const field for SPE memory instructions. */
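/* For reference, SPE_CONST_OFFSET_OK accepts the offsets encodable by
   the SPE doubleword load/store insns: assuming the usual definition,
   multiples of 8 from 0 to 248, since those insns only have a 5-bit
   offset field scaled by 8.  */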
29512 int spe_regs_addressable
29513 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29514 + reg_size * (32 - info->first_gp_reg_save - 1))
29515 && (strategy & SAVE_INLINE_GPRS));
29517 if (spe_regs_addressable)
29519 spe_save_area_ptr = frame_reg_rtx;
29520 save_off = frame_off;
29522 else
29524 /* Make r11 point to the start of the SPE save area. We need
29525 to be careful here if r11 is holding the static chain. If
29526 it is, then temporarily save it in r0. */
29527 HOST_WIDE_INT offset;
29529 if (!(strategy & SAVE_INLINE_GPRS))
29530 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
29531 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
29532 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
29533 save_off = frame_off - offset;
29535 if (using_static_chain_p)
29537 rtx r0 = gen_rtx_REG (Pmode, 0);
29539 START_USE (0);
29540 gcc_assert (info->first_gp_reg_save > 11);
29542 emit_move_insn (r0, spe_save_area_ptr);
29544 else if (REGNO (frame_reg_rtx) != 11)
29545 START_USE (11);
29547 emit_insn (gen_addsi3 (spe_save_area_ptr,
29548 frame_reg_rtx, GEN_INT (offset)));
29549 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
29550 frame_off = -info->spe_gp_save_offset + ool_adjust;
29553 if ((strategy & SAVE_INLINE_GPRS))
29555 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29556 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
29557 emit_frame_save (spe_save_area_ptr, reg_mode,
29558 info->first_gp_reg_save + i,
29559 (info->spe_gp_save_offset + save_off
29560 + reg_size * i),
29561 sp_off - save_off);
29563 else
29565 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
29566 info->spe_gp_save_offset + save_off,
29567 0, reg_mode,
29568 SAVRES_SAVE | SAVRES_GPR);
29570 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
29571 NULL_RTX, NULL_RTX);
29574 /* Move the static chain pointer back. */
29575 if (!spe_regs_addressable)
29577 if (using_static_chain_p)
29579 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
29580 END_USE (0);
29582 else if (REGNO (frame_reg_rtx) != 11)
29583 END_USE (11);
29586 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
29588 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
29589 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
29590 unsigned ptr_regno = ptr_regno_for_savres (sel);
29591 rtx ptr_reg = frame_reg_rtx;
29592 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
29593 int end_save = info->gp_save_offset + info->gp_size;
29594 int ptr_off;
29596 if (ptr_regno == 12)
29597 sp_adjust = 0;
29598 if (!ptr_set_up)
29599 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29601 /* Need to adjust r11 (r12) if we saved any FPRs. */
29602 if (end_save + frame_off != 0)
29604 rtx offset = GEN_INT (end_save + frame_off);
29606 if (ptr_set_up)
29607 frame_off = -end_save;
29608 else
29609 NOT_INUSE (ptr_regno);
29610 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29612 else if (!ptr_set_up)
29614 NOT_INUSE (ptr_regno);
29615 emit_move_insn (ptr_reg, frame_reg_rtx);
29617 ptr_off = -end_save;
29618 insn = rs6000_emit_savres_rtx (info, ptr_reg,
29619 info->gp_save_offset + ptr_off,
29620 info->lr_save_offset + ptr_off,
29621 reg_mode, sel);
29622 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
29623 NULL_RTX, NULL_RTX);
29624 if (lr)
29625 END_USE (0);
29627 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
29629 rtvec p;
29630 int i;
29631 p = rtvec_alloc (32 - info->first_gp_reg_save);
29632 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29633 RTVEC_ELT (p, i)
29634 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29635 frame_reg_rtx,
29636 info->gp_save_offset + frame_off + reg_size * i);
29637 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29638 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29639 NULL_RTX, NULL_RTX);
29641 else if (!WORLD_SAVE_P (info))
29643 int offset = info->gp_save_offset + frame_off;
29644 for (int i = info->first_gp_reg_save; i < 32; i++)
29646 if (rs6000_reg_live_or_pic_offset_p (i)
29647 && !cfun->machine->gpr_is_wrapped_separately[i])
29648 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
29649 sp_off - frame_off);
29651 offset += reg_size;
29655 if (crtl->calls_eh_return)
29657 unsigned int i;
29658 rtvec p;
29660 for (i = 0; ; ++i)
29662 unsigned int regno = EH_RETURN_DATA_REGNO (i);
29663 if (regno == INVALID_REGNUM)
29664 break;
29667 p = rtvec_alloc (i);
29669 for (i = 0; ; ++i)
29671 unsigned int regno = EH_RETURN_DATA_REGNO (i);
29672 if (regno == INVALID_REGNUM)
29673 break;
29675 rtx set
29676 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
29677 sp_reg_rtx,
29678 info->ehrd_offset + sp_off + reg_size * (int) i);
29679 RTVEC_ELT (p, i) = set;
29680 RTX_FRAME_RELATED_P (set) = 1;
29683 insn = emit_insn (gen_blockage ());
29684 RTX_FRAME_RELATED_P (insn) = 1;
29685 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
29688 /* In the AIX ABI we need to make sure r2 is really saved. */
29689 if (TARGET_AIX && crtl->calls_eh_return)
29691 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
29692 rtx join_insn, note;
29693 rtx_insn *save_insn;
29694 long toc_restore_insn;
29696 tmp_reg = gen_rtx_REG (Pmode, 11);
29697 tmp_reg_si = gen_rtx_REG (SImode, 11);
29698 if (using_static_chain_p)
29700 START_USE (0);
29701 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
29703 else
29704 START_USE (11);
29705 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
29706 /* Peek at the instruction to which this function returns.  If it's
29707 restoring r2, then we know we've already saved r2.  We can't
29708 unconditionally save r2 because the value we have will already
29709 be updated if we arrived at this function via a plt call or
29710 toc adjusting stub. */
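/* Concretely, on 64-bit the expected TOC restore insn is "ld 2,24(1)",
   which encodes as 0xE8410000 + 24 = 0xE8410018; on 32-bit it is
   "lwz 2,20(1)" = 0x80410000 + 20 = 0x80410014 (assuming the usual
   RS6000_TOC_SAVE_SLOT values of 24 and 20 respectively).  */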
29711 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
29712 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
29713 + RS6000_TOC_SAVE_SLOT);
29714 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
29715 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
29716 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
29717 validate_condition_mode (EQ, CCUNSmode);
29718 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
29719 emit_insn (gen_rtx_SET (compare_result,
29720 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
29721 toc_save_done = gen_label_rtx ();
29722 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29723 gen_rtx_EQ (VOIDmode, compare_result,
29724 const0_rtx),
29725 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
29726 pc_rtx);
29727 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29728 JUMP_LABEL (jump) = toc_save_done;
29729 LABEL_NUSES (toc_save_done) += 1;
29731 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
29732 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
29733 sp_off - frame_off);
29735 emit_label (toc_save_done);
29737 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
29738 have a CFG that has different saves along different paths.
29739 Move the note to a dummy blockage insn, which describes that
29740 R2 is unconditionally saved after the label. */
29741 /* ??? An alternate representation might be a special insn pattern
29742 containing both the branch and the store.  That might give the
29743 code that minimizes the number of DW_CFA_advance opcodes more
29744 freedom in placing the annotations. */
29745 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
29746 if (note)
29747 remove_note (save_insn, note);
29748 else
29749 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
29750 copy_rtx (PATTERN (save_insn)), NULL_RTX);
29751 RTX_FRAME_RELATED_P (save_insn) = 0;
29753 join_insn = emit_insn (gen_blockage ());
29754 REG_NOTES (join_insn) = note;
29755 RTX_FRAME_RELATED_P (join_insn) = 1;
29757 if (using_static_chain_p)
29759 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
29760 END_USE (0);
29762 else
29763 END_USE (11);
29766 /* Save CR if we use any that must be preserved. */
29767 if (!WORLD_SAVE_P (info) && info->cr_save_p)
29769 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
29770 GEN_INT (info->cr_save_offset + frame_off));
29771 rtx mem = gen_frame_mem (SImode, addr);
29773 /* If we didn't copy cr before, do so now using r0. */
29774 if (cr_save_rtx == NULL_RTX)
29776 START_USE (0);
29777 cr_save_rtx = gen_rtx_REG (SImode, 0);
29778 rs6000_emit_move_from_cr (cr_save_rtx);
29781 /* Saving CR requires a two-instruction sequence: one instruction
29782 to move the CR to a general-purpose register, and a second
29783 instruction that stores the GPR to memory.
29785 We do not emit any DWARF CFI records for the first of these,
29786 because we cannot properly represent the fact that CR is saved in
29787 a register. One reason is that we cannot express that multiple
29788 CR fields are saved; another reason is that on 64-bit, the size
29789 of the CR register in DWARF (4 bytes) differs from the size of
29790 a general-purpose register.
29792 This means if any intervening instruction were to clobber one of
29793 the call-saved CR fields, we'd have incorrect CFI. To prevent
29794 this from happening, we mark the store to memory as a use of
29795 those CR fields, which prevents any such instruction from being
29796 scheduled in between the two instructions. */
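/* As a sketch, with r0 as the scratch register and an illustrative
   save slot, the protected two-instruction sequence is:
       mfcr 0
       stw 0,<cr_save_offset>(1)
   with the USEs of the CR fields built below riding along in the
   PARALLEL with the store.  */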
29797 rtx crsave_v[9];
29798 int n_crsave = 0;
29799 int i;
29801 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
29802 for (i = 0; i < 8; i++)
29803 if (save_reg_p (CR0_REGNO + i))
29804 crsave_v[n_crsave++]
29805 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
29807 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
29808 gen_rtvec_v (n_crsave, crsave_v)));
29809 END_USE (REGNO (cr_save_rtx));
29811 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
29812 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
29813 so we need to construct a frame expression manually. */
29814 RTX_FRAME_RELATED_P (insn) = 1;
29816 /* Update address to be stack-pointer relative, like
29817 rs6000_frame_related would do. */
29818 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
29819 GEN_INT (info->cr_save_offset + sp_off));
29820 mem = gen_frame_mem (SImode, addr);
29822 if (DEFAULT_ABI == ABI_ELFv2)
29824 /* In the ELFv2 ABI we generate separate CFI records for each
29825 CR field that was actually saved. They all point to the
29826 same 32-bit stack slot. */
29827 rtx crframe[8];
29828 int n_crframe = 0;
29830 for (i = 0; i < 8; i++)
29831 if (save_reg_p (CR0_REGNO + i))
29833 crframe[n_crframe]
29834 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
29836 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
29837 n_crframe++;
29840 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
29841 gen_rtx_PARALLEL (VOIDmode,
29842 gen_rtvec_v (n_crframe, crframe)));
29844 else
29846 /* In other ABIs, by convention, we use a single CR regnum to
29847 represent the fact that all call-saved CR fields are saved.
29848 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
29849 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
29850 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
29854 /* In the ELFv2 ABI we need to save all call-saved CR fields into
29855 *separate* slots if the routine calls __builtin_eh_return, so
29856 that they can be independently restored by the unwinder. */
29857 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
29859 int i, cr_off = info->ehcr_offset;
29860 rtx crsave;
29862 /* ??? We might get better performance by using multiple mfocrf
29863 instructions. */
29864 crsave = gen_rtx_REG (SImode, 0);
29865 emit_insn (gen_movesi_from_cr (crsave));
29867 for (i = 0; i < 8; i++)
29868 if (!call_used_regs[CR0_REGNO + i])
29870 rtvec p = rtvec_alloc (2);
29871 RTVEC_ELT (p, 0)
29872 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
29873 RTVEC_ELT (p, 1)
29874 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
29876 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29878 RTX_FRAME_RELATED_P (insn) = 1;
29879 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
29880 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
29881 sp_reg_rtx, cr_off + sp_off));
29883 cr_off += reg_size;
29887 /* Update stack and set back pointer unless this is V.4,
29888 for which it was done previously. */
29889 if (!WORLD_SAVE_P (info) && info->push_p
29890 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
29892 rtx ptr_reg = NULL;
29893 int ptr_off = 0;
29895 /* If saving altivec regs, we need to be able to address all save
29896 locations using a 16-bit offset. */
29897 if ((strategy & SAVE_INLINE_VRS) == 0
29898 || (info->altivec_size != 0
29899 && (info->altivec_save_offset + info->altivec_size - 16
29900 + info->total_size - frame_off) > 32767)
29901 || (info->vrsave_size != 0
29902 && (info->vrsave_save_offset
29903 + info->total_size - frame_off) > 32767))
29905 int sel = SAVRES_SAVE | SAVRES_VR;
29906 unsigned ptr_regno = ptr_regno_for_savres (sel);
29908 if (using_static_chain_p
29909 && ptr_regno == STATIC_CHAIN_REGNUM)
29910 ptr_regno = 12;
29911 if (REGNO (frame_reg_rtx) != ptr_regno)
29912 START_USE (ptr_regno);
29913 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29914 frame_reg_rtx = ptr_reg;
29915 ptr_off = info->altivec_save_offset + info->altivec_size;
29916 frame_off = -ptr_off;
29918 else if (REGNO (frame_reg_rtx) == 1)
29919 frame_off = info->total_size;
29920 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
29921 ptr_reg, ptr_off);
29922 if (REGNO (frame_reg_rtx) == 12)
29923 sp_adjust = 0;
29924 sp_off = info->total_size;
29925 if (frame_reg_rtx != sp_reg_rtx)
29926 rs6000_emit_stack_tie (frame_reg_rtx, false);
29929 /* Set frame pointer, if needed. */
29930 if (frame_pointer_needed)
29932 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
29933 sp_reg_rtx);
29934 RTX_FRAME_RELATED_P (insn) = 1;
29937 /* Save AltiVec registers if needed. Save here because the red zone does
29938 not always include AltiVec registers. */
29939 if (!WORLD_SAVE_P (info)
29940 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
29942 int end_save = info->altivec_save_offset + info->altivec_size;
29943 int ptr_off;
29944 /* Oddly, the vector save/restore functions point r0 at the end
29945 of the save area, then use r11 or r12 to load offsets for
29946 [reg+reg] addressing. */
29947 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29948 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
29949 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29951 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
29952 NOT_INUSE (0);
29953 if (scratch_regno == 12)
29954 sp_adjust = 0;
29955 if (end_save + frame_off != 0)
29957 rtx offset = GEN_INT (end_save + frame_off);
29959 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29961 else
29962 emit_move_insn (ptr_reg, frame_reg_rtx);
29964 ptr_off = -end_save;
29965 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29966 info->altivec_save_offset + ptr_off,
29967 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
29968 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
29969 NULL_RTX, NULL_RTX);
29970 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
29972 /* The oddity mentioned above clobbered our frame reg. */
29973 emit_move_insn (frame_reg_rtx, ptr_reg);
29974 frame_off = ptr_off;
29977 else if (!WORLD_SAVE_P (info)
29978 && info->altivec_size != 0)
29980 int i;
29982 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29983 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29985 rtx areg, savereg, mem;
29986 HOST_WIDE_INT offset;
29988 offset = (info->altivec_save_offset + frame_off
29989 + 16 * (i - info->first_altivec_reg_save));
29991 savereg = gen_rtx_REG (V4SImode, i);
29993 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29995 mem = gen_frame_mem (V4SImode,
29996 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29997 GEN_INT (offset)));
29998 insn = emit_insn (gen_rtx_SET (mem, savereg));
29999 areg = NULL_RTX;
30001 else
30003 NOT_INUSE (0);
30004 areg = gen_rtx_REG (Pmode, 0);
30005 emit_move_insn (areg, GEN_INT (offset));
30007 /* AltiVec addressing mode is [reg+reg]. */
30008 mem = gen_frame_mem (V4SImode,
30009 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
30011 /* Rather than emitting a generic move, force use of the stvx
30012 instruction, which we always want on ISA 2.07 (power8) systems.
30013 In particular we don't want xxpermdi/stxvd2x for little
30014 endian. */
30015 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
30018 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30019 areg, GEN_INT (offset));
30023 /* VRSAVE is a bit vector representing which AltiVec registers
30024 are used. The OS uses this to determine which vector
30025 registers to save on a context switch. We need to save
30026 VRSAVE on the stack frame, add whatever AltiVec registers we
30027 used in this function, and do the corresponding magic in the
30028 epilogue. */
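/* Sketched as assembly, with the scratch register and frame offset
   chosen by the checks below, the sequence is roughly:
       mfvrsave 12            read the incoming VRSAVE
       stw 12,<off>(1)        save it in our frame
       oris/ori 12,12,<mask>  add the AltiVec regs this function uses
       mtvrsave 12  */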
30030 if (!WORLD_SAVE_P (info)
30031 && info->vrsave_size != 0)
30033 rtx reg, vrsave;
30034 int offset;
30035 int save_regno;
30037 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
30038 be using r12 as frame_reg_rtx and r11 as the static chain
30039 pointer for nested functions. */
30040 save_regno = 12;
30041 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30042 && !using_static_chain_p)
30043 save_regno = 11;
30044 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
30046 save_regno = 11;
30047 if (using_static_chain_p)
30048 save_regno = 0;
30051 NOT_INUSE (save_regno);
30052 reg = gen_rtx_REG (SImode, save_regno);
30053 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
30054 if (TARGET_MACHO)
30055 emit_insn (gen_get_vrsave_internal (reg));
30056 else
30057 emit_insn (gen_rtx_SET (reg, vrsave));
30059 /* Save VRSAVE. */
30060 offset = info->vrsave_save_offset + frame_off;
30061 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
30063 /* Include the registers in the mask. */
30064 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
30066 insn = emit_insn (generate_set_vrsave (reg, info, 0));
30069 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
30070 if (!TARGET_SINGLE_PIC_BASE
30071 && ((TARGET_TOC && TARGET_MINIMAL_TOC
30072 && !constant_pool_empty_p ())
30073 || (DEFAULT_ABI == ABI_V4
30074 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
30075 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
30077 /* If emit_load_toc_table will use the link register, we need to save
30078 it. We use R12 for this purpose because emit_load_toc_table
30079 can use register 0. This allows us to use a plain 'blr' to return
30080 from the procedure more often. */
30081 int save_LR_around_toc_setup = (TARGET_ELF
30082 && DEFAULT_ABI == ABI_V4
30083 && flag_pic
30084 && ! info->lr_save_p
30085 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
30086 if (save_LR_around_toc_setup)
30088 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30089 rtx tmp = gen_rtx_REG (Pmode, 12);
30091 sp_adjust = 0;
30092 insn = emit_move_insn (tmp, lr);
30093 RTX_FRAME_RELATED_P (insn) = 1;
30095 rs6000_emit_load_toc_table (TRUE);
30097 insn = emit_move_insn (lr, tmp);
30098 add_reg_note (insn, REG_CFA_RESTORE, lr);
30099 RTX_FRAME_RELATED_P (insn) = 1;
30101 else
30102 rs6000_emit_load_toc_table (TRUE);
30105 #if TARGET_MACHO
30106 if (!TARGET_SINGLE_PIC_BASE
30107 && DEFAULT_ABI == ABI_DARWIN
30108 && flag_pic && crtl->uses_pic_offset_table)
30110 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30111 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
30113 /* Save and restore LR locally around this call (in R0). */
30114 if (!info->lr_save_p)
30115 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
30117 emit_insn (gen_load_macho_picbase (src));
30119 emit_move_insn (gen_rtx_REG (Pmode,
30120 RS6000_PIC_OFFSET_TABLE_REGNUM),
30121 lr);
30123 if (!info->lr_save_p)
30124 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
30126 #endif
30128 /* If we need to, save the TOC register after doing the stack setup.
30129 Do not emit eh frame info for this save. The unwinder wants info,
30130 conceptually attached to instructions in this function, about
30131 register values in the caller of this function. This R2 may have
30132 already been changed from the value in the caller.
30133 We don't attempt to write accurate DWARF EH frame info for R2
30134 because code emitted by gcc for a (non-pointer) function call
30135 doesn't save and restore R2. Instead, R2 is managed out-of-line
30136 by a linker generated plt call stub when the function resides in
30137 a shared library. This behavior is costly to describe in DWARF,
30138 both in terms of the size of DWARF info and the time taken in the
30139 unwinder to interpret it. R2 changes, apart from the
30140 calls_eh_return case earlier in this function, are handled by
30141 linux-unwind.h frob_update_context. */
30142 if (rs6000_save_toc_in_prologue_p ())
30144 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
30145 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
30148 if (using_split_stack && split_stack_arg_pointer_used_p ())
30150 /* Set up the arg pointer (r12) for -fsplit-stack code. If
30151 __morestack was called, it left the arg pointer to the old
30152 stack in r29. Otherwise, the arg pointer is the top of the
30153 current frame. */
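/* As assembly, the code emitted below is roughly:
       mr 12,1  (or an addi from the frame reg)
       bge 7,0f
       mr 12,29
   0:
   assuming cr7 still holds the result of the split-stack entry check
   that decided whether to call __morestack.  */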
30154 cfun->machine->split_stack_argp_used = true;
30155 if (sp_adjust)
30157 rtx r12 = gen_rtx_REG (Pmode, 12);
30158 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
30159 emit_insn_before (set_r12, sp_adjust);
30161 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
30163 rtx r12 = gen_rtx_REG (Pmode, 12);
30164 if (frame_off == 0)
30165 emit_move_insn (r12, frame_reg_rtx);
30166 else
30167 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
30169 if (info->push_p)
30171 rtx r12 = gen_rtx_REG (Pmode, 12);
30172 rtx r29 = gen_rtx_REG (Pmode, 29);
30173 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30174 rtx not_more = gen_label_rtx ();
30175 rtx jump;
30177 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30178 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
30179 gen_rtx_LABEL_REF (VOIDmode, not_more),
30180 pc_rtx);
30181 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30182 JUMP_LABEL (jump) = not_more;
30183 LABEL_NUSES (not_more) += 1;
30184 emit_move_insn (r12, r29);
30185 emit_label (not_more);
30190 /* Output .extern statements for the save/restore routines we use. */
30192 static void
30193 rs6000_output_savres_externs (FILE *file)
30195 rs6000_stack_t *info = rs6000_stack_info ();
30197 if (TARGET_DEBUG_STACK)
30198 debug_stack_info (info);
30200 /* Write .extern for any function we will call to save and restore
30201 fp values. */
30202 if (info->first_fp_reg_save < 64
30203 && !TARGET_MACHO
30204 && !TARGET_ELF)
30206 char *name;
30207 int regno = info->first_fp_reg_save - 32;
30209 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
30211 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
30212 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
30213 name = rs6000_savres_routine_name (info, regno, sel);
30214 fprintf (file, "\t.extern %s\n", name);
30216 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
30218 bool lr = (info->savres_strategy
30219 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30220 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30221 name = rs6000_savres_routine_name (info, regno, sel);
30222 fprintf (file, "\t.extern %s\n", name);
30227 /* Write function prologue. */
30229 static void
30230 rs6000_output_function_prologue (FILE *file,
30231 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
30233 if (!cfun->is_thunk)
30234 rs6000_output_savres_externs (file);
30236 /* ELFv2 ABI r2 setup code and local entry point. This must follow
30237 immediately after the global entry point label. */
30238 if (rs6000_global_entry_point_needed_p ())
30240 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
30242 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
30244 if (TARGET_CMODEL != CMODEL_LARGE)
30246 /* In the small and medium code models, we assume the TOC is less
30247 than 2 GB away from the text section, so it can be computed via the
30248 following two-instruction sequence. */
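/* For example, with <n> the current rs6000_pic_labelno, the output
   below is:
   0:      addis 2,12,.TOC.-.LCF<n>@ha
           addi 2,2,.TOC.-.LCF<n>@l
   relying on r12 holding the address of the global entry point.  */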
30249 char buf[256];
30251 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30252 fprintf (file, "0:\taddis 2,12,.TOC.-");
30253 assemble_name (file, buf);
30254 fprintf (file, "@ha\n");
30255 fprintf (file, "\taddi 2,2,.TOC.-");
30256 assemble_name (file, buf);
30257 fprintf (file, "@l\n");
30259 else
30261 /* In the large code model, we allow arbitrary offsets between the
30262 TOC and the text section, so we have to load the offset from
30263 memory. The data field is emitted directly before the global
30264 entry point in rs6000_elf_declare_function_name. */
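/* In this case the output is instead (again with <n> the label
   number, and the .reloc only under HAVE_AS_ENTRY_MARKERS):
           .reloc .,R_PPC64_ENTRY
           ld 2,.LCL<n>-.LCF<n>(12)
           add 2,2,12  */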
30265 char buf[256];
30267 #ifdef HAVE_AS_ENTRY_MARKERS
30268 /* If supported by the linker, emit a marker relocation. If the
30269 total code size of the final executable or shared library
30270 happens to fit into 2 GB after all, the linker will replace
30271 this code sequence with the sequence for the small or medium
30272 code model. */
30273 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
30274 #endif
30275 fprintf (file, "\tld 2,");
30276 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
30277 assemble_name (file, buf);
30278 fprintf (file, "-");
30279 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30280 assemble_name (file, buf);
30281 fprintf (file, "(12)\n");
30282 fprintf (file, "\tadd 2,2,12\n");
30285 fputs ("\t.localentry\t", file);
30286 assemble_name (file, name);
30287 fputs (",.-", file);
30288 assemble_name (file, name);
30289 fputs ("\n", file);
30292 /* Output -mprofile-kernel code. This needs to be done here instead of
30293 in output_function_profile since it must go after the ELFv2 ABI
30294 local entry point. */
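/* For example, with DEFAULT_ABI != ABI_ELFv2 and a live static chain
   in r11, this emits:
       mflr 0
       std 11,24(1)
       bl <mcount>
       ld 11,24(1)
   and just the mflr and the bl when no static chain is live, where
   <mcount> stands for whatever RS6000_MCOUNT expands to.  */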
30295 if (TARGET_PROFILE_KERNEL && crtl->profile)
30297 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
30298 gcc_assert (!TARGET_32BIT);
30300 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
30302 /* In the ELFv2 ABI we have no compiler stack word.  It must be
30303 the responsibility of _mcount to preserve the static chain
30304 register if required. */
30305 if (DEFAULT_ABI != ABI_ELFv2
30306 && cfun->static_chain_decl != NULL)
30308 asm_fprintf (file, "\tstd %s,24(%s)\n",
30309 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30310 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30311 asm_fprintf (file, "\tld %s,24(%s)\n",
30312 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30314 else
30315 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30318 rs6000_pic_labelno++;
30321 /* -mprofile-kernel code calls mcount before the function prologue,
30322 so a profiled leaf function should stay a leaf function. */
30323 static bool
30324 rs6000_keep_leaf_when_profiled ()
30326 return TARGET_PROFILE_KERNEL;
30329 /* Non-zero if vmx regs are restored before the frame pop, zero if
30330 we restore after the pop when possible. */
30331 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
30333 /* Restoring cr is a two-step process: loading a reg from the frame
30334 save, then moving the reg to cr.  For ABI_V4 we must let the
30335 unwinder know that the stack location is no longer valid at or
30336 before the stack deallocation, but we can't emit a cfa_restore for
30337 cr at the stack deallocation like we do for other registers.
30338 The trouble is that it is possible for the move to cr to be
30339 scheduled after the stack deallocation.  So say exactly where cr
30340 is located on each of the two insns. */
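/* The two insns look roughly like (scratch register and offset are
   examples only):
       lwz 12,8(1)       load_cr_save
       mtcrf 0xff,12     restore_saved_cr, possibly as one
                         mtcrf/mtocrf per saved CR field  */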
30342 static rtx
30343 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
30345 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
30346 rtx reg = gen_rtx_REG (SImode, regno);
30347 rtx_insn *insn = emit_move_insn (reg, mem);
30349 if (!exit_func && DEFAULT_ABI == ABI_V4)
30351 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30352 rtx set = gen_rtx_SET (reg, cr);
30354 add_reg_note (insn, REG_CFA_REGISTER, set);
30355 RTX_FRAME_RELATED_P (insn) = 1;
30357 return reg;
30360 /* Reload CR from REG. */
30362 static void
30363 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
30365 int count = 0;
30366 int i;
30368 if (using_mfcr_multiple)
30370 for (i = 0; i < 8; i++)
30371 if (save_reg_p (CR0_REGNO + i))
30372 count++;
30373 gcc_assert (count);
30376 if (using_mfcr_multiple && count > 1)
30378 rtx_insn *insn;
30379 rtvec p;
30380 int ndx;
30382 p = rtvec_alloc (count);
30384 ndx = 0;
30385 for (i = 0; i < 8; i++)
30386 if (save_reg_p (CR0_REGNO + i))
30388 rtvec r = rtvec_alloc (2);
30389 RTVEC_ELT (r, 0) = reg;
30390 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
30391 RTVEC_ELT (p, ndx) =
30392 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
30393 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
30394 ndx++;
30396 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30397 gcc_assert (ndx == count);
30399 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30400 CR field separately. */
30401 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30403 for (i = 0; i < 8; i++)
30404 if (save_reg_p (CR0_REGNO + i))
30405 add_reg_note (insn, REG_CFA_RESTORE,
30406 gen_rtx_REG (SImode, CR0_REGNO + i));
30408 RTX_FRAME_RELATED_P (insn) = 1;
30411 else
30412 for (i = 0; i < 8; i++)
30413 if (save_reg_p (CR0_REGNO + i))
30415 rtx insn = emit_insn (gen_movsi_to_cr_one
30416 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
30418 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30419 CR field separately, attached to the insn that in fact
30420 restores this particular CR field. */
30421 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30423 add_reg_note (insn, REG_CFA_RESTORE,
30424 gen_rtx_REG (SImode, CR0_REGNO + i));
30426 RTX_FRAME_RELATED_P (insn) = 1;
30430 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
30431 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
30432 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
30434 rtx_insn *insn = get_last_insn ();
30435 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30437 add_reg_note (insn, REG_CFA_RESTORE, cr);
30438 RTX_FRAME_RELATED_P (insn) = 1;
30442 /* Like cr, the move to lr instruction can be scheduled after the
30443 stack deallocation, but unlike cr, its stack frame save is still
30444 valid. So we only need to emit the cfa_restore on the correct
30445 instruction. */
30447 static void
30448 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
30450 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
30451 rtx reg = gen_rtx_REG (Pmode, regno);
30453 emit_move_insn (reg, mem);
30456 static void
30457 restore_saved_lr (int regno, bool exit_func)
30459 rtx reg = gen_rtx_REG (Pmode, regno);
30460 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30461 rtx_insn *insn = emit_move_insn (lr, reg);
30463 if (!exit_func && flag_shrink_wrap)
30465 add_reg_note (insn, REG_CFA_RESTORE, lr);
30466 RTX_FRAME_RELATED_P (insn) = 1;
30470 static rtx
30471 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
30473 if (DEFAULT_ABI == ABI_ELFv2)
30475 int i;
30476 for (i = 0; i < 8; i++)
30477 if (save_reg_p (CR0_REGNO + i))
30479 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
30480 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
30481 cfa_restores);
30484 else if (info->cr_save_p)
30485 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30486 gen_rtx_REG (SImode, CR2_REGNO),
30487 cfa_restores);
30489 if (info->lr_save_p)
30490 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30491 gen_rtx_REG (Pmode, LR_REGNO),
30492 cfa_restores);
30493 return cfa_restores;
30496 /* Return true if OFFSET from stack pointer can be clobbered by signals.
30497 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
30498 below the stack pointer that are not clobbered by signals. */
30500 static inline bool
30501 offset_below_red_zone_p (HOST_WIDE_INT offset)
30503 return offset < (DEFAULT_ABI == ABI_V4
30504 ? 0
30505 : TARGET_32BIT ? -220 : -288);
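/* For example, on 64-bit AIX/ELF an AltiVec save slot at offset -416
   is below the 288-byte cushion and so may be clobbered by a signal
   handler, while a slot at offset -280 is protected.  */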
30508 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
30510 static void
30511 emit_cfa_restores (rtx cfa_restores)
30513 rtx_insn *insn = get_last_insn ();
30514 rtx *loc = &REG_NOTES (insn);
30516 while (*loc)
30517 loc = &XEXP (*loc, 1);
30518 *loc = cfa_restores;
30519 RTX_FRAME_RELATED_P (insn) = 1;
30522 /* Emit function epilogue as insns. */
30524 void
30525 rs6000_emit_epilogue (int sibcall)
30527 rs6000_stack_t *info;
30528 int restoring_GPRs_inline;
30529 int restoring_FPRs_inline;
30530 int using_load_multiple;
30531 int using_mtcr_multiple;
30532 int use_backchain_to_restore_sp;
30533 int restore_lr;
30534 int strategy;
30535 HOST_WIDE_INT frame_off = 0;
30536 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
30537 rtx frame_reg_rtx = sp_reg_rtx;
30538 rtx cfa_restores = NULL_RTX;
30539 rtx insn;
30540 rtx cr_save_reg = NULL_RTX;
30541 machine_mode reg_mode = Pmode;
30542 int reg_size = TARGET_32BIT ? 4 : 8;
30543 int i;
30544 bool exit_func;
30545 unsigned ptr_regno;
30547 info = rs6000_stack_info ();
30549 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
30551 reg_mode = V2SImode;
30552 reg_size = 8;
30555 strategy = info->savres_strategy;
30556 using_load_multiple = strategy & REST_MULTIPLE;
30557 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
30558 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
30559 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
30560 || rs6000_cpu == PROCESSOR_PPC603
30561 || rs6000_cpu == PROCESSOR_PPC750
30562 || optimize_size);
30563 /* Restore via the backchain when we have a large frame, since this
30564 is more efficient than an addis, addi pair.  The second condition
30565 here will not trigger at the moment; we don't actually need a
30566 frame pointer for alloca, but the generic parts of the compiler
30567 give us one anyway. */
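/* I.e. a single "ld 11,0(1)" (or lwz on 32-bit) through the
   ABI-mandated backchain word recovers the old stack pointer, where a
   frame larger than 32767 bytes would otherwise need an addis/addi
   pair to reconstruct it.  */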
30568 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
30569 ? info->lr_save_offset
30570 : 0) > 32767
30571 || (cfun->calls_alloca
30572 && !frame_pointer_needed));
30573 restore_lr = (info->lr_save_p
30574 && (restoring_FPRs_inline
30575 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
30576 && (restoring_GPRs_inline
30577 || info->first_fp_reg_save < 64)
30578 && !cfun->machine->lr_is_wrapped_separately);
30581 if (WORLD_SAVE_P (info))
30583 int i, j;
30584 char rname[30];
30585 const char *alloc_rname;
30586 rtvec p;
30588 /* eh_rest_world_r10 will return to the location saved in the LR
30589 stack slot (which is not likely to be our caller).
30590 Input: R10 -- stack adjustment.  Clobbers R0, R11, R12, R7, R8.
30591 rest_world is similar, except any R10 parameter is ignored.
30592 The exception-handling stuff that was here in 2.95 is no
30593 longer necessary. */
30595 p = rtvec_alloc (9
30596 + 32 - info->first_gp_reg_save
30597 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
30598 + 63 + 1 - info->first_fp_reg_save);
30600 strcpy (rname, ((crtl->calls_eh_return) ?
30601 "*eh_rest_world_r10" : "*rest_world"));
30602 alloc_rname = ggc_strdup (rname);
30604 j = 0;
30605 RTVEC_ELT (p, j++) = ret_rtx;
30606 RTVEC_ELT (p, j++)
30607 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
30608 /* The instruction pattern requires a clobber here;
30609 it is shared with the restVEC helper. */
30610 RTVEC_ELT (p, j++)
30611 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
30614 /* CR register traditionally saved as CR2. */
30615 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
30616 RTVEC_ELT (p, j++)
30617 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
30618 if (flag_shrink_wrap)
30620 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30621 gen_rtx_REG (Pmode, LR_REGNO),
30622 cfa_restores);
30623 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30627 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30629 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
30630 RTVEC_ELT (p, j++)
30631 = gen_frame_load (reg,
30632 frame_reg_rtx, info->gp_save_offset + reg_size * i);
30633 if (flag_shrink_wrap)
30634 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30636 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
30638 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
30639 RTVEC_ELT (p, j++)
30640 = gen_frame_load (reg,
30641 frame_reg_rtx, info->altivec_save_offset + 16 * i);
30642 if (flag_shrink_wrap)
30643 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30645 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
30647 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
30648 ? DFmode : SFmode),
30649 info->first_fp_reg_save + i);
30650 RTVEC_ELT (p, j++)
30651 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
30652 if (flag_shrink_wrap)
30653 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30655 RTVEC_ELT (p, j++)
30656 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
30657 RTVEC_ELT (p, j++)
30658 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
30659 RTVEC_ELT (p, j++)
30660 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
30661 RTVEC_ELT (p, j++)
30662 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
30663 RTVEC_ELT (p, j++)
30664 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
30665 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
30667 if (flag_shrink_wrap)
30669 REG_NOTES (insn) = cfa_restores;
30670 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
30671 RTX_FRAME_RELATED_P (insn) = 1;
30673 return;
30676 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
30677 if (info->push_p)
30678 frame_off = info->total_size;
30680 /* Restore AltiVec registers if we must do so before adjusting the
30681 stack. */
30682 if (info->altivec_size != 0
30683 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30684 || (DEFAULT_ABI != ABI_V4
30685 && offset_below_red_zone_p (info->altivec_save_offset))))
30687 int i;
30688 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
30690 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
30691 if (use_backchain_to_restore_sp)
30693 int frame_regno = 11;
30695 if ((strategy & REST_INLINE_VRS) == 0)
30697 /* Of r11 and r12, select the one not clobbered by an
30698 out-of-line restore function for the frame register. */
30699 frame_regno = 11 + 12 - scratch_regno;
30701 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
30702 emit_move_insn (frame_reg_rtx,
30703 gen_rtx_MEM (Pmode, sp_reg_rtx));
30704 frame_off = 0;
30706 else if (frame_pointer_needed)
30707 frame_reg_rtx = hard_frame_pointer_rtx;
30709 if ((strategy & REST_INLINE_VRS) == 0)
30711 int end_save = info->altivec_save_offset + info->altivec_size;
30712 int ptr_off;
30713 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
30714 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
30716 if (end_save + frame_off != 0)
30718 rtx offset = GEN_INT (end_save + frame_off);
30720 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30722 else
30723 emit_move_insn (ptr_reg, frame_reg_rtx);
30725 ptr_off = -end_save;
30726 insn = rs6000_emit_savres_rtx (info, scratch_reg,
30727 info->altivec_save_offset + ptr_off,
30728 0, V4SImode, SAVRES_VR);
30730 else
30732 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30733 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
30735 rtx addr, areg, mem, insn;
30736 rtx reg = gen_rtx_REG (V4SImode, i);
30737 HOST_WIDE_INT offset
30738 = (info->altivec_save_offset + frame_off
30739 + 16 * (i - info->first_altivec_reg_save));
30741 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
30743 mem = gen_frame_mem (V4SImode,
30744 gen_rtx_PLUS (Pmode, frame_reg_rtx,
30745 GEN_INT (offset)));
30746 insn = gen_rtx_SET (reg, mem);
30748 else
30750 areg = gen_rtx_REG (Pmode, 0);
30751 emit_move_insn (areg, GEN_INT (offset));
30753 /* AltiVec addressing mode is [reg+reg]. */
30754 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
30755 mem = gen_frame_mem (V4SImode, addr);
30757 /* Rather than emitting a generic move, force use of the
30758 lvx instruction, which we always want. In particular we
30759 don't want lxvd2x/xxpermdi for little endian. */
30760 insn = gen_altivec_lvx_v4si_internal (reg, mem);
30763 (void) emit_insn (insn);
30767 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30768 if (((strategy & REST_INLINE_VRS) == 0
30769 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
30770 && (flag_shrink_wrap
30771 || (offset_below_red_zone_p
30772 (info->altivec_save_offset
30773 + 16 * (i - info->first_altivec_reg_save)))))
30775 rtx reg = gen_rtx_REG (V4SImode, i);
30776 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30780 /* Restore VRSAVE if we must do so before adjusting the stack. */
30781 if (info->vrsave_size != 0
30782 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30783 || (DEFAULT_ABI != ABI_V4
30784 && offset_below_red_zone_p (info->vrsave_save_offset))))
30786 rtx reg;
30788 if (frame_reg_rtx == sp_reg_rtx)
30790 if (use_backchain_to_restore_sp)
30792 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
30793 emit_move_insn (frame_reg_rtx,
30794 gen_rtx_MEM (Pmode, sp_reg_rtx));
30795 frame_off = 0;
30797 else if (frame_pointer_needed)
30798 frame_reg_rtx = hard_frame_pointer_rtx;
30801 reg = gen_rtx_REG (SImode, 12);
30802 emit_insn (gen_frame_load (reg, frame_reg_rtx,
30803 info->vrsave_save_offset + frame_off));
30805 emit_insn (generate_set_vrsave (reg, info, 1));
30808 insn = NULL_RTX;
30809 /* If we have a large stack frame, restore the old stack pointer
30810 using the backchain. */
30811 if (use_backchain_to_restore_sp)
30813 if (frame_reg_rtx == sp_reg_rtx)
30815 /* Under V.4, don't reset the stack pointer until after we're done
30816 loading the saved registers. */
30817 if (DEFAULT_ABI == ABI_V4)
30818 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
30820 insn = emit_move_insn (frame_reg_rtx,
30821 gen_rtx_MEM (Pmode, sp_reg_rtx));
30822 frame_off = 0;
30824 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30825 && DEFAULT_ABI == ABI_V4)
30826 /* frame_reg_rtx has been set up by the altivec restore. */
30828 else
30830 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
30831 frame_reg_rtx = sp_reg_rtx;
30834 /* If we have a frame pointer, we can restore the old stack pointer
30835 from it. */
30836 else if (frame_pointer_needed)
30838 frame_reg_rtx = sp_reg_rtx;
30839 if (DEFAULT_ABI == ABI_V4)
30840 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
30841 /* Prevent reordering memory accesses against stack pointer restore. */
30842 else if (cfun->calls_alloca
30843 || offset_below_red_zone_p (-info->total_size))
30844 rs6000_emit_stack_tie (frame_reg_rtx, true);
30846 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
30847 GEN_INT (info->total_size)));
30848 frame_off = 0;
30850 else if (info->push_p
30851 && DEFAULT_ABI != ABI_V4
30852 && !crtl->calls_eh_return)
30854 /* Prevent reordering memory accesses against stack pointer restore. */
30855 if (cfun->calls_alloca
30856 || offset_below_red_zone_p (-info->total_size))
30857 rs6000_emit_stack_tie (frame_reg_rtx, false);
30858 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
30859 GEN_INT (info->total_size)));
30860 frame_off = 0;
30862 if (insn && frame_reg_rtx == sp_reg_rtx)
30864 if (cfa_restores)
30866 REG_NOTES (insn) = cfa_restores;
30867 cfa_restores = NULL_RTX;
30869 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
30870 RTX_FRAME_RELATED_P (insn) = 1;
30873 /* Restore AltiVec registers if we have not done so already. */
30874 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30875 && info->altivec_size != 0
30876 && (DEFAULT_ABI == ABI_V4
30877 || !offset_below_red_zone_p (info->altivec_save_offset)))
30879 int i;
30881 if ((strategy & REST_INLINE_VRS) == 0)
30883 int end_save = info->altivec_save_offset + info->altivec_size;
30884 int ptr_off;
30885 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
30886 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
30887 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
30889 if (end_save + frame_off != 0)
30891 rtx offset = GEN_INT (end_save + frame_off);
30893 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30895 else
30896 emit_move_insn (ptr_reg, frame_reg_rtx);
30898 ptr_off = -end_save;
30899 insn = rs6000_emit_savres_rtx (info, scratch_reg,
30900 info->altivec_save_offset + ptr_off,
30901 0, V4SImode, SAVRES_VR);
30902 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
30904 /* Frame reg was clobbered by out-of-line save. Restore it
30905 from ptr_reg, and if we are calling an out-of-line gpr or
30906 fpr restore, set up the correct pointer and offset. */
30907 unsigned newptr_regno = 1;
30908 if (!restoring_GPRs_inline)
30910 bool lr = info->gp_save_offset + info->gp_size == 0;
30911 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
30912 newptr_regno = ptr_regno_for_savres (sel);
30913 end_save = info->gp_save_offset + info->gp_size;
30915 else if (!restoring_FPRs_inline)
30917 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
30918 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30919 newptr_regno = ptr_regno_for_savres (sel);
30920 end_save = info->fp_save_offset + info->fp_size;
30923 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
30924 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
30926 if (end_save + ptr_off != 0)
30928 rtx offset = GEN_INT (end_save + ptr_off);
30930 frame_off = -end_save;
30931 if (TARGET_32BIT)
30932 emit_insn (gen_addsi3_carry (frame_reg_rtx,
30933 ptr_reg, offset));
30934 else
30935 emit_insn (gen_adddi3_carry (frame_reg_rtx,
30936 ptr_reg, offset));
30938 else
30940 frame_off = ptr_off;
30941 emit_move_insn (frame_reg_rtx, ptr_reg);
30945 else
30947 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30948 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
30950 rtx addr, areg, mem, insn;
30951 rtx reg = gen_rtx_REG (V4SImode, i);
30952 HOST_WIDE_INT offset
30953 = (info->altivec_save_offset + frame_off
30954 + 16 * (i - info->first_altivec_reg_save));
30956 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
30958 mem = gen_frame_mem (V4SImode,
30959 gen_rtx_PLUS (Pmode, frame_reg_rtx,
30960 GEN_INT (offset)));
30961 insn = gen_rtx_SET (reg, mem);
30963 else
30965 areg = gen_rtx_REG (Pmode, 0);
30966 emit_move_insn (areg, GEN_INT (offset));
30968 /* AltiVec addressing mode is [reg+reg]. */
30969 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
30970 mem = gen_frame_mem (V4SImode, addr);
30972 /* Rather than emitting a generic move, force use of the
30973 lvx instruction, which we always want. In particular we
30974 don't want lxvd2x/xxpermdi for little endian. */
30975 insn = gen_altivec_lvx_v4si_internal (reg, mem);
30978 (void) emit_insn (insn);
30982 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30983 if (((strategy & REST_INLINE_VRS) == 0
30984 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
30985 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
30987 rtx reg = gen_rtx_REG (V4SImode, i);
30988 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30992 /* Restore VRSAVE if we have not done so already. */
30993 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30994 && info->vrsave_size != 0
30995 && (DEFAULT_ABI == ABI_V4
30996 || !offset_below_red_zone_p (info->vrsave_save_offset)))
30998 rtx reg;
31000 reg = gen_rtx_REG (SImode, 12);
31001 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31002 info->vrsave_save_offset + frame_off));
31004 emit_insn (generate_set_vrsave (reg, info, 1));
31007 /* If we exit by an out-of-line restore function on ABI_V4 then that
31008 function will deallocate the stack, so we don't need to worry
31009 about the unwinder restoring cr from an invalid stack frame
31010 location. */
31011 exit_func = (!restoring_FPRs_inline
31012 || (!restoring_GPRs_inline
31013 && info->first_fp_reg_save == 64));
31015 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
31016 *separate* slots if the routine calls __builtin_eh_return, so
31017 that they can be independently restored by the unwinder. */
31018 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
31020 int i, cr_off = info->ehcr_offset;
31022 for (i = 0; i < 8; i++)
31023 if (!call_used_regs[CR0_REGNO + i])
31025 rtx reg = gen_rtx_REG (SImode, 0);
31026 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31027 cr_off + frame_off));
31029 insn = emit_insn (gen_movsi_to_cr_one
31030 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
31032 if (!exit_func && flag_shrink_wrap)
31034 add_reg_note (insn, REG_CFA_RESTORE,
31035 gen_rtx_REG (SImode, CR0_REGNO + i));
31037 RTX_FRAME_RELATED_P (insn) = 1;
31040 cr_off += reg_size;
31044 /* Get the old lr if we saved it. If we are restoring registers
31045 out-of-line, then the out-of-line routines can do this for us. */
31046 if (restore_lr && restoring_GPRs_inline)
31047 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31049 /* Get the old cr if we saved it. */
31050 if (info->cr_save_p)
31052 unsigned cr_save_regno = 12;
31054 if (!restoring_GPRs_inline)
31056 /* Ensure we don't use the register used by the out-of-line
31057 gpr register restore below. */
31058 bool lr = info->gp_save_offset + info->gp_size == 0;
31059 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31060 int gpr_ptr_regno = ptr_regno_for_savres (sel);
31062 if (gpr_ptr_regno == 12)
31063 cr_save_regno = 11;
31064 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
31066 else if (REGNO (frame_reg_rtx) == 12)
31067 cr_save_regno = 11;
31069 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
31070 info->cr_save_offset + frame_off,
31071 exit_func);
31074 /* Set LR here to try to overlap restores below. */
31075 if (restore_lr && restoring_GPRs_inline)
31076 restore_saved_lr (0, exit_func);
31078 /* Load exception handler data registers, if needed. */
31079 if (crtl->calls_eh_return)
31081 unsigned int i, regno;
31083 if (TARGET_AIX)
31085 rtx reg = gen_rtx_REG (reg_mode, 2);
31086 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31087 frame_off + RS6000_TOC_SAVE_SLOT));
31090 for (i = 0; ; ++i)
31092 rtx mem;
31094 regno = EH_RETURN_DATA_REGNO (i);
31095 if (regno == INVALID_REGNUM)
31096 break;
31098 /* Note: possible use of r0 here to address SPE regs. */
31099 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
31100 info->ehrd_offset + frame_off
31101 + reg_size * (int) i);
31103 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
31107 /* Restore GPRs. This is done as a PARALLEL if we are using
31108 the load-multiple instructions. */
31109 if (TARGET_SPE_ABI
31110 && info->spe_64bit_regs_used
31111 && info->first_gp_reg_save != 32)
31113 /* Determine whether we can address all of the registers that need
31114 to be saved with an offset from frame_reg_rtx that fits in
31115 the small const field for SPE memory instructions. */
31116 int spe_regs_addressable
31117 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
31118 + reg_size * (32 - info->first_gp_reg_save - 1))
31119 && restoring_GPRs_inline);
31121 if (!spe_regs_addressable)
31123 int ool_adjust = 0;
31124 rtx old_frame_reg_rtx = frame_reg_rtx;
31125 /* Make r11 point to the start of the SPE save area. We worried about
31126 not clobbering it when we were saving registers in the prologue.
31127 There's no need to worry here because the static chain is passed
31128 anew to every function. */
31130 if (!restoring_GPRs_inline)
31131 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
31132 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31133 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
31134 GEN_INT (info->spe_gp_save_offset
31135 + frame_off
31136 - ool_adjust)));
31137 /* Keep the invariant that frame_reg_rtx + frame_off points
31138 at the top of the stack frame. */
31139 frame_off = -info->spe_gp_save_offset + ool_adjust;
31142 if (restoring_GPRs_inline)
31144 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
31146 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31147 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
31149 rtx offset, addr, mem, reg;
31151 /* We're doing all this to ensure that the immediate offset
31152 fits into the immediate field of 'evldd'. */
31153 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
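/* evldd's displacement field is 5 bits scaled by 8, so the reachable
offsets are the multiples of 8 from 0 to 248; SPE_CONST_OFFSET_OK
encodes that bound. */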
31155 offset = GEN_INT (spe_offset + reg_size * i);
31156 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
31157 mem = gen_rtx_MEM (V2SImode, addr);
31158 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31160 emit_move_insn (reg, mem);
31163 else
31164 rs6000_emit_savres_rtx (info, frame_reg_rtx,
31165 info->spe_gp_save_offset + frame_off,
31166 info->lr_save_offset + frame_off,
31167 reg_mode,
31168 SAVRES_GPR | SAVRES_LR);
31170 else if (!restoring_GPRs_inline)
31172 /* We are jumping to an out-of-line function. */
31173 rtx ptr_reg;
31174 int end_save = info->gp_save_offset + info->gp_size;
31175 bool can_use_exit = end_save == 0;
31176 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
31177 int ptr_off;
31179 /* Emit stack reset code if we need it. */
31180 ptr_regno = ptr_regno_for_savres (sel);
31181 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
31182 if (can_use_exit)
31183 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31184 else if (end_save + frame_off != 0)
31185 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
31186 GEN_INT (end_save + frame_off)));
31187 else if (REGNO (frame_reg_rtx) != ptr_regno)
31188 emit_move_insn (ptr_reg, frame_reg_rtx);
31189 if (REGNO (frame_reg_rtx) == ptr_regno)
31190 frame_off = -end_save;
31192 if (can_use_exit && info->cr_save_p)
31193 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
31195 ptr_off = -end_save;
31196 rs6000_emit_savres_rtx (info, ptr_reg,
31197 info->gp_save_offset + ptr_off,
31198 info->lr_save_offset + ptr_off,
31199 reg_mode, sel);
31201 else if (using_load_multiple)
31203 rtvec p;
31204 p = rtvec_alloc (32 - info->first_gp_reg_save);
31205 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31206 RTVEC_ELT (p, i)
31207 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
31208 frame_reg_rtx,
31209 info->gp_save_offset + frame_off + reg_size * i);
31210 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
31212 else
31214 int offset = info->gp_save_offset + frame_off;
31215 for (i = info->first_gp_reg_save; i < 32; i++)
31217 if (rs6000_reg_live_or_pic_offset_p (i)
31218 && !cfun->machine->gpr_is_wrapped_separately[i])
31220 rtx reg = gen_rtx_REG (reg_mode, i);
31221 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31224 offset += reg_size;
31228 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31230 /* If the frame pointer was used then we can't delay emitting
31231 a REG_CFA_DEF_CFA note. This must happen on the insn that
31232 restores the frame pointer, r31. We may have already emitted
31233 a REG_CFA_DEF_CFA note, but that's OK; A duplicate is
31234 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
31235 be harmless if emitted. */
31236 if (frame_pointer_needed)
31238 insn = get_last_insn ();
31239 add_reg_note (insn, REG_CFA_DEF_CFA,
31240 plus_constant (Pmode, frame_reg_rtx, frame_off));
31241 RTX_FRAME_RELATED_P (insn) = 1;
31244 /* Set up cfa_restores. We always need these when
31245 shrink-wrapping. If not shrink-wrapping then we only need
31246 the cfa_restore when the stack location is no longer valid.
31247 The cfa_restores must be emitted on or before the insn that
31248 invalidates the stack, and of course must not be emitted
31249 before the insn that actually does the restore. The latter
31250 is why it is a bad idea to emit the cfa_restores as a group
31251 on the last instruction here that actually does a restore:
31252 That insn may be reordered with respect to others doing
31253 restores. */
31254 if (flag_shrink_wrap
31255 && !restoring_GPRs_inline
31256 && info->first_fp_reg_save == 64)
31257 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31259 for (i = info->first_gp_reg_save; i < 32; i++)
31260 if (!restoring_GPRs_inline
31261 || using_load_multiple
31262 || rs6000_reg_live_or_pic_offset_p (i))
31264 if (cfun->machine->gpr_is_wrapped_separately[i])
31265 continue;
31267 rtx reg = gen_rtx_REG (reg_mode, i);
31268 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31272 if (!restoring_GPRs_inline
31273 && info->first_fp_reg_save == 64)
31275 /* We are jumping to an out-of-line function. */
31276 if (cfa_restores)
31277 emit_cfa_restores (cfa_restores);
31278 return;
31281 if (restore_lr && !restoring_GPRs_inline)
31283 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31284 restore_saved_lr (0, exit_func);
31288 /* Restore fprs if we need to do it without calling a function. */
31288 if (restoring_FPRs_inline)
31289 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
31290 if (save_reg_p (info->first_fp_reg_save + i))
31292 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
31293 ? DFmode : SFmode),
31294 info->first_fp_reg_save + i);
31295 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31296 info->fp_save_offset + frame_off + 8 * i));
31297 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31298 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31301 /* If we saved cr, restore it here; just those fields that were used. */
31302 if (info->cr_save_p)
31303 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
31305 /* If this is V.4, unwind the stack pointer after all of the loads
31306 have been done, or set up r11 if we are restoring fp out of line. */
31307 ptr_regno = 1;
31308 if (!restoring_FPRs_inline)
31310 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31311 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31312 ptr_regno = ptr_regno_for_savres (sel);
31315 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31316 if (REGNO (frame_reg_rtx) == ptr_regno)
31317 frame_off = 0;
31319 if (insn && restoring_FPRs_inline)
31321 if (cfa_restores)
31323 REG_NOTES (insn) = cfa_restores;
31324 cfa_restores = NULL_RTX;
31326 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31327 RTX_FRAME_RELATED_P (insn) = 1;
31330 if (crtl->calls_eh_return)
31332 rtx sa = EH_RETURN_STACKADJ_RTX;
31333 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
31336 if (!sibcall && restoring_FPRs_inline)
31338 if (cfa_restores)
31340 /* We can't hang the cfa_restores off a simple return,
31341 since the shrink-wrap code sometimes uses an existing
31342 return. This means there might be a path from
31343 pre-prologue code to this return, and dwarf2cfi code
31344 wants the eh_frame unwinder state to be the same on
31345 all paths to any point. So we need to emit the
31346 cfa_restores before the return. For -m64 we really
31347 don't need epilogue cfa_restores at all, except for
31348 this irritating dwarf2cfi-with-shrink-wrap
31349 requirement; the stack red-zone means eh_frame info
31350 from the prologue telling the unwinder to restore
31351 from the stack is perfectly good right to the end of
31352 the function. */
31353 emit_insn (gen_blockage ());
31354 emit_cfa_restores (cfa_restores);
31355 cfa_restores = NULL_RTX;
31358 emit_jump_insn (targetm.gen_simple_return ());
31361 if (!sibcall && !restoring_FPRs_inline)
31363 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31364 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
31365 int elt = 0;
31366 RTVEC_ELT (p, elt++) = ret_rtx;
31367 if (lr)
31368 RTVEC_ELT (p, elt++)
31369 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
31371 /* We have to restore more than two FP registers, so branch to the
31372 restore function. It will return to our caller. */
31373 int i;
31374 int reg;
31375 rtx sym;
31377 if (flag_shrink_wrap)
31378 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31380 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
31381 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
31382 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
31383 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
31385 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
31387 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
31389 RTVEC_ELT (p, elt++)
31390 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
31391 if (flag_shrink_wrap)
31392 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31395 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31398 if (cfa_restores)
31400 if (sibcall)
31401 /* Ensure the cfa_restores are hung off an insn that won't
31402 be reordered above other restores. */
31403 emit_insn (gen_blockage ());
31405 emit_cfa_restores (cfa_restores);
31409 /* Write function epilogue. */
31411 static void
31412 rs6000_output_function_epilogue (FILE *file,
31413 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
31415 #if TARGET_MACHO
31416 macho_branch_islands ();
31419 rtx_insn *insn = get_last_insn ();
31420 rtx_insn *deleted_debug_label = NULL;
31422 /* Mach-O doesn't support labels at the end of objects, so if
31423 it looks like we might want one, take special action.
31425 First, collect any sequence of deleted debug labels. */
31426 while (insn
31427 && NOTE_P (insn)
31428 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
31430 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
31431 notes only; instead set their CODE_LABEL_NUMBER to -1,
31432 otherwise there would be code generation differences
31433 between -g and -g0. */
31434 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31435 deleted_debug_label = insn;
31436 insn = PREV_INSN (insn);
31439 /* Second, if we have:
31440 label:
31441 barrier
31442 then this needs to be detected, so skip past the barrier. */
31444 if (insn && BARRIER_P (insn))
31445 insn = PREV_INSN (insn);
31447 /* Up to now we've only seen notes or barriers. */
31448 if (insn)
31450 if (LABEL_P (insn)
31451 || (NOTE_P (insn)
31452 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
31453 /* Trailing label: <barrier>. */
31454 fputs ("\tnop\n", file);
31455 else
31457 /* Lastly, see if we have a completely empty function body. */
31458 while (insn && ! INSN_P (insn))
31459 insn = PREV_INSN (insn);
31460 /* If we don't find any insns, we've got an empty function body;
31461 i.e. completely empty, without a return or branch. This is
31462 taken as the case where a function body has been removed
31463 because it contains an inline __builtin_unreachable(). GCC
31464 states that reaching __builtin_unreachable() means UB so we're
31465 not obliged to do anything special; however, we want
31466 non-zero-sized function bodies. To meet this, and help the
31467 user out, let's trap the case. */
31468 if (insn == NULL)
31469 fputs ("\ttrap\n", file);
31472 else if (deleted_debug_label)
31473 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
31474 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31475 CODE_LABEL_NUMBER (insn) = -1;
31477 #endif
31479 /* Output a traceback table here. See /usr/include/sys/debug.h for info
31480 on its format.
31482 We don't output a traceback table if -finhibit-size-directive was
31483 used. The documentation for -finhibit-size-directive reads
31484 ``don't output a @code{.size} assembler directive, or anything
31485 else that would cause trouble if the function is split in the
31486 middle, and the two halves are placed at locations far apart in
31487 memory.'' The traceback table has this property, since it
31488 includes the offset from the start of the function to the
31489 traceback table itself.
31491 System V.4 PowerPC (and the embedded ABI derived from it) uses a
31492 different traceback table. */
31493 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31494 && ! flag_inhibit_size_directive
31495 && rs6000_traceback != traceback_none && !cfun->is_thunk)
31497 const char *fname = NULL;
31498 const char *language_string = lang_hooks.name;
31499 int fixed_parms = 0, float_parms = 0, parm_info = 0;
31500 int i;
31501 int optional_tbtab;
31502 rs6000_stack_t *info = rs6000_stack_info ();
31504 if (rs6000_traceback == traceback_full)
31505 optional_tbtab = 1;
31506 else if (rs6000_traceback == traceback_part)
31507 optional_tbtab = 0;
31508 else
31509 optional_tbtab = !optimize_size && !TARGET_ELF;
31511 if (optional_tbtab)
31513 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
31514 while (*fname == '.') /* V.4 encodes . in the name */
31515 fname++;
31517 /* Need label immediately before tbtab, so we can compute
31518 its offset from the function start. */
31519 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
31520 ASM_OUTPUT_LABEL (file, fname);
31523 /* The .tbtab pseudo-op can only be used for the first eight
31524 expressions, since it can't handle the possibly variable
31525 length fields that follow. However, if you omit the optional
31526 fields, the assembler outputs zeros for all optional fields
31527 anyway, giving each variable-length field its minimum length
31528 (as defined in sys/debug.h). Thus we cannot use the .tbtab
31529 pseudo-op at all. */
31531 /* An all-zero word flags the start of the tbtab, for debuggers
31532 that have to find it by searching forward from the entry
31533 point or from the current pc. */
31534 fputs ("\t.long 0\n", file);
31536 /* Tbtab format type. Use format type 0. */
31537 fputs ("\t.byte 0,", file);
31539 /* Language type. Unfortunately, there does not seem to be any
31540 official way to discover the language being compiled, so we
31541 use language_string.
31542 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
31543 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
31544 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
31545 either, so for now use 0. */
31546 if (lang_GNU_C ()
31547 || ! strcmp (language_string, "GNU GIMPLE")
31548 || ! strcmp (language_string, "GNU Go")
31549 || ! strcmp (language_string, "libgccjit"))
31550 i = 0;
31551 else if (! strcmp (language_string, "GNU F77")
31552 || lang_GNU_Fortran ())
31553 i = 1;
31554 else if (! strcmp (language_string, "GNU Pascal"))
31555 i = 2;
31556 else if (! strcmp (language_string, "GNU Ada"))
31557 i = 3;
31558 else if (lang_GNU_CXX ()
31559 || ! strcmp (language_string, "GNU Objective-C++"))
31560 i = 9;
31561 else if (! strcmp (language_string, "GNU Java"))
31562 i = 13;
31563 else if (! strcmp (language_string, "GNU Objective-C"))
31564 i = 14;
31565 else
31566 gcc_unreachable ();
31567 fprintf (file, "%d,", i);
31569 /* 8 single bit fields: global linkage (not set for C extern linkage,
31570 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
31571 from start of procedure stored in tbtab, internal function, function
31572 has controlled storage, function has no toc, function uses fp,
31573 function logs/aborts fp operations. */
31574 /* Assume that fp operations are used if any fp reg must be saved. */
31575 fprintf (file, "%d,",
31576 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
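/* For example, with optional_tbtab = 1 and at least one FP register
saved, the byte emitted above is (1 << 5) | (1 << 1) = 34. */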
31578 /* 6 bitfields: function is interrupt handler, name present in
31579 proc table, function calls alloca, on condition directives
31580 (controls stack walks, 3 bits), saves condition reg, saves
31581 link reg. */
31582 /* The `function calls alloca' bit seems to be set whenever reg 31 is
31583 set up as a frame pointer, even when there is no alloca call. */
31584 fprintf (file, "%d,",
31585 ((optional_tbtab << 6)
31586 | ((optional_tbtab & frame_pointer_needed) << 5)
31587 | (info->cr_save_p << 1)
31588 | (info->lr_save_p)));
31590 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
31591 (6 bits). */
31592 fprintf (file, "%d,",
31593 (info->push_p << 7) | (64 - info->first_fp_reg_save));
31595 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
31596 fprintf (file, "%d,", (32 - first_reg_to_save ()));
31598 if (optional_tbtab)
31600 /* Compute the parameter info from the function decl argument
31601 list. */
31602 tree decl;
31603 int next_parm_info_bit = 31;
31605 for (decl = DECL_ARGUMENTS (current_function_decl);
31606 decl; decl = DECL_CHAIN (decl))
31608 rtx parameter = DECL_INCOMING_RTL (decl);
31609 machine_mode mode = GET_MODE (parameter);
31611 if (GET_CODE (parameter) == REG)
31613 if (SCALAR_FLOAT_MODE_P (mode))
31615 int bits;
31617 float_parms++;
31619 switch (mode)
31621 case SFmode:
31622 case SDmode:
31623 bits = 0x2;
31624 break;
31626 case DFmode:
31627 case DDmode:
31628 case TFmode:
31629 case TDmode:
31630 case IFmode:
31631 case KFmode:
31632 bits = 0x3;
31633 break;
31635 default:
31636 gcc_unreachable ();
31639 /* If only one bit will fit, don't OR in this entry. */
31640 if (next_parm_info_bit > 0)
31641 parm_info |= (bits << (next_parm_info_bit - 1));
31642 next_parm_info_bit -= 2;
31644 else
31646 fixed_parms += ((GET_MODE_SIZE (mode)
31647 + (UNITS_PER_WORD - 1))
31648 / UNITS_PER_WORD);
31649 next_parm_info_bit -= 1;
31655 /* Number of fixed point parameters. */
31656 /* This is actually the number of words of fixed point parameters; thus
31657 an 8-byte struct counts as 2, and thus the maximum value is 8. */
31658 fprintf (file, "%d,", fixed_parms);
31660 /* 2 bitfields: number of floating point parameters (7 bits), parameters
31661 all on stack. */
31662 /* This is actually the number of fp registers that hold parameters;
31663 and thus the maximum value is 13. */
31664 /* Set parameters on stack bit if parameters are not in their original
31665 registers, regardless of whether they are on the stack? Xlc
31666 seems to set the bit when not optimizing. */
31667 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
31669 if (optional_tbtab)
31671 /* Optional fields follow. Some are variable length. */
31673 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
31674 float, 11 double float. */
31675 /* There is an entry for each parameter in a register, in the order
31676 that they occur in the parameter list. Any intervening arguments
31677 on the stack are ignored. If the list overflows a long (max
31678 possible length 34 bits) then completely leave off all elements
31679 that don't fit. */
31680 /* Only emit this long if there was at least one parameter. */
31681 if (fixed_parms || float_parms)
31682 fprintf (file, "\t.long %d\n", parm_info);
31684 /* Offset from start of code to tb table. */
31685 fputs ("\t.long ", file);
31686 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
31687 RS6000_OUTPUT_BASENAME (file, fname);
31688 putc ('-', file);
31689 rs6000_output_function_entry (file, fname);
31690 putc ('\n', file);
31692 /* Interrupt handler mask. */
31693 /* Omit this long, since we never set the interrupt handler bit
31694 above. */
31696 /* Number of CTL (controlled storage) anchors. */
31697 /* Omit this long, since the has_ctl bit is never set above. */
31699 /* Displacement into stack of each CTL anchor. */
31700 /* Omit this list of longs, because there are no CTL anchors. */
31702 /* Length of function name. */
31703 if (*fname == '*')
31704 ++fname;
31705 fprintf (file, "\t.short %d\n", (int) strlen (fname));
31707 /* Function name. */
31708 assemble_string (fname, strlen (fname));
31710 /* Register for alloca automatic storage; this is always reg 31.
31711 Only emit this if the alloca bit was set above. */
31712 if (frame_pointer_needed)
31713 fputs ("\t.byte 31\n", file);
31715 fputs ("\t.align 2\n", file);
31719 /* Arrange to define .LCTOC1 label, if not already done. */
31720 if (need_toc_init)
31722 need_toc_init = 0;
31723 if (!toc_initialized)
31725 switch_to_section (toc_section);
31726 switch_to_section (current_function_section ());
31731 /* -fsplit-stack support. */
31733 /* A SYMBOL_REF for __morestack. */
31734 static GTY(()) rtx morestack_ref;
31736 static rtx
31737 gen_add3_const (rtx rt, rtx ra, long c)
31739 if (TARGET_64BIT)
31740 return gen_adddi3 (rt, ra, GEN_INT (c));
31741 else
31742 return gen_addsi3 (rt, ra, GEN_INT (c));
31745 /* Emit -fsplit-stack prologue, which goes before the regular function
31746 prologue (at local entry point in the case of ELFv2). */
31748 void
31749 rs6000_expand_split_stack_prologue (void)
31751 rs6000_stack_t *info = rs6000_stack_info ();
31752 unsigned HOST_WIDE_INT allocate;
31753 long alloc_hi, alloc_lo;
31754 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
31755 rtx_insn *insn;
31757 gcc_assert (flag_split_stack && reload_completed);
31759 if (!info->push_p)
31760 return;
31762 if (global_regs[29])
31764 error ("-fsplit-stack uses register r29");
31765 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
31766 "conflicts with %qD", global_regs_decl[29]);
31769 allocate = info->total_size;
31770 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
31772 sorry ("stack frame larger than 2G is not supported for -fsplit-stack");
31773 return;
31775 if (morestack_ref == NULL_RTX)
31777 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
31778 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
31779 | SYMBOL_FLAG_FUNCTION);
31782 r0 = gen_rtx_REG (Pmode, 0);
31783 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
31784 r12 = gen_rtx_REG (Pmode, 12);
31785 emit_insn (gen_load_split_stack_limit (r0));
31786 /* Always emit two insns here to calculate the requested stack
31787 pointer, so that the linker can edit them when adjusting the
31788 frame size for calls to non-split-stack code. */
31789 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
31790 alloc_lo = -allocate - alloc_hi;
31791 if (alloc_hi != 0)
31793 emit_insn (gen_add3_const (r12, r1, alloc_hi));
31794 if (alloc_lo != 0)
31795 emit_insn (gen_add3_const (r12, r12, alloc_lo));
31796 else
31797 emit_insn (gen_nop ());
31799 else
31801 emit_insn (gen_add3_const (r12, r1, alloc_lo));
31802 emit_insn (gen_nop ());
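/* A minimal sketch, not part of the build, of the high/low split
performed above: the negated frame size is broken into a 64K-aligned
high part for addis and a signed 16-bit low part for addi. The
helper name is hypothetical. */
#if 0
#include <assert.h>

static void
split_stack_offset_sketch (long allocate)
{
  long hi = (-allocate + 0x8000) & ~0xffffL;	/* addis immediate */
  long lo = -allocate - hi;			/* addi immediate */

  assert (hi + lo == -allocate);		/* the split is exact */
  assert (lo >= -0x8000 && lo < 0x8000);	/* fits addi's D field */
  assert ((hi & 0xffffL) == 0);			/* fits addis's shifted field */
}
#endif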
31805 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
31806 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
31807 ok_label = gen_label_rtx ();
31808 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
31809 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
31810 gen_rtx_LABEL_REF (VOIDmode, ok_label),
31811 pc_rtx);
31812 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
31813 JUMP_LABEL (insn) = ok_label;
31814 /* Mark the jump as very likely to be taken. */
31815 add_int_reg_note (insn, REG_BR_PROB,
31816 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
31818 lr = gen_rtx_REG (Pmode, LR_REGNO);
31819 insn = emit_move_insn (r0, lr);
31820 RTX_FRAME_RELATED_P (insn) = 1;
31821 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
31822 RTX_FRAME_RELATED_P (insn) = 1;
31824 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
31825 const0_rtx, const0_rtx));
31826 call_fusage = NULL_RTX;
31827 use_reg (&call_fusage, r12);
31828 /* Say the call uses r0, even though it doesn't, to stop regrename
31829 from twiddling with the insns saving lr, trashing args for cfun.
31830 The insns restoring lr are similarly protected by making
31831 split_stack_return use r0. */
31832 use_reg (&call_fusage, r0);
31833 add_function_usage_to (insn, call_fusage);
31834 /* Indicate that this function can't jump to non-local gotos. */
31835 make_reg_eh_region_note_nothrow_nononlocal (insn);
31836 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
31837 insn = emit_move_insn (lr, r0);
31838 add_reg_note (insn, REG_CFA_RESTORE, lr);
31839 RTX_FRAME_RELATED_P (insn) = 1;
31840 emit_insn (gen_split_stack_return ());
31842 emit_label (ok_label);
31843 LABEL_NUSES (ok_label) = 1;
31846 /* Return the internal arg pointer used for function incoming
31847 arguments. When -fsplit-stack, the arg pointer is r12 so we need
31848 to copy it to a pseudo in order for it to be preserved over calls
31849 and suchlike. We'd really like to use a pseudo here for the
31850 internal arg pointer but data-flow analysis is not prepared to
31851 accept pseudos as live at the beginning of a function. */
31853 static rtx
31854 rs6000_internal_arg_pointer (void)
31856 if (flag_split_stack
31857 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
31858 == NULL))
31861 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
31863 rtx pat;
31865 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
31866 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
31868 /* Put the pseudo initialization right after the note at the
31869 beginning of the function. */
31870 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
31871 gen_rtx_REG (Pmode, 12));
31872 push_topmost_sequence ();
31873 emit_insn_after (pat, get_insns ());
31874 pop_topmost_sequence ();
31876 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
31877 FIRST_PARM_OFFSET (current_function_decl));
31879 return virtual_incoming_args_rtx;
31882 /* We may have to tell the dataflow pass that the split stack prologue
31883 is initializing a register. */
31885 static void
31886 rs6000_live_on_entry (bitmap regs)
31888 if (flag_split_stack)
31889 bitmap_set_bit (regs, 12);
31892 /* Emit -fsplit-stack dynamic stack allocation space check. */
31894 void
31895 rs6000_split_stack_space_check (rtx size, rtx label)
31897 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
31898 rtx limit = gen_reg_rtx (Pmode);
31899 rtx requested = gen_reg_rtx (Pmode);
31900 rtx cmp = gen_reg_rtx (CCUNSmode);
31901 rtx jump;
31903 emit_insn (gen_load_split_stack_limit (limit));
31904 if (CONST_INT_P (size))
31905 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
31906 else
31908 size = force_reg (Pmode, size);
31909 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
31911 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
31912 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
31913 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
31914 gen_rtx_LABEL_REF (VOIDmode, label),
31915 pc_rtx);
31916 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
31917 JUMP_LABEL (jump) = label;
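/* A minimal sketch, not part of the build, of the comparison emitted
above, expressed in C; the helper name is hypothetical. */
#if 0
static int
split_stack_fits_sketch (unsigned long sp, unsigned long limit,
			 unsigned long size)
{
  unsigned long requested = sp - size;	/* prospective stack pointer */
  return requested >= limit;		/* unsigned GEU, as in the RTL */
}
#endif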
31920 /* A C compound statement that outputs the assembler code for a thunk
31921 function, used to implement C++ virtual function calls with
31922 multiple inheritance. The thunk acts as a wrapper around a virtual
31923 function, adjusting the implicit object parameter before handing
31924 control off to the real function.
31926 First, emit code to add the integer DELTA to the location that
31927 contains the incoming first argument. Assume that this argument
31928 contains a pointer, and is the one used to pass the `this' pointer
31929 in C++. This is the incoming argument *before* the function
31930 prologue, e.g. `%o0' on a sparc. The addition must preserve the
31931 values of all other incoming arguments.
31933 After the addition, emit code to jump to FUNCTION, which is a
31934 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
31935 not touch the return address. Hence returning from FUNCTION will
31936 return to whoever called the current `thunk'.
31938 The effect must be as if FUNCTION had been called directly with the
31939 adjusted first argument. This macro is responsible for emitting
31940 all of the code for a thunk function; output_function_prologue()
31941 and output_function_epilogue() are not invoked.
31943 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
31944 been extracted from it.) It might possibly be useful on some
31945 targets, but probably not.
31947 If you do not define this macro, the target-independent code in the
31948 C++ frontend will generate a less efficient heavyweight thunk that
31949 calls FUNCTION instead of jumping to it. The generic approach does
31950 not support varargs. */
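/* A minimal sketch, not part of the build, of what the emitted thunk
does, expressed in C; the helper name is hypothetical. */
#if 0
static void *
thunk_this_adjust_sketch (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;	/* constant DELTA adjustment */
  if (vcall_offset)
    {
      /* Load the vtable pointer, then add the adjustment found at
	 VCALL_OFFSET within the vtable. */
      char *vtbl = *(char **) p;
      p += *(long *) (vtbl + vcall_offset);
    }
  return p;	/* control then jumps directly to FUNCTION */
}
#endif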
31952 static void
31953 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
31954 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
31955 tree function)
31957 rtx this_rtx, funexp;
31958 rtx_insn *insn;
31960 reload_completed = 1;
31961 epilogue_completed = 1;
31963 /* Mark the end of the (empty) prologue. */
31964 emit_note (NOTE_INSN_PROLOGUE_END);
31966 /* Find the "this" pointer. If the function returns a structure,
31967 the structure return pointer is in r3. */
31968 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
31969 this_rtx = gen_rtx_REG (Pmode, 4);
31970 else
31971 this_rtx = gen_rtx_REG (Pmode, 3);
31973 /* Apply the constant offset, if required. */
31974 if (delta)
31975 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
31977 /* Apply the offset from the vtable, if required. */
31978 if (vcall_offset)
31980 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
31981 rtx tmp = gen_rtx_REG (Pmode, 12);
31983 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
31984 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
31986 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
31987 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
31989 else
31991 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
31993 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
31995 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
31998 /* Generate a tail call to the target function. */
31999 if (!TREE_USED (function))
32001 assemble_external (function);
32002 TREE_USED (function) = 1;
32004 funexp = XEXP (DECL_RTL (function), 0);
32005 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
32007 #if TARGET_MACHO
32008 if (MACHOPIC_INDIRECT)
32009 funexp = machopic_indirect_call_target (funexp);
32010 #endif
32012 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
32013 generate sibcall RTL explicitly. */
32014 insn = emit_call_insn (
32015 gen_rtx_PARALLEL (VOIDmode,
32016 gen_rtvec (3,
32017 gen_rtx_CALL (VOIDmode,
32018 funexp, const0_rtx),
32019 gen_rtx_USE (VOIDmode, const0_rtx),
32020 simple_return_rtx)));
32021 SIBLING_CALL_P (insn) = 1;
32022 emit_barrier ();
32024 /* Run just enough of rest_of_compilation to get the insns emitted.
32025 There's not really enough bulk here to make other passes such as
32026 instruction scheduling worth while. Note that use_thunk calls
32027 assemble_start_function and assemble_end_function. */
32028 insn = get_insns ();
32029 shorten_branches (insn);
32030 final_start_function (insn, file, 1);
32031 final (insn, file, 1);
32032 final_end_function ();
32034 reload_completed = 0;
32035 epilogue_completed = 0;
32038 /* A quick summary of the various types of 'constant-pool tables'
32039 under PowerPC:
32041 Target      Flags           Name             One table per
32042 AIX         (none)          AIX TOC          object file
32043 AIX         -mfull-toc      AIX TOC          object file
32044 AIX         -mminimal-toc   AIX minimal TOC  translation unit
32045 SVR4/EABI   (none)          SVR4 SDATA       object file
32046 SVR4/EABI   -fpic           SVR4 pic         object file
32047 SVR4/EABI   -fPIC           SVR4 PIC         translation unit
32048 SVR4/EABI   -mrelocatable   EABI TOC         function
32049 SVR4/EABI   -maix           AIX TOC          object file
32050 SVR4/EABI   -maix -mminimal-toc
32051                             AIX minimal TOC  translation unit
32053 Name             Reg.  Set by  Made by  Entries contain:
32054                                         addrs?  fp?      sum?
32056 AIX TOC          2     crt0    as       Y       option   option
32057 AIX minimal TOC  30    prolog  gcc      Y       Y        option
32058 SVR4 SDATA       13    crt0    gcc      N       Y        N
32059 SVR4 pic         30    prolog  ld       Y       not yet  N
32060 SVR4 PIC         30    prolog  gcc      Y       option   option
32061 EABI TOC         30    prolog  gcc      Y       option   option
32065 /* Hash functions for the hash table. */
32067 static unsigned
32068 rs6000_hash_constant (rtx k)
32070 enum rtx_code code = GET_CODE (k);
32071 machine_mode mode = GET_MODE (k);
32072 unsigned result = (code << 3) ^ mode;
32073 const char *format;
32074 int flen, fidx;
32076 format = GET_RTX_FORMAT (code);
32077 flen = strlen (format);
32078 fidx = 0;
32080 switch (code)
32082 case LABEL_REF:
32083 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
32085 case CONST_WIDE_INT:
32087 int i;
32088 flen = CONST_WIDE_INT_NUNITS (k);
32089 for (i = 0; i < flen; i++)
32090 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
32091 return result;
32094 case CONST_DOUBLE:
32095 if (mode != VOIDmode)
32096 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
32097 flen = 2;
32098 break;
32100 case CODE_LABEL:
32101 fidx = 3;
32102 break;
32104 default:
32105 break;
32108 for (; fidx < flen; fidx++)
32109 switch (format[fidx])
32111 case 's':
32113 unsigned i, len;
32114 const char *str = XSTR (k, fidx);
32115 len = strlen (str);
32116 result = result * 613 + len;
32117 for (i = 0; i < len; i++)
32118 result = result * 613 + (unsigned) str[i];
32119 break;
32121 case 'u':
32122 case 'e':
32123 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
32124 break;
32125 case 'i':
32126 case 'n':
32127 result = result * 613 + (unsigned) XINT (k, fidx);
32128 break;
32129 case 'w':
32130 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
32131 result = result * 613 + (unsigned) XWINT (k, fidx);
32132 else
32134 size_t i;
32135 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
32136 result = result * 613 + (unsigned) (XWINT (k, fidx)
32137 >> CHAR_BIT * i);
32139 break;
32140 case '0':
32141 break;
32142 default:
32143 gcc_unreachable ();
32146 return result;
32149 hashval_t
32150 toc_hasher::hash (toc_hash_struct *thc)
32152 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
32155 /* Compare H1 and H2 for equivalence. */
32157 bool
32158 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
32160 rtx r1 = h1->key;
32161 rtx r2 = h2->key;
32163 if (h1->key_mode != h2->key_mode)
32164 return 0;
32166 return rtx_equal_p (r1, r2);
32169 /* These are the names given by the C++ front-end to vtables, and
32170 vtable-like objects. Ideally, this logic should not be here;
32171 instead, there should be some programmatic way of inquiring as
32172 to whether or not an object is a vtable. */
32174 #define VTABLE_NAME_P(NAME) \
32175 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
32176 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
32177 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
32178 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
32179 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
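/* These prefixes follow the old g++ v2 encoding ("_vt.") and the
Itanium C++ ABI manglings: _ZTV = vtable, _ZTT = VTT, _ZTI =
typeinfo object, _ZTC = construction vtable. */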
32181 #ifdef NO_DOLLAR_IN_LABEL
32182 /* Return a GGC-allocated character string translating dollar signs in
32183 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
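/* For example, "foo$inner$0" becomes "foo_inner_0"; a NAME whose
first character is '$' is returned unchanged. */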
32185 const char *
32186 rs6000_xcoff_strip_dollar (const char *name)
32188 char *strip, *p;
32189 const char *q;
32190 size_t len;
32192 q = (const char *) strchr (name, '$');
32194 if (q == 0 || q == name)
32195 return name;
32197 len = strlen (name);
32198 strip = XALLOCAVEC (char, len + 1);
32199 strcpy (strip, name);
32200 p = strip + (q - name);
32201 while (p)
32203 *p = '_';
32204 p = strchr (p + 1, '$');
32207 return ggc_alloc_string (strip, len);
32209 #endif
32211 void
32212 rs6000_output_symbol_ref (FILE *file, rtx x)
32214 const char *name = XSTR (x, 0);
32216 /* Currently C++ toc references to vtables can be emitted before it
32217 is decided whether the vtable is public or private. If this is
32218 the case, then the linker will eventually complain that there is
32219 a reference to an unknown section. Thus, for vtables only,
32220 we emit the TOC reference to reference the identifier and not the
32221 symbol. */
32222 if (VTABLE_NAME_P (name))
32224 RS6000_OUTPUT_BASENAME (file, name);
32226 else
32227 assemble_name (file, name);
32230 /* Output a TOC entry. We derive the entry name from what is being
32231 written. */
32233 void
32234 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
32236 char buf[256];
32237 const char *name = buf;
32238 rtx base = x;
32239 HOST_WIDE_INT offset = 0;
32241 gcc_assert (!TARGET_NO_TOC);
32243 /* When the linker won't eliminate them, don't output duplicate
32244 TOC entries (this happens on AIX if there is any kind of TOC,
32245 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
32246 CODE_LABELs. */
32247 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
32249 struct toc_hash_struct *h;
32251 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
32252 time because GGC is not initialized at that point. */
32253 if (toc_hash_table == NULL)
32254 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
32256 h = ggc_alloc<toc_hash_struct> ();
32257 h->key = x;
32258 h->key_mode = mode;
32259 h->labelno = labelno;
32261 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
32262 if (*found == NULL)
32263 *found = h;
32264 else /* This is indeed a duplicate.
32265 Set this label equal to that label. */
32267 fputs ("\t.set ", file);
32268 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32269 fprintf (file, "%d,", labelno);
32270 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32271 fprintf (file, "%d\n", ((*found)->labelno));
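/* On ELF targets the directive emitted above looks like
(illustrative; label spelling varies by target):
	.set .LC5,.LC2
i.e. the duplicate's label is simply aliased to the label of the
entry emitted first. */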
32273 #ifdef HAVE_AS_TLS
32274 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
32275 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
32276 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
32278 fputs ("\t.set ", file);
32279 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32280 fprintf (file, "%d,", labelno);
32281 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32282 fprintf (file, "%d\n", ((*found)->labelno));
32284 #endif
32285 return;
32289 /* If we're going to put a double constant in the TOC, make sure it's
32290 aligned properly when strict alignment is on. */
32291 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
32292 && STRICT_ALIGNMENT
32293 && GET_MODE_BITSIZE (mode) >= 64
32294 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
32295 ASM_OUTPUT_ALIGN (file, 3);
32298 (*targetm.asm_out.internal_label) (file, "LC", labelno);
32300 /* Handle FP constants specially. Note that if we have a minimal
32301 TOC, things we put here aren't actually in the TOC, so we can allow
32302 FP constants. */
32303 if (GET_CODE (x) == CONST_DOUBLE &&
32304 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
32305 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
32307 long k[4];
32309 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32310 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
32311 else
32312 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32314 if (TARGET_64BIT)
32316 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32317 fputs (DOUBLE_INT_ASM_OP, file);
32318 else
32319 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32320 k[0] & 0xffffffff, k[1] & 0xffffffff,
32321 k[2] & 0xffffffff, k[3] & 0xffffffff);
32322 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
32323 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32324 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
32325 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
32326 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
32327 return;
32329 else
32331 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32332 fputs ("\t.long ", file);
32333 else
32334 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32335 k[0] & 0xffffffff, k[1] & 0xffffffff,
32336 k[2] & 0xffffffff, k[3] & 0xffffffff);
32337 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
32338 k[0] & 0xffffffff, k[1] & 0xffffffff,
32339 k[2] & 0xffffffff, k[3] & 0xffffffff);
32340 return;
32343 else if (GET_CODE (x) == CONST_DOUBLE &&
32344 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
32346 long k[2];
32348 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32349 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
32350 else
32351 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32353 if (TARGET_64BIT)
32355 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32356 fputs (DOUBLE_INT_ASM_OP, file);
32357 else
32358 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32359 k[0] & 0xffffffff, k[1] & 0xffffffff);
32360 fprintf (file, "0x%lx%08lx\n",
32361 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32362 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
32363 return;
32365 else
32367 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32368 fputs ("\t.long ", file);
32369 else
32370 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32371 k[0] & 0xffffffff, k[1] & 0xffffffff);
32372 fprintf (file, "0x%lx,0x%lx\n",
32373 k[0] & 0xffffffff, k[1] & 0xffffffff);
32374 return;
32377 else if (GET_CODE (x) == CONST_DOUBLE &&
32378 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
32380 long l;
32382 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32383 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
32384 else
32385 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
32387 if (TARGET_64BIT)
32389 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32390 fputs (DOUBLE_INT_ASM_OP, file);
32391 else
32392 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32393 if (WORDS_BIG_ENDIAN)
32394 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
32395 else
32396 fprintf (file, "0x%lx\n", l & 0xffffffff);
32397 return;
32399 else
32401 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32402 fputs ("\t.long ", file);
32403 else
32404 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32405 fprintf (file, "0x%lx\n", l & 0xffffffff);
32406 return;
32409 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
32411 unsigned HOST_WIDE_INT low;
32412 HOST_WIDE_INT high;
32414 low = INTVAL (x) & 0xffffffff;
32415 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
32417 /* TOC entries are always Pmode-sized, so on big-endian targets
32418 smaller integer constants in the TOC need to be padded.
32419 (This is still a win over putting the constants in
32420 a separate constant pool, because then we'd have
32421 to have both a TOC entry _and_ the actual constant.)
32423 For a 32-bit target, CONST_INT values are loaded and shifted
32424 entirely within `low' and can be stored in one TOC entry. */
32426 /* It would be easy to make this work, but it doesn't now. */
32427 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
32429 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
32431 low |= high << 32;
32432 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
32433 high = (HOST_WIDE_INT) low >> 32;
32434 low &= 0xffffffff;
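/* For example (illustrative): an SImode entry holding 0x12345678 on a
64-bit big-endian target ends up with high = 0x12345678, low = 0,
and is emitted below as 0x1234567800000000. */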
32437 if (TARGET_64BIT)
32439 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32440 fputs (DOUBLE_INT_ASM_OP, file);
32441 else
32442 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32443 (long) high & 0xffffffff, (long) low & 0xffffffff);
32444 fprintf (file, "0x%lx%08lx\n",
32445 (long) high & 0xffffffff, (long) low & 0xffffffff);
32446 return;
32448 else
32450 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
32452 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32453 fputs ("\t.long ", file);
32454 else
32455 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32456 (long) high & 0xffffffff, (long) low & 0xffffffff);
32457 fprintf (file, "0x%lx,0x%lx\n",
32458 (long) high & 0xffffffff, (long) low & 0xffffffff);
32460 else
32462 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32463 fputs ("\t.long ", file);
32464 else
32465 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
32466 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
32468 return;
32472 if (GET_CODE (x) == CONST)
32474 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
32475 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
32477 base = XEXP (XEXP (x, 0), 0);
32478 offset = INTVAL (XEXP (XEXP (x, 0), 1));
32481 switch (GET_CODE (base))
32483 case SYMBOL_REF:
32484 name = XSTR (base, 0);
32485 break;
32487 case LABEL_REF:
32488 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
32489 CODE_LABEL_NUMBER (XEXP (base, 0)));
32490 break;
32492 case CODE_LABEL:
32493 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
32494 break;
32496 default:
32497 gcc_unreachable ();
32500 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32501 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
32502 else
32504 fputs ("\t.tc ", file);
32505 RS6000_OUTPUT_BASENAME (file, name);
32507 if (offset < 0)
32508 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
32509 else if (offset)
32510 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
32512 /* Mark large TOC symbols on AIX with [TE] so they are mapped
32513 after other TOC symbols, reducing overflow of small TOC access
32514 to [TC] symbols. */
32515 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
32516 ? "[TE]," : "[TC],", file);
32519 /* Currently C++ TOC references to vtables can be emitted before it
32520 is decided whether the vtable is public or private. If this is
32521 the case, then the linker will eventually complain that there is
32522 a TOC reference to an unknown section. Thus, for vtables only,
32523 we emit the TOC reference to reference the symbol and not the
32524 section. */
32525 if (VTABLE_NAME_P (name))
32527 RS6000_OUTPUT_BASENAME (file, name);
32528 if (offset < 0)
32529 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
32530 else if (offset > 0)
32531 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
32533 else
32534 output_addr_const (file, x);
32536 #if HAVE_AS_TLS
32537 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
32539 switch (SYMBOL_REF_TLS_MODEL (base))
32541 case 0:
32542 break;
32543 case TLS_MODEL_LOCAL_EXEC:
32544 fputs ("@le", file);
32545 break;
32546 case TLS_MODEL_INITIAL_EXEC:
32547 fputs ("@ie", file);
32548 break;
32549 /* Use global-dynamic for local-dynamic. */
32550 case TLS_MODEL_GLOBAL_DYNAMIC:
32551 case TLS_MODEL_LOCAL_DYNAMIC:
32552 putc ('\n', file);
32553 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
32554 fputs ("\t.tc .", file);
32555 RS6000_OUTPUT_BASENAME (file, name);
32556 fputs ("[TC],", file);
32557 output_addr_const (file, x);
32558 fputs ("@m", file);
32559 break;
32560 default:
32561 gcc_unreachable ();
32564 #endif
32566 putc ('\n', file);
32569 /* Output an assembler pseudo-op to write an ASCII string of N characters
32570 starting at P to FILE.
32572 On the RS/6000, we have to do this using the .byte operation and
32573 write out special characters outside the quoted string.
32574 Also, the assembler is broken; very long strings are truncated,
32575 so we must artificially break them up early. */
32577 void
32578 output_ascii (FILE *file, const char *p, int n)
32580 char c;
32581 int i, count_string;
32582 const char *for_string = "\t.byte \"";
32583 const char *for_decimal = "\t.byte ";
32584 const char *to_close = NULL;
32586 count_string = 0;
32587 for (i = 0; i < n; i++)
32589 c = *p++;
32590 if (c >= ' ' && c < 0177)
32592 if (for_string)
32593 fputs (for_string, file);
32594 putc (c, file);
32596 /* Write two quotes to get one. */
32597 if (c == '"')
32599 putc (c, file);
32600 ++count_string;
32603 for_string = NULL;
32604 for_decimal = "\"\n\t.byte ";
32605 to_close = "\"\n";
32606 ++count_string;
32608 if (count_string >= 512)
32610 fputs (to_close, file);
32612 for_string = "\t.byte \"";
32613 for_decimal = "\t.byte ";
32614 to_close = NULL;
32615 count_string = 0;
32618 else
32620 if (for_decimal)
32621 fputs (for_decimal, file);
32622 fprintf (file, "%d", c);
32624 for_string = "\n\t.byte \"";
32625 for_decimal = ", ";
32626 to_close = "\n";
32627 count_string = 0;
32631 /* Now close the string if we have written one. Then end the line. */
32632 if (to_close)
32633 fputs (to_close, file);
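/* For example, output_ascii (file, "OK\n", 3) emits

	.byte "OK"
	.byte 10

i.e. the printable characters inside one quoted .byte directive and
the newline as a separate decimal .byte, closed by a final newline. */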
32636 /* Generate a unique section name for FILENAME for a section type
32637 represented by SECTION_DESC. Output goes into BUF.
32639 SECTION_DESC can be any string, as long as it is different for each
32640 possible section type.
32642 We name the section in the same manner as xlc. The name begins with an
32643 underscore followed by the filename (after stripping any leading directory
32644 names) with the last period replaced by the string SECTION_DESC. If
32645 FILENAME does not contain a period, SECTION_DESC is appended to the end of
32646 the name. */
32648 void
32649 rs6000_gen_section_name (char **buf, const char *filename,
32650 const char *section_desc)
32652 const char *q, *after_last_slash, *last_period = 0;
32653 char *p;
32654 int len;
32656 after_last_slash = filename;
32657 for (q = filename; *q; q++)
32659 if (*q == '/')
32660 after_last_slash = q + 1;
32661 else if (*q == '.')
32662 last_period = q;
32665 len = strlen (after_last_slash) + strlen (section_desc) + 2;
32666 *buf = (char *) xmalloc (len);
32668 p = *buf;
32669 *p++ = '_';
32671 for (q = after_last_slash; *q; q++)
32673 if (q == last_period)
32675 strcpy (p, section_desc);
32676 p += strlen (section_desc);
32677 break;
32680 else if (ISALNUM (*q))
32681 *p++ = *q;
32684 if (last_period == 0)
32685 strcpy (p, section_desc);
32686 else
32687 *p = '\0';
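/* For example, given FILENAME "src/foo.c" and a hypothetical
SECTION_DESC "_bss_", the generated name is "_foo_bss_": leading
directories are stripped, non-alphanumeric characters are dropped,
and the final period is replaced by the descriptor. */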
32690 /* Emit profile function. */
32692 void
32693 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
32695 /* Non-standard profiling for kernels, which just saves LR then calls
32696 _mcount without worrying about arg saves. The idea is to change
32697 the function prologue as little as possible as it isn't easy to
32698 account for arg save/restore code added just for _mcount. */
32699 if (TARGET_PROFILE_KERNEL)
32700 return;
32702 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32704 #ifndef NO_PROFILE_COUNTERS
32705 # define NO_PROFILE_COUNTERS 0
32706 #endif
32707 if (NO_PROFILE_COUNTERS)
32708 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
32709 LCT_NORMAL, VOIDmode, 0);
32710 else
32712 char buf[30];
32713 const char *label_name;
32714 rtx fun;
32716 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
32717 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
32718 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
32720 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
32721 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
32724 else if (DEFAULT_ABI == ABI_DARWIN)
32726 const char *mcount_name = RS6000_MCOUNT;
32727 int caller_addr_regno = LR_REGNO;
32729 /* Be conservative and always set this, at least for now. */
32730 crtl->uses_pic_offset_table = 1;
32732 #if TARGET_MACHO
32733 /* For PIC code, set up a stub and collect the caller's address
32734 from r0, which is where the prologue puts it. */
32735 if (MACHOPIC_INDIRECT
32736 && crtl->uses_pic_offset_table)
32737 caller_addr_regno = 0;
32738 #endif
32739 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
32740 LCT_NORMAL, VOIDmode, 1,
32741 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
32745 /* Write function profiler code. */
32747 void
32748 output_function_profiler (FILE *file, int labelno)
32750 char buf[100];
32752 switch (DEFAULT_ABI)
32754 default:
32755 gcc_unreachable ();
32757 case ABI_V4:
32758 if (!TARGET_32BIT)
32760 warning (0, "no profiling of 64-bit code for this ABI");
32761 return;
32763 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
32764 fprintf (file, "\tmflr %s\n", reg_names[0]);
32765 if (NO_PROFILE_COUNTERS)
32767 asm_fprintf (file, "\tstw %s,4(%s)\n",
32768 reg_names[0], reg_names[1]);
32770 else if (TARGET_SECURE_PLT && flag_pic)
32772 if (TARGET_LINK_STACK)
32774 char name[32];
32775 get_ppc476_thunk_name (name);
32776 asm_fprintf (file, "\tbl %s\n", name);
32778 else
32779 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
32780 asm_fprintf (file, "\tstw %s,4(%s)\n",
32781 reg_names[0], reg_names[1]);
32782 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
32783 asm_fprintf (file, "\taddis %s,%s,",
32784 reg_names[12], reg_names[12]);
32785 assemble_name (file, buf);
32786 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
32787 assemble_name (file, buf);
32788 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
32790 else if (flag_pic == 1)
32792 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
32793 asm_fprintf (file, "\tstw %s,4(%s)\n",
32794 reg_names[0], reg_names[1]);
32795 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
32796 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
32797 assemble_name (file, buf);
32798 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
32800 else if (flag_pic > 1)
32802 asm_fprintf (file, "\tstw %s,4(%s)\n",
32803 reg_names[0], reg_names[1]);
32804 /* Now, we need to get the address of the label. */
32805 if (TARGET_LINK_STACK)
32807 char name[32];
32808 get_ppc476_thunk_name (name);
32809 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
32810 assemble_name (file, buf);
32811 fputs ("-.\n1:", file);
32812 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
32813 asm_fprintf (file, "\taddi %s,%s,4\n",
32814 reg_names[11], reg_names[11]);
32816 else
32818 fputs ("\tbcl 20,31,1f\n\t.long ", file);
32819 assemble_name (file, buf);
32820 fputs ("-.\n1:", file);
32821 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
32823 asm_fprintf (file, "\tlwz %s,0(%s)\n",
32824 reg_names[0], reg_names[11]);
32825 asm_fprintf (file, "\tadd %s,%s,%s\n",
32826 reg_names[0], reg_names[0], reg_names[11]);
32828 else
32830 asm_fprintf (file, "\tlis %s,", reg_names[12]);
32831 assemble_name (file, buf);
32832 fputs ("@ha\n", file);
32833 asm_fprintf (file, "\tstw %s,4(%s)\n",
32834 reg_names[0], reg_names[1]);
32835 asm_fprintf (file, "\tla %s,", reg_names[0]);
32836 assemble_name (file, buf);
32837 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
32840 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
32841 fprintf (file, "\tbl %s%s\n",
32842 RS6000_MCOUNT, flag_pic ? "@plt" : "");
32843 break;
32845 case ABI_AIX:
32846 case ABI_ELFv2:
32847 case ABI_DARWIN:
32848 /* Don't do anything, done in output_profile_hook (). */
32849 break;
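/* As an illustration, the 32-bit ABI_V4 non-PIC path above emits
roughly the following (exact label spelling and register naming
depend on the target headers):

	mflr 0
	lis 12,.LP0@ha
	stw 0,4(1)
	la 0,.LP0@l(12)
	bl _mcount

i.e. save LR to the stack word at 4(r1), materialize the address of
the LP counter label in r0, then call _mcount. */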
32855 /* The following variable holds the last issued insn. */
32857 static rtx_insn *last_scheduled_insn;
32859 /* The following variable helps to balance issuing of load and
32860 store instructions. */
32862 static int load_store_pendulum;
32864 /* The following variable helps pair divide insns during scheduling. */
32865 static int divide_cnt;
32866 /* The following variable helps pair and alternate vector and vector load
32867 insns during scheduling. */
32868 static int vec_pairing;
32871 /* Power4 load update and store update instructions are cracked into a
32872 load or store and an integer insn which are executed in the same cycle.
32873 Branches have their own dispatch slot which does not count against the
32874 GCC issue rate, but it changes the program flow so there are no other
32875 instructions to issue in this cycle. */
32877 static int
32878 rs6000_variable_issue_1 (rtx_insn *insn, int more)
32880 last_scheduled_insn = insn;
32881 if (GET_CODE (PATTERN (insn)) == USE
32882 || GET_CODE (PATTERN (insn)) == CLOBBER)
32884 cached_can_issue_more = more;
32885 return cached_can_issue_more;
32888 if (insn_terminates_group_p (insn, current_group))
32890 cached_can_issue_more = 0;
32891 return cached_can_issue_more;
32894 /* If the insn has no reservation, leave the issue count unchanged. */
32895 if (recog_memoized (insn) < 0)
32896 return more;
32898 if (rs6000_sched_groups)
32900 if (is_microcoded_insn (insn))
32901 cached_can_issue_more = 0;
32902 else if (is_cracked_insn (insn))
32903 cached_can_issue_more = more > 2 ? more - 2 : 0;
32904 else
32905 cached_can_issue_more = more - 1;
32907 return cached_can_issue_more;
32910 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
32911 return 0;
32913 cached_can_issue_more = more - 1;
32914 return cached_can_issue_more;
32917 static int
32918 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
32920 int r = rs6000_variable_issue_1 (insn, more);
32921 if (verbose)
32922 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
32923 return r;
32926 /* Adjust the cost of a scheduling dependency. Return the new cost of
32927 the dependency of INSN on DEP_INSN. COST is the current cost. */
32929 static int
32930 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
32931 unsigned int)
32933 enum attr_type attr_type;
32935 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
32936 return cost;
32938 switch (dep_type)
32940 case REG_DEP_TRUE:
32942 /* Data dependency; DEP_INSN writes a register that INSN reads
32943 some cycles later. */
32945 /* Separate a load from a narrower, dependent store. */
32946 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
32947 && GET_CODE (PATTERN (insn)) == SET
32948 && GET_CODE (PATTERN (dep_insn)) == SET
32949 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
32950 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
32951 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
32952 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
32953 return cost + 14;
32955 attr_type = get_attr_type (insn);
32957 switch (attr_type)
32959 case TYPE_JMPREG:
32960 /* Tell the first scheduling pass about the latency between
32961 a mtctr and bctr (and mtlr and br/blr). The first
32962 scheduling pass will not know about this latency since
32963 the mtctr instruction, which has the latency associated
32964 to it, will be generated by reload. */
32965 return 4;
32966 case TYPE_BRANCH:
32967 /* Leave some extra cycles between a compare and its
32968 dependent branch, to inhibit expensive mispredicts. */
32969 if ((rs6000_cpu_attr == CPU_PPC603
32970 || rs6000_cpu_attr == CPU_PPC604
32971 || rs6000_cpu_attr == CPU_PPC604E
32972 || rs6000_cpu_attr == CPU_PPC620
32973 || rs6000_cpu_attr == CPU_PPC630
32974 || rs6000_cpu_attr == CPU_PPC750
32975 || rs6000_cpu_attr == CPU_PPC7400
32976 || rs6000_cpu_attr == CPU_PPC7450
32977 || rs6000_cpu_attr == CPU_PPCE5500
32978 || rs6000_cpu_attr == CPU_PPCE6500
32979 || rs6000_cpu_attr == CPU_POWER4
32980 || rs6000_cpu_attr == CPU_POWER5
32981 || rs6000_cpu_attr == CPU_POWER7
32982 || rs6000_cpu_attr == CPU_POWER8
32983 || rs6000_cpu_attr == CPU_POWER9
32984 || rs6000_cpu_attr == CPU_CELL)
32985 && recog_memoized (dep_insn)
32986 && (INSN_CODE (dep_insn) >= 0))
32988 switch (get_attr_type (dep_insn))
32990 case TYPE_CMP:
32991 case TYPE_FPCOMPARE:
32992 case TYPE_CR_LOGICAL:
32993 case TYPE_DELAYED_CR:
32994 return cost + 2;
32995 case TYPE_EXTS:
32996 case TYPE_MUL:
32997 if (get_attr_dot (dep_insn) == DOT_YES)
32998 return cost + 2;
32999 else
33000 break;
33001 case TYPE_SHIFT:
33002 if (get_attr_dot (dep_insn) == DOT_YES
33003 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
33004 return cost + 2;
33005 else
33006 break;
33007 default:
33008 break;
33010 break;
33012 case TYPE_STORE:
33013 case TYPE_FPSTORE:
33014 if ((rs6000_cpu == PROCESSOR_POWER6)
33015 && recog_memoized (dep_insn)
33016 && (INSN_CODE (dep_insn) >= 0))
33019 if (GET_CODE (PATTERN (insn)) != SET)
33020 /* If this happens, we have to extend this to schedule
33021 optimally. Return default for now. */
33022 return cost;
33024 /* Adjust the cost for the case where the value written
33025 by a fixed point operation is used as the address
33026 gen value on a store. */
33027 switch (get_attr_type (dep_insn))
33029 case TYPE_LOAD:
33030 case TYPE_CNTLZ:
33032 if (! store_data_bypass_p (dep_insn, insn))
33033 return get_attr_sign_extend (dep_insn)
33034 == SIGN_EXTEND_YES ? 6 : 4;
33035 break;
33037 case TYPE_SHIFT:
33039 if (! store_data_bypass_p (dep_insn, insn))
33040 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33041 6 : 3;
33042 break;
33044 case TYPE_INTEGER:
33045 case TYPE_ADD:
33046 case TYPE_LOGICAL:
33047 case TYPE_EXTS:
33048 case TYPE_INSERT:
33050 if (! store_data_bypass_p (dep_insn, insn))
33051 return 3;
33052 break;
33054 case TYPE_STORE:
33055 case TYPE_FPLOAD:
33056 case TYPE_FPSTORE:
33058 if (get_attr_update (dep_insn) == UPDATE_YES
33059 && ! store_data_bypass_p (dep_insn, insn))
33060 return 3;
33061 break;
33063 case TYPE_MUL:
33065 if (! store_data_bypass_p (dep_insn, insn))
33066 return 17;
33067 break;
33069 case TYPE_DIV:
33071 if (! store_data_bypass_p (dep_insn, insn))
33072 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33073 break;
33075 default:
33076 break;
33079 break;
33081 case TYPE_LOAD:
33082 if ((rs6000_cpu == PROCESSOR_POWER6)
33083 && recog_memoized (dep_insn)
33084 && (INSN_CODE (dep_insn) >= 0))
33087 /* Adjust the cost for the case where the value written
33088 by a fixed point instruction is used within the address
33089 gen portion of a subsequent load(u)(x) */
33090 switch (get_attr_type (dep_insn))
33092 case TYPE_LOAD:
33093 case TYPE_CNTLZ:
33095 if (set_to_load_agen (dep_insn, insn))
33096 return get_attr_sign_extend (dep_insn)
33097 == SIGN_EXTEND_YES ? 6 : 4;
33098 break;
33100 case TYPE_SHIFT:
33102 if (set_to_load_agen (dep_insn, insn))
33103 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33104 6 : 3;
33105 break;
33107 case TYPE_INTEGER:
33108 case TYPE_ADD:
33109 case TYPE_LOGICAL:
33110 case TYPE_EXTS:
33111 case TYPE_INSERT:
33113 if (set_to_load_agen (dep_insn, insn))
33114 return 3;
33115 break;
33117 case TYPE_STORE:
33118 case TYPE_FPLOAD:
33119 case TYPE_FPSTORE:
33121 if (get_attr_update (dep_insn) == UPDATE_YES
33122 && set_to_load_agen (dep_insn, insn))
33123 return 3;
33124 break;
33126 case TYPE_MUL:
33128 if (set_to_load_agen (dep_insn, insn))
33129 return 17;
33130 break;
33132 case TYPE_DIV:
33134 if (set_to_load_agen (dep_insn, insn))
33135 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33136 break;
33138 default:
33139 break;
33142 break;
33144 case TYPE_FPLOAD:
33145 if ((rs6000_cpu == PROCESSOR_POWER6)
33146 && get_attr_update (insn) == UPDATE_NO
33147 && recog_memoized (dep_insn)
33148 && (INSN_CODE (dep_insn) >= 0)
33149 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
33150 return 2;
33152 default:
33153 break;
33156 /* Fall out to return default cost. */
33158 break;
33160 case REG_DEP_OUTPUT:
33161 /* Output dependency; DEP_INSN writes a register that INSN writes some
33162 cycles later. */
33163 if ((rs6000_cpu == PROCESSOR_POWER6)
33164 && recog_memoized (dep_insn)
33165 && (INSN_CODE (dep_insn) >= 0))
33167 attr_type = get_attr_type (insn);
33169 switch (attr_type)
33171 case TYPE_FP:
33172 case TYPE_FPSIMPLE:
33173 if (get_attr_type (dep_insn) == TYPE_FP
33174 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
33175 return 1;
33176 break;
33177 case TYPE_FPLOAD:
33178 if (get_attr_update (insn) == UPDATE_NO
33179 && get_attr_type (dep_insn) == TYPE_MFFGPR)
33180 return 2;
33181 break;
33182 default:
33183 break;
33186 /* Fall through, no cost for output dependency. */
33187 /* FALLTHRU */
33189 case REG_DEP_ANTI:
33190 /* Anti dependency; DEP_INSN reads a register that INSN writes some
33191 cycles later. */
33192 return 0;
33194 default:
33195 gcc_unreachable ();
33198 return cost;
33201 /* Debug version of rs6000_adjust_cost. */
33203 static int
33204 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
33205 int cost, unsigned int dw)
33207 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
33209 if (ret != cost)
33211 const char *dep;
33213 switch (dep_type)
33215 default: dep = "unknown dependency"; break;
33216 case REG_DEP_TRUE: dep = "data dependency"; break;
33217 case REG_DEP_OUTPUT: dep = "output dependency"; break;
33218 case REG_DEP_ANTI: dep = "anti dependency"; break;
33221 fprintf (stderr,
33222 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
33223 "%s, insn:\n", ret, cost, dep);
33225 debug_rtx (insn);
33228 return ret;
33231 /* Return true if INSN is microcoded,
33232 false otherwise. */
33234 static bool
33235 is_microcoded_insn (rtx_insn *insn)
33237 if (!insn || !NONDEBUG_INSN_P (insn)
33238 || GET_CODE (PATTERN (insn)) == USE
33239 || GET_CODE (PATTERN (insn)) == CLOBBER)
33240 return false;
33242 if (rs6000_cpu_attr == CPU_CELL)
33243 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
33245 if (rs6000_sched_groups
33246 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33248 enum attr_type type = get_attr_type (insn);
33249 if ((type == TYPE_LOAD
33250 && get_attr_update (insn) == UPDATE_YES
33251 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
33252 || ((type == TYPE_LOAD || type == TYPE_STORE)
33253 && get_attr_update (insn) == UPDATE_YES
33254 && get_attr_indexed (insn) == INDEXED_YES)
33255 || type == TYPE_MFCR)
33256 return true;
33259 return false;
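/* For example, on POWER4/POWER5 the update-and-indexed forms such as
lwzux and stwux are microcoded, as is mfcr; the group-formation code
below (insn_must_be_first_in_group/insn_must_be_last_in_group) then
keeps such insns alone in their dispatch group. */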
33262 /* The function returns true if INSN is cracked into 2 instructions
33263 by the processor (and therefore occupies 2 issue slots). */
33265 static bool
33266 is_cracked_insn (rtx_insn *insn)
33268 if (!insn || !NONDEBUG_INSN_P (insn)
33269 || GET_CODE (PATTERN (insn)) == USE
33270 || GET_CODE (PATTERN (insn)) == CLOBBER)
33271 return false;
33273 if (rs6000_sched_groups
33274 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33276 enum attr_type type = get_attr_type (insn);
33277 if ((type == TYPE_LOAD
33278 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33279 && get_attr_update (insn) == UPDATE_NO)
33280 || (type == TYPE_LOAD
33281 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
33282 && get_attr_update (insn) == UPDATE_YES
33283 && get_attr_indexed (insn) == INDEXED_NO)
33284 || (type == TYPE_STORE
33285 && get_attr_update (insn) == UPDATE_YES
33286 && get_attr_indexed (insn) == INDEXED_NO)
33287 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
33288 && get_attr_update (insn) == UPDATE_YES)
33289 || type == TYPE_DELAYED_CR
33290 || (type == TYPE_EXTS
33291 && get_attr_dot (insn) == DOT_YES)
33292 || (type == TYPE_SHIFT
33293 && get_attr_dot (insn) == DOT_YES
33294 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
33295 || (type == TYPE_MUL
33296 && get_attr_dot (insn) == DOT_YES)
33297 || type == TYPE_DIV
33298 || (type == TYPE_INSERT
33299 && get_attr_size (insn) == SIZE_32))
33300 return true;
33303 return false;
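/* For example, a non-indexed update load such as lwzu, or a
record-form shift like "slwi. rx,ry,n", is cracked on POWER4/POWER5
and therefore occupies two issue slots. */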
33306 /* The function returns true if INSN can be issued only from
33307 the branch slot. */
33309 static bool
33310 is_branch_slot_insn (rtx_insn *insn)
33312 if (!insn || !NONDEBUG_INSN_P (insn)
33313 || GET_CODE (PATTERN (insn)) == USE
33314 || GET_CODE (PATTERN (insn)) == CLOBBER)
33315 return false;
33317 if (rs6000_sched_groups)
33319 enum attr_type type = get_attr_type (insn);
33320 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
33321 return true;
33322 return false;
33325 return false;
33328 /* Return true if OUT_INSN sets a value that is used in the
33329 address generation computation of IN_INSN. */
33330 static bool
33331 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
33333 rtx out_set, in_set;
33335 /* For performance reasons, only handle the simple case where
33336 both insns are a single_set. */
33337 out_set = single_set (out_insn);
33338 if (out_set)
33340 in_set = single_set (in_insn);
33341 if (in_set)
33342 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
33345 return false;
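/* For example, with OUT_INSN "addi 9,9,16" and IN_INSN "lwz 3,0(9)",
the destination of the addi (r9) is mentioned in the load's address,
so this returns true. */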
33348 /* Try to determine base/offset/size parts of the given MEM.
33349 Return true if successful, false if any of the values couldn't
33350 be determined.
33352 This function only looks for REG or REG+CONST address forms.
33353 REG+REG address form will return false. */
33355 static bool
33356 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
33357 HOST_WIDE_INT *size)
33359 rtx addr_rtx;
33360 if (MEM_SIZE_KNOWN_P (mem))
33361 *size = MEM_SIZE (mem);
33362 else
33363 return false;
33365 addr_rtx = (XEXP (mem, 0));
33366 if (GET_CODE (addr_rtx) == PRE_MODIFY)
33367 addr_rtx = XEXP (addr_rtx, 1);
33369 *offset = 0;
33370 while (GET_CODE (addr_rtx) == PLUS
33371 && CONST_INT_P (XEXP (addr_rtx, 1)))
33373 *offset += INTVAL (XEXP (addr_rtx, 1));
33374 addr_rtx = XEXP (addr_rtx, 0);
33376 if (!REG_P (addr_rtx))
33377 return false;
33379 *base = addr_rtx;
33380 return true;
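/* For example, for (mem:SI (plus:DI (reg:DI 9) (const_int 16))) with
a known 4-byte size this yields BASE = (reg:DI 9), OFFSET = 16 and
SIZE = 4, while a (plus (reg) (reg)) address makes it return false. */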
33383 /* Return true if the target storage location of MEM1 is adjacent
33384 to the target storage location of MEM2. */
33387 static bool
33388 adjacent_mem_locations (rtx mem1, rtx mem2)
33390 rtx reg1, reg2;
33391 HOST_WIDE_INT off1, size1, off2, size2;
33393 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33394 && get_memref_parts (mem2, &reg2, &off2, &size2))
33395 return ((REGNO (reg1) == REGNO (reg2))
33396 && ((off1 + size1 == off2)
33397 || (off2 + size2 == off1)));
33399 return false;
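/* For example, two 4-byte stores through 16(r9) and 20(r9) are
adjacent (off1 + size1 == off2), while 16(r9) and 24(r9) are not. */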
33402 /* This function returns true if it can be determined that the two MEM
33403 locations overlap by at least 1 byte based on base reg/offset/size. */
33405 static bool
33406 mem_locations_overlap (rtx mem1, rtx mem2)
33408 rtx reg1, reg2;
33409 HOST_WIDE_INT off1, size1, off2, size2;
33411 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33412 && get_memref_parts (mem2, &reg2, &off2, &size2))
33413 return ((REGNO (reg1) == REGNO (reg2))
33414 && (((off1 <= off2) && (off1 + size1 > off2))
33415 || ((off2 <= off1) && (off2 + size2 > off1))));
33417 return false;
33420 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
33421 Increase the priority to execute INSN earlier, reduce the
33422 priority to execute INSN later. */
33426 static int
33427 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
33429 rtx load_mem, str_mem;
33430 /* On machines (like the 750) which have asymmetric integer units,
33431 where one integer unit can do multiply and divides and the other
33432 can't, reduce the priority of multiply/divide so it is scheduled
33433 before other integer operations. */
33435 #if 0
33436 if (! INSN_P (insn))
33437 return priority;
33439 if (GET_CODE (PATTERN (insn)) == USE)
33440 return priority;
33442 switch (rs6000_cpu_attr) {
33443 case CPU_PPC750:
33444 switch (get_attr_type (insn))
33446 default:
33447 break;
33449 case TYPE_MUL:
33450 case TYPE_DIV:
33451 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
33452 priority, priority);
33453 if (priority >= 0 && priority < 0x01000000)
33454 priority >>= 3;
33455 break;
33458 #endif
33460 if (insn_must_be_first_in_group (insn)
33461 && reload_completed
33462 && current_sched_info->sched_max_insns_priority
33463 && rs6000_sched_restricted_insns_priority)
33466 /* Prioritize insns that can be dispatched only in the first
33467 dispatch slot. */
33468 if (rs6000_sched_restricted_insns_priority == 1)
33469 /* Attach highest priority to insn. This means that in
33470 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
33471 precede 'priority' (critical path) considerations. */
33472 return current_sched_info->sched_max_insns_priority;
33473 else if (rs6000_sched_restricted_insns_priority == 2)
33474 /* Increase priority of insn by a minimal amount. This means that in
33475 haifa-sched.c:ready_sort(), only 'priority' (critical path)
33476 considerations precede dispatch-slot restriction considerations. */
33477 return (priority + 1);
33480 if (rs6000_cpu == PROCESSOR_POWER6
33481 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
33482 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
33483 /* Attach highest priority to insn if the scheduler has just issued two
33484 stores and this instruction is a load, or two loads and this instruction
33485 is a store. Power6 wants loads and stores scheduled alternately
33486 when possible */
33487 return current_sched_info->sched_max_insns_priority;
33489 return priority;
33492 /* Return true if the instruction is nonpipelined on the Cell. */
33493 static bool
33494 is_nonpipeline_insn (rtx_insn *insn)
33496 enum attr_type type;
33497 if (!insn || !NONDEBUG_INSN_P (insn)
33498 || GET_CODE (PATTERN (insn)) == USE
33499 || GET_CODE (PATTERN (insn)) == CLOBBER)
33500 return false;
33502 type = get_attr_type (insn);
33503 if (type == TYPE_MUL
33504 || type == TYPE_DIV
33505 || type == TYPE_SDIV
33506 || type == TYPE_DDIV
33507 || type == TYPE_SSQRT
33508 || type == TYPE_DSQRT
33509 || type == TYPE_MFCR
33510 || type == TYPE_MFCRF
33511 || type == TYPE_MFJMPR)
33513 return true;
33515 return false;
33519 /* Return how many instructions the machine can issue per cycle. */
33521 static int
33522 rs6000_issue_rate (void)
33524 /* Unless scheduling for register pressure, use issue rate of 1 for
33525 first scheduling pass to decrease degradation. */
33526 if (!reload_completed && !flag_sched_pressure)
33527 return 1;
33529 switch (rs6000_cpu_attr) {
33530 case CPU_RS64A:
33531 case CPU_PPC601: /* ? */
33532 case CPU_PPC7450:
33533 return 3;
33534 case CPU_PPC440:
33535 case CPU_PPC603:
33536 case CPU_PPC750:
33537 case CPU_PPC7400:
33538 case CPU_PPC8540:
33539 case CPU_PPC8548:
33540 case CPU_CELL:
33541 case CPU_PPCE300C2:
33542 case CPU_PPCE300C3:
33543 case CPU_PPCE500MC:
33544 case CPU_PPCE500MC64:
33545 case CPU_PPCE5500:
33546 case CPU_PPCE6500:
33547 case CPU_TITAN:
33548 return 2;
33549 case CPU_PPC476:
33550 case CPU_PPC604:
33551 case CPU_PPC604E:
33552 case CPU_PPC620:
33553 case CPU_PPC630:
33554 return 4;
33555 case CPU_POWER4:
33556 case CPU_POWER5:
33557 case CPU_POWER6:
33558 case CPU_POWER7:
33559 return 5;
33560 case CPU_POWER8:
33561 return 7;
33562 case CPU_POWER9:
33563 return 6;
33564 default:
33565 return 1;
33569 /* Return how many instructions to look ahead for better insn
33570 scheduling. */
33572 static int
33573 rs6000_use_sched_lookahead (void)
33575 switch (rs6000_cpu_attr)
33577 case CPU_PPC8540:
33578 case CPU_PPC8548:
33579 return 4;
33581 case CPU_CELL:
33582 return (reload_completed ? 8 : 0);
33584 default:
33585 return 0;
33589 /* We are choosing insn from the ready queue. Return zero if INSN can be
33590 chosen. */
33591 static int
33592 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
33594 if (ready_index == 0)
33595 return 0;
33597 if (rs6000_cpu_attr != CPU_CELL)
33598 return 0;
33600 gcc_assert (insn != NULL_RTX && INSN_P (insn));
33602 if (!reload_completed
33603 || is_nonpipeline_insn (insn)
33604 || is_microcoded_insn (insn))
33605 return 1;
33607 return 0;
33610 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
33611 and return true. */
33613 static bool
33614 find_mem_ref (rtx pat, rtx *mem_ref)
33616 const char * fmt;
33617 int i, j;
33619 /* stack_tie does not produce any real memory traffic. */
33620 if (tie_operand (pat, VOIDmode))
33621 return false;
33623 if (GET_CODE (pat) == MEM)
33625 *mem_ref = pat;
33626 return true;
33629 /* Recursively process the pattern. */
33630 fmt = GET_RTX_FORMAT (GET_CODE (pat));
33632 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
33634 if (fmt[i] == 'e')
33636 if (find_mem_ref (XEXP (pat, i), mem_ref))
33637 return true;
33639 else if (fmt[i] == 'E')
33640 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
33642 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
33643 return true;
33647 return false;
33650 /* Determine if PAT is a PATTERN of a load insn. */
33652 static bool
33653 is_load_insn1 (rtx pat, rtx *load_mem)
33655 if (!pat)
33656 return false;
33658 if (GET_CODE (pat) == SET)
33659 return find_mem_ref (SET_SRC (pat), load_mem);
33661 if (GET_CODE (pat) == PARALLEL)
33663 int i;
33665 for (i = 0; i < XVECLEN (pat, 0); i++)
33666 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
33667 return true;
33670 return false;
33673 /* Determine if INSN loads from memory. */
33675 static bool
33676 is_load_insn (rtx insn, rtx *load_mem)
33678 if (!insn || !INSN_P (insn))
33679 return false;
33681 if (CALL_P (insn))
33682 return false;
33684 return is_load_insn1 (PATTERN (insn), load_mem);
33687 /* Determine if PAT is a PATTERN of a store insn. */
33689 static bool
33690 is_store_insn1 (rtx pat, rtx *str_mem)
33692 if (!pat)
33693 return false;
33695 if (GET_CODE (pat) == SET)
33696 return find_mem_ref (SET_DEST (pat), str_mem);
33698 if (GET_CODE (pat) == PARALLEL)
33700 int i;
33702 for (i = 0; i < XVECLEN (pat, 0); i++)
33703 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
33704 return true;
33707 return false;
33710 /* Determine if INSN stores to memory. */
33712 static bool
33713 is_store_insn (rtx insn, rtx *str_mem)
33715 if (!insn || !INSN_P (insn))
33716 return false;
33718 return is_store_insn1 (PATTERN (insn), str_mem);
33721 /* Return whether TYPE is a Power9 pairable vector instruction type. */
33723 static bool
33724 is_power9_pairable_vec_type (enum attr_type type)
33726 switch (type)
33728 case TYPE_VECSIMPLE:
33729 case TYPE_VECCOMPLEX:
33730 case TYPE_VECDIV:
33731 case TYPE_VECCMP:
33732 case TYPE_VECPERM:
33733 case TYPE_VECFLOAT:
33734 case TYPE_VECFDIV:
33735 case TYPE_VECDOUBLE:
33736 return true;
33737 default:
33738 break;
33740 return false;
33743 /* Return whether the dependence between the producer and consumer
33744 of DEP is considered costly by the current target. */
33746 static bool
33747 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
33749 rtx insn;
33750 rtx next;
33751 rtx load_mem, str_mem;
33753 /* If the flag is not enabled - no dependence is considered costly;
33754 allow all dependent insns in the same group.
33755 This is the most aggressive option. */
33756 if (rs6000_sched_costly_dep == no_dep_costly)
33757 return false;
33759 /* If the flag is set to 1 - a dependence is always considered costly;
33760 do not allow dependent instructions in the same group.
33761 This is the most conservative option. */
33762 if (rs6000_sched_costly_dep == all_deps_costly)
33763 return true;
33765 insn = DEP_PRO (dep);
33766 next = DEP_CON (dep);
33768 if (rs6000_sched_costly_dep == store_to_load_dep_costly
33769 && is_load_insn (next, &load_mem)
33770 && is_store_insn (insn, &str_mem))
33771 /* Prevent load after store in the same group. */
33772 return true;
33774 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
33775 && is_load_insn (next, &load_mem)
33776 && is_store_insn (insn, &str_mem)
33777 && DEP_TYPE (dep) == REG_DEP_TRUE
33778 && mem_locations_overlap (str_mem, load_mem))
33779 /* Prevent load after store in the same group if it is a true
33780 dependence. */
33781 return true;
33783 /* The flag is set to X; dependences with latency >= X are considered costly,
33784 and will not be scheduled in the same group. */
33785 if (rs6000_sched_costly_dep <= max_dep_latency
33786 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
33787 return true;
33789 return false;
33792 /* Return the next insn after INSN that is found before TAIL is reached,
33793 skipping any "non-active" insns - insns that will not actually occupy
33794 an issue slot. Return NULL_RTX if such an insn is not found. */
33796 static rtx_insn *
33797 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
33799 if (insn == NULL_RTX || insn == tail)
33800 return NULL;
33802 while (1)
33804 insn = NEXT_INSN (insn);
33805 if (insn == NULL_RTX || insn == tail)
33806 return NULL;
33808 if (CALL_P (insn)
33809 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
33810 || (NONJUMP_INSN_P (insn)
33811 && GET_CODE (PATTERN (insn)) != USE
33812 && GET_CODE (PATTERN (insn)) != CLOBBER
33813 && INSN_CODE (insn) != CODE_FOR_stack_tie))
33814 break;
33816 return insn;
33819 /* Do Power9 specific sched_reorder2 reordering of ready list. */
33821 static int
33822 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
33824 int pos;
33825 int i;
33826 rtx_insn *tmp;
33827 enum attr_type type, type2;
33829 type = get_attr_type (last_scheduled_insn);
33831 /* Try to issue fixed point divides back-to-back in pairs so they will be
33832 routed to separate execution units and execute in parallel. */
33833 if (type == TYPE_DIV && divide_cnt == 0)
33835 /* First divide has been scheduled. */
33836 divide_cnt = 1;
33838 /* Scan the ready list looking for another divide, if found move it
33839 to the end of the list so it is chosen next. */
33840 pos = lastpos;
33841 while (pos >= 0)
33843 if (recog_memoized (ready[pos]) >= 0
33844 && get_attr_type (ready[pos]) == TYPE_DIV)
33846 tmp = ready[pos];
33847 for (i = pos; i < lastpos; i++)
33848 ready[i] = ready[i + 1];
33849 ready[lastpos] = tmp;
33850 break;
33852 pos--;
33855 else
33857 /* Last insn was the 2nd divide or not a divide, reset the counter. */
33858 divide_cnt = 0;
33860 /* The best dispatch throughput for vector and vector load insns can be
33861 achieved by interleaving a vector and vector load such that they'll
33862 dispatch to the same superslice. If this pairing cannot be achieved
33863 then it is best to pair vector insns together and vector load insns
33864 together.
33866 To aid in this pairing, vec_pairing maintains the current state with
33867 the following values:
33869 0 : Initial state, no vecload/vector pairing has been started.
33871 1 : A vecload or vector insn has been issued and a candidate for
33872 pairing has been found and moved to the end of the ready
33873 list. */
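/* For example, after issuing a vector load (say an lxvx), a pairable
vector-arithmetic insn found on the ready list is rotated to the
end of the list so it issues next and the pair can dispatch to the
same superslice. */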
33874 if (type == TYPE_VECLOAD)
33876 /* Issued a vecload. */
33877 if (vec_pairing == 0)
33879 int vecload_pos = -1;
33880 /* We issued a single vecload, look for a vector insn to pair it
33881 with. If one isn't found, try to pair another vecload. */
33882 pos = lastpos;
33883 while (pos >= 0)
33885 if (recog_memoized (ready[pos]) >= 0)
33887 type2 = get_attr_type (ready[pos]);
33888 if (is_power9_pairable_vec_type (type2))
33890 /* Found a vector insn to pair with, move it to the
33891 end of the ready list so it is scheduled next. */
33892 tmp = ready[pos];
33893 for (i = pos; i < lastpos; i++)
33894 ready[i] = ready[i + 1];
33895 ready[lastpos] = tmp;
33896 vec_pairing = 1;
33897 return cached_can_issue_more;
33899 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
33900 /* Remember position of first vecload seen. */
33901 vecload_pos = pos;
33903 pos--;
33905 if (vecload_pos >= 0)
33907 /* Didn't find a vector to pair with but did find a vecload,
33908 move it to the end of the ready list. */
33909 tmp = ready[vecload_pos];
33910 for (i = vecload_pos; i < lastpos; i++)
33911 ready[i] = ready[i + 1];
33912 ready[lastpos] = tmp;
33913 vec_pairing = 1;
33914 return cached_can_issue_more;
33918 else if (is_power9_pairable_vec_type (type))
33920 /* Issued a vector operation. */
33921 if (vec_pairing == 0)
33923 int vec_pos = -1;
33924 /* We issued a single vector insn, look for a vecload to pair it
33925 with. If one isn't found, try to pair another vector. */
33926 pos = lastpos;
33927 while (pos >= 0)
33929 if (recog_memoized (ready[pos]) >= 0)
33931 type2 = get_attr_type (ready[pos]);
33932 if (type2 == TYPE_VECLOAD)
33934 /* Found a vecload insn to pair with, move it to the
33935 end of the ready list so it is scheduled next. */
33936 tmp = ready[pos];
33937 for (i = pos; i < lastpos; i++)
33938 ready[i] = ready[i + 1];
33939 ready[lastpos] = tmp;
33940 vec_pairing = 1;
33941 return cached_can_issue_more;
33943 else if (is_power9_pairable_vec_type (type2)
33944 && vec_pos == -1)
33945 /* Remember position of first vector insn seen. */
33946 vec_pos = pos;
33948 pos--;
33950 if (vec_pos >= 0)
33952 /* Didn't find a vecload to pair with but did find a vector
33953 insn, move it to the end of the ready list. */
33954 tmp = ready[vec_pos];
33955 for (i = vec_pos; i < lastpos; i++)
33956 ready[i] = ready[i + 1];
33957 ready[lastpos] = tmp;
33958 vec_pairing = 1;
33959 return cached_can_issue_more;
33964 /* We've either finished a vec/vecload pair, couldn't find an insn to
33965 continue the current pair, or the last insn had nothing to do
33966 with pairing. In any case, reset the state. */
33967 vec_pairing = 0;
33970 return cached_can_issue_more;
33973 /* We are about to begin issuing insns for this clock cycle. */
33975 static int
33976 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
33977 rtx_insn **ready ATTRIBUTE_UNUSED,
33978 int *pn_ready ATTRIBUTE_UNUSED,
33979 int clock_var ATTRIBUTE_UNUSED)
33981 int n_ready = *pn_ready;
33983 if (sched_verbose)
33984 fprintf (dump, "// rs6000_sched_reorder :\n");
33986 /* Reorder the ready list, if the second-to-last ready insn
33987 is a nonpipelined insn. */
33988 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
33990 if (is_nonpipeline_insn (ready[n_ready - 1])
33991 && (recog_memoized (ready[n_ready - 2]) > 0))
33992 /* Simply swap first two insns. */
33993 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
33996 if (rs6000_cpu == PROCESSOR_POWER6)
33997 load_store_pendulum = 0;
33999 return rs6000_issue_rate ();
34002 /* Like rs6000_sched_reorder, but called after issuing each insn. */
34004 static int
34005 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
34006 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
34008 if (sched_verbose)
34009 fprintf (dump, "// rs6000_sched_reorder2 :\n");
34011 /* For Power6, we need to handle some special cases to try to keep the
34012 store queue from overflowing and triggering expensive flushes.
34014 This code monitors how load and store instructions are being issued
34015 and skews the ready list one way or the other to increase the likelihood
34016 that a desired instruction is issued at the proper time.
34018 A couple of things are done. First, we maintain a "load_store_pendulum"
34019 to track the current state of load/store issue.
34021 - If the pendulum is at zero, then no loads or stores have been
34022 issued in the current cycle so we do nothing.
34024 - If the pendulum is 1, then a single load has been issued in this
34025 cycle and we attempt to locate another load in the ready list to
34026 issue with it.
34028 - If the pendulum is -2, then two stores have already been
34029 issued in this cycle, so we increase the priority of the first load
34030 in the ready list to increase its likelihood of being chosen first
34031 in the next cycle.
34033 - If the pendulum is -1, then a single store has been issued in this
34034 cycle and we attempt to locate another store in the ready list to
34035 issue with it, preferring a store to an adjacent memory location to
34036 facilitate store pairing in the store queue.
34038 - If the pendulum is 2, then two loads have already been
34039 issued in this cycle, so we increase the priority of the first store
34040 in the ready list to increase its likelihood of being chosen first
34041 in the next cycle.
34043 - If the pendulum < -2 or > 2, then do nothing.
34045 Note: This code covers the most common scenarios. There exist non
34046 load/store instructions which make use of the LSU and which
34047 would need to be accounted for to strictly model the behavior
34048 of the machine. Those instructions are currently unaccounted
34049 for to help minimize compile time overhead of this code. */
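/* For example, if a store issues first in a cycle the pendulum swings
to -1 and we look for a second (ideally adjacent) store to pair with
it; a second store swings it to -2, after which the first load on the
ready list gets a priority bump for the following cycle. */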
34051 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
34053 int pos;
34054 int i;
34055 rtx_insn *tmp;
34056 rtx load_mem, str_mem;
34058 if (is_store_insn (last_scheduled_insn, &str_mem))
34059 /* Issuing a store, swing the load_store_pendulum to the left */
34060 load_store_pendulum--;
34061 else if (is_load_insn (last_scheduled_insn, &load_mem))
34062 /* Issuing a load, swing the load_store_pendulum to the right */
34063 load_store_pendulum++;
34064 else
34065 return cached_can_issue_more;
34067 /* If the pendulum is balanced, or there is only one instruction on
34068 the ready list, then all is well, so return. */
34069 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
34070 return cached_can_issue_more;
34072 if (load_store_pendulum == 1)
34074 /* A load has been issued in this cycle. Scan the ready list
34075 for another load to issue with it */
34076 pos = *pn_ready-1;
34078 while (pos >= 0)
34080 if (is_load_insn (ready[pos], &load_mem))
34082 /* Found a load. Move it to the head of the ready list,
34083 and adjust its priority so that it is more likely to
34084 stay there */
34085 tmp = ready[pos];
34086 for (i=pos; i<*pn_ready-1; i++)
34087 ready[i] = ready[i + 1];
34088 ready[*pn_ready-1] = tmp;
34090 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34091 INSN_PRIORITY (tmp)++;
34092 break;
34094 pos--;
34097 else if (load_store_pendulum == -2)
34099 /* Two stores have been issued in this cycle. Increase the
34100 priority of the first load in the ready list to favor it for
34101 issuing in the next cycle. */
34102 pos = *pn_ready-1;
34104 while (pos >= 0)
34106 if (is_load_insn (ready[pos], &load_mem)
34107 && !sel_sched_p ()
34108 && INSN_PRIORITY_KNOWN (ready[pos]))
34110 INSN_PRIORITY (ready[pos])++;
34112 /* Adjust the pendulum to account for the fact that a load
34113 was found and increased in priority. This is to prevent
34114 increasing the priority of multiple loads */
34115 load_store_pendulum--;
34117 break;
34119 pos--;
34122 else if (load_store_pendulum == -1)
34124 /* A store has been issued in this cycle. Scan the ready list for
34125 another store to issue with it, preferring a store to an adjacent
34126 memory location */
34127 int first_store_pos = -1;
34129 pos = *pn_ready-1;
34131 while (pos >= 0)
34133 if (is_store_insn (ready[pos], &str_mem))
34135 rtx str_mem2;
34136 /* Maintain the index of the first store found on the
34137 list */
34138 if (first_store_pos == -1)
34139 first_store_pos = pos;
34141 if (is_store_insn (last_scheduled_insn, &str_mem2)
34142 && adjacent_mem_locations (str_mem, str_mem2))
34144 /* Found an adjacent store. Move it to the head of the
34145 ready list, and adjust its priority so that it is
34146 more likely to stay there */
34147 tmp = ready[pos];
34148 for (i=pos; i<*pn_ready-1; i++)
34149 ready[i] = ready[i + 1];
34150 ready[*pn_ready-1] = tmp;
34152 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34153 INSN_PRIORITY (tmp)++;
34155 first_store_pos = -1;
34157 break;
34160 pos--;
34163 if (first_store_pos >= 0)
34165 /* An adjacent store wasn't found, but a non-adjacent store was,
34166 so move the non-adjacent store to the front of the ready
34167 list, and adjust its priority so that it is more likely to
34168 stay there. */
34169 tmp = ready[first_store_pos];
34170 for (i=first_store_pos; i<*pn_ready-1; i++)
34171 ready[i] = ready[i + 1];
34172 ready[*pn_ready-1] = tmp;
34173 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34174 INSN_PRIORITY (tmp)++;
34177 else if (load_store_pendulum == 2)
34179 /* Two loads have been issued in this cycle. Increase the priority
34180 of the first store in the ready list to favor it for issuing in
34181 the next cycle. */
34182 pos = *pn_ready-1;
34184 while (pos >= 0)
34186 if (is_store_insn (ready[pos], &str_mem)
34187 && !sel_sched_p ()
34188 && INSN_PRIORITY_KNOWN (ready[pos]))
34190 INSN_PRIORITY (ready[pos])++;
34192 /* Adjust the pendulum to account for the fact that a store
34193 was found and increased in priority. This is to prevent
34194 increasing the priority of multiple stores */
34195 load_store_pendulum++;
34197 break;
34199 pos--;
34204 /* Do Power9 dependent reordering if necessary. */
34205 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
34206 && recog_memoized (last_scheduled_insn) >= 0)
34207 return power9_sched_reorder2 (ready, *pn_ready - 1);
34209 return cached_can_issue_more;
34212 /* Return whether the presence of INSN causes a dispatch group termination
34213 of group WHICH_GROUP.
34215 If WHICH_GROUP == current_group, this function will return true if INSN
34216 causes the termination of the current group (i.e., the dispatch group to
34217 which INSN belongs). This means that INSN will be the last insn in the
34218 group it belongs to.
34220 If WHICH_GROUP == previous_group, this function will return true if INSN
34221 causes the termination of the previous group (i.e., the dispatch group that
34222 precedes the group to which INSN belongs). This means that INSN will be
34223 the first insn in the group it belongs to. */
34225 static bool
34226 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
34228 bool first, last;
34230 if (! insn)
34231 return false;
34233 first = insn_must_be_first_in_group (insn);
34234 last = insn_must_be_last_in_group (insn);
34236 if (first && last)
34237 return true;
34239 if (which_group == current_group)
34240 return last;
34241 else if (which_group == previous_group)
34242 return first;
34244 return false;
34248 static bool
34249 insn_must_be_first_in_group (rtx_insn *insn)
34251 enum attr_type type;
34253 if (!insn
34254 || NOTE_P (insn)
34255 || DEBUG_INSN_P (insn)
34256 || GET_CODE (PATTERN (insn)) == USE
34257 || GET_CODE (PATTERN (insn)) == CLOBBER)
34258 return false;
34260 switch (rs6000_cpu)
34262 case PROCESSOR_POWER5:
34263 if (is_cracked_insn (insn))
34264 return true;
34265 /* FALLTHRU */
34266 case PROCESSOR_POWER4:
34267 if (is_microcoded_insn (insn))
34268 return true;
34270 if (!rs6000_sched_groups)
34271 return false;
34273 type = get_attr_type (insn);
34275 switch (type)
34277 case TYPE_MFCR:
34278 case TYPE_MFCRF:
34279 case TYPE_MTCR:
34280 case TYPE_DELAYED_CR:
34281 case TYPE_CR_LOGICAL:
34282 case TYPE_MTJMPR:
34283 case TYPE_MFJMPR:
34284 case TYPE_DIV:
34285 case TYPE_LOAD_L:
34286 case TYPE_STORE_C:
34287 case TYPE_ISYNC:
34288 case TYPE_SYNC:
34289 return true;
34290 default:
34291 break;
34293 break;
34294 case PROCESSOR_POWER6:
34295 type = get_attr_type (insn);
34297 switch (type)
34299 case TYPE_EXTS:
34300 case TYPE_CNTLZ:
34301 case TYPE_TRAP:
34302 case TYPE_MUL:
34303 case TYPE_INSERT:
34304 case TYPE_FPCOMPARE:
34305 case TYPE_MFCR:
34306 case TYPE_MTCR:
34307 case TYPE_MFJMPR:
34308 case TYPE_MTJMPR:
34309 case TYPE_ISYNC:
34310 case TYPE_SYNC:
34311 case TYPE_LOAD_L:
34312 case TYPE_STORE_C:
34313 return true;
34314 case TYPE_SHIFT:
34315 if (get_attr_dot (insn) == DOT_NO
34316 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34317 return true;
34318 else
34319 break;
34320 case TYPE_DIV:
34321 if (get_attr_size (insn) == SIZE_32)
34322 return true;
34323 else
34324 break;
34325 case TYPE_LOAD:
34326 case TYPE_STORE:
34327 case TYPE_FPLOAD:
34328 case TYPE_FPSTORE:
34329 if (get_attr_update (insn) == UPDATE_YES)
34330 return true;
34331 else
34332 break;
34333 default:
34334 break;
34336 break;
34337 case PROCESSOR_POWER7:
34338 type = get_attr_type (insn);
34340 switch (type)
34342 case TYPE_CR_LOGICAL:
34343 case TYPE_MFCR:
34344 case TYPE_MFCRF:
34345 case TYPE_MTCR:
34346 case TYPE_DIV:
34347 case TYPE_ISYNC:
34348 case TYPE_LOAD_L:
34349 case TYPE_STORE_C:
34350 case TYPE_MFJMPR:
34351 case TYPE_MTJMPR:
34352 return true;
34353 case TYPE_MUL:
34354 case TYPE_SHIFT:
34355 case TYPE_EXTS:
34356 if (get_attr_dot (insn) == DOT_YES)
34357 return true;
34358 else
34359 break;
34360 case TYPE_LOAD:
34361 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34362 || get_attr_update (insn) == UPDATE_YES)
34363 return true;
34364 else
34365 break;
34366 case TYPE_STORE:
34367 case TYPE_FPLOAD:
34368 case TYPE_FPSTORE:
34369 if (get_attr_update (insn) == UPDATE_YES)
34370 return true;
34371 else
34372 break;
34373 default:
34374 break;
34376 break;
34377 case PROCESSOR_POWER8:
34378 type = get_attr_type (insn);
34380 switch (type)
34382 case TYPE_CR_LOGICAL:
34383 case TYPE_DELAYED_CR:
34384 case TYPE_MFCR:
34385 case TYPE_MFCRF:
34386 case TYPE_MTCR:
34387 case TYPE_SYNC:
34388 case TYPE_ISYNC:
34389 case TYPE_LOAD_L:
34390 case TYPE_STORE_C:
34391 case TYPE_VECSTORE:
34392 case TYPE_MFJMPR:
34393 case TYPE_MTJMPR:
34394 return true;
34395 case TYPE_SHIFT:
34396 case TYPE_EXTS:
34397 case TYPE_MUL:
34398 if (get_attr_dot (insn) == DOT_YES)
34399 return true;
34400 else
34401 break;
34402 case TYPE_LOAD:
34403 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34404 || get_attr_update (insn) == UPDATE_YES)
34405 return true;
34406 else
34407 break;
34408 case TYPE_STORE:
34409 if (get_attr_update (insn) == UPDATE_YES
34410 && get_attr_indexed (insn) == INDEXED_YES)
34411 return true;
34412 else
34413 break;
34414 default:
34415 break;
34417 break;
34418 default:
34419 break;
34422 return false;
34425 static bool
34426 insn_must_be_last_in_group (rtx_insn *insn)
34428 enum attr_type type;
34430 if (!insn
34431 || NOTE_P (insn)
34432 || DEBUG_INSN_P (insn)
34433 || GET_CODE (PATTERN (insn)) == USE
34434 || GET_CODE (PATTERN (insn)) == CLOBBER)
34435 return false;
34437 switch (rs6000_cpu) {
34438 case PROCESSOR_POWER4:
34439 case PROCESSOR_POWER5:
34440 if (is_microcoded_insn (insn))
34441 return true;
34443 if (is_branch_slot_insn (insn))
34444 return true;
34446 break;
34447 case PROCESSOR_POWER6:
34448 type = get_attr_type (insn);
34450 switch (type)
34452 case TYPE_EXTS:
34453 case TYPE_CNTLZ:
34454 case TYPE_TRAP:
34455 case TYPE_MUL:
34456 case TYPE_FPCOMPARE:
34457 case TYPE_MFCR:
34458 case TYPE_MTCR:
34459 case TYPE_MFJMPR:
34460 case TYPE_MTJMPR:
34461 case TYPE_ISYNC:
34462 case TYPE_SYNC:
34463 case TYPE_LOAD_L:
34464 case TYPE_STORE_C:
34465 return true;
34466 case TYPE_SHIFT:
34467 if (get_attr_dot (insn) == DOT_NO
34468 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34469 return true;
34470 else
34471 break;
34472 case TYPE_DIV:
34473 if (get_attr_size (insn) == SIZE_32)
34474 return true;
34475 else
34476 break;
34477 default:
34478 break;
34480 break;
34481 case PROCESSOR_POWER7:
34482 type = get_attr_type (insn);
34484 switch (type)
34486 case TYPE_ISYNC:
34487 case TYPE_SYNC:
34488 case TYPE_LOAD_L:
34489 case TYPE_STORE_C:
34490 return true;
34491 case TYPE_LOAD:
34492 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34493 && get_attr_update (insn) == UPDATE_YES)
34494 return true;
34495 else
34496 break;
34497 case TYPE_STORE:
34498 if (get_attr_update (insn) == UPDATE_YES
34499 && get_attr_indexed (insn) == INDEXED_YES)
34500 return true;
34501 else
34502 break;
34503 default:
34504 break;
34506 break;
34507 case PROCESSOR_POWER8:
34508 type = get_attr_type (insn);
34510 switch (type)
34512 case TYPE_MFCR:
34513 case TYPE_MTCR:
34514 case TYPE_ISYNC:
34515 case TYPE_SYNC:
34516 case TYPE_LOAD_L:
34517 case TYPE_STORE_C:
34518 return true;
34519 case TYPE_LOAD:
34520 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34521 && get_attr_update (insn) == UPDATE_YES)
34522 return true;
34523 else
34524 break;
34525 case TYPE_STORE:
34526 if (get_attr_update (insn) == UPDATE_YES
34527 && get_attr_indexed (insn) == INDEXED_YES)
34528 return true;
34529 else
34530 break;
34531 default:
34532 break;
34534 break;
34535 default:
34536 break;
34539 return false;
34542 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
34543 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
34545 static bool
34546 is_costly_group (rtx *group_insns, rtx next_insn)
34548 int i;
34549 int issue_rate = rs6000_issue_rate ();
34551 for (i = 0; i < issue_rate; i++)
34553 sd_iterator_def sd_it;
34554 dep_t dep;
34555 rtx insn = group_insns[i];
34557 if (!insn)
34558 continue;
34560 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
34562 rtx next = DEP_CON (dep);
34564 if (next == next_insn
34565 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
34566 return true;
34570 return false;
34573 /* Utility function used by redefine_groups.
34574 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
34575 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
34576 to keep it "far" (in a separate group) from GROUP_INSNS, following
34577 one of the following schemes, depending on the value of the flag
34578 -minsert_sched_nops = X:
34579 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
34580 in order to force NEXT_INSN into a separate group.
34581 (2) X < sched_finish_regroup_exact: insert exactly X nops.
34582 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
34583 insertion (has a group just ended, how many vacant issue slots remain in the
34584 last group, and how many dispatch groups were encountered so far). */
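/* For example, with -minsert-sched-nops=2 (scheme (2) above), exactly
two nops are inserted before a costly-dependent NEXT_INSN, whereas
with -minsert-sched-nops=regroup_exact enough nops are inserted to
force NEXT_INSN into the next dispatch group. */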
34586 static int
34587 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
34588 rtx_insn *next_insn, bool *group_end, int can_issue_more,
34589 int *group_count)
34591 rtx nop;
34592 bool force;
34593 int issue_rate = rs6000_issue_rate ();
34594 bool end = *group_end;
34595 int i;
34597 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
34598 return can_issue_more;
34600 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
34601 return can_issue_more;
34603 force = is_costly_group (group_insns, next_insn);
34604 if (!force)
34605 return can_issue_more;
34607 if (sched_verbose > 6)
34608 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
34609 *group_count ,can_issue_more);
34611 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
34613 if (*group_end)
34614 can_issue_more = 0;
34616 /* Since only a branch can be issued in the last issue_slot, it is
34617 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
34618 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
34619 in this case the last nop will start a new group and the branch
34620 will be forced to the new group. */
34621 if (can_issue_more && !is_branch_slot_insn (next_insn))
34622 can_issue_more--;
34624 /* Do we have a special group ending nop? */
34625 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
34626 || rs6000_cpu_attr == CPU_POWER8)
34628 nop = gen_group_ending_nop ();
34629 emit_insn_before (nop, next_insn);
34630 can_issue_more = 0;
34632 else
34633 while (can_issue_more > 0)
34635 nop = gen_nop ();
34636 emit_insn_before (nop, next_insn);
34637 can_issue_more--;
34640 *group_end = true;
34641 return 0;
34644 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
34646 int n_nops = rs6000_sched_insert_nops;
34648 /* Nops can't be issued from the branch slot, so the effective
34649 issue_rate for nops is 'issue_rate - 1'. */
34650 if (can_issue_more == 0)
34651 can_issue_more = issue_rate;
34652 can_issue_more--;
34653 if (can_issue_more == 0)
34655 can_issue_more = issue_rate - 1;
34656 (*group_count)++;
34657 end = true;
34658 for (i = 0; i < issue_rate; i++)
34660 group_insns[i] = 0;
34664 while (n_nops > 0)
34666 nop = gen_nop ();
34667 emit_insn_before (nop, next_insn);
34668 if (can_issue_more == issue_rate - 1) /* new group begins */
34669 end = false;
34670 can_issue_more--;
34671 if (can_issue_more == 0)
34673 can_issue_more = issue_rate - 1;
34674 (*group_count)++;
34675 end = true;
34676 for (i = 0; i < issue_rate; i++)
34678 group_insns[i] = 0;
34681 n_nops--;
34684 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
34685 can_issue_more++;
34687 /* Is next_insn going to start a new group? */
34688 *group_end
34689 = (end
34690 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
34691 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
34692 || (can_issue_more < issue_rate &&
34693 insn_terminates_group_p (next_insn, previous_group)));
34694 if (*group_end && end)
34695 (*group_count)--;
34697 if (sched_verbose > 6)
34698 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
34699 *group_count, can_issue_more);
34700 return can_issue_more;
34703 return can_issue_more;
34706 /* This function tries to synch the dispatch groups that the compiler "sees"
34707 with the dispatch groups that the processor dispatcher is expected to
34708 form in practice. It tries to achieve this synchronization by forcing the
34709 estimated processor grouping on the compiler (as opposed to the function
34710 'pad_groups' which tries to force the scheduler's grouping on the processor).
34712 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
34713 examines the (estimated) dispatch groups that will be formed by the processor
34714 dispatcher. It marks these group boundaries to reflect the estimated
34715 processor grouping, overriding the grouping that the scheduler had marked.
34716 Depending on the value of the flag '-minsert-sched-nops' this function can
34717 force certain insns into separate groups or force a certain distance between
34718 them by inserting nops, for example, if there exists a "costly dependence"
34719 between the insns.
34721 The function estimates the group boundaries that the processor will form as
34722 follows: It keeps track of how many vacant issue slots are available after
34723 each insn. A subsequent insn will start a new group if one of the following
34724 4 cases applies:
34725 - no more vacant issue slots remain in the current dispatch group.
34726 - only the last issue slot, which is the branch slot, is vacant, but the next
34727 insn is not a branch.
34728 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
34729 which means that a cracked insn (which occupies two issue slots) can't be
34730 issued in this group.
34731 - fewer than 'issue_rate' slots are vacant, and the next insn is one that
34732 always starts a new group. */
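/* For instance (illustrative): with issue_rate == 4, after three
   non-branch insns only the branch slot remains, so a following add
   begins a new group (case 2) while a following conditional branch
   may still be dispatched in the current one.  Similarly, a cracked
   insn needs two non-branch slots, so once can_issue_more <= 2 it
   must begin a new group (case 3).  */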
34734 static int
34735 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
34736 rtx_insn *tail)
34738 rtx_insn *insn, *next_insn;
34739 int issue_rate;
34740 int can_issue_more;
34741 int slot, i;
34742 bool group_end;
34743 int group_count = 0;
34744 rtx *group_insns;
34746 /* Initialize. */
34747 issue_rate = rs6000_issue_rate ();
34748 group_insns = XALLOCAVEC (rtx, issue_rate);
34749 for (i = 0; i < issue_rate; i++)
34751 group_insns[i] = 0;
34753 can_issue_more = issue_rate;
34754 slot = 0;
34755 insn = get_next_active_insn (prev_head_insn, tail);
34756 group_end = false;
34758 while (insn != NULL_RTX)
34760 slot = (issue_rate - can_issue_more);
34761 group_insns[slot] = insn;
34762 can_issue_more =
34763 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
34764 if (insn_terminates_group_p (insn, current_group))
34765 can_issue_more = 0;
34767 next_insn = get_next_active_insn (insn, tail);
34768 if (next_insn == NULL_RTX)
34769 return group_count + 1;
34771 /* Is next_insn going to start a new group? */
34772 group_end
34773 = (can_issue_more == 0
34774 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
34775 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
34776 || (can_issue_more < issue_rate &&
34777 insn_terminates_group_p (next_insn, previous_group)));
34779 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
34780 next_insn, &group_end, can_issue_more,
34781 &group_count);
34783 if (group_end)
34785 group_count++;
34786 can_issue_more = 0;
34787 for (i = 0; i < issue_rate; i++)
34789 group_insns[i] = 0;
34793 if (GET_MODE (next_insn) == TImode && can_issue_more)
34794 PUT_MODE (next_insn, VOIDmode);
34795 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
34796 PUT_MODE (next_insn, TImode);
34798 insn = next_insn;
34799 if (can_issue_more == 0)
34800 can_issue_more = issue_rate;
34801 } /* while */
34803 return group_count;
34806 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
34807 dispatch group boundaries that the scheduler had marked. Pad with nops
34808 any dispatch groups which have vacant issue slots, in order to force the
34809 scheduler's grouping on the processor dispatcher. The function
34810 returns the number of dispatch groups found. */
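/* Example (illustrative): on a 4-wide machine, if the scheduler
   marked a boundary after { I1, I2 } and neither insn forces group
   termination, one nop is emitted:

       I1 I2 nop | I3 ...

   The nop fills the last non-branch slot; the branch slot is left
   vacant, since a non-branch I3 could not have occupied it anyway.  */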
34812 static int
34813 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
34814 rtx_insn *tail)
34816 rtx_insn *insn, *next_insn;
34817 rtx nop;
34818 int issue_rate;
34819 int can_issue_more;
34820 int group_end;
34821 int group_count = 0;
34823 /* Initialize issue_rate. */
34824 issue_rate = rs6000_issue_rate ();
34825 can_issue_more = issue_rate;
34827 insn = get_next_active_insn (prev_head_insn, tail);
34828 next_insn = get_next_active_insn (insn, tail);
34830 while (insn != NULL_RTX)
34832 can_issue_more =
34833 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
34835 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
34837 if (next_insn == NULL_RTX)
34838 break;
34840 if (group_end)
34842 /* If the scheduler had marked group termination at this location
34843 (between insn and next_insn), and neither insn nor next_insn will
34844 force group termination, pad the group with nops to force group
34845 termination. */
34846 if (can_issue_more
34847 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
34848 && !insn_terminates_group_p (insn, current_group)
34849 && !insn_terminates_group_p (next_insn, previous_group))
34851 if (!is_branch_slot_insn (next_insn))
34852 can_issue_more--;
34854 while (can_issue_more)
34856 nop = gen_nop ();
34857 emit_insn_before (nop, next_insn);
34858 can_issue_more--;
34862 can_issue_more = issue_rate;
34863 group_count++;
34866 insn = next_insn;
34867 next_insn = get_next_active_insn (insn, tail);
34870 return group_count;
34873 /* We're beginning a new block. Initialize data structures as necessary. */
34875 static void
34876 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
34877 int sched_verbose ATTRIBUTE_UNUSED,
34878 int max_ready ATTRIBUTE_UNUSED)
34880 last_scheduled_insn = NULL;
34881 load_store_pendulum = 0;
34882 divide_cnt = 0;
34883 vec_pairing = 0;
34886 /* The following function is called at the end of scheduling BB.
34887 After reload, it inserts nops to enforce insn group bundling. */
34889 static void
34890 rs6000_sched_finish (FILE *dump, int sched_verbose)
34892 int n_groups;
34894 if (sched_verbose)
34895 fprintf (dump, "=== Finishing schedule.\n");
34897 if (reload_completed && rs6000_sched_groups)
34899 /* Do not run sched_finish hook when selective scheduling enabled. */
34900 if (sel_sched_p ())
34901 return;
34903 if (rs6000_sched_insert_nops == sched_finish_none)
34904 return;
34906 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
34907 n_groups = pad_groups (dump, sched_verbose,
34908 current_sched_info->prev_head,
34909 current_sched_info->next_tail);
34910 else
34911 n_groups = redefine_groups (dump, sched_verbose,
34912 current_sched_info->prev_head,
34913 current_sched_info->next_tail);
34915 if (sched_verbose >= 6)
34917 fprintf (dump, "ngroups = %d\n", n_groups);
34918 print_rtl (dump, current_sched_info->prev_head);
34919 fprintf (dump, "Done finish_sched\n");
34924 struct rs6000_sched_context
34926 short cached_can_issue_more;
34927 rtx_insn *last_scheduled_insn;
34928 int load_store_pendulum;
34929 int divide_cnt;
34930 int vec_pairing;
34933 typedef struct rs6000_sched_context rs6000_sched_context_def;
34934 typedef rs6000_sched_context_def *rs6000_sched_context_t;
34936 /* Allocate store for new scheduling context. */
34937 static void *
34938 rs6000_alloc_sched_context (void)
34940 return xmalloc (sizeof (rs6000_sched_context_def));
34943 /* If CLEAN_P is true, initialize _SC with clean data; otherwise,
34944 initialize it from the global context. */
34945 static void
34946 rs6000_init_sched_context (void *_sc, bool clean_p)
34948 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
34950 if (clean_p)
34952 sc->cached_can_issue_more = 0;
34953 sc->last_scheduled_insn = NULL;
34954 sc->load_store_pendulum = 0;
34955 sc->divide_cnt = 0;
34956 sc->vec_pairing = 0;
34958 else
34960 sc->cached_can_issue_more = cached_can_issue_more;
34961 sc->last_scheduled_insn = last_scheduled_insn;
34962 sc->load_store_pendulum = load_store_pendulum;
34963 sc->divide_cnt = divide_cnt;
34964 sc->vec_pairing = vec_pairing;
34968 /* Sets the global scheduling context to the one pointed to by _SC. */
34969 static void
34970 rs6000_set_sched_context (void *_sc)
34972 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
34974 gcc_assert (sc != NULL);
34976 cached_can_issue_more = sc->cached_can_issue_more;
34977 last_scheduled_insn = sc->last_scheduled_insn;
34978 load_store_pendulum = sc->load_store_pendulum;
34979 divide_cnt = sc->divide_cnt;
34980 vec_pairing = sc->vec_pairing;
34983 /* Free _SC. */
34984 static void
34985 rs6000_free_sched_context (void *_sc)
34987 gcc_assert (_sc != NULL);
34989 free (_sc);
34992 static bool
34993 rs6000_sched_can_speculate_insn (rtx_insn *insn)
34995 switch (get_attr_type (insn))
34997 case TYPE_DIV:
34998 case TYPE_SDIV:
34999 case TYPE_DDIV:
35000 case TYPE_VECDIV:
35001 case TYPE_SSQRT:
35002 case TYPE_DSQRT:
35003 return false;
35005 default:
35006 return true;
35010 /* Length in units of the trampoline for entering a nested function. */
35012 int
35013 rs6000_trampoline_size (void)
35015 int ret = 0;
35017 switch (DEFAULT_ABI)
35019 default:
35020 gcc_unreachable ();
35022 case ABI_AIX:
35023 ret = (TARGET_32BIT) ? 12 : 24;
35024 break;
35026 case ABI_ELFv2:
35027 gcc_assert (!TARGET_32BIT);
35028 ret = 32;
35029 break;
35031 case ABI_DARWIN:
35032 case ABI_V4:
35033 ret = (TARGET_32BIT) ? 40 : 48;
35034 break;
35037 return ret;
35040 /* Emit RTL insns to initialize the variable parts of a trampoline.
35041 FNADDR is an RTX for the address of the function's pure code.
35042 CXT is an RTX for the static chain value for the function. */
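/* Sketch of the AIX trampoline built below (illustrative):

       word 0: function entry address (word 0 of FNADDR's descriptor)
       word 1: TOC pointer            (word 1 of FNADDR's descriptor)
       word 2: static chain value     (CXT)

   where each word is 4 bytes for -m32 and 8 bytes for -m64, matching
   'regsize' below.  */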
35044 static void
35045 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
35047 int regsize = (TARGET_32BIT) ? 4 : 8;
35048 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
35049 rtx ctx_reg = force_reg (Pmode, cxt);
35050 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
35052 switch (DEFAULT_ABI)
35054 default:
35055 gcc_unreachable ();
35057 /* Under AIX, just build the 3-word function descriptor. */
35058 case ABI_AIX:
35060 rtx fnmem, fn_reg, toc_reg;
35062 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
35063 error ("cannot take the address of a nested function if you use "
35064 "the %<-mno-pointers-to-nested-functions%> option");
35066 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
35067 fn_reg = gen_reg_rtx (Pmode);
35068 toc_reg = gen_reg_rtx (Pmode);
35070 /* Macro to shorten the code expansions below. */
35071 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
35073 m_tramp = replace_equiv_address (m_tramp, addr);
35075 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
35076 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
35077 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
35078 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
35079 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
35081 # undef MEM_PLUS
35083 break;
35085 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
35086 case ABI_ELFv2:
35087 case ABI_DARWIN:
35088 case ABI_V4:
35089 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
35090 LCT_NORMAL, VOIDmode, 4,
35091 addr, Pmode,
35092 GEN_INT (rs6000_trampoline_size ()), SImode,
35093 fnaddr, Pmode,
35094 ctx_reg, Pmode);
35095 break;
35100 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
35101 identifier as an argument, so the front end shouldn't look it up. */
35103 static bool
35104 rs6000_attribute_takes_identifier_p (const_tree attr_id)
35106 return is_attribute_p ("altivec", attr_id);
35109 /* Handle the "altivec" attribute. The attribute may have
35110 arguments as follows:
35112 __attribute__((altivec(vector__)))
35113 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
35114 __attribute__((altivec(bool__))) (always followed by 'unsigned')
35116 and may appear more than once (e.g., 'vector bool char') in a
35117 given declaration. */
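/* For example (illustrative): the context-sensitive keywords expand
   roughly as

       vector -> __attribute__((altivec(vector__)))
       pixel  -> __attribute__((altivec(pixel__))) unsigned short
       bool   -> __attribute__((altivec(bool__))) unsigned

   so 'vector bool char' reaches this handler twice, once for each
   attribute.  */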
35119 static tree
35120 rs6000_handle_altivec_attribute (tree *node,
35121 tree name ATTRIBUTE_UNUSED,
35122 tree args,
35123 int flags ATTRIBUTE_UNUSED,
35124 bool *no_add_attrs)
35126 tree type = *node, result = NULL_TREE;
35127 machine_mode mode;
35128 int unsigned_p;
35129 char altivec_type
35130 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
35131 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
35132 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
35133 : '?');
35135 while (POINTER_TYPE_P (type)
35136 || TREE_CODE (type) == FUNCTION_TYPE
35137 || TREE_CODE (type) == METHOD_TYPE
35138 || TREE_CODE (type) == ARRAY_TYPE)
35139 type = TREE_TYPE (type);
35141 mode = TYPE_MODE (type);
35143 /* Check for invalid AltiVec type qualifiers. */
35144 if (type == long_double_type_node)
35145 error ("use of %<long double%> in AltiVec types is invalid");
35146 else if (type == boolean_type_node)
35147 error ("use of boolean types in AltiVec types is invalid");
35148 else if (TREE_CODE (type) == COMPLEX_TYPE)
35149 error ("use of %<complex%> in AltiVec types is invalid");
35150 else if (DECIMAL_FLOAT_MODE_P (mode))
35151 error ("use of decimal floating point types in AltiVec types is invalid");
35152 else if (!TARGET_VSX)
35154 if (type == long_unsigned_type_node || type == long_integer_type_node)
35156 if (TARGET_64BIT)
35157 error ("use of %<long%> in AltiVec types is invalid for "
35158 "64-bit code without -mvsx");
35159 else if (rs6000_warn_altivec_long)
35160 warning (0, "use of %<long%> in AltiVec types is deprecated; "
35161 "use %<int%>");
35163 else if (type == long_long_unsigned_type_node
35164 || type == long_long_integer_type_node)
35165 error ("use of %<long long%> in AltiVec types is invalid without "
35166 "-mvsx");
35167 else if (type == double_type_node)
35168 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
35171 switch (altivec_type)
35173 case 'v':
35174 unsigned_p = TYPE_UNSIGNED (type);
35175 switch (mode)
35177 case TImode:
35178 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
35179 break;
35180 case DImode:
35181 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
35182 break;
35183 case SImode:
35184 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
35185 break;
35186 case HImode:
35187 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
35188 break;
35189 case QImode:
35190 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
35191 break;
35192 case SFmode: result = V4SF_type_node; break;
35193 case DFmode: result = V2DF_type_node; break;
35194 /* If the user says 'vector int bool', we may be handed the 'bool'
35195 attribute _before_ the 'vector' attribute, and so select the
35196 proper type in the 'b' case below. */
35197 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
35198 case V2DImode: case V2DFmode:
35199 result = type;
35200 default: break;
35202 break;
35203 case 'b':
35204 switch (mode)
35206 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
35207 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
35208 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
35209 case QImode: case V16QImode: result = bool_V16QI_type_node;
35210 default: break;
35212 break;
35213 case 'p':
35214 switch (mode)
35216 case V8HImode: result = pixel_V8HI_type_node;
35217 default: break;
35219 default: break;
35222 /* Propagate qualifiers attached to the element type
35223 onto the vector type. */
35224 if (result && result != type && TYPE_QUALS (type))
35225 result = build_qualified_type (result, TYPE_QUALS (type));
35227 *no_add_attrs = true; /* No need to hang on to the attribute. */
35229 if (result)
35230 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
35232 return NULL_TREE;
35235 /* AltiVec defines four built-in scalar types that serve as vector
35236 elements; we must teach the compiler how to mangle them. */
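/* Examples (illustrative): a 'vector bool int' element type mangles
   via "U6__booli" and '__pixel' via "u7__pixel", while ordinary
   scalar and vector types fall through to the default C++ mangler by
   returning NULL.  */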
35238 static const char *
35239 rs6000_mangle_type (const_tree type)
35241 type = TYPE_MAIN_VARIANT (type);
35243 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
35244 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
35245 return NULL;
35247 if (type == bool_char_type_node) return "U6__boolc";
35248 if (type == bool_short_type_node) return "U6__bools";
35249 if (type == pixel_type_node) return "u7__pixel";
35250 if (type == bool_int_type_node) return "U6__booli";
35251 if (type == bool_long_type_node) return "U6__booll";
35253 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
35254 "g" for IBM extended double, no matter whether it is long double (using
35255 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
35256 if (TARGET_FLOAT128_TYPE)
35258 if (type == ieee128_float_type_node)
35259 return "U10__float128";
35261 if (type == ibm128_float_type_node)
35262 return "g";
35264 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
35265 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
35268 /* Mangle IBM extended float long double as `g' (__float128) on
35269 powerpc*-linux where long-double-64 previously was the default. */
35270 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
35271 && TARGET_ELF
35272 && TARGET_LONG_DOUBLE_128
35273 && !TARGET_IEEEQUAD)
35274 return "g";
35276 /* For all other types, use normal C++ mangling. */
35277 return NULL;
35280 /* Handle a "longcall" or "shortcall" attribute; arguments as in
35281 struct attribute_spec.handler. */
35283 static tree
35284 rs6000_handle_longcall_attribute (tree *node, tree name,
35285 tree args ATTRIBUTE_UNUSED,
35286 int flags ATTRIBUTE_UNUSED,
35287 bool *no_add_attrs)
35289 if (TREE_CODE (*node) != FUNCTION_TYPE
35290 && TREE_CODE (*node) != FIELD_DECL
35291 && TREE_CODE (*node) != TYPE_DECL)
35293 warning (OPT_Wattributes, "%qE attribute only applies to functions",
35294 name);
35295 *no_add_attrs = true;
35298 return NULL_TREE;
35301 /* Set longcall attributes on all functions declared when
35302 rs6000_default_long_calls is true. */
35303 static void
35304 rs6000_set_default_type_attributes (tree type)
35306 if (rs6000_default_long_calls
35307 && (TREE_CODE (type) == FUNCTION_TYPE
35308 || TREE_CODE (type) == METHOD_TYPE))
35309 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
35310 NULL_TREE,
35311 TYPE_ATTRIBUTES (type));
35313 #if TARGET_MACHO
35314 darwin_set_default_type_attributes (type);
35315 #endif
35318 /* Return a reference suitable for calling a function with the
35319 longcall attribute. */
35321 rtx
35322 rs6000_longcall_ref (rtx call_ref)
35324 const char *call_name;
35325 tree node;
35327 if (GET_CODE (call_ref) != SYMBOL_REF)
35328 return call_ref;
35330 /* System V adds '.' to the internal name, so skip any leading dots. */
35331 call_name = XSTR (call_ref, 0);
35332 if (*call_name == '.')
35334 while (*call_name == '.')
35335 call_name++;
35337 node = get_identifier (call_name);
35338 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
35341 return force_reg (Pmode, call_ref);
35344 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
35345 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
35346 #endif
35348 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35349 struct attribute_spec.handler. */
35350 static tree
35351 rs6000_handle_struct_attribute (tree *node, tree name,
35352 tree args ATTRIBUTE_UNUSED,
35353 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
35355 tree *type = NULL;
35356 if (DECL_P (*node))
35358 if (TREE_CODE (*node) == TYPE_DECL)
35359 type = &TREE_TYPE (*node);
35361 else
35362 type = node;
35364 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
35365 || TREE_CODE (*type) == UNION_TYPE)))
35367 warning (OPT_Wattributes, "%qE attribute ignored", name);
35368 *no_add_attrs = true;
35371 else if ((is_attribute_p ("ms_struct", name)
35372 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
35373 || ((is_attribute_p ("gcc_struct", name)
35374 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
35376 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
35377 name);
35378 *no_add_attrs = true;
35381 return NULL_TREE;
35384 static bool
35385 rs6000_ms_bitfield_layout_p (const_tree record_type)
35387 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
35388 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
35389 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
35392 #ifdef USING_ELFOS_H
35394 /* A get_unnamed_section callback, used for switching to toc_section. */
35396 static void
35397 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
35399 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35400 && TARGET_MINIMAL_TOC)
35402 if (!toc_initialized)
35404 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35405 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35406 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
35407 fprintf (asm_out_file, "\t.tc ");
35408 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
35409 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35410 fprintf (asm_out_file, "\n");
35412 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35413 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35414 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35415 fprintf (asm_out_file, " = .+32768\n");
35416 toc_initialized = 1;
35418 else
35419 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35421 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35423 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35424 if (!toc_initialized)
35426 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35427 toc_initialized = 1;
35430 else
35432 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35433 if (!toc_initialized)
35435 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35436 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35437 fprintf (asm_out_file, " = .+32768\n");
35438 toc_initialized = 1;
35443 /* Implement TARGET_ASM_INIT_SECTIONS. */
35445 static void
35446 rs6000_elf_asm_init_sections (void)
35448 toc_section
35449 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
35451 sdata2_section
35452 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
35453 SDATA2_SECTION_ASM_OP);
35456 /* Implement TARGET_SELECT_RTX_SECTION. */
35458 static section *
35459 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
35460 unsigned HOST_WIDE_INT align)
35462 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
35463 return toc_section;
35464 else
35465 return default_elf_select_rtx_section (mode, x, align);
35468 /* For a SYMBOL_REF, set generic flags and then perform some
35469 target-specific processing.
35471 When the AIX ABI is requested on a non-AIX system, replace the
35472 function name with the real name (with a leading .) rather than the
35473 function descriptor name. This saves a lot of overriding code to
35474 read the prefixes. */
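/* E.g. (illustrative): for a function 'foo' the SYMBOL_REF string
   "foo" becomes ".foo" here, naming the code entry point directly
   rather than the function descriptor.  */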
35476 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
35477 static void
35478 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
35480 default_encode_section_info (decl, rtl, first);
35482 if (first
35483 && TREE_CODE (decl) == FUNCTION_DECL
35484 && !TARGET_AIX
35485 && DEFAULT_ABI == ABI_AIX)
35487 rtx sym_ref = XEXP (rtl, 0);
35488 size_t len = strlen (XSTR (sym_ref, 0));
35489 char *str = XALLOCAVEC (char, len + 2);
35490 str[0] = '.';
35491 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
35492 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
35496 static inline bool
35497 compare_section_name (const char *section, const char *templ)
35499 int len;
35501 len = strlen (templ);
35502 return (strncmp (section, templ, len) == 0
35503 && (section[len] == 0 || section[len] == '.'));
35506 bool
35507 rs6000_elf_in_small_data_p (const_tree decl)
35509 if (rs6000_sdata == SDATA_NONE)
35510 return false;
35512 /* We want to merge strings, so we never consider them small data. */
35513 if (TREE_CODE (decl) == STRING_CST)
35514 return false;
35516 /* Functions are never in the small data area. */
35517 if (TREE_CODE (decl) == FUNCTION_DECL)
35518 return false;
35520 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
35522 const char *section = DECL_SECTION_NAME (decl);
35523 if (compare_section_name (section, ".sdata")
35524 || compare_section_name (section, ".sdata2")
35525 || compare_section_name (section, ".gnu.linkonce.s")
35526 || compare_section_name (section, ".sbss")
35527 || compare_section_name (section, ".sbss2")
35528 || compare_section_name (section, ".gnu.linkonce.sb")
35529 || strcmp (section, ".PPC.EMB.sdata0") == 0
35530 || strcmp (section, ".PPC.EMB.sbss0") == 0)
35531 return true;
35533 else
35535 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
35537 if (size > 0
35538 && size <= g_switch_value
35539 /* If it's not public, and we're not going to reference it there,
35540 there's no need to put it in the small data section. */
35541 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
35542 return true;
35545 return false;
35548 #endif /* USING_ELFOS_H */
35550 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
35552 static bool
35553 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
35555 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
35558 /* Do not place thread-local symbol refs in the object blocks. */
35560 static bool
35561 rs6000_use_blocks_for_decl_p (const_tree decl)
35563 return !DECL_THREAD_LOCAL_P (decl);
35566 /* Return a REG that occurs in ADDR with coefficient 1.
35567 ADDR can be effectively incremented by incrementing REG.
35569 r0 is special and we must not select it as an address
35570 register by this routine since our caller will try to
35571 increment the returned register via an "la" instruction. */
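/* Example (illustrative): for ADDR = (plus (reg r9) (const_int 8))
   this returns r9; for (plus (plus (reg r9) (reg r10)) (const_int 8))
   it also returns r9, the first suitable non-r0 register found.  */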
35573 rtx
35574 find_addr_reg (rtx addr)
35576 while (GET_CODE (addr) == PLUS)
35578 if (GET_CODE (XEXP (addr, 0)) == REG
35579 && REGNO (XEXP (addr, 0)) != 0)
35580 addr = XEXP (addr, 0);
35581 else if (GET_CODE (XEXP (addr, 1)) == REG
35582 && REGNO (XEXP (addr, 1)) != 0)
35583 addr = XEXP (addr, 1);
35584 else if (CONSTANT_P (XEXP (addr, 0)))
35585 addr = XEXP (addr, 1);
35586 else if (CONSTANT_P (XEXP (addr, 1)))
35587 addr = XEXP (addr, 0);
35588 else
35589 gcc_unreachable ();
35591 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
35592 return addr;
35595 void
35596 rs6000_fatal_bad_address (rtx op)
35598 fatal_insn ("bad address", op);
35601 #if TARGET_MACHO
35603 typedef struct branch_island_d {
35604 tree function_name;
35605 tree label_name;
35606 int line_number;
35607 } branch_island;
35610 static vec<branch_island, va_gc> *branch_islands;
35612 /* Remember to generate a branch island for far calls to the given
35613 function. */
35615 static void
35616 add_compiler_branch_island (tree label_name, tree function_name,
35617 int line_number)
35619 branch_island bi = {function_name, label_name, line_number};
35620 vec_safe_push (branch_islands, bi);
35623 /* Generate far-jump branch islands for everything recorded in
35624 branch_islands. Invoked immediately after the last instruction of
35625 the epilogue has been emitted; the branch islands must be appended
35626 to, and contiguous with, the function body. Mach-O stubs are
35627 generated in machopic_output_stub(). */
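/* For reference (illustrative): a non-PIC island for 'foo' comes out
   roughly as

       L42:
           lis r12,hi16(_foo)
           ori r12,r12,lo16(_foo)
           mtctr r12
           bctr

   while the PIC variant materialises the target relative to a label
   whose address is obtained via mflr, as assembled below.  */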
35629 static void
35630 macho_branch_islands (void)
35632 char tmp_buf[512];
35634 while (!vec_safe_is_empty (branch_islands))
35636 branch_island *bi = &branch_islands->last ();
35637 const char *label = IDENTIFIER_POINTER (bi->label_name);
35638 const char *name = IDENTIFIER_POINTER (bi->function_name);
35639 char name_buf[512];
35640 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
35641 if (name[0] == '*' || name[0] == '&')
35642 strcpy (name_buf, name+1);
35643 else
35645 name_buf[0] = '_';
35646 strcpy (name_buf+1, name);
35648 strcpy (tmp_buf, "\n");
35649 strcat (tmp_buf, label);
35650 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
35651 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
35652 dbxout_stabd (N_SLINE, bi->line_number);
35653 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
35654 if (flag_pic)
35656 if (TARGET_LINK_STACK)
35658 char name[32];
35659 get_ppc476_thunk_name (name);
35660 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
35661 strcat (tmp_buf, name);
35662 strcat (tmp_buf, "\n");
35663 strcat (tmp_buf, label);
35664 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
35666 else
35668 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
35669 strcat (tmp_buf, label);
35670 strcat (tmp_buf, "_pic\n");
35671 strcat (tmp_buf, label);
35672 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
35675 strcat (tmp_buf, "\taddis r11,r11,ha16(");
35676 strcat (tmp_buf, name_buf);
35677 strcat (tmp_buf, " - ");
35678 strcat (tmp_buf, label);
35679 strcat (tmp_buf, "_pic)\n");
35681 strcat (tmp_buf, "\tmtlr r0\n");
35683 strcat (tmp_buf, "\taddi r12,r11,lo16(");
35684 strcat (tmp_buf, name_buf);
35685 strcat (tmp_buf, " - ");
35686 strcat (tmp_buf, label);
35687 strcat (tmp_buf, "_pic)\n");
35689 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
35691 else
35693 strcat (tmp_buf, ":\nlis r12,hi16(");
35694 strcat (tmp_buf, name_buf);
35695 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
35696 strcat (tmp_buf, name_buf);
35697 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
35699 output_asm_insn (tmp_buf, 0);
35700 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
35701 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
35702 dbxout_stabd (N_SLINE, bi->line_number);
35703 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
35704 branch_islands->pop ();
35708 /* NO_PREVIOUS_DEF checks whether the given function name already has
35709 a branch island recorded. */
35711 static int
35712 no_previous_def (tree function_name)
35714 branch_island *bi;
35715 unsigned ix;
35717 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
35718 if (function_name == bi->function_name)
35719 return 0;
35720 return 1;
35723 /* GET_PREV_LABEL gets the label name from the previous definition of
35724 the function. */
35726 static tree
35727 get_prev_label (tree function_name)
35729 branch_island *bi;
35730 unsigned ix;
35732 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
35733 if (function_name == bi->function_name)
35734 return bi->label_name;
35735 return NULL_TREE;
35738 /* INSN is either a function call or a millicode call. It may have an
35739 unconditional jump in its delay slot.
35741 CALL_DEST is the routine we are calling. */
35743 char *
35744 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
35745 int cookie_operand_number)
35747 static char buf[256];
35748 if (darwin_emit_branch_islands
35749 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
35750 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
35752 tree labelname;
35753 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
35755 if (no_previous_def (funname))
35757 rtx label_rtx = gen_label_rtx ();
35758 char *label_buf, temp_buf[256];
35759 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
35760 CODE_LABEL_NUMBER (label_rtx));
35761 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
35762 labelname = get_identifier (label_buf);
35763 add_compiler_branch_island (labelname, funname, insn_line (insn));
35765 else
35766 labelname = get_prev_label (funname);
35768 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
35769 instruction will reach 'foo', otherwise link as 'bl L42'".
35770 "L42" should be a 'branch island', that will do a far jump to
35771 'foo'. Branch islands are generated in
35772 macho_branch_islands(). */
35773 sprintf (buf, "jbsr %%z%d,%.246s",
35774 dest_operand_number, IDENTIFIER_POINTER (labelname));
35776 else
35777 sprintf (buf, "bl %%z%d", dest_operand_number);
35778 return buf;
35781 /* Generate PIC and indirect symbol stubs. */
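/* For the non-PIC case the stub below comes out roughly as
   (illustrative):

       .align 4
       Lstub:
           .indirect_symbol _foo
           lis r11,ha16(Lfoo$lazy_ptr)
           lwzu r12,lo16(Lfoo$lazy_ptr)(r11)
           mtctr r12
           bctr

   followed by the lazy pointer itself, initialised to
   dyld_stub_binding_helper.  */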
35783 void
35784 machopic_output_stub (FILE *file, const char *symb, const char *stub)
35786 unsigned int length;
35787 char *symbol_name, *lazy_ptr_name;
35788 char *local_label_0;
35789 static int label = 0;
35791 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
35792 symb = (*targetm.strip_name_encoding) (symb);
35795 length = strlen (symb);
35796 symbol_name = XALLOCAVEC (char, length + 32);
35797 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
35799 lazy_ptr_name = XALLOCAVEC (char, length + 32);
35800 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
35802 if (flag_pic == 2)
35803 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
35804 else
35805 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
35807 if (flag_pic == 2)
35809 fprintf (file, "\t.align 5\n");
35811 fprintf (file, "%s:\n", stub);
35812 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
35814 label++;
35815 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
35816 sprintf (local_label_0, "\"L%011d$spb\"", label);
35818 fprintf (file, "\tmflr r0\n");
35819 if (TARGET_LINK_STACK)
35821 char name[32];
35822 get_ppc476_thunk_name (name);
35823 fprintf (file, "\tbl %s\n", name);
35824 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
35826 else
35828 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
35829 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
35831 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
35832 lazy_ptr_name, local_label_0);
35833 fprintf (file, "\tmtlr r0\n");
35834 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
35835 (TARGET_64BIT ? "ldu" : "lwzu"),
35836 lazy_ptr_name, local_label_0);
35837 fprintf (file, "\tmtctr r12\n");
35838 fprintf (file, "\tbctr\n");
35840 else
35842 fprintf (file, "\t.align 4\n");
35844 fprintf (file, "%s:\n", stub);
35845 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
35847 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
35848 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
35849 (TARGET_64BIT ? "ldu" : "lwzu"),
35850 lazy_ptr_name);
35851 fprintf (file, "\tmtctr r12\n");
35852 fprintf (file, "\tbctr\n");
35855 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
35856 fprintf (file, "%s:\n", lazy_ptr_name);
35857 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
35858 fprintf (file, "%sdyld_stub_binding_helper\n",
35859 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
35862 /* Legitimize PIC addresses. If the address is already
35863 position-independent, we return ORIG. Newly generated
35864 position-independent addresses go into a reg. This is REG if non
35865 zero, otherwise we allocate register(s) as necessary. */
35867 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
35869 rtx
35870 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
35871 rtx reg)
35873 rtx base, offset;
35875 if (reg == NULL && ! reload_in_progress && ! reload_completed)
35876 reg = gen_reg_rtx (Pmode);
35878 if (GET_CODE (orig) == CONST)
35880 rtx reg_temp;
35882 if (GET_CODE (XEXP (orig, 0)) == PLUS
35883 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
35884 return orig;
35886 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
35888 /* Use a different reg for the intermediate value, as
35889 it will be marked UNCHANGING. */
35890 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
35891 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
35892 Pmode, reg_temp);
35893 offset =
35894 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
35895 Pmode, reg);
35897 if (GET_CODE (offset) == CONST_INT)
35899 if (SMALL_INT (offset))
35900 return plus_constant (Pmode, base, INTVAL (offset));
35901 else if (! reload_in_progress && ! reload_completed)
35902 offset = force_reg (Pmode, offset);
35903 else
35905 rtx mem = force_const_mem (Pmode, orig);
35906 return machopic_legitimize_pic_address (mem, Pmode, reg);
35909 return gen_rtx_PLUS (Pmode, base, offset);
35912 /* Fall back on generic machopic code. */
35913 return machopic_legitimize_pic_address (orig, mode, reg);
35916 /* Output a .machine directive for the Darwin assembler, and call
35917 the generic start_file routine. */
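/* E.g. (illustrative): -mcpu=G4 maps through the table below to
   "\t.machine ppc7400", and an unrecognised or absent CPU falls
   through to the final "ppc" entry.  */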
35919 static void
35920 rs6000_darwin_file_start (void)
35922 static const struct
35924 const char *arg;
35925 const char *name;
35926 HOST_WIDE_INT if_set;
35927 } mapping[] = {
35928 { "ppc64", "ppc64", MASK_64BIT },
35929 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
35930 { "power4", "ppc970", 0 },
35931 { "G5", "ppc970", 0 },
35932 { "7450", "ppc7450", 0 },
35933 { "7400", "ppc7400", MASK_ALTIVEC },
35934 { "G4", "ppc7400", 0 },
35935 { "750", "ppc750", 0 },
35936 { "740", "ppc750", 0 },
35937 { "G3", "ppc750", 0 },
35938 { "604e", "ppc604e", 0 },
35939 { "604", "ppc604", 0 },
35940 { "603e", "ppc603", 0 },
35941 { "603", "ppc603", 0 },
35942 { "601", "ppc601", 0 },
35943 { NULL, "ppc", 0 } };
35944 const char *cpu_id = "";
35945 size_t i;
35947 rs6000_file_start ();
35948 darwin_file_start ();
35950 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
35952 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
35953 cpu_id = rs6000_default_cpu;
35955 if (global_options_set.x_rs6000_cpu_index)
35956 cpu_id = processor_target_table[rs6000_cpu_index].name;
35958 /* Look through the mapping array. Pick the first name that either
35959 matches the argument, has a bit set in IF_SET that is also set
35960 in the target flags, or has a NULL name. */
35962 i = 0;
35963 while (mapping[i].arg != NULL
35964 && strcmp (mapping[i].arg, cpu_id) != 0
35965 && (mapping[i].if_set & rs6000_isa_flags) == 0)
35966 i++;
35968 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
35971 #endif /* TARGET_MACHO */
35973 #if TARGET_ELF
35974 static int
35975 rs6000_elf_reloc_rw_mask (void)
35977 if (flag_pic)
35978 return 3;
35979 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35980 return 2;
35981 else
35982 return 0;
35985 /* Record an element in the table of global constructors. SYMBOL is
35986 a SYMBOL_REF of the function to be called; PRIORITY is a number
35987 between 0 and MAX_INIT_PRIORITY.
35989 This differs from default_named_section_asm_out_constructor in
35990 that we have special handling for -mrelocatable. */
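/* Example of the priority inversion below (illustrative): with
   MAX_INIT_PRIORITY == 65535, a constructor of priority 200 is placed
   in section ".ctors.65335", so the linker's increasing section sort
   produces the required right-to-left execution order.  */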
35992 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
35993 static void
35994 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
35996 const char *section = ".ctors";
35997 char buf[18];
35999 if (priority != DEFAULT_INIT_PRIORITY)
36001 sprintf (buf, ".ctors.%.5u",
36002 /* Invert the numbering so the linker puts us in the proper
36003 order; constructors are run from right to left, and the
36004 linker sorts in increasing order. */
36005 MAX_INIT_PRIORITY - priority);
36006 section = buf;
36009 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36010 assemble_align (POINTER_SIZE);
36012 if (DEFAULT_ABI == ABI_V4
36013 && (TARGET_RELOCATABLE || flag_pic > 1))
36015 fputs ("\t.long (", asm_out_file);
36016 output_addr_const (asm_out_file, symbol);
36017 fputs (")@fixup\n", asm_out_file);
36019 else
36020 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
36023 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
36024 static void
36025 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
36027 const char *section = ".dtors";
36028 char buf[18];
36030 if (priority != DEFAULT_INIT_PRIORITY)
36032 sprintf (buf, ".dtors.%.5u",
36033 /* Invert the numbering so the linker puts us in the proper
36034 order; constructors are run from right to left, and the
36035 linker sorts in increasing order. */
36036 MAX_INIT_PRIORITY - priority);
36037 section = buf;
36040 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36041 assemble_align (POINTER_SIZE);
36043 if (DEFAULT_ABI == ABI_V4
36044 && (TARGET_RELOCATABLE || flag_pic > 1))
36046 fputs ("\t.long (", asm_out_file);
36047 output_addr_const (asm_out_file, symbol);
36048 fputs (")@fixup\n", asm_out_file);
36050 else
36051 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
36054 void
36055 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
36057 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
36059 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
36060 ASM_OUTPUT_LABEL (file, name);
36061 fputs (DOUBLE_INT_ASM_OP, file);
36062 rs6000_output_function_entry (file, name);
36063 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
36064 if (DOT_SYMBOLS)
36066 fputs ("\t.size\t", file);
36067 assemble_name (file, name);
36068 fputs (",24\n\t.type\t.", file);
36069 assemble_name (file, name);
36070 fputs (",@function\n", file);
36071 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
36073 fputs ("\t.globl\t.", file);
36074 assemble_name (file, name);
36075 putc ('\n', file);
36078 else
36079 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36080 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36081 rs6000_output_function_entry (file, name);
36082 fputs (":\n", file);
36083 return;
36086 if (DEFAULT_ABI == ABI_V4
36087 && (TARGET_RELOCATABLE || flag_pic > 1)
36088 && !TARGET_SECURE_PLT
36089 && (!constant_pool_empty_p () || crtl->profile)
36090 && uses_TOC ())
36092 char buf[256];
36094 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36096 fprintf (file, "\t.long ");
36097 assemble_name (file, toc_label_name);
36098 need_toc_init = 1;
36099 putc ('-', file);
36100 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36101 assemble_name (file, buf);
36102 putc ('\n', file);
36105 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36106 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36108 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
36110 char buf[256];
36112 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36114 fprintf (file, "\t.quad .TOC.-");
36115 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36116 assemble_name (file, buf);
36117 putc ('\n', file);
36120 if (DEFAULT_ABI == ABI_AIX)
36122 const char *desc_name, *orig_name;
36124 orig_name = (*targetm.strip_name_encoding) (name);
36125 desc_name = orig_name;
36126 while (*desc_name == '.')
36127 desc_name++;
36129 if (TREE_PUBLIC (decl))
36130 fprintf (file, "\t.globl %s\n", desc_name);
36132 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
36133 fprintf (file, "%s:\n", desc_name);
36134 fprintf (file, "\t.long %s\n", orig_name);
36135 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
36136 fputs ("\t.long 0\n", file);
36137 fprintf (file, "\t.previous\n");
36139 ASM_OUTPUT_LABEL (file, name);
36142 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
36143 static void
36144 rs6000_elf_file_end (void)
36146 #ifdef HAVE_AS_GNU_ATTRIBUTE
36147 /* ??? The value emitted depends on options active at file end.
36148 Assume anyone using #pragma or attributes that might change
36149 options knows what they are doing. */
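/* For instance (illustrative): hard double-precision FPRs together
   with 128-bit IBM long double give fp == 1 | 1*4 == 5, emitted as
   ".gnu_attribute 4, 5".  */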
36150 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
36151 && rs6000_passes_float)
36153 int fp;
36155 if (TARGET_DF_FPR | TARGET_DF_SPE)
36156 fp = 1;
36157 else if (TARGET_SF_FPR | TARGET_SF_SPE)
36158 fp = 3;
36159 else
36160 fp = 2;
36161 if (rs6000_passes_long_double)
36163 if (!TARGET_LONG_DOUBLE_128)
36164 fp |= 2 * 4;
36165 else if (TARGET_IEEEQUAD)
36166 fp |= 3 * 4;
36167 else
36168 fp |= 1 * 4;
36170 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
36172 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
36174 if (rs6000_passes_vector)
36175 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
36176 (TARGET_ALTIVEC_ABI ? 2
36177 : TARGET_SPE_ABI ? 3
36178 : 1));
36179 if (rs6000_returns_struct)
36180 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
36181 aix_struct_return ? 2 : 1);
36183 #endif
36184 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
36185 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
36186 file_end_indicate_exec_stack ();
36187 #endif
36189 if (flag_split_stack)
36190 file_end_indicate_split_stack ();
36192 if (cpu_builtin_p)
36194 /* We have expanded a CPU builtin, so we need to emit a reference to
36195 the special symbol that LIBC uses to declare it supports the
36196 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
36197 switch_to_section (data_section);
36198 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
36199 fprintf (asm_out_file, "\t%s %s\n",
36200 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
36203 #endif
36205 #if TARGET_XCOFF
36207 #ifndef HAVE_XCOFF_DWARF_EXTRAS
36208 #define HAVE_XCOFF_DWARF_EXTRAS 0
36209 #endif
36211 static enum unwind_info_type
36212 rs6000_xcoff_debug_unwind_info (void)
36214 return UI_NONE;
36217 static void
36218 rs6000_xcoff_asm_output_anchor (rtx symbol)
36220 char buffer[100];
36222 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
36223 SYMBOL_REF_BLOCK_OFFSET (symbol));
36224 fprintf (asm_out_file, "%s", SET_ASM_OP);
36225 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
36226 fprintf (asm_out_file, ",");
36227 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
36228 fprintf (asm_out_file, "\n");
36231 static void
36232 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
36234 fputs (GLOBAL_ASM_OP, stream);
36235 RS6000_OUTPUT_BASENAME (stream, name);
36236 putc ('\n', stream);
36239 /* A get_unnamed_decl callback, used for read-only sections. PTR
36240 points to the section string variable. */
36242 static void
36243 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
36245 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
36246 *(const char *const *) directive,
36247 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36250 /* Likewise for read-write sections. */
36252 static void
36253 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
36255 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
36256 *(const char *const *) directive,
36257 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36260 static void
36261 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
36263 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
36264 *(const char *const *) directive,
36265 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36268 /* A get_unnamed_section callback, used for switching to toc_section. */
36270 static void
36271 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
36273 if (TARGET_MINIMAL_TOC)
36275 /* toc_section is always selected at least once from
36276 rs6000_xcoff_file_start, so this is guaranteed to be defined
36277 exactly once in each file. */
36278 if (!toc_initialized)
36280 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
36281 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
36282 toc_initialized = 1;
36284 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
36285 (TARGET_32BIT ? "" : ",3"));
36287 else
36288 fputs ("\t.toc\n", asm_out_file);
36291 /* Implement TARGET_ASM_INIT_SECTIONS. */
36293 static void
36294 rs6000_xcoff_asm_init_sections (void)
36296 read_only_data_section
36297 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36298 &xcoff_read_only_section_name);
36300 private_data_section
36301 = get_unnamed_section (SECTION_WRITE,
36302 rs6000_xcoff_output_readwrite_section_asm_op,
36303 &xcoff_private_data_section_name);
36305 tls_data_section
36306 = get_unnamed_section (SECTION_TLS,
36307 rs6000_xcoff_output_tls_section_asm_op,
36308 &xcoff_tls_data_section_name);
36310 tls_private_data_section
36311 = get_unnamed_section (SECTION_TLS,
36312 rs6000_xcoff_output_tls_section_asm_op,
36313 &xcoff_private_data_section_name);
36315 read_only_private_data_section
36316 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36317 &xcoff_private_data_section_name);
36319 toc_section
36320 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
36322 readonly_data_section = read_only_data_section;
36325 static int
36326 rs6000_xcoff_reloc_rw_mask (void)
36328 return 3;
36331 static void
36332 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
36333 tree decl ATTRIBUTE_UNUSED)
36335 int smclass;
36336 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
36338 if (flags & SECTION_EXCLUDE)
36339 smclass = 4;
36340 else if (flags & SECTION_DEBUG)
36342 fprintf (asm_out_file, "\t.dwsect %s\n", name);
36343 return;
36345 else if (flags & SECTION_CODE)
36346 smclass = 0;
36347 else if (flags & SECTION_TLS)
36348 smclass = 3;
36349 else if (flags & SECTION_WRITE)
36350 smclass = 2;
36351 else
36352 smclass = 1;
36354 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
36355 (flags & SECTION_CODE) ? "." : "",
36356 name, suffix[smclass], flags & SECTION_ENTSIZE);
36359 #define IN_NAMED_SECTION(DECL) \
36360 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
36361 && DECL_SECTION_NAME (DECL) != NULL)
36363 static section *
36364 rs6000_xcoff_select_section (tree decl, int reloc,
36365 unsigned HOST_WIDE_INT align)
36367 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
36368 named section. */
36369 if (align > BIGGEST_ALIGNMENT)
36371 resolve_unique_section (decl, reloc, true);
36372 if (IN_NAMED_SECTION (decl))
36373 return get_named_section (decl, NULL, reloc);
36376 if (decl_readonly_section (decl, reloc))
36378 if (TREE_PUBLIC (decl))
36379 return read_only_data_section;
36380 else
36381 return read_only_private_data_section;
36383 else
36385 #if HAVE_AS_TLS
36386 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
36388 if (TREE_PUBLIC (decl))
36389 return tls_data_section;
36390 else if (bss_initializer_p (decl))
36392 /* Convert to COMMON to emit in BSS. */
36393 DECL_COMMON (decl) = 1;
36394 return tls_comm_section;
36396 else
36397 return tls_private_data_section;
36399 else
36400 #endif
36401 if (TREE_PUBLIC (decl))
36402 return data_section;
36403 else
36404 return private_data_section;
36408 static void
36409 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
36411 const char *name;
36413 /* Use select_section for private data and uninitialized data with
36414 alignment <= BIGGEST_ALIGNMENT. */
36415 if (!TREE_PUBLIC (decl)
36416 || DECL_COMMON (decl)
36417 || (DECL_INITIAL (decl) == NULL_TREE
36418 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
36419 || DECL_INITIAL (decl) == error_mark_node
36420 || (flag_zero_initialized_in_bss
36421 && initializer_zerop (DECL_INITIAL (decl))))
36422 return;
36424 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36425 name = (*targetm.strip_name_encoding) (name);
36426 set_decl_section_name (decl, name);
36429 /* Select section for constant in constant pool.
36431 On RS/6000, all constants are in the private read-only data area.
36432 However, if this is being placed in the TOC it must be output as a
36433 toc entry. */
36435 static section *
36436 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
36437 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
36439 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
36440 return toc_section;
36441 else
36442 return read_only_private_data_section;
36445 /* Remove any trailing [DS] or the like from the symbol name. */
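/* Examples (illustrative): "*foo" -> "foo", "bar[DS]" -> "bar",
   "baz[RW]" -> "baz"; a name without a trailing ']' is returned
   unchanged.  */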
36447 static const char *
36448 rs6000_xcoff_strip_name_encoding (const char *name)
36450 size_t len;
36451 if (*name == '*')
36452 name++;
36453 len = strlen (name);
36454 if (name[len - 1] == ']')
36455 return ggc_alloc_string (name, len - 4);
36456 else
36457 return name;
36460 /* Section attributes. AIX is always PIC. */
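/* E.g. (illustrative): a 16-byte-aligned variable yields align == 16,
   and exact_log2 (16) == 4 is folded into the SECTION_ENTSIZE bits of
   the returned flags.  */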
36462 static unsigned int
36463 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
36465 unsigned int align;
36466 unsigned int flags = default_section_type_flags (decl, name, reloc);
36468 /* Align to at least UNIT size. */
36469 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
36470 align = MIN_UNITS_PER_WORD;
36471 else
36472 /* Increase alignment of large objects if not already stricter. */
36473 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
36474 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
36475 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
36477 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
36480 /* Output at beginning of assembler file.
36482 Initialize the section names for the RS/6000 at this point.
36484 Specify filename, including full path, to assembler.
36486 We want to go into the TOC section so at least one .toc will be emitted.
36487 Also, in order to output proper .bs/.es pairs, we need at least one static
36488 [RW] section emitted.
36490 Finally, declare mcount when profiling to make the assembler happy. */
36492 static void
36493 rs6000_xcoff_file_start (void)
36495 rs6000_gen_section_name (&xcoff_bss_section_name,
36496 main_input_filename, ".bss_");
36497 rs6000_gen_section_name (&xcoff_private_data_section_name,
36498 main_input_filename, ".rw_");
36499 rs6000_gen_section_name (&xcoff_read_only_section_name,
36500 main_input_filename, ".ro_");
36501 rs6000_gen_section_name (&xcoff_tls_data_section_name,
36502 main_input_filename, ".tls_");
36503 rs6000_gen_section_name (&xcoff_tbss_section_name,
36504 main_input_filename, ".tbss_[UL]");
36506 fputs ("\t.file\t", asm_out_file);
36507 output_quoted_string (asm_out_file, main_input_filename);
36508 fputc ('\n', asm_out_file);
36509 if (write_symbols != NO_DEBUG)
36510 switch_to_section (private_data_section);
36511 switch_to_section (toc_section);
36512 switch_to_section (text_section);
36513 if (profile_flag)
36514 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
36515 rs6000_file_start ();
36518 /* Output at end of assembler file.
36519 On the RS/6000, referencing data should automatically pull in text. */
36521 static void
36522 rs6000_xcoff_file_end (void)
36524 switch_to_section (text_section);
36525 fputs ("_section_.text:\n", asm_out_file);
36526 switch_to_section (data_section);
36527 fputs (TARGET_32BIT
36528 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
36529 asm_out_file);
36532 struct declare_alias_data
36534 FILE *file;
36535 bool function_descriptor;
36538 /* Declare alias N. A helper function for for_node_and_aliases. */
36540 static bool
36541 rs6000_declare_alias (struct symtab_node *n, void *d)
36543 struct declare_alias_data *data = (struct declare_alias_data *)d;
36544 /* Main symbol is output specially, because varasm machinery does part of
36545 the job for us - we do not need to declare .globl/lglobs and such. */
36546 if (!n->alias || n->weakref)
36547 return false;
36549 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
36550 return false;
36552 /* Prevent assemble_alias from trying to use .set pseudo operation
36553 that does not behave as expected by the middle-end. */
36554 TREE_ASM_WRITTEN (n->decl) = true;
36556 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
36557 char *buffer = (char *) alloca (strlen (name) + 2);
36558 char *p;
36559 int dollar_inside = 0;
36561 strcpy (buffer, name);
36562 p = strchr (buffer, '$');
36563 while (p) {
36564 *p = '_';
36565 dollar_inside++;
36566 p = strchr (p + 1, '$');
36568 if (TREE_PUBLIC (n->decl))
36570 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
36572 if (dollar_inside) {
36573 if (data->function_descriptor)
36574 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
36575 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
36577 if (data->function_descriptor)
36579 fputs ("\t.globl .", data->file);
36580 RS6000_OUTPUT_BASENAME (data->file, buffer);
36581 putc ('\n', data->file);
36583 fputs ("\t.globl ", data->file);
36584 RS6000_OUTPUT_BASENAME (data->file, buffer);
36585 putc ('\n', data->file);
36587 #ifdef ASM_WEAKEN_DECL
36588 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
36589 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
36590 #endif
36592 else
36594 if (dollar_inside)
36596 if (data->function_descriptor)
36597 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
36598 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
36600 if (data->function_descriptor)
36602 fputs ("\t.lglobl .", data->file);
36603 RS6000_OUTPUT_BASENAME (data->file, buffer);
36604 putc ('\n', data->file);
36606 fputs ("\t.lglobl ", data->file);
36607 RS6000_OUTPUT_BASENAME (data->file, buffer);
36608 putc ('\n', data->file);
36610 if (data->function_descriptor)
36611 fputs (".", data->file);
36612 RS6000_OUTPUT_BASENAME (data->file, buffer);
36613 fputs (":\n", data->file);
36614 return false;
36618 #ifdef HAVE_GAS_HIDDEN
36619 /* Helper function to calculate visibility of a DECL
36620 and return the value as a const string. */
36622 static const char *
36623 rs6000_xcoff_visibility (tree decl)
36625 static const char * const visibility_types[] = {
36626 "", ",protected", ",hidden", ",internal"
36629 enum symbol_visibility vis = DECL_VISIBILITY (decl);
36631 if (TREE_CODE (decl) == FUNCTION_DECL
36632 && cgraph_node::get (decl)
36633 && cgraph_node::get (decl)->instrumentation_clone
36634 && cgraph_node::get (decl)->instrumented_version)
36635 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
36637 return visibility_types[vis];
36639 #endif
36642 /* This macro produces the initial definition of a function name.
36643 On the RS/6000, we need to place an extra '.' in the function name and
36644 output the function descriptor.
36645 Dollar signs are converted to underscores.
36647 The csect for the function will have already been created when
36648 text_section was selected. We do have to go back to that csect, however.
36650 The third and fourth parameters to the .function pseudo-op (16 and 044)
36651 are placeholders which no longer have any use.
36653 Because AIX assembler's .set command has unexpected semantics, we output
36654 all aliases as alternative labels in front of the definition. */
36656 void
36657 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
36659 char *buffer = (char *) alloca (strlen (name) + 1);
36660 char *p;
36661 int dollar_inside = 0;
36662 struct declare_alias_data data = {file, false};
36664 strcpy (buffer, name);
36665 p = strchr (buffer, '$');
36666 while (p) {
36667 *p = '_';
36668 dollar_inside++;
36669 p = strchr (p + 1, '$');
36670 }
36671 if (TREE_PUBLIC (decl))
36673 if (!RS6000_WEAK || !DECL_WEAK (decl))
36675 if (dollar_inside) {
36676 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
36677 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
36678 }
36679 fputs ("\t.globl .", file);
36680 RS6000_OUTPUT_BASENAME (file, buffer);
36681 #ifdef HAVE_GAS_HIDDEN
36682 fputs (rs6000_xcoff_visibility (decl), file);
36683 #endif
36684 putc ('\n', file);
36687 else
36689 if (dollar_inside) {
36690 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
36691 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
36692 }
36693 fputs ("\t.lglobl .", file);
36694 RS6000_OUTPUT_BASENAME (file, buffer);
36695 putc ('\n', file);
36697 fputs ("\t.csect ", file);
36698 RS6000_OUTPUT_BASENAME (file, buffer);
36699 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
36700 RS6000_OUTPUT_BASENAME (file, buffer);
36701 fputs (":\n", file);
36702 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
36703 &data, true);
36704 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
36705 RS6000_OUTPUT_BASENAME (file, buffer);
36706 fputs (", TOC[tc0], 0\n", file);
36707 in_section = NULL;
36708 switch_to_section (function_section (decl));
36709 putc ('.', file);
36710 RS6000_OUTPUT_BASENAME (file, buffer);
36711 fputs (":\n", file);
36712 data.function_descriptor = true;
36713 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
36714 &data, true);
36715 if (!DECL_IGNORED_P (decl))
36717 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36718 xcoffout_declare_function (file, decl, buffer);
36719 else if (write_symbols == DWARF2_DEBUG)
36721 name = (*targetm.strip_name_encoding) (name);
36722 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
36725 return;
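/* Editor's sketch (not in the original): the directives the routine above
   emits for a public, non-weak function "foo" (name hypothetical) with no
   '$' in its name and default visibility, on a 32-bit XCOFF target:

   .globl .foo
   .csect foo[DS]
   foo:
   .long .foo, TOC[tc0], 0
   .foo:

   foo[DS] is the function descriptor csect (entry address plus TOC
   anchor); the code itself is emitted at the dot-prefixed label after
   switching back to the function's text section (exact csect directive
   depends on the section machinery).  */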
36729 /* Output assembly language to globalize a symbol from a DECL,
36730 possibly with visibility. */
36732 void
36733 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
36735 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
36736 fputs (GLOBAL_ASM_OP, stream);
36737 RS6000_OUTPUT_BASENAME (stream, name);
36738 #ifdef HAVE_GAS_HIDDEN
36739 fputs (rs6000_xcoff_visibility (decl), stream);
36740 #endif
36741 putc ('\n', stream);
36744 /* Output assembly language to define a symbol as COMMON from a DECL,
36745 possibly with visibility. */
36747 void
36748 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
36749 tree decl ATTRIBUTE_UNUSED,
36750 const char *name,
36751 unsigned HOST_WIDE_INT size,
36752 unsigned HOST_WIDE_INT align)
36754 unsigned HOST_WIDE_INT align2 = 2;
36756 if (align > 32)
36757 align2 = floor_log2 (align / BITS_PER_UNIT);
36758 else if (size > 4)
36759 align2 = 3;
36761 fputs (COMMON_ASM_OP, stream);
36762 RS6000_OUTPUT_BASENAME (stream, name);
36764 fprintf (stream,
36765 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
36766 size, align2);
36768 #ifdef HAVE_GAS_HIDDEN
36769 fputs (rs6000_xcoff_visibility (decl), stream);
36770 #endif
36771 putc ('\n', stream);
36774 /* This macro produces the initial definition of an object (variable) name.
36775 Because AIX assembler's .set command has unexpected semantics, we output
36776 all aliases as alternative labels in front of the definition. */
36778 void
36779 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
36781 struct declare_alias_data data = {file, false};
36782 RS6000_OUTPUT_BASENAME (file, name);
36783 fputs (":\n", file);
36784 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
36785 &data, true);
36788 /* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */
36790 void
36791 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
36793 fputs (integer_asm_op (size, FALSE), file);
36794 assemble_name (file, label);
36795 fputs ("-$", file);
36798 /* Output a symbol offset relative to the dbase for the current object.
36799 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
36800 signed offsets.
36802 __gcc_unwind_dbase is embedded in all executables/libraries through
36803 libgcc/config/rs6000/crtdbase.S. */
36805 void
36806 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
36808 fputs (integer_asm_op (size, FALSE), file);
36809 assemble_name (file, label);
36810 fputs("-__gcc_unwind_dbase", file);
36813 #ifdef HAVE_AS_TLS
36814 static void
36815 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
36817 rtx symbol;
36818 int flags;
36819 const char *symname;
36821 default_encode_section_info (decl, rtl, first);
36823 /* Careful not to prod global register variables. */
36824 if (!MEM_P (rtl))
36825 return;
36826 symbol = XEXP (rtl, 0);
36827 if (GET_CODE (symbol) != SYMBOL_REF)
36828 return;
36830 flags = SYMBOL_REF_FLAGS (symbol);
36832 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
36833 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
36835 SYMBOL_REF_FLAGS (symbol) = flags;
36837 /* Append mapping class to extern decls. */
36838 symname = XSTR (symbol, 0);
36839 if (decl /* sync condition with assemble_external () */
36840 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
36841 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
36842 || TREE_CODE (decl) == FUNCTION_DECL)
36843 && symname[strlen (symname) - 1] != ']')
36845 char *newname = (char *) alloca (strlen (symname) + 5);
36846 strcpy (newname, symname);
36847 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
36848 ? "[DS]" : "[UA]"));
36849 XSTR (symbol, 0) = ggc_strdup (newname);
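/* Editor's note (illustration, not in the original): under the rule above,
   an extern function "bar" is renamed "bar[DS]" and an extern non-TLS
   variable "baz" becomes "baz[UA]" (names hypothetical), attaching the
   XCOFF mapping class the AIX linker expects.  */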
36852 #endif /* HAVE_AS_TLS */
36853 #endif /* TARGET_XCOFF */
36855 void
36856 rs6000_asm_weaken_decl (FILE *stream, tree decl,
36857 const char *name, const char *val)
36859 fputs ("\t.weak\t", stream);
36860 RS6000_OUTPUT_BASENAME (stream, name);
36861 if (decl && TREE_CODE (decl) == FUNCTION_DECL
36862 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
36864 if (TARGET_XCOFF)
36865 fputs ("[DS]", stream);
36866 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
36867 if (TARGET_XCOFF)
36868 fputs (rs6000_xcoff_visibility (decl), stream);
36869 #endif
36870 fputs ("\n\t.weak\t.", stream);
36871 RS6000_OUTPUT_BASENAME (stream, name);
36873 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
36874 if (TARGET_XCOFF)
36875 fputs (rs6000_xcoff_visibility (decl), stream);
36876 #endif
36877 fputc ('\n', stream);
36878 if (val)
36880 #ifdef ASM_OUTPUT_DEF
36881 ASM_OUTPUT_DEF (stream, name, val);
36882 #endif
36883 if (decl && TREE_CODE (decl) == FUNCTION_DECL
36884 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
36886 fputs ("\t.set\t.", stream);
36887 RS6000_OUTPUT_BASENAME (stream, name);
36888 fputs (",.", stream);
36889 RS6000_OUTPUT_BASENAME (stream, val);
36890 fputc ('\n', stream);
36896 /* Return true if INSN should not be copied. */
36898 static bool
36899 rs6000_cannot_copy_insn_p (rtx_insn *insn)
36901 return recog_memoized (insn) >= 0
36902 && get_attr_cannot_copy (insn);
36905 /* Compute a (partial) cost for rtx X. Return true if the complete
36906 cost has been computed, and false if subexpressions should be
36907 scanned. In either case, *TOTAL contains the cost result. */
36909 static bool
36910 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
36911 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
36913 int code = GET_CODE (x);
36915 switch (code)
36917 /* On the RS/6000, if it is valid in the insn, it is free. */
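/* Editor's note (illustration, not in the original): roughly, constraint
   'I' below is a signed 16-bit immediate, 'K' an unsigned 16-bit one, and
   'L' a signed 16-bit value shifted left 16; e.g. (plus (reg) (const_int 5))
   satisfies 'I', so the 5 is costed as free because addi absorbs it.  */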
36918 case CONST_INT:
36919 if (((outer_code == SET
36920 || outer_code == PLUS
36921 || outer_code == MINUS)
36922 && (satisfies_constraint_I (x)
36923 || satisfies_constraint_L (x)))
36924 || (outer_code == AND
36925 && (satisfies_constraint_K (x)
36926 || (mode == SImode
36927 ? satisfies_constraint_L (x)
36928 : satisfies_constraint_J (x))))
36929 || ((outer_code == IOR || outer_code == XOR)
36930 && (satisfies_constraint_K (x)
36931 || (mode == SImode
36932 ? satisfies_constraint_L (x)
36933 : satisfies_constraint_J (x))))
36934 || outer_code == ASHIFT
36935 || outer_code == ASHIFTRT
36936 || outer_code == LSHIFTRT
36937 || outer_code == ROTATE
36938 || outer_code == ROTATERT
36939 || outer_code == ZERO_EXTRACT
36940 || (outer_code == MULT
36941 && satisfies_constraint_I (x))
36942 || ((outer_code == DIV || outer_code == UDIV
36943 || outer_code == MOD || outer_code == UMOD)
36944 && exact_log2 (INTVAL (x)) >= 0)
36945 || (outer_code == COMPARE
36946 && (satisfies_constraint_I (x)
36947 || satisfies_constraint_K (x)))
36948 || ((outer_code == EQ || outer_code == NE)
36949 && (satisfies_constraint_I (x)
36950 || satisfies_constraint_K (x)
36951 || (mode == SImode
36952 ? satisfies_constraint_L (x)
36953 : satisfies_constraint_J (x))))
36954 || (outer_code == GTU
36955 && satisfies_constraint_I (x))
36956 || (outer_code == LTU
36957 && satisfies_constraint_P (x)))
36959 *total = 0;
36960 return true;
36962 else if ((outer_code == PLUS
36963 && reg_or_add_cint_operand (x, VOIDmode))
36964 || (outer_code == MINUS
36965 && reg_or_sub_cint_operand (x, VOIDmode))
36966 || ((outer_code == SET
36967 || outer_code == IOR
36968 || outer_code == XOR)
36969 && (INTVAL (x)
36970 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
36972 *total = COSTS_N_INSNS (1);
36973 return true;
36975 /* FALLTHRU */
36977 case CONST_DOUBLE:
36978 case CONST_WIDE_INT:
36979 case CONST:
36980 case HIGH:
36981 case SYMBOL_REF:
36982 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
36983 return true;
36985 case MEM:
36986 /* When optimizing for size, MEM should be slightly more expensive
36987 than generating an address, e.g., (plus (reg) (const)).
36988 L1 cache latency is about two instructions. */
36989 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
36990 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
36991 *total += COSTS_N_INSNS (100);
36992 return true;
36994 case LABEL_REF:
36995 *total = 0;
36996 return true;
36998 case PLUS:
36999 case MINUS:
37000 if (FLOAT_MODE_P (mode))
37001 *total = rs6000_cost->fp;
37002 else
37003 *total = COSTS_N_INSNS (1);
37004 return false;
37006 case MULT:
37007 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37008 && satisfies_constraint_I (XEXP (x, 1)))
37010 if (INTVAL (XEXP (x, 1)) >= -256
37011 && INTVAL (XEXP (x, 1)) <= 255)
37012 *total = rs6000_cost->mulsi_const9;
37013 else
37014 *total = rs6000_cost->mulsi_const;
37016 else if (mode == SFmode)
37017 *total = rs6000_cost->fp;
37018 else if (FLOAT_MODE_P (mode))
37019 *total = rs6000_cost->dmul;
37020 else if (mode == DImode)
37021 *total = rs6000_cost->muldi;
37022 else
37023 *total = rs6000_cost->mulsi;
37024 return false;
37026 case FMA:
37027 if (mode == SFmode)
37028 *total = rs6000_cost->fp;
37029 else
37030 *total = rs6000_cost->dmul;
37031 break;
37033 case DIV:
37034 case MOD:
37035 if (FLOAT_MODE_P (mode))
37037 *total = mode == DFmode ? rs6000_cost->ddiv
37038 : rs6000_cost->sdiv;
37039 return false;
37041 /* FALLTHRU */
37043 case UDIV:
37044 case UMOD:
37045 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37046 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
37048 if (code == DIV || code == MOD)
37049 /* Shift, addze */
37050 *total = COSTS_N_INSNS (2);
37051 else
37052 /* Shift */
37053 *total = COSTS_N_INSNS (1);
37055 else
37057 if (GET_MODE (XEXP (x, 1)) == DImode)
37058 *total = rs6000_cost->divdi;
37059 else
37060 *total = rs6000_cost->divsi;
37062 /* Add in shift and subtract for MOD unless we have a mod instruction. */
37063 if (!TARGET_MODULO && (code == MOD || code == UMOD))
37064 *total += COSTS_N_INSNS (2);
37065 return false;
37067 case CTZ:
37068 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
37069 return false;
37071 case FFS:
37072 *total = COSTS_N_INSNS (4);
37073 return false;
37075 case POPCOUNT:
37076 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
37077 return false;
37079 case PARITY:
37080 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
37081 return false;
37083 case NOT:
37084 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
37085 *total = 0;
37086 else
37087 *total = COSTS_N_INSNS (1);
37088 return false;
37090 case AND:
37091 if (CONST_INT_P (XEXP (x, 1)))
37093 rtx left = XEXP (x, 0);
37094 rtx_code left_code = GET_CODE (left);
37096 /* rotate-and-mask: 1 insn. */
37097 if ((left_code == ROTATE
37098 || left_code == ASHIFT
37099 || left_code == LSHIFTRT)
37100 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
37102 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
37103 if (!CONST_INT_P (XEXP (left, 1)))
37104 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
37105 *total += COSTS_N_INSNS (1);
37106 return true;
37109 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
37110 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
37111 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
37112 || (val & 0xffff) == val
37113 || (val & 0xffff0000) == val
37114 || ((val & 0xffff) == 0 && mode == SImode))
37116 *total = rtx_cost (left, mode, AND, 0, speed);
37117 *total += COSTS_N_INSNS (1);
37118 return true;
37121 /* 2 insns. */
37122 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
37124 *total = rtx_cost (left, mode, AND, 0, speed);
37125 *total += COSTS_N_INSNS (2);
37126 return true;
37130 *total = COSTS_N_INSNS (1);
37131 return false;
37133 case IOR:
37134 /* FIXME */
37135 *total = COSTS_N_INSNS (1);
37136 return true;
37138 case CLZ:
37139 case XOR:
37140 case ZERO_EXTRACT:
37141 *total = COSTS_N_INSNS (1);
37142 return false;
37144 case ASHIFT:
37145 /* The EXTSWSLI instruction is a combined instruction. Don't count both
37146 the sign extend and shift separately within the insn. */
37147 if (TARGET_EXTSWSLI && mode == DImode
37148 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
37149 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
37151 *total = 0;
37152 return false;
37154 /* fall through */
37156 case ASHIFTRT:
37157 case LSHIFTRT:
37158 case ROTATE:
37159 case ROTATERT:
37160 /* Handle mul_highpart. */
37161 if (outer_code == TRUNCATE
37162 && GET_CODE (XEXP (x, 0)) == MULT)
37164 if (mode == DImode)
37165 *total = rs6000_cost->muldi;
37166 else
37167 *total = rs6000_cost->mulsi;
37168 return true;
37170 else if (outer_code == AND)
37171 *total = 0;
37172 else
37173 *total = COSTS_N_INSNS (1);
37174 return false;
37176 case SIGN_EXTEND:
37177 case ZERO_EXTEND:
37178 if (GET_CODE (XEXP (x, 0)) == MEM)
37179 *total = 0;
37180 else
37181 *total = COSTS_N_INSNS (1);
37182 return false;
37184 case COMPARE:
37185 case NEG:
37186 case ABS:
37187 if (!FLOAT_MODE_P (mode))
37189 *total = COSTS_N_INSNS (1);
37190 return false;
37192 /* FALLTHRU */
37194 case FLOAT:
37195 case UNSIGNED_FLOAT:
37196 case FIX:
37197 case UNSIGNED_FIX:
37198 case FLOAT_TRUNCATE:
37199 *total = rs6000_cost->fp;
37200 return false;
37202 case FLOAT_EXTEND:
37203 if (mode == DFmode)
37204 *total = rs6000_cost->sfdf_convert;
37205 else
37206 *total = rs6000_cost->fp;
37207 return false;
37209 case UNSPEC:
37210 switch (XINT (x, 1))
37212 case UNSPEC_FRSP:
37213 *total = rs6000_cost->fp;
37214 return true;
37216 default:
37217 break;
37219 break;
37221 case CALL:
37222 case IF_THEN_ELSE:
37223 if (!speed)
37225 *total = COSTS_N_INSNS (1);
37226 return true;
37228 else if (FLOAT_MODE_P (mode)
37229 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
37231 *total = rs6000_cost->fp;
37232 return false;
37234 break;
37236 case NE:
37237 case EQ:
37238 case GTU:
37239 case LTU:
37240 /* Carry bit requires mode == Pmode.
37241 NEG or PLUS already counted so only add one. */
37242 if (mode == Pmode
37243 && (outer_code == NEG || outer_code == PLUS))
37245 *total = COSTS_N_INSNS (1);
37246 return true;
37248 if (outer_code == SET)
37250 if (XEXP (x, 1) == const0_rtx)
37252 if (TARGET_ISEL && !TARGET_MFCRF)
37253 *total = COSTS_N_INSNS (8);
37254 else
37255 *total = COSTS_N_INSNS (2);
37256 return true;
37258 else
37260 *total = COSTS_N_INSNS (3);
37261 return false;
37264 /* FALLTHRU */
37266 case GT:
37267 case LT:
37268 case UNORDERED:
37269 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
37271 if (TARGET_ISEL && !TARGET_MFCRF)
37272 *total = COSTS_N_INSNS (8);
37273 else
37274 *total = COSTS_N_INSNS (2);
37275 return true;
37277 /* CC COMPARE. */
37278 if (outer_code == COMPARE)
37280 *total = 0;
37281 return true;
37283 break;
37285 default:
37286 break;
37289 return false;
37292 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
37294 static bool
37295 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
37296 int opno, int *total, bool speed)
37298 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
37300 fprintf (stderr,
37301 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
37302 "opno = %d, total = %d, speed = %s, x:\n",
37303 ret ? "complete" : "scan inner",
37304 GET_MODE_NAME (mode),
37305 GET_RTX_NAME (outer_code),
37306 opno,
37307 *total,
37308 speed ? "true" : "false");
37310 debug_rtx (x);
37312 return ret;
37315 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
37317 static int
37318 rs6000_debug_address_cost (rtx x, machine_mode mode,
37319 addr_space_t as, bool speed)
37321 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
37323 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
37324 ret, speed ? "true" : "false");
37325 debug_rtx (x);
37327 return ret;
37331 /* A C expression returning the cost of moving data from a register of class
37332 CLASS1 to one of CLASS2. */
37334 static int
37335 rs6000_register_move_cost (machine_mode mode,
37336 reg_class_t from, reg_class_t to)
37338 int ret;
37340 if (TARGET_DEBUG_COST)
37341 dbg_cost_ctrl++;
37343 /* Moves from/to GENERAL_REGS. */
37344 if (reg_classes_intersect_p (to, GENERAL_REGS)
37345 || reg_classes_intersect_p (from, GENERAL_REGS))
37347 reg_class_t rclass = from;
37349 if (! reg_classes_intersect_p (to, GENERAL_REGS))
37350 rclass = to;
37352 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
37353 ret = (rs6000_memory_move_cost (mode, rclass, false)
37354 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
37356 /* It's more expensive to move CR_REGS than CR0_REGS because of the
37357 shift. */
37358 else if (rclass == CR_REGS)
37359 ret = 4;
37361 /* For those processors that have slow LR/CTR moves, make them more
37362 expensive than memory in order to bias spills to memory. */
37363 else if ((rs6000_cpu == PROCESSOR_POWER6
37364 || rs6000_cpu == PROCESSOR_POWER7
37365 || rs6000_cpu == PROCESSOR_POWER8
37366 || rs6000_cpu == PROCESSOR_POWER9)
37367 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
37368 ret = 6 * hard_regno_nregs[0][mode];
37370 else
37371 /* A move will cost one instruction per GPR moved. */
37372 ret = 2 * hard_regno_nregs[0][mode];
37375 /* If we have VSX, we can easily move between FPR or Altivec registers. */
37376 else if (VECTOR_MEM_VSX_P (mode)
37377 && reg_classes_intersect_p (to, VSX_REGS)
37378 && reg_classes_intersect_p (from, VSX_REGS))
37379 ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode];
37381 /* Moving between two similar registers is just one instruction. */
37382 else if (reg_classes_intersect_p (to, from))
37383 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
37385 /* Everything else has to go through GENERAL_REGS. */
37386 else
37387 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
37388 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
37390 if (TARGET_DEBUG_COST)
37392 if (dbg_cost_ctrl == 1)
37393 fprintf (stderr,
37394 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
37395 ret, GET_MODE_NAME (mode), reg_class_names[from],
37396 reg_class_names[to]);
37397 dbg_cost_ctrl--;
37400 return ret;
37403 /* A C expression returning the cost of moving data of MODE from a register to
37404 or from memory. */
37406 static int
37407 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
37408 bool in ATTRIBUTE_UNUSED)
37410 int ret;
37412 if (TARGET_DEBUG_COST)
37413 dbg_cost_ctrl++;
37415 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
37416 ret = 4 * hard_regno_nregs[0][mode];
37417 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
37418 || reg_classes_intersect_p (rclass, VSX_REGS)))
37419 ret = 4 * hard_regno_nregs[32][mode];
37420 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
37421 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
37422 else
37423 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
37425 if (TARGET_DEBUG_COST)
37427 if (dbg_cost_ctrl == 1)
37428 fprintf (stderr,
37429 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
37430 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
37431 dbg_cost_ctrl--;
37434 return ret;
37437 /* Returns a code for a target-specific builtin that implements
37438 reciprocal of the function, or NULL_TREE if not available. */
37440 static tree
37441 rs6000_builtin_reciprocal (tree fndecl)
37443 switch (DECL_FUNCTION_CODE (fndecl))
37445 case VSX_BUILTIN_XVSQRTDP:
37446 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
37447 return NULL_TREE;
37449 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
37451 case VSX_BUILTIN_XVSQRTSP:
37452 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
37453 return NULL_TREE;
37455 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
37457 default:
37458 return NULL_TREE;
37462 /* Load up a constant. If the mode is a vector mode, splat the value across
37463 all of the vector elements. */
37465 static rtx
37466 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
37468 rtx reg;
37470 if (mode == SFmode || mode == DFmode)
37472 rtx d = const_double_from_real_value (dconst, mode);
37473 reg = force_reg (mode, d);
37475 else if (mode == V4SFmode)
37477 rtx d = const_double_from_real_value (dconst, SFmode);
37478 rtvec v = gen_rtvec (4, d, d, d, d);
37479 reg = gen_reg_rtx (mode);
37480 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37482 else if (mode == V2DFmode)
37484 rtx d = const_double_from_real_value (dconst, DFmode);
37485 rtvec v = gen_rtvec (2, d, d);
37486 reg = gen_reg_rtx (mode);
37487 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37489 else
37490 gcc_unreachable ();
37492 return reg;
37495 /* Generate an FMA instruction. */
37497 static void
37498 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
37500 machine_mode mode = GET_MODE (target);
37501 rtx dst;
37503 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
37504 gcc_assert (dst != NULL);
37506 if (dst != target)
37507 emit_move_insn (target, dst);
37510 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
37512 static void
37513 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
37515 machine_mode mode = GET_MODE (dst);
37516 rtx r;
37518 /* This is a tad more complicated, since the fnma_optab is for
37519 a different expression: fma(-m1, m2, a), which is the same
37520 thing except in the case of signed zeros.
37522 Fortunately we know that if FMA is supported that FNMSUB is
37523 also supported in the ISA. Just expand it directly. */
37525 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
37527 r = gen_rtx_NEG (mode, a);
37528 r = gen_rtx_FMA (mode, m1, m2, r);
37529 r = gen_rtx_NEG (mode, r);
37530 emit_insn (gen_rtx_SET (dst, r));
37533 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
37534 add a reg_note saying that this was a division. Support both scalar and
37535 vector divide. Assumes no trapping math and finite arguments. */
37537 void
37538 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
37540 machine_mode mode = GET_MODE (dst);
37541 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
37542 int i;
37544 /* Low precision estimates guarantee 5 bits of accuracy. High
37545 precision estimates guarantee 14 bits of accuracy. SFmode
37546 requires 23 bits of accuracy. DFmode requires 52 bits of
37547 accuracy. Each pass at least doubles the accuracy, leading
37548 to the following. */
37549 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
37550 if (mode == DFmode || mode == V2DFmode)
37551 passes++;
37553 enum insn_code code = optab_handler (smul_optab, mode);
37554 insn_gen_fn gen_mul = GEN_FCN (code);
37556 gcc_assert (code != CODE_FOR_nothing);
37558 one = rs6000_load_constant_and_splat (mode, dconst1);
37560 /* x0 = 1./d estimate */
37561 x0 = gen_reg_rtx (mode);
37562 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
37563 UNSPEC_FRES)));
37565 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
37566 if (passes > 1) {
37568 /* e0 = 1. - d * x0 */
37569 e0 = gen_reg_rtx (mode);
37570 rs6000_emit_nmsub (e0, d, x0, one);
37572 /* x1 = x0 + e0 * x0 */
37573 x1 = gen_reg_rtx (mode);
37574 rs6000_emit_madd (x1, e0, x0, x0);
37576 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
37577 ++i, xprev = xnext, eprev = enext) {
37579 /* enext = eprev * eprev */
37580 enext = gen_reg_rtx (mode);
37581 emit_insn (gen_mul (enext, eprev, eprev));
37583 /* xnext = xprev + enext * xprev */
37584 xnext = gen_reg_rtx (mode);
37585 rs6000_emit_madd (xnext, enext, xprev, xprev);
37586 }
37588 } else
37589 xprev = x0;
37591 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
37593 /* u = n * xprev */
37594 u = gen_reg_rtx (mode);
37595 emit_insn (gen_mul (u, n, xprev));
37597 /* v = n - (d * u) */
37598 v = gen_reg_rtx (mode);
37599 rs6000_emit_nmsub (v, d, u, n);
37601 /* dst = (v * xprev) + u */
37602 rs6000_emit_madd (dst, v, xprev, u);
37604 if (note_p)
37605 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
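/* Editor's sketch (not in the original): the same Newton-Raphson scheme in
   plain C for doubles, to make the RTL above concrete.  The hardware fres
   estimate is faked with 1.0 / d, and the passes == 1 case simply skips
   the refinement block.  */
static double
swdiv_model (double n, double d, int passes)
{
  double x = 1.0 / d;           /* x0: stands in for the fres estimate */
  if (passes > 1)
    {
      double e = 1.0 - d * x;   /* e0 = 1 - d*x0              (fnmsub) */
      x = x + e * x;            /* x1 = x0 + e0*x0            (fmadd)  */
      for (int i = 0; i < passes - 2; i++)
        {
          e = e * e;            /* e_{i+1} = e_i * e_i                 */
          x = x + e * x;        /* x_{i+1} = x_i + e_{i+1} * x_i       */
        }
    }
  double u = n * x;             /* u = n * xprev                       */
  double v = n - d * u;         /* v = n - d*u                (fnmsub) */
  return u + v * x;             /* dst = u + v*xprev          (fmadd)  */
}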
37608 /* Goldschmidt's Algorithm for single/double-precision floating point
37609 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
37611 void
37612 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
37614 machine_mode mode = GET_MODE (src);
37615 rtx e = gen_reg_rtx (mode);
37616 rtx g = gen_reg_rtx (mode);
37617 rtx h = gen_reg_rtx (mode);
37619 /* Low precision estimates guarantee 5 bits of accuracy. High
37620 precision estimates guarantee 14 bits of accuracy. SFmode
37621 requires 23 bits of accuracy. DFmode requires 52 bits of
37622 accuracy. Each pass at least doubles the accuracy, leading
37623 to the following. */
37624 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
37625 if (mode == DFmode || mode == V2DFmode)
37626 passes++;
37628 int i;
37629 rtx mhalf;
37630 enum insn_code code = optab_handler (smul_optab, mode);
37631 insn_gen_fn gen_mul = GEN_FCN (code);
37633 gcc_assert (code != CODE_FOR_nothing);
37635 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
37637 /* e = rsqrt estimate */
37638 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
37639 UNSPEC_RSQRT)));
37641 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
37642 if (!recip)
37644 rtx zero = force_reg (mode, CONST0_RTX (mode));
37646 if (mode == SFmode)
37648 rtx target = emit_conditional_move (e, GT, src, zero, mode,
37649 e, zero, mode, 0);
37650 if (target != e)
37651 emit_move_insn (e, target);
37653 else
37655 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
37656 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
37660 /* g = sqrt estimate. */
37661 emit_insn (gen_mul (g, e, src));
37662 /* h = 1/(2*sqrt) estimate. */
37663 emit_insn (gen_mul (h, e, mhalf));
37665 if (recip)
37667 if (passes == 1)
37669 rtx t = gen_reg_rtx (mode);
37670 rs6000_emit_nmsub (t, g, h, mhalf);
37671 /* Apply correction directly to 1/rsqrt estimate. */
37672 rs6000_emit_madd (dst, e, t, e);
37674 else
37676 for (i = 0; i < passes; i++)
37678 rtx t1 = gen_reg_rtx (mode);
37679 rtx g1 = gen_reg_rtx (mode);
37680 rtx h1 = gen_reg_rtx (mode);
37682 rs6000_emit_nmsub (t1, g, h, mhalf);
37683 rs6000_emit_madd (g1, g, t1, g);
37684 rs6000_emit_madd (h1, h, t1, h);
37686 g = g1;
37687 h = h1;
37689 /* Multiply by 2 for 1/rsqrt. */
37690 emit_insn (gen_add3_insn (dst, h, h));
37693 else
37695 rtx t = gen_reg_rtx (mode);
37696 rs6000_emit_nmsub (t, g, h, mhalf);
37697 rs6000_emit_madd (dst, g, t, g);
37700 return;
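/* Editor's sketch (not in the original): Goldschmidt's iteration in plain C
   for doubles.  The hardware frsqrte estimate is faked with a library
   square root, and the passes == 1 shortcut for the recip case (which
   applies the correction directly to e) is omitted for brevity.  */
static double
swsqrt_model (double src, int passes, int recip)
{
  double e = 1.0 / __builtin_sqrt (src);  /* rsqrt estimate            */
  double g = e * src;                     /* g ~= sqrt(src)            */
  double h = e * 0.5;                     /* h ~= 1/(2*sqrt(src))      */
  if (recip)
    {
      for (int i = 0; i < passes; i++)
        {
          double t = 0.5 - g * h;         /* t = 1/2 - g*h   (fnmsub)  */
          g = g + g * t;                  /* refine g        (fmadd)   */
          h = h + h * t;                  /* refine h        (fmadd)   */
        }
      return h + h;                       /* 1/sqrt(src) = 2*h         */
    }
  double t = 0.5 - g * h;                 /* single correction step    */
  return g + g * t;                       /* sqrt(src)                 */
}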
37703 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
37704 (Power7) targets. DST is the target, and SRC is the argument operand. */
37706 void
37707 rs6000_emit_popcount (rtx dst, rtx src)
37709 machine_mode mode = GET_MODE (dst);
37710 rtx tmp1, tmp2;
37712 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
37713 if (TARGET_POPCNTD)
37715 if (mode == SImode)
37716 emit_insn (gen_popcntdsi2 (dst, src));
37717 else
37718 emit_insn (gen_popcntddi2 (dst, src));
37719 return;
37722 tmp1 = gen_reg_rtx (mode);
37724 if (mode == SImode)
37726 emit_insn (gen_popcntbsi2 (tmp1, src));
37727 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
37728 NULL_RTX, 0);
37729 tmp2 = force_reg (SImode, tmp2);
37730 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
37732 else
37734 emit_insn (gen_popcntbdi2 (tmp1, src));
37735 tmp2 = expand_mult (DImode, tmp1,
37736 GEN_INT ((HOST_WIDE_INT)
37737 0x01010101 << 32 | 0x01010101),
37738 NULL_RTX, 0);
37739 tmp2 = force_reg (DImode, tmp2);
37740 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
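/* Editor's sketch (not in the original): what the popcntb fallback above
   computes, in plain C for the 32-bit case.  popcntb leaves a per-byte
   population count in each byte; multiplying by 0x01010101 sums the four
   byte counts into the top byte, which the final shift extracts.  */
static unsigned int
popcount_model_si (unsigned int src)
{
  unsigned int per_byte = 0;
  for (int i = 0; i < 4; i++)             /* model of the popcntb insn */
    {
      unsigned int byte = (src >> (8 * i)) & 0xff;
      byte = byte - ((byte >> 1) & 0x55);
      byte = (byte & 0x33) + ((byte >> 2) & 0x33);
      byte = (byte + (byte >> 4)) & 0x0f;
      per_byte |= byte << (8 * i);
    }
  return (per_byte * 0x01010101u) >> 24;  /* sum bytes, extract total  */
}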
37745 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
37746 target, and SRC is the argument operand. */
37748 void
37749 rs6000_emit_parity (rtx dst, rtx src)
37751 machine_mode mode = GET_MODE (dst);
37752 rtx tmp;
37754 tmp = gen_reg_rtx (mode);
37756 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
37757 if (TARGET_CMPB)
37759 if (mode == SImode)
37761 emit_insn (gen_popcntbsi2 (tmp, src));
37762 emit_insn (gen_paritysi2_cmpb (dst, tmp));
37764 else
37766 emit_insn (gen_popcntbdi2 (tmp, src));
37767 emit_insn (gen_paritydi2_cmpb (dst, tmp));
37769 return;
37772 if (mode == SImode)
37774 /* Is mult+shift >= shift+xor+shift+xor? */
37775 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
37777 rtx tmp1, tmp2, tmp3, tmp4;
37779 tmp1 = gen_reg_rtx (SImode);
37780 emit_insn (gen_popcntbsi2 (tmp1, src));
37782 tmp2 = gen_reg_rtx (SImode);
37783 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
37784 tmp3 = gen_reg_rtx (SImode);
37785 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
37787 tmp4 = gen_reg_rtx (SImode);
37788 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
37789 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
37791 else
37792 rs6000_emit_popcount (tmp, src);
37793 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
37795 else
37797 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
37798 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
37800 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
37802 tmp1 = gen_reg_rtx (DImode);
37803 emit_insn (gen_popcntbdi2 (tmp1, src));
37805 tmp2 = gen_reg_rtx (DImode);
37806 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
37807 tmp3 = gen_reg_rtx (DImode);
37808 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
37810 tmp4 = gen_reg_rtx (DImode);
37811 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
37812 tmp5 = gen_reg_rtx (DImode);
37813 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
37815 tmp6 = gen_reg_rtx (DImode);
37816 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
37817 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
37819 else
37820 rs6000_emit_popcount (tmp, src);
37821 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
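/* Editor's sketch (not in the original): the shift/xor reduction used
   above, in plain C for 32 bits.  Starting from popcntb's per-byte
   counts, xor-folding preserves the parity of the total while halving
   the width each step.  */
static unsigned int
parity_model_si (unsigned int per_byte_counts)
{
  unsigned int t = per_byte_counts;
  t ^= t >> 16;                 /* fold upper half into lower          */
  t ^= t >> 8;                  /* low bit now holds the parity        */
  return t & 1;
}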
37825 /* Expand an Altivec constant permutation for little endian mode.
37826 There are two issues: First, the two input operands must be
37827 swapped so that together they form a double-wide array in LE
37828 order. Second, the vperm instruction has surprising behavior
37829 in LE mode: it interprets the elements of the source vectors
37830 in BE mode ("left to right") and interprets the elements of
37831 the destination vector in LE mode ("right to left"). To
37832 correct for this, we must subtract each element of the permute
37833 control vector from 31.
37835 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
37836 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
37837 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
37838 serve as the permute control vector. Then, in BE mode,
37840 vperm 9,10,11,12
37842 places the desired result in vr9. However, in LE mode the
37843 vector contents will be
37845 vr10 = 00000003 00000002 00000001 00000000
37846 vr11 = 00000007 00000006 00000005 00000004
37848 The result of the vperm using the same permute control vector is
37850 vr9 = 05000000 07000000 01000000 03000000
37852 That is, the leftmost 4 bytes of vr10 are interpreted as the
37853 source for the rightmost 4 bytes of vr9, and so on.
37855 If we change the permute control vector to
37857 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
37859 and issue
37861 vperm 9,11,10,12
37863 we get the desired
37865 vr9 = 00000006 00000004 00000002 00000000. */
37867 void
37868 altivec_expand_vec_perm_const_le (rtx operands[4])
37870 unsigned int i;
37871 rtx perm[16];
37872 rtx constv, unspec;
37873 rtx target = operands[0];
37874 rtx op0 = operands[1];
37875 rtx op1 = operands[2];
37876 rtx sel = operands[3];
37878 /* Unpack and adjust the constant selector. */
37879 for (i = 0; i < 16; ++i)
37881 rtx e = XVECEXP (sel, 0, i);
37882 unsigned int elt = 31 - (INTVAL (e) & 31);
37883 perm[i] = GEN_INT (elt);
37886 /* Expand to a permute, swapping the inputs and using the
37887 adjusted selector. */
37888 if (!REG_P (op0))
37889 op0 = force_reg (V16QImode, op0);
37890 if (!REG_P (op1))
37891 op1 = force_reg (V16QImode, op1);
37893 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
37894 constv = force_reg (V16QImode, constv);
37895 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
37896 UNSPEC_VPERM);
37897 if (!REG_P (target))
37899 rtx tmp = gen_reg_rtx (V16QImode);
37900 emit_move_insn (tmp, unspec);
37901 unspec = tmp;
37904 emit_move_insn (target, unspec);
37907 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
37908 permute control vector. But here it's not a constant, so we must
37909 generate a vector NAND or NOR to do the adjustment. */
37911 void
37912 altivec_expand_vec_perm_le (rtx operands[4])
37914 rtx notx, iorx, unspec;
37915 rtx target = operands[0];
37916 rtx op0 = operands[1];
37917 rtx op1 = operands[2];
37918 rtx sel = operands[3];
37919 rtx tmp = target;
37920 rtx norreg = gen_reg_rtx (V16QImode);
37921 machine_mode mode = GET_MODE (target);
37923 /* Get everything in regs so the pattern matches. */
37924 if (!REG_P (op0))
37925 op0 = force_reg (mode, op0);
37926 if (!REG_P (op1))
37927 op1 = force_reg (mode, op1);
37928 if (!REG_P (sel))
37929 sel = force_reg (V16QImode, sel);
37930 if (!REG_P (target))
37931 tmp = gen_reg_rtx (mode);
37933 if (TARGET_P9_VECTOR)
37935 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
37936 UNSPEC_VPERMR);
37938 else
37940 /* Invert the selector with a VNAND if available, else a VNOR.
37941 The VNAND is preferred for future fusion opportunities. */
37942 notx = gen_rtx_NOT (V16QImode, sel);
37943 iorx = (TARGET_P8_VECTOR
37944 ? gen_rtx_IOR (V16QImode, notx, notx)
37945 : gen_rtx_AND (V16QImode, notx, notx));
37946 emit_insn (gen_rtx_SET (norreg, iorx));
37948 /* Permute with operands reversed and adjusted selector. */
37949 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
37950 UNSPEC_VPERM);
37953 /* Copy into target, possibly by way of a register. */
37954 if (!REG_P (target))
37956 emit_move_insn (tmp, unspec);
37957 unspec = tmp;
37960 emit_move_insn (target, unspec);
37963 /* Expand an Altivec constant permutation. Return true if we match
37964 an efficient implementation; false to fall back to VPERM. */
37966 bool
37967 altivec_expand_vec_perm_const (rtx operands[4])
37969 struct altivec_perm_insn {
37970 HOST_WIDE_INT mask;
37971 enum insn_code impl;
37972 unsigned char perm[16];
37973 };
37974 static const struct altivec_perm_insn patterns[] = {
37975 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
37976 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
37977 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
37978 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
37979 { OPTION_MASK_ALTIVEC,
37980 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
37981 : CODE_FOR_altivec_vmrglb_direct),
37982 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
37983 { OPTION_MASK_ALTIVEC,
37984 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
37985 : CODE_FOR_altivec_vmrglh_direct),
37986 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
37987 { OPTION_MASK_ALTIVEC,
37988 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
37989 : CODE_FOR_altivec_vmrglw_direct),
37990 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
37991 { OPTION_MASK_ALTIVEC,
37992 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
37993 : CODE_FOR_altivec_vmrghb_direct),
37994 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
37995 { OPTION_MASK_ALTIVEC,
37996 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
37997 : CODE_FOR_altivec_vmrghh_direct),
37998 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
37999 { OPTION_MASK_ALTIVEC,
38000 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
38001 : CODE_FOR_altivec_vmrghw_direct),
38002 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
38003 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
38004 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
38005 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
38006 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
38007 };
38009 unsigned int i, j, elt, which;
38010 unsigned char perm[16];
38011 rtx target, op0, op1, sel, x;
38012 bool one_vec;
38014 target = operands[0];
38015 op0 = operands[1];
38016 op1 = operands[2];
38017 sel = operands[3];
38019 /* Unpack the constant selector. */
38020 for (i = which = 0; i < 16; ++i)
38022 rtx e = XVECEXP (sel, 0, i);
38023 elt = INTVAL (e) & 31;
38024 which |= (elt < 16 ? 1 : 2);
38025 perm[i] = elt;
38028 /* Simplify the constant selector based on operands. */
38029 switch (which)
38031 default:
38032 gcc_unreachable ();
38034 case 3:
38035 one_vec = false;
38036 if (!rtx_equal_p (op0, op1))
38037 break;
38038 /* FALLTHRU */
38040 case 2:
38041 for (i = 0; i < 16; ++i)
38042 perm[i] &= 15;
38043 op0 = op1;
38044 one_vec = true;
38045 break;
38047 case 1:
38048 op1 = op0;
38049 one_vec = true;
38050 break;
38053 /* Look for splat patterns. */
38054 if (one_vec)
38056 elt = perm[0];
38058 for (i = 0; i < 16; ++i)
38059 if (perm[i] != elt)
38060 break;
38061 if (i == 16)
38063 if (!BYTES_BIG_ENDIAN)
38064 elt = 15 - elt;
38065 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
38066 return true;
38069 if (elt % 2 == 0)
38071 for (i = 0; i < 16; i += 2)
38072 if (perm[i] != elt || perm[i + 1] != elt + 1)
38073 break;
38074 if (i == 16)
38076 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
38077 x = gen_reg_rtx (V8HImode);
38078 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
38079 GEN_INT (field)));
38080 emit_move_insn (target, gen_lowpart (V16QImode, x));
38081 return true;
38085 if (elt % 4 == 0)
38087 for (i = 0; i < 16; i += 4)
38088 if (perm[i] != elt
38089 || perm[i + 1] != elt + 1
38090 || perm[i + 2] != elt + 2
38091 || perm[i + 3] != elt + 3)
38092 break;
38093 if (i == 16)
38095 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
38096 x = gen_reg_rtx (V4SImode);
38097 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
38098 GEN_INT (field)));
38099 emit_move_insn (target, gen_lowpart (V16QImode, x));
38100 return true;
38105 /* Look for merge and pack patterns. */
38106 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
38108 bool swapped;
38110 if ((patterns[j].mask & rs6000_isa_flags) == 0)
38111 continue;
38113 elt = patterns[j].perm[0];
38114 if (perm[0] == elt)
38115 swapped = false;
38116 else if (perm[0] == elt + 16)
38117 swapped = true;
38118 else
38119 continue;
38120 for (i = 1; i < 16; ++i)
38122 elt = patterns[j].perm[i];
38123 if (swapped)
38124 elt = (elt >= 16 ? elt - 16 : elt + 16);
38125 else if (one_vec && elt >= 16)
38126 elt -= 16;
38127 if (perm[i] != elt)
38128 break;
38130 if (i == 16)
38132 enum insn_code icode = patterns[j].impl;
38133 machine_mode omode = insn_data[icode].operand[0].mode;
38134 machine_mode imode = insn_data[icode].operand[1].mode;
38136 /* For little-endian, don't use vpkuwum and vpkuhum if the
38137 underlying vector type is not V4SI and V8HI, respectively.
38138 For example, using vpkuwum with a V8HI picks up the even
38139 halfwords (BE numbering) when the even halfwords (LE
38140 numbering) are what we need. */
38141 if (!BYTES_BIG_ENDIAN
38142 && icode == CODE_FOR_altivec_vpkuwum_direct
38143 && ((GET_CODE (op0) == REG
38144 && GET_MODE (op0) != V4SImode)
38145 || (GET_CODE (op0) == SUBREG
38146 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
38147 continue;
38148 if (!BYTES_BIG_ENDIAN
38149 && icode == CODE_FOR_altivec_vpkuhum_direct
38150 && ((GET_CODE (op0) == REG
38151 && GET_MODE (op0) != V8HImode)
38152 || (GET_CODE (op0) == SUBREG
38153 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
38154 continue;
38156 /* For little-endian, the two input operands must be swapped
38157 (or swapped back) to ensure proper right-to-left numbering
38158 from 0 to 2N-1. */
38159 if (swapped ^ !BYTES_BIG_ENDIAN)
38160 std::swap (op0, op1);
38161 if (imode != V16QImode)
38163 op0 = gen_lowpart (imode, op0);
38164 op1 = gen_lowpart (imode, op1);
38166 if (omode == V16QImode)
38167 x = target;
38168 else
38169 x = gen_reg_rtx (omode);
38170 emit_insn (GEN_FCN (icode) (x, op0, op1));
38171 if (omode != V16QImode)
38172 emit_move_insn (target, gen_lowpart (V16QImode, x));
38173 return true;
38177 if (!BYTES_BIG_ENDIAN)
38179 altivec_expand_vec_perm_const_le (operands);
38180 return true;
38183 return false;
38186 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
38187 Return true if we match an efficient implementation. */
38189 static bool
38190 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
38191 unsigned char perm0, unsigned char perm1)
38193 rtx x;
38195 /* If both selectors come from the same operand, fold to single op. */
38196 if ((perm0 & 2) == (perm1 & 2))
38198 if (perm0 & 2)
38199 op0 = op1;
38200 else
38201 op1 = op0;
38203 /* If both operands are equal, fold to simpler permutation. */
38204 if (rtx_equal_p (op0, op1))
38206 perm0 = perm0 & 1;
38207 perm1 = (perm1 & 1) + 2;
38209 /* If the first selector comes from the second operand, swap. */
38210 else if (perm0 & 2)
38212 if (perm1 & 2)
38213 return false;
38214 perm0 -= 2;
38215 perm1 += 2;
38216 std::swap (op0, op1);
38218 /* If the second selector does not come from the second operand, fail. */
38219 else if ((perm1 & 2) == 0)
38220 return false;
38222 /* Success! */
38223 if (target != NULL)
38225 machine_mode vmode, dmode;
38226 rtvec v;
38228 vmode = GET_MODE (target);
38229 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
38230 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
38231 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
38232 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
38233 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
38234 emit_insn (gen_rtx_SET (target, x));
38236 return true;
38239 bool
38240 rs6000_expand_vec_perm_const (rtx operands[4])
38242 rtx target, op0, op1, sel;
38243 unsigned char perm0, perm1;
38245 target = operands[0];
38246 op0 = operands[1];
38247 op1 = operands[2];
38248 sel = operands[3];
38250 /* Unpack the constant selector. */
38251 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
38252 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
38254 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
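/* Editor's note (illustration, not in the original): for a two-element
   vector such as V2DF, sel = {1, 3} picks the odd element of each input,
   while sel = {2, 0} is handled by swapping op0/op1 in the helper above
   so the first selector again indexes operand 0.  */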
38257 /* Test whether a constant permutation is supported. */
38259 static bool
38260 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
38261 const unsigned char *sel)
38263 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
38264 if (TARGET_ALTIVEC)
38265 return true;
38267 /* Check for ps_merge* or evmerge* insns. */
38268 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
38269 || (TARGET_SPE && vmode == V2SImode))
38271 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
38272 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
38273 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
38276 return false;
38279 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
38281 static void
38282 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
38283 machine_mode vmode, unsigned nelt, rtx perm[])
38285 machine_mode imode;
38286 rtx x;
38288 imode = vmode;
38289 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
38291 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
38292 imode = mode_for_vector (imode, nelt);
38295 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
38296 x = expand_vec_perm (vmode, op0, op1, x, target);
38297 if (x != target)
38298 emit_move_insn (target, x);
38301 /* Expand an extract even operation. */
38303 void
38304 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
38306 machine_mode vmode = GET_MODE (target);
38307 unsigned i, nelt = GET_MODE_NUNITS (vmode);
38308 rtx perm[16];
38310 for (i = 0; i < nelt; i++)
38311 perm[i] = GEN_INT (i * 2);
38313 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
38316 /* Expand a vector interleave operation. */
38318 void
38319 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
38321 machine_mode vmode = GET_MODE (target);
38322 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
38323 rtx perm[16];
38325 high = (highp ? 0 : nelt / 2);
38326 for (i = 0; i < nelt / 2; i++)
38328 perm[i * 2] = GEN_INT (i + high);
38329 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
38332 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
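/* Editor's note (illustration, not in the original): for nelt = 4 and
   highp, the selector built above is {0, 4, 1, 5}; with !highp it is
   {2, 6, 3, 7}, i.e. the classic high/low element interleave.  */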
38335 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
38336 void
38337 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
38339 HOST_WIDE_INT hwi_scale (scale);
38340 REAL_VALUE_TYPE r_pow;
38341 rtvec v = rtvec_alloc (2);
38342 rtx elt;
38343 rtx scale_vec = gen_reg_rtx (V2DFmode);
38344 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
38345 elt = const_double_from_real_value (r_pow, DFmode);
38346 RTVEC_ELT (v, 0) = elt;
38347 RTVEC_ELT (v, 1) = elt;
38348 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
38349 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
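/* Editor's note (illustration, not in the original): scale = 3 builds the
   splat {8.0, 8.0} and multiplies, i.e. tgt = src * 2^3 per element.  */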
38352 /* Return an RTX representing where to find the function value of a
38353 function returning MODE. */
38354 static rtx
38355 rs6000_complex_function_value (machine_mode mode)
38357 unsigned int regno;
38358 rtx r1, r2;
38359 machine_mode inner = GET_MODE_INNER (mode);
38360 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
38362 if (TARGET_FLOAT128_TYPE
38363 && (mode == KCmode
38364 || (mode == TCmode && TARGET_IEEEQUAD)))
38365 regno = ALTIVEC_ARG_RETURN;
38367 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38368 regno = FP_ARG_RETURN;
38370 else
38372 regno = GP_ARG_RETURN;
38374 /* 32-bit is OK since it'll go in r3/r4. */
38375 if (TARGET_32BIT && inner_bytes >= 4)
38376 return gen_rtx_REG (mode, regno);
38379 if (inner_bytes >= 8)
38380 return gen_rtx_REG (mode, regno);
38382 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
38383 const0_rtx);
38384 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
38385 GEN_INT (inner_bytes));
38386 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
38389 /* Return an rtx describing a return value of MODE as a PARALLEL
38390 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
38391 stride REG_STRIDE. */
38393 static rtx
38394 rs6000_parallel_return (machine_mode mode,
38395 int n_elts, machine_mode elt_mode,
38396 unsigned int regno, unsigned int reg_stride)
38398 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
38400 int i;
38401 for (i = 0; i < n_elts; i++)
38403 rtx r = gen_rtx_REG (elt_mode, regno);
38404 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
38405 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
38406 regno += reg_stride;
38409 return par;
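/* Editor's note (illustration, not in the original): rs6000_parallel_return
   (DImode, 2, SImode, GP_ARG_RETURN, 1) describes a 64-bit value returned
   in r3 (bytes 0-3) and r4 (bytes 4-7), as used for the 32-bit
   -mpowerpc64 cases below.  */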
38412 /* Target hook for TARGET_FUNCTION_VALUE.
38414 On the SPE, both FPs and vectors are returned in r3.
38416 On RS/6000 an integer value is in r3 and a floating-point value is in
38417 fp1, unless -msoft-float. */
38419 static rtx
38420 rs6000_function_value (const_tree valtype,
38421 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
38422 bool outgoing ATTRIBUTE_UNUSED)
38424 machine_mode mode;
38425 unsigned int regno;
38426 machine_mode elt_mode;
38427 int n_elts;
38429 /* Special handling for structs in darwin64. */
38430 if (TARGET_MACHO
38431 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
38433 CUMULATIVE_ARGS valcum;
38434 rtx valret;
38436 valcum.words = 0;
38437 valcum.fregno = FP_ARG_MIN_REG;
38438 valcum.vregno = ALTIVEC_ARG_MIN_REG;
38439 /* Do a trial code generation as if this were going to be passed as
38440 an argument; if any part goes in memory, we return NULL. */
38441 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
38442 if (valret)
38443 return valret;
38444 /* Otherwise fall through to standard ABI rules. */
38447 mode = TYPE_MODE (valtype);
38449 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
38450 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
38452 int first_reg, n_regs;
38454 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
38456 /* _Decimal128 must use even/odd register pairs. */
38457 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38458 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
38460 else
38462 first_reg = ALTIVEC_ARG_RETURN;
38463 n_regs = 1;
38466 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
38469 /* Some return value types need to be split in -mpowerpc64, 32-bit ABI. */
38470 if (TARGET_32BIT && TARGET_POWERPC64)
38471 switch (mode)
38473 default:
38474 break;
38475 case DImode:
38476 case SCmode:
38477 case DCmode:
38478 case TCmode:
38479 int count = GET_MODE_SIZE (mode) / 4;
38480 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
38483 if ((INTEGRAL_TYPE_P (valtype)
38484 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
38485 || POINTER_TYPE_P (valtype))
38486 mode = TARGET_32BIT ? SImode : DImode;
38488 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38489 /* _Decimal128 must use an even/odd register pair. */
38490 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38491 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
38492 && !FLOAT128_VECTOR_P (mode)
38493 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
38494 regno = FP_ARG_RETURN;
38495 else if (TREE_CODE (valtype) == COMPLEX_TYPE
38496 && targetm.calls.split_complex_arg)
38497 return rs6000_complex_function_value (mode);
38498 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38499 return register is used in both cases, and we won't see V2DImode/V2DFmode
38500 for pure altivec, combine the two cases. */
38501 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
38502 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
38503 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
38504 regno = ALTIVEC_ARG_RETURN;
38505 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38506 && (mode == DFmode || mode == DCmode
38507 || FLOAT128_IBM_P (mode) || mode == TCmode))
38508 return spe_build_register_parallel (mode, GP_ARG_RETURN);
38509 else
38510 regno = GP_ARG_RETURN;
38512 return gen_rtx_REG (mode, regno);
38515 /* Define how to find the value returned by a library function
38516 assuming the value has mode MODE. */
38517 static rtx
38518 rs6000_libcall_value (machine_mode mode)
38520 unsigned int regno;
38522 /* A long long return value needs to be split in -mpowerpc64, 32-bit ABI. */
38523 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
38524 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
38526 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38527 /* _Decimal128 must use an even/odd register pair. */
38528 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38529 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
38530 && TARGET_HARD_FLOAT && TARGET_FPRS
38531 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
38532 regno = FP_ARG_RETURN;
38533 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38534 return register is used in both cases, and we won't see V2DImode/V2DFmode
38535 for pure altivec, combine the two cases. */
38536 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
38537 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
38538 regno = ALTIVEC_ARG_RETURN;
38539 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
38540 return rs6000_complex_function_value (mode);
38541 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38542 && (mode == DFmode || mode == DCmode
38543 || FLOAT128_IBM_P (mode) || mode == TCmode))
38544 return spe_build_register_parallel (mode, GP_ARG_RETURN);
38545 else
38546 regno = GP_ARG_RETURN;
38548 return gen_rtx_REG (mode, regno);
38552 /* Return true if we use LRA instead of the reload pass. */
38553 static bool
38554 rs6000_lra_p (void)
38556 return TARGET_LRA;
38559 /* Compute register pressure classes. We implement the target hook to avoid
38560 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
38561 lead to incorrect estimates of the number of available registers and therefore
38562 increased register pressure/spill. */
38563 static int
38564 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
38566 int n;
38568 n = 0;
38569 pressure_classes[n++] = GENERAL_REGS;
38570 if (TARGET_VSX)
38571 pressure_classes[n++] = VSX_REGS;
38572 else
38574 if (TARGET_ALTIVEC)
38575 pressure_classes[n++] = ALTIVEC_REGS;
38576 if (TARGET_HARD_FLOAT && TARGET_FPRS)
38577 pressure_classes[n++] = FLOAT_REGS;
38579 pressure_classes[n++] = CR_REGS;
38580 pressure_classes[n++] = SPECIAL_REGS;
38582 return n;
38585 /* Given FROM and TO register numbers, say whether this elimination is allowed.
38586 Frame pointer elimination is automatically handled.
38588 For the RS/6000, if frame pointer elimination is being done, we would like
38589 to convert ap into fp, not sp.
38591 We need r30 if -mminimal-toc was specified and there are constant pool
38592 references. */
38594 static bool
38595 rs6000_can_eliminate (const int from, const int to)
38597 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
38598 ? ! frame_pointer_needed
38599 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
38600 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
38601 || constant_pool_empty_p ()
38602 : true);
38605 /* Define the offset between two registers, FROM to be eliminated and its
38606 replacement TO, at the start of a routine. */
38607 HOST_WIDE_INT
38608 rs6000_initial_elimination_offset (int from, int to)
38610 rs6000_stack_t *info = rs6000_stack_info ();
38611 HOST_WIDE_INT offset;
38613 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38614 offset = info->push_p ? 0 : -info->total_size;
38615 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38617 offset = info->push_p ? 0 : -info->total_size;
38618 if (FRAME_GROWS_DOWNWARD)
38619 offset += info->fixed_size + info->vars_size + info->parm_size;
38621 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
38622 offset = FRAME_GROWS_DOWNWARD
38623 ? info->fixed_size + info->vars_size + info->parm_size
38624 : 0;
38625 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
38626 offset = info->total_size;
38627 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38628 offset = info->push_p ? info->total_size : 0;
38629 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
38630 offset = 0;
38631 else
38632 gcc_unreachable ();
38634 return offset;
38637 static rtx
38638 rs6000_dwarf_register_span (rtx reg)
38640 rtx parts[8];
38641 int i, words;
38642 unsigned regno = REGNO (reg);
38643 machine_mode mode = GET_MODE (reg);
38645 if (TARGET_SPE
38646 && regno < 32
38647 && (SPE_VECTOR_MODE (GET_MODE (reg))
38648 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
38649 && mode != SFmode && mode != SDmode && mode != SCmode)))
38651 else
38652 return NULL_RTX;
38654 regno = REGNO (reg);
38656 /* The duality of the SPE register size wreaks all kinds of havoc.
38657 This is a way of distinguishing the 32-bit view of r0 from the
38658 64-bit view of r0. */
38659 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
38660 gcc_assert (words <= 4);
38661 for (i = 0; i < words; i++, regno++)
38663 if (BYTES_BIG_ENDIAN)
38665 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
38666 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
38668 else
38670 parts[2 * i] = gen_rtx_REG (SImode, regno);
38671 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
38675 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
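/* For example (illustrative): a 64-bit SPE value living in r5 on a
   big-endian target produces
     (parallel [(reg:SI <SPE high part of r5>) (reg:SI 5)])
   so the unwinder tracks the upper and lower 32-bit halves in
   separate columns.  */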
38678 /* Fill in sizes for SPE register high parts in table used by unwinder. */
38680 static void
38681 rs6000_init_dwarf_reg_sizes_extra (tree address)
38683 if (TARGET_SPE)
38685 int i;
38686 machine_mode mode = TYPE_MODE (char_type_node);
38687 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
38688 rtx mem = gen_rtx_MEM (BLKmode, addr);
38689 rtx value = gen_int_mode (4, mode);
38691 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
38693 int column = DWARF_REG_TO_UNWIND_COLUMN
38694 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
38695 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
38697 emit_move_insn (adjust_address (mem, mode, offset), value);
38701 if (TARGET_MACHO && ! TARGET_ALTIVEC)
38703 int i;
38704 machine_mode mode = TYPE_MODE (char_type_node);
38705 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
38706 rtx mem = gen_rtx_MEM (BLKmode, addr);
38707 rtx value = gen_int_mode (16, mode);
38709 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
38710 The unwinder still needs to know the size of Altivec registers. */
38712 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
38714 int column = DWARF_REG_TO_UNWIND_COLUMN
38715 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
38716 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
38718 emit_move_insn (adjust_address (mem, mode, offset), value);
38723 /* Map internal gcc register numbers to debug format register numbers.
38724 FORMAT specifies the type of debug register number to use:
38725 0 -- debug information, except for frame-related sections
38726 1 -- DWARF .debug_frame section
38727 2 -- DWARF .eh_frame section */
38729 unsigned int
38730 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
38732 /* We never use the GCC internal number for SPE high registers.
38733 Those are mapped to the 1200..1231 range for all debug formats. */
38734 if (SPE_HIGH_REGNO_P (regno))
38735 return regno - FIRST_SPE_HIGH_REGNO + 1200;
38737 /* Except for the above, we use the internal number for non-DWARF
38738 debug information, and also for .eh_frame. */
38739 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
38740 return regno;
38742 /* On some platforms, we use the standard DWARF register
38743 numbering for .debug_info and .debug_frame. */
38744 #ifdef RS6000_USE_DWARF_NUMBERING
38745 if (regno <= 63)
38746 return regno;
38747 if (regno == LR_REGNO)
38748 return 108;
38749 if (regno == CTR_REGNO)
38750 return 109;
38751 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
38752 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
38753 The actual code emitted saves the whole of CR, so we map CR2_REGNO
38754 to the DWARF reg for CR. */
38755 if (format == 1 && regno == CR2_REGNO)
38756 return 64;
38757 if (CR_REGNO_P (regno))
38758 return regno - CR0_REGNO + 86;
38759 if (regno == CA_REGNO)
38760 return 101; /* XER */
38761 if (ALTIVEC_REGNO_P (regno))
38762 return regno - FIRST_ALTIVEC_REGNO + 1124;
38763 if (regno == VRSAVE_REGNO)
38764 return 356;
38765 if (regno == VSCR_REGNO)
38766 return 67;
38767 if (regno == SPE_ACC_REGNO)
38768 return 99;
38769 if (regno == SPEFSCR_REGNO)
38770 return 612;
38771 #endif
38772 return regno;
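/* Summary of the RS6000_USE_DWARF_NUMBERING cases above (illustrative,
   derived directly from the returns in this function):

     r0..r31 -> 0..31       f0..f31 -> 32..63      LR -> 108    CTR -> 109
     CR0..CR7 -> 86..93     CA (XER) -> 101        v0..v31 -> 1124..1155
     VRSAVE -> 356          VSCR -> 67             SPE acc/fscr -> 99/612

   Anything not matched falls through and keeps the GCC-internal number.  */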
38775 /* target hook eh_return_filter_mode */
38776 static machine_mode
38777 rs6000_eh_return_filter_mode (void)
38779 return TARGET_32BIT ? SImode : word_mode;
38782 /* Target hook for scalar_mode_supported_p. */
38783 static bool
38784 rs6000_scalar_mode_supported_p (machine_mode mode)
38786 /* -m32 does not support TImode. This is the default, from
38787 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
38788 same ABI as for -m32. But default_scalar_mode_supported_p allows
38789 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
38790 for -mpowerpc64. */
38791 if (TARGET_32BIT && mode == TImode)
38792 return false;
38794 if (DECIMAL_FLOAT_MODE_P (mode))
38795 return default_decimal_float_supported_p ();
38796 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
38797 return true;
38798 else
38799 return default_scalar_mode_supported_p (mode);
38802 /* Target hook for vector_mode_supported_p. */
38803 static bool
38804 rs6000_vector_mode_supported_p (machine_mode mode)
38807 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
38808 return true;
38810 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
38811 return true;
38813 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
38814 128-bit, the compiler might try to widen IEEE 128-bit to IBM
38815 double-double. */
38816 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
38817 return true;
38819 else
38820 return false;
38823 /* Target hook for floatn_mode. */
38824 static machine_mode
38825 rs6000_floatn_mode (int n, bool extended)
38827 if (extended)
38829 switch (n)
38831 case 32:
38832 return DFmode;
38834 case 64:
38835 if (TARGET_FLOAT128_KEYWORD)
38836 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
38837 else
38838 return VOIDmode;
38840 case 128:
38841 return VOIDmode;
38843 default:
38844 /* Those are the only valid _FloatNx types. */
38845 gcc_unreachable ();
38848 else
38850 switch (n)
38852 case 32:
38853 return SFmode;
38855 case 64:
38856 return DFmode;
38858 case 128:
38859 if (TARGET_FLOAT128_KEYWORD)
38860 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
38861 else
38862 return VOIDmode;
38864 default:
38865 return VOIDmode;
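/* Usage sketch (illustrative user code, assuming -mfloat128 so that
   TARGET_FLOAT128_KEYWORD holds):

     _Float32  a;    -- SFmode
     _Float64  b;    -- DFmode
     _Float32x c;    -- DFmode (extended)
     _Float64x d;    -- KFmode, or TFmode when long double is IEEE 128-bit
     _Float128 e;    -- likewise KFmode/TFmode

   _Float128x is rejected: the 128-bit extended case returns VOIDmode.  */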
38871 /* Target hook for c_mode_for_suffix. */
38872 static machine_mode
38873 rs6000_c_mode_for_suffix (char suffix)
38875 if (TARGET_FLOAT128_TYPE)
38877 if (suffix == 'q' || suffix == 'Q')
38878 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
38880 /* At the moment, we are not defining a suffix for IBM extended double.
38881 If/when the default for -mabi=ieeelongdouble is changed, and we want
38882 to support __ibm128 constants in legacy library code, we may need to
38883 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
38884 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
38885 __float80 constants. */
38888 return VOIDmode;
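/* Usage sketch (illustrative, requires a compiler with TARGET_FLOAT128_TYPE):

     __float128 pi = 3.14159265358979323846q;

   The 'q'/'Q' suffix keeps the constant in KFmode/TFmode; without it the
   literal would be rounded to double first.  */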
38891 /* Target hook for invalid_arg_for_unprototyped_fn. */
38892 static const char *
38893 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
38895 return (!rs6000_darwin64_abi
38896 && typelist == 0
38897 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
38898 && (funcdecl == NULL_TREE
38899 || (TREE_CODE (funcdecl) == FUNCTION_DECL
38900 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
38901 ? N_("AltiVec argument passed to unprototyped function")
38902 : NULL;
38905 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
38906 setup by using __stack_chk_fail_local hidden function instead of
38907 calling __stack_chk_fail directly. Otherwise it is better to call
38908 __stack_chk_fail directly. */
38910 static tree ATTRIBUTE_UNUSED
38911 rs6000_stack_protect_fail (void)
38913 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
38914 ? default_hidden_stack_protect_fail ()
38915 : default_external_stack_protect_fail ();
38918 void
38919 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
38920 int num_operands ATTRIBUTE_UNUSED)
38922 if (rs6000_warn_cell_microcode)
38924 const char *temp;
38925 int insn_code_number = recog_memoized (insn);
38926 location_t location = INSN_LOCATION (insn);
38928 /* Punt on insns we cannot recognize. */
38929 if (insn_code_number < 0)
38930 return;
38932 /* get_insn_template can modify recog_data, so save and restore it. */
38933 struct recog_data_d recog_data_save = recog_data;
38934 for (int i = 0; i < recog_data.n_operands; i++)
38935 recog_data.operand[i] = copy_rtx (recog_data.operand[i]);
38936 temp = get_insn_template (insn_code_number, insn);
38937 recog_data = recog_data_save;
38939 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
38940 warning_at (location, OPT_mwarn_cell_microcode,
38941 "emitting microcode insn %s\t[%s] #%d",
38942 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
38943 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
38944 warning_at (location, OPT_mwarn_cell_microcode,
38945 "emitting conditional microcode insn %s\t[%s] #%d",
38946 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
38950 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
38952 #if TARGET_ELF
38953 static unsigned HOST_WIDE_INT
38954 rs6000_asan_shadow_offset (void)
38956 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
38958 #endif
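/* A minimal sketch of how this offset is consumed (the standard ASan
   shadow mapping, shadow = (addr >> 3) + offset; the helper below is
   illustrative, not part of GCC):  */
#if 0
static unsigned long long
asan_shadow_of (unsigned long long app_addr, int target_64bit)
{
  unsigned long long offset = 1ULL << (target_64bit ? 41 : 29);
  return (app_addr >> 3) + offset;  /* 1 shadow byte per 8 app bytes.  */
}
#endif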
38960 /* Mask options that we want to support inside of attribute((target)) and
38961 #pragma GCC target operations. Note, we do not include things like
38962 64/32-bit, endianness, hard/soft floating point, etc. that would have
38963 different calling sequences. */
38965 struct rs6000_opt_mask {
38966 const char *name; /* option name */
38967 HOST_WIDE_INT mask; /* mask to set */
38968 bool invert; /* invert sense of mask */
38969 bool valid_target; /* option is a target option */
38972 static struct rs6000_opt_mask const rs6000_opt_masks[] =
38974 { "altivec", OPTION_MASK_ALTIVEC, false, true },
38975 { "cmpb", OPTION_MASK_CMPB, false, true },
38976 { "crypto", OPTION_MASK_CRYPTO, false, true },
38977 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
38978 { "dlmzb", OPTION_MASK_DLMZB, false, true },
38979 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
38980 false, true },
38981 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
38982 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
38983 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
38984 { "fprnd", OPTION_MASK_FPRND, false, true },
38985 { "hard-dfp", OPTION_MASK_DFP, false, true },
38986 { "htm", OPTION_MASK_HTM, false, true },
38987 { "isel", OPTION_MASK_ISEL, false, true },
38988 { "mfcrf", OPTION_MASK_MFCRF, false, true },
38989 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
38990 { "modulo", OPTION_MASK_MODULO, false, true },
38991 { "mulhw", OPTION_MASK_MULHW, false, true },
38992 { "multiple", OPTION_MASK_MULTIPLE, false, true },
38993 { "popcntb", OPTION_MASK_POPCNTB, false, true },
38994 { "popcntd", OPTION_MASK_POPCNTD, false, true },
38995 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
38996 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
38997 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
38998 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
38999 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
39000 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
39001 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
39002 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
39003 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
39004 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
39005 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
39006 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
39007 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
39008 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
39009 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
39010 { "string", OPTION_MASK_STRING, false, true },
39011 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
39012 { "update", OPTION_MASK_NO_UPDATE, true , true },
39013 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
39014 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
39015 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
39016 { "vsx", OPTION_MASK_VSX, false, true },
39017 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
39018 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
39019 #ifdef OPTION_MASK_64BIT
39020 #if TARGET_AIX_OS
39021 { "aix64", OPTION_MASK_64BIT, false, false },
39022 { "aix32", OPTION_MASK_64BIT, true, false },
39023 #else
39024 { "64", OPTION_MASK_64BIT, false, false },
39025 { "32", OPTION_MASK_64BIT, true, false },
39026 #endif
39027 #endif
39028 #ifdef OPTION_MASK_EABI
39029 { "eabi", OPTION_MASK_EABI, false, false },
39030 #endif
39031 #ifdef OPTION_MASK_LITTLE_ENDIAN
39032 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
39033 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
39034 #endif
39035 #ifdef OPTION_MASK_RELOCATABLE
39036 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
39037 #endif
39038 #ifdef OPTION_MASK_STRICT_ALIGN
39039 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
39040 #endif
39041 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
39042 { "string", OPTION_MASK_STRING, false, false },
39045 /* Builtin mask mapping for printing the flags. */
39046 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
39048 { "altivec", RS6000_BTM_ALTIVEC, false, false },
39049 { "vsx", RS6000_BTM_VSX, false, false },
39050 { "spe", RS6000_BTM_SPE, false, false },
39051 { "paired", RS6000_BTM_PAIRED, false, false },
39052 { "fre", RS6000_BTM_FRE, false, false },
39053 { "fres", RS6000_BTM_FRES, false, false },
39054 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
39055 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
39056 { "popcntd", RS6000_BTM_POPCNTD, false, false },
39057 { "cell", RS6000_BTM_CELL, false, false },
39058 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
39059 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
39060 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
39061 { "crypto", RS6000_BTM_CRYPTO, false, false },
39062 { "htm", RS6000_BTM_HTM, false, false },
39063 { "hard-dfp", RS6000_BTM_DFP, false, false },
39064 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
39065 { "long-double-128", RS6000_BTM_LDBL128, false, false },
39066 { "float128", RS6000_BTM_FLOAT128, false, false },
39069 /* Option variables that we want to support inside attribute((target)) and
39070 #pragma GCC target operations. */
39072 struct rs6000_opt_var {
39073 const char *name; /* option name */
39074 size_t global_offset; /* offset of the option in global_options. */
39075 size_t target_offset; /* offset of the option in target options. */
39078 static struct rs6000_opt_var const rs6000_opt_vars[] =
39080 { "friz",
39081 offsetof (struct gcc_options, x_TARGET_FRIZ),
39082 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
39083 { "avoid-indexed-addresses",
39084 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
39085 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
39086 { "paired",
39087 offsetof (struct gcc_options, x_rs6000_paired_float),
39088 offsetof (struct cl_target_option, x_rs6000_paired_float), },
39089 { "longcall",
39090 offsetof (struct gcc_options, x_rs6000_default_long_calls),
39091 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
39092 { "optimize-swaps",
39093 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
39094 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
39095 { "allow-movmisalign",
39096 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
39097 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
39098 { "allow-df-permute",
39099 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
39100 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
39101 { "sched-groups",
39102 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
39103 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
39104 { "always-hint",
39105 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
39106 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
39107 { "align-branch-targets",
39108 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
39109 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
39110 { "vectorize-builtins",
39111 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
39112 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
39113 { "tls-markers",
39114 offsetof (struct gcc_options, x_tls_markers),
39115 offsetof (struct cl_target_option, x_tls_markers), },
39116 { "sched-prolog",
39117 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39118 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39119 { "sched-epilog",
39120 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39121 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39122 { "gen-cell-microcode",
39123 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
39124 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
39125 { "warn-cell-microcode",
39126 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
39127 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
39130 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
39131 parsing. Return true if there were no errors. */
39133 static bool
39134 rs6000_inner_target_options (tree args, bool attr_p)
39136 bool ret = true;
39138 if (args == NULL_TREE)
39141 else if (TREE_CODE (args) == STRING_CST)
39143 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39144 char *q;
39146 while ((q = strtok (p, ",")) != NULL)
39148 bool error_p = false;
39149 bool not_valid_p = false;
39150 const char *cpu_opt = NULL;
39152 p = NULL;
39153 if (strncmp (q, "cpu=", 4) == 0)
39155 int cpu_index = rs6000_cpu_name_lookup (q+4);
39156 if (cpu_index >= 0)
39157 rs6000_cpu_index = cpu_index;
39158 else
39160 error_p = true;
39161 cpu_opt = q+4;
39164 else if (strncmp (q, "tune=", 5) == 0)
39166 int tune_index = rs6000_cpu_name_lookup (q+5);
39167 if (tune_index >= 0)
39168 rs6000_tune_index = tune_index;
39169 else
39171 error_p = true;
39172 cpu_opt = q+5;
39175 else
39177 size_t i;
39178 bool invert = false;
39179 char *r = q;
39181 error_p = true;
39182 if (strncmp (r, "no-", 3) == 0)
39184 invert = true;
39185 r += 3;
39188 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
39189 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
39191 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
39193 if (!rs6000_opt_masks[i].valid_target)
39194 not_valid_p = true;
39195 else
39197 error_p = false;
39198 rs6000_isa_flags_explicit |= mask;
39200 /* VSX needs altivec, so -mvsx automagically sets
39201 altivec and disables -mavoid-indexed-addresses. */
39202 if (!invert)
39204 if (mask == OPTION_MASK_VSX)
39206 mask |= OPTION_MASK_ALTIVEC;
39207 TARGET_AVOID_XFORM = 0;
39211 if (rs6000_opt_masks[i].invert)
39212 invert = !invert;
39214 if (invert)
39215 rs6000_isa_flags &= ~mask;
39216 else
39217 rs6000_isa_flags |= mask;
39219 break;
39222 if (error_p && !not_valid_p)
39224 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
39225 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
39227 size_t j = rs6000_opt_vars[i].global_offset;
39228 *((int *) ((char *)&global_options + j)) = !invert;
39229 error_p = false;
39230 not_valid_p = false;
39231 break;
39236 if (error_p)
39238 const char *eprefix, *esuffix;
39240 ret = false;
39241 if (attr_p)
39243 eprefix = "__attribute__((__target__(";
39244 esuffix = ")))";
39246 else
39248 eprefix = "#pragma GCC target ";
39249 esuffix = "";
39252 if (cpu_opt)
39253 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
39254 q, esuffix);
39255 else if (not_valid_p)
39256 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
39257 else
39258 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
39263 else if (TREE_CODE (args) == TREE_LIST)
39267 tree value = TREE_VALUE (args);
39268 if (value)
39270 bool ret2 = rs6000_inner_target_options (value, attr_p);
39271 if (!ret2)
39272 ret = false;
39274 args = TREE_CHAIN (args);
39276 while (args != NULL_TREE);
39279 else
39281 error ("attribute %<target%> argument not a string");
39282 return false;
39285 return ret;
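/* Parsing sketch (illustrative): the argument string is tokenized on
   commas, so

     #pragma GCC target ("cpu=power8,tune=power9,no-vsx")

   routes "cpu="/"tune=" through rs6000_cpu_name_lookup, clears
   OPTION_MASK_VSX via the mask table, and an unrecognized token would be
   reported with the eprefix/esuffix error text built above.  */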
39288 /* Print out the target options as a list for -mdebug=target. */
39290 static void
39291 rs6000_debug_target_options (tree args, const char *prefix)
39293 if (args == NULL_TREE)
39294 fprintf (stderr, "%s<NULL>", prefix);
39296 else if (TREE_CODE (args) == STRING_CST)
39298 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39299 char *q;
39301 while ((q = strtok (p, ",")) != NULL)
39303 p = NULL;
39304 fprintf (stderr, "%s\"%s\"", prefix, q);
39305 prefix = ", ";
39309 else if (TREE_CODE (args) == TREE_LIST)
39313 tree value = TREE_VALUE (args);
39314 if (value)
39316 rs6000_debug_target_options (value, prefix);
39317 prefix = ", ";
39319 args = TREE_CHAIN (args);
39321 while (args != NULL_TREE);
39324 else
39325 gcc_unreachable ();
39327 return;
39331 /* Hook to validate attribute((target("..."))). */
39333 static bool
39334 rs6000_valid_attribute_p (tree fndecl,
39335 tree ARG_UNUSED (name),
39336 tree args,
39337 int flags)
39339 struct cl_target_option cur_target;
39340 bool ret;
39341 tree old_optimize = build_optimization_node (&global_options);
39342 tree new_target, new_optimize;
39343 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39345 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
39347 if (TARGET_DEBUG_TARGET)
39349 tree tname = DECL_NAME (fndecl);
39350 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
39351 if (tname)
39352 fprintf (stderr, "function: %.*s\n",
39353 (int) IDENTIFIER_LENGTH (tname),
39354 IDENTIFIER_POINTER (tname));
39355 else
39356 fprintf (stderr, "function: unknown\n");
39358 fprintf (stderr, "args:");
39359 rs6000_debug_target_options (args, " ");
39360 fprintf (stderr, "\n");
39362 if (flags)
39363 fprintf (stderr, "flags: 0x%x\n", flags);
39365 fprintf (stderr, "--------------------\n");
39368 old_optimize = build_optimization_node (&global_options);
39369 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39371 /* If the function changed the optimization levels as well as setting target
39372 options, start with the optimizations specified. */
39373 if (func_optimize && func_optimize != old_optimize)
39374 cl_optimization_restore (&global_options,
39375 TREE_OPTIMIZATION (func_optimize));
39377 /* The target attributes may also change some optimization flags, so update
39378 the optimization options if necessary. */
39379 cl_target_option_save (&cur_target, &global_options);
39380 rs6000_cpu_index = rs6000_tune_index = -1;
39381 ret = rs6000_inner_target_options (args, true);
39383 /* Set up any additional state. */
39384 if (ret)
39386 ret = rs6000_option_override_internal (false);
39387 new_target = build_target_option_node (&global_options);
39389 else
39390 new_target = NULL;
39392 new_optimize = build_optimization_node (&global_options);
39394 if (!new_target)
39395 ret = false;
39397 else if (fndecl)
39399 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
39401 if (old_optimize != new_optimize)
39402 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
39405 cl_target_option_restore (&global_options, &cur_target);
39407 if (old_optimize != new_optimize)
39408 cl_optimization_restore (&global_options,
39409 TREE_OPTIMIZATION (old_optimize));
39411 return ret;
39415 /* Hook to validate the current #pragma GCC target and set the state, and
39416 update the macros based on what was changed. If ARGS is NULL, then
39417 POP_TARGET is used to reset the options. */
39419 bool
39420 rs6000_pragma_target_parse (tree args, tree pop_target)
39422 tree prev_tree = build_target_option_node (&global_options);
39423 tree cur_tree;
39424 struct cl_target_option *prev_opt, *cur_opt;
39425 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
39426 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
39428 if (TARGET_DEBUG_TARGET)
39430 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
39431 fprintf (stderr, "args:");
39432 rs6000_debug_target_options (args, " ");
39433 fprintf (stderr, "\n");
39435 if (pop_target)
39437 fprintf (stderr, "pop_target:\n");
39438 debug_tree (pop_target);
39440 else
39441 fprintf (stderr, "pop_target: <NULL>\n");
39443 fprintf (stderr, "--------------------\n");
39446 if (! args)
39448 cur_tree = ((pop_target)
39449 ? pop_target
39450 : target_option_default_node);
39451 cl_target_option_restore (&global_options,
39452 TREE_TARGET_OPTION (cur_tree));
39454 else
39456 rs6000_cpu_index = rs6000_tune_index = -1;
39457 if (!rs6000_inner_target_options (args, false)
39458 || !rs6000_option_override_internal (false)
39459 || (cur_tree = build_target_option_node (&global_options))
39460 == NULL_TREE)
39462 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
39463 fprintf (stderr, "invalid pragma\n");
39465 return false;
39469 target_option_current_node = cur_tree;
39471 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
39472 change the macros that are defined. */
39473 if (rs6000_target_modify_macros_ptr)
39475 prev_opt = TREE_TARGET_OPTION (prev_tree);
39476 prev_bumask = prev_opt->x_rs6000_builtin_mask;
39477 prev_flags = prev_opt->x_rs6000_isa_flags;
39479 cur_opt = TREE_TARGET_OPTION (cur_tree);
39480 cur_flags = cur_opt->x_rs6000_isa_flags;
39481 cur_bumask = cur_opt->x_rs6000_builtin_mask;
39483 diff_bumask = (prev_bumask ^ cur_bumask);
39484 diff_flags = (prev_flags ^ cur_flags);
39486 if ((diff_flags != 0) || (diff_bumask != 0))
39488 /* Delete old macros. */
39489 rs6000_target_modify_macros_ptr (false,
39490 prev_flags & diff_flags,
39491 prev_bumask & diff_bumask);
39493 /* Define new macros. */
39494 rs6000_target_modify_macros_ptr (true,
39495 cur_flags & diff_flags,
39496 cur_bumask & diff_bumask);
39500 return true;
39504 /* Remember the last target of rs6000_set_current_function. */
39505 static GTY(()) tree rs6000_previous_fndecl;
39507 /* Establish appropriate back-end context for processing the function
39508 FNDECL. The argument might be NULL to indicate processing at top
39509 level, outside of any function scope. */
39510 static void
39511 rs6000_set_current_function (tree fndecl)
39513 tree old_tree = (rs6000_previous_fndecl
39514 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
39515 : NULL_TREE);
39517 tree new_tree = (fndecl
39518 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
39519 : NULL_TREE);
39521 if (TARGET_DEBUG_TARGET)
39523 bool print_final = false;
39524 fprintf (stderr, "\n==================== rs6000_set_current_function");
39526 if (fndecl)
39527 fprintf (stderr, ", fndecl %s (%p)",
39528 (DECL_NAME (fndecl)
39529 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
39530 : "<unknown>"), (void *)fndecl);
39532 if (rs6000_previous_fndecl)
39533 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
39535 fprintf (stderr, "\n");
39536 if (new_tree)
39538 fprintf (stderr, "\nnew fndecl target specific options:\n");
39539 debug_tree (new_tree);
39540 print_final = true;
39543 if (old_tree)
39545 fprintf (stderr, "\nold fndecl target specific options:\n");
39546 debug_tree (old_tree);
39547 print_final = true;
39550 if (print_final)
39551 fprintf (stderr, "--------------------\n");
39554 /* Only change the context if the function changes. This hook is called
39555 several times in the course of compiling a function, and we don't want to
39556 slow things down too much or call target_reinit when it isn't safe. */
39557 if (fndecl && fndecl != rs6000_previous_fndecl)
39559 rs6000_previous_fndecl = fndecl;
39560 if (old_tree == new_tree)
39563 else if (new_tree && new_tree != target_option_default_node)
39565 cl_target_option_restore (&global_options,
39566 TREE_TARGET_OPTION (new_tree));
39567 if (TREE_TARGET_GLOBALS (new_tree))
39568 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
39569 else
39570 TREE_TARGET_GLOBALS (new_tree)
39571 = save_target_globals_default_opts ();
39574 else if (old_tree && old_tree != target_option_default_node)
39576 new_tree = target_option_current_node;
39577 cl_target_option_restore (&global_options,
39578 TREE_TARGET_OPTION (new_tree));
39579 if (TREE_TARGET_GLOBALS (new_tree))
39580 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
39581 else if (new_tree == target_option_default_node)
39582 restore_target_globals (&default_target_globals);
39583 else
39584 TREE_TARGET_GLOBALS (new_tree)
39585 = save_target_globals_default_opts ();
39591 /* Save the current options */
39593 static void
39594 rs6000_function_specific_save (struct cl_target_option *ptr,
39595 struct gcc_options *opts)
39597 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
39598 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
39601 /* Restore the current options */
39603 static void
39604 rs6000_function_specific_restore (struct gcc_options *opts,
39605 struct cl_target_option *ptr)
39608 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
39609 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
39610 (void) rs6000_option_override_internal (false);
39613 /* Print the current options */
39615 static void
39616 rs6000_function_specific_print (FILE *file, int indent,
39617 struct cl_target_option *ptr)
39619 rs6000_print_isa_options (file, indent, "Isa options set",
39620 ptr->x_rs6000_isa_flags);
39622 rs6000_print_isa_options (file, indent, "Isa options explicit",
39623 ptr->x_rs6000_isa_flags_explicit);
39626 /* Helper function to print the current isa or misc options on a line. */
39628 static void
39629 rs6000_print_options_internal (FILE *file,
39630 int indent,
39631 const char *string,
39632 HOST_WIDE_INT flags,
39633 const char *prefix,
39634 const struct rs6000_opt_mask *opts,
39635 size_t num_elements)
39637 size_t i;
39638 size_t start_column = 0;
39639 size_t cur_column;
39640 size_t max_column = 120;
39641 size_t prefix_len = strlen (prefix);
39642 size_t comma_len = 0;
39643 const char *comma = "";
39645 if (indent)
39646 start_column += fprintf (file, "%*s", indent, "");
39648 if (!flags)
39650 fprintf (file, DEBUG_FMT_S, string, "<none>");
39651 return;
39654 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
39656 /* Print the various mask options. */
39657 cur_column = start_column;
39658 for (i = 0; i < num_elements; i++)
39660 bool invert = opts[i].invert;
39661 const char *name = opts[i].name;
39662 const char *no_str = "";
39663 HOST_WIDE_INT mask = opts[i].mask;
39664 size_t len = comma_len + prefix_len + strlen (name);
39666 if (!invert)
39668 if ((flags & mask) == 0)
39670 no_str = "no-";
39671 len += sizeof ("no-") - 1;
39674 flags &= ~mask;
39677 else
39679 if ((flags & mask) != 0)
39681 no_str = "no-";
39682 len += sizeof ("no-") - 1;
39685 flags |= mask;
39688 cur_column += len;
39689 if (cur_column > max_column)
39691 fprintf (file, ", \\\n%*s", (int)start_column, "");
39692 cur_column = start_column + len;
39693 comma = "";
39696 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
39697 comma = ", ";
39698 comma_len = sizeof (", ") - 1;
39701 fputs ("\n", file);
39704 /* Helper function to print the current isa options on a line. */
39706 static void
39707 rs6000_print_isa_options (FILE *file, int indent, const char *string,
39708 HOST_WIDE_INT flags)
39710 rs6000_print_options_internal (file, indent, string, flags, "-m",
39711 &rs6000_opt_masks[0],
39712 ARRAY_SIZE (rs6000_opt_masks));
39715 static void
39716 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
39717 HOST_WIDE_INT flags)
39719 rs6000_print_options_internal (file, indent, string, flags, "",
39720 &rs6000_builtin_mask_names[0],
39721 ARRAY_SIZE (rs6000_builtin_mask_names));
39725 /* Hook to determine if one function can safely inline another. */
39727 static bool
39728 rs6000_can_inline_p (tree caller, tree callee)
39730 bool ret = false;
39731 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
39732 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
39734 /* If callee has no option attributes, then it is ok to inline. */
39735 if (!callee_tree)
39736 ret = true;
39738 /* If caller has no option attributes, but callee does then it is not ok to
39739 inline. */
39740 else if (!caller_tree)
39741 ret = false;
39743 else
39745 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
39746 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
39748 /* Callee's options must be a subset of the caller's, i.e. a vsx function
39749 can inline an altivec function but a non-vsx function can't inline a
39750 vsx function. */
39751 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
39752 == callee_opts->x_rs6000_isa_flags)
39753 ret = true;
39756 if (TARGET_DEBUG_TARGET)
39757 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
39758 (DECL_NAME (caller)
39759 ? IDENTIFIER_POINTER (DECL_NAME (caller))
39760 : "<unknown>"),
39761 (DECL_NAME (callee)
39762 ? IDENTIFIER_POINTER (DECL_NAME (callee))
39763 : "<unknown>"),
39764 (ret ? "can" : "cannot"));
39766 return ret;
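/* Illustrative consequence of the subset test above (user code sketch,
   assuming otherwise identical command-line options):

     __attribute__((target("vsx")))     void caller (void);
     __attribute__((target("altivec"))) void callee (void);

   caller may inline callee, since -mvsx also sets the Altivec flag and
   callee's ISA flags are therefore a subset of caller's; swapping the two
   attributes blocks inlining.  */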
39769 /* Allocate a stack temp and fix up the address so it meets the particular
39770 memory requirements (either offsettable or REG+REG addressing). */
39773 rs6000_allocate_stack_temp (machine_mode mode,
39774 bool offsettable_p,
39775 bool reg_reg_p)
39777 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
39778 rtx addr = XEXP (stack, 0);
39779 int strict_p = (reload_in_progress || reload_completed);
39781 if (!legitimate_indirect_address_p (addr, strict_p))
39783 if (offsettable_p
39784 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
39785 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
39787 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
39788 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
39791 return stack;
39794 /* Given a memory reference, if it does not use reg or reg+reg addressing,
39795 convert it to such a form, to handle memory reference instructions like STFIWX that
39796 only take reg+reg addressing. */
39799 rs6000_address_for_fpconvert (rtx x)
39801 int strict_p = (reload_in_progress || reload_completed);
39802 rtx addr;
39804 gcc_assert (MEM_P (x));
39805 addr = XEXP (x, 0);
39806 if (! legitimate_indirect_address_p (addr, strict_p)
39807 && ! legitimate_indexed_address_p (addr, strict_p))
39809 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
39811 rtx reg = XEXP (addr, 0);
39812 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
39813 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
39814 gcc_assert (REG_P (reg));
39815 emit_insn (gen_add3_insn (reg, reg, size_rtx));
39816 addr = reg;
39818 else if (GET_CODE (addr) == PRE_MODIFY)
39820 rtx reg = XEXP (addr, 0);
39821 rtx expr = XEXP (addr, 1);
39822 gcc_assert (REG_P (reg));
39823 gcc_assert (GET_CODE (expr) == PLUS);
39824 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
39825 addr = reg;
39828 x = replace_equiv_address (x, copy_addr_to_reg (addr));
39831 return x;
39834 /* Given a memory reference, if it is not in the form for altivec memory
39835 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
39836 convert to the altivec format. */
39839 rs6000_address_for_altivec (rtx x)
39841 gcc_assert (MEM_P (x));
39842 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
39844 rtx addr = XEXP (x, 0);
39845 int strict_p = (reload_in_progress || reload_completed);
39847 if (!legitimate_indexed_address_p (addr, strict_p)
39848 && !legitimate_indirect_address_p (addr, strict_p))
39849 addr = copy_to_mode_reg (Pmode, addr);
39851 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
39852 x = change_address (x, GET_MODE (x), addr);
39855 return x;
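/* The AND with -16 models the hardware behavior: lvx/stvx ignore the low
   four effective-address bits.  A sketch of that computation (illustrative
   helper, not part of this file):  */
#if 0
static unsigned long
altivec_effective_address (unsigned long ra, unsigned long rb)
{
  return (ra + rb) & ~(unsigned long) 15;  /* force 16-byte alignment */
}
#endif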
39858 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
39860 On the RS/6000, all integer constants are acceptable, most won't be valid
39861 for particular insns, though. Only easy FP constants are acceptable. */
39863 static bool
39864 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
39866 if (TARGET_ELF && tls_referenced_p (x))
39867 return false;
39869 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
39870 || GET_MODE (x) == VOIDmode
39871 || (TARGET_POWERPC64 && mode == DImode)
39872 || easy_fp_constant (x, mode)
39873 || easy_vector_constant (x, mode));
39877 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
39879 static bool
39880 chain_already_loaded (rtx_insn *last)
39882 for (; last != NULL; last = PREV_INSN (last))
39884 if (NONJUMP_INSN_P (last))
39886 rtx patt = PATTERN (last);
39888 if (GET_CODE (patt) == SET)
39890 rtx lhs = XEXP (patt, 0);
39892 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
39893 return true;
39897 return false;
39900 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
39902 void
39903 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
39905 const bool direct_call_p
39906 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
39907 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
39908 rtx toc_load = NULL_RTX;
39909 rtx toc_restore = NULL_RTX;
39910 rtx func_addr;
39911 rtx abi_reg = NULL_RTX;
39912 rtx call[4];
39913 int n_call;
39914 rtx insn;
39916 /* Handle longcall attributes. */
39917 if (INTVAL (cookie) & CALL_LONG)
39918 func_desc = rs6000_longcall_ref (func_desc);
39920 /* Handle indirect calls. */
39921 if (GET_CODE (func_desc) != SYMBOL_REF
39922 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
39924 /* Save the TOC into its reserved slot before the call,
39925 and prepare to restore it after the call. */
39926 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
39927 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
39928 rtx stack_toc_mem = gen_frame_mem (Pmode,
39929 gen_rtx_PLUS (Pmode, stack_ptr,
39930 stack_toc_offset));
39931 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
39932 gen_rtvec (1, stack_toc_offset),
39933 UNSPEC_TOCSLOT);
39934 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
39936 /* Can we optimize saving the TOC in the prologue or
39937 do we need to do it at every call? */
39938 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
39939 cfun->machine->save_toc_in_prologue = true;
39940 else
39942 MEM_VOLATILE_P (stack_toc_mem) = 1;
39943 emit_move_insn (stack_toc_mem, toc_reg);
39946 if (DEFAULT_ABI == ABI_ELFv2)
39948 /* A function pointer in the ELFv2 ABI is just a plain address, but
39949 the ABI requires it to be loaded into r12 before the call. */
39950 func_addr = gen_rtx_REG (Pmode, 12);
39951 emit_move_insn (func_addr, func_desc);
39952 abi_reg = func_addr;
39954 else
39956 /* A function pointer under AIX is a pointer to a data area whose
39957 first word contains the actual address of the function, whose
39958 second word contains a pointer to its TOC, and whose third word
39959 contains a value to place in the static chain register (r11).
39960 Note that if we load the static chain, our "trampoline" need
39961 not have any executable code. */
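/* Layout implied by the comment above (illustrative sketch only):

     struct aix_func_desc {
       void *entry;          -- word 0: code address, loaded just below
       void *toc;            -- word 1: callee's TOC pointer
       void *static_chain;   -- word 2: value destined for r11
     };

   The loads that follow index off FUNC_DESC by 0, 1 and 2 pointer-sized
   words respectively.  */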
39963 /* Load up address of the actual function. */
39964 func_desc = force_reg (Pmode, func_desc);
39965 func_addr = gen_reg_rtx (Pmode);
39966 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
39968 /* Prepare to load the TOC of the called function. Note that the
39969 TOC load must happen immediately before the actual call so
39970 that unwinding the TOC registers works correctly. See the
39971 comment in frob_update_context. */
39972 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
39973 rtx func_toc_mem = gen_rtx_MEM (Pmode,
39974 gen_rtx_PLUS (Pmode, func_desc,
39975 func_toc_offset));
39976 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
39978 /* If we have a static chain, load it up. But, if the call was
39979 originally direct, the 3rd word has not been written since no
39980 trampoline has been built, so we ought not to load it, lest we
39981 overwrite a static chain value. */
39982 if (!direct_call_p
39983 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
39984 && !chain_already_loaded (get_current_sequence ()->next->last))
39986 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
39987 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
39988 rtx func_sc_mem = gen_rtx_MEM (Pmode,
39989 gen_rtx_PLUS (Pmode, func_desc,
39990 func_sc_offset));
39991 emit_move_insn (sc_reg, func_sc_mem);
39992 abi_reg = sc_reg;
39996 else
39998 /* Direct calls use the TOC: for local calls, the callee will
39999 assume the TOC register is set; for non-local calls, the
40000 PLT stub needs the TOC register. */
40001 abi_reg = toc_reg;
40002 func_addr = func_desc;
40005 /* Create the call. */
40006 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
40007 if (value != NULL_RTX)
40008 call[0] = gen_rtx_SET (value, call[0]);
40009 n_call = 1;
40011 if (toc_load)
40012 call[n_call++] = toc_load;
40013 if (toc_restore)
40014 call[n_call++] = toc_restore;
40016 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
40018 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
40019 insn = emit_call_insn (insn);
40021 /* Mention all registers defined by the ABI to hold information
40022 as uses in CALL_INSN_FUNCTION_USAGE. */
40023 if (abi_reg)
40024 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
40027 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
40029 void
40030 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40032 rtx call[2];
40033 rtx insn;
40035 gcc_assert (INTVAL (cookie) == 0);
40037 /* Create the call. */
40038 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
40039 if (value != NULL_RTX)
40040 call[0] = gen_rtx_SET (value, call[0]);
40042 call[1] = simple_return_rtx;
40044 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
40045 insn = emit_call_insn (insn);
40047 /* Note use of the TOC register. */
40048 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
40051 /* Return whether we need to always update the saved TOC pointer when we update
40052 the stack pointer. */
40054 static bool
40055 rs6000_save_toc_in_prologue_p (void)
40057 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
40060 #ifdef HAVE_GAS_HIDDEN
40061 # define USE_HIDDEN_LINKONCE 1
40062 #else
40063 # define USE_HIDDEN_LINKONCE 0
40064 #endif
40066 /* Fills in the label name that should be used for a 476 link stack thunk. */
40068 void
40069 get_ppc476_thunk_name (char name[32])
40071 gcc_assert (TARGET_LINK_STACK);
40073 if (USE_HIDDEN_LINKONCE)
40074 sprintf (name, "__ppc476.get_thunk");
40075 else
40076 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
40079 /* This function emits the simple thunk routine that is used to preserve
40080 the link stack on the 476 cpu. */
40082 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
40083 static void
40084 rs6000_code_end (void)
40086 char name[32];
40087 tree decl;
40089 if (!TARGET_LINK_STACK)
40090 return;
40092 get_ppc476_thunk_name (name);
40094 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
40095 build_function_type_list (void_type_node, NULL_TREE));
40096 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
40097 NULL_TREE, void_type_node);
40098 TREE_PUBLIC (decl) = 1;
40099 TREE_STATIC (decl) = 1;
40101 #if RS6000_WEAK
40102 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
40104 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
40105 targetm.asm_out.unique_section (decl, 0);
40106 switch_to_section (get_named_section (decl, NULL, 0));
40107 DECL_WEAK (decl) = 1;
40108 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
40109 targetm.asm_out.globalize_label (asm_out_file, name);
40110 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
40111 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
40113 else
40114 #endif
40116 switch_to_section (text_section);
40117 ASM_OUTPUT_LABEL (asm_out_file, name);
40120 DECL_INITIAL (decl) = make_node (BLOCK);
40121 current_function_decl = decl;
40122 allocate_struct_function (decl, false);
40123 init_function_start (decl);
40124 first_function_block_is_cold = false;
40125 /* Make sure unwind info is emitted for the thunk if needed. */
40126 final_start_function (emit_barrier (), asm_out_file, 1);
40128 fputs ("\tblr\n", asm_out_file);
40130 final_end_function ();
40131 init_insn_lengths ();
40132 free_after_compilation (cfun);
40133 set_cfun (NULL);
40134 current_function_decl = NULL;
40137 /* Add r30 to hard reg set if the prologue sets it up and it is not
40138 pic_offset_table_rtx. */
40140 static void
40141 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
40143 if (!TARGET_SINGLE_PIC_BASE
40144 && TARGET_TOC
40145 && TARGET_MINIMAL_TOC
40146 && !constant_pool_empty_p ())
40147 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
40148 if (cfun->machine->split_stack_argp_used)
40149 add_to_hard_reg_set (&set->set, Pmode, 12);
40153 /* Helper function for rs6000_split_logical to emit a logical instruction after
40154 splitting the operation into single GPR registers.
40156 DEST is the destination register.
40157 OP1 and OP2 are the input source registers.
40158 CODE is the base operation (AND, IOR, XOR, NOT).
40159 MODE is the machine mode.
40160 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40161 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40162 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40164 static void
40165 rs6000_split_logical_inner (rtx dest,
40166 rtx op1,
40167 rtx op2,
40168 enum rtx_code code,
40169 machine_mode mode,
40170 bool complement_final_p,
40171 bool complement_op1_p,
40172 bool complement_op2_p)
40174 rtx bool_rtx;
40176 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
40177 if (op2 && GET_CODE (op2) == CONST_INT
40178 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
40179 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40181 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
40182 HOST_WIDE_INT value = INTVAL (op2) & mask;
40184 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
40185 if (code == AND)
40187 if (value == 0)
40189 emit_insn (gen_rtx_SET (dest, const0_rtx));
40190 return;
40193 else if (value == mask)
40195 if (!rtx_equal_p (dest, op1))
40196 emit_insn (gen_rtx_SET (dest, op1));
40197 return;
40201 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
40202 into separate ORI/ORIS or XORI/XORIS instructions. */
40203 else if (code == IOR || code == XOR)
40205 if (value == 0)
40207 if (!rtx_equal_p (dest, op1))
40208 emit_insn (gen_rtx_SET (dest, op1));
40209 return;
40214 if (code == AND && mode == SImode
40215 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40217 emit_insn (gen_andsi3 (dest, op1, op2));
40218 return;
40221 if (complement_op1_p)
40222 op1 = gen_rtx_NOT (mode, op1);
40224 if (complement_op2_p)
40225 op2 = gen_rtx_NOT (mode, op2);
40227 /* For canonical RTL, if only one arm is inverted it is the first. */
40228 if (!complement_op1_p && complement_op2_p)
40229 std::swap (op1, op2);
40231 bool_rtx = ((code == NOT)
40232 ? gen_rtx_NOT (mode, op1)
40233 : gen_rtx_fmt_ee (code, mode, op1, op2));
40235 if (complement_final_p)
40236 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
40238 emit_insn (gen_rtx_SET (dest, bool_rtx));
40241 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
40242 operations are split immediately during RTL generation to allow for more
40243 optimizations of the AND/IOR/XOR.
40245 OPERANDS is an array containing the destination and two input operands.
40246 CODE is the base operation (AND, IOR, XOR, NOT).
40247 MODE is the machine mode.
40248 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40249 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40250 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40254 static void
40255 rs6000_split_logical_di (rtx operands[3],
40256 enum rtx_code code,
40257 bool complement_final_p,
40258 bool complement_op1_p,
40259 bool complement_op2_p)
40261 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
40262 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
40263 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
40264 enum hi_lo { hi = 0, lo = 1 };
40265 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
40266 size_t i;
40268 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
40269 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
40270 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
40271 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
40273 if (code == NOT)
40274 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
40275 else
40277 if (GET_CODE (operands[2]) != CONST_INT)
40279 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
40280 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
40282 else
40284 HOST_WIDE_INT value = INTVAL (operands[2]);
40285 HOST_WIDE_INT value_hi_lo[2];
40287 gcc_assert (!complement_final_p);
40288 gcc_assert (!complement_op1_p);
40289 gcc_assert (!complement_op2_p);
40291 value_hi_lo[hi] = value >> 32;
40292 value_hi_lo[lo] = value & lower_32bits;
40294 for (i = 0; i < 2; i++)
40296 HOST_WIDE_INT sub_value = value_hi_lo[i];
40298 if (sub_value & sign_bit)
40299 sub_value |= upper_32bits;
40301 op2_hi_lo[i] = GEN_INT (sub_value);
40303 /* If this is an AND instruction, check to see if we need to load
40304 the value in a register. */
40305 if (code == AND && sub_value != -1 && sub_value != 0
40306 && !and_operand (op2_hi_lo[i], SImode))
40307 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
40312 for (i = 0; i < 2; i++)
40314 /* Split large IOR/XOR operations. */
40315 if ((code == IOR || code == XOR)
40316 && GET_CODE (op2_hi_lo[i]) == CONST_INT
40317 && !complement_final_p
40318 && !complement_op1_p
40319 && !complement_op2_p
40320 && !logical_const_operand (op2_hi_lo[i], SImode))
40322 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
40323 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
40324 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
40325 rtx tmp = gen_reg_rtx (SImode);
40327 /* Make sure the constant is sign extended. */
40328 if ((hi_16bits & sign_bit) != 0)
40329 hi_16bits |= upper_32bits;
40331 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
40332 code, SImode, false, false, false);
40334 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
40335 code, SImode, false, false, false);
40337 else
40338 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
40339 code, SImode, complement_final_p,
40340 complement_op1_p, complement_op2_p);
40343 return;
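/* Example of the IOR/XOR split above (illustrative): on a 32-bit target
   the low word of  r = a | 0x12345678  is not a single ori/oris operand,
   so it is emitted as the equivalent of

     tmp  = a_lo | 0x12340000;   -- oris
     r_lo = tmp  | 0x00005678;   -- ori

   mirroring the hi_16bits/lo_16bits decomposition in the loop.  */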
40346 /* Split the insns that make up boolean operations operating on multiple GPR
40347 registers. The boolean MD patterns ensure that the inputs either are
40348 exactly the same as the output registers, or there is no overlap.
40350 OPERANDS is an array containing the destination and two input operands.
40351 CODE is the base operation (AND, IOR, XOR, NOT).
40352 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40353 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40354 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40356 void
40357 rs6000_split_logical (rtx operands[3],
40358 enum rtx_code code,
40359 bool complement_final_p,
40360 bool complement_op1_p,
40361 bool complement_op2_p)
40363 machine_mode mode = GET_MODE (operands[0]);
40364 machine_mode sub_mode;
40365 rtx op0, op1, op2;
40366 int sub_size, regno0, regno1, nregs, i;
40368 /* If this is DImode, use the specialized version that can run before
40369 register allocation. */
40370 if (mode == DImode && !TARGET_POWERPC64)
40372 rs6000_split_logical_di (operands, code, complement_final_p,
40373 complement_op1_p, complement_op2_p);
40374 return;
40377 op0 = operands[0];
40378 op1 = operands[1];
40379 op2 = (code == NOT) ? NULL_RTX : operands[2];
40380 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
40381 sub_size = GET_MODE_SIZE (sub_mode);
40382 regno0 = REGNO (op0);
40383 regno1 = REGNO (op1);
40385 gcc_assert (reload_completed);
40386 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40387 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40389 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
40390 gcc_assert (nregs > 1);
40392 if (op2 && REG_P (op2))
40393 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
40395 for (i = 0; i < nregs; i++)
40397 int offset = i * sub_size;
40398 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
40399 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
40400 rtx sub_op2 = ((code == NOT)
40401 ? NULL_RTX
40402 : simplify_subreg (sub_mode, op2, mode, offset));
40404 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
40405 complement_final_p, complement_op1_p,
40406 complement_op2_p);
40409 return;
40413 /* Return true if the peephole2 pass can combine an addis instruction and a
40414 load with an offset off the addis result into a pair that can be fused
40415 together on a power8. */
40417 bool
40418 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
40419 rtx addis_value, /* addis value. */
40420 rtx target, /* target register that is loaded. */
40421 rtx mem) /* bottom part of the memory addr. */
40423 rtx addr;
40424 rtx base_reg;
40426 /* Validate arguments. */
40427 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
40428 return false;
40430 if (!base_reg_operand (target, GET_MODE (target)))
40431 return false;
40433 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
40434 return false;
40436 /* Allow sign/zero extension. */
40437 if (GET_CODE (mem) == ZERO_EXTEND
40438 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
40439 mem = XEXP (mem, 0);
40441 if (!MEM_P (mem))
40442 return false;
40444 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
40445 return false;
40447 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
40448 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
40449 return false;
40451   /* Validate that the register used to load the high value is either the
40452      register being loaded, or a register whose use we can safely replace.
40454      This function is only called from the peephole2 pass, and we assume that
40455      there are 2 instructions in the peephole (addis and load), so we want to
40456      check that the target register is not used in the memory address and that
40457      the register holding the addis result is dead after the peephole.  */
40458 if (REGNO (addis_reg) != REGNO (target))
40460 if (reg_mentioned_p (target, mem))
40461 return false;
40463 if (!peep2_reg_dead_p (2, addis_reg))
40464 return false;
40466 /* If the target register being loaded is the stack pointer, we must
40467 avoid loading any other value into it, even temporarily. */
40468 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
40469 return false;
40472 base_reg = XEXP (addr, 0);
40473 return REGNO (addis_reg) == REGNO (base_reg);
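/* For example (a sketch; register numbers and the symbol are arbitrary),
   the predicate above accepts the common pair

	addis 9,2,.LC0@toc@ha
	lwz 9,.LC0@toc@l(9)

   where the addis result, the base register of the load, and the target
   of the load are all r9.  */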
40476 /* During the peephole2 pass, adjust and expand the insns for a load fusion
40477 sequence. We adjust the addis register to use the target register. If the
40478    load is sign-extending, we adjust the code to do a zero-extending load
40479    followed by an explicit sign extension, since the fusion only covers
40480    zero-extending loads.
40482 The operands are:
40483 operands[0] register set with addis (to be replaced with target)
40484 operands[1] value set via addis
40485 operands[2] target register being loaded
40486 operands[3] D-form memory reference using operands[0]. */
40488 void
40489 expand_fusion_gpr_load (rtx *operands)
40491 rtx addis_value = operands[1];
40492 rtx target = operands[2];
40493 rtx orig_mem = operands[3];
40494 rtx new_addr, new_mem, orig_addr, offset;
40495 enum rtx_code plus_or_lo_sum;
40496 machine_mode target_mode = GET_MODE (target);
40497 machine_mode extend_mode = target_mode;
40498 machine_mode ptr_mode = Pmode;
40499 enum rtx_code extend = UNKNOWN;
40501 if (GET_CODE (orig_mem) == ZERO_EXTEND
40502 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
40504 extend = GET_CODE (orig_mem);
40505 orig_mem = XEXP (orig_mem, 0);
40506 target_mode = GET_MODE (orig_mem);
40509 gcc_assert (MEM_P (orig_mem));
40511 orig_addr = XEXP (orig_mem, 0);
40512 plus_or_lo_sum = GET_CODE (orig_addr);
40513 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
40515 offset = XEXP (orig_addr, 1);
40516 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
40517 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
40519 if (extend != UNKNOWN)
40520 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
40522 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
40523 UNSPEC_FUSION_GPR);
40524 emit_insn (gen_rtx_SET (target, new_mem));
40526 if (extend == SIGN_EXTEND)
40528 int sub_off = ((BYTES_BIG_ENDIAN)
40529 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
40530 : 0);
40531 rtx sign_reg
40532 = simplify_subreg (target_mode, target, extend_mode, sub_off);
40534 emit_insn (gen_rtx_SET (target,
40535 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
40538 return;
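/* For a sign-extending load, the expansion above produces, in sketch form
   (arbitrary registers and symbol):

	addis 9,2,sym@toc@ha
	lhz 9,sym@toc@l(9)
	extsh 9,9

   i.e. the zero-extending lhz is fused with the addis, and the sign
   extension is performed separately afterwards.  */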
40541 /* Emit the addis instruction that will be part of a fused instruction
40542 sequence. */
40544 void
40545 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
40546 const char *mode_name)
40548 rtx fuse_ops[10];
40549 char insn_template[80];
40550 const char *addis_str = NULL;
40551 const char *comment_str = ASM_COMMENT_START;
40553 if (*comment_str == ' ')
40554 comment_str++;
40556 /* Emit the addis instruction. */
40557 fuse_ops[0] = target;
40558 if (satisfies_constraint_L (addis_value))
40560 fuse_ops[1] = addis_value;
40561 addis_str = "lis %0,%v1";
40564 else if (GET_CODE (addis_value) == PLUS)
40566 rtx op0 = XEXP (addis_value, 0);
40567 rtx op1 = XEXP (addis_value, 1);
40569 if (REG_P (op0) && CONST_INT_P (op1)
40570 && satisfies_constraint_L (op1))
40572 fuse_ops[1] = op0;
40573 fuse_ops[2] = op1;
40574 addis_str = "addis %0,%1,%v2";
40578 else if (GET_CODE (addis_value) == HIGH)
40580 rtx value = XEXP (addis_value, 0);
40581 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
40583 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
40584 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
40585 if (TARGET_ELF)
40586 addis_str = "addis %0,%2,%1@toc@ha";
40588 else if (TARGET_XCOFF)
40589 addis_str = "addis %0,%1@u(%2)";
40591 else
40592 gcc_unreachable ();
40595 else if (GET_CODE (value) == PLUS)
40597 rtx op0 = XEXP (value, 0);
40598 rtx op1 = XEXP (value, 1);
40600 if (GET_CODE (op0) == UNSPEC
40601 && XINT (op0, 1) == UNSPEC_TOCREL
40602 && CONST_INT_P (op1))
40604 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
40605 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
40606 fuse_ops[3] = op1;
40607 if (TARGET_ELF)
40608 addis_str = "addis %0,%2,%1+%3@toc@ha";
40610 else if (TARGET_XCOFF)
40611 addis_str = "addis %0,%1+%3@u(%2)";
40613 else
40614 gcc_unreachable ();
40618 else if (satisfies_constraint_L (value))
40620 fuse_ops[1] = value;
40621 addis_str = "lis %0,%v1";
40624 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
40626 fuse_ops[1] = value;
40627 addis_str = "lis %0,%1@ha";
40631 if (!addis_str)
40632 fatal_insn ("Could not generate addis value for fusion", addis_value);
40634 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
40635 comment, mode_name);
40636 output_asm_insn (insn_template, fuse_ops);
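/* On an ELF target, where ASM_COMMENT_START is "#", the text produced
   above looks something like

	addis 9,2,.LC0@toc@ha		# gpr load fusion, type int

   with the trailing comment built from the COMMENT and MODE_NAME
   arguments.  */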
40639 /* Emit a D-form load or store instruction that is the second instruction
40640 of a fusion sequence. */
40642 void
40643 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
40644 const char *insn_str)
40646 rtx fuse_ops[10];
40647 char insn_template[80];
40649 fuse_ops[0] = load_store_reg;
40650 fuse_ops[1] = addis_reg;
40652 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
40654 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
40655 fuse_ops[2] = offset;
40656 output_asm_insn (insn_template, fuse_ops);
40659 else if (GET_CODE (offset) == UNSPEC
40660 && XINT (offset, 1) == UNSPEC_TOCREL)
40662 if (TARGET_ELF)
40663 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
40665 else if (TARGET_XCOFF)
40666 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
40668 else
40669 gcc_unreachable ();
40671 fuse_ops[2] = XVECEXP (offset, 0, 0);
40672 output_asm_insn (insn_template, fuse_ops);
40675 else if (GET_CODE (offset) == PLUS
40676 && GET_CODE (XEXP (offset, 0)) == UNSPEC
40677 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
40678 && CONST_INT_P (XEXP (offset, 1)))
40680 rtx tocrel_unspec = XEXP (offset, 0);
40681 if (TARGET_ELF)
40682 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
40684 else if (TARGET_XCOFF)
40685 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
40687 else
40688 gcc_unreachable ();
40690 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
40691 fuse_ops[3] = XEXP (offset, 1);
40692 output_asm_insn (insn_template, fuse_ops);
40695 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
40697 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
40699 fuse_ops[2] = offset;
40700 output_asm_insn (insn_template, fuse_ops);
40703 else
40704 fatal_insn ("Unable to generate load/store offset for fusion", offset);
40706 return;
40709 /* Wrap a TOC address that can be fused to indicate that special fusion
40710 processing is needed. */
40712 static rtx
40713 fusion_wrap_memory_address (rtx old_mem)
40715 rtx old_addr = XEXP (old_mem, 0);
40716 rtvec v = gen_rtvec (1, old_addr);
40717 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
40718 return replace_equiv_address_nv (old_mem, new_addr, false);
40721 /* Given an address, convert it into the addis and load offset parts. Addresses
40722 created during the peephole2 process look like:
40723 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
40724 (unspec [(...)] UNSPEC_TOCREL))
40726 Addresses created via toc fusion look like:
40727 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
40729 static void
40730 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
40732 rtx hi, lo;
40734 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
40736 lo = XVECEXP (addr, 0, 0);
40737 hi = gen_rtx_HIGH (Pmode, lo);
40739 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
40741 hi = XEXP (addr, 0);
40742 lo = XEXP (addr, 1);
40744 else
40745 gcc_unreachable ();
40747 *p_hi = hi;
40748 *p_lo = lo;
40751 /* Return a string to fuse an addis instruction with a GPR load into the
40752    same register that the addis instruction set.  The address that is used
40753    is the logical address that was formed during peephole2:
40754 (lo_sum (high) (low-part))
40756 Or the address is the TOC address that is wrapped before register allocation:
40757 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
40759 The code is complicated, so we call output_asm_insn directly, and just
40760 return "". */
40762 const char *
40763 emit_fusion_gpr_load (rtx target, rtx mem)
40765 rtx addis_value;
40766 rtx addr;
40767 rtx load_offset;
40768 const char *load_str = NULL;
40769 const char *mode_name = NULL;
40770 machine_mode mode;
40772 if (GET_CODE (mem) == ZERO_EXTEND)
40773 mem = XEXP (mem, 0);
40775 gcc_assert (REG_P (target) && MEM_P (mem));
40777 addr = XEXP (mem, 0);
40778 fusion_split_address (addr, &addis_value, &load_offset);
40780 /* Now emit the load instruction to the same register. */
40781 mode = GET_MODE (mem);
40782 switch (mode)
40784 case QImode:
40785 mode_name = "char";
40786 load_str = "lbz";
40787 break;
40789 case HImode:
40790 mode_name = "short";
40791 load_str = "lhz";
40792 break;
40794 case SImode:
40795 case SFmode:
40796 mode_name = (mode == SFmode) ? "float" : "int";
40797 load_str = "lwz";
40798 break;
40800 case DImode:
40801 case DFmode:
40802 gcc_assert (TARGET_POWERPC64);
40803 mode_name = (mode == DFmode) ? "double" : "long";
40804 load_str = "ld";
40805 break;
40807 default:
40808 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
40811 /* Emit the addis instruction. */
40812 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
40814 /* Emit the D-form load instruction. */
40815 emit_fusion_load_store (target, target, load_offset, load_str);
40817 return "";
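/* Putting the two halves together, a fused SImode GPR load emitted by
   this routine looks roughly like (arbitrary register and symbol):

	addis 9,2,.LC0@toc@ha		# gpr load fusion, type int
	lwz 9,.LC0@toc@l(9)  */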
40821 /* Return true if the peephole2 can combine an addis instruction with a
40822    load or store so that the addis can be fused with the memory operation.
40823    This form of fusion was added in ISA 3.0 (power9) hardware.  */
40825 bool
40826 fusion_p9_p (rtx addis_reg, /* register set via addis. */
40827 rtx addis_value, /* addis value. */
40828 rtx dest, /* destination (memory or register). */
40829 rtx src) /* source (register or memory). */
40831 rtx addr, mem, offset;
40832 enum machine_mode mode = GET_MODE (src);
40834 /* Validate arguments. */
40835 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
40836 return false;
40838 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
40839 return false;
40841 /* Ignore extend operations that are part of the load. */
40842 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
40843 src = XEXP (src, 0);
40845 /* Test for memory<-register or register<-memory. */
40846 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
40848 if (!MEM_P (dest))
40849 return false;
40851 mem = dest;
40854 else if (MEM_P (src))
40856 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
40857 return false;
40859 mem = src;
40862 else
40863 return false;
40865 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
40866 if (GET_CODE (addr) == PLUS)
40868 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
40869 return false;
40871 return satisfies_constraint_I (XEXP (addr, 1));
40874 else if (GET_CODE (addr) == LO_SUM)
40876 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
40877 return false;
40879 offset = XEXP (addr, 1);
40880 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
40881 return small_toc_ref (offset, GET_MODE (offset));
40883 else if (TARGET_ELF && !TARGET_POWERPC64)
40884 return CONSTANT_P (offset);
40887 return false;
40890 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
40891 load sequence.
40893 The operands are:
40894 operands[0] register set with addis
40895 operands[1] value set via addis
40896 operands[2] target register being loaded
40897 operands[3] D-form memory reference using operands[0].
40899    This is similar to the fusion introduced with power8, except that it
40900    applies to both loads and stores and does not require the result register
40901    to be the same as the base register.  At the moment, we only do this if
40902    the register set by the addis is dead.  */
40904 void
40905 expand_fusion_p9_load (rtx *operands)
40907 rtx tmp_reg = operands[0];
40908 rtx addis_value = operands[1];
40909 rtx target = operands[2];
40910 rtx orig_mem = operands[3];
40911 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
40912 enum rtx_code plus_or_lo_sum;
40913 machine_mode target_mode = GET_MODE (target);
40914 machine_mode extend_mode = target_mode;
40915 machine_mode ptr_mode = Pmode;
40916 enum rtx_code extend = UNKNOWN;
40918 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
40920 extend = GET_CODE (orig_mem);
40921 orig_mem = XEXP (orig_mem, 0);
40922 target_mode = GET_MODE (orig_mem);
40925 gcc_assert (MEM_P (orig_mem));
40927 orig_addr = XEXP (orig_mem, 0);
40928 plus_or_lo_sum = GET_CODE (orig_addr);
40929 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
40931 offset = XEXP (orig_addr, 1);
40932 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
40933 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
40935 if (extend != UNKNOWN)
40936 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
40938 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
40939 UNSPEC_FUSION_P9);
40941 set = gen_rtx_SET (target, new_mem);
40942 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
40943 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
40944 emit_insn (insn);
40946 return;
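/* The insn emitted above has the shape (sketch):

	(parallel [(set (reg target)
			(unspec [(mem ...)] UNSPEC_FUSION_P9))
		   (clobber (reg tmp))])

   where the clobbered register is the one originally set by the addis,
   which is why the loaded register need not match the base register.  */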
40949 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
40950 store sequence.
40952 The operands are:
40953 operands[0] register set with addis
40954 operands[1] value set via addis
40955 operands[2] target D-form memory being stored to
40956 operands[3] register being stored
40958    This is similar to the fusion introduced with power8, except that it
40959    applies to both loads and stores and does not require the result register
40960    to be the same as the base register.  At the moment, we only do this if
40961    the register set by the addis is dead.  */
40963 void
40964 expand_fusion_p9_store (rtx *operands)
40966 rtx tmp_reg = operands[0];
40967 rtx addis_value = operands[1];
40968 rtx orig_mem = operands[2];
40969 rtx src = operands[3];
40970 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
40971 enum rtx_code plus_or_lo_sum;
40972 machine_mode target_mode = GET_MODE (orig_mem);
40973 machine_mode ptr_mode = Pmode;
40975 gcc_assert (MEM_P (orig_mem));
40977 orig_addr = XEXP (orig_mem, 0);
40978 plus_or_lo_sum = GET_CODE (orig_addr);
40979 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
40981 offset = XEXP (orig_addr, 1);
40982 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
40983 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
40985 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
40986 UNSPEC_FUSION_P9);
40988 set = gen_rtx_SET (new_mem, new_src);
40989 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
40990 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
40991 emit_insn (insn);
40993 return;
40996 /* Return a string to fuse an addis instruction with a load using extended
40997 fusion. The address that is used is the logical address that was formed
40998 during peephole2: (lo_sum (high) (low-part))
41000 The code is complicated, so we call output_asm_insn directly, and just
41001 return "". */
41003 const char *
41004 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
41006 enum machine_mode mode = GET_MODE (reg);
41007 rtx hi;
41008 rtx lo;
41009 rtx addr;
41010 const char *load_string;
41011 int r;
41013 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
41015 mem = XEXP (mem, 0);
41016 mode = GET_MODE (mem);
41019 if (GET_CODE (reg) == SUBREG)
41021 gcc_assert (SUBREG_BYTE (reg) == 0);
41022 reg = SUBREG_REG (reg);
41025 if (!REG_P (reg))
41026 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
41028 r = REGNO (reg);
41029 if (FP_REGNO_P (r))
41031 if (mode == SFmode)
41032 load_string = "lfs";
41033 else if (mode == DFmode || mode == DImode)
41034 load_string = "lfd";
41035 else
41036 gcc_unreachable ();
41038 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41040 if (mode == SFmode)
41041 load_string = "lxssp";
41042 else if (mode == DFmode || mode == DImode)
41043 load_string = "lxsd";
41044 else
41045 gcc_unreachable ();
41047 else if (INT_REGNO_P (r))
41049 switch (mode)
41051 case QImode:
41052 load_string = "lbz";
41053 break;
41054 case HImode:
41055 load_string = "lhz";
41056 break;
41057 case SImode:
41058 case SFmode:
41059 load_string = "lwz";
41060 break;
41061 case DImode:
41062 case DFmode:
41063 if (!TARGET_POWERPC64)
41064 gcc_unreachable ();
41065 load_string = "ld";
41066 break;
41067 default:
41068 gcc_unreachable ();
41071 else
41072 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
41074 if (!MEM_P (mem))
41075 fatal_insn ("emit_fusion_p9_load not MEM", mem);
41077 addr = XEXP (mem, 0);
41078 fusion_split_address (addr, &hi, &lo);
41080 /* Emit the addis instruction. */
41081 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
41083 /* Emit the D-form load instruction. */
41084 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
41086 return "";
41089 /* Return a string to fuse an addis instruction with a store using extended
41090 fusion. The address that is used is the logical address that was formed
41091 during peephole2: (lo_sum (high) (low-part))
41093 The code is complicated, so we call output_asm_insn directly, and just
41094 return "". */
41096 const char *
41097 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
41099 enum machine_mode mode = GET_MODE (reg);
41100 rtx hi;
41101 rtx lo;
41102 rtx addr;
41103 const char *store_string;
41104 int r;
41106 if (GET_CODE (reg) == SUBREG)
41108 gcc_assert (SUBREG_BYTE (reg) == 0);
41109 reg = SUBREG_REG (reg);
41112 if (!REG_P (reg))
41113 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
41115 r = REGNO (reg);
41116 if (FP_REGNO_P (r))
41118 if (mode == SFmode)
41119 store_string = "stfs";
41120 else if (mode == DFmode)
41121 store_string = "stfd";
41122 else
41123 gcc_unreachable ();
41125 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41127 if (mode == SFmode)
41128 store_string = "stxssp";
41129 else if (mode == DFmode || mode == DImode)
41130 store_string = "stxsd";
41131 else
41132 gcc_unreachable ();
41134 else if (INT_REGNO_P (r))
41136 switch (mode)
41138 case QImode:
41139 store_string = "stb";
41140 break;
41141 case HImode:
41142 store_string = "sth";
41143 break;
41144 case SImode:
41145 case SFmode:
41146 store_string = "stw";
41147 break;
41148 case DImode:
41149 case DFmode:
41150 if (!TARGET_POWERPC64)
41151 gcc_unreachable ();
41152 store_string = "std";
41153 break;
41154 default:
41155 gcc_unreachable ();
41158 else
41159 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
41161 if (!MEM_P (mem))
41162 fatal_insn ("emit_fusion_p9_store not MEM", mem);
41164 addr = XEXP (mem, 0);
41165 fusion_split_address (addr, &hi, &lo);
41167 /* Emit the addis instruction. */
41168 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
41170   /* Emit the D-form store instruction.  */
41171 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
41173 return "";
41177 /* Analyze vector computations and remove unnecessary doubleword
41178 swaps (xxswapdi instructions). This pass is performed only
41179 for little-endian VSX code generation.
41181 For this specific case, loads and stores of 4x32 and 2x64 vectors
41182    are inefficient.  These are implemented using the lxvd2x and
41183    stxvd2x instructions, which invert the order of doublewords in
41184 a vector register. Thus the code generation inserts an xxswapdi
41185 after each such load, and prior to each such store. (For spill
41186 code after register assignment, an additional xxswapdi is inserted
41187 following each store in order to return a hard register to its
41188 unpermuted value.)
41190 The extra xxswapdi instructions reduce performance. This can be
41191 particularly bad for vectorized code. The purpose of this pass
41192 is to reduce the number of xxswapdi instructions required for
41193 correctness.
41195 The primary insight is that much code that operates on vectors
41196 does not care about the relative order of elements in a register,
41197 so long as the correct memory order is preserved. If we have
41198    a computation where all input values are provided by lxvd2x/xxswapdi
41199    sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
41200 and all intermediate computations are pure SIMD (independent of
41201 element order), then all the xxswapdi's associated with the loads
41202 and stores may be removed.
41204 This pass uses some of the infrastructure and logical ideas from
41205 the "web" pass in web.c. We create maximal webs of computations
41206 fitting the description above using union-find. Each such web is
41207 then optimized by removing its unnecessary xxswapdi instructions.
41209 The pass is placed prior to global optimization so that we can
41210 perform the optimization in the safest and simplest way possible;
41211 that is, by replacing each xxswapdi insn with a register copy insn.
41212 Subsequent forward propagation will remove copies where possible.
41214 There are some operations sensitive to element order for which we
41215 can still allow the operation, provided we modify those operations.
41216 These include CONST_VECTORs, for which we must swap the first and
41217 second halves of the constant vector; and SUBREGs, for which we
41218 must adjust the byte offset to account for the swapped doublewords.
41219 A remaining opportunity would be non-immediate-form splats, for
41220 which we should adjust the selected lane of the input. We should
41221 also make code generation adjustments for sum-across operations,
41222 since this is a common vectorizer reduction.
41224 Because we run prior to the first split, we can see loads and stores
41225 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
41226 vector loads and stores that have not yet been split into a permuting
41227 load/store and a swap. (One way this can happen is with a builtin
41228 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
41229 than deleting a swap, we convert the load/store into a permuting
41230 load/store (which effectively removes the swap). */
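/* As a small example, a little-endian V4SI computation initially contains
   sequences along these lines (sketch; register numbers are arbitrary):

	lxvd2x 0,0,9
	xxpermdi 0,0,0,2	# xxswapdi
	... element-order-independent SIMD ops ...
	xxpermdi 0,0,0,2	# xxswapdi
	stxvd2x 0,0,10

   When the web qualifies, each xxswapdi is replaced by a register copy,
   which later passes remove.  */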
41232 /* Notes on Permutes
41234 We do not currently handle computations that contain permutes. There
41235 is a general transformation that can be performed correctly, but it
41236 may introduce more expensive code than it replaces. To handle these
41237 would require a cost model to determine when to perform the optimization.
41238 This commentary records how this could be done if desired.
41240 The most general permute is something like this (example for V16QI):
41242 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
41243 (parallel [(const_int a0) (const_int a1)
41245 (const_int a14) (const_int a15)]))
41247 where a0,...,a15 are in [0,31] and select elements from op1 and op2
41248    to produce the result.
41250 Regardless of mode, we can convert the PARALLEL to a mask of 16
41251 byte-element selectors. Let's call this M, with M[i] representing
41252 the ith byte-element selector value. Then if we swap doublewords
41253 throughout the computation, we can get correct behavior by replacing
41254 M with M' as follows:
41256 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
41257 { ((M[i]+8)%16)+16 : M[i] in [16,31]
41259 This seems promising at first, since we are just replacing one mask
41260 with another. But certain masks are preferable to others. If M
41261 is a mask that matches a vmrghh pattern, for example, M' certainly
41262 will not. Instead of a single vmrghh, we would generate a load of
41263    M' and a vperm.  So we would need to know how many xxswapdi's we can
41264 remove as a result of this transformation to determine if it's
41265 profitable; and preferably the logic would need to be aware of all
41266 the special preferable masks.
41268 Another form of permute is an UNSPEC_VPERM, in which the mask is
41269 already in a register. In some cases, this mask may be a constant
41270 that we can discover with ud-chains, in which case the above
41271 transformation is ok. However, the common usage here is for the
41272 mask to be produced by an UNSPEC_LVSL, in which case the mask
41273 cannot be known at compile time. In such a case we would have to
41274 generate several instructions to compute M' as above at run time,
41275 and a cost model is needed again.
41277 However, when the mask M for an UNSPEC_VPERM is loaded from the
41278 constant pool, we can replace M with M' as above at no cost
41279 beyond adding a constant pool entry. */
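/* A minimal sketch of the M -> M' selector remapping described above,
   using a hypothetical helper (the same computation appears below in
   adjust_vperm for the constant-pool case):

   static void
   remap_vperm_mask (unsigned int mask[16])
   {
     for (int i = 0; i < 16; i++)
       mask[i] = (mask[i] < 16
		  ? (mask[i] + 8) % 16
		  : ((mask[i] + 8) % 16) + 16);
   }
*/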
41281 /* This is based on the union-find logic in web.c. web_entry_base is
41282 defined in df.h. */
41283 class swap_web_entry : public web_entry_base
41285 public:
41286 /* Pointer to the insn. */
41287 rtx_insn *insn;
41288 /* Set if insn contains a mention of a vector register. All other
41289 fields are undefined if this field is unset. */
41290 unsigned int is_relevant : 1;
41291 /* Set if insn is a load. */
41292 unsigned int is_load : 1;
41293 /* Set if insn is a store. */
41294 unsigned int is_store : 1;
41295 /* Set if insn is a doubleword swap. This can either be a register swap
41296 or a permuting load or store (test is_load and is_store for this). */
41297 unsigned int is_swap : 1;
41298 /* Set if the insn has a live-in use of a parameter register. */
41299 unsigned int is_live_in : 1;
41300 /* Set if the insn has a live-out def of a return register. */
41301 unsigned int is_live_out : 1;
41302 /* Set if the insn contains a subreg reference of a vector register. */
41303 unsigned int contains_subreg : 1;
41304 /* Set if the insn contains a 128-bit integer operand. */
41305 unsigned int is_128_int : 1;
41306 /* Set if this is a call-insn. */
41307 unsigned int is_call : 1;
41308 /* Set if this insn does not perform a vector operation for which
41309 element order matters, or if we know how to fix it up if it does.
41310 Undefined if is_swap is set. */
41311 unsigned int is_swappable : 1;
41312 /* A nonzero value indicates what kind of special handling for this
41313 insn is required if doublewords are swapped. Undefined if
41314 is_swappable is not set. */
41315 unsigned int special_handling : 4;
41316 /* Set if the web represented by this entry cannot be optimized. */
41317 unsigned int web_not_optimizable : 1;
41318 /* Set if this insn should be deleted. */
41319 unsigned int will_delete : 1;
41322 enum special_handling_values {
41323 SH_NONE = 0,
41324 SH_CONST_VECTOR,
41325 SH_SUBREG,
41326 SH_NOSWAP_LD,
41327 SH_NOSWAP_ST,
41328 SH_EXTRACT,
41329 SH_SPLAT,
41330 SH_XXPERMDI,
41331 SH_CONCAT,
41332 SH_VPERM
41335 /* Union INSN with all insns containing definitions that reach USE.
41336 Detect whether USE is live-in to the current function. */
41337 static void
41338 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
41340 struct df_link *link = DF_REF_CHAIN (use);
41342 if (!link)
41343 insn_entry[INSN_UID (insn)].is_live_in = 1;
41345 while (link)
41347 if (DF_REF_IS_ARTIFICIAL (link->ref))
41348 insn_entry[INSN_UID (insn)].is_live_in = 1;
41350 if (DF_REF_INSN_INFO (link->ref))
41352 rtx def_insn = DF_REF_INSN (link->ref);
41353 (void)unionfind_union (insn_entry + INSN_UID (insn),
41354 insn_entry + INSN_UID (def_insn));
41357 link = link->next;
41361 /* Union INSN with all insns containing uses reached from DEF.
41362 Detect whether DEF is live-out from the current function. */
41363 static void
41364 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
41366 struct df_link *link = DF_REF_CHAIN (def);
41368 if (!link)
41369 insn_entry[INSN_UID (insn)].is_live_out = 1;
41371 while (link)
41373 /* This could be an eh use or some other artificial use;
41374 we treat these all the same (killing the optimization). */
41375 if (DF_REF_IS_ARTIFICIAL (link->ref))
41376 insn_entry[INSN_UID (insn)].is_live_out = 1;
41378 if (DF_REF_INSN_INFO (link->ref))
41380 rtx use_insn = DF_REF_INSN (link->ref);
41381 (void)unionfind_union (insn_entry + INSN_UID (insn),
41382 insn_entry + INSN_UID (use_insn));
41385 link = link->next;
41389 /* Return 1 iff INSN is a load insn, including permuting loads that
41390    represent an lxvd2x instruction; else return 0.  */
41391 static unsigned int
41392 insn_is_load_p (rtx insn)
41394 rtx body = PATTERN (insn);
41396 if (GET_CODE (body) == SET)
41398 if (GET_CODE (SET_SRC (body)) == MEM)
41399 return 1;
41401 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
41402 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
41403 return 1;
41405 return 0;
41408 if (GET_CODE (body) != PARALLEL)
41409 return 0;
41411 rtx set = XVECEXP (body, 0, 0);
41413 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
41414 return 1;
41416 return 0;
41419 /* Return 1 iff INSN is a store insn, including permuting stores that
41420    represent a stxvd2x instruction; else return 0.  */
41421 static unsigned int
41422 insn_is_store_p (rtx insn)
41424 rtx body = PATTERN (insn);
41425 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
41426 return 1;
41427 if (GET_CODE (body) != PARALLEL)
41428 return 0;
41429 rtx set = XVECEXP (body, 0, 0);
41430 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
41431 return 1;
41432 return 0;
41435 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
41436 a permuting load, or a permuting store. */
41437 static unsigned int
41438 insn_is_swap_p (rtx insn)
41440 rtx body = PATTERN (insn);
41441 if (GET_CODE (body) != SET)
41442 return 0;
41443 rtx rhs = SET_SRC (body);
41444 if (GET_CODE (rhs) != VEC_SELECT)
41445 return 0;
41446 rtx parallel = XEXP (rhs, 1);
41447 if (GET_CODE (parallel) != PARALLEL)
41448 return 0;
41449 unsigned int len = XVECLEN (parallel, 0);
41450 if (len != 2 && len != 4 && len != 8 && len != 16)
41451 return 0;
41452 for (unsigned int i = 0; i < len / 2; ++i)
41454 rtx op = XVECEXP (parallel, 0, i);
41455 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
41456 return 0;
41458 for (unsigned int i = len / 2; i < len; ++i)
41460 rtx op = XVECEXP (parallel, 0, i);
41461 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
41462 return 0;
41464 return 1;
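/* For V2DI, for example, the doubleword swap recognized above has the
   form

	(set (reg:V2DI x)
	     (vec_select:V2DI (reg:V2DI y)
			      (parallel [(const_int 1) (const_int 0)])))

   and for larger element counts the selector likewise exchanges the two
   halves of the vector.  */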
41467 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
41468 static bool
41469 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
41471 unsigned uid = INSN_UID (insn);
41472 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
41473 return false;
41475 /* Find the unique use in the swap and locate its def. If the def
41476 isn't unique, punt. */
41477 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41478 df_ref use;
41479 FOR_EACH_INSN_INFO_USE (use, insn_info)
41481 struct df_link *def_link = DF_REF_CHAIN (use);
41482 if (!def_link || def_link->next)
41483 return false;
41485 rtx def_insn = DF_REF_INSN (def_link->ref);
41486 unsigned uid2 = INSN_UID (def_insn);
41487 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
41488 return false;
41490 rtx body = PATTERN (def_insn);
41491 if (GET_CODE (body) != SET
41492 || GET_CODE (SET_SRC (body)) != VEC_SELECT
41493 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
41494 return false;
41496 rtx mem = XEXP (SET_SRC (body), 0);
41497 rtx base_reg = XEXP (mem, 0);
41499 df_ref base_use;
41500 insn_info = DF_INSN_INFO_GET (def_insn);
41501 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
41503 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
41504 continue;
41506 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
41507 if (!base_def_link || base_def_link->next)
41508 return false;
41510 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
41511 rtx tocrel_body = PATTERN (tocrel_insn);
41512 rtx base, offset;
41513 if (GET_CODE (tocrel_body) != SET)
41514 return false;
41515 /* There is an extra level of indirection for small/large
41516 code models. */
41517 rtx tocrel_expr = SET_SRC (tocrel_body);
41518 if (GET_CODE (tocrel_expr) == MEM)
41519 tocrel_expr = XEXP (tocrel_expr, 0);
41520 if (!toc_relative_expr_p (tocrel_expr, false))
41521 return false;
41522 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
41523 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
41524 return false;
41527 return true;
41530 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
41531 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
41532 static bool
41533 v2df_reduction_p (rtx op)
41535 if (GET_MODE (op) != V2DFmode)
41536 return false;
41538 enum rtx_code code = GET_CODE (op);
41539 if (code != PLUS && code != SMIN && code != SMAX)
41540 return false;
41542 rtx concat = XEXP (op, 0);
41543 if (GET_CODE (concat) != VEC_CONCAT)
41544 return false;
41546 rtx select0 = XEXP (concat, 0);
41547 rtx select1 = XEXP (concat, 1);
41548 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
41549 return false;
41551 rtx reg0 = XEXP (select0, 0);
41552 rtx reg1 = XEXP (select1, 0);
41553 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
41554 return false;
41556 rtx parallel0 = XEXP (select0, 1);
41557 rtx parallel1 = XEXP (select1, 1);
41558 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
41559 return false;
41561 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
41562 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
41563 return false;
41565 return true;
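/* That is, OP is expected to look roughly like the following, with SMIN
   or SMAX in place of PLUS for the min/max reductions (the second
   operand of the PLUS is not examined here):

	(plus:V2DF
	  (vec_concat:V2DF
	    (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
	    (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
	  ...)  */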
41568 /* Return 1 iff OP is an operand that will not be affected by having
41569 vector doublewords swapped in memory. */
41570 static unsigned int
41571 rtx_is_swappable_p (rtx op, unsigned int *special)
41573 enum rtx_code code = GET_CODE (op);
41574 int i, j;
41575 rtx parallel;
41577 switch (code)
41579 case LABEL_REF:
41580 case SYMBOL_REF:
41581 case CLOBBER:
41582 case REG:
41583 return 1;
41585 case VEC_CONCAT:
41586 case ASM_INPUT:
41587 case ASM_OPERANDS:
41588 return 0;
41590 case CONST_VECTOR:
41592 *special = SH_CONST_VECTOR;
41593 return 1;
41596 case VEC_DUPLICATE:
41597 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
41598 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
41599 it represents a vector splat for which we can do special
41600 handling. */
41601 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
41602 return 1;
41603 else if (REG_P (XEXP (op, 0))
41604 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
41605 /* This catches V2DF and V2DI splat, at a minimum. */
41606 return 1;
41607 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
41608 && REG_P (XEXP (XEXP (op, 0), 0))
41609 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
41610 /* This catches splat of a truncated value. */
41611 return 1;
41612 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
41613 /* If the duplicated item is from a select, defer to the select
41614 processing to see if we can change the lane for the splat. */
41615 return rtx_is_swappable_p (XEXP (op, 0), special);
41616 else
41617 return 0;
41619 case VEC_SELECT:
41620 /* A vec_extract operation is ok if we change the lane. */
41621 if (GET_CODE (XEXP (op, 0)) == REG
41622 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
41623 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
41624 && XVECLEN (parallel, 0) == 1
41625 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
41627 *special = SH_EXTRACT;
41628 return 1;
41630 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
41631 XXPERMDI is a swap operation, it will be identified by
41632 insn_is_swap_p and therefore we won't get here. */
41633 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
41634 && (GET_MODE (XEXP (op, 0)) == V4DFmode
41635 || GET_MODE (XEXP (op, 0)) == V4DImode)
41636 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
41637 && XVECLEN (parallel, 0) == 2
41638 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
41639 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
41641 *special = SH_XXPERMDI;
41642 return 1;
41644 else if (v2df_reduction_p (op))
41645 return 1;
41646 else
41647 return 0;
41649 case UNSPEC:
41651 /* Various operations are unsafe for this optimization, at least
41652 without significant additional work. Permutes are obviously
41653 problematic, as both the permute control vector and the ordering
41654 of the target values are invalidated by doubleword swapping.
41655 Vector pack and unpack modify the number of vector lanes.
41656 Merge-high/low will not operate correctly on swapped operands.
41657 Vector shifts across element boundaries are clearly uncool,
41658 as are vector select and concatenate operations. Vector
41659 sum-across instructions define one operand with a specific
41660 order-dependent element, so additional fixup code would be
41661 needed to make those work. Vector set and non-immediate-form
41662 vector splat are element-order sensitive. A few of these
41663 cases might be workable with special handling if required.
41664 Adding cost modeling would be appropriate in some cases. */
41665 int val = XINT (op, 1);
41666 switch (val)
41668 default:
41669 break;
41670 case UNSPEC_VMRGH_DIRECT:
41671 case UNSPEC_VMRGL_DIRECT:
41672 case UNSPEC_VPACK_SIGN_SIGN_SAT:
41673 case UNSPEC_VPACK_SIGN_UNS_SAT:
41674 case UNSPEC_VPACK_UNS_UNS_MOD:
41675 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
41676 case UNSPEC_VPACK_UNS_UNS_SAT:
41677 case UNSPEC_VPERM:
41678 case UNSPEC_VPERM_UNS:
41679 case UNSPEC_VPERMHI:
41680 case UNSPEC_VPERMSI:
41681 case UNSPEC_VPKPX:
41682 case UNSPEC_VSLDOI:
41683 case UNSPEC_VSLO:
41684 case UNSPEC_VSRO:
41685 case UNSPEC_VSUM2SWS:
41686 case UNSPEC_VSUM4S:
41687 case UNSPEC_VSUM4UBS:
41688 case UNSPEC_VSUMSWS:
41689 case UNSPEC_VSUMSWS_DIRECT:
41690 case UNSPEC_VSX_CONCAT:
41691 case UNSPEC_VSX_SET:
41692 case UNSPEC_VSX_SLDWI:
41693 case UNSPEC_VUNPACK_HI_SIGN:
41694 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
41695 case UNSPEC_VUNPACK_LO_SIGN:
41696 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
41697 case UNSPEC_VUPKHPX:
41698 case UNSPEC_VUPKHS_V4SF:
41699 case UNSPEC_VUPKHU_V4SF:
41700 case UNSPEC_VUPKLPX:
41701 case UNSPEC_VUPKLS_V4SF:
41702 case UNSPEC_VUPKLU_V4SF:
41703 case UNSPEC_VSX_CVDPSPN:
41704 case UNSPEC_VSX_CVSPDP:
41705 case UNSPEC_VSX_CVSPDPN:
41706 case UNSPEC_VSX_EXTRACT:
41707 case UNSPEC_VSX_VSLO:
41708 case UNSPEC_VSX_VEC_INIT:
41709 return 0;
41710 case UNSPEC_VSPLT_DIRECT:
41711 case UNSPEC_VSX_XXSPLTD:
41712 *special = SH_SPLAT;
41713 return 1;
41714 case UNSPEC_REDUC_PLUS:
41715 case UNSPEC_REDUC:
41716 return 1;
41720 default:
41721 break;
41724 const char *fmt = GET_RTX_FORMAT (code);
41725 int ok = 1;
41727 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
41728 if (fmt[i] == 'e' || fmt[i] == 'u')
41730 unsigned int special_op = SH_NONE;
41731 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
41732 if (special_op == SH_NONE)
41733 continue;
41734 /* Ensure we never have two kinds of special handling
41735 for the same insn. */
41736 if (*special != SH_NONE && *special != special_op)
41737 return 0;
41738 *special = special_op;
41740 else if (fmt[i] == 'E')
41741 for (j = 0; j < XVECLEN (op, i); ++j)
41743 unsigned int special_op = SH_NONE;
41744 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
41745 if (special_op == SH_NONE)
41746 continue;
41747 /* Ensure we never have two kinds of special handling
41748 for the same insn. */
41749 if (*special != SH_NONE && *special != special_op)
41750 return 0;
41751 *special = special_op;
41754 return ok;
41757 /* Return 1 iff INSN is an operand that will not be affected by
41758 having vector doublewords swapped in memory (in which case
41759 *SPECIAL is unchanged), or that can be modified to be correct
41760 if vector doublewords are swapped in memory (in which case
41761 *SPECIAL is changed to a value indicating how). */
41762 static unsigned int
41763 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
41764 unsigned int *special)
41766 /* Calls are always bad. */
41767 if (GET_CODE (insn) == CALL_INSN)
41768 return 0;
41770 /* Loads and stores seen here are not permuting, but we can still
41771 fix them up by converting them to permuting ones. Exceptions:
41772 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
41773 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
41774 for the SET source. Also we must now make an exception for lvx
41775 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
41776 explicit "& -16") since this leads to unrecognizable insns. */
41777 rtx body = PATTERN (insn);
41778 int i = INSN_UID (insn);
41780 if (insn_entry[i].is_load)
41782 if (GET_CODE (body) == SET)
41784 rtx rhs = SET_SRC (body);
41785 /* Even without a swap, the RHS might be a vec_select for, say,
41786 a byte-reversing load. */
41787 if (GET_CODE (rhs) != MEM)
41788 return 0;
41789 if (GET_CODE (XEXP (rhs, 0)) == AND)
41790 return 0;
41792 *special = SH_NOSWAP_LD;
41793 return 1;
41795 else
41796 return 0;
41799 if (insn_entry[i].is_store)
41801 if (GET_CODE (body) == SET
41802 && GET_CODE (SET_SRC (body)) != UNSPEC)
41804 rtx lhs = SET_DEST (body);
41805 /* Even without a swap, the LHS might be a vec_select for, say,
41806 a byte-reversing store. */
41807 if (GET_CODE (lhs) != MEM)
41808 return 0;
41809 if (GET_CODE (XEXP (lhs, 0)) == AND)
41810 return 0;
41812 *special = SH_NOSWAP_ST;
41813 return 1;
41815 else
41816 return 0;
41819 /* A convert to single precision can be left as is provided that
41820 all of its uses are in xxspltw instructions that splat BE element
41821 zero. */
41822 if (GET_CODE (body) == SET
41823 && GET_CODE (SET_SRC (body)) == UNSPEC
41824 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
41826 df_ref def;
41827 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41829 FOR_EACH_INSN_INFO_DEF (def, insn_info)
41831 struct df_link *link = DF_REF_CHAIN (def);
41832 if (!link)
41833 return 0;
41835 for (; link; link = link->next) {
41836 rtx use_insn = DF_REF_INSN (link->ref);
41837 rtx use_body = PATTERN (use_insn);
41838 if (GET_CODE (use_body) != SET
41839 || GET_CODE (SET_SRC (use_body)) != UNSPEC
41840 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
41841 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
41842 return 0;
41846 return 1;
41849 /* A concatenation of two doublewords is ok if we reverse the
41850 order of the inputs. */
41851 if (GET_CODE (body) == SET
41852 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
41853 && (GET_MODE (SET_SRC (body)) == V2DFmode
41854 || GET_MODE (SET_SRC (body)) == V2DImode))
41856 *special = SH_CONCAT;
41857 return 1;
41860 /* V2DF reductions are always swappable. */
41861 if (GET_CODE (body) == PARALLEL)
41863 rtx expr = XVECEXP (body, 0, 0);
41864 if (GET_CODE (expr) == SET
41865 && v2df_reduction_p (SET_SRC (expr)))
41866 return 1;
41869 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
41870 constant pool. */
41871 if (GET_CODE (body) == SET
41872 && GET_CODE (SET_SRC (body)) == UNSPEC
41873 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
41874 && XVECLEN (SET_SRC (body), 0) == 3
41875 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
41877 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
41878 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41879 df_ref use;
41880 FOR_EACH_INSN_INFO_USE (use, insn_info)
41881 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
41883 struct df_link *def_link = DF_REF_CHAIN (use);
41884 /* Punt if multiple definitions for this reg. */
41885 if (def_link && !def_link->next &&
41886 const_load_sequence_p (insn_entry,
41887 DF_REF_INSN (def_link->ref)))
41889 *special = SH_VPERM;
41890 return 1;
41895 /* Otherwise check the operands for vector lane violations. */
41896 return rtx_is_swappable_p (body, special);
41899 enum chain_purpose { FOR_LOADS, FOR_STORES };
41901 /* Return true if the UD or DU chain headed by LINK is non-empty,
41902 and every entry on the chain references an insn that is a
41903 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
41904 register swap must have only permuting loads as reaching defs.
41905 If PURPOSE is FOR_STORES, each such register swap must have only
41906 register swaps or permuting stores as reached uses. */
41907 static bool
41908 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
41909 enum chain_purpose purpose)
41911 if (!link)
41912 return false;
41914 for (; link; link = link->next)
41916 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
41917 continue;
41919 if (DF_REF_IS_ARTIFICIAL (link->ref))
41920 return false;
41922 rtx reached_insn = DF_REF_INSN (link->ref);
41923 unsigned uid = INSN_UID (reached_insn);
41924 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
41926 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
41927 || insn_entry[uid].is_store)
41928 return false;
41930 if (purpose == FOR_LOADS)
41932 df_ref use;
41933 FOR_EACH_INSN_INFO_USE (use, insn_info)
41935 struct df_link *swap_link = DF_REF_CHAIN (use);
41937 while (swap_link)
41939 		  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
41940 return false;
41942 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
41943 unsigned uid2 = INSN_UID (swap_def_insn);
41945 /* Only permuting loads are allowed. */
41946 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
41947 return false;
41949 swap_link = swap_link->next;
41953 else if (purpose == FOR_STORES)
41955 df_ref def;
41956 FOR_EACH_INSN_INFO_DEF (def, insn_info)
41958 struct df_link *swap_link = DF_REF_CHAIN (def);
41960 while (swap_link)
41962 		  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
41963 return false;
41965 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
41966 unsigned uid2 = INSN_UID (swap_use_insn);
41968 /* Permuting stores or register swaps are allowed. */
41969 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
41970 return false;
41972 swap_link = swap_link->next;
41978 return true;
41981 /* Mark the xxswapdi instructions associated with permuting loads and
41982 stores for removal. Note that we only flag them for deletion here,
41983 as there is a possibility of a swap being reached from multiple
41984 loads, etc. */
41985 static void
41986 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
41988 rtx insn = insn_entry[i].insn;
41989 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41991 if (insn_entry[i].is_load)
41993 df_ref def;
41994 FOR_EACH_INSN_INFO_DEF (def, insn_info)
41996 struct df_link *link = DF_REF_CHAIN (def);
41998 /* We know by now that these are swaps, so we can delete
41999 them confidently. */
42000 while (link)
42002 rtx use_insn = DF_REF_INSN (link->ref);
42003 insn_entry[INSN_UID (use_insn)].will_delete = 1;
42004 link = link->next;
42008 else if (insn_entry[i].is_store)
42010 df_ref use;
42011 FOR_EACH_INSN_INFO_USE (use, insn_info)
42013 /* Ignore uses for addressability. */
42014 machine_mode mode = GET_MODE (DF_REF_REG (use));
42015 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
42016 continue;
42018 struct df_link *link = DF_REF_CHAIN (use);
42020 /* We know by now that these are swaps, so we can delete
42021 them confidently. */
42022 while (link)
42024 rtx def_insn = DF_REF_INSN (link->ref);
42025 insn_entry[INSN_UID (def_insn)].will_delete = 1;
42026 link = link->next;
42032 /* OP is either a CONST_VECTOR or an expression containing one.
42033 Swap the first half of the vector with the second in the first
42034 case. Recurse to find it in the second. */
42035 static void
42036 swap_const_vector_halves (rtx op)
42038 int i;
42039 enum rtx_code code = GET_CODE (op);
42040 if (GET_CODE (op) == CONST_VECTOR)
42042 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
42043 for (i = 0; i < half_units; ++i)
42045 rtx temp = CONST_VECTOR_ELT (op, i);
42046 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
42047 CONST_VECTOR_ELT (op, i + half_units) = temp;
42050 else
42052 int j;
42053 const char *fmt = GET_RTX_FORMAT (code);
42054 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42055 if (fmt[i] == 'e' || fmt[i] == 'u')
42056 swap_const_vector_halves (XEXP (op, i));
42057 else if (fmt[i] == 'E')
42058 for (j = 0; j < XVECLEN (op, i); ++j)
42059 swap_const_vector_halves (XVECEXP (op, i, j));
42063 /* Find all subregs of a vector expression that perform a narrowing,
42064 and adjust the subreg index to account for doubleword swapping. */
42065 static void
42066 adjust_subreg_index (rtx op)
42068 enum rtx_code code = GET_CODE (op);
42069 if (code == SUBREG
42070 && (GET_MODE_SIZE (GET_MODE (op))
42071 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
42073 unsigned int index = SUBREG_BYTE (op);
42074 if (index < 8)
42075 index += 8;
42076 else
42077 index -= 8;
42078 SUBREG_BYTE (op) = index;
42081 const char *fmt = GET_RTX_FORMAT (code);
42082 int i,j;
42083 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42084 if (fmt[i] == 'e' || fmt[i] == 'u')
42085 adjust_subreg_index (XEXP (op, i));
42086 else if (fmt[i] == 'E')
42087 for (j = 0; j < XVECLEN (op, i); ++j)
42088 adjust_subreg_index (XVECEXP (op, i, j));
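/* For example, (subreg:DF (reg:V2DF x) 0) names the other doubleword
   once the register's doublewords are swapped, so the routine above
   rewrites its byte offset to 8, and vice versa.  */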
42091 /* Convert the non-permuting load INSN to a permuting one. */
42092 static void
42093 permute_load (rtx_insn *insn)
42095 rtx body = PATTERN (insn);
42096 rtx mem_op = SET_SRC (body);
42097 rtx tgt_reg = SET_DEST (body);
42098 machine_mode mode = GET_MODE (tgt_reg);
42099 int n_elts = GET_MODE_NUNITS (mode);
42100 int half_elts = n_elts / 2;
42101 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42102 int i, j;
42103 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42104 XVECEXP (par, 0, i) = GEN_INT (j);
42105 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42106 XVECEXP (par, 0, i) = GEN_INT (j);
42107 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
42108 SET_SRC (body) = sel;
42109 INSN_CODE (insn) = -1; /* Force re-recognition. */
42110 df_insn_rescan (insn);
42112 if (dump_file)
42113 fprintf (dump_file, "Replacing load %d with permuted load\n",
42114 INSN_UID (insn));
42117 /* Convert the non-permuting store INSN to a permuting one. */
42118 static void
42119 permute_store (rtx_insn *insn)
42121 rtx body = PATTERN (insn);
42122 rtx src_reg = SET_SRC (body);
42123 machine_mode mode = GET_MODE (src_reg);
42124 int n_elts = GET_MODE_NUNITS (mode);
42125 int half_elts = n_elts / 2;
42126 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42127 int i, j;
42128 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42129 XVECEXP (par, 0, i) = GEN_INT (j);
42130 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42131 XVECEXP (par, 0, i) = GEN_INT (j);
42132 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
42133 SET_SRC (body) = sel;
42134 INSN_CODE (insn) = -1; /* Force re-recognition. */
42135 df_insn_rescan (insn);
42137 if (dump_file)
42138 fprintf (dump_file, "Replacing store %d with permuted store\n",
42139 INSN_UID (insn));
42142 /* Given INSN containing a vector extract operation, adjust the index
42143    of the extracted lane to account for the doubleword swap.  */
42144 static void
42145 adjust_extract (rtx_insn *insn)
42147 rtx pattern = PATTERN (insn);
42148 if (GET_CODE (pattern) == PARALLEL)
42149 pattern = XVECEXP (pattern, 0, 0);
42150 rtx src = SET_SRC (pattern);
42151 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
42152 account for that. */
42153 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
42154 rtx par = XEXP (sel, 1);
42155 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
42156 int lane = INTVAL (XVECEXP (par, 0, 0));
42157 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42158 XVECEXP (par, 0, 0) = GEN_INT (lane);
42159 INSN_CODE (insn) = -1; /* Force re-recognition. */
42160 df_insn_rescan (insn);
42162 if (dump_file)
42163 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
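/* For instance, extracting lane 0 from a swapped V4SI register becomes
   an extract of lane 2, since half_elts is 2 in that case.  */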
42166 /* Given INSN containing a vector direct-splat operation, adjust the index
42167    of the source lane to account for the doubleword swap.  */
42168 static void
42169 adjust_splat (rtx_insn *insn)
42171 rtx body = PATTERN (insn);
42172 rtx unspec = XEXP (body, 1);
42173 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
42174 int lane = INTVAL (XVECEXP (unspec, 0, 1));
42175 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42176 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
42177 INSN_CODE (insn) = -1; /* Force re-recognition. */
42178 df_insn_rescan (insn);
42180 if (dump_file)
42181 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
42184 /* Given INSN containing an XXPERMDI operation (that is not a doubleword
42185    swap), reverse the order of the source operands and adjust the indices
42186    of the source lanes to account for doubleword reversal.  */
42187 static void
42188 adjust_xxpermdi (rtx_insn *insn)
42190 rtx set = PATTERN (insn);
42191 rtx select = XEXP (set, 1);
42192 rtx concat = XEXP (select, 0);
42193 rtx src0 = XEXP (concat, 0);
42194 XEXP (concat, 0) = XEXP (concat, 1);
42195 XEXP (concat, 1) = src0;
42196 rtx parallel = XEXP (select, 1);
42197 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
42198 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
42199 int new_lane0 = 3 - lane1;
42200 int new_lane1 = 3 - lane0;
42201 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
42202 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
42203 INSN_CODE (insn) = -1; /* Force re-recognition. */
42204 df_insn_rescan (insn);
42206 if (dump_file)
42207 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
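/* For example, an xxpermdi selecting lanes {0, 2} has its sources
   reversed and its lanes rewritten to {3 - 2, 3 - 0} = {1, 3}.  */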
42210 /* Given INSN containing a VEC_CONCAT operation of two doublewords,
42211    reverse the order of those inputs.  */
42212 static void
42213 adjust_concat (rtx_insn *insn)
42215 rtx set = PATTERN (insn);
42216 rtx concat = XEXP (set, 1);
42217 rtx src0 = XEXP (concat, 0);
42218 XEXP (concat, 0) = XEXP (concat, 1);
42219 XEXP (concat, 1) = src0;
42220 INSN_CODE (insn) = -1; /* Force re-recognition. */
42221 df_insn_rescan (insn);
42223 if (dump_file)
42224 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));

/* Given an UNSPEC_VPERM insn, modify the mask loaded from the
   constant pool to reflect swapped doublewords.  */
static void
adjust_vperm (rtx_insn *insn)
{
  /* We previously determined that the UNSPEC_VPERM was fed by a
     swap of a swapping load of a TOC-relative constant pool symbol.
     Find the MEM in the swapping load and replace it with a MEM for
     the adjusted mask constant.  */
  rtx set = PATTERN (insn);
  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);

  /* Find the swap.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx_insn *swap_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
      {
        struct df_link *def_link = DF_REF_CHAIN (use);
        gcc_assert (def_link && !def_link->next);
        swap_insn = DF_REF_INSN (def_link->ref);
        break;
      }
  gcc_assert (swap_insn);

  /* Find the load.  */
  insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      load_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  rtx_insn *tocrel_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      tocrel_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  /* There is an extra level of indirection for small/large code models.  */
  if (GET_CODE (tocrel_expr) == MEM)
    tocrel_expr = XEXP (tocrel_expr, 0);
  if (!toc_relative_expr_p (tocrel_expr, false))
    gcc_unreachable ();
  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);
  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (GET_CODE (const_vector) == SYMBOL_REF)
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  /* Create an adjusted mask from the initial mask.  */
  unsigned int new_mask[16], i, val;
  for (i = 0; i < 16; ++i)
    {
      val = INTVAL (XVECEXP (const_vector, 0, i));
      if (val < 16)
        new_mask[i] = (val + 8) % 16;
      else
        new_mask[i] = ((val + 8) % 16) + 16;
    }

  /* Create a new CONST_VECTOR and a MEM that references it.  */
  rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
  rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
  rtx new_mem = force_const_mem (V16QImode, new_const_vector);
  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0)))
    {
      rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
      XEXP (new_mem, 0) = base_reg;
      /* Move the newly created insn ahead of the load insn.  */
      rtx_insn *force_insn = get_last_insn ();
      remove_insn (force_insn);
      rtx_insn *before_load_insn = PREV_INSN (load_insn);
      add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
      df_insn_rescan (before_load_insn);
      df_insn_rescan (force_insn);
    }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  if (dump_file)
    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
}
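
/* Worked example of the remapping above: a vperm mask element in
   0..15 selects a byte of the first source, and one in 16..31 a byte
   of the second.  Adding 8 modulo 16 redirects the selection to the
   opposite doubleword of the same source, so 3 becomes
   (3 + 8) % 16 = 11, and 19 becomes ((19 + 8) % 16) + 16 = 27,
   matching the doubleword-swapped layout of the mask's inputs.  */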

/* The insn described by INSN_ENTRY[I] can be swapped, but only
   with special handling.  Take care of that here.  */
static void
handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  switch (insn_entry[i].special_handling)
    {
    default:
      gcc_unreachable ();
    case SH_CONST_VECTOR:
      {
        /* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
        gcc_assert (GET_CODE (body) == SET);
        rtx rhs = SET_SRC (body);
        swap_const_vector_halves (rhs);
        if (dump_file)
          fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
        break;
      }
    case SH_SUBREG:
      /* A subreg of the same size is already safe.  For subregs that
         select a smaller portion of a reg, adjust the index for
         swapped doublewords.  */
      adjust_subreg_index (body);
      if (dump_file)
        fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
      break;
    case SH_NOSWAP_LD:
      /* Convert a non-permuting load to a permuting one.  */
      permute_load (insn);
      break;
    case SH_NOSWAP_ST:
      /* Convert a non-permuting store to a permuting one.  */
      permute_store (insn);
      break;
    case SH_EXTRACT:
      /* Change the lane on an extract operation.  */
      adjust_extract (insn);
      break;
    case SH_SPLAT:
      /* Change the lane on a direct-splat operation.  */
      adjust_splat (insn);
      break;
    case SH_XXPERMDI:
      /* Change the lanes on an XXPERMDI operation.  */
      adjust_xxpermdi (insn);
      break;
    case SH_CONCAT:
      /* Reverse the order of a concatenation operation.  */
      adjust_concat (insn);
      break;
    case SH_VPERM:
      /* Change the mask loaded from the constant pool for a VPERM.  */
      adjust_vperm (insn);
      break;
    }
}

/* Find the insn from the Ith table entry, which is known to be a
   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
static void
replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);
  rtx src_reg = XEXP (SET_SRC (body), 0);
  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
  rtx_insn *new_insn = emit_insn_before (copy, insn);
  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
  df_insn_rescan (new_insn);

  if (dump_file)
    {
      unsigned int new_uid = INSN_UID (new_insn);
      fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
    }

  df_insn_delete (insn);
  remove_insn (insn);
  insn->set_deleted ();
}
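
/* For illustration, with hypothetical registers: a marked swap such as
     (set (reg:V2DI Y) (vec_select:V2DI (reg:V2DI X) (parallel [1 0])))
   becomes the plain copy
     (set (reg:V2DI Y) (reg:V2DI X)),
   which later passes can typically propagate or coalesce away.  */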

/* Dump the swap table to DUMP_FILE.  */
static void
dump_swap_insn_table (swap_web_entry *insn_entry)
{
  int e = get_max_uid ();
  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");

  for (int i = 0; i < e; ++i)
    if (insn_entry[i].is_relevant)
      {
        swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
        fprintf (dump_file, "%6d %6d ", i,
                 pred_entry && pred_entry->insn
                 ? INSN_UID (pred_entry->insn) : 0);
        if (insn_entry[i].is_load)
          fputs ("load ", dump_file);
        if (insn_entry[i].is_store)
          fputs ("store ", dump_file);
        if (insn_entry[i].is_swap)
          fputs ("swap ", dump_file);
        if (insn_entry[i].is_live_in)
          fputs ("live-in ", dump_file);
        if (insn_entry[i].is_live_out)
          fputs ("live-out ", dump_file);
        if (insn_entry[i].contains_subreg)
          fputs ("subreg ", dump_file);
        if (insn_entry[i].is_128_int)
          fputs ("int128 ", dump_file);
        if (insn_entry[i].is_call)
          fputs ("call ", dump_file);
        if (insn_entry[i].is_swappable)
          {
            fputs ("swappable ", dump_file);
            if (insn_entry[i].special_handling == SH_CONST_VECTOR)
              fputs ("special:constvec ", dump_file);
            else if (insn_entry[i].special_handling == SH_SUBREG)
              fputs ("special:subreg ", dump_file);
            else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
              fputs ("special:load ", dump_file);
            else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
              fputs ("special:store ", dump_file);
            else if (insn_entry[i].special_handling == SH_EXTRACT)
              fputs ("special:extract ", dump_file);
            else if (insn_entry[i].special_handling == SH_SPLAT)
              fputs ("special:splat ", dump_file);
            else if (insn_entry[i].special_handling == SH_XXPERMDI)
              fputs ("special:xxpermdi ", dump_file);
            else if (insn_entry[i].special_handling == SH_CONCAT)
              fputs ("special:concat ", dump_file);
            else if (insn_entry[i].special_handling == SH_VPERM)
              fputs ("special:vperm ", dump_file);
          }
        if (insn_entry[i].web_not_optimizable)
          fputs ("unoptimizable ", dump_file);
        if (insn_entry[i].will_delete)
          fputs ("delete ", dump_file);
        fputs ("\n", dump_file);
      }
  fputs ("\n", dump_file);
}

/* Return ALIGN with its address canonicalized to (reg) or
   (plus reg reg).  Here ALIGN is an (and addr (const_int -16)).
   Always return a new copy to avoid problems with combine.  */
static rtx
alignment_with_canonical_addr (rtx align)
{
  rtx canon;
  rtx addr = XEXP (align, 0);

  if (REG_P (addr))
    canon = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx addrop0 = XEXP (addr, 0);
      rtx addrop1 = XEXP (addr, 1);

      if (!REG_P (addrop0))
        addrop0 = force_reg (GET_MODE (addrop0), addrop0);

      if (!REG_P (addrop1))
        addrop1 = force_reg (GET_MODE (addrop1), addrop1);

      canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
    }

  else
    canon = force_reg (GET_MODE (addr), addr);

  return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
}
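
/* For illustration, with hypothetical operands: given
     (and (plus (reg rB) (const_int 48)) (const_int -16)),
   the const_int 48 is forced into a fresh register rN and a new
     (and (plus (reg rB) (reg rN)) (const_int -16))
   is returned, so the masked address uses only the (reg) and
   (plus reg reg) forms that indexed vector loads and stores accept.  */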

/* Check whether an rtx is an alignment mask, and if so, return
   a fully-expanded rtx for the masking operation.  */
static rtx
alignment_mask (rtx_insn *insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != AND
      || !REG_P (XEXP (SET_SRC (body), 0)))
    return 0;

  rtx mask = XEXP (SET_SRC (body), 1);

  if (GET_CODE (mask) == CONST_INT)
    {
      if (INTVAL (mask) == -16)
        return alignment_with_canonical_addr (SET_SRC (body));
      else
        return 0;
    }

  if (!REG_P (mask))
    return 0;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx real_mask = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (use), mask))
        continue;

      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
        return 0;

      rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
      rtx const_body = PATTERN (const_insn);
      if (GET_CODE (const_body) != SET)
        return 0;

      real_mask = SET_SRC (const_body);

      if (GET_CODE (real_mask) != CONST_INT
          || INTVAL (real_mask) != -16)
        return 0;
    }

  if (real_mask == 0)
    return 0;

  return alignment_with_canonical_addr (SET_SRC (body));
}
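
/* For illustration: both the immediate form
     (set (reg rA) (and (reg rB) (const_int -16)))
   and the two-insn form
     (set (reg rM) (const_int -16))
     (set (reg rA) (and (reg rB) (reg rM)))
   are accepted; in the latter case the use-def chain for rM must lead
   to a single definition whose value is -16.  */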

/* Given INSN that's a load or store based at BASE_REG, look for a
   feeding computation that aligns its address on a 16-byte boundary.  */
static rtx
find_alignment_op (rtx_insn *insn, rtx base_reg)
{
  df_ref base_use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  rtx and_operation = 0;

  FOR_EACH_INSN_INFO_USE (base_use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
        continue;

      struct df_link *base_def_link = DF_REF_CHAIN (base_use);
      if (!base_def_link || base_def_link->next)
        break;

      /* With stack-protector code enabled, and possibly in other
         circumstances, there may not be an associated insn for
         the def.  */
      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
        break;

      rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
      and_operation = alignment_mask (and_insn);
      if (and_operation != 0)
        break;
    }

  return and_operation;
}

struct del_info { bool replace; rtx_insn *replace_insn; };

/* If INSN is the load for an lvx pattern, put it in canonical form.  */
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
              && GET_CODE (SET_SRC (body)) == VEC_SELECT
              && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);
          if (!link || link->next)
            break;

          rtx_insn *swap_insn = DF_REF_INSN (link->ref);
          if (!insn_is_swap_p (swap_insn)
              || insn_is_load_p (swap_insn)
              || insn_is_store_p (swap_insn))
            break;

          /* Expected lvx pattern found.  Change the swap to
             a copy, and propagate the AND operation into the
             load.  */
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

          XEXP (mem, 0) = and_operation;
          SET_SRC (body) = mem;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);

          if (dump_file)
            fprintf (dump_file, "lvx opportunity found at %d\n",
                     INSN_UID (insn));
        }
    }
}
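
/* Sketch of the lvx recombination, with hypothetical registers:
     (set (reg rA) (and (reg rB) (const_int -16)))
     (set (reg vX) (vec_select:V16QI (mem (reg rA)) ...))
     (set (reg vY) (vec_select:V16QI (reg vX) ...))
   is rewritten so the aligning AND feeds the load address directly,
   and the trailing swap is queued for replacement with a copy:
     (set (reg vX) (mem (and (reg rB) (const_int -16))))
     (set (reg vY) (reg vX))
   which is the canonical form for an lvx-style aligned load.  */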

/* If INSN is the store for an stvx pattern, put it in canonical form.  */
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
              && GET_CODE (SET_DEST (body)) == MEM
              && GET_CODE (SET_SRC (body)) == VEC_SELECT);
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      rtx src_reg = XEXP (SET_SRC (body), 0);
      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
        {
          if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
            continue;

          struct df_link *link = DF_REF_CHAIN (src_use);
          if (!link || link->next)
            break;

          rtx_insn *swap_insn = DF_REF_INSN (link->ref);
          if (!insn_is_swap_p (swap_insn)
              || insn_is_load_p (swap_insn)
              || insn_is_store_p (swap_insn))
            break;

          /* Expected stvx pattern found.  Change the swap to
             a copy, and propagate the AND operation into the
             store.  */
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

          XEXP (mem, 0) = and_operation;
          SET_SRC (body) = src_reg;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);

          if (dump_file)
            fprintf (dump_file, "stvx opportunity found at %d\n",
                     INSN_UID (insn));
        }
    }
}
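
/* The stvx case is the mirror image of the sketch above: the swap
   feeding the stored value is queued for replacement with a copy, and
   the aligning AND is folded into the MEM on the destination side of
   the store.  */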

/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
      {
        if (!NONDEBUG_INSN_P (insn))
          continue;

        if (insn_is_load_p (insn) && insn_is_swap_p (insn))
          recombine_lvx_pattern (insn, to_delete);
        else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
          recombine_stvx_pattern (insn, to_delete);
      }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
        rtx swap_body = PATTERN (to_delete[i].replace_insn);
        rtx src_reg = XEXP (SET_SRC (swap_body), 0);
        rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
        rtx_insn *new_insn = emit_insn_before (copy,
                                               to_delete[i].replace_insn);
        set_block_for_insn (new_insn,
                            BLOCK_FOR_INSN (to_delete[i].replace_insn));
        df_insn_rescan (new_insn);
        df_insn_delete (to_delete[i].replace_insn);
        remove_insn (to_delete[i].replace_insn);
        to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}

/* Main entry point for this pass.  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
        unsigned int uid = INSN_UID (insn);
        if (NONDEBUG_INSN_P (insn))
          {
            insn_entry[uid].insn = insn;

            if (GET_CODE (insn) == CALL_INSN)
              insn_entry[uid].is_call = 1;

            /* Walk the uses and defs to see if we mention vector regs.
               Record any constraints on optimization of such mentions.  */
            struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
            df_ref mention;
            FOR_EACH_INSN_INFO_USE (mention, insn_info)
              {
                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

                /* If a use gets its value from a call insn, it will be
                   a hard register and will look like (reg:V4SI 3 3).
                   The df analysis creates two mentions for GPR3 and GPR4,
                   both DImode.  We must recognize this and treat it as a
                   vector mention to ensure the call is unioned with this
                   use.  */
                if (mode == DImode && DF_REF_INSN_INFO (mention))
                  {
                    rtx feeder = DF_REF_INSN (mention);
                    /* FIXME: It is pretty hard to get from the df mention
                       to the mode of the use in the insn.  We arbitrarily
                       pick a vector mode here, even though the use might
                       be a real DImode.  We can be too conservative
                       (create a web larger than necessary) because of
                       this, so consider eventually fixing this.  */
                    if (GET_CODE (feeder) == CALL_INSN)
                      mode = V4SImode;
                  }

                if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
                  {
                    insn_entry[uid].is_relevant = 1;
                    if (mode == TImode || mode == V1TImode
                        || FLOAT128_VECTOR_P (mode))
                      insn_entry[uid].is_128_int = 1;
                    if (DF_REF_INSN_INFO (mention))
                      insn_entry[uid].contains_subreg
                        = !rtx_equal_p (DF_REF_REG (mention),
                                        DF_REF_REAL_REG (mention));
                    union_defs (insn_entry, insn, mention);
                  }
              }
            FOR_EACH_INSN_INFO_DEF (mention, insn_info)
              {
                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

                /* If we're loading up a hard vector register for a call,
                   it looks like (set (reg:V4SI 9 9) (...)).  The df
                   analysis creates two mentions for GPR9 and GPR10, both
                   DImode.  So relying on the mode from the mentions
                   isn't sufficient to ensure we union the call into the
                   web with the parameter setup code.  */
                if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
                    && ALTIVEC_OR_VSX_VECTOR_MODE
                         (GET_MODE (SET_DEST (PATTERN (insn)))))
                  mode = GET_MODE (SET_DEST (PATTERN (insn)));

                if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
                  {
                    insn_entry[uid].is_relevant = 1;
                    if (mode == TImode || mode == V1TImode
                        || FLOAT128_VECTOR_P (mode))
                      insn_entry[uid].is_128_int = 1;
                    if (DF_REF_INSN_INFO (mention))
                      insn_entry[uid].contains_subreg
                        = !rtx_equal_p (DF_REF_REG (mention),
                                        DF_REF_REAL_REG (mention));
                    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
                    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
                      insn_entry[uid].is_live_out = 1;
                    union_uses (insn_entry, insn, mention);
                  }
              }

            if (insn_entry[uid].is_relevant)
              {
                /* Determine if this is a load or store.  */
                insn_entry[uid].is_load = insn_is_load_p (insn);
                insn_entry[uid].is_store = insn_is_store_p (insn);

                /* Determine if this is a doubleword swap.  If not,
                   determine whether it can legally be swapped.  */
                if (insn_is_swap_p (insn))
                  insn_entry[uid].is_swap = 1;
                else
                  {
                    unsigned int special = SH_NONE;
                    insn_entry[uid].is_swappable
                      = insn_is_swappable_p (insn_entry, insn, &special);
                    if (special != SH_NONE && insn_entry[uid].contains_subreg)
                      insn_entry[uid].is_swappable = 0;
                    else if (special != SH_NONE)
                      insn_entry[uid].special_handling = special;
                    else if (insn_entry[uid].contains_subreg)
                      insn_entry[uid].special_handling = SH_SUBREG;
                  }
              }
          }
      }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
        continue;

      swap_web_entry *root
        = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
          || (insn_entry[i].contains_subreg
              && insn_entry[i].special_handling != SH_SUBREG)
          || insn_entry[i].is_128_int || insn_entry[i].is_call
          || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
        root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
         optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
               && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
        root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
         by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref def;

          FOR_EACH_INSN_INFO_DEF (def, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (def);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref use;

          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (use);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
        && insn_entry[i].is_swap)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}
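
/* For illustration: on little-endian Power8, a vectorized loop body
   such as c[i] = a[i] + b[i] typically expands to lxvd2x/xxpermdi
   pairs for the loads and an xxpermdi/stxvd2x pair for the store.
   When the web containing those insns is optimizable, this pass turns
   each xxpermdi doubleword swap into a copy; every value in the web
   is then held in doubleword-swapped form consistently, so the final
   stored results are unchanged.  */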

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
              && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

  opt_pass *clone ()
    {
      return new pass_analyze_swaps (m_ctxt);
    }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

       *fenv_var = __builtin_mffs ();
       double fenv_hold;
       *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
       __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

       double fenv_clear = __builtin_mffs ();
       *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
       __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything in the FPSCR image's lower 32 bits, i.e.
     all of its status, exception enable, and control fields.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clean_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

       double old_fenv = __builtin_mffs ();
       double fenv_update;
       *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
                                  | (*(uint64_t*)fenv_var & 0x1ff80fff);
       __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
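
/* The three trees built above are consumed by the middle end when it
   expands an atomic floating-point compound assignment: broadly, *HOLD
   is emitted before the operation, *CLEAR inside the compare-and-swap
   retry loop, and *UPDATE once the store has succeeded, so that only
   the exceptions from the successful iteration are raised.  */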

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
                          optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
              && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"