/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "print-tree.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "sched-int.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "tree-pass.h"
#include "xcoffout.h"		/* get declarations of xcoff_*_section_name */
#include "gstab.h"		/* for N_SLINE */
#include "case-cfn-macros.h"

/* This file should be included last.  */
#include "target-def.h"
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))
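
/* Editorial note (not in the original source): min and max are
   function-like macros, so each argument may be evaluated twice.
   For example,

     int n = min (x++, limit);

   increments x twice whenever x < limit.  Callers in this file only
   pass side-effect-free operands.  */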
/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
} rs6000_stack_t;
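
/* Editorial sketch (not part of GCC itself): code consuming a filled-in
   rs6000_stack_t, as returned by rs6000_stack_info () below, typically
   addresses a save area relative to the post-prologue stack pointer.
   Assuming the convention documented above that the *_save_offset
   fields are offsets from the initial (pre-prologue) SP:

     rs6000_stack_t *info = rs6000_stack_info ();
     HOST_WIDE_INT gp_off = info->total_size + info->gp_save_offset;

   gp_off is then the GP save area's offset from the SP after the
   prologue has allocated total_size bytes.  */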
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call so we can
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;
static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
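
/* Editorial example (not in the original source): because each RECIP_*
   value is a single bit, target code can compose arbitrary subsets,
   e.g. a hypothetical mask enabling only scalar divide estimates:

     unsigned int mask = RECIP_SF_DIV | RECIP_DF_DIV;

   RECIP_LOW_PRECISION above is likewise just RECIP_ALL with the two
   double-precision rsqrt bits cleared.  */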
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
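
/* Editorial sketch of how this table is consumed (the actual option
   handling appears later in this file): each comma-separated token of
   -mrecip=... is matched by string and its mask bits are ORed into the
   accumulated reciprocal mask, roughly:

     for (i = 0; i < ARRAY_SIZE (recip_options); i++)
       if (strcmp (token, recip_options[i].string) == 0)
	 flags |= recip_options[i].mask;

   where token and flags are stand-ins for the real parsing state.  */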
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;	/* HWCAP word: 0 = AT_HWCAP, 1 = AT_HWCAP2.  */
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",	 PPC_FEATURE_HAS_4xxMAC,	0 },
  { "altivec",	 PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05", PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06", PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",	 PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",	 PPC_FEATURE_BOOKE,		0 },
  { "cellbe",	 PPC_FEATURE_CELL_BE,		0 },
  { "dfp",	 PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",	 PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",	 PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",	 PPC_FEATURE_HAS_MMU,		0 },
  { "notb",	 PPC_FEATURE_NO_TB,		0 },
  { "pa6t",	 PPC_FEATURE_PA6T,		0 },
  { "power4",	 PPC_FEATURE_POWER4,		0 },
  { "power5",	 PPC_FEATURE_POWER5,		0 },
  { "power5+",	 PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",	 PPC_FEATURE_POWER6_EXT,	0 },
  { "ppc32",	 PPC_FEATURE_32,		0 },
  { "ppc601",	 PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",	 PPC_FEATURE_64,		0 },
  { "ppcle",	 PPC_FEATURE_PPC_LE,		0 },
  { "smt",	 PPC_FEATURE_SMT,		0 },
  { "spe",	 PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",	 PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",	 PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",	 PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07", PPC_FEATURE2_ARCH_2_07,	1 },
  { "dscr",	 PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",	 PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",	 PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",	 PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",	 PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",	 PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",	 PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00", PPC_FEATURE2_ARCH_3_00,	1 },
  { "ieee128",	 PPC_FEATURE2_HAS_IEEE128,	1 }
};
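
/* Editorial usage example (not part of this file): user code reaches
   this table through the cpu_supports builtin, e.g.

     if (__builtin_cpu_supports ("vsx"))
       do_vsx_version ();     // hypothetical user function
     else
       do_scalar_version ();  // hypothetical user function

   which tests the PPC_FEATURE_HAS_VSX bit in the AT_HWCAP word that
   the TCB provides.  */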
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
static bool cpu_builtin_p;
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
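
/* Editorial example: given the ordering assumption documented above, a
   register type is classified with one range check instead of four
   equality tests, e.g.

     if (IS_STD_REG_TYPE (rtype))
       handle_standard_register ();   // hypothetical helper

   for any rtype between GPR_REG_TYPE and FPR_REG_TYPE inclusive.  */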
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
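
/* Editorial example: a typical (hypothetical) entry composes several of
   these bits; a mode valid in GPRs with reg+reg, reg+offset, and
   pre-increment addressing would be described as

     reg_addr[mode].addr_mask[RELOAD_REG_GPR]
       = (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
	  | RELOAD_REG_PRE_INCDEC);
   */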
/* Per-mode addressing information: valid address masks and reload insns,
   indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;		/* INSN for fusing gpr ADDIS/loads.  */
					/* INSNs for fusing addi with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
					/* INSNs for fusing addis with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
  bool fused_toc;			/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
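
/* Editorial usage sketch: with the tables above filled in, address
   validation can ask one question per mode instead of re-deriving the
   answer from ISA flags, e.g. (pseudo-code)

     if (GET_CODE (addr) == PRE_INC && !mode_supports_pre_incdec_p (mode))
       reject the address;

   The real checks appear in the legitimate-address routines later in
   this file.  */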
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
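
/* Editorial note: COSTS_N_INSNS (N) converts an instruction count into
   the units of the RTL cost framework, in which a simple add costs
   COSTS_N_INSNS (1).  An entry such as

     COSTS_N_INSNS (19)

   for divsi therefore says an SImode divide costs roughly as much as
   19 adds on that processor.  */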
/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  0,			    /* SF->DF convert */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
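
/* Editorial illustration (hypothetical values): with the definitions
   above in effect, an entry in rs6000-builtin.def such as

     RS6000_BUILTIN_2 (ALTIVEC_VADDUBM, "__builtin_altivec_vaddubm",
		       RS6000_BTM_ALTIVEC, RS6000_BTC_BINARY,
		       CODE_FOR_addv16qi3)

   expands to the initializer

     { "__builtin_altivec_vaddubm", CODE_FOR_addv16qi3,
       RS6000_BTM_ALTIVEC, RS6000_BTC_BINARY },

   so rs6000_builtin_info gets one record per builtin, in definition
   order.  */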
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
static void macho_branch_islands (void);
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode, rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;			/* builtin type to use in the mapping.  */
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr","ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9",  "10", "11", "12", "13", "14", "15",
      "16", "17", "18", "19", "20", "21", "22", "23",
      "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",    "lr",  "ctr",   "ap",
  "%cr0",  "%cr1", "%cr2",  "%cr3",  "%cr4",  "%cr5",  "%cr6",  "%cr7",
    "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
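
/* Editorial example: ALTIVEC_REG_BIT maps %v0 to the most significant
   bit of the 32-bit VRSAVE mask, so

     ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO)      == 0x80000000
     ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) == 0x00000001

   matching the bit layout of the VRSAVE special purpose register.  */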
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit machines.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif
#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST		\
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE			\
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1774 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1775 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1778 /* Use a 32-bit anchor range. This leads to sequences like:
1780 addis tmp,anchor,high
1783 where tmp itself acts as an anchor, and can be shared between
1784 accesses to the same 64k page. */
1785 #undef TARGET_MIN_ANCHOR_OFFSET
1786 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1787 #undef TARGET_MAX_ANCHOR_OFFSET
1788 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
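/* Added illustration (not from the original sources): with the +/-2GB anchor
   range above, an access at anchor+0x12345678 can be split into a high part
   of 0x1234 and a low part of 0x5678 (no sign adjustment is needed here
   since the low part is below 0x8000):

	addis	tmp,anchor,0x1234
	ld	reg,0x5678(tmp)

   Any other access whose offset shares the same high part can then reuse
   TMP, which is how tmp comes to act as a shared anchor for a 64k page.  */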
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P rs6000_lra_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
/* Processor table.  */
struct rs6000_ptt
{
  const char *const name;		/* Canonical processor name.  */
  const enum processor_type processor;	/* Processor type enum value.  */
  const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};

static struct rs6000_ptt const processor_target_table[] =
{
#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
#include "rs6000-cpus.def"
#undef RS6000_CPU
};

/* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is invalid.  */

static int
rs6000_cpu_name_lookup (const char *name)
{
  size_t i;

  if (name != NULL)
    {
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (! strcmp (name, processor_target_table[i].name))
	  return (int)i;
    }

  return -1;
}
/* Return number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.
   This is ordinarily the length in words of a value of mode MODE
   but can be less for certain modes in special long registers.

   For the SPE, GPRs are 64 bits but only 32 bits are visible in
   scalar instructions.  The upper 32 bits are only available to the
   SIMD instructions.

   POWER and PowerPC GPRs hold 32 bits worth;
   PowerPC64 GPRs and FPRs hold 64 bits worth.  */

static int
rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
{
  unsigned HOST_WIDE_INT reg_size;

  /* 128-bit floating point usually takes 2 registers, unless it is IEEE
     128-bit floating point that can go in vector registers, which has VSX
     memory addressing.  */
  if (FP_REGNO_P (regno))
    reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
		? UNITS_PER_VSX_WORD
		: UNITS_PER_FP_WORD);

  else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
    reg_size = UNITS_PER_SPE_WORD;

  else if (ALTIVEC_REGNO_P (regno))
    reg_size = UNITS_PER_ALTIVEC_WORD;

  /* The value returned for SCmode in the E500 double case is 2 for
     ABI compatibility; storing an SCmode value in a single register
     would require function_arg and rs6000_spe_function_arg to handle
     SCmode so as to pass the value correctly in a pair of
     registers.  */
  else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
	   && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
    reg_size = UNITS_PER_FP_WORD;

  else
    reg_size = UNITS_PER_WORD;

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */
static int
rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
{
  int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;

  if (COMPLEX_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
     register combinations, and use PTImode where we need to deal with quad
     word memory operations.  Don't allow quad words in the argument or frame
     pointer registers, just registers 0..31.  */
  if (mode == PTImode)
    return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && ((regno & 1) == 0));

  /* VSX registers that overlap the FPR registers are larger than for non-VSX
     implementations.  Don't allow an item to be split between a FP register
     and an Altivec register.  Allow TImode in all VSX registers if the user
     asked for it.  */
  if (TARGET_VSX && VSX_REGNO_P (regno)
      && (VECTOR_MEM_VSX_P (mode)
	  || FLOAT128_VECTOR_P (mode)
	  || reg_addr[mode].scalar_in_vmx_p
	  || (TARGET_VSX_TIMODE && mode == TImode)
	  || (TARGET_VADDUQM && mode == V1TImode)))
    {
      if (FP_REGNO_P (regno))
	return FP_REGNO_P (last_regno);

      if (ALTIVEC_REGNO_P (regno))
	{
	  if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
	    return 0;

	  return ALTIVEC_REGNO_P (last_regno);
	}
    }

  /* The GPRs can hold any mode, but values bigger than one register
     cannot go past R31.  */
  if (INT_REGNO_P (regno))
    return INT_REGNO_P (last_regno);

  /* The float registers (except for VSX vector modes) can only hold floating
     modes and DImode.  */
  if (FP_REGNO_P (regno))
    {
      if (FLOAT128_VECTOR_P (mode))
	return false;

      if (SCALAR_FLOAT_MODE_P (mode)
	  && (mode != TDmode || (regno % 2) == 0)
	  && FP_REGNO_P (last_regno))
	return 1;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
	    return 1;

	  if (TARGET_VSX_SMALL_INTEGER)
	    {
	      if (mode == SImode)
		return 1;

	      if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
		return 1;
	    }
	}

      if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
	  && PAIRED_VECTOR_MODE (mode))
	return 1;

      return 0;
    }

  /* The CR register can only hold CC modes.  */
  if (CR_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;

  if (CA_REGNO_P (regno))
    return mode == Pmode || mode == SImode;

  /* AltiVec only in AltiVec registers.  */
  if (ALTIVEC_REGNO_P (regno))
    return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
	    || mode == V1TImode);

  /* ...but GPRs can hold SIMD data on the SPE in one register.  */
  if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
    return 1;

  /* We cannot put non-VSX TImode or PTImode anywhere except general
     registers, and it must be able to fit within the register set.  */

  return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
}
/* Print interesting facts about registers.  */
static void
rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
{
  ssize_t r, m;

  for (r = first_regno; r <= last_regno; ++r)
    {
      const char *comma = "";
      int len;

      if (first_regno == last_regno)
	fprintf (stderr, "%s:\t", reg_name);
      else
	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);

      len = 8;
      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
	  {
	    if (len > 70)
	      {
		fprintf (stderr, ",\n\t");
		len = 8;
		comma = "";
	      }

	    if (rs6000_hard_regno_nregs[m][r] > 1)
	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
			      rs6000_hard_regno_nregs[m][r]);
	    else
	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));

	    comma = ", ";
	  }

      if (call_used_regs[r])
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "call-used");
	  comma = ", ";
	}

      if (fixed_regs[r])
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "fixed");
	  comma = ", ";
	}

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      len += fprintf (stderr, "%sreg-class = %s", comma,
		      reg_class_names[(int)rs6000_regno_regclass[r]]);

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      fprintf (stderr, "%sregno = %d\n", comma, r);
    }
}
static const char *
rs6000_debug_vector_unit (enum rs6000_vector v)
{
  const char *ret;

  switch (v)
    {
    case VECTOR_NONE:	   ret = "none";      break;
    case VECTOR_ALTIVEC:   ret = "altivec";   break;
    case VECTOR_VSX:	   ret = "vsx";       break;
    case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
    case VECTOR_PAIRED:	   ret = "paired";    break;
    case VECTOR_SPE:	   ret = "spe";       break;
    case VECTOR_OTHER:	   ret = "other";     break;
    default:		   ret = "unknown";   break;
    }

  return ret;
}
/* Inner function printing just the address mask for a particular reload
   register class.  */
DEBUG_FUNCTION char *
rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
{
  static char ret[8];
  char *p = ret;

  if ((mask & RELOAD_REG_VALID) != 0)
    *p++ = 'v';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_MULTIPLE) != 0)
    *p++ = 'm';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_INDEXED) != 0)
    *p++ = 'i';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
    *p++ = 'O';
  else if ((mask & RELOAD_REG_OFFSET) != 0)
    *p++ = 'o';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_AND_M16) != 0)
    *p++ = '&';
  else if (keep_spaces)
    *p++ = ' ';

  *p = '\0';

  return ret;
}
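/* Added example (assumes the one-character-per-capability encoding above):
   a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED and RELOAD_REG_OFFSET
   set prints as "v io   " when KEEP_SPACES is true, so the columns line up
   across reload register classes.  */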
/* Print the address masks in a human readable fashion.  */
DEBUG_FUNCTION void
rs6000_debug_print_mode (ssize_t m)
{
  ssize_t rc;
  int spaces = 0;
  bool fuse_extra_p;

  fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
  for (rc = 0; rc < N_RELOAD_REG; rc++)
    fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));

  if ((reg_addr[m].reload_store != CODE_FOR_nothing)
      || (reg_addr[m].reload_load != CODE_FOR_nothing))
    fprintf (stderr, " Reload=%c%c",
	     (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
	     (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
  else
    spaces += sizeof (" Reload=sl") - 1;

  if (reg_addr[m].scalar_in_vmx_p)
    {
      fprintf (stderr, "%*s Upper=y", spaces, "");
      spaces = 0;
    }
  else
    spaces += sizeof (" Upper=y") - 1;

  fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
		  || reg_addr[m].fused_toc);

  if (!fuse_extra_p)
    for (rc = 0; rc < N_RELOAD_REG; rc++)
      if (rc != RELOAD_REG_ANY)
	if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
	    || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
	    || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
	    || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
	  {
	    fuse_extra_p = true;
	    break;
	  }

  if (fuse_extra_p)
    {
      fprintf (stderr, "%*s Fuse:", spaces, "");
      spaces = 0;

      for (rc = 0; rc < N_RELOAD_REG; rc++)
	if (rc != RELOAD_REG_ANY)
	  {
	    char load, store;

	    if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
	      load = 'l';
	    else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
	      load = 'L';
	    else
	      load = '-';

	    if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
	      store = 's';
	    else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
	      store = 'S';
	    else
	      store = '-';

	    if (load == '-' && store == '-')
	      spaces += 5;
	    else
	      {
		fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
			 reload_reg_map[rc].name[0], load, store);
		spaces = 0;
	      }
	  }

      if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
	{
	  fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
	  spaces = 0;
	}
      else
	spaces += sizeof (" P8gpr") - 1;

      if (reg_addr[m].fused_toc)
	{
	  fprintf (stderr, "%*sToc", (spaces + 1), "");
	  spaces = 0;
	}
      else
	spaces += sizeof (" Toc") - 1;
    }
  else
    spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;

  if (rs6000_vector_unit[m] != VECTOR_NONE
      || rs6000_vector_mem[m] != VECTOR_NONE)
    fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
	     spaces, "",
	     rs6000_debug_vector_unit (rs6000_vector_unit[m]),
	     rs6000_debug_vector_unit (rs6000_vector_mem[m]));

  fputs ("\n", stderr);
}
#define DEBUG_FMT_ID "%-32s= "
#define DEBUG_FMT_D   DEBUG_FMT_ID "%d\n"
#define DEBUG_FMT_WX  DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
#define DEBUG_FMT_S   DEBUG_FMT_ID "%s\n"
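/* Added illustration (not from the original sources): DEBUG_FMT_S pastes the
   ID format with "%s\n", so fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2")
   prints the key left-justified in a 32-column field:

	abi                             = ELFv2
 */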
/* Print various interesting information with -mdebug=reg.  */
static void
rs6000_debug_reg_global (void)
{
  static const char *const tf[2] = { "false", "true" };
  const char *nl = (const char *)0;
  int m;
  size_t m1, m2, v;
  char costly_num[20];
  char nop_num[20];
  char flags_buffer[40];
  const char *costly_str;
  const char *nop_str;
  const char *trace_str;
  const char *abi_str;
  const char *cmodel_str;
  struct cl_target_option cl_opts;

  /* Modes we want tieable information on.  */
  static const machine_mode print_tieable_modes[] = {
    QImode, HImode, SImode, DImode, TImode, PTImode,
    SFmode, DFmode, TFmode, IFmode, KFmode,
    SDmode, DDmode, TDmode,
    V8QImode, V4HImode, V2SImode,
    V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
    V32QImode, V16HImode, V8SImode, V4DImode, V2TImode,
    V2SFmode, V4SFmode, V2DFmode, V8SFmode, V4DFmode,
    CCmode, CCUNSmode, CCEQmode,
  };

  /* Virtual regs we are interested in.  */
  const static struct {
    int regno;			/* register number.  */
    const char *name;		/* register name.  */
  } virtual_regs[] = {
    { STACK_POINTER_REGNUM, "stack pointer:" },
    { TOC_REGNUM, "toc: " },
    { STATIC_CHAIN_REGNUM, "static chain: " },
    { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
    { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
    { ARG_POINTER_REGNUM, "arg pointer: " },
    { FRAME_POINTER_REGNUM, "frame pointer:" },
    { FIRST_PSEUDO_REGISTER, "first pseudo: " },
    { FIRST_VIRTUAL_REGISTER, "first virtual:" },
    { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
    { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
    { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
    { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
    { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
    { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
    { LAST_VIRTUAL_REGISTER, "last virtual: " },
  };

  fputs ("\nHard register information:\n", stderr);
  rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
  rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
  rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
			  LAST_ALTIVEC_REGNO,
			  "vs");
  rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
  rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
  rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
  rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
  rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
  rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
  rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
  rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");

  fputs ("\nVirtual/stack/frame registers:\n", stderr);
  for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
    fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name,
	     virtual_regs[v].regno);

  fprintf (stderr,
	   "\n"
	   "d  reg_class = %s\n"
	   "f  reg_class = %s\n"
	   "v  reg_class = %s\n"
	   "wa reg_class = %s\n"
	   "wb reg_class = %s\n"
	   "wd reg_class = %s\n"
	   "we reg_class = %s\n"
	   "wf reg_class = %s\n"
	   "wg reg_class = %s\n"
	   "wh reg_class = %s\n"
	   "wi reg_class = %s\n"
	   "wj reg_class = %s\n"
	   "wk reg_class = %s\n"
	   "wl reg_class = %s\n"
	   "wm reg_class = %s\n"
	   "wo reg_class = %s\n"
	   "wp reg_class = %s\n"
	   "wq reg_class = %s\n"
	   "wr reg_class = %s\n"
	   "ws reg_class = %s\n"
	   "wt reg_class = %s\n"
	   "wu reg_class = %s\n"
	   "wv reg_class = %s\n"
	   "ww reg_class = %s\n"
	   "wx reg_class = %s\n"
	   "wy reg_class = %s\n"
	   "wz reg_class = %s\n"
	   "wA reg_class = %s\n"
	   "wH reg_class = %s\n"
	   "wI reg_class = %s\n"
	   "wJ reg_class = %s\n"
	   "wK reg_class = %s\n"
	   "\n",
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);

  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    rs6000_debug_print_mode (m);

  fputs ("\n", stderr);

  for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
    {
      machine_mode mode1 = print_tieable_modes[m1];
      bool first_time = true;

      nl = (const char *)0;
      for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
	{
	  machine_mode mode2 = print_tieable_modes[m2];
	  if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
	    {
	      if (first_time)
		{
		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
		  nl = "\n";
		  first_time = false;
		}

	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));
	    }
	}

      if (!first_time)
	fputs ("\n", stderr);
    }

  if (nl)
    fputs (nl, stderr);

  if (rs6000_recip_control)
    {
      fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_recip_bits[m])
	  fprintf (stderr,
		   "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
		   GET_MODE_NAME (m),
		   (RS6000_RECIP_AUTO_RE_P (m)
		    ? "auto"
		    : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
		   (RS6000_RECIP_AUTO_RSQRTE_P (m)
		    ? "auto"
		    : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));

      fputs ("\n", stderr);
    }

  if (rs6000_cpu_index >= 0)
    {
      const char *name = processor_target_table[rs6000_cpu_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_cpu_index].target_enable;

      sprintf (flags_buffer, "-mcpu=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");

  if (rs6000_tune_index >= 0)
    {
      const char *name = processor_target_table[rs6000_tune_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_tune_index].target_enable;

      sprintf (flags_buffer, "-mtune=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");

  cl_target_option_save (&cl_opts, &global_options);
  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
			    rs6000_isa_flags);

  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
			    rs6000_isa_flags_explicit);

  rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
				rs6000_builtin_mask);

  rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");

  switch (rs6000_sched_costly_dep)
    {
    case max_dep_latency:
      costly_str = "max_dep_latency";
      break;

    case no_dep_costly:
      costly_str = "no_dep_costly";
      break;

    case all_deps_costly:
      costly_str = "all_deps_costly";
      break;

    case true_store_to_load_dep_costly:
      costly_str = "true_store_to_load_dep_costly";
      break;

    case store_to_load_dep_costly:
      costly_str = "store_to_load_dep_costly";
      break;

    default:
      costly_str = costly_num;
      sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);

  switch (rs6000_sched_insert_nops)
    {
    case sched_finish_regroup_exact:
      nop_str = "sched_finish_regroup_exact";
      break;

    case sched_finish_pad_groups:
      nop_str = "sched_finish_pad_groups";
      break;

    case sched_finish_none:
      nop_str = "sched_finish_none";
      break;

    default:
      nop_str = nop_num;
      sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);

  switch (rs6000_sdata)
    {
    default:
    case SDATA_NONE:
      break;

    case SDATA_DATA:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
      break;

    case SDATA_SYSV:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
      break;

    case SDATA_EABI:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
      break;
    }

  switch (rs6000_traceback)
    {
    case traceback_default:	trace_str = "default";	break;
    case traceback_none:	trace_str = "none";	break;
    case traceback_part:	trace_str = "part";	break;
    case traceback_full:	trace_str = "full";	break;
    default:			trace_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);

  switch (rs6000_current_cmodel)
    {
    case CMODEL_SMALL:	cmodel_str = "small";	break;
    case CMODEL_MEDIUM:	cmodel_str = "medium";	break;
    case CMODEL_LARGE:	cmodel_str = "large";	break;
    default:		cmodel_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);

  switch (rs6000_current_abi)
    {
    case ABI_NONE:	abi_str = "none";	break;
    case ABI_AIX:	abi_str = "aix";	break;
    case ABI_ELFv2:	abi_str = "ELFv2";	break;
    case ABI_V4:	abi_str = "V4";		break;
    case ABI_DARWIN:	abi_str = "darwin";	break;
    default:		abi_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);

  if (rs6000_altivec_abi)
    fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");

  if (rs6000_spe_abi)
    fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");

  if (rs6000_darwin64_abi)
    fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");

  if (rs6000_float_gprs)
    fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");

  fprintf (stderr, DEBUG_FMT_S, "fprs",
	   (TARGET_FPRS ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "single_float",
	   (TARGET_SINGLE_FLOAT ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "double_float",
	   (TARGET_DOUBLE_FLOAT ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "soft_float",
	   (TARGET_SOFT_FLOAT ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "e500_single",
	   (TARGET_E500_SINGLE ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "e500_double",
	   (TARGET_E500_DOUBLE ? "true" : "false"));

  if (TARGET_LINK_STACK)
    fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");

  fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");

  if (TARGET_P8_FUSION)
    {
      char options[80];

      strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
      if (TARGET_TOC_FUSION)
	strcat (options, ", toc");

      if (TARGET_P8_FUSION_SIGN)
	strcat (options, ", sign");

      fprintf (stderr, DEBUG_FMT_S, "fusion", options);
    }

  fprintf (stderr, DEBUG_FMT_S, "plt-format",
	   TARGET_SECURE_PLT ? "secure" : "bss");
  fprintf (stderr, DEBUG_FMT_S, "struct-return",
	   aix_struct_return ? "aix" : "sysv");
  fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
  fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
  fprintf (stderr, DEBUG_FMT_S, "align_branch",
	   tf[!!rs6000_align_branch_targets]);
  fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
  fprintf (stderr, DEBUG_FMT_D, "long_double_size",
	   rs6000_long_double_type_size);
  fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
	   (int)rs6000_sched_restricted_insns_priority);
  fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
	   (int)END_BUILTINS);
  fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
	   (int)RS6000_BUILTIN_COUNT);

  fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
	   (int)TARGET_FLOAT128_ENABLE_TYPE);

  if (TARGET_VSX)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
	     (int)VECTOR_ELEMENT_SCALAR_64BIT);

  if (TARGET_DIRECT_MOVE_128)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
	     (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
}
/* Update the addr mask bits in reg_addr to help secondary reload and the
   legitimate address support determine the appropriate addressing to
   use.  */

static void
rs6000_setup_reg_addr_masks (void)
{
  ssize_t rc, reg, m, nregs;
  addr_mask_type any_addr_mask, addr_mask;

  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    {
      machine_mode m2 = (machine_mode) m;
      bool complex_p = false;
      bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
      size_t msize;

      if (COMPLEX_MODE_P (m2))
	{
	  complex_p = true;
	  m2 = GET_MODE_INNER (m2);
	}

      msize = GET_MODE_SIZE (m2);

      /* SDmode is special in that we want to access it only via REG+REG
	 addressing on power7 and above, since we want to use the LFIWZX and
	 STFIWZX instructions to load it.  */
      bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);

      any_addr_mask = 0;
      for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
	{
	  addr_mask = 0;
	  reg = reload_reg_map[rc].reg;

	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
	    {
	      bool small_int_vsx_p = (small_int_p
				      && (rc == RELOAD_REG_FPR
					  || rc == RELOAD_REG_VMX));

	      nregs = rs6000_hard_regno_nregs[m][reg];
	      addr_mask |= RELOAD_REG_VALID;

	      /* Indicate if the mode takes more than 1 physical register.  If
		 it takes a single register, indicate it can do REG+REG
		 addressing.  Small integers in VSX registers can only do
		 REG+REG addressing.  */
	      if (small_int_vsx_p)
		addr_mask |= RELOAD_REG_INDEXED;
	      else if (nregs > 1 || m == BLKmode || complex_p)
		addr_mask |= RELOAD_REG_MULTIPLE;
	      else
		addr_mask |= RELOAD_REG_INDEXED;

	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
		 addressing.  Restrict addressing on SPE for 64-bit types
		 because of the SUBREG hackery used to address 64-bit floats in
		 '32-bit' GPRs.  If we allow scalars into Altivec registers,
		 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.  */
	      if (TARGET_UPDATE
		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
		  && msize <= 8
		  && !VECTOR_MODE_P (m2)
		  && !FLOAT128_VECTOR_P (m2)
		  && !complex_p
		  && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
		  && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
		  && !(TARGET_E500_DOUBLE && msize == 8))
		{
		  addr_mask |= RELOAD_REG_PRE_INCDEC;

		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
		     we don't allow PRE_MODIFY for some multi-register
		     operations.  */
		  switch (m)
		    {
		    default:
		      addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case DImode:
		      if (TARGET_POWERPC64)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case DFmode:
		    case DDmode:
		      if (TARGET_HARD_FLOAT && TARGET_FPRS)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;
		    }
		}
	    }

	  /* GPR and FPR registers can do REG+OFFSET addressing, except
	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form addressing
	     for 64-bit scalars and 32-bit SFmode to altivec registers.  */
	  if ((addr_mask != 0) && !indexed_only_p
	      && msize <= 8
	      && (rc == RELOAD_REG_GPR
		  || ((msize == 8 || m2 == SFmode)
		      && (rc == RELOAD_REG_FPR
			  || (rc == RELOAD_REG_VMX
			      && TARGET_P9_DFORM_SCALAR)))))
	    addr_mask |= RELOAD_REG_OFFSET;

	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
	     instructions are enabled.  The offset for 128-bit VSX registers is
	     only 12-bits.  While GPRs can handle the full offset range, VSX
	     registers can only handle the restricted range.  */
	  else if ((addr_mask != 0) && !indexed_only_p
		   && msize == 16 && TARGET_P9_DFORM_VECTOR
		   && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
		       || (m2 == TImode && TARGET_VSX_TIMODE)))
	    {
	      addr_mask |= RELOAD_REG_OFFSET;
	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
		addr_mask |= RELOAD_REG_QUAD_OFFSET;
	    }

	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
	     addressing on 128-bit types.  */
	  if (rc == RELOAD_REG_VMX && msize == 16
	      && (addr_mask & RELOAD_REG_VALID) != 0)
	    addr_mask |= RELOAD_REG_AND_M16;

	  reg_addr[m].addr_mask[rc] = addr_mask;
	  any_addr_mask |= addr_mask;
	}

      reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
    }
}
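/* Added illustration (not from the original sources): under the rules above,
   a mode needing more than one register in a class (e.g. DFmode in 32-bit
   GPRs, nregs == 2) is marked RELOAD_REG_MULTIPLE rather than
   RELOAD_REG_INDEXED, while the same mode in FPRs (nregs == 1) gets
   RELOAD_REG_INDEXED and, being a scalar of 8 bytes or less, also
   RELOAD_REG_OFFSET.  */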
/* Initialize the various global tables that are based on register size.  */
static void
rs6000_init_hard_regno_mode_ok (bool global_init_p)
{
  ssize_t r, m, c;
  int align64;
  int align32;

  /* Precalculate REGNO_REG_CLASS.  */
  rs6000_regno_regclass[0] = GENERAL_REGS;
  for (r = 1; r < 32; ++r)
    rs6000_regno_regclass[r] = BASE_REGS;

  for (r = 32; r < 64; ++r)
    rs6000_regno_regclass[r] = FLOAT_REGS;

  for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
    rs6000_regno_regclass[r] = NO_REGS;

  for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
    rs6000_regno_regclass[r] = ALTIVEC_REGS;

  rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
  for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
    rs6000_regno_regclass[r] = CR_REGS;

  rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
  rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
  rs6000_regno_regclass[CA_REGNO] = NO_REGS;
  rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
  rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
  rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
  rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
  rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
  rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
  rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;

  /* Precalculate register class to simpler reload register class.  We don't
     need all of the register classes that are combinations of different
     classes, just the simple ones that have constraint letters.  */
  for (c = 0; c < N_REG_CLASSES; c++)
    reg_class_to_reg_type[c] = NO_REG_TYPE;

  reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
  reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
  reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
  reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
  reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;

  if (TARGET_VSX)
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
    }
  else
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
    }

  /* Precalculate the valid memory formats as well as the vector information,
     this must be set up before the rs6000_hard_regno_nregs_internal calls
     below.  */
  gcc_assert ((int)VECTOR_NONE == 0);
  memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
  memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));

  gcc_assert ((int)CODE_FOR_nothing == 0);
  memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));

  gcc_assert ((int)NO_REGS == 0);
  memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
  /* The VSX hardware allows native alignment for vectors, but we can control
     whether the compiler believes it can use native alignment or must still
     use 128-bit alignment.  */
  if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
    {
      align64 = 64;
      align32 = 32;
    }
  else
    {
      align64 = 128;
      align32 = 128;
    }

  /* KF mode (IEEE 128-bit in VSX registers).  We do not have arithmetic, so
     only set the memory modes.  Include TFmode if -mabi=ieeelongdouble.  */
  if (TARGET_FLOAT128_TYPE)
    {
      rs6000_vector_mem[KFmode] = VECTOR_VSX;
      rs6000_vector_align[KFmode] = 128;

      if (FLOAT128_IEEE_P (TFmode))
	{
	  rs6000_vector_mem[TFmode] = VECTOR_VSX;
	  rs6000_vector_align[TFmode] = 128;
	}
    }

  /* V2DF mode, VSX only.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
      rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
      rs6000_vector_align[V2DFmode] = align64;
    }

  /* V4SF mode, either VSX or Altivec.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
      rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
      rs6000_vector_align[V4SFmode] = align32;
    }
  else if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SFmode] = align32;
    }

  /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
     and stores.  */
  if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SImode] = align32;
      rs6000_vector_align[V8HImode] = align32;
      rs6000_vector_align[V16QImode] = align32;

      if (TARGET_VSX)
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_VSX;
	  rs6000_vector_mem[V8HImode] = VECTOR_VSX;
	  rs6000_vector_mem[V16QImode] = VECTOR_VSX;
	}
      else
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
	}
    }

  /* V2DImode, full mode depends on ISA 2.07 vector mode.  Allow under VSX to
     do insert/splat/extract.  Altivec doesn't have 64-bit integer support.  */
  if (TARGET_VSX)
    {
      rs6000_vector_mem[V2DImode] = VECTOR_VSX;
      rs6000_vector_unit[V2DImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V2DImode] = align64;

      rs6000_vector_mem[V1TImode] = VECTOR_VSX;
      rs6000_vector_unit[V1TImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V1TImode] = 128;
    }

  /* DFmode, see if we want to use the VSX unit.  Memory is handled
     differently, so don't set rs6000_vector_mem.  */
  if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
    {
      rs6000_vector_unit[DFmode] = VECTOR_VSX;
      rs6000_vector_align[DFmode] = 64;
    }

  /* SFmode, see if we want to use the VSX unit.  */
  if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
    {
      rs6000_vector_unit[SFmode] = VECTOR_VSX;
      rs6000_vector_align[SFmode] = 32;
    }

  /* Allow TImode in VSX register and set the VSX memory macros.  */
  if (TARGET_VSX && TARGET_VSX_TIMODE)
    {
      rs6000_vector_mem[TImode] = VECTOR_VSX;
      rs6000_vector_align[TImode] = align64;
    }

  /* TODO add SPE and paired floating point vector support.  */
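  /* Added note (illustrative only): the effect of the align32/align64 setup
     above is that with -mvsx and without -mvsx-align-128, V2DFmode is given
     64-bit alignment and V4SFmode 32-bit alignment, while otherwise both
     fall back to the traditional 128-bit AltiVec alignment.  */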
  /* Register class constraints for the constraints that depend on compile
     switches.  When the VSX code was added, different constraints were added
     based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
     of the VSX registers are used.  The register classes for scalar floating
     point types is set, based on whether we allow that type into the upper
     (Altivec) registers.  GCC has register classes to target the Altivec
     registers for load/store operations, to select using a VSX memory
     operation instead of the traditional floating point operation.  The
     constraints are:

	d  - Register class to use with traditional DFmode instructions.
	f  - Register class to use with traditional SFmode instructions.
	v  - Altivec register.
	wa - Any VSX register.
	wc - Reserved to represent individual CR bits (used in LLVM).
	wd - Preferred register class for V2DFmode.
	wf - Preferred register class for V4SFmode.
	wg - Float register for power6x move insns.
	wh - FP register for direct move instructions.
	wi - FP or VSX register to hold 64-bit integers for VSX insns.
	wj - FP or VSX register to hold 64-bit integers for direct moves.
	wk - FP or VSX register to hold 64-bit doubles for direct moves.
	wl - Float register if we can do 32-bit signed int loads.
	wm - VSX register for ISA 2.07 direct move operations.
	wn - always NO_REGS.
	wr - GPR if 64-bit mode is permitted.
	ws - Register class to do ISA 2.06 DF operations.
	wt - VSX register for TImode in VSX registers.
	wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
	wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
	ww - Register class to do SF conversions in with VSX operations.
	wx - Float register if we can do 32-bit int stores.
	wy - Register class to do ISA 2.07 SF operations.
	wz - Float register if we can do 32-bit unsigned int loads.
	wH - Altivec register if SImode is allowed in VSX registers.
	wI - VSX register if SImode is allowed in VSX registers.
	wJ - VSX register if QImode/HImode are allowed in VSX registers.
	wK - Altivec register if QImode/HImode are allowed in VSX registers.  */
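  /* Added example (hypothetical pattern, not from the original sources): a
     machine description operand constraint such as "=ws,!r" would prefer
     the register class chosen for RS6000_CONSTRAINT_ws below (VSX_REGS when
     DFmode may live in the upper registers, FLOAT_REGS otherwise) and only
     fall back to GPRs at a cost.  */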
  if (TARGET_HARD_FLOAT && TARGET_FPRS)
    rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;	/* SFmode  */

  if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
    rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;	/* DFmode  */

  if (TARGET_VSX)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;	/* V2DFmode  */
      rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;	/* V4SFmode  */

      if (TARGET_VSX_TIMODE)
	rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;	/* TImode  */

      if (TARGET_UPPER_REGS_DF)					/* DFmode  */
	{
	  rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
	  rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
	}
      else
	rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;

      if (TARGET_UPPER_REGS_DI)					/* DImode  */
	rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
      else
	rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
    }

  /* Add conditional constraints based on various options, to allow us to
     collapse multiple insn patterns.  */
  if (TARGET_ALTIVEC)
    rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;

  if (TARGET_MFPGPR)						/* DFmode  */
    rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;

  if (TARGET_LFIWAX)
    rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;	/* DImode  */

  if (TARGET_DIRECT_MOVE)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wj]			/* DImode  */
	= rs6000_constraints[RS6000_CONSTRAINT_wi];
      rs6000_constraints[RS6000_CONSTRAINT_wk]			/* DFmode  */
	= rs6000_constraints[RS6000_CONSTRAINT_ws];
      rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
    }

  if (TARGET_POWERPC64)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
    }

  if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)			/* SFmode  */
    {
      rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
    }
  else if (TARGET_P8_VECTOR)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
    }
  else if (TARGET_VSX)
    rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;

  if (TARGET_STFIWX)
    rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;	/* DImode  */

  if (TARGET_LFIWZX)
    rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;	/* DImode  */

  if (TARGET_FLOAT128_TYPE)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS;	/* KFmode  */
      if (FLOAT128_IEEE_P (TFmode))
	rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS;	/* TFmode  */
    }

  /* Support for new D-form instructions.  */
  if (TARGET_P9_DFORM_SCALAR)
    rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;

  /* Support for ISA 3.0 (power9) vectors.  */
  if (TARGET_P9_VECTOR)
    rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;

  /* Support for new direct moves (ISA 3.0 + 64bit).  */
  if (TARGET_DIRECT_MOVE_128)
    rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;

  /* Support small integers in VSX registers.  */
  if (TARGET_VSX_SMALL_INTEGER)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
      if (TARGET_P9_VECTOR)
	{
	  rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
	  rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
	}
    }
  /* Set up the reload helper and direct move functions.  */
  if (TARGET_VSX || TARGET_ALTIVEC)
    {
      if (TARGET_64BIT)
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_di_load;
	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_di_store;
	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_di_load;
	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_di_store;
	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_di_load;
	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_di_store;
	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_di_load;
	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_di_store;
	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_di_load;
	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_di_store;
	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_di_load;
	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_di_store;
	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_di_load;
	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_di_store;
	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_di_load;
	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_di_store;
	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_di_load;
	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_di_store;
	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_di_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_di_load;
	    }

	  if (FLOAT128_VECTOR_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_di_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_di_load;
	    }

	  if (TARGET_VSX_TIMODE)
	    {
	      reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
	      reg_addr[TImode].reload_load  = CODE_FOR_reload_ti_di_load;
	    }

	  if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
	    {
	      reg_addr[TImode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxti;
	      reg_addr[V1TImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv1ti;
	      reg_addr[V2DFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2df;
	      reg_addr[V2DImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2di;
	      reg_addr[V4SFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4sf;
	      reg_addr[V4SImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4si;
	      reg_addr[V8HImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8hi;
	      reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
	      reg_addr[SFmode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxsf;

	      reg_addr[TImode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprti;
	      reg_addr[V1TImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv1ti;
	      reg_addr[V2DFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2df;
	      reg_addr[V2DImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2di;
	      reg_addr[V4SFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4sf;
	      reg_addr[V4SImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4si;
	      reg_addr[V8HImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8hi;
	      reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
	      reg_addr[SFmode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprsf;

	      if (FLOAT128_VECTOR_P (KFmode))
		{
		  reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
		  reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
		}

	      if (FLOAT128_VECTOR_P (TFmode))
		{
		  reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
		  reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
		}
	    }
	}
      else
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_si_load;
	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_si_store;
	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_si_load;
	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_si_store;
	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_si_load;
	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_si_store;
	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_si_load;
	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_si_store;
	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_si_load;
	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_si_store;
	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_si_load;
	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_si_store;
	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_si_load;
	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_si_store;
	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_si_load;
	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_si_store;
	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_si_load;
	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_si_store;
	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_si_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_si_load;
	    }

	  if (FLOAT128_IEEE_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_si_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_si_load;
	    }

	  if (TARGET_VSX_TIMODE)
	    {
	      reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
	      reg_addr[TImode].reload_load  = CODE_FOR_reload_ti_si_load;
	    }

	  if (TARGET_DIRECT_MOVE)
	    {
	      reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
	      reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
	      reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
	    }
	}

      if (TARGET_UPPER_REGS_DF)
	reg_addr[DFmode].scalar_in_vmx_p = true;

      if (TARGET_UPPER_REGS_DI)
	reg_addr[DImode].scalar_in_vmx_p = true;

      if (TARGET_UPPER_REGS_SF)
	reg_addr[SFmode].scalar_in_vmx_p = true;

      if (TARGET_VSX_SMALL_INTEGER)
	{
	  reg_addr[SImode].scalar_in_vmx_p = true;
	  if (TARGET_P9_VECTOR)
	    {
	      reg_addr[HImode].scalar_in_vmx_p = true;
	      reg_addr[QImode].scalar_in_vmx_p = true;
	    }
	}
    }
  /* Setup the fusion operations.  */
  if (TARGET_P8_FUSION)
    {
      reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
      reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
      reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
      if (TARGET_POWERPC64)
	reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
    }

  if (TARGET_P9_FUSION)
    {
      struct fuse_insns {
	enum machine_mode mode;			/* mode of the fused type.  */
	enum machine_mode pmode;		/* pointer mode.  */
	enum rs6000_reload_reg_type rtype;	/* register type.  */
	enum insn_code load;			/* load insn.  */
	enum insn_code store;			/* store insn.  */
      };

      static const struct fuse_insns addis_insns[] = {
	{ SFmode, DImode, RELOAD_REG_FPR,
	  CODE_FOR_fusion_vsx_di_sf_load,
	  CODE_FOR_fusion_vsx_di_sf_store },

	{ SFmode, SImode, RELOAD_REG_FPR,
	  CODE_FOR_fusion_vsx_si_sf_load,
	  CODE_FOR_fusion_vsx_si_sf_store },

	{ DFmode, DImode, RELOAD_REG_FPR,
	  CODE_FOR_fusion_vsx_di_df_load,
	  CODE_FOR_fusion_vsx_di_df_store },

	{ DFmode, SImode, RELOAD_REG_FPR,
	  CODE_FOR_fusion_vsx_si_df_load,
	  CODE_FOR_fusion_vsx_si_df_store },

	{ DImode, DImode, RELOAD_REG_FPR,
	  CODE_FOR_fusion_vsx_di_di_load,
	  CODE_FOR_fusion_vsx_di_di_store },

	{ DImode, SImode, RELOAD_REG_FPR,
	  CODE_FOR_fusion_vsx_si_di_load,
	  CODE_FOR_fusion_vsx_si_di_store },

	{ QImode, DImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_di_qi_load,
	  CODE_FOR_fusion_gpr_di_qi_store },

	{ QImode, SImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_si_qi_load,
	  CODE_FOR_fusion_gpr_si_qi_store },

	{ HImode, DImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_di_hi_load,
	  CODE_FOR_fusion_gpr_di_hi_store },

	{ HImode, SImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_si_hi_load,
	  CODE_FOR_fusion_gpr_si_hi_store },

	{ SImode, DImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_di_si_load,
	  CODE_FOR_fusion_gpr_di_si_store },

	{ SImode, SImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_si_si_load,
	  CODE_FOR_fusion_gpr_si_si_store },

	{ SFmode, DImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_di_sf_load,
	  CODE_FOR_fusion_gpr_di_sf_store },

	{ SFmode, SImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_si_sf_load,
	  CODE_FOR_fusion_gpr_si_sf_store },

	{ DImode, DImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_di_di_load,
	  CODE_FOR_fusion_gpr_di_di_store },

	{ DFmode, DImode, RELOAD_REG_GPR,
	  CODE_FOR_fusion_gpr_di_df_load,
	  CODE_FOR_fusion_gpr_di_df_store },
      };

      enum machine_mode cur_pmode = Pmode;
      size_t i;

      for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
	{
	  enum machine_mode xmode = addis_insns[i].mode;
	  enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;

	  if (addis_insns[i].pmode != cur_pmode)
	    continue;

	  if (rtype == RELOAD_REG_FPR
	      && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
	    continue;

	  reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
	  reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;

	  if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
	    {
	      reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
		= addis_insns[i].load;
	      reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
		= addis_insns[i].store;
	    }
	}
    }

  /* Note which types we support fusing TOC setup plus memory insn.  We only do
     fused TOCs for medium/large code models.  */
  if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
      && (TARGET_CMODEL != CMODEL_SMALL))
    {
      reg_addr[QImode].fused_toc = true;
      reg_addr[HImode].fused_toc = true;
      reg_addr[SImode].fused_toc = true;
      reg_addr[DImode].fused_toc = true;
      if (TARGET_HARD_FLOAT && TARGET_FPRS)
	{
	  if (TARGET_SINGLE_FLOAT)
	    reg_addr[SFmode].fused_toc = true;
	  if (TARGET_DOUBLE_FLOAT)
	    reg_addr[DFmode].fused_toc = true;
	}
    }
  /* Precalculate HARD_REGNO_NREGS.  */
  for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      rs6000_hard_regno_nregs[m][r]
	= rs6000_hard_regno_nregs_internal (r, (machine_mode)m);

  /* Precalculate HARD_REGNO_MODE_OK.  */
  for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
	rs6000_hard_regno_mode_ok_p[m][r] = true;
  /* Precalculate CLASS_MAX_NREGS sizes.  */
  for (c = 0; c < LIM_REG_CLASSES; ++c)
    {
      int reg_size;

      if (TARGET_VSX && VSX_REG_CLASS_P (c))
	reg_size = UNITS_PER_VSX_WORD;

      else if (c == ALTIVEC_REGS)
	reg_size = UNITS_PER_ALTIVEC_WORD;

      else if (c == FLOAT_REGS)
	reg_size = UNITS_PER_FP_WORD;

      else
	reg_size = UNITS_PER_WORD;

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	{
	  machine_mode m2 = (machine_mode)m;
	  int reg_size2 = reg_size;

	  /* TDmode & IBM 128-bit floating point always takes 2 registers,
	     even in VSX.  */
	  if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
	    reg_size2 = UNITS_PER_FP_WORD;

	  rs6000_class_max_nregs[m][c]
	    = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
	}
    }

  if (TARGET_E500_DOUBLE)
    rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
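  /* Worked example of the rounding above: V2DFmode is 16 bytes, so with
     reg_size2 == UNITS_PER_VSX_WORD (16) it needs (16 + 16 - 1) / 16 = 1
     register, while an IBM 128-bit floating point mode of the same size
     forces reg_size2 to UNITS_PER_FP_WORD (8) and therefore needs
     (16 + 8 - 1) / 8 = 2 registers.  */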
  /* Calculate which modes to automatically generate code to use the
     reciprocal divide and square root instructions.  In the future, possibly
     automatically generate the instructions even if the user did not specify
     -mrecip.  The older machines' double precision reciprocal sqrt estimate is
     not accurate enough.  */
  memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
  if (TARGET_FRES)
    rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (TARGET_FRE)
    rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;

  if (TARGET_FRSQRTES)
    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (TARGET_FRSQRTE)
    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;

  if (rs6000_recip_control)
    {
      if (!flag_finite_math_only)
	warning (0, "-mrecip requires -ffinite-math or -ffast-math");
      if (flag_trapping_math)
	warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
      if (!flag_reciprocal_math)
	warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
      if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
	{
	  if (RS6000_RECIP_HAVE_RE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_DIV) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_DIV) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
	}
    }
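  /* Illustrative note: once both HAVE_RE and AUTO_RE are set for a mode,
     a division x/y can later be expanded as x * fre(y) refined by
     Newton-Raphson iterations instead of a full fdiv; the warnings above
     exist because that expansion is only numerically acceptable under the
     -ffast-math style flags being checked.  */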
  /* Update the addr mask bits in reg_addr to help secondary reload and the
     legitimate address support figure out the appropriate addressing to
     use.  */
  rs6000_setup_reg_addr_masks ();

  if (global_init_p || TARGET_DEBUG_TARGET)
    {
      if (TARGET_DEBUG_REG)
	rs6000_debug_reg_global ();

      if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
	fprintf (stderr,
		 "SImode variable mult cost       = %d\n"
		 "SImode constant mult cost       = %d\n"
		 "SImode short constant mult cost = %d\n"
		 "DImode multiplication cost      = %d\n"
		 "SImode division cost            = %d\n"
		 "DImode division cost            = %d\n"
		 "Simple fp operation cost        = %d\n"
		 "DFmode multiplication cost      = %d\n"
		 "SFmode division cost            = %d\n"
		 "DFmode division cost            = %d\n"
		 "cache line size                 = %d\n"
		 "l1 cache size                   = %d\n"
		 "l2 cache size                   = %d\n"
		 "simultaneous prefetches         = %d\n"
		 "\n",
		 rs6000_cost->mulsi,
		 rs6000_cost->mulsi_const,
		 rs6000_cost->mulsi_const9,
		 rs6000_cost->muldi,
		 rs6000_cost->divsi,
		 rs6000_cost->divdi,
		 rs6000_cost->fp,
		 rs6000_cost->dmul,
		 rs6000_cost->sdiv,
		 rs6000_cost->ddiv,
		 rs6000_cost->cache_line_size,
		 rs6000_cost->l1_cache_size,
		 rs6000_cost->l2_cache_size,
		 rs6000_cost->simultaneous_prefetches);
    }
}
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  */
static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  if (DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
      darwin_one_byte_bool = 1;

  if (TARGET_64BIT && ! TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "-m64 requires PowerPC64 architecture, enabling");
    }
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;
      rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     Altivec.  */
  if (!flag_mkernel && !flag_apple_kext
      && TARGET_64BIT
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) has explicitly overridden
     it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
     G4 unless targeting the kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && ! global_options_set.x_rs6000_cpu_index)
    {
      rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
    }
}
/* If not otherwise specified by a target, make 'long double' equivalent to
   'double'.  */

#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
#endif
/* Return the builtin mask of the various options used that could affect which
   builtins were used.  In the past we used target_flags, but we've run out of
   bits, and some options like SPE and PAIRED are no longer in
   target_flags.  */

HOST_WIDE_INT
rs6000_builtin_mask_calculate (void)
{
  return (((TARGET_ALTIVEC)		    ? RS6000_BTM_ALTIVEC   : 0)
	  | ((TARGET_VSX)		    ? RS6000_BTM_VSX	   : 0)
	  | ((TARGET_SPE)		    ? RS6000_BTM_SPE	   : 0)
	  | ((TARGET_PAIRED_FLOAT)	    ? RS6000_BTM_PAIRED	   : 0)
	  | ((TARGET_FRE)		    ? RS6000_BTM_FRE	   : 0)
	  | ((TARGET_FRES)		    ? RS6000_BTM_FRES	   : 0)
	  | ((TARGET_FRSQRTE)		    ? RS6000_BTM_FRSQRTE   : 0)
	  | ((TARGET_FRSQRTES)		    ? RS6000_BTM_FRSQRTES  : 0)
	  | ((TARGET_POPCNTD)		    ? RS6000_BTM_POPCNTD   : 0)
	  | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL      : 0)
	  | ((TARGET_P8_VECTOR)		    ? RS6000_BTM_P8_VECTOR : 0)
	  | ((TARGET_P9_VECTOR)		    ? RS6000_BTM_P9_VECTOR : 0)
	  | ((TARGET_P9_MISC)		    ? RS6000_BTM_P9_MISC   : 0)
	  | ((TARGET_MODULO)		    ? RS6000_BTM_MODULO    : 0)
	  | ((TARGET_64BIT)		    ? RS6000_BTM_64BIT     : 0)
	  | ((TARGET_CRYPTO)		    ? RS6000_BTM_CRYPTO	   : 0)
	  | ((TARGET_HTM)		    ? RS6000_BTM_HTM	   : 0)
	  | ((TARGET_DFP)		    ? RS6000_BTM_DFP	   : 0)
	  | ((TARGET_HARD_FLOAT)	    ? RS6000_BTM_HARD_FLOAT : 0)
	  | ((TARGET_LONG_DOUBLE_128)	    ? RS6000_BTM_LDBL128   : 0)
	  | ((TARGET_FLOAT128_TYPE)	    ? RS6000_BTM_FLOAT128  : 0));
}
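/* Usage sketch (pseudo-code; bif_mask stands for a built-in's required
   feature bits, which are recorded elsewhere in this file):

       if ((bif_mask & rs6000_builtin_mask) != bif_mask)
	 ... the built-in is not available for the current target ...

   Recomputing the mask whenever options change keeps target attributes
   and pragmas consistent with built-in availability.  */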
/* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
   to clobber the XER[CA] bit because clobbering that bit without telling
   the compiler worked just fine with versions of GCC before GCC 5, and
   breaking a lot of older code in ways that are hard to track down is
   not such a great idea.  */

static rtx_insn *
rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
		      vec<const char *> &/*constraints*/,
		      vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
{
  clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
  SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
  return NULL;
}
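/* Example of the problem the hook above avoids (illustrative): an asm
   such as

       asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   modifies XER[CA] without declaring it, so the CA clobber recorded by
   the hook keeps the compiler from carrying a live carry bit across any
   asm statement.  */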
/* Override command line options.

   Combine build-specific configuration information with options
   specified on the command line to set various state variables which
   influence code generation, optimization, and expansion of built-in
   functions.  Assure that command-line configuration preferences are
   compatible with each other and with the build configuration; issue
   warnings while adjusting configuration or error messages while
   rejecting configuration.

   Upon entry to this function:

     This function is called once at the beginning of
     compilation, and then again at the start and end of compiling
     each section of code that has a different configuration, as
     indicated, for example, by adding the

       __attribute__((__target__("cpu=power9")))

     qualifier to a function definition or, for example, by bracketing
     code between

       #pragma GCC target("altivec")

     and

       #pragma GCC reset_options

     directives.  Parameter global_init_p is true for the initial
     invocation, which initializes global variables, and false for all
     subsequent invocations.

   Various global state information is assumed to be valid.  This
   includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
   default CPU specified at build configure time, TARGET_DEFAULT,
   representing the default set of option flags for the default
   target, and global_options_set.x_rs6000_isa_flags, representing
   which options were requested on the command line.

   Upon return from this function:

     rs6000_isa_flags_explicit has a non-zero bit for each flag that
     was set by name on the command line.  Additionally, if certain
     attributes are automatically enabled or disabled by this function
     in order to assure compatibility between options and
     configuration, the flags associated with those attributes are
     also set.  By setting these "explicit bits", we avoid the risk
     that other code might accidentally overwrite these particular
     attributes with "default values".

     The various bits of rs6000_isa_flags are set to indicate the
     target options that have been selected for the most current
     compilation efforts.  This has the effect of also turning on the
     associated TARGET_XXX values since these are macros which are
     generally defined to test the corresponding bit of the
     rs6000_isa_flags variable.

     The variable rs6000_builtin_mask is set to represent the target
     options for the most current compilation efforts, consistent with
     the current contents of rs6000_isa_flags.  This variable controls
     expansion of built-in functions.

     Various other global variables and fields of global structures
     (over 50 in all) are initialized to reflect the desired options
     for the most current compilation efforts.  */
static bool
rs6000_option_override_internal (bool global_init_p)
{
  bool ret = true;
  bool have_cpu = false;

  /* The default cpu requested at configure time, if any.  */
  const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;

  HOST_WIDE_INT set_masks;
  int cpu_index, tune_index;
  struct cl_target_option *main_target_opt
    = ((global_init_p || target_option_default_node == NULL)
       ? NULL : TREE_TARGET_OPTION (target_option_default_node));
  /* Print defaults.  */
  if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
    rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  /* Remember the explicit arguments.  */
  if (global_init_p)
    rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;

  /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
     library functions, so warn about it.  The flag may be useful for
     performance studies from time to time though, so don't disable it
     entirely.  */
  if (global_options_set.x_rs6000_alignment_flags
      && rs6000_alignment_flags == MASK_ALIGN_POWER
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    warning (0, "-malign-power is not supported for 64-bit Darwin;"
	     " it is incompatible with the installed C and C++ libraries");

  /* Numerous experiments show that IRA-based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3 && global_init_p
      && !global_options_set.x_flag_ira_loop_pressure)
    flag_ira_loop_pressure = 1;

  /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
     for tracebacks to be complete but not if any -fasynchronous-unwind-tables
     options were already specified.  */
  if (flag_sanitize & SANITIZE_USER_ADDRESS
      && !global_options_set.x_flag_asynchronous_unwind_tables)
    flag_asynchronous_unwind_tables = 1;

  /* Set the pointer size.  */
  if (TARGET_64BIT)
    {
      rs6000_pmode = (int)DImode;
      rs6000_pointer_size = 64;
    }
  else
    {
      rs6000_pmode = (int)SImode;
      rs6000_pointer_size = 32;
    }
  /* Some OSs don't support saving the high part of 64-bit registers on context
     switch.  Other OSs don't support saving Altivec registers.  On those OSs,
     we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
     if the user wants either, the user must explicitly specify them and we
     won't interfere with the user's specification.  */

  set_masks = POWERPC_MASKS;
#ifdef OS_MISSING_POWERPC64
  if (OS_MISSING_POWERPC64)
    set_masks &= ~OPTION_MASK_POWERPC64;
#endif
#ifdef OS_MISSING_ALTIVEC
  if (OS_MISSING_ALTIVEC)
    set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
#endif

  /* Don't override by the processor default if given explicitly.  */
  set_masks &= ~rs6000_isa_flags_explicit;
  /* Process the -mcpu=<xxx> and -mtune=<xxx> argument.  If the user changed
     the cpu in a target attribute or pragma, but did not specify a tuning
     option, use the cpu for the tuning option rather than the option specified
     with -mtune on the command line.  Process a '--with-cpu' configuration
     request as an implicit --cpu.  */
  if (rs6000_cpu_index >= 0)
    {
      cpu_index = rs6000_cpu_index;
      have_cpu = true;
    }
  else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
    {
      rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
      have_cpu = true;
    }
  else if (implicit_cpu)
    {
      rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
      have_cpu = true;
    }
  else
    {
      /* PowerPC 64-bit LE requires at least ISA 2.07.  */
      const char *default_cpu = ((!TARGET_POWERPC64)
				 ? "powerpc"
				 : ((BYTES_BIG_ENDIAN)
				    ? "powerpc64"
				    : "powerpc64le"));

      rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
      have_cpu = false;
    }

  gcc_assert (cpu_index >= 0);
  if (have_cpu)
    {
#ifndef HAVE_AS_POWER9
      if (processor_target_table[rs6000_cpu_index].processor
	  == PROCESSOR_POWER9)
	{
	  have_cpu = false;
	  warning (0, "will not generate power9 instructions because "
		   "assembler lacks power9 support");
	}
#endif
#ifndef HAVE_AS_POWER8
      if (processor_target_table[rs6000_cpu_index].processor
	  == PROCESSOR_POWER8)
	{
	  have_cpu = false;
	  warning (0, "will not generate power8 instructions because "
		   "assembler lacks power8 support");
	}
#endif
#ifndef HAVE_AS_POPCNTD
      if (processor_target_table[rs6000_cpu_index].processor
	  == PROCESSOR_POWER7)
	{
	  have_cpu = false;
	  warning (0, "will not generate power7 instructions because "
		   "assembler lacks power7 support");
	}
#endif
#ifndef HAVE_AS_DFP
      if (processor_target_table[rs6000_cpu_index].processor
	  == PROCESSOR_POWER6)
	{
	  have_cpu = false;
	  warning (0, "will not generate power6 instructions because "
		   "assembler lacks power6 support");
	}
#endif
#ifndef HAVE_AS_POPCNTB
      if (processor_target_table[rs6000_cpu_index].processor
	  == PROCESSOR_POWER5)
	{
	  have_cpu = false;
	  warning (0, "will not generate power5 instructions because "
		   "assembler lacks power5 support");
	}
#endif

      if (!have_cpu)
	{
	  /* PowerPC 64-bit LE requires at least ISA 2.07.  */
	  const char *default_cpu = (!TARGET_POWERPC64
				     ? "powerpc"
				     : (BYTES_BIG_ENDIAN
					? "powerpc64"
					: "powerpc64le"));

	  rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
	}
    }
  /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
     compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
     with those from the cpu, except for options that were explicitly set.  If
     we don't have a cpu, do not override the target bits set in
     TARGET_DEFAULT.  */
  if (have_cpu)
    {
      rs6000_isa_flags &= ~set_masks;
      rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
			   & set_masks);
    }
  else
    {
      /* If no -mcpu=<xxx>, inherit any default options that were cleared via
	 POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
	 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  When we
	 switched to using rs6000_isa_flags, we need to do the initialization
	 here.

	 If there is a TARGET_DEFAULT, use that.  Otherwise fall back to using
	 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults.  */
      HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
			     : processor_target_table[cpu_index].target_enable);
      rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
    }
  if (rs6000_tune_index >= 0)
    tune_index = rs6000_tune_index;
  else if (have_cpu)
    rs6000_tune_index = tune_index = cpu_index;
  else
    {
      size_t i;
      enum processor_type tune_proc
	= (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);

      tune_index = -1;
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (processor_target_table[i].processor == tune_proc)
	  {
	    rs6000_tune_index = tune_index = i;
	    break;
	  }
    }

  gcc_assert (tune_index >= 0);
  rs6000_cpu = processor_target_table[tune_index].processor;
  /* Pick defaults for SPE related control flags.  Do this early to make sure
     that the TARGET_ macros are representative ASAP.  */
  {
    int spe_capable_cpu =
      (rs6000_cpu == PROCESSOR_PPC8540
       || rs6000_cpu == PROCESSOR_PPC8548);

    if (!global_options_set.x_rs6000_spe_abi)
      rs6000_spe_abi = spe_capable_cpu;

    if (!global_options_set.x_rs6000_spe)
      rs6000_spe = spe_capable_cpu;

    if (!global_options_set.x_rs6000_float_gprs)
      rs6000_float_gprs =
	(rs6000_cpu == PROCESSOR_PPC8540 ? 1
	 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
	 : 0);
  }

  if (global_options_set.x_rs6000_spe_abi
      && rs6000_spe_abi
      && !TARGET_SPE_ABI)
    error ("not configured for SPE ABI");

  if (global_options_set.x_rs6000_spe
      && rs6000_spe
      && !TARGET_SPE)
    error ("not configured for SPE instruction set");

  if (main_target_opt != NULL
      && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
	  || (main_target_opt->x_rs6000_spe != rs6000_spe)
	  || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
    error ("target attribute or pragma changes SPE ABI");
  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
      || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
      || rs6000_cpu == PROCESSOR_PPCE5500)
    {
      if (TARGET_ALTIVEC)
	error ("AltiVec not supported in this target");
      if (TARGET_SPE)
	error ("SPE not supported in this target");
    }
  if (rs6000_cpu == PROCESSOR_PPCE6500)
    {
      if (TARGET_SPE)
	error ("SPE not supported in this target");
    }
  /* Disable Cell microcode if we are optimizing for the Cell
     and not optimizing for size.  */
  if (rs6000_gen_cell_microcode == -1)
    rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
				  && !optimize_size);

  /* If we are optimizing big endian systems for space and it's OK to
     use instructions that would be microcoded on the Cell, use the
     load/store multiple and string instructions.  */
  if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
						      | OPTION_MASK_STRING);
  /* Don't allow -mmultiple or -mstring on little endian systems
     unless the cpu is a 750, because the hardware doesn't support the
     instructions used in little endian mode, and causes an alignment
     trap.  The 750 does not cause an alignment trap (except when the
     target is unaligned).  */

  if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
    {
      if (TARGET_MULTIPLE)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
	  if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
	    warning (0, "-mmultiple is not supported on little endian systems");
	}

      if (TARGET_STRING)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_STRING;
	  if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
	    warning (0, "-mstring is not supported on little endian systems");
	}
    }
  /* If little-endian, default to -mstrict-align on older processors.
     Testing for htm matches power8 and later.  */
  if (!BYTES_BIG_ENDIAN
      && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;

  /* -maltivec={le,be} implies -maltivec.  */
  if (rs6000_altivec_element_order != 0)
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Disallow -maltivec=le in big endian mode for now.  This is not
     known to be useful for anyone.  */
  if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
    {
      warning (0, N_("-maltivec=le not allowed for big-endian targets"));
      rs6000_altivec_element_order = 0;
    }
  /* Add some warnings for VSX.  */
  if (TARGET_VSX)
    {
      const char *msg = NULL;
      if (!TARGET_HARD_FLOAT || !TARGET_FPRS
	  || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
	    msg = N_("-mvsx requires hardware floating point");
	  else
	    {
	      rs6000_isa_flags &= ~ OPTION_MASK_VSX;
	      rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
	    }
	}
      else if (TARGET_PAIRED_FLOAT)
	msg = N_("-mvsx and -mpaired are incompatible");
      else if (TARGET_AVOID_XFORM > 0)
	msg = N_("-mvsx needs indexed addressing");
      else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
				   & OPTION_MASK_ALTIVEC))
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
	    msg = N_("-mvsx and -mno-altivec are incompatible");
	  else
	    msg = N_("-mno-altivec disables vsx");
	}

      if (msg)
	{
	  warning (0, msg);
	  rs6000_isa_flags &= ~ OPTION_MASK_VSX;
	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
	}
    }
  /* If hard-float/altivec/vsx were explicitly turned off then don't allow
     the -mcpu setting to enable options that conflict.  */
  if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
      && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
				       | OPTION_MASK_ALTIVEC
				       | OPTION_MASK_VSX)) != 0)
    rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
			   | OPTION_MASK_DIRECT_MOVE)
			  & ~rs6000_isa_flags_explicit);

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
  /* For the newer switches (vsx, dfp, etc.) set some of the older options,
     unless the user explicitly used the -mno-<option> to disable the code.  */
  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
      || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0)
    rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);

  else if (TARGET_P9_MINMAX)
    {
      if (have_cpu)
	{
	  if (cpu_index == PROCESSOR_POWER9)
	    {
	      /* legacy behavior: allow -mcpu=power9 with certain
		 capabilities explicitly disabled.  */
	      rs6000_isa_flags |=
		(ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
	      /* However, reject this automatic fix if certain
		 capabilities required for TARGET_P9_MINMAX support
		 have been explicitly disabled.  */
	      if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
		    | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags)
		  != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
		      | OPTION_MASK_UPPER_REGS_DF))
		error ("-mpower9-minmax incompatible with explicitly disabled options");
	    }
	  else
	    error ("Power9 target option is incompatible with -mcpu=<xxx> for "
		   "<xxx> less than power9");
	}
      else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
	       != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
		   & rs6000_isa_flags_explicit))
	/* Enforce that none of the ISA_3_0_MASKS_SERVER flags
	   were explicitly cleared.  */
	error ("-mpower9-minmax incompatible with explicitly disabled options");
      else
	rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
    }
  else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
    rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
  else if (TARGET_VSX)
    rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
  else if (TARGET_POPCNTD)
    rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
  else if (TARGET_DFP)
    rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
  else if (TARGET_CMPB)
    rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
  else if (TARGET_FPRND)
    rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
  else if (TARGET_POPCNTB)
    rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
  else if (TARGET_ALTIVEC)
    rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
  if (TARGET_CRYPTO && !TARGET_ALTIVEC)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
	error ("-mcrypto requires -maltivec");
      rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
    }

  if (TARGET_DIRECT_MOVE && !TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
	error ("-mdirect-move requires -mvsx");
      rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
    }

  if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
	error ("-mpower8-vector requires -maltivec");
      rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
    }

  if (TARGET_P8_VECTOR && !TARGET_VSX)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
	  && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
	error ("-mpower8-vector requires -mvsx");
      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
	    rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
	}
      else
	{
	  /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
	     not explicit.  */
	  rs6000_isa_flags |= OPTION_MASK_VSX;
	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
	}
    }

  if (TARGET_VSX_TIMODE && !TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
	error ("-mvsx-timode requires -mvsx");
      rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
    }

  if (TARGET_DFP && !TARGET_HARD_FLOAT)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
	error ("-mhard-dfp requires -mhard-float");
      rs6000_isa_flags &= ~OPTION_MASK_DFP;
    }
  /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
     and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
     set the individual option.  */
  if (TARGET_UPPER_REGS > 0)
    {
      if (TARGET_VSX
	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
	{
	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
	}
      if (TARGET_VSX
	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
	{
	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
	}
      if (TARGET_P8_VECTOR
	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
	{
	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
	}
    }
  else if (TARGET_UPPER_REGS == 0)
    {
      if (TARGET_VSX
	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
	{
	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
	}
      if (TARGET_VSX
	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
	{
	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
	}
      if (TARGET_P8_VECTOR
	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
	{
	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
	}
    }

  if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
	error ("-mupper-regs-df requires -mvsx");
      rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
    }

  if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
	error ("-mupper-regs-di requires -mvsx");
      rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
    }

  if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
	error ("-mupper-regs-sf requires -mpower8-vector");
      rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
    }
  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
     silently turn off quad memory mode.  */
  if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
	warning (0, N_("-mquad-memory requires 64-bit mode"));

      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
	warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));

      rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
			    | OPTION_MASK_QUAD_MEMORY_ATOMIC);
    }

  /* Non-atomic quad memory load/store are disabled for little endian, since
     the words are reversed, but atomic operations can still be done by
     swapping the words.  */
  if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
	warning (0, N_("-mquad-memory is not available in little endian mode"));

      rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
    }

  /* Assume if the user asked for normal quad memory instructions, they want
     the atomic versions as well, unless they explicitly told us not to use
     quad word atomic instructions.  */
  if (TARGET_QUAD_MEMORY
      && !TARGET_QUAD_MEMORY_ATOMIC
      && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
    rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
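  /* Illustrative note: the quad memory support corresponds to the lq/stq
     (and lqarx/stqcx. for the atomic forms) instructions, which only exist
     in 64-bit mode; hence the silent downgrade above when
     !TARGET_POWERPC64.  */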
  /* Enable power8 fusion if we are tuning for power8, even if we aren't
     generating power8 instructions.  */
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
    rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
			 & OPTION_MASK_P8_FUSION);

  /* Setting additional fusion flags turns on base fusion.  */
  if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
	{
	  if (TARGET_P8_FUSION_SIGN)
	    error ("-mpower8-fusion-sign requires -mpower8-fusion");

	  if (TARGET_TOC_FUSION)
	    error ("-mtoc-fusion requires -mpower8-fusion");

	  rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
	}
      else
	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
    }

  /* Power9 fusion is a superset over power8 fusion.  */
  if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
	{
	  /* We prefer to not mention undocumented options in
	     error messages.  However, if users have managed to select
	     power9-fusion without selecting power8-fusion, they
	     already know about undocumented flags.  */
	  error ("-mpower9-fusion requires -mpower8-fusion");
	  rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
	}
      else
	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
    }

  /* Enable power9 fusion if we are tuning for power9, even if we aren't
     generating power9 instructions.  */
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
    rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
			 & OPTION_MASK_P9_FUSION);

  /* Power8 does not fuse sign extended loads with the addis.  If we are
     optimizing at high levels for speed, convert a sign extended load into a
     zero extending load, and an explicit sign extension.  */
  if (TARGET_P8_FUSION
      && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
      && optimize_function_for_speed_p (cfun)
      && optimize >= 3)
    rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;

  /* TOC fusion requires 64-bit and medium/large code model.  */
  if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
    {
      rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
      if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
	warning (0, N_("-mtoc-fusion requires 64-bit"));
    }

  if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
    {
      rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
      if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
	warning (0, N_("-mtoc-fusion requires medium/large code model"));
    }

  /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
     model.  */
  if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
      && (TARGET_CMODEL != CMODEL_SMALL)
      && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
    rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
  /* ISA 3.0 vector instructions include ISA 2.07.  */
  if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
    {
      /* We prefer to not mention undocumented options in
	 error messages.  However, if users have managed to select
	 power9-vector without selecting power8-vector, they
	 already know about undocumented flags.  */
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
	  (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
	error ("-mpower9-vector requires -mpower8-vector");
      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
	  if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
	    rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
	}
      else
	{
	  /* OPTION_MASK_P9_VECTOR is explicit and
	     OPTION_MASK_P8_VECTOR is not explicit.  */
	  rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
	  rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
	}
    }
  /* -mpower9-dform turns on both -mpower9-dform-scalar and
     -mpower9-dform-vector.  */
  if (TARGET_P9_DFORM_BOTH > 0)
    {
      if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
	rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;

      if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
	rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
    }
  else if (TARGET_P9_DFORM_BOTH == 0)
    {
      if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
	rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;

      if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
	rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
    }
  /* ISA 3.0 D-form instructions require p9-vector and upper-regs.  */
  if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
    {
      /* We prefer to not mention undocumented options in
	 error messages.  However, if users have managed to select
	 power9-dform without selecting power9-vector, they
	 already know about undocumented flags.  */
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
	  && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR
					   | OPTION_MASK_P9_DFORM_VECTOR)))
	error ("-mpower9-dform requires -mpower9-vector");
      else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
	{
	  rs6000_isa_flags &=
	    ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
	  rs6000_isa_flags_explicit |=
	    (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
	}
      else
	{
	  /* We know that OPTION_MASK_P9_VECTOR is not explicit and
	     OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
	     may be explicit.  */
	  rs6000_isa_flags |= OPTION_MASK_P9_VECTOR;
	  rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
	}
    }

  if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
      && !TARGET_DIRECT_MOVE)
    {
      /* We prefer to not mention undocumented options in
	 error messages.  However, if users have managed to select
	 power9-dform without selecting direct-move, they
	 already know about undocumented flags.  */
      if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
	  && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) ||
	      (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) ||
	      (TARGET_P9_DFORM_BOTH == 1)))
	error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
	       " require -mdirect-move");
      else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
	{
	  rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
	  rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
	}
      else
	{
	  rs6000_isa_flags &=
	    ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
	  rs6000_isa_flags_explicit |=
	    (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
	}
    }

  if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
    {
      /* We prefer to not mention undocumented options in
	 error messages.  However, if users have managed to select
	 power9-dform without selecting upper-regs-df, they
	 already know about undocumented flags.  */
      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
	error ("-mpower9-dform requires -mupper-regs-df");
      rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
    }

  if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
	error ("-mpower9-dform requires -mupper-regs-sf");
      rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
    }
  /* Enable LRA by default.  */
  if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
    rs6000_isa_flags |= OPTION_MASK_LRA;

  /* There have been bugs with -mvsx-timode that don't show up with -mlra,
     but do show up with -mno-lra.  Given -mlra will become the default once
     PR 69847 is fixed, turn off the options with problems by default if
     -mno-lra was used, and warn if the user explicitly asked for the option.

     Enable -mpower9-dform-vector by default if LRA and other power9 options.
     Enable -mvsx-timode by default if LRA and VSX.  */
  if (!TARGET_LRA)
    {
      if (TARGET_VSX_TIMODE)
	{
	  if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
	    warning (0, "-mvsx-timode might need -mlra");

	  else
	    rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
	}
    }
  else
    {
      if (TARGET_VSX && !TARGET_VSX_TIMODE
	  && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
	rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
    }
  /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
     support.  If we only have ISA 2.06 support, and the user did not specify
     the switch, leave it set to -1 so the movmisalign patterns are enabled,
     but we don't enable the full vectorization support.  */
  if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
    TARGET_ALLOW_MOVMISALIGN = 1;

  else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
    {
      if (TARGET_ALLOW_MOVMISALIGN > 0
	  && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
	error ("-mallow-movmisalign requires -mvsx");

      TARGET_ALLOW_MOVMISALIGN = 0;
    }

  /* Determine when unaligned vector accesses are permitted, and when
     they are preferred over masked Altivec loads.  Note that if
     TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
     TARGET_EFFICIENT_UNALIGNED_VSX must be as well.  The converse is
     not true.  */
  if (TARGET_EFFICIENT_UNALIGNED_VSX)
    {
      if (!TARGET_VSX)
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
	    error ("-mefficient-unaligned-vsx requires -mvsx");

	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
	}

      else if (!TARGET_ALLOW_MOVMISALIGN)
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
	    error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");

	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
	}
    }

  /* Check whether we should allow small integers into VSX registers.  We
     require direct move to prevent the register allocator from having to move
     variables through memory to do moves.  SImode can be used on ISA 2.07,
     while HImode and QImode require ISA 3.0.  */
  if (TARGET_VSX_SMALL_INTEGER
      && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
	error ("-mvsx-small-integer requires -mpower8-vector, "
	       "-mupper-regs-di, and -mdirect-move");

      rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
    }
  /* Set long double size before the IEEE 128-bit tests.  */
  if (!global_options_set.x_rs6000_long_double_type_size)
    {
      if (main_target_opt != NULL
	  && (main_target_opt->x_rs6000_long_double_type_size
	      != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
	error ("target attribute or pragma changes long double size");
      else
	rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
    }

  /* Set -mabi=ieeelongdouble on some old targets.  Note, AIX and Darwin
     explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
     pick up this default.  */
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
  if (!global_options_set.x_rs6000_ieeequad)
    rs6000_ieeequad = 1;
#endif
  /* Enable the default support for IEEE 128-bit floating point on Linux VSX
     systems, but don't enable the __float128 keyword.  */
  if (TARGET_VSX && TARGET_LONG_DOUBLE_128
      && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
      && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;

  /* IEEE 128-bit floating point requires VSX support.  */
  if (!TARGET_VSX)
    {
      if (TARGET_FLOAT128_KEYWORD)
	{
	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
	    error ("-mfloat128 requires VSX support");

	  rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
				| OPTION_MASK_FLOAT128_KEYWORD
				| OPTION_MASK_FLOAT128_HW);
	}

      else if (TARGET_FLOAT128_TYPE)
	{
	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
	    error ("-mfloat128-type requires VSX support");

	  rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
				| OPTION_MASK_FLOAT128_KEYWORD
				| OPTION_MASK_FLOAT128_HW);
	}
    }

  /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
     128-bit floating point support to be enabled.  */
  if (!TARGET_FLOAT128_TYPE)
    {
      if (TARGET_FLOAT128_KEYWORD)
	{
	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
	    {
	      error ("-mfloat128 requires -mfloat128-type");
	      rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
				    | OPTION_MASK_FLOAT128_KEYWORD
				    | OPTION_MASK_FLOAT128_HW);
	    }
	  else
	    rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
	}

      if (TARGET_FLOAT128_HW)
	{
	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
	    {
	      error ("-mfloat128-hardware requires -mfloat128-type");
	      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
	    }
	  else
	    rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
				  | OPTION_MASK_FLOAT128_KEYWORD
				  | OPTION_MASK_FLOAT128_HW);
	}
    }

  /* If we have -mfloat128-type and full ISA 3.0 support, enable
     -mfloat128-hardware by default.  However, don't enable the __float128
     keyword.  If the user explicitly turned on -mfloat128-hardware, enable the
     -mfloat128 option as well if it was not already set.  */
  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
      && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;

  if (TARGET_FLOAT128_HW
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
	error ("-mfloat128-hardware requires full ISA 3.0 support");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }

  if (TARGET_FLOAT128_HW && !TARGET_64BIT)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
	error ("-mfloat128-hardware requires -m64");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }

  if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
      && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
      && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
  /* Print the options after updating the defaults.  */
  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);

  /* E500mc does "better" if we inline more aggressively.  Respect the
     user's opinion, though.  */
  if (rs6000_block_move_inline_limit == 0
      && (rs6000_cpu == PROCESSOR_PPCE500MC
	  || rs6000_cpu == PROCESSOR_PPCE500MC64
	  || rs6000_cpu == PROCESSOR_PPCE5500
	  || rs6000_cpu == PROCESSOR_PPCE6500))
    rs6000_block_move_inline_limit = 128;

  /* store_one_arg depends on expand_block_move to handle at least the
     size of reg_parm_stack_space.  */
  if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
    rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);

  if (global_init_p)
    {
      /* If the appropriate debug option is enabled, replace the target hooks
	 with debug versions that call the real version and then print
	 debugging information.  */
      if (TARGET_DEBUG_COST)
	{
	  targetm.rtx_costs = rs6000_debug_rtx_costs;
	  targetm.address_cost = rs6000_debug_address_cost;
	  targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
	}

      if (TARGET_DEBUG_ADDR)
	{
	  targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
	  targetm.legitimize_address = rs6000_debug_legitimize_address;
	  rs6000_secondary_reload_class_ptr
	    = rs6000_debug_secondary_reload_class;
	  rs6000_secondary_memory_needed_ptr
	    = rs6000_debug_secondary_memory_needed;
	  rs6000_cannot_change_mode_class_ptr
	    = rs6000_debug_cannot_change_mode_class;
	  rs6000_preferred_reload_class_ptr
	    = rs6000_debug_preferred_reload_class;
	  rs6000_legitimize_reload_address_ptr
	    = rs6000_debug_legitimize_reload_address;
	  rs6000_mode_dependent_address_ptr
	    = rs6000_debug_mode_dependent_address;
	}

      if (rs6000_veclibabi_name)
	{
	  if (strcmp (rs6000_veclibabi_name, "mass") == 0)
	    rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
	  else
	    {
	      error ("unknown vectorization library ABI type (%s) for "
		     "-mveclibabi= switch", rs6000_veclibabi_name);
	      ret = false;
	    }
	}
    }
  /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
     target attribute or pragma which automatically enables both options,
     unless the altivec ABI was set.  This is set by default for 64-bit, but
     not for 32-bit.  */
  if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
    rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
			   | OPTION_MASK_FLOAT128_TYPE
			   | OPTION_MASK_FLOAT128_KEYWORD)
			  & ~rs6000_isa_flags_explicit);

  /* Enable Altivec ABI for AIX -maltivec.  */
  if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
	error ("target attribute or pragma changes AltiVec ABI");
      else
	rs6000_altivec_abi = 1;
    }

  /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux.  For
     PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI.  It can
     be explicitly overridden in either case.  */
  if (TARGET_ELF)
    {
      if (!global_options_set.x_rs6000_altivec_abi
	  && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
	{
	  if (main_target_opt != NULL &&
	      !main_target_opt->x_rs6000_altivec_abi)
	    error ("target attribute or pragma changes AltiVec ABI");
	  else
	    rs6000_altivec_abi = 1;
	}
    }

  /* Set the Darwin64 ABI as default for 64-bit Darwin.
     So far, the only darwin64 targets are also MACH-O.  */
  if (TARGET_MACHO
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
	error ("target attribute or pragma changes darwin64 ABI");
      else
	{
	  rs6000_darwin64_abi = 1;
	  /* Default to natural alignment, for better performance.  */
	  rs6000_alignment_flags = MASK_ALIGN_NATURAL;
	}
    }

  /* Place FP constants in the constant pool instead of TOC
     if section anchors enabled.  */
  if (flag_section_anchors
      && !global_options_set.x_TARGET_NO_FP_IN_TOC)
    TARGET_NO_FP_IN_TOC = 1;

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
#ifdef SUB3TARGET_OVERRIDE_OPTIONS
  SUB3TARGET_OVERRIDE_OPTIONS;
#endif

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
  /* For the E500 family of cores, reset the single/double FP flags to let us
     check that they remain constant across attributes or pragmas.  Also,
     clear a possible request for string instructions, not supported and which
     we might have silently queried above for -Os.

     For other families, clear ISEL in case it was set implicitly.
  */

  switch (rs6000_cpu)
    {
    case PROCESSOR_PPC8540:
    case PROCESSOR_PPC8548:
    case PROCESSOR_PPCE500MC:
    case PROCESSOR_PPCE500MC64:
    case PROCESSOR_PPCE5500:
    case PROCESSOR_PPCE6500:

      rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
      rs6000_double_float = TARGET_E500_DOUBLE;

      rs6000_isa_flags &= ~OPTION_MASK_STRING;

      break;

    default:

      if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
	rs6000_isa_flags &= ~OPTION_MASK_ISEL;

      break;
    }

  if (main_target_opt)
    {
      if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
	error ("target attribute or pragma changes single precision floating "
	       "point");
      if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
	error ("target attribute or pragma changes double precision floating "
	       "point");
    }
  /* Detect invalid option combinations with E500.  */
  CHECK_E500_OPTIONS;

  rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
			&& rs6000_cpu != PROCESSOR_POWER5
			&& rs6000_cpu != PROCESSOR_POWER6
			&& rs6000_cpu != PROCESSOR_POWER7
			&& rs6000_cpu != PROCESSOR_POWER8
			&& rs6000_cpu != PROCESSOR_POWER9
			&& rs6000_cpu != PROCESSOR_PPCA2
			&& rs6000_cpu != PROCESSOR_CELL
			&& rs6000_cpu != PROCESSOR_PPC476);
  rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
			 || rs6000_cpu == PROCESSOR_POWER5
			 || rs6000_cpu == PROCESSOR_POWER7
			 || rs6000_cpu == PROCESSOR_POWER8);
  rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
				 || rs6000_cpu == PROCESSOR_POWER5
				 || rs6000_cpu == PROCESSOR_POWER6
				 || rs6000_cpu == PROCESSOR_POWER7
				 || rs6000_cpu == PROCESSOR_POWER8
				 || rs6000_cpu == PROCESSOR_POWER9
				 || rs6000_cpu == PROCESSOR_PPCE500MC
				 || rs6000_cpu == PROCESSOR_PPCE500MC64
				 || rs6000_cpu == PROCESSOR_PPCE5500
				 || rs6000_cpu == PROCESSOR_PPCE6500);
  /* Allow debug switches to override the above settings.  These are set to -1
     in rs6000.opt to indicate the user hasn't directly set the switch.  */
  if (TARGET_ALWAYS_HINT >= 0)
    rs6000_always_hint = TARGET_ALWAYS_HINT;

  if (TARGET_SCHED_GROUPS >= 0)
    rs6000_sched_groups = TARGET_SCHED_GROUPS;

  if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
    rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;

  rs6000_sched_restricted_insns_priority
    = (rs6000_sched_groups ? 1 : 0);

  /* Handle -msched-costly-dep option.  */
  rs6000_sched_costly_dep
    = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);

  if (rs6000_sched_costly_dep_str)
    {
      if (! strcmp (rs6000_sched_costly_dep_str, "no"))
	rs6000_sched_costly_dep = no_dep_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
	rs6000_sched_costly_dep = all_deps_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
	rs6000_sched_costly_dep = true_store_to_load_dep_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
	rs6000_sched_costly_dep = store_to_load_dep_costly;
      else
	rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
				   atoi (rs6000_sched_costly_dep_str));
    }

  /* Handle -minsert-sched-nops option.  */
  rs6000_sched_insert_nops
    = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);

  if (rs6000_sched_insert_nops_str)
    {
      if (! strcmp (rs6000_sched_insert_nops_str, "no"))
	rs6000_sched_insert_nops = sched_finish_none;
      else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
	rs6000_sched_insert_nops = sched_finish_pad_groups;
      else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
	rs6000_sched_insert_nops = sched_finish_regroup_exact;
      else
	rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
				    atoi (rs6000_sched_insert_nops_str));
    }
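  /* Example usage (illustrative): -minsert-sched-nops=pad pads every
     dispatch group out to its full size with nops, while a plain number
     such as -minsert-sched-nops=3 is accepted via the atoi conversion
     above.  */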
  /* Handle stack protector.  */
  if (!global_options_set.x_rs6000_stack_protector_guard)
#ifdef TARGET_THREAD_SSP_OFFSET
    rs6000_stack_protector_guard = SSP_TLS;
#else
    rs6000_stack_protector_guard = SSP_GLOBAL;
#endif

#ifdef TARGET_THREAD_SSP_OFFSET
  rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
  rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
#endif

  if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
    {
      char *endp;
      const char *str = rs6000_stack_protector_guard_offset_str;

      errno = 0;
      long offset = strtol (str, &endp, 0);
      if (!*str || *endp || errno)
	error ("%qs is not a valid number "
	       "in -mstack-protector-guard-offset=", str);

      if (!IN_RANGE (offset, -0x8000, 0x7fff)
	  || (TARGET_64BIT && (offset & 3)))
	error ("%qs is not a valid offset "
	       "in -mstack-protector-guard-offset=", str);

      rs6000_stack_protector_guard_offset = offset;
    }

  if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
    {
      const char *str = rs6000_stack_protector_guard_reg_str;
      int reg = decode_reg_name (str);

      if (!IN_RANGE (reg, 1, 31))
	error ("%qs is not a valid base register "
	       "in -mstack-protector-guard-reg=", str);

      rs6000_stack_protector_guard_reg = reg;
    }

  if (rs6000_stack_protector_guard == SSP_TLS
      && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
    error ("-mstack-protector-guard=tls needs a valid base register");
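  /* Example (illustrative): on a glibc target that defines
     TARGET_THREAD_SSP_OFFSET, the defaults above amount to

	 -mstack-protector-guard=tls
	 -mstack-protector-guard-reg=r13     (r2 for 32-bit)
	 -mstack-protector-guard-offset=<the TCB offset>

     and any user-supplied offset is validated to fit the signed 16-bit
     displacement of a D-form load (word-aligned when 64-bit).  */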
#ifdef TARGET_REGNAMES
  /* If the user desires alternate register names, copy in the
     alternate names now.  */
  if (TARGET_REGNAMES)
    memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
#endif

  /* Set aix_struct_return last, after the ABI is determined.
     If -maix-struct-return or -msvr4-struct-return was explicitly
     used, don't override with the ABI default.  */
  if (!global_options_set.x_aix_struct_return)
    aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);

#if 0
  /* IBM XL compiler defaults to unsigned bitfields.  */
  if (TARGET_XL_COMPAT)
    flag_signed_bitfields = 0;
#endif

  if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
    REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;

  ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
  /* We can only guarantee the availability of DI pseudo-ops when
     assembling for 64-bit targets.  */
  if (!TARGET_64BIT)
    {
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  /* Set branch target alignment, if not optimizing for size.  */
  if (!optimize_size)
    {
      /* Cell wants to be aligned 8byte for dual issue.  Titan wants to be
	 aligned 8byte to avoid misprediction by the branch predictor.  */
      if (rs6000_cpu == PROCESSOR_TITAN
	  || rs6000_cpu == PROCESSOR_CELL)
	{
	  if (align_functions <= 0)
	    align_functions = 8;
	  if (align_jumps <= 0)
	    align_jumps = 8;
	  if (align_loops <= 0)
	    align_loops = 8;
	}
      if (rs6000_align_branch_targets)
	{
	  if (align_functions <= 0)
	    align_functions = 16;
	  if (align_jumps <= 0)
	    align_jumps = 16;
	  if (align_loops <= 0)
	    {
	      can_override_loop_align = 1;
	      align_loops = 16;
	    }
	}
      if (align_jumps_max_skip <= 0)
	align_jumps_max_skip = 15;
      if (align_loops_max_skip <= 0)
	align_loops_max_skip = 15;
    }
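  /* Illustrative note: the max_skip values permit at most 15 bytes of
     padding when aligning a jump or loop target, so for the 16-byte
     alignment requested above the padding always fits and the alignment
     is always honored.  */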
  /* Arrange to save and restore machine status around nested functions.  */
  init_machine_status = rs6000_init_machine_status;

  /* We should always be splitting complex arguments, but we can't break
     Linux and Darwin ABIs at the moment.  For now, only AIX is fixed.  */
  if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
    targetm.calls.split_complex_arg = NULL;

  /* The AIX and ELFv1 ABIs define standard function descriptors.  */
  if (DEFAULT_ABI == ABI_AIX)
    targetm.calls.custom_function_descriptors = 0;
  /* Initialize rs6000_cost with the appropriate target costs.  */
  if (optimize_size)
    rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
  else
    switch (rs6000_cpu)
      {
      case PROCESSOR_RS64A:
	rs6000_cost = &rs64a_cost;
	break;

      case PROCESSOR_MPCCORE:
	rs6000_cost = &mpccore_cost;
	break;

      case PROCESSOR_PPC403:
	rs6000_cost = &ppc403_cost;
	break;

      case PROCESSOR_PPC405:
	rs6000_cost = &ppc405_cost;
	break;

      case PROCESSOR_PPC440:
	rs6000_cost = &ppc440_cost;
	break;

      case PROCESSOR_PPC476:
	rs6000_cost = &ppc476_cost;
	break;

      case PROCESSOR_PPC601:
	rs6000_cost = &ppc601_cost;
	break;

      case PROCESSOR_PPC603:
	rs6000_cost = &ppc603_cost;
	break;

      case PROCESSOR_PPC604:
	rs6000_cost = &ppc604_cost;
	break;

      case PROCESSOR_PPC604e:
	rs6000_cost = &ppc604e_cost;
	break;

      case PROCESSOR_PPC620:
	rs6000_cost = &ppc620_cost;
	break;

      case PROCESSOR_PPC630:
	rs6000_cost = &ppc630_cost;
	break;

      case PROCESSOR_CELL:
	rs6000_cost = &ppccell_cost;
	break;

      case PROCESSOR_PPC750:
      case PROCESSOR_PPC7400:
	rs6000_cost = &ppc750_cost;
	break;

      case PROCESSOR_PPC7450:
	rs6000_cost = &ppc7450_cost;
	break;

      case PROCESSOR_PPC8540:
      case PROCESSOR_PPC8548:
	rs6000_cost = &ppc8540_cost;
	break;

      case PROCESSOR_PPCE300C2:
      case PROCESSOR_PPCE300C3:
	rs6000_cost = &ppce300c2c3_cost;
	break;

      case PROCESSOR_PPCE500MC:
	rs6000_cost = &ppce500mc_cost;
	break;

      case PROCESSOR_PPCE500MC64:
	rs6000_cost = &ppce500mc64_cost;
	break;

      case PROCESSOR_PPCE5500:
	rs6000_cost = &ppce5500_cost;
	break;

      case PROCESSOR_PPCE6500:
	rs6000_cost = &ppce6500_cost;
	break;

      case PROCESSOR_TITAN:
	rs6000_cost = &titan_cost;
	break;

      case PROCESSOR_POWER4:
      case PROCESSOR_POWER5:
	rs6000_cost = &power4_cost;
	break;

      case PROCESSOR_POWER6:
	rs6000_cost = &power6_cost;
	break;

      case PROCESSOR_POWER7:
	rs6000_cost = &power7_cost;
	break;

      case PROCESSOR_POWER8:
	rs6000_cost = &power8_cost;
	break;

      case PROCESSOR_POWER9:
	rs6000_cost = &power9_cost;
	break;

      case PROCESSOR_PPCA2:
	rs6000_cost = &ppca2_cost;
	break;

      default:
	gcc_unreachable ();
      }
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 rs6000_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 rs6000_cost->cache_line_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Increase loop peeling limits based on performance analysis.  */
  maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Use the 'model' -fsched-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
			 SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
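  /* Illustrative note: each maybe_set_param_value call above only supplies
     a target default; an explicit user setting still wins.  For instance,
     the cache parameters derived from rs6000_cost can be overridden with
     something like

	 gcc -mcpu=power8 --param l1-cache-size=32 --param l2-cache-size=512

     (hypothetical values, shown for illustration only).  */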
  /* If using typedef char *va_list, signal that
     __builtin_va_start (&ap, 0) can be optimized to
     ap = __builtin_next_arg (0).  */
  if (DEFAULT_ABI != ABI_V4)
    targetm.expand_builtin_va_start = NULL;
  /* Set up single/double float flags.
     If TARGET_HARD_FLOAT is set, but neither single nor double is set,
     then set both flags.  */
  if (TARGET_HARD_FLOAT && TARGET_FPRS
      && rs6000_single_float == 0 && rs6000_double_float == 0)
    rs6000_single_float = rs6000_double_float = 1;
  /* If not explicitly specified via option, decide whether to generate indexed
     load/store instructions.  A value of -1 indicates that the
     initial value of this variable has not been overwritten.  During
     compilation, TARGET_AVOID_XFORM is either 0 or 1.  */
  if (TARGET_AVOID_XFORM == -1)
    /* Avoid indexed addressing when targeting Power6 in order to avoid the
       DERAT mispredict penalty.  However the LVE and STVE altivec instructions
       need indexed accesses and the type used is the scalar type of the element
       being loaded or stored.  */
    TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
			  && !TARGET_ALTIVEC);
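  /* Illustrative example: with TARGET_AVOID_XFORM set, the compiler
     prefers D-form (register + displacement) accesses such as

	 lwz r3,4(r9)		# base + constant offset

     over X-form (register + register, "indexed") accesses such as

	 lwzx r3,r9,r10		# base + index register

     which is the access shape behind the Power6 DERAT issue noted
     above.  */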
  /* Set the -mrecip options.  */
  if (rs6000_recip_name)
    {
      char *p = ASTRDUP (rs6000_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = ((TARGET_RECIP_PRECISION)
		    ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = 0;
		}
	    }

	  if (invert)
	    rs6000_recip_control &= ~mask;
	  else
	    rs6000_recip_control |= mask;
	}
    }
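  /* Illustrative usage of the option parsed above: a comma-separated
     list where a leading '!' clears the named bits, e.g.

	 -mrecip=all,!div	# enable all estimates except division

     "default" selects RECIP_HIGH_PRECISION or RECIP_LOW_PRECISION
     depending on whether the target has the higher-precision estimate
     instructions; the exact keyword names live in recip_options[].  */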
  /* Set the builtin mask of the various options used that could affect which
     builtins were used.  In the past we used target_flags, but we've run out
     of bits, and some options like SPE and PAIRED are no longer in
     target_flags.  */
  rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
    rs6000_print_builtin_options (stderr, 0, "builtin mask",
				  rs6000_builtin_mask);

  /* Initialize all of the registers.  */
  rs6000_init_hard_regno_mode_ok (global_init_p);

  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* If not explicitly specified via option, decide whether to generate the
     extra blr's required to preserve the link stack on some cpus (eg, 476).  */
  if (TARGET_LINK_STACK == -1)
    SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
/* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
   define the target cpu type.  */

static void
rs6000_option_override (void)
{
  (void) rs6000_option_override_internal (true);
}
/* Implement targetm.vectorize.builtin_mask_for_load.  */

static tree
rs6000_builtin_mask_for_load (void)
{
  /* Don't use lvsl/vperm for P8 and similarly efficient machines.  */
  if ((TARGET_ALTIVEC && !TARGET_VSX)
      || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
    return altivec_builtin_mask_for_load;
  else
    return 0;
}
/* Implement LOOP_ALIGN.  */

static int
rs6000_loop_align (rtx label)
{
  basic_block bb;
  int ninsns;

  /* Don't override loop alignment if -falign-loops was specified.  */
  if (!can_override_loop_align)
    return align_loops_log;

  bb = BLOCK_FOR_INSN (label);
  ninsns = num_loop_insns (bb->loop_father);

  /* Align small loops to 32 bytes to fit in an icache sector; otherwise
     return the default.  */
  if (ninsns > 4 && ninsns <= 8
      && (rs6000_cpu == PROCESSOR_POWER4
	  || rs6000_cpu == PROCESSOR_POWER5
	  || rs6000_cpu == PROCESSOR_POWER6
	  || rs6000_cpu == PROCESSOR_POWER7
	  || rs6000_cpu == PROCESSOR_POWER8
	  || rs6000_cpu == PROCESSOR_POWER9))
    return 5;
  else
    return align_loops_log;
}
/* Implement TARGET_LOOP_ALIGN_MAX_SKIP.  */

static int
rs6000_loop_align_max_skip (rtx_insn *label)
{
  return (1 << rs6000_loop_align (label)) - 1;
}
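/* Worked example (illustrative): rs6000_loop_align returns a log2
   alignment, so a 5..8 insn loop on POWER4..POWER9 gets 5 (2**5 = 32
   bytes, one icache sector), and the max skip above is then
   (1 << 5) - 1 = 31 bytes of padding at most.  */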
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine
   how many iterations are required to reach desired alignment.  */

static bool
rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  if (TARGET_32BIT)
    {
      if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
	return true;

      if (rs6000_alignment_flags == MASK_ALIGN_POWER)
	return true;

      return false;
    }
  else
    {
      if (TARGET_MACHO)
	return false;

      /* Assuming that all other types are naturally aligned.  CHECKME!  */
      return true;
    }
}
/* Return true if the vector misalignment factor is supported by the
   target.  */
static bool
rs6000_builtin_support_vector_misalignment (machine_mode mode,
					    const_tree type,
					    int misalignment,
					    bool is_packed)
{
  if (TARGET_VSX)
    {
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
	return true;

      /* Return if movmisalign pattern is not supported for this mode.  */
      if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
	return false;

      if (misalignment == -1)
	{
	  /* Misalignment factor is unknown at compile time but we know
	     it's word aligned.  */
	  if (rs6000_vector_alignment_reachable (type, is_packed))
	    {
	      int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));

	      if (element_size == 64 || element_size == 32)
		return true;
	    }

	  return false;
	}

      /* VSX supports word-aligned vector.  */
      if (misalignment % 4 == 0)
	return true;
    }
  return false;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */

static int
rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				   tree vectype, int misalign)
{
  unsigned elements;
  tree elem_type;

  switch (type_of_cost)
    {
    case scalar_stmt:
    case scalar_load:
    case scalar_store:
    case vector_stmt:
    case vector_load:
    case vector_store:
    case vec_to_scalar:
    case scalar_to_vec:
    case cond_branch_not_taken:
      return 1;

    case vec_perm:
      if (TARGET_VSX)
	return 3;
      else
	return 1;

    case vec_promote_demote:
      if (TARGET_VSX)
	return 4;
      else
	return 1;

    case cond_branch_taken:
      return 3;

    case unaligned_load:
      if (TARGET_P9_VECTOR)
	return 3;

      if (TARGET_EFFICIENT_UNALIGNED_VSX)
	return 1;

      if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
	{
	  elements = TYPE_VECTOR_SUBPARTS (vectype);
	  if (elements == 2)
	    /* Double word aligned.  */
	    return 2;

	  if (elements == 4)
	    {
	      switch (misalign)
		{
		case 8:
		  /* Double word aligned.  */
		  return 2;

		case -1:
		  /* Unknown misalignment.  */
		case 4:
		case 12:
		  /* Word aligned.  */
		  return 22;

		default:
		  gcc_unreachable ();
		}
	    }
	}

      if (TARGET_ALTIVEC)
	/* Misaligned loads are not supported.  */
	gcc_unreachable ();

      return 2;

    case unaligned_store:
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
	return 1;

      if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
	{
	  elements = TYPE_VECTOR_SUBPARTS (vectype);
	  if (elements == 2)
	    /* Double word aligned.  */
	    return 2;

	  if (elements == 4)
	    {
	      switch (misalign)
		{
		case 8:
		  /* Double word aligned.  */
		  return 2;

		case -1:
		  /* Unknown misalignment.  */
		case 4:
		case 12:
		  /* Word aligned.  */
		  return 23;

		default:
		  gcc_unreachable ();
		}
	    }
	}

      if (TARGET_ALTIVEC)
	/* Misaligned stores are not supported.  */
	gcc_unreachable ();

      return 2;

    case vec_construct:
      /* This is a rough approximation assuming non-constant elements
	 constructed into a vector via element insertion.  FIXME:
	 vec_construct is not granular enough for uniformly good
	 decisions.  If the initialization is a splat, this is
	 cheaper than we estimate.  Improve this someday.  */
      elem_type = TREE_TYPE (vectype);
      /* 32-bit vectors loaded into registers are stored as double
	 precision, so we need 2 permutes, 2 converts, and 1 merge
	 to construct a vector of short floats from them.  */
      if (SCALAR_FLOAT_TYPE_P (elem_type)
	  && TYPE_PRECISION (elem_type) == 32)
	return 5;
      else
	return max (2, TYPE_VECTOR_SUBPARTS (vectype) - 1);

    default:
      gcc_unreachable ();
    }
}
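/* Worked example (illustrative) for the vec_construct estimate above:
   a V4SF built from four scalar floats costs 5 (2 permutes + 2 converts
   + 1 merge), a V4SI costs max (2, 4 - 1) = 3, and a V2DI costs
   max (2, 2 - 1) = 2.  */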
/* Implement targetm.vectorize.preferred_simd_mode.  */

static machine_mode
rs6000_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_VSX)
    switch (mode)
      {
      case DFmode:
	return V2DFmode;
      default:;
      }
  if (TARGET_ALTIVEC || TARGET_VSX)
    switch (mode)
      {
      case SFmode:
	return V4SFmode;
      case TImode:
	return V1TImode;
      case DImode:
	return V2DImode;
      case SImode:
	return V4SImode;
      case HImode:
	return V8HImode;
      case QImode:
	return V16QImode;
      default:;
      }
  if (TARGET_PAIRED_FLOAT
      && mode == SFmode)
    return V2SFmode;
  return word_mode;
}
typedef struct _rs6000_cost_data
{
  struct loop *loop_info;
  unsigned cost[3];
} rs6000_cost_data;
/* Test for likely overcommitment of vector hardware resources.  If a
   loop iteration is relatively large, and too large a percentage of
   instructions in the loop are vectorized, the cost model may not
   adequately reflect delays from unavailable vector resources.
   Penalize the loop body cost for this case.  */

static void
rs6000_density_test (rs6000_cost_data *data)
{
  const int DENSITY_PCT_THRESHOLD = 85;
  const int DENSITY_SIZE_THRESHOLD = 70;
  const int DENSITY_PENALTY = 10;
  struct loop *loop = data->loop_info;
  basic_block *bbs = get_loop_body (loop);
  int nbbs = loop->num_nodes;
  int vec_cost = data->cost[vect_body], not_vec_cost = 0;
  int i, density_pct;

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
	    not_vec_cost++;
	}
    }

  free (bbs);
  density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);

  if (density_pct > DENSITY_PCT_THRESHOLD
      && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
    {
      data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "density %d%%, cost %d exceeds threshold, penalizing "
			 "loop body cost by %d%%", density_pct,
			 vec_cost + not_vec_cost, DENSITY_PENALTY);
    }
}
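/* Worked example (illustrative): with vec_cost = 90 and not_vec_cost = 10,
   density_pct = (90 * 100) / 100 = 90 > 85 and the size 100 > 70, so the
   body cost is penalized to 90 * (100 + 10) / 100 = 99.  */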
/* Implement targetm.vectorize.init_cost.  */

static void *
rs6000_init_cost (struct loop *loop_info)
{
  rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
  data->loop_info = loop_info;
  data->cost[vect_prologue] = 0;
  data->cost[vect_body]     = 0;
  data->cost[vect_epilogue] = 0;
  return data;
}
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		      struct _stmt_vec_info *stmt_info, int misalign,
		      enum vect_cost_model_location where)
{
  rs6000_cost_data *cost_data = (rs6000_cost_data *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
							 misalign);
      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost_data->cost[where] += retval;
    }

  return retval;
}
/* Implement targetm.vectorize.finish_cost.  */

static void
rs6000_finish_cost (void *data, unsigned *prologue_cost,
		    unsigned *body_cost, unsigned *epilogue_cost)
{
  rs6000_cost_data *cost_data = (rs6000_cost_data *) data;

  if (cost_data->loop_info)
    rs6000_density_test (cost_data);

  *prologue_cost = cost_data->cost[vect_prologue];
  *body_cost     = cost_data->cost[vect_body];
  *epilogue_cost = cost_data->cost[vect_epilogue];
}
/* Implement targetm.vectorize.destroy_cost_data.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.  */

static tree
rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
				   tree type_in)
{
  char name[32];
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  int n_args = 1;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  if (el_mode == DFmode && n == 2)
    {
      bdecl = mathfn_built_in (double_type_node, fn);
      suffix = "d2";			/* pow -> powd2 */
    }
  else if (el_mode == SFmode && n == 4)
    {
      bdecl = mathfn_built_in (float_type_node, fn);
      suffix = "4";			/* powf -> powf4 */
    }
  else
    return NULL_TREE;

  if (!bdecl)
    return NULL_TREE;

  gcc_assert (suffix != NULL);
  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));

  strcpy (name, bname + sizeof ("__builtin_") - 1);
  strcat (name, suffix);

  if (n_args == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
  else
    gcc_unreachable ();

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
				    tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
	     combined_fn_name (combined_fn (fn)),
	     GET_MODE_NAME (TYPE_MODE (type_out)),
	     GET_MODE_NAME (TYPE_MODE (type_in)));

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || !TARGET_VECTORIZE_BUILTINS)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    CASE_CFN_COPYSIGN:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
      break;
    CASE_CFN_CEIL:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
      break;
    CASE_CFN_FLOOR:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
      break;
    CASE_CFN_FMA:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
      break;
    CASE_CFN_TRUNC:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
      break;
    CASE_CFN_NEARBYINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && flag_unsafe_math_optimizations
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && flag_unsafe_math_optimizations
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
      break;
    CASE_CFN_RINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && !flag_trapping_math
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && !flag_trapping_math
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
      break;
    default:
      break;
    }

  /* Generate calls to libmass if appropriate.  */
  if (rs6000_veclib_handler)
    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);

  return NULL_TREE;
}
/* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION.  */

static tree
rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
				       tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	     GET_MODE_NAME (TYPE_MODE (type_out)),
	     GET_MODE_NAME (TYPE_MODE (type_in)));

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || !TARGET_VECTORIZE_BUILTINS)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  enum rs6000_builtins fn
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  switch (fn)
    {
    case RS6000_BUILTIN_RSQRTF:
      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
      break;
    case RS6000_BUILTIN_RSQRT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
      break;
    case RS6000_BUILTIN_RECIPF:
      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
      break;
    case RS6000_BUILTIN_RECIP:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
      break;
    default:
      break;
    }

  return NULL_TREE;
}
/* Default CPU string for rs6000*_file_start functions.  */
static const char *rs6000_default_cpu;

/* Do anything needed at the start of the asm file.  */

static void
rs6000_file_start (void)
{
  char buffer[80];
  const char *start = buffer;
  FILE *file = asm_out_file;

  rs6000_default_cpu = TARGET_CPU_DEFAULT;

  default_file_start ();

  if (flag_verbose_asm)
    {
      sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);

      if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
	{
	  fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
	  start = "";
	}

      if (global_options_set.x_rs6000_cpu_index)
	{
	  fprintf (file, "%s -mcpu=%s", start,
		   processor_target_table[rs6000_cpu_index].name);
	  start = "";
	}

      if (global_options_set.x_rs6000_tune_index)
	{
	  fprintf (file, "%s -mtune=%s", start,
		   processor_target_table[rs6000_tune_index].name);
	  start = "";
	}

      if (PPC405_ERRATUM77)
	{
	  fprintf (file, "%s PPC405CR_ERRATUM77", start);
	  start = "";
	}

#ifdef USING_ELFOS_H
      switch (rs6000_sdata)
	{
	case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
	case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
	case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
	case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
	}

      if (rs6000_sdata && g_switch_value)
	{
	  fprintf (file, "%s -G %d", start,
		   g_switch_value);
	  start = "";
	}
#endif

      if (*start == '\0')
	putc ('\n', file);
    }

#ifdef USING_ELFOS_H
  if (!(rs6000_default_cpu && rs6000_default_cpu[0])
      && !global_options_set.x_rs6000_cpu_index)
    {
      fputs ("\t.machine ", asm_out_file);
      if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
	fputs ("power9\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
	fputs ("power8\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
	fputs ("power7\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
	fputs ("power6\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
	fputs ("power5\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
	fputs ("power4\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
	fputs ("ppc64\n", asm_out_file);
      else
	fputs ("ppc\n", asm_out_file);
    }
#endif

  if (DEFAULT_ABI == ABI_ELFv2)
    fprintf (file, "\t.abiversion 2\n");
}
/* Return nonzero if this function is known to have a null epilogue.  */

int
direct_return (void)
{
  if (reload_completed)
    {
      rs6000_stack_t *info = rs6000_stack_info ();

      if (info->first_gp_reg_save == 32
	  && info->first_fp_reg_save == 64
	  && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
	  && ! info->lr_save_p
	  && ! info->cr_save_p
	  && info->vrsave_size == 0
	  && ! info->push_p)
	return 1;
    }

  return 0;
}
/* Return the number of instructions it takes to form a constant in an
   integer register.  */

int
num_insns_constant_wide (HOST_WIDE_INT value)
{
  /* signed constant loadable with addi */
  if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
    return 1;

  /* constant loadable with addis */
  else if ((value & 0xffff) == 0
	   && (value >> 31 == -1 || value >> 31 == 0))
    return 1;

  else if (TARGET_POWERPC64)
    {
      HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
      HOST_WIDE_INT high = value >> 31;

      if (high == 0 || high == -1)
	return 2;

      high >>= 1;

      if (low == 0)
	return num_insns_constant_wide (high) + 1;
      else if (high == 0)
	return num_insns_constant_wide (low) + 1;
      else
	return (num_insns_constant_wide (high)
		+ num_insns_constant_wide (low) + 1);
    }

  else
    return 2;
}
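/* Worked examples for the classification above (illustrative):
     0x7fff	-> 1 insn  (li, fits a signed 16-bit immediate)
     0x12340000	-> 1 insn  (lis, low halfword zero, high part
			    sign-extendable)
     0x12345678	-> 2 insns (lis + ori)
   and on 64-bit a constant with a non-trivial high word costs the insns
   for HIGH plus those for LOW plus one combining insn, matching the
   recursive sums above.  */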
int
num_insns_constant (rtx op, machine_mode mode)
{
  HOST_WIDE_INT low, high;

  switch (GET_CODE (op))
    {
    case CONST_INT:
      if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
	  && rs6000_is_valid_and_mask (op, mode))
	return 2;
      else
	return num_insns_constant_wide (INTVAL (op));

    case CONST_WIDE_INT:
      {
	int i;
	int ins = CONST_WIDE_INT_NUNITS (op) - 1;
	for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
	  ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
	return ins;
      }

    case CONST_DOUBLE:
      {
	long l[2];

	if (mode == SFmode || mode == SDmode)
	  {
	    long l;

	    if (DECIMAL_FLOAT_MODE_P (mode))
	      REAL_VALUE_TO_TARGET_DECIMAL32
		(*CONST_DOUBLE_REAL_VALUE (op), l);
	    else
	      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
	    return num_insns_constant_wide ((HOST_WIDE_INT) l);
	  }

	if (DECIMAL_FLOAT_MODE_P (mode))
	  REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
	else
	  REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
	high = l[WORDS_BIG_ENDIAN == 0];
	low  = l[WORDS_BIG_ENDIAN != 0];

	if (TARGET_32BIT)
	  return (num_insns_constant_wide (low)
		  + num_insns_constant_wide (high));
	else
	  {
	    if ((high == 0 && low >= 0)
		|| (high == -1 && low < 0))
	      return num_insns_constant_wide (low);

	    else if (rs6000_is_valid_and_mask (op, mode))
	      return 2;

	    else if (low == 0)
	      return num_insns_constant_wide (high) + 1;

	    else
	      return (num_insns_constant_wide (high)
		      + num_insns_constant_wide (low) + 1);
	  }
      }

    default:
      gcc_unreachable ();
    }
}
/* Interpret element ELT of the CONST_VECTOR OP as an integer value.
   If the mode of OP is MODE_VECTOR_INT, this simply returns the
   corresponding element of the vector, but for V4SFmode and V2SFmode,
   the corresponding "float" is interpreted as an SImode integer.  */

static HOST_WIDE_INT
const_vector_elt_as_int (rtx op, unsigned int elt)
{
  rtx tmp;

  /* We can't handle V2DImode and V2DFmode vector constants here yet.  */
  gcc_assert (GET_MODE (op) != V2DImode
	      && GET_MODE (op) != V2DFmode);

  tmp = CONST_VECTOR_ELT (op, elt);
  if (GET_MODE (op) == V4SFmode
      || GET_MODE (op) == V2SFmode)
    tmp = gen_lowpart (SImode, tmp);
  return INTVAL (tmp);
}
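/* Illustrative example: for a V4SFmode constant, the element 1.0f is
   returned as its IEEE single-precision bit pattern 0x3f800000, which is
   what the vspltis* recognizers below compare against.  */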
/* Return true if OP can be synthesized with a particular vspltisb, vspltish
   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
   all items are set to the same value and contain COPIES replicas of the
   vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
   operand and the others are set to the value of the operand's msb.  */

static bool
vspltis_constant (rtx op, unsigned step, unsigned copies)
{
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  unsigned i;
  unsigned nunits;
  unsigned bitsize;
  unsigned mask;

  HOST_WIDE_INT val;
  HOST_WIDE_INT splat_val;
  HOST_WIDE_INT msb_val;

  if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
    return false;

  nunits = GET_MODE_NUNITS (mode);
  bitsize = GET_MODE_BITSIZE (inner);
  mask = GET_MODE_MASK (inner);

  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  splat_val = val;
  msb_val = val >= 0 ? 0 : -1;

  /* Construct the value to be splatted, if possible.  If not, return 0.  */
  for (i = 2; i <= copies; i *= 2)
    {
      HOST_WIDE_INT small_val;
      bitsize /= 2;
      small_val = splat_val >> bitsize;
      mask >>= bitsize;
      if (splat_val != ((HOST_WIDE_INT)
			((unsigned HOST_WIDE_INT) small_val << bitsize)
			| (small_val & mask)))
	return false;
      splat_val = small_val;
    }

  /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (splat_val))
    ;

  /* Also check if we can splat, and then add the result to itself.  Do so if
     the value is positive, or if the splat instruction is using OP's mode;
     for splat_val < 0, the splat and the add should use the same mode.  */
  else if (EASY_VECTOR_15_ADD_SELF (splat_val)
	   && (splat_val >= 0 || (step == 1 && copies == 1)))
    ;

  /* Also check if we are loading up the most significant bit which can be done
     by loading up -1 and shifting the value left by -1.  */
  else if (EASY_VECTOR_MSB (splat_val, inner))
    ;

  else
    return false;

  /* Check if VAL is present in every STEP-th element, and the
     other elements are filled with its most significant bit.  */
  for (i = 1; i < nunits; ++i)
    {
      HOST_WIDE_INT desired_val;
      unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
      if ((i & (step - 1)) == 0)
	desired_val = val;
      else
	desired_val = msb_val;

      if (desired_val != const_vector_elt_as_int (op, elt))
	return false;
    }

  return true;
}
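/* Worked examples (illustrative): the V4SImode constant { 5, 5, 5, 5 }
   succeeds with STEP = 1 and COPIES = 1 and becomes a single
   "vspltisw 5".  Viewed as V8HImode, "vspltisw -3" produces halfword
   pairs { -1, -3 } because the element not holding the operand is filled
   with the operand's most significant bit, which is exactly the STEP = 2
   pattern checked above.  */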
/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
   instruction, filling in the bottom elements with 0 or -1.

   Return 0 if the constant cannot be generated with VSLDOI.  Return positive
   for the number of zeroes to shift in, or negative for the number of 0xff
   bytes to shift in.

   OP is a CONST_VECTOR.  */

static int
vspltis_shifted (rtx op)
{
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  unsigned i, j;
  unsigned nunits;
  unsigned mask;

  HOST_WIDE_INT val;

  if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
    return false;

  /* We need to create pseudo registers to do the shift, so don't recognize
     shift vector constants after reload.  */
  if (!can_create_pseudo_p ())
    return false;

  nunits = GET_MODE_NUNITS (mode);
  mask = GET_MODE_MASK (inner);

  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);

  /* Check if the value can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (val))
    ;

  /* Also check if we are loading up the most significant bit which can be done
     by loading up -1 and shifting the value left by -1.  */
  else if (EASY_VECTOR_MSB (val, inner))
    ;

  else
    return 0;

  /* Check if VAL is present in every STEP-th element until we find elements
     that are 0 or all 1 bits.  */
  for (i = 1; i < nunits; ++i)
    {
      unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
      HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);

      /* If the value isn't the splat value, check for the remaining elements
	 being 0 or all 1 bits.  */
      if (val != elt_val)
	{
	  if (elt_val == 0)
	    {
	      for (j = i + 1; j < nunits; ++j)
		{
		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
		  if (const_vector_elt_as_int (op, elt2) != 0)
		    return 0;
		}

	      return (nunits - i) * GET_MODE_SIZE (inner);
	    }

	  else if ((elt_val & mask) == mask)
	    {
	      for (j = i + 1; j < nunits; ++j)
		{
		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
		  if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
		    return 0;
		}

	      return -((nunits - i) * GET_MODE_SIZE (inner));
	    }

	  else
	    return 0;
	}
    }

  /* If all elements are equal, we don't need to do VSLDOI.  */
  return 0;
}
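/* Worked example (illustrative): on a big-endian target the V4SImode
   constant { 5, 0, 0, 0 } splats 5 with vspltisw and then shifts the
   remaining zero bytes in with vsldoi; the loop above finds zeroes
   starting at i = 1 and returns (4 - 1) * 4 = 12 zero bytes.  */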
/* Return true if OP is of the given MODE and can be synthesized
   with a vspltisb, vspltish or vspltisw.  */

bool
easy_altivec_constant (rtx op, machine_mode mode)
{
  unsigned step, copies;

  if (mode == VOIDmode)
    mode = GET_MODE (op);
  else if (mode != GET_MODE (op))
    return false;

  /* V2DI/V2DF was added with VSX.  Only allow 0 and all 1's as easy
     constants.  */
  if (mode == V2DFmode)
    return zero_constant (op, mode);

  else if (mode == V2DImode)
    {
      if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
	  || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
	return false;

      if (zero_constant (op, mode))
	return true;

      if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
	  && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
	return true;

      return false;
    }

  /* V1TImode is a special container for TImode.  Ignore for now.  */
  else if (mode == V1TImode)
    return false;

  /* Start with a vspltisw.  */
  step = GET_MODE_NUNITS (mode) / 4;
  copies = 1;

  if (vspltis_constant (op, step, copies))
    return true;

  /* Then try with a vspltish.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return true;

  /* And finally a vspltisb.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return true;

  if (vspltis_shifted (op) != 0)
    return true;

  return false;
}
/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
   result is OP.  Abort if it is not possible.  */

rtx
gen_easy_altivec_constant (rtx op)
{
  machine_mode mode = GET_MODE (op);
  int nunits = GET_MODE_NUNITS (mode);
  rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  unsigned step = nunits / 4;
  unsigned copies = 1;

  /* Start with a vspltisw.  */
  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));

  /* Then try with a vspltish.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));

  /* And finally a vspltisb.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));

  gcc_unreachable ();
}
/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
   instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).

   Return the number of instructions needed (1 or 2) into the address pointed
   to by NUM_INSNS_PTR.

   Return the constant that is being split via CONSTANT_PTR.  */

bool
xxspltib_constant_p (rtx op,
		     machine_mode mode,
		     int *num_insns_ptr,
		     int *constant_ptr)
{
  size_t nunits = GET_MODE_NUNITS (mode);
  size_t i;
  HOST_WIDE_INT value;
  rtx element;

  /* Set the returned values to out of bound values.  */
  *num_insns_ptr = -1;
  *constant_ptr = 256;

  if (!TARGET_P9_VECTOR)
    return false;

  if (mode == VOIDmode)
    mode = GET_MODE (op);

  else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
    return false;

  /* Handle (vec_duplicate <constant>).  */
  if (GET_CODE (op) == VEC_DUPLICATE)
    {
      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
	  && mode != V2DImode)
	return false;

      element = XEXP (op, 0);
      if (!CONST_INT_P (element))
	return false;

      value = INTVAL (element);
      if (!IN_RANGE (value, -128, 127))
	return false;
    }

  /* Handle (const_vector [...]).  */
  else if (GET_CODE (op) == CONST_VECTOR)
    {
      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
	  && mode != V2DImode)
	return false;

      element = CONST_VECTOR_ELT (op, 0);
      if (!CONST_INT_P (element))
	return false;

      value = INTVAL (element);
      if (!IN_RANGE (value, -128, 127))
	return false;

      for (i = 1; i < nunits; i++)
	{
	  element = CONST_VECTOR_ELT (op, i);
	  if (!CONST_INT_P (element))
	    return false;

	  if (value != INTVAL (element))
	    return false;
	}
    }

  /* Handle integer constants being loaded into the upper part of the VSX
     register as a scalar.  If the value isn't 0/-1, only allow it if the mode
     can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
  else if (CONST_INT_P (op))
    {
      if (!SCALAR_INT_MODE_P (mode))
	return false;

      value = INTVAL (op);
      if (!IN_RANGE (value, -128, 127))
	return false;

      if (!IN_RANGE (value, -1, 0))
	{
	  if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
	    return false;

	  if (EASY_VECTOR_15 (value))
	    return false;
	}
    }

  else
    return false;

  /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
     sign extend.  Special case 0/-1 to allow getting any VSX register instead
     of an Altivec register.  */
  if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
      && EASY_VECTOR_15 (value))
    return false;

  /* Return # of instructions and the constant byte for XXSPLTIB.  */
  if (mode == V16QImode)
    *num_insns_ptr = 1;

  else if (IN_RANGE (value, -1, 0))
    *num_insns_ptr = 1;

  else
    *num_insns_ptr = 2;

  *constant_ptr = (int) value;
  return true;
}
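/* Worked example (illustrative): a V16QImode constant with all bytes
   equal to 100 is answered with *num_insns_ptr = 1 and
   *constant_ptr = 100, i.e. a single xxspltib; an all-100 V4SImode
   constant instead needs the xxspltib plus a sign/widen step (e.g.
   vextsb2w), hence 2 insns.  A small value like 12 in V4SImode is
   rejected above because a plain vspltisw is preferable.  */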
const char *
output_vec_const_move (rtx *operands)
{
  int cst, cst2, shift;
  machine_mode mode;
  rtx dest, vec;

  dest = operands[0];
  vec = operands[1];
  mode = GET_MODE (dest);

  if (TARGET_VSX)
    {
      bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
      int xxspltib_value = 256;
      int num_insns = -1;

      if (zero_constant (vec, mode))
	{
	  if (TARGET_P9_VECTOR)
	    return "xxspltib %x0,0";

	  else if (dest_vmx_p)
	    return "vspltisw %0,0";

	  else
	    return "xxlxor %x0,%x0,%x0";
	}

      if (all_ones_constant (vec, mode))
	{
	  if (TARGET_P9_VECTOR)
	    return "xxspltib %x0,255";

	  else if (dest_vmx_p)
	    return "vspltisw %0,-1";

	  else if (TARGET_P8_VECTOR)
	    return "xxlorc %x0,%x0,%x0";

	  else
	    gcc_unreachable ();
	}

      if (TARGET_P9_VECTOR
	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
	{
	  if (num_insns == 1)
	    {
	      operands[2] = GEN_INT (xxspltib_value & 0xff);
	      return "xxspltib %x0,%2";
	    }

	  return "#";
	}
    }

  if (TARGET_ALTIVEC)
    {
      rtx splat_vec;

      gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
      if (zero_constant (vec, mode))
	return "vspltisw %0,0";

      if (all_ones_constant (vec, mode))
	return "vspltisw %0,-1";

      /* Do we need to construct a value using VSLDOI?  */
      shift = vspltis_shifted (vec);
      if (shift != 0)
	return "#";

      splat_vec = gen_easy_altivec_constant (vec);
      gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
      operands[1] = XEXP (splat_vec, 0);
      if (!EASY_VECTOR_15 (INTVAL (operands[1])))
	return "#";

      switch (GET_MODE (splat_vec))
	{
	case V4SImode:
	  return "vspltisw %0,%1";

	case V8HImode:
	  return "vspltish %0,%1";

	case V16QImode:
	  return "vspltisb %0,%1";

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (TARGET_SPE);

  /* Vector constant 0 is handled as a splitter of V2SI, and in the
     pattern of V1DI, V4HI, and V2SF.

     FIXME: We should probably return # and add post reload
     splitters for these, but this way is so easy ;-).  */
  cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
  cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
  operands[1] = CONST_VECTOR_ELT (vec, 0);
  operands[2] = CONST_VECTOR_ELT (vec, 1);
  if (cst == cst2)
    return "li %0,%1\n\tevmergelo %0,%0,%0";
  else if (WORDS_BIG_ENDIAN)
    return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
  else
    return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
}
/* Initialize TARGET of vector PAIRED to VALS.  */

void
paired_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  rtx x, new_rtx, tmp, constant_op, op1, op2;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	++n_var;
    }
  if (n_var == 0)
    {
      /* Load from constant pool.  */
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (n_var == 2)
    {
      /* The vector is initialized only with non-constants.  */
      new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
				    XVECEXP (vals, 0, 1));

      emit_move_insn (target, new_rtx);
      return;
    }

  /* One field is non-constant and the other one is a constant.  Load the
     constant from the constant pool and use ps_merge instruction to
     construct the whole vector.  */
  op1 = XVECEXP (vals, 0, 0);
  op2 = XVECEXP (vals, 0, 1);

  constant_op = (CONSTANT_P (op1)) ? op1 : op2;

  tmp = gen_reg_rtx (GET_MODE (constant_op));
  emit_move_insn (tmp, constant_op);

  if (CONSTANT_P (op1))
    new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
  else
    new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);

  emit_move_insn (target, new_rtx);
}

void
paired_expand_vector_move (rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  emit_move_insn (op0, op1);
}
/* Emit vector compare for code RCODE.  DEST is destination, OP1 and
   OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  This is a recursive
   function.  */

static void
paired_emit_vector_compare (enum rtx_code rcode,
			    rtx dest, rtx op0, rtx op1,
			    rtx cc_op0, rtx cc_op1)
{
  rtx tmp = gen_reg_rtx (V2SFmode);
  rtx tmp1, max, min;

  gcc_assert (TARGET_PAIRED_FLOAT);
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  switch (rcode)
    {
    case LT:
    case LTU:
      paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
      return;
    case GE:
    case GEU:
      emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
      emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
      return;
    case LE:
    case LEU:
      paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
      return;
    case GT:
      paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
      return;
    case EQ:
      tmp1 = gen_reg_rtx (V2SFmode);
      max = gen_reg_rtx (V2SFmode);
      min = gen_reg_rtx (V2SFmode);
      gen_reg_rtx (V2SFmode);

      emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
      emit_insn (gen_selv2sf4
		 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
      emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
      emit_insn (gen_selv2sf4
		 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
      emit_insn (gen_subv2sf3 (tmp1, min, max));
      emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
      return;
    case NE:
      paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
      return;
    case UNLE:
      paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
      return;
    case UNLT:
      paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
      return;
    case UNGE:
      paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
      return;
    case UNGT:
      paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
      return;
    default:
      gcc_unreachable ();
    }
}
/* Emit vector conditional expression.
   DEST is destination.  OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */

int
paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
			      rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum rtx_code rcode = GET_CODE (cond);

  if (!TARGET_PAIRED_FLOAT)
    return 0;

  paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);

  return 1;
}
/* Initialize vector TARGET to VALS.  */

void
rs6000_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	++n_var, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
      if ((int_vector_p || TARGET_VSX) && all_const_zero)
	{
	  /* Zero register.  */
	  emit_move_insn (target, CONST0_RTX (mode));
	  return;
	}
      else if (int_vector_p && easy_vector_constant (const_vec, mode))
	{
	  /* Splat immediate.  */
	  emit_insn (gen_rtx_SET (target, const_vec));
	  return;
	}
      else
	{
	  /* Load from constant pool.  */
	  emit_move_insn (target, const_vec);
	  return;
	}
    }

  /* Double word values on VSX can use xxpermdi or lxvdsx.  */
  if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
    {
      rtx op[2];
      size_t num_elements = all_same ? 1 : 2;
      for (i = 0; i < (int) num_elements; i++)
	{
	  op[i] = XVECEXP (vals, 0, i);
	  /* Just in case there is a SUBREG with a smaller mode, do a
	     conversion.  */
	  if (GET_MODE (op[i]) != inner_mode)
	    {
	      rtx tmp = gen_reg_rtx (inner_mode);
	      convert_move (tmp, op[i], 0);
	      op[i] = tmp;
	    }
	  /* Allow load with splat double word.  */
	  else if (MEM_P (op[i]))
	    {
	      if (!all_same)
		op[i] = force_reg (inner_mode, op[i]);
	    }
	  else if (!REG_P (op[i]))
	    op[i] = force_reg (inner_mode, op[i]);
	}

      if (all_same)
	{
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_splat_v2df (target, op[0]));
	  else
	    emit_insn (gen_vsx_splat_v2di (target, op[0]));
	}
      else
	{
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
	  else
	    emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
	}
      return;
    }

  /* Special case initializing vector int if we are on 64-bit systems with
     direct move or we have the ISA 3.0 instructions.  */
  if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
      && TARGET_DIRECT_MOVE_64BIT)
    {
      if (all_same)
	{
	  rtx element0 = XVECEXP (vals, 0, 0);
	  if (MEM_P (element0))
	    element0 = rs6000_address_for_fpconvert (element0);
	  else
	    element0 = force_reg (SImode, element0);

	  if (TARGET_P9_VECTOR)
	    emit_insn (gen_vsx_splat_v4si (target, element0));
	  else
	    {
	      rtx tmp = gen_reg_rtx (DImode);
	      emit_insn (gen_zero_extendsidi2 (tmp, element0));
	      emit_insn (gen_vsx_splat_v4si_di (target, tmp));
	    }
	  return;
	}
      else
	{
	  rtx elements[4];

	  for (i = 0; i < 4; i++)
	    {
	      elements[i] = XVECEXP (vals, 0, i);
	      if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
		elements[i] = copy_to_mode_reg (SImode, elements[i]);
	    }

	  emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
					elements[2], elements[3]));
	  return;
	}
    }

  /* With single precision floating point on VSX, know that internally single
     precision is actually represented as a double, and either make 2 V2DF
     vectors, and convert these vectors to single precision, or do one
     conversion, and splat the result to the other elements.  */
  if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
    {
      if (all_same)
	{
	  rtx element0 = XVECEXP (vals, 0, 0);

	  if (TARGET_P9_VECTOR)
	    {
	      if (MEM_P (element0))
		element0 = rs6000_address_for_fpconvert (element0);

	      emit_insn (gen_vsx_splat_v4sf (target, element0));
	    }

	  else
	    {
	      rtx freg = gen_reg_rtx (V4SFmode);
	      rtx sreg = force_reg (SFmode, element0);
	      rtx cvt  = (TARGET_XSCVDPSPN
			  ? gen_vsx_xscvdpspn_scalar (freg, sreg)
			  : gen_vsx_xscvdpsp_scalar (freg, sreg));

	      emit_insn (cvt);
	      emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
						      const0_rtx));
	    }
	}
      else
	{
	  rtx dbl_even = gen_reg_rtx (V2DFmode);
	  rtx dbl_odd  = gen_reg_rtx (V2DFmode);
	  rtx flt_even = gen_reg_rtx (V4SFmode);
	  rtx flt_odd  = gen_reg_rtx (V4SFmode);
	  rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
	  rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
	  rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
	  rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));

	  /* Use VMRGEW if we can instead of doing a permute.  */
	  if (TARGET_P8_VECTOR)
	    {
	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
	      if (BYTES_BIG_ENDIAN)
		emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
	      else
		emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
	    }
	  else
	    {
	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
	      rs6000_expand_extract_even (target, flt_even, flt_odd);
	    }
	}
      return;
    }

  /* Special case initializing vector short/char that are splats if we are on
     64-bit systems with direct move.  */
  if (all_same && TARGET_DIRECT_MOVE_64BIT
      && (mode == V16QImode || mode == V8HImode))
    {
      rtx op0 = XVECEXP (vals, 0, 0);
      rtx di_tmp = gen_reg_rtx (DImode);

      if (!REG_P (op0))
	op0 = force_reg (GET_MODE_INNER (mode), op0);

      if (mode == V16QImode)
	{
	  emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
	  emit_insn (gen_vsx_vspltb_di (target, di_tmp));
	  return;
	}

      if (mode == V8HImode)
	{
	  emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
	  emit_insn (gen_vsx_vsplth_di (target, di_tmp));
	  return;
	}
    }

  /* Store value to stack temp.  Load vector element.  Splat.  However, splat
     of 64-bit items is not supported on Altivec.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
      emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
		      XVECEXP (vals, 0, 0));
      x = gen_rtx_UNSPEC (VOIDmode,
			  gen_rtvec (1, const0_rtx), UNSPEC_LVE);
      emit_insn (gen_rtx_PARALLEL (VOIDmode,
				   gen_rtvec (2,
					      gen_rtx_SET (target, mem),
					      x)));
      x = gen_rtx_VEC_SELECT (inner_mode, target,
			      gen_rtx_PARALLEL (VOIDmode,
						gen_rtvec (1, const0_rtx)));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite
     varying field.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      rs6000_expand_vector_init (target, copy);

      /* Insert variable.  */
      rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
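/* Illustrative example of the V4SFmode path above: building
   { a, b, c, d } without VMRGEW concatenates the scalars into two V2DF
   registers ({ a, c } / { b, d } on P8, { a, b } / { c, d } otherwise),
   converts each to single precision with xvcvdpsp, and merges or
   extract-evens the results, since VSX keeps scalar floats internally
   in double-precision form.  */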
/* Set field ELT of TARGET to VAL.  */

void
rs6000_expand_vector_set (rtx target, rtx val, int elt)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx reg = gen_reg_rtx (mode);
  rtx mask, mem, x;
  int width = GET_MODE_SIZE (inner_mode);
  int i;

  val = force_reg (GET_MODE (val), val);

  if (VECTOR_MEM_VSX_P (mode))
    {
      rtx insn = NULL_RTX;
      rtx elt_rtx = GEN_INT (elt);

      if (mode == V2DFmode)
	insn = gen_vsx_set_v2df (target, target, val, elt_rtx);

      else if (mode == V2DImode)
	insn = gen_vsx_set_v2di (target, target, val, elt_rtx);

      else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
	       && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
	{
	  if (mode == V4SImode)
	    insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
	  else if (mode == V8HImode)
	    insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
	  else if (mode == V16QImode)
	    insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
	}

      if (insn)
	{
	  emit_insn (insn);
	  return;
	}
    }

  /* Simplify setting single element vectors like V1TImode.  */
  if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
    {
      emit_move_insn (target, gen_lowpart (mode, val));
      return;
    }

  /* Load single variable value.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
  emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
  x = gen_rtx_UNSPEC (VOIDmode,
		      gen_rtvec (1, const0_rtx), UNSPEC_LVE);
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2,
					  gen_rtx_SET (reg, mem),
					  x)));

  /* Linear sequence.  */
  mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (mask, 0, i) = GEN_INT (i);

  /* Set permute mask to insert element into target.  */
  for (i = 0; i < width; ++i)
    XVECEXP (mask, 0, elt*width + i)
      = GEN_INT (i + 0x10);
  x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));

  if (BYTES_BIG_ENDIAN)
    x = gen_rtx_UNSPEC (mode,
			gen_rtvec (3, target, reg,
				   force_reg (V16QImode, x)),
			UNSPEC_VPERM);
  else
    {
      if (TARGET_P9_VECTOR)
	x = gen_rtx_UNSPEC (mode,
			    gen_rtvec (3, target, reg,
				       force_reg (V16QImode, x)),
			    UNSPEC_VPERMR);
      else
	{
	  /* Invert selector.  We prefer to generate VNAND on P8 so
	     that future fusion opportunities can kick in, but must
	     generate VNOR elsewhere.  */
	  rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
	  rtx iorx = (TARGET_P8_VECTOR
		      ? gen_rtx_IOR (V16QImode, notx, notx)
		      : gen_rtx_AND (V16QImode, notx, notx));
	  rtx tmp = gen_reg_rtx (V16QImode);
	  emit_insn (gen_rtx_SET (tmp, iorx));

	  /* Permute with operands reversed and adjusted selector.  */
	  x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
			      UNSPEC_VPERM);
	}
    }

  emit_insn (gen_rtx_SET (target, x));
}
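/* Worked example of the permute mask built above (illustrative): to
   insert a 4-byte element at ELT = 1 of a V4SImode target, mask bytes
   0-3 and 8-15 keep their identity values (selecting TARGET bytes),
   while bytes 4-7 become 0x10..0x13, selecting the new value from REG;
   vperm with that selector performs the insertion.  */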
/* Extract field ELT from VEC into TARGET.  */

void
rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
{
  machine_mode mode = GET_MODE (vec);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx mem;

  if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
    {
      switch (mode)
	{
	default:
	  break;

	case V1TImode:
	  gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
	  emit_move_insn (target, gen_lowpart (TImode, vec));
	  return;

	case V2DFmode:
	  emit_insn (gen_vsx_extract_v2df (target, vec, elt));
	  return;

	case V2DImode:
	  emit_insn (gen_vsx_extract_v2di (target, vec, elt));
	  return;

	case V4SFmode:
	  emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
	  return;

	case V16QImode:
	  if (TARGET_DIRECT_MOVE_64BIT)
	    {
	      emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
	      return;
	    }
	  break;

	case V8HImode:
	  if (TARGET_DIRECT_MOVE_64BIT)
	    {
	      emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
	      return;
	    }
	  break;

	case V4SImode:
	  if (TARGET_DIRECT_MOVE_64BIT)
	    {
	      emit_insn (gen_vsx_extract_v4si (target, vec, elt));
	      return;
	    }
	  break;
	}
    }
  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
	   && TARGET_DIRECT_MOVE_64BIT)
    {
      if (GET_MODE (elt) != DImode)
	{
	  rtx tmp = gen_reg_rtx (DImode);
	  convert_move (tmp, elt, 0);
	  elt = tmp;
	}
      else if (!REG_P (elt))
	elt = force_reg (DImode, elt);

      switch (mode)
	{
	case V2DFmode:
	  emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
	  return;

	case V2DImode:
	  emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
	  return;

	case V4SFmode:
	  if (TARGET_UPPER_REGS_SF)
	    {
	      emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
	      return;
	    }
	  break;

	case V4SImode:
	  emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
	  return;

	case V8HImode:
	  emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
	  return;

	case V16QImode:
	  emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
	  return;

	default:
	  break;
	}
    }

  gcc_assert (CONST_INT_P (elt));

  /* Allocate mode-sized buffer.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

  emit_move_insn (mem, vec);

  /* Add offset to field within buffer matching vector element.  */
  mem = adjust_address_nv (mem, inner_mode,
			   INTVAL (elt) * GET_MODE_SIZE (inner_mode));

  emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
}
/* Helper function to return the register number of a RTX.  */

static inline int
regno_or_subregno (rtx op)
{
  if (REG_P (op))
    return REGNO (op);
  else if (SUBREG_P (op))
    return subreg_regno (op);
  else
    gcc_unreachable ();
}
/* Adjust a memory address (MEM) of a vector type to point to a scalar field
   within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
   temporary (BASE_TMP) to fix up the address.  Return the new memory address
   that is valid for reads or writes to a given register (SCALAR_REG).  */

static rtx
rs6000_adjust_vec_address (rtx scalar_reg,
			   rtx mem,
			   rtx element,
			   rtx base_tmp,
			   machine_mode scalar_mode)
{
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  rtx addr = XEXP (mem, 0);
  rtx element_offset;
  rtx new_addr;
  bool valid_addr_p;

  /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
  gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);

  /* Calculate what we need to add to the address to get the element
     address.  */
  if (CONST_INT_P (element))
    element_offset = GEN_INT (INTVAL (element) * scalar_size);
  else
    {
      int byte_shift = exact_log2 (scalar_size);
      gcc_assert (byte_shift >= 0);

      if (byte_shift == 0)
	element_offset = element;

      else
	{
	  if (TARGET_POWERPC64)
	    emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
	  else
	    emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));

	  element_offset = base_tmp;
	}
    }

  /* Create the new address pointing to the element within the vector.  If we
     are adding 0, we don't have to change the address.  */
  if (element_offset == const0_rtx)
    new_addr = addr;

  /* A simple indirect address can be converted into a reg + offset
     address.  */
  else if (REG_P (addr) || SUBREG_P (addr))
    new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);

  /* Optimize D-FORM addresses with constant offset with a constant element, to
     include the element offset in the address directly.  */
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      rtx insn;

      gcc_assert (REG_P (op0) || SUBREG_P (op0));
      if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
	{
	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
	  rtx offset_rtx = GEN_INT (offset);

	  if (IN_RANGE (offset, -32768, 32767)
	      && (scalar_size < 8 || (offset & 0x3) == 0))
	    new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
	  else
	    {
	      emit_move_insn (base_tmp, offset_rtx);
	      new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
	    }
	}
      else
	{
	  bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
	  bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));

	  /* Note, ADDI requires the register being added to be a base
	     register.  If the register was R0, load it up into the temporary
	     and do the add.  */
	  if (op1_reg_p
	      && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
	    {
	      insn = gen_add3_insn (base_tmp, op1, element_offset);
	      gcc_assert (insn != NULL_RTX);
	      emit_insn (insn);
	    }

	  else if (ele_reg_p
		   && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
	    {
	      insn = gen_add3_insn (base_tmp, element_offset, op1);
	      gcc_assert (insn != NULL_RTX);
	      emit_insn (insn);
	    }

	  else
	    {
	      emit_move_insn (base_tmp, op1);
	      emit_insn (gen_add2_insn (base_tmp, element_offset));
	    }

	  new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
	}
    }

  else
    {
      emit_move_insn (base_tmp, addr);
      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
    }

  /* If we have a PLUS, we need to see whether the particular register class
     allows for D-FORM or X-FORM addressing.  */
  if (GET_CODE (new_addr) == PLUS)
    {
      rtx op1 = XEXP (new_addr, 1);
      addr_mask_type addr_mask;
      int scalar_regno = regno_or_subregno (scalar_reg);

      gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
      if (INT_REGNO_P (scalar_regno))
	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];

      else if (FP_REGNO_P (scalar_regno))
	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];

      else if (ALTIVEC_REGNO_P (scalar_regno))
	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];

      else
	gcc_unreachable ();

      if (REG_P (op1) || SUBREG_P (op1))
	valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
      else
	valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
    }

  else if (REG_P (new_addr) || SUBREG_P (new_addr))
    valid_addr_p = true;

  else
    valid_addr_p = false;

  if (!valid_addr_p)
    {
      emit_move_insn (base_tmp, new_addr);
      new_addr = base_tmp;
    }

  return change_address (mem, scalar_mode, new_addr);
}
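
/* Worked example (editor's sketch, not part of the original source):
   extracting constant SImode element 2 from a V4SImode vector in memory at
   (plus (reg r9) (const_int 32)) folds the element offset 2*4 into the
   displacement, giving (plus (reg r9) (const_int 40)).  For a variable
   element held in a register, the element number is instead shifted left
   by log2 (scalar_size) into BASE_TMP, producing the indexed address
   (plus (reg r9) (reg base_tmp)), assuming the register class of
   SCALAR_REG permits X-FORM addressing.  */
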
/* Split a variable vec_extract operation into the component instructions.  */

void
rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
			      rtx tmp_altivec)
{
  machine_mode mode = GET_MODE (src);
  machine_mode scalar_mode = GET_MODE (dest);
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  int byte_shift = exact_log2 (scalar_size);

  gcc_assert (byte_shift >= 0);

  /* If we are given a memory address, optimize to load just the element.  We
     don't have to adjust the vector element number on little endian
     systems.  */
  if (MEM_P (src))
    {
      gcc_assert (REG_P (tmp_gpr));
      emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
						       tmp_gpr, scalar_mode));
      return;
    }

  else if (REG_P (src) || SUBREG_P (src))
    {
      int bit_shift = byte_shift + 3;
      rtx element2;
      int dest_regno = regno_or_subregno (dest);
      int src_regno = regno_or_subregno (src);
      int element_regno = regno_or_subregno (element);

      gcc_assert (REG_P (tmp_gpr));

      /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
	 a general purpose register.  */
      if (TARGET_P9_VECTOR
	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	  && INT_REGNO_P (dest_regno)
	  && ALTIVEC_REGNO_P (src_regno)
	  && INT_REGNO_P (element_regno))
	{
	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
	  rtx element_si = gen_rtx_REG (SImode, element_regno);

	  if (mode == V16QImode)
	    emit_insn (VECTOR_ELT_ORDER_BIG
		       ? gen_vextublx (dest_si, element_si, src)
		       : gen_vextubrx (dest_si, element_si, src));

	  else if (mode == V8HImode)
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
	      emit_insn (VECTOR_ELT_ORDER_BIG
			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
	    }

	  else
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
	      emit_insn (VECTOR_ELT_ORDER_BIG
			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
	    }

	  return;
	}

      gcc_assert (REG_P (tmp_altivec));

      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
	 will shift the element into the upper position (adding 3 to convert a
	 byte shift into a bit shift).  */
      if (scalar_size == 8)
	{
	  if (!VECTOR_ELT_ORDER_BIG)
	    {
	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
	     bit.  */
	  emit_insn (gen_rtx_SET (tmp_gpr,
				  gen_rtx_AND (DImode,
					       gen_rtx_ASHIFT (DImode,
							       element2,
							       GEN_INT (6)),
					       GEN_INT (64))));
	}
      else
	{
	  if (!VECTOR_ELT_ORDER_BIG)
	    {
	      rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);

	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
	}

      /* Get the value into the lower byte of the Altivec register where VSLO
	 expects it.  */
      if (TARGET_P9_VECTOR)
	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
      else if (can_create_pseudo_p ())
	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
      else
	{
	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	  emit_move_insn (tmp_di, tmp_gpr);
	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
	}

      /* Do the VSLO to get the value into the final location.  */
      switch (mode)
	{
	case V2DFmode:
	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
	  return;

	case V2DImode:
	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
	  return;

	case V4SFmode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));

	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
	    return;
	  }

	case V4SImode:
	case V8HImode:
	case V16QImode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));
	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
	    emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
				    GEN_INT (64 - (8 * scalar_size))));
	    return;
	  }

	default:
	  gcc_unreachable ();
	}
    }

  else
    gcc_unreachable ();
}
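
/* Example of the little-endian index adjustment above (editor's sketch, not
   from the original source): for V4SImode (4 elements), LE element 1 is BE
   element 4 - 1 - 1 = 2, so the code computes tmp = (element & 3) and then
   3 - tmp before converting the byte shift into a bit shift for VSLO.  For
   V2DImode the same adjustment is simply element ^ 1, which is why a single
   XOR suffices when scalar_size == 8.  */
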
/* Helper function for rs6000_split_v4si_init to build up a DImode value from
   two SImode values.  */

static void
rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
{
  const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);

  if (CONST_INT_P (si1) && CONST_INT_P (si2))
    {
      unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
      unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;

      emit_move_insn (dest, GEN_INT (const1 | const2));
      return;
    }

  /* Put si1 into upper 32-bits of dest.  */
  if (CONST_INT_P (si1))
    emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
  else
    {
      /* Generate RLDIC.  */
      rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
      rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
      rtx mask_rtx = GEN_INT (mask_32bit << 32);
      rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
      gcc_assert (!reg_overlap_mentioned_p (dest, si1));
      emit_insn (gen_rtx_SET (dest, and_rtx));
    }

  /* Put si2 into the temporary.  */
  gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
  if (CONST_INT_P (si2))
    emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
  else
    emit_insn (gen_zero_extendsidi2 (tmp, si2));

  /* Combine the two parts.  */
  emit_insn (gen_iordi3 (dest, dest, tmp));
  return;
}
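
/* Worked example (editor's sketch, not from the original source):
   si1 = 0x11112222 and si2 = 0x33334444 produce
   dest = 0x1111222233334444.  With register inputs the same value is
   built as dest = (si1 << 32) & 0xffffffff00000000 (the RLDIC pattern),
   tmp = zero_extend (si2), dest |= tmp.  */
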
/* Split a V4SI initialization.  */

void
rs6000_split_v4si_init (rtx operands[])
{
  rtx dest = operands[0];

  /* Destination is a GPR, build up the two DImode parts in place.  */
  if (REG_P (dest) || SUBREG_P (dest))
    {
      int d_regno = regno_or_subregno (dest);
      rtx scalar1 = operands[1];
      rtx scalar2 = operands[2];
      rtx scalar3 = operands[3];
      rtx scalar4 = operands[4];
      rtx tmp1 = operands[5];
      rtx tmp2 = operands[6];

      /* Even though we only need one temporary (plus the destination, which
	 has an early clobber constraint), try to use two temporaries, one for
	 each double word created.  That way the 2nd insn scheduling pass can
	 rearrange things so the two parts are done in parallel.  */
      if (BYTES_BIG_ENDIAN)
	{
	  rtx di_lo = gen_rtx_REG (DImode, d_regno);
	  rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
	  rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
	  rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
	}
      else
	{
	  rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
	  rtx di_hi = gen_rtx_REG (DImode, d_regno);
	  gcc_assert (!VECTOR_ELT_ORDER_BIG);
	  rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
	  rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
	}

      return;
    }

  else
    gcc_unreachable ();
}
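
/* Register layout note (editor's sketch, not from the original source): on
   big endian the vector { s1, s2, s3, s4 } is built as REG d_regno = s1:s2
   and REG d_regno+1 = s3:s4; on little endian both the doubleword order and
   the halves within each doubleword are reversed, hence the swapped scalar
   arguments in the second arm above.  */
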
/* Return TRUE if OP is an invalid SUBREG operation on the e500.  */

bool
invalid_e500_subreg (rtx op, machine_mode mode)
{
  if (TARGET_E500_DOUBLE)
    {
      /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
	 subreg:TI and reg:TF.  Decimal float modes are like integer
	 modes (only low part of each register used) for this
	 purpose.  */
      if (GET_CODE (op) == SUBREG
	  && (mode == SImode || mode == DImode || mode == TImode
	      || mode == DDmode || mode == TDmode || mode == PTImode)
	  && REG_P (SUBREG_REG (op))
	  && (GET_MODE (SUBREG_REG (op)) == DFmode
	      || GET_MODE (SUBREG_REG (op)) == TFmode
	      || GET_MODE (SUBREG_REG (op)) == IFmode
	      || GET_MODE (SUBREG_REG (op)) == KFmode))
	return true;

      /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
	 reg:TI.  */
      if (GET_CODE (op) == SUBREG
	  && (mode == DFmode || mode == TFmode || mode == IFmode
	      || mode == KFmode)
	  && REG_P (SUBREG_REG (op))
	  && (GET_MODE (SUBREG_REG (op)) == DImode
	      || GET_MODE (SUBREG_REG (op)) == TImode
	      || GET_MODE (SUBREG_REG (op)) == PTImode
	      || GET_MODE (SUBREG_REG (op)) == DDmode
	      || GET_MODE (SUBREG_REG (op)) == TDmode))
	return true;
    }

  if (TARGET_SPE
      && GET_CODE (op) == SUBREG
      && mode == SImode
      && REG_P (SUBREG_REG (op))
      && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
    return true;

  return false;
}
/* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
   selects whether the alignment is ABI mandated, optional, or
   both ABI and optional alignment.  */

unsigned int
rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
{
  if (how != align_opt)
    {
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
	      || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
	    {
	      if (align < 64)
		align = 64;
	    }
	  else if (align < 128)
	    align = 128;
	}
      else if (TARGET_E500_DOUBLE
	       && TREE_CODE (type) == REAL_TYPE
	       && TYPE_MODE (type) == DFmode)
	{
	  if (align < 64)
	    align = 64;
	}
    }

  if (how != align_abi)
    {
      if (TREE_CODE (type) == ARRAY_TYPE
	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
	{
	  if (align < BITS_PER_WORD)
	    align = BITS_PER_WORD;
	}
    }

  return align;
}
/* Previous GCC releases forced all vector types to have 16-byte alignment.  */

bool
rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
{
  if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
    {
      if (computed != 128)
	{
	  static bool warned;
	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the layout of aggregates containing vectors with"
		      " %d-byte alignment has changed in GCC 5",
		      computed / BITS_PER_UNIT);
	    }
	}
      /* In current GCC there is no special case.  */
      return false;
    }

  return false;
}
/* AIX increases natural record alignment to doubleword if the first
   field is an FP double while the FP fields remain word aligned.  */

unsigned int
rs6000_special_round_type_align (tree type, unsigned int computed,
				 unsigned int specified)
{
  unsigned int align = MAX (computed, specified);
  tree field = TYPE_FIELDS (type);

  /* Skip all non-field decls.  */
  while (field != NULL && TREE_CODE (field) != FIELD_DECL)
    field = DECL_CHAIN (field);

  if (field != NULL && field != type)
    {
      type = TREE_TYPE (field);
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);

      if (type != error_mark_node && TYPE_MODE (type) == DFmode)
	align = MAX (align, 64);
    }

  return align;
}
/* Darwin increases record alignment to the natural alignment of
   the first field.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do {
    tree field = TYPE_FIELDS (type);
    /* Skip all non-field decls.  */
    while (field != NULL && TREE_CODE (field) != FIELD_DECL)
      field = DECL_CHAIN (field);
    if (! field)
      break;
    /* A packed field does not contribute any extra alignment.  */
    if (DECL_PACKED (field))
      return align;
    type = TREE_TYPE (field);
    while (TREE_CODE (type) == ARRAY_TYPE)
      type = TREE_TYPE (type);
  } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}
/* Return 1 for an operand in small memory on V.4/eabi.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  /* Vector and float memory instructions have a limited offset on the
     SPE, so using a vector or float variable directly as an operand is
     not useful.  */
  if (TARGET_SPE
      && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
    return 0;

  if (GET_CODE (op) == SYMBOL_REF)
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
	   || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}
/* Return true if either operand is a general purpose register.  */

bool
gpr_or_gpr_p (rtx op0, rtx op1)
{
  return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
/* Return true if this is a move direct operation between GPR registers and
   floating point/VSX registers.  */

bool
direct_move_p (rtx op0, rtx op1)
{
  int regno0, regno1;

  if (!REG_P (op0) || !REG_P (op1))
    return false;

  if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
    return false;

  regno0 = REGNO (op0);
  regno1 = REGNO (op1);
  if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
    return false;

  if (INT_REGNO_P (regno0))
    return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);

  else if (INT_REGNO_P (regno1))
    {
      if (TARGET_MFPGPR && FP_REGNO_P (regno0))
	return true;

      else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
	return true;
    }

  return false;
}
/* Return true if the OFFSET is valid for the quad address instructions that
   use d-form (register + offset) addressing.  */

static inline bool
quad_address_offset_p (HOST_WIDE_INT offset)
{
  return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
}
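
/* For instance (editor's note, not from the original source): offsets 0, 16
   and -32768 satisfy the check above, while 8 (not a multiple of 16) and
   32768 (outside the signed 16-bit range) do not.  */
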
/* Return true if the ADDR is an acceptable address for a quad memory
   operation of mode MODE (either LQ/STQ for general purpose registers, or
   LXV/STXV for vector registers under ISA 3.0).  STRICT is true if the
   address must already be strictly legitimate (i.e. after reload).  */

bool
quad_address_p (rtx addr, machine_mode mode, bool strict)
{
  rtx op0, op1;

  if (GET_MODE_SIZE (mode) != 16)
    return false;

  if (legitimate_indirect_address_p (addr, strict))
    return true;

  if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
    return false;

  if (GET_CODE (addr) != PLUS)
    return false;

  op0 = XEXP (addr, 0);
  if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
    return false;

  op1 = XEXP (addr, 1);
  if (!CONST_INT_P (op1))
    return false;

  return quad_address_offset_p (INTVAL (op1));
}
/* Return true if this is a load or store quad operation.  This function does
   not handle the atomic quad memory instructions.  */

bool
quad_load_store_p (rtx op0, rtx op1)
{
  bool ret;

  if (!TARGET_QUAD_MEMORY)
    ret = false;

  else if (REG_P (op0) && MEM_P (op1))
    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
	   && quad_memory_operand (op1, GET_MODE (op1))
	   && !reg_overlap_mentioned_p (op0, op1));

  else if (MEM_P (op0) && REG_P (op1))
    ret = (quad_memory_operand (op0, GET_MODE (op0))
	   && quad_int_reg_operand (op1, GET_MODE (op1)));

  else
    ret = false;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n========== quad_load_store, return %s\n",
	       ret ? "true" : "false");
      debug_rtx (gen_rtx_SET (op0, op1));
    }

  return ret;
}
/* Given an address, return a constant offset term if one exists.  */

rtx
address_offset (rtx op)
{
  if (GET_CODE (op) == PRE_INC
      || GET_CODE (op) == PRE_DEC)
    op = XEXP (op, 0);
  else if (GET_CODE (op) == PRE_MODIFY
	   || GET_CODE (op) == LO_SUM)
    op = XEXP (op, 1);

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  if (GET_CODE (op) == PLUS)
    op = XEXP (op, 1);

  if (CONST_INT_P (op))
    return op;

  return NULL_RTX;
}
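
/* Examples (editor's note, not from the original source): for
   (plus (reg r3) (const_int 8)) this returns (const_int 8); for
   (lo_sum (reg r3) (symbol_ref "x")) it descends into the low-part operand
   and, finding no constant term, returns NULL_RTX.  */
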
/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.  Also accept some odd rtl generated by reload
   (see rs6000_legitimize_reload_address for various forms).  It is
   important that reload rtl be accepted by appropriate constraints
   but not by the operand predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed, but see the ???
   comment in rs6000_legitimize_reload_address.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return offset + 0x8000 < 0x10000u - extra;
}
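
/* Worked example (editor's sketch, not from the original source): for a
   TImode access on powerpc64, extra = 16 - 8 = 8, so a constant offset D is
   accepted when -32768 <= D <= 32767 - 8 and D is a multiple of 4; the first
   doubleword at D and the second at D + 8 then both fit a signed 16-bit
   displacement.  */
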
/* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
   enforce an offset divisible by 4 even for 32-bit.  */

bool
mem_operand_ds_form (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  if (!offsettable_address_p (false, mode, addr))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if ((offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return offset + 0x8000 < 0x10000u - extra;
}
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */

static bool
reg_offset_addressing_ok_p (machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
    case V1TImode:
    case TImode:
    case TFmode:
    case KFmode:
      /* AltiVec/VSX vector modes.  Only reg+reg addressing was valid until the
	 ISA 3.0 vector d-form addressing mode was added.  While TImode is not
	 a vector mode, if we want to use the VSX registers to move it around,
	 we need to restrict ourselves to reg+reg addressing.  Similarly for
	 IEEE 128-bit floating point that is passed in a single vector
	 register.  */
      if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
	return mode_supports_vsx_dform_quad (mode);
      break;

    case V4HImode:
    case V2SImode:
    case V1DImode:
    case V2SFmode:
      /* Paired vector modes.  Only reg+reg addressing is valid.  */
      if (TARGET_PAIRED_FLOAT)
	return false;
      break;

    case SDmode:
      /* If we can do direct load/stores of SDmode, restrict it to reg+reg
	 addressing for the LFIWZX and STFIWX instructions.  */
      if (TARGET_NO_SDMODE_STACK)
	return false;
      break;

    default:
      break;
    }

  return true;
}
static bool
virtual_stack_registers_memory_p (rtx op)
{
  int regnum;

  if (GET_CODE (op) == REG)
    regnum = REGNO (op);

  else if (GET_CODE (op) == PLUS
	   && GET_CODE (XEXP (op, 0)) == REG
	   && GET_CODE (XEXP (op, 1)) == CONST_INT)
    regnum = REGNO (XEXP (op, 0));

  else
    return false;

  return (regnum >= FIRST_VIRTUAL_REGISTER
	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
/* Return true if a MODE-sized memory access to OP plus OFFSET
   is known not to straddle a 32k boundary.  This function is used
   to determine whether -mcmodel=medium code can use TOC pointer
   relative addressing for OP.  This means the alignment of the TOC
   pointer must also be taken into account, and unfortunately that is
   only 8 bytes.  */

#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
#endif

static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
			     machine_mode mode)
{
  tree decl;
  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;

  if (GET_CODE (op) != SYMBOL_REF)
    return false;

  /* ISA 3.0 vector d-form addressing is restricted, don't allow
     SYMBOL_REF.  */
  if (mode_supports_vsx_dform_quad (mode))
    return false;

  dsize = GET_MODE_SIZE (mode);
  decl = SYMBOL_REF_DECL (op);
  if (!decl)
    {
      if (dsize == 0)
	return false;

      /* -fsection-anchors loses the original SYMBOL_REF_DECL when
	 replacing memory addresses with an anchor plus offset.  We
	 could find the decl by rummaging around in the block->objects
	 VEC for the given offset but that seems like too much work.  */
      dalign = BITS_PER_UNIT;
      if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
	  && SYMBOL_REF_ANCHOR_P (op)
	  && SYMBOL_REF_BLOCK (op) != NULL)
	{
	  struct object_block *block = SYMBOL_REF_BLOCK (op);

	  dalign = block->alignment;
	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
	}
      else if (CONSTANT_POOL_ADDRESS_P (op))
	{
	  /* It would be nice to have get_pool_align().  */
	  machine_mode cmode = get_pool_mode (op);

	  dalign = GET_MODE_ALIGNMENT (cmode);
	}
    }
  else if (DECL_P (decl))
    {
      dalign = DECL_ALIGN (decl);

      if (dsize == 0)
	{
	  /* Allow BLKmode when the entire object is known to not
	     cross a 32k boundary.  */
	  if (!DECL_SIZE_UNIT (decl))
	    return false;

	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
	    return false;

	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
	  if (dsize > 32768)
	    return false;

	  dalign /= BITS_PER_UNIT;
	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
	  return dalign >= dsize;
	}
    }
  else
    gcc_unreachable ();

  /* Find how many bits of the alignment we know for this access.  */
  dalign /= BITS_PER_UNIT;
  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
  mask = dalign - 1;
  lsb = offset & -offset;
  mask &= lsb - 1;
  dalign = mask + 1;

  return dalign >= dsize;
}
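
/* Worked example of the mask arithmetic above (editor's sketch, not from the
   original source): with dalign = 8 bytes and offset = 12, mask = 7,
   lsb = 12 & -12 = 4, mask &= lsb - 1 gives 3, and dalign becomes 4.  A
   4-byte access at that offset is then known not to cross a 32k boundary
   (4 >= dsize), while an 8-byte access is rejected.  */
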
static bool
constant_pool_expr_p (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  return (GET_CODE (base) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (base)
	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
}

static const_rtx tocrel_base, tocrel_offset;
/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match non-split
   -mcmodel=large/medium toc pointer relative addresses.  */

bool
toc_relative_expr_p (const_rtx op, bool strict)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* When strict ensure we have everything tidy.  */
      if (strict
	  && !(GET_CODE (op) == LO_SUM
	       && REG_P (XEXP (op, 0))
	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
	return false;

      /* When not strict, allow non-split TOC addresses and also allow
	 (lo_sum (high ..)) TOC addresses created during reload.  */
      if (GET_CODE (op) == LO_SUM)
	op = XEXP (op, 1);
    }

  tocrel_base = op;
  tocrel_offset = const0_rtx;
  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
}
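
/* Shape of a matching address (editor's note, not from the original source):
   create_TOC_reference produces an address roughly of the form
   (plus (unspec [(symbol_ref) (reg 2)] UNSPEC_TOCREL) (const_int N)), or a
   LO_SUM wrapping of it for -mcmodel=medium/large; this function records the
   unspec in tocrel_base and any constant term in tocrel_offset.  */
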
/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.  */

bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  return (toc_relative_expr_p (x, strict)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || mode == QImode
	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
					      INTVAL (tocrel_offset), mode)));
}

static bool
legitimate_small_data_p (machine_mode mode, rtx x)
{
  return (DEFAULT_ABI == ABI_V4
	  && !flag_pic && !TARGET_TOC
	  && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
	  && small_data_operand (x, mode));
}
/* SPE offset addressing is limited to 5-bits worth of double words.  */
#define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)

bool
rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
				    bool strict, bool worst_case)
{
  unsigned HOST_WIDE_INT offset;
  unsigned int extra;

  if (GET_CODE (x) != PLUS)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  if (mode_supports_vsx_dform_quad (mode))
    return quad_address_p (x, mode, strict);
  if (!reg_offset_addressing_ok_p (mode))
    return virtual_stack_registers_memory_p (x);
  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
    return true;
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return false;

  offset = INTVAL (XEXP (x, 1));
  extra = 0;
  switch (mode)
    {
    case V4HImode:
    case V2SImode:
    case V1DImode:
    case V2SFmode:
      /* SPE vector modes.  */
      return SPE_CONST_OFFSET_OK (offset);

    case DFmode:
    case DDmode:
    case DImode:
      /* On e500v2, we may have:

	   (subreg:DF (mem:DI (plus (reg) (const_int))) 0).

	 Which gets addressed with evldd instructions.  */
      if (TARGET_E500_DOUBLE)
	return SPE_CONST_OFFSET_OK (offset);

      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 4;
      else if (offset & 3)
	return false;
      break;

    case TFmode:
    case IFmode:
    case KFmode:
    case TDmode:
    case TImode:
    case PTImode:
      if (TARGET_E500_DOUBLE)
	return (SPE_CONST_OFFSET_OK (offset)
		&& SPE_CONST_OFFSET_OK (offset + 8));

      extra = 8;
      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if (offset & 3)
	return false;
      break;

    default:
      break;
    }

  offset += 0x8000;
  return offset < 0x10000 - extra;
}
bool
legitimate_indexed_address_p (rtx x, int strict)
{
  rtx op0, op1;

  if (GET_CODE (x) != PLUS)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  /* Recognize the rtl generated by reload which we know will later be
     replaced with proper base and index regs.  */
  if (!strict
      && reload_in_progress
      && (REG_P (op0) || GET_CODE (op0) == PLUS)
      && REG_P (op1))
    return true;

  return (REG_P (op0) && REG_P (op1)
	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
}
bool
avoiding_indexed_address_p (machine_mode mode)
{
  /* Avoid indexed addressing for modes that have non-indexed
     load/store instruction forms.  */
  return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}

bool
legitimate_indirect_address_p (rtx x, int strict)
{
  return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
}
bool
macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
  if (!TARGET_MACHO || !flag_pic
      || mode != SImode || GET_CODE (x) != MEM)
    return false;
  x = XEXP (x, 0);

  if (GET_CODE (x) != LO_SUM)
    return false;
  if (GET_CODE (XEXP (x, 0)) != REG)
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
    return false;
  x = XEXP (x, 1);

  return CONSTANT_P (x);
}
static bool
legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
{
  if (GET_CODE (x) != LO_SUM)
    return false;
  if (GET_CODE (XEXP (x, 0)) != REG)
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* Quad word addresses are restricted, and we can't use LO_SUM.  */
  if (mode_supports_vsx_dform_quad (mode))
    return false;
  /* Restrict addressing for DI because of our SUBREG hackery.  */
  if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;
  x = XEXP (x, 1);

  if (TARGET_ELF || TARGET_MACHO)
    {
      bool large_toc_ok;

      if (DEFAULT_ABI == ABI_V4 && flag_pic)
	return false;
      /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
	 recognizes some LO_SUM addresses as valid although this
	 function says the opposite.  In most cases, LRA through its
	 different transformations can generate correct code for
	 address reloads; only some LO_SUM cases are beyond it.  So we
	 need to add code analogous to that in
	 rs6000_legitimize_reload_address for LO_SUM here, saying that
	 some addresses are still valid.  */
      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
		      && small_toc_ref (x, VOIDmode));
      if (TARGET_TOC && ! large_toc_ok)
	return false;
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* ??? Assume floating point reg based on mode?  */
	       TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
	       && (mode == DFmode || mode == DDmode)))
	return false;

      return CONSTANT_P (x) || large_toc_ok;
    }

  return false;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This is used from only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was
   called.  In some cases it is useful to look at this to decide what
   needs to be done.

   It is always safe for this function to do nothing.  It exists to
   recognize opportunities to optimize the output.

   On RS/6000, first check for the sum of a register with a constant
   integer that is out of range.  If so, generate code to add the
   constant with the low-order 16 bits masked to the register and force
   this result into another register (this can be done with `cau').
   Then generate an address of REG+(CONST&0xffff), allowing for the
   possibility of bit 16 being a one.

   Then check for the sum of a register and something not constant, try to
   load the other things into a register and return the sum.  */

static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			   machine_mode mode)
{
  unsigned int extra;

  if (!reg_offset_addressing_ok_p (mode)
      || mode_supports_vsx_dform_quad (mode))
    {
      if (virtual_stack_registers_memory_p (x))
	return x;

      /* In theory we should not be seeing addresses of the form reg+0,
	 but just in case it is generated, optimize it away.  */
      if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
	return force_reg (Pmode, XEXP (x, 0));

      /* For TImode with load/store quad, restrict addresses to just a single
	 pointer, so it works with both GPRs and VSX registers.  */
      /* Make sure both operands are registers.  */
      else if (GET_CODE (x) == PLUS
	       && (mode != TImode || !TARGET_VSX_TIMODE))
	return gen_rtx_PLUS (Pmode,
			     force_reg (Pmode, XEXP (x, 0)),
			     force_reg (Pmode, XEXP (x, 1)));
      else
	return force_reg (Pmode, x);
    }
  if (GET_CODE (x) == SYMBOL_REF)
    {
      enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
      if (model != 0)
	return rs6000_legitimize_tls_address (x, model);
    }

  extra = 0;
  switch (mode)
    {
    case TFmode:
    case TDmode:
    case TImode:
    case PTImode:
    case IFmode:
    case KFmode:
      /* As in legitimate_offset_address_p we do not assume
	 worst-case.  The mode here is just a hint as to the registers
	 used.  A TImode is usually in gprs, but may actually be in
	 fprs.  Leave worst-case scenario for reload to handle via
	 insn constraints.  PTImode is only GPRs.  */
      extra = 8;
      break;
    default:
      break;
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
	  >= 0x10000 - extra)
      && !(SPE_VECTOR_MODE (mode)
	   || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
    {
      HOST_WIDE_INT high_int, low_int;
      rtx sum;
      low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
      if (low_int >= 0x8000 - extra)
	low_int = 0;
      high_int = INTVAL (XEXP (x, 1)) - low_int;
      sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
					 GEN_INT (high_int)), 0);
      return plus_constant (Pmode, sum, low_int);
    }
  else if (GET_CODE (x) == PLUS
	   && GET_CODE (XEXP (x, 0)) == REG
	   && GET_CODE (XEXP (x, 1)) != CONST_INT
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
		   && (mode == DFmode || mode == DDmode)))
	   && !avoiding_indexed_address_p (mode))
    {
      return gen_rtx_PLUS (Pmode, XEXP (x, 0),
			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
    }
  else if (SPE_VECTOR_MODE (mode)
	   || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
    {
      if (mode == DImode)
	return x;
      /* We accept [reg + reg] and [reg + OFFSET].  */

      if (GET_CODE (x) == PLUS)
	{
	  rtx op1 = XEXP (x, 0);
	  rtx op2 = XEXP (x, 1);
	  rtx y;

	  op1 = force_reg (Pmode, op1);

	  if (GET_CODE (op2) != REG
	      && (GET_CODE (op2) != CONST_INT
		  || !SPE_CONST_OFFSET_OK (INTVAL (op2))
		  || (GET_MODE_SIZE (mode) > 8
		      && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
	    op2 = force_reg (Pmode, op2);

	  /* We can't always do [reg + reg] for these, because [reg +
	     reg + offset] is not a legitimate addressing mode.  */
	  y = gen_rtx_PLUS (Pmode, op1, op2);

	  if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
	    return force_reg (Pmode, y);
	  else
	    return y;
	}

      return force_reg (Pmode, x);
    }
  else if ((TARGET_ELF
#if TARGET_MACHO
	    || !MACHO_DYNAMIC_NO_PIC_P
#endif
	    )
	   && TARGET_32BIT
	   && TARGET_NO_TOC
	   && ! flag_pic
	   && GET_CODE (x) != CONST_INT
	   && GET_CODE (x) != CONST_WIDE_INT
	   && GET_CODE (x) != CONST_DOUBLE
	   && CONSTANT_P (x)
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
		   && (mode == DFmode || mode == DDmode))))
    {
      rtx reg = gen_reg_rtx (Pmode);
      if (TARGET_ELF)
	emit_insn (gen_elf_high (reg, x));
      else
	emit_insn (gen_macho_high (reg, x));
      return gen_rtx_LO_SUM (Pmode, reg, x);
    }
  else if (TARGET_TOC
	   && GET_CODE (x) == SYMBOL_REF
	   && constant_pool_expr_p (x)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
    return create_TOC_reference (x, NULL_RTX);
  else
    return x;
}
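
/* Worked example of the high/low split above (editor's sketch, not from the
   original source): for x = (plus (reg r3) (const_int 0x12345)),
   low_int = 0x2345 and high_int = 0x10000, so the code emits an addis of
   0x1 (tmp = r3 + 0x10000) and returns (plus (reg tmp) (const_int 0x2345)),
   keeping the residual displacement within the signed 16-bit range.  */
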
/* Debug version of rs6000_legitimize_address.  */
static rtx
rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
  rtx ret;
  rtx_insn *insns;

  start_sequence ();
  ret = rs6000_legitimize_address (x, oldx, mode);
  insns = get_insns ();
  end_sequence ();

  if (ret != x)
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, old code %s, "
	       "new code %s, modified\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
	       GET_RTX_NAME (GET_CODE (ret)));

      fprintf (stderr, "Original address:\n");
      debug_rtx (x);

      fprintf (stderr, "oldx:\n");
      debug_rtx (oldx);

      fprintf (stderr, "New address:\n");
      debug_rtx (ret);

      if (insns)
	{
	  fprintf (stderr, "Insns added:\n");
	  debug_rtx_list (insns, 20);
	}
    }
  else
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));

      debug_rtx (x);
    }

  if (insns)
    emit_insn (insns);

  return ret;
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void
rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs (DOUBLE_INT_ASM_OP, file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  if (TARGET_ELF)
    fputs ("@dtprel+0x8000", file);
  else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
    {
      switch (SYMBOL_REF_TLS_MODEL (x))
	{
	case 0:
	  break;
	case TLS_MODEL_LOCAL_EXEC:
	  fputs ("@le", file);
	  break;
	case TLS_MODEL_INITIAL_EXEC:
	  fputs ("@ie", file);
	  break;
	case TLS_MODEL_GLOBAL_DYNAMIC:
	case TLS_MODEL_LOCAL_DYNAMIC:
	  fputs ("@m", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }
}
/* Return true if X is a symbol that refers to real (rather than emulated)
   TLS.  */

static bool
rs6000_real_tls_symbol_ref_p (rtx x)
{
  return (GET_CODE (x) == SYMBOL_REF
	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize various UNSPEC sequences
   and turn them back into a direct symbol reference.  */

static rtx
rs6000_delegitimize_address (rtx orig_x)
{
  rtx x, y, offset;

  orig_x = delegitimize_mem_from_attrs (orig_x);
  x = orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);

  y = x;
  if (TARGET_CMODEL != CMODEL_SMALL
      && GET_CODE (y) == LO_SUM)
    y = XEXP (y, 1);

  offset = NULL_RTX;
  if (GET_CODE (y) == PLUS
      && GET_MODE (y) == Pmode
      && CONST_INT_P (XEXP (y, 1)))
    {
      offset = XEXP (y, 1);
      y = XEXP (y, 0);
    }

  if (GET_CODE (y) == UNSPEC
      && XINT (y, 1) == UNSPEC_TOCREL)
    {
      y = XVECEXP (y, 0, 0);

#ifdef HAVE_AS_TLS
      /* Do not associate thread-local symbols with the original
	 constant pool symbol.  */
      if (TARGET_XCOFF
	  && GET_CODE (y) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (y)
	  && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
	return orig_x;
#endif

      if (offset != NULL_RTX)
	y = gen_rtx_PLUS (Pmode, y, offset);
      if (!MEM_P (orig_x))
	return y;
      else
	return replace_equiv_address_nv (orig_x, y);
    }

  if (TARGET_MACHO
      && GET_CODE (orig_x) == LO_SUM
      && GET_CODE (XEXP (orig_x, 1)) == CONST)
    {
      y = XEXP (XEXP (orig_x, 1), 0);
      if (GET_CODE (y) == UNSPEC
	  && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
	return XVECEXP (y, 0, 0);
    }

  return orig_x;
}
/* Return true if X shouldn't be emitted into the debug info.
   The linker doesn't like .toc section references from
   .debug_* sections, so reject .toc section symbols.  */

static bool
rs6000_const_not_ok_for_debug_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    {
      rtx c = get_pool_constant (x);
      machine_mode cmode = get_pool_mode (x);
      if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
	return true;
    }

  return false;
}
9315 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9317 int icode
= INSN_CODE (insn
);
9319 /* Reject creating doloop insns. Combine should not be allowed
9320 to create these for a number of reasons:
9321 1) In a nested loop, if combine creates one of these in an
9322 outer loop and the register allocator happens to allocate ctr
9323 to the outer loop insn, then the inner loop can't use ctr.
9324 Inner loops ought to be more highly optimized.
9325 2) Combine often wants to create one of these from what was
9326 originally a three insn sequence, first combining the three
9327 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9328 allocated ctr, the splitter takes use back to the three insn
9329 sequence. It's better to stop combine at the two insn
9331 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9332 insns, the register allocator sometimes uses floating point
9333 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9334 jump insn and output reloads are not implemented for jumps,
9335 the ctrsi/ctrdi splitters need to handle all possible cases.
9336 That's a pain, and it gets to be seriously difficult when a
9337 splitter that runs after reload needs memory to transfer from
9338 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9339 for the difficult case. It's better to not create problems
9340 in the first place. */
9341 if (icode
!= CODE_FOR_nothing
9342 && (icode
== CODE_FOR_ctrsi_internal1
9343 || icode
== CODE_FOR_ctrdi_internal1
9344 || icode
== CODE_FOR_ctrsi_internal2
9345 || icode
== CODE_FOR_ctrdi_internal2
9346 || icode
== CODE_FOR_ctrsi_internal3
9347 || icode
== CODE_FOR_ctrdi_internal3
9348 || icode
== CODE_FOR_ctrsi_internal4
9349 || icode
== CODE_FOR_ctrdi_internal4
))
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx rs6000_tls_symbol;
static rtx
rs6000_tls_get_addr (void)
{
  if (!rs6000_tls_symbol)
    rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");

  return rs6000_tls_symbol;
}

/* Construct the SYMBOL_REF for TLS GOT references.  */

static GTY(()) rtx rs6000_got_symbol;
static rtx
rs6000_got_sym (void)
{
  if (!rs6000_got_symbol)
    {
      rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
    }

  return rs6000_got_symbol;
}
/* AIX Thread-Local Address support.  */

static rtx
rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
{
  rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
  const char *name;
  char *tlsname;

  name = XSTR (addr, 0);
  /* Append TLS CSECT qualifier, unless the symbol already is qualified
     or the symbol will be in TLS private data section.  */
  if (name[strlen (name) - 1] != ']'
      && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
	  || bss_initializer_p (SYMBOL_REF_DECL (addr))))
    {
      tlsname = XALLOCAVEC (char, strlen (name) + 4);
      strcpy (tlsname, name);
      strcat (tlsname,
	      bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
      tlsaddr = copy_rtx (addr);
      XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
    }
  else
    tlsaddr = addr;

  /* Place addr into TOC constant pool.  */
  sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);

  /* Output the TOC entry and create the MEM referencing the value.  */
  if (constant_pool_expr_p (XEXP (sym, 0))
      && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
    {
      tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
      mem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (mem, get_TOC_alias_set ());
    }
  else
    return sym;

  /* Use global-dynamic for local-dynamic.  */
  if (model == TLS_MODEL_GLOBAL_DYNAMIC
      || model == TLS_MODEL_LOCAL_DYNAMIC)
    {
      /* Create new TOC reference for @m symbol.  */
      name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
      tlsname = XALLOCAVEC (char, strlen (name) + 1);
      strcpy (tlsname, "*LCM");
      strcat (tlsname, name + 3);
      rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
      SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
      tocref = create_TOC_reference (modaddr, NULL_RTX);
      rtx modmem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (modmem, get_TOC_alias_set ());

      rtx modreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (modreg, modmem));

      tmpreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (tmpreg, mem));

      dest = gen_reg_rtx (Pmode);
      if (TARGET_32BIT)
	emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
      else
	emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
      return dest;
    }
  /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13.  */
  else if (TARGET_32BIT)
    {
      tlsreg = gen_reg_rtx (SImode);
      emit_insn (gen_tls_get_tpointer (tlsreg));
    }
  else
    tlsreg = gen_rtx_REG (DImode, 13);

  /* Load the TOC value into a temporary register.  */
  tmpreg = gen_reg_rtx (Pmode);
  emit_insn (gen_rtx_SET (tmpreg, mem));
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MINUS (Pmode, addr, tlsreg));

  /* Add TOC symbol value to TLS pointer.  */
  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));

  return dest;
}
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  */

static rtx
rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
{
  rtx dest, insn;

  if (TARGET_XCOFF)
    return rs6000_legitimize_tls_address_aix (addr, model);

  dest = gen_reg_rtx (Pmode);
  if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
    {
      rtx tlsreg;

      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_64 (dest, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_32 (dest, tlsreg, addr);
	}
      emit_insn (insn);
    }
  else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
    {
      rtx tlsreg, tmp;

      tmp = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
	}
      emit_insn (insn);
      if (TARGET_64BIT)
	insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
      else
	insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
      emit_insn (insn);
    }
  else
    {
      rtx r3, got, tga, tmp1, tmp2, call_insn;

      /* We currently use relocations like @got@tlsgd for tls, which
	 means the linker will handle allocation of tls entries, placing
	 them in the .got section.  So use a pointer to the .got section,
	 not one to secondary TOC sections used by 64-bit -mminimal-toc,
	 or to secondary GOT sections used by 32-bit -fPIC.  */
      if (TARGET_64BIT)
	got = gen_rtx_REG (Pmode, 2);
      else
	{
	  if (flag_pic == 1)
	    got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
	  else
	    {
	      rtx gsym = rs6000_got_sym ();
	      got = gen_reg_rtx (Pmode);
	      if (flag_pic == 0)
		rs6000_emit_move (got, gsym, Pmode);
	      else
		{
		  rtx mem, lab;

		  tmp1 = gen_reg_rtx (Pmode);
		  tmp2 = gen_reg_rtx (Pmode);
		  mem = gen_const_mem (Pmode, tmp1);
		  lab = gen_label_rtx ();
		  emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
		  emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
		  if (TARGET_LINK_STACK)
		    emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
		  emit_move_insn (tmp2, mem);
		  rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
		  set_unique_reg_note (last, REG_EQUAL, gsym);
		}
	    }
	}

      if (model == TLS_MODEL_GLOBAL_DYNAMIC)
	{
	  tga = rs6000_tls_get_addr ();
	  emit_library_call_value (tga, dest, LCT_CONST, Pmode,
				   1, const0_rtx, Pmode);

	  r3 = gen_rtx_REG (Pmode, 3);
	  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
	      else
		insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
	    }
	  else if (DEFAULT_ABI == ABI_V4)
	    insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
	  else
	    gcc_unreachable ();
	  call_insn = last_call_insn ();
	  PATTERN (call_insn) = insn;
	  if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
	    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
		     pic_offset_table_rtx);
	}
      else if (model == TLS_MODEL_LOCAL_DYNAMIC)
	{
	  tga = rs6000_tls_get_addr ();
	  tmp1 = gen_reg_rtx (Pmode);
	  emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
				   1, const0_rtx, Pmode);

	  r3 = gen_rtx_REG (Pmode, 3);
	  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
	      else
		insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
	    }
	  else if (DEFAULT_ABI == ABI_V4)
	    insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
	  else
	    gcc_unreachable ();
	  call_insn = last_call_insn ();
	  PATTERN (call_insn) = insn;
	  if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
	    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
		     pic_offset_table_rtx);

	  if (rs6000_tls_size == 16)
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_64 (dest, tmp1, addr);
	      else
		insn = gen_tls_dtprel_32 (dest, tmp1, addr);
	    }
	  else if (rs6000_tls_size == 32)
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
	      else
		insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
	      emit_insn (insn);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
	    }
	  else
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
	      else
		insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
	      emit_insn (insn);
	      insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
	    }
	  emit_insn (insn);
	}
      else
	{
	  /* IE, or 64-bit offset LE.  */
	  tmp2 = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    insn = gen_tls_got_tprel_64 (tmp2, got, addr);
	  else
	    insn = gen_tls_got_tprel_32 (tmp2, got, addr);
	  emit_insn (insn);
	  if (TARGET_64BIT)
	    insn = gen_tls_tls_64 (dest, tmp2, addr);
	  else
	    insn = gen_tls_tls_32 (dest, tmp2, addr);
	  emit_insn (insn);
	}
    }

  return dest;
}
/* Only create the global variable for the stack protect guard if we are using
   the global flavor of that guard.  */
static tree
rs6000_init_stack_protect_guard (void)
{
  if (rs6000_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  if (GET_CODE (x) == HIGH
      && GET_CODE (XEXP (x, 0)) == UNSPEC)
    return true;

  /* A TLS symbol in the TOC cannot contain a sum.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
    return true;

  /* Do not place an ELF TLS symbol in the constant pool.  */
  return TARGET_ELF && tls_referenced_p (x);
}
/* Return true iff the given SYMBOL_REF refers to a constant pool entry
   that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
   can be addressed relative to the toc pointer.  */

static bool
use_toc_relative_ref (rtx sym, machine_mode mode)
{
  return ((constant_pool_expr_p (sym)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
					       get_pool_mode (sym)))
	  || (TARGET_CMODEL == CMODEL_MEDIUM
	      && SYMBOL_REF_LOCAL_P (sym)
	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
}
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.

   For RS/6000, we wish to handle large displacements off a base
   register by splitting the addend across an addi/addis and the mem insn.
   This cuts the number of extra insns needed from 3 to 1.

   On Darwin, we use this to generate code for floating point constants.
   A movsf_low is generated so we wind up with 2 instructions rather than 3.
   The Darwin code is inside #if TARGET_MACHO because only then are the
   machopic_* functions defined.  */
static rtx
rs6000_legitimize_reload_address (rtx x, machine_mode mode,
				  int opnum, int type,
				  int ind_levels ATTRIBUTE_UNUSED, int *win)
{
  bool reg_offset_p = reg_offset_addressing_ok_p (mode);
  bool quad_offset_p = mode_supports_vsx_dform_quad (mode);

  /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
     DFmode/DImode MEM.  Ditto for ISA 3.0 vsx_splat_v4sf/v4si.  */
  if (reg_offset_p
      && opnum == 1
      && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
	  || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
	  || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
	      && TARGET_P9_VECTOR)
	  || (mode == SImode && recog_data.operand_mode[0] == V4SImode
	      && TARGET_P9_VECTOR)))
    reg_offset_p = false;

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    {
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
	  debug_rtx (x);
	}
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

  /* Likewise for (lo_sum (high ...) ...) output we have generated.  */
  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 0)) == HIGH)
    {
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
	  debug_rtx (x);
	}
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

#if TARGET_MACHO
  if (DEFAULT_ABI == ABI_DARWIN && flag_pic
      && GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 0)) == PLUS
      && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
      && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
      && machopic_operand_p (XEXP (x, 1)))
    {
      /* Result of previous invocation of this function on Darwin
	 floating point constant.  */
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }
#endif

  if (TARGET_CMODEL != CMODEL_SMALL
      && reg_offset_p
      && !quad_offset_p
      && small_toc_ref (x, VOIDmode))
    {
      rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
      x = gen_rtx_LO_SUM (Pmode, hi, x);
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
	  debug_rtx (x);
	}
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
      && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
      && CONST_INT_P (XEXP (x, 1))
      && reg_offset_p
      && !SPE_VECTOR_MODE (mode)
      && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
      && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT high
	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;

      /* Check for 32-bit overflow or quad addresses with one of the
	 four least significant bits set.  */
      if (high + low != val
	  || (quad_offset_p && (low & 0xf)))
	{
	  *win = 0;
	  return x;
	}

      /* Reload the high part into a base reg; leave the low part
	 in the mem directly.  */

      x = gen_rtx_PLUS (GET_MODE (x),
			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
				      GEN_INT (high)),
			GEN_INT (low));

      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
	  debug_rtx (x);
	}
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

  if (GET_CODE (x) == SYMBOL_REF
      && reg_offset_p
      && !quad_offset_p
      && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
      && !SPE_VECTOR_MODE (mode)
#if TARGET_MACHO
      && DEFAULT_ABI == ABI_DARWIN
      && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
      && machopic_symbol_defined_p (x)
#else
      && DEFAULT_ABI == ABI_V4
      && !flag_pic
#endif
      /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
	 The same goes for DImode without 64-bit gprs and DFmode and DDmode
	 without fprs.
	 ??? Assume floating point reg based on mode?  This assumption is
	 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
	 where reload ends up doing a DFmode load of a constant from
	 mem using two gprs.  Unfortunately, at this point reload
	 hasn't yet selected regs so poking around in reload data
	 won't help and even if we could figure out the regs reliably,
	 we'd still want to allow this transformation when the mem is
	 naturally aligned.  Since we say the address is good here, we
	 can't disable offsets from LO_SUMs in mem_operand_gpr.
	 FIXME: Allow offset from lo_sum for other modes too, when
	 mem is sufficiently aligned.

	 Also disallow this if the type can go in VMX/Altivec registers, since
	 those registers do not have d-form (reg+offset) address modes.  */
      && !reg_addr[mode].scalar_in_vmx_p
      && mode != TFmode
      && mode != TDmode
      && mode != IFmode
      && mode != KFmode
      && (mode != TImode || !TARGET_VSX_TIMODE)
      && mode != PTImode
      && (mode != DImode || TARGET_POWERPC64)
      && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
	  || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
    {
#if TARGET_MACHO
      if (flag_pic)
	{
	  rtx offset = machopic_gen_offset (x);
	  x = gen_rtx_LO_SUM (GET_MODE (x),
			      gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
					    gen_rtx_HIGH (Pmode, offset)),
			      offset);
	}
      else
#endif
	x = gen_rtx_LO_SUM (GET_MODE (x),
			    gen_rtx_HIGH (Pmode, x), x);

      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
	  debug_rtx (x);
	}
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

  /* Reload an offset address wrapped by an AND that represents the
     masking of the lower bits.  Strip the outer AND and let reload
     convert the offset address into an indirect address.  For VSX,
     force reload to create the address with an AND in a separate
     register, because we can't guarantee an altivec register will
     be used.  */
  if (VECTOR_MEM_ALTIVEC_P (mode)
      && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == -16)
    {
      x = XEXP (x, 0);
      *win = 1;
      return x;
    }

  if (TARGET_TOC
      && reg_offset_p
      && !quad_offset_p
      && GET_CODE (x) == SYMBOL_REF
      && use_toc_relative_ref (x, mode))
    {
      x = create_TOC_reference (x, NULL_RTX);
      if (TARGET_CMODEL != CMODEL_SMALL)
	{
	  if (TARGET_DEBUG_ADDR)
	    {
	      fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
	      debug_rtx (x);
	    }
	  push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	}
      *win = 1;
      return x;
    }

  *win = 0;
  return x;
}
9971 rs6000_debug_legitimize_reload_address (rtx x
, machine_mode mode
,
9972 int opnum
, int type
,
9973 int ind_levels
, int *win
)
9975 rtx ret
= rs6000_legitimize_reload_address (x
, mode
, opnum
, type
,
9978 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9979 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9980 GET_MODE_NAME (mode
), opnum
, type
, ind_levels
, *win
);
9984 fprintf (stderr
, "Same address returned\n");
9986 fprintf (stderr
, "NULL returned\n");
9989 fprintf (stderr
, "New address:\n");
/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
   that is a valid memory address for an instruction.
   The MODE argument is the machine mode for the MEM expression
   that wants to use this address.

   On the RS/6000, there are four valid addresses: a SYMBOL_REF that
   refers to a constant pool entry of an address (or the sum of it
   plus a constant), a short (16-bit signed) constant plus a register,
   the sum of two registers, or a register indirect, possibly with an
   auto-increment.  For DFmode, DDmode and DImode with a constant plus
   register, we must ensure that both words are addressable or PowerPC64
   with offset word aligned.

   For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
   32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
   because adjacent memory cells are accessed by adding word-sized offsets
   during assembly output.  */

static bool
rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
{
  bool reg_offset_p = reg_offset_addressing_ok_p (mode);
  bool quad_offset_p = mode_supports_vsx_dform_quad (mode);

  /* If this is an unaligned stvx/ldvx type address, discard the outer AND.  */
  if (VECTOR_MEM_ALTIVEC_P (mode)
      && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == -16)
    x = XEXP (x, 0);

  if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
    return 0;
  if (legitimate_indirect_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE
      && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
    return 1;

  /* Handle restricted vector d-form offsets in ISA 3.0.  */
  if (quad_offset_p)
    {
      if (quad_address_p (x, mode, reg_ok_strict))
	return 1;
    }
  else if (virtual_stack_registers_memory_p (x))
    return 1;

  else if (reg_offset_p)
    {
      if (legitimate_small_data_p (mode, x))
	return 1;
      if (legitimate_constant_pool_address_p (x, mode,
					     reg_ok_strict || lra_in_progress))
	return 1;
      if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
	  && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
	return 1;
    }

  /* For TImode, if we have TImode in VSX registers, only allow register
     indirect addresses.  This will allow the values to go in either GPRs
     or VSX registers without reloading.  The vector types would tend to
     go into VSX registers, so we allow REG+REG, while TImode seems
     somewhat split, in that some uses are GPR based, and some VSX based.  */
  /* FIXME: We could loosen this by changing the following to
       if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
     but currently we cannot allow REG+REG addressing for TImode.  See
     PR72827 for complete details on how this ends up hoodwinking DSE.  */
  if (mode == TImode && TARGET_VSX_TIMODE)
    return 0;

  /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
  if (! reg_ok_strict
      && reg_offset_p
      && GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && (XEXP (x, 0) == virtual_stack_vars_rtx
	  || XEXP (x, 0) == arg_pointer_rtx)
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    return 1;
  if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
    return 1;
  if (!FLOAT128_2REG_P (mode)
      && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
	  || TARGET_POWERPC64
	  || (mode != DFmode && mode != DDmode)
	  || (TARGET_E500_DOUBLE && mode != DDmode))
      && (TARGET_POWERPC64 || mode != DImode)
      && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
      && mode != PTImode
      && !avoiding_indexed_address_p (mode)
      && legitimate_indexed_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
      && mode_supports_pre_modify_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
      && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
					      reg_ok_strict, false)
	  || (!avoiding_indexed_address_p (mode)
	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
      && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    return 1;
  if (reg_offset_p && !quad_offset_p
      && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
    return 1;
  return 0;
}
/* Debug version of rs6000_legitimate_address_p.  */
static bool
rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
				   bool reg_ok_strict)
{
  bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
  fprintf (stderr,
	   "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
	   "strict = %d, reload = %s, code = %s\n",
	   ret ? "true" : "false",
	   GET_MODE_NAME (mode),
	   reg_ok_strict,
	   (reload_completed
	    ? "after"
	    : (reload_in_progress ? "progress" : "before")),
	   GET_RTX_NAME (GET_CODE (x)));
  debug_rtx (x);

  return ret;
}
/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  */

static bool
rs6000_mode_dependent_address_p (const_rtx addr,
				 addr_space_t as ATTRIBUTE_UNUSED)
{
  return rs6000_mode_dependent_address_ptr (addr);
}
/* Go to LABEL if ADDR (a legitimate address expression)
   has an effect that depends on the machine mode it is used for.

   On the RS/6000 this is true of all integral offsets (since AltiVec
   and VSX modes don't allow them) or is a pre-increment or decrement.

   ??? Except that due to conceptual problems in offsettable_address_p
   we can't really report the problems of integral offsets.  So leave
   this assuming that the adjustable offset must be valid for the
   sub-words of a TFmode operand, which is what we had before.  */

static bool
rs6000_mode_dependent_address (const_rtx addr)
{
  switch (GET_CODE (addr))
    {
    case PLUS:
      /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
	 is considered a legitimate address before reload, so there
	 are no offset restrictions in that case.  Note that this
	 condition is safe in strict mode because any address involving
	 virtual_stack_vars_rtx or arg_pointer_rtx would already have
	 been rejected as illegitimate.  */
      if (XEXP (addr, 0) != virtual_stack_vars_rtx
	  && XEXP (addr, 0) != arg_pointer_rtx
	  && GET_CODE (XEXP (addr, 1)) == CONST_INT)
	{
	  unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
	  return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
	}
      break;

    case LO_SUM:
      /* Anything in the constant pool is sufficiently aligned that
	 all bytes have the same high part address.  */
      return !legitimate_constant_pool_address_p (addr, QImode, false);

    /* Auto-increment cases are now treated generically in recog.c.  */
    case PRE_MODIFY:
      return TARGET_UPDATE;

    /* AND is only allowed in Altivec loads.  */
    case AND:
      return true;

    default:
      break;
    }

  return false;
}
/* Debug version of rs6000_mode_dependent_address.  */
static bool
rs6000_debug_mode_dependent_address (const_rtx addr)
{
  bool ret = rs6000_mode_dependent_address (addr);

  fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
	   ret ? "true" : "false");
  debug_rtx (CONST_CAST_RTX (addr));

  return ret;
}
/* Implement FIND_BASE_TERM.  */

rtx
rs6000_find_base_term (rtx op)
{
  rtx base;

  base = op;
  if (GET_CODE (base) == CONST)
    base = XEXP (base, 0);
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  if (GET_CODE (base) == UNSPEC)
    switch (XINT (base, 1))
      {
      case UNSPEC_TOCREL:
      case UNSPEC_MACHOPIC_OFFSET:
	/* OP represents SYM [+ OFFSET] - ANCHOR.  SYM is the base term
	   for aliasing purposes.  */
	return XVECEXP (base, 0, 0);
      }

  return op;
}
/* More elaborate version of recog's offsettable_memref_p predicate
   that works around the ??? note of rs6000_mode_dependent_address.
   In particular it accepts

     (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))

   in 32-bit mode, that the recog predicate rejects.  */

static bool
rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
{
  bool worst_case;

  if (!MEM_P (op))
    return false;

  /* First mimic offsettable_memref_p.  */
  if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
    return true;

  /* offsettable_address_p invokes rs6000_mode_dependent_address, but
     the latter predicate knows nothing about the mode of the memory
     reference and, therefore, assumes that it is the largest supported
     mode (TFmode).  As a consequence, legitimate offsettable memory
     references are rejected.  rs6000_legitimate_offset_address_p contains
     the correct logic for the PLUS case of rs6000_mode_dependent_address,
     at least with a little bit of help here given that we know the
     actual registers used.  */
  worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
		|| GET_MODE_SIZE (reg_mode) == 4);
  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
					     true, worst_case);
}
/* Determine the reassociation width to be used in reassociate_bb.
   This takes into account how many parallel operations we
   can actually do of a given type, and also the latency.
   P8:
      int add/sub 6/cycle
	  mul 2/cycle
      vect add/sub/mul 2/cycle
      fp   add/sub/mul 2/cycle
      dfp  1/cycle
*/

static int
rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			    enum machine_mode mode)
{
  switch (rs6000_cpu)
    {
    case PROCESSOR_POWER8:
    case PROCESSOR_POWER9:
      if (DECIMAL_FLOAT_MODE_P (mode))
	return 1;
      if (VECTOR_MODE_P (mode))
	return 4;
      if (INTEGRAL_MODE_P (mode))
	return opc == MULT_EXPR ? 4 : 6;
      if (FLOAT_MODE_P (mode))
	return 4;
      break;
    default:
      break;
    }
  return 1;
}
/* Change register usage conditional on target flags.  */
static void
rs6000_conditional_register_usage (void)
{
  int i;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_conditional_register_usage called\n");

  /* Set MQ register fixed (already call_used) so that it will not be
     allocated.  */
  fixed_regs[64] = 1;

  /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
  if (TARGET_64BIT)
    fixed_regs[13] = call_used_regs[13]
      = call_really_used_regs[13] = 1;

  /* Conditionally disable FPRs.  */
  if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
    for (i = 32; i < 64; i++)
      fixed_regs[i] = call_used_regs[i]
	= call_really_used_regs[i] = 1;

  /* The TOC register is not killed across calls in a way that is
     visible to the compiler.  */
  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    call_really_used_regs[2] = 0;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (TARGET_TOC && TARGET_MINIMAL_TOC)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (TARGET_SPE)
    {
      global_regs[SPEFSCR_REGNO] = 1;
      /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
	 registers in prologues and epilogues.  We no longer use r14
	 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
	 pool for link-compatibility with older versions of GCC.  Once
	 "old" code has died out, we can return r14 to the allocation
	 pool.  */
      fixed_regs[14]
	= call_used_regs[14]
	= call_really_used_regs[14] = 1;
    }

  if (!TARGET_ALTIVEC && !TARGET_VSX)
    {
      for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
	fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
      call_really_used_regs[VRSAVE_REGNO] = 1;
    }

  if (TARGET_ALTIVEC || TARGET_VSX)
    global_regs[VSCR_REGNO] = 1;

  if (TARGET_ALTIVEC_ABI)
    {
      for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
	call_used_regs[i] = call_really_used_regs[i] = 1;

      /* AIX reserves VR20:31 in non-extended ABI mode.  */
      if (TARGET_XCOFF)
	for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
	  fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
    }
}
/* Output insns to set DEST equal to the constant SOURCE as a series of
   lis, ori and shl instructions and return TRUE.  */

bool
rs6000_emit_set_const (rtx dest, rtx source)
{
  machine_mode mode = GET_MODE (dest);
  rtx temp, set;
  rtx_insn *insn;
  HOST_WIDE_INT c;

  gcc_checking_assert (CONST_INT_P (source));
  c = INTVAL (source);
  switch (mode)
    {
    case QImode:
    case HImode:
      emit_insn (gen_rtx_SET (dest, source));
      return true;

    case SImode:
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);

      emit_insn (gen_rtx_SET (copy_rtx (temp),
			      GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
      emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IOR (SImode, copy_rtx (temp),
					   GEN_INT (c & 0xffff))));
      break;

    case DImode:
      if (!TARGET_POWERPC64)
	{
	  rtx hi, lo;

	  hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
				      DImode);
	  lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
				      DImode);
	  emit_move_insn (hi, GEN_INT (c >> 32));
	  c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
	  emit_move_insn (lo, GEN_INT (c));
	}
      else
	rs6000_emit_set_long_const (dest, c);
      break;

    default:
      gcc_unreachable ();
    }

  insn = get_last_insn ();
  set = single_set (insn);
  if (! CONSTANT_P (SET_SRC (set)))
    set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));

  return true;
}
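/* Illustrative sketch (not part of the original source; register name rD is
   hypothetical): for SImode the two insns emitted above typically assemble
   to a lis/ori pair, e.g. for the constant 0x12345678:

	lis  rD,0x1234		# rD = 0x12340000
	ori  rD,rD,0x5678	# rD |= 0x00005678

   The exact instructions depend on the patterns later passes choose.  */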
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
   Output insns to set DEST equal to the constant C as a series of
   lis, ori and shl instructions.  */

static void
rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
{
  rtx temp;
  HOST_WIDE_INT ud1, ud2, ud3, ud4;

  ud1 = c & 0xffff;
  c = c >> 16;
  ud2 = c & 0xffff;
  c = c >> 16;
  ud3 = c & 0xffff;
  c = c >> 16;
  ud4 = c & 0xffff;

  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
    emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));

  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
  else if (ud3 == 0 && ud4 == 0)
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      gcc_assert (ud2 & 0x8000);
      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
      emit_move_insn (dest,
		      gen_rtx_ZERO_EXTEND (DImode,
					   gen_lowpart (SImode,
							copy_rtx (temp))));
    }
  else if ((ud4 == 0xffff && (ud3 & 0x8000))
	   || (ud4 == 0 && ! (ud3 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
      if (ud2 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud2)));
      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
				      GEN_INT (16)));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
  else
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
      if (ud3 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud3)));

      emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
				      GEN_INT (32)));
      if (ud2 != 0)
	emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud2 << 16)));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
}
/* Helper for the following.  Get rid of [r+r] memory refs
   in cases where it won't work (TImode, TFmode, TDmode, PTImode).  */

static void
rs6000_eliminate_indexed_memrefs (rtx operands[2])
{
  if (reload_in_progress)
    return;

  if (GET_CODE (operands[0]) == MEM
      && GET_CODE (XEXP (operands[0], 0)) != REG
      && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
					       GET_MODE (operands[0]), false))
    operands[0]
      = replace_equiv_address (operands[0],
			       copy_addr_to_reg (XEXP (operands[0], 0)));

  if (GET_CODE (operands[1]) == MEM
      && GET_CODE (XEXP (operands[1], 0)) != REG
      && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
					       GET_MODE (operands[1]), false))
    operands[1]
      = replace_equiv_address (operands[1],
			       copy_addr_to_reg (XEXP (operands[1], 0)));
}
/* Generate a vector of constants to permute MODE for a little-endian
   storage operation by swapping the two halves of a vector.  */
static rtvec
rs6000_const_vec (machine_mode mode)
{
  int i, subparts;
  rtvec v;

  switch (mode)
    {
    case V1TImode:
      subparts = 1;
      break;
    case V2DFmode:
    case V2DImode:
      subparts = 2;
      break;
    case V4SFmode:
    case V4SImode:
      subparts = 4;
      break;
    case V8HImode:
      subparts = 8;
      break;
    case V16QImode:
      subparts = 16;
      break;
    default:
      gcc_unreachable ();
    }

  v = rtvec_alloc (subparts);

  for (i = 0; i < subparts / 2; ++i)
    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
  for (i = subparts / 2; i < subparts; ++i)
    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);

  return v;
}
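/* Illustrative sketch (not part of the original source): for V4SImode the
   two loops above build the selector {2, 3, 0, 1}, so a VEC_SELECT using
   this parallel swaps the two 64-bit halves of the vector.  */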
/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
   for a VSX load or store operation.  */
static rtx
rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
{
  /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
     128-bit integers if they are allowed in VSX registers.  */
  if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
    return gen_rtx_ROTATE (mode, source, GEN_INT (64));
  else
    {
      rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
      return gen_rtx_VEC_SELECT (mode, source, par);
    }
}
/* Emit a little-endian load from vector memory location SOURCE to VSX
   register DEST in mode MODE.  The load is done with two permuting
   insns that represent an lxvd2x and xxpermdi.  */
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
  rtx tmp, permute_mem, permute_reg;

  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = gen_lowpart (V2DImode, dest);
      source = adjust_address (source, V2DImode, 0);
    }

  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
  permute_mem = rs6000_gen_le_vsx_permute (source, mode);
  permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
  emit_insn (gen_rtx_SET (tmp, permute_mem));
  emit_insn (gen_rtx_SET (dest, permute_reg));
}
/* Emit a little-endian store to vector memory location DEST from VSX
   register SOURCE in mode MODE.  The store is done with two permuting
   insns that represent an xxpermdi and an stxvd2x.  */
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
  rtx tmp, permute_src, permute_tmp;

  /* This should never be called during or after reload, because it does
     not re-permute the source register.  It is intended only for use
     during expand.  */
  gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);

  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = adjust_address (dest, V2DImode, 0);
      source = gen_lowpart (V2DImode, source);
    }

  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
  permute_src = rs6000_gen_le_vsx_permute (source, mode);
  permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
  emit_insn (gen_rtx_SET (tmp, permute_src));
  emit_insn (gen_rtx_SET (dest, permute_tmp));
}
/* Emit a sequence representing a little-endian VSX load or store,
   moving data from SOURCE to DEST in mode MODE.  This is done
   separately from rs6000_emit_move to ensure it is called only
   during expand.  LE VSX loads and stores introduced later are
   handled with a split.  The expand-time RTL generation allows
   us to optimize away redundant pairs of register-permutes.  */
void
rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
{
  gcc_assert (!BYTES_BIG_ENDIAN
	      && VECTOR_MEM_VSX_P (mode)
	      && !TARGET_P9_VECTOR
	      && !gpr_or_gpr_p (dest, source)
	      && (MEM_P (source) ^ MEM_P (dest)));

  if (MEM_P (source))
    {
      gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
      rs6000_emit_le_vsx_load (dest, source, mode);
    }
  else
    {
      if (!REG_P (source))
	source = force_reg (mode, source);
      rs6000_emit_le_vsx_store (dest, source, mode);
    }
}
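/* Illustrative sketch (not part of the original source): a LE VSX load in
   V4SImode expands to the two permuting insns

	(set (reg:V4SI tmp) (vec_select:V4SI (mem:V4SI addr)
					     (parallel [2 3 0 1])))
	(set (reg:V4SI dst) (vec_select:V4SI (reg:V4SI tmp)
					     (parallel [2 3 0 1])))

   so back-to-back load/store pairs produce adjacent cancelling permutes
   that the later swap-optimization pass can delete.  */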
/* Return whether an SFmode or SImode move can be done without converting one
   mode to another.  This arises when we have:

	(SUBREG:SF (REG:SI ...))
	(SUBREG:SI (REG:SF ...))

   and one of the values is in a floating point/vector register, where SFmode
   scalars are stored in DFmode format.  */

bool
valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
{
  if (TARGET_ALLOW_SF_SUBREG)
    return true;

  if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
    return true;

  if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
    return true;

  /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
  if (SUBREG_P (dest))
    {
      rtx dest_subreg = SUBREG_REG (dest);
      rtx src_subreg = SUBREG_REG (src);
      return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
    }

  return false;
}
/* Helper function to change moves with:

	(SUBREG:SF (REG:SI)) and
	(SUBREG:SI (REG:SF))

   into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
   values are stored as DFmode values in the VSX registers.  We need to
   convert the bits before we can use a direct move or operate on the bits
   in the vector register as an integer type.

   Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)).  */

static bool
rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
{
  if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
      && !lra_in_progress
      && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
      && SUBREG_P (source) && sf_subreg_operand (source, mode))
    {
      rtx inner_source = SUBREG_REG (source);
      machine_mode inner_mode = GET_MODE (inner_source);

      if (mode == SImode && inner_mode == SFmode)
	{
	  emit_insn (gen_movsi_from_sf (dest, inner_source));
	  return true;
	}

      if (mode == SFmode && inner_mode == SImode)
	{
	  emit_insn (gen_movsf_from_si (dest, inner_source));
	  return true;
	}
    }

  return false;
}
/* Emit a move from SOURCE to DEST in mode MODE.  */
void
rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
{
  rtx operands[2];

  operands[0] = dest;
  operands[1] = source;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
	       GET_MODE_NAME (mode),
	       reload_in_progress,
	       reload_completed,
	       can_create_pseudo_p ());
      debug_rtx (dest);
      fprintf (stderr, "source:\n");
      debug_rtx (source);
    }

  /* Sanity checks.  Check that we get CONST_DOUBLE only when we should.  */
  if (CONST_WIDE_INT_P (operands[1])
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      /* This should be fixed with the introduction of CONST_WIDE_INT.  */
      gcc_unreachable ();
    }

  /* See if we need to special case SImode/SFmode SUBREG moves.  */
  if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
      && rs6000_emit_move_si_sf_subreg (dest, source, mode))
    return;

  /* Check if GCC is setting up a block move that will end up using FP
     registers as temporaries.  We must make sure this is acceptable.  */
  if (GET_CODE (operands[0]) == MEM
      && GET_CODE (operands[1]) == MEM
      && mode == DImode
      && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
	  || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
      && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
					    ? 32 : MEM_ALIGN (operands[0])))
	    || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
					       ? 32
					       : MEM_ALIGN (operands[1]))))
      && ! MEM_VOLATILE_P (operands[0])
      && ! MEM_VOLATILE_P (operands[1]))
    {
      emit_move_insn (adjust_address (operands[0], SImode, 0),
		      adjust_address (operands[1], SImode, 0));
      emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
		      adjust_address (copy_rtx (operands[1]), SImode, 4));
      return;
    }

  if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
      && !gpc_reg_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);

  /* Recognize the case where operand[1] is a reference to thread-local
     data and load its address to a register.  */
  if (tls_referenced_p (operands[1]))
    {
      enum tls_model model;
      rtx tmp = operands[1];
      rtx addend = NULL;

      if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (tmp, 0), 1);
	  tmp = XEXP (XEXP (tmp, 0), 0);
	}

      gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
      model = SYMBOL_REF_TLS_MODEL (tmp);
      gcc_assert (model != 0);

      tmp = rs6000_legitimize_tls_address (tmp, model);
      if (addend)
	{
	  tmp = gen_rtx_PLUS (mode, tmp, addend);
	  tmp = force_operand (tmp, operands[0]);
	}
      operands[1] = tmp;
    }

  /* Handle the case where reload calls us with an invalid address.  */
  if (reload_in_progress && mode == Pmode
      && (! general_operand (operands[1], mode)
	  || ! nonimmediate_operand (operands[0], mode)))
    goto emit_set;

  /* 128-bit constant floating-point values on Darwin should really be loaded
     as two parts.  However, this premature splitting is a problem when DFmode
     values can go into Altivec registers.  */
  if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
      && GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
			simplify_gen_subreg (DFmode, operands[1], mode, 0),
			DFmode);
      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
					     GET_MODE_SIZE (DFmode)),
			simplify_gen_subreg (DFmode, operands[1], mode,
					     GET_MODE_SIZE (DFmode)),
			DFmode);
      return;
    }

  if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
    cfun->machine->sdmode_stack_slot =
      eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);


  /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
     p1:SD) if p1 is not of floating point class and p0 is spilled as
     we can have no analogous movsd_store for this.  */
  if (lra_in_progress && mode == DDmode
      && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
      && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
      && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
    {
      enum reg_class cl;
      int regno = REGNO (SUBREG_REG (operands[1]));

      if (regno >= FIRST_PSEUDO_REGISTER)
	{
	  cl = reg_preferred_class (regno);
	  regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
	}
      if (regno >= 0 && ! FP_REGNO_P (regno))
	{
	  mode = SDmode;
	  operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
	  operands[1] = SUBREG_REG (operands[1]);
	}
    }
  if (lra_in_progress
      && mode == SDmode
      && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
      && (REG_P (operands[1])
	  || (GET_CODE (operands[1]) == SUBREG
	      && REG_P (SUBREG_REG (operands[1])))))
    {
      int regno = REGNO (GET_CODE (operands[1]) == SUBREG
			 ? SUBREG_REG (operands[1]) : operands[1]);
      enum reg_class cl;

      if (regno >= FIRST_PSEUDO_REGISTER)
	{
	  cl = reg_preferred_class (regno);
	  gcc_assert (cl != NO_REGS);
	  regno = ira_class_hard_regs[cl][0];
	}
      if (FP_REGNO_P (regno))
	{
	  if (GET_MODE (operands[0]) != DDmode)
	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
	  emit_insn (gen_movsd_store (operands[0], operands[1]));
	}
      else if (INT_REGNO_P (regno))
	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
      else
	gcc_unreachable ();
      return;
    }
  /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
     p:DD)) if p0 is not of floating point class and p1 is spilled as
     we can have no analogous movsd_load for this.  */
  if (lra_in_progress && mode == DDmode
      && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
      && GET_MODE (SUBREG_REG (operands[0])) == SDmode
      && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
    {
      enum reg_class cl;
      int regno = REGNO (SUBREG_REG (operands[0]));

      if (regno >= FIRST_PSEUDO_REGISTER)
	{
	  cl = reg_preferred_class (regno);
	  regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
	}
      if (regno >= 0 && ! FP_REGNO_P (regno))
	{
	  mode = SDmode;
	  operands[0] = SUBREG_REG (operands[0]);
	  operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
	}
    }
  if (lra_in_progress
      && mode == SDmode
      && (REG_P (operands[0])
	  || (GET_CODE (operands[0]) == SUBREG
	      && REG_P (SUBREG_REG (operands[0]))))
      && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
    {
      int regno = REGNO (GET_CODE (operands[0]) == SUBREG
			 ? SUBREG_REG (operands[0]) : operands[0]);
      enum reg_class cl;

      if (regno >= FIRST_PSEUDO_REGISTER)
	{
	  cl = reg_preferred_class (regno);
	  gcc_assert (cl != NO_REGS);
	  regno = ira_class_hard_regs[cl][0];
	}
      if (FP_REGNO_P (regno))
	{
	  if (GET_MODE (operands[1]) != DDmode)
	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
	  emit_insn (gen_movsd_load (operands[0], operands[1]));
	}
      else if (INT_REGNO_P (regno))
	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
      else
	gcc_unreachable ();
      return;
    }

  if (reload_in_progress
      && mode == SDmode
      && cfun->machine->sdmode_stack_slot != NULL_RTX
      && MEM_P (operands[0])
      && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
      && REG_P (operands[1]))
    {
      if (FP_REGNO_P (REGNO (operands[1])))
	{
	  rtx mem = adjust_address_nv (operands[0], DDmode, 0);
	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
	  emit_insn (gen_movsd_store (mem, operands[1]));
	}
      else if (INT_REGNO_P (REGNO (operands[1])))
	{
	  rtx mem = operands[0];
	  if (BYTES_BIG_ENDIAN)
	    mem = adjust_address_nv (mem, mode, 4);
	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
	  emit_insn (gen_movsd_hardfloat (mem, operands[1]));
	}
      else
	gcc_unreachable ();
      return;
    }
  if (reload_in_progress
      && mode == SDmode
      && REG_P (operands[0])
      && MEM_P (operands[1])
      && cfun->machine->sdmode_stack_slot != NULL_RTX
      && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
    {
      if (FP_REGNO_P (REGNO (operands[0])))
	{
	  rtx mem = adjust_address_nv (operands[1], DDmode, 0);
	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
	  emit_insn (gen_movsd_load (operands[0], mem));
	}
      else if (INT_REGNO_P (REGNO (operands[0])))
	{
	  rtx mem = operands[1];
	  if (BYTES_BIG_ENDIAN)
	    mem = adjust_address_nv (mem, mode, 4);
	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
	  emit_insn (gen_movsd_hardfloat (operands[0], mem));
	}
      else
	gcc_unreachable ();
      return;
    }

  /* FIXME:  In the long term, this switch statement should go away
     and be replaced by a sequence of tests based on things like
     mode == Pmode.  */
  switch (mode)
    {
    case HImode:
    case QImode:
      if (CONSTANT_P (operands[1])
	  && GET_CODE (operands[1]) != CONST_INT)
	operands[1] = force_const_mem (mode, operands[1]);
      break;

    case TFmode:
    case TDmode:
    case IFmode:
    case KFmode:
      if (FLOAT128_2REG_P (mode))
	rs6000_eliminate_indexed_memrefs (operands);
      /* fall through */

    case DFmode:
    case DDmode:
    case SFmode:
    case SDmode:
      if (CONSTANT_P (operands[1])
	  && ! easy_fp_constant (operands[1], mode))
	operands[1] = force_const_mem (mode, operands[1]);
      break;

    case V16QImode:
    case V8HImode:
    case V4SFmode:
    case V4SImode:
    case V4HImode:
    case V2SFmode:
    case V2SImode:
    case V1DImode:
    case V2DFmode:
    case V2DImode:
    case V1TImode:
      if (CONSTANT_P (operands[1])
	  && !easy_vector_constant (operands[1], mode))
	operands[1] = force_const_mem (mode, operands[1]);
      break;

    case SImode:
    case DImode:
      /* Use default pattern for address of ELF small data */
      if (TARGET_ELF
	  && mode == Pmode
	  && DEFAULT_ABI == ABI_V4
	  && (GET_CODE (operands[1]) == SYMBOL_REF
	      || GET_CODE (operands[1]) == CONST)
	  && small_data_operand (operands[1], mode))
	{
	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
	  return;
	}

      if (DEFAULT_ABI == ABI_V4
	  && mode == Pmode && mode == SImode
	  && flag_pic == 1 && got_operand (operands[1], mode))
	{
	  emit_insn (gen_movsi_got (operands[0], operands[1]));
	  return;
	}

      if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
	  && TARGET_NO_TOC
	  && ! flag_pic
	  && mode == Pmode
	  && CONSTANT_P (operands[1])
	  && GET_CODE (operands[1]) != HIGH
	  && GET_CODE (operands[1]) != CONST_INT)
	{
	  rtx target = (!can_create_pseudo_p ()
			? operands[0]
			: gen_reg_rtx (mode));

	  /* If this is a function address on -mcall-aixdesc,
	     convert it to the address of the descriptor.  */
	  if (DEFAULT_ABI == ABI_AIX
	      && GET_CODE (operands[1]) == SYMBOL_REF
	      && XSTR (operands[1], 0)[0] == '.')
	    {
	      const char *name = XSTR (operands[1], 0);
	      rtx new_ref;
	      while (*name == '.')
		name++;
	      new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
	      CONSTANT_POOL_ADDRESS_P (new_ref)
		= CONSTANT_POOL_ADDRESS_P (operands[1]);
	      SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
	      SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
	      SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
	      operands[1] = new_ref;
	    }

	  if (DEFAULT_ABI == ABI_DARWIN)
	    {
#if TARGET_MACHO
	      if (MACHO_DYNAMIC_NO_PIC_P)
		{
		  /* Take care of any required data indirection.  */
		  operands[1] = rs6000_machopic_legitimize_pic_address (
				  operands[1], mode, operands[0]);
		  if (operands[0] != operands[1])
		    emit_insn (gen_rtx_SET (operands[0], operands[1]));
		  return;
		}
#endif
	      emit_insn (gen_macho_high (target, operands[1]));
	      emit_insn (gen_macho_low (operands[0], target, operands[1]));
	      return;
	    }

	  emit_insn (gen_elf_high (target, operands[1]));
	  emit_insn (gen_elf_low (operands[0], target, operands[1]));
	  return;
	}

      /* If this is a SYMBOL_REF that refers to a constant pool entry,
	 and we have put it in the TOC, we just need to make a TOC-relative
	 reference to it.  */
      if (TARGET_TOC
	  && GET_CODE (operands[1]) == SYMBOL_REF
	  && use_toc_relative_ref (operands[1], mode))
	operands[1] = create_TOC_reference (operands[1], operands[0]);
      else if (mode == Pmode
	       && CONSTANT_P (operands[1])
	       && GET_CODE (operands[1]) != HIGH
	       && ((GET_CODE (operands[1]) != CONST_INT
		    && ! easy_fp_constant (operands[1], mode))
		   || (GET_CODE (operands[1]) == CONST_INT
		       && (num_insns_constant (operands[1], mode)
			   > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
		   || (GET_CODE (operands[0]) == REG
		       && FP_REGNO_P (REGNO (operands[0]))))
	       && !toc_relative_expr_p (operands[1], false)
	       && (TARGET_CMODEL == CMODEL_SMALL
		   || can_create_pseudo_p ()
		   || (REG_P (operands[0])
		       && INT_REG_OK_FOR_BASE_P (operands[0], true))))
	{

#if TARGET_MACHO
	  /* Darwin uses a special PIC legitimizer.  */
	  if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
	    {
	      operands[1] =
		rs6000_machopic_legitimize_pic_address (operands[1], mode,
							operands[0]);
	      if (operands[0] != operands[1])
		emit_insn (gen_rtx_SET (operands[0], operands[1]));
	      return;
	    }
#endif

	  /* If we are to limit the number of things we put in the TOC and
	     this is a symbol plus a constant we can add in one insn,
	     just put the symbol in the TOC and add the constant.  Don't do
	     this if reload is in progress.  */
	  if (GET_CODE (operands[1]) == CONST
	      && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
	      && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
	      && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
		  || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
	      && ! side_effects_p (operands[0]))
	    {
	      rtx sym =
		force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
	      rtx other = XEXP (XEXP (operands[1], 0), 1);

	      sym = force_reg (mode, sym);
	      emit_insn (gen_add3_insn (operands[0], sym, other));
	      return;
	    }

	  operands[1] = force_const_mem (mode, operands[1]);

	  if (TARGET_TOC
	      && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	      && use_toc_relative_ref (XEXP (operands[1], 0), mode))
	    {
	      rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
						 operands[0]);
	      operands[1] = gen_const_mem (mode, tocref);
	      set_mem_alias_set (operands[1], get_TOC_alias_set ());
	    }
	}
      break;

    case TImode:
      if (!VECTOR_MEM_VSX_P (TImode))
	rs6000_eliminate_indexed_memrefs (operands);
      break;

    case PTImode:
      rs6000_eliminate_indexed_memrefs (operands);
      break;

    default:
      fatal_insn ("bad move", gen_rtx_SET (dest, source));
    }

  /* Above, we may have called force_const_mem which may have returned
     an invalid address.  If we can, fix this up; otherwise, reload will
     have to deal with it.  */
  if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
    operands[1] = validize_mem (operands[1]);

 emit_set:
  emit_insn (gen_rtx_SET (operands[0], operands[1]));
}
/* Return true if a structure, union or array containing FIELD should be
   accessed using `BLKMODE'.

   For the SPE, simd types are V2SI, and gcc can be tempted to put the
   entire thing in a DI and use subregs to access the internals.
   store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
   back-end.  Because a single GPR can hold a V2SI, but not a DI, the
   best thing to do is set structs to BLKmode and avoid Severe Tire
   Damage.

   On e500 v2, DF and DI modes suffer from the same anomaly.  DF can
   fit into 1, whereas DI still needs two.  */

static bool
rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
{
  return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
	  || (TARGET_E500_DOUBLE && mode == DFmode));
}
/* Nonzero if we can use a floating-point register to pass this arg.  */
#define USE_FP_FOR_ARG_P(CUM,MODE)		\
  (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE)	\
   && (CUM)->fregno <= FP_ARG_MAX_REG		\
   && TARGET_HARD_FLOAT && TARGET_FPRS)

/* Nonzero if we can use an AltiVec register to pass this arg.  */
#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED)		\
  (ALTIVEC_OR_VSX_VECTOR_MODE (MODE)			\
   && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG		\
   && TARGET_ALTIVEC_ABI				\
   && (NAMED))
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   or vector type.  If a non-floating point or vector type is found, or
   if a floating point or vector type that doesn't match a non-VOIDmode
   *MODEP is found, then return -1, otherwise return the count in the
   sub-tree.  */

static int
rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (!SCALAR_FLOAT_MODE_P (mode))
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (!SCALAR_FLOAT_MODE_P (mode))
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
	return -1;

      /* Use V4SImode as representative of all 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
/* If an argument, whose type is described by TYPE and MODE, is a homogeneous
   float or vector aggregate that shall be passed in FP/vector registers
   according to the ELFv2 ABI, return the homogeneous element mode in
   *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.

   Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE.  */

static bool
rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
				       machine_mode *elt_mode,
				       int *n_elts)
{
  /* Note that we do not accept complex types at the top level as
     homogeneous aggregates; these types are handled via the
     targetm.calls.split_complex_arg mechanism.  Complex types
     can be elements of homogeneous aggregates, however.  */
  if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
    {
      machine_mode field_mode = VOIDmode;
      int field_count = rs6000_aggregate_candidate (type, &field_mode);

      if (field_count > 0)
	{
	  int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
			(GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);

	  /* The ELFv2 ABI allows homogeneous aggregates to occupy
	     up to AGGR_ARG_NUM_REG registers.  */
	  if (field_count * n_regs <= AGGR_ARG_NUM_REG)
	    {
	      if (elt_mode)
		*elt_mode = field_mode;
	      if (n_elts)
		*n_elts = field_count;
	      return true;
	    }
	}
    }

  if (elt_mode)
    *elt_mode = mode;
  if (n_elts)
    *n_elts = 1;
  return false;
}
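/* Illustrative sketch (not part of the original source): under ELFv2 a type
   such as

	struct { double x; double y; };

   is discovered as a homogeneous aggregate with *ELT_MODE == DFmode and
   *N_ELTS == 2, so it can travel in two FPRs, whereas a struct mixing a
   double with an int fails the candidate walk above and is not.  */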
/* Return a nonzero value to say to return the function value in
   memory, just as large structures are always returned.  TYPE will be
   the data type of the value, and FNTYPE will be the type of the
   function doing the returning, or @code{NULL} for libcalls.

   The AIX ABI for the RS/6000 specifies that all structures are
   returned in memory.  The Darwin ABI does the same.

   For the Darwin 64 Bit ABI, a function result can be returned in
   registers or in memory, depending on the size of the return data
   type.  If it is returned in registers, the value occupies the same
   registers as it would if it were the first and only function
   argument.  Otherwise, the function places its result in memory at
   the location pointed to by GPR3.

   The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
   but a draft put them in memory, and GCC used to implement the draft
   instead of the final standard.  Therefore, aix_struct_return
   controls this instead of DEFAULT_ABI; V.4 targets needing backward
   compatibility can change DRAFT_V4_STRUCT_RET to override the
   default, and -m switches get the final word.  See
   rs6000_option_override_internal for more details.

   The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
   long double support is enabled.  These values are returned in memory.

   int_size_in_bytes returns -1 for variable size objects, which go in
   memory always.  The cast to unsigned makes -1 > 8.  */

static bool
rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
  if (TARGET_MACHO
      && rs6000_darwin64_abi
      && TREE_CODE (type) == RECORD_TYPE
      && int_size_in_bytes (type) > 0)
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed
	 as an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
      if (valret)
	return false;
      /* Otherwise fall through to more conventional ABI rules.  */
    }

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
  if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
					     NULL, NULL))
    return false;

  /* The ELFv2 ABI returns aggregates up to 16B in registers */
  if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
      && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
    return false;

  if (AGGREGATE_TYPE_P (type)
      && (aix_struct_return
	  || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
    return true;

  /* Allow -maltivec -mabi=no-altivec without warning.  Altivec vector
     modes only exist for GCC vector types if -maltivec.  */
  if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
      && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
    return false;

  /* Return synthetic vectors in memory.  */
  if (TREE_CODE (type) == VECTOR_TYPE
      && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
    {
      static bool warned_for_return_big_vectors = false;
      if (!warned_for_return_big_vectors)
	{
	  warning (OPT_Wpsabi, "GCC vector returned by reference: "
		   "non-standard ABI extension with no compatibility "
		   "guarantee");
	  warned_for_return_big_vectors = true;
	}
      return true;
    }

  if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
      && FLOAT128_IEEE_P (TYPE_MODE (type)))
    return true;

  return false;
}
/* Specify whether values returned in registers should be at the most
   significant end of a register.  We want aggregates returned by
   value to match the way aggregates are passed to functions.  */

static bool
rs6000_return_in_msb (const_tree valtype)
{
  return (DEFAULT_ABI == ABI_ELFv2
	  && BYTES_BIG_ENDIAN
	  && AGGREGATE_TYPE_P (valtype)
	  && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
}
#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Return TRUE if a call to function FNDECL may be one that
   potentially affects the function calling ABI of the object file.  */

static bool
call_ABI_of_interest (tree fndecl)
{
  if (rs6000_gnu_attr && symtab->state == EXPANSION)
    {
      struct cgraph_node *c_node;

      /* Libcalls are always interesting.  */
      if (fndecl == NULL_TREE)
	return true;

      /* Any call to an external function is interesting.  */
      if (DECL_EXTERNAL (fndecl))
	return true;

      /* Interesting functions that we are emitting in this object file.  */
      c_node = cgraph_node::get (fndecl);
      c_node = c_node->ultimate_alias_target ();
      return !c_node->only_called_directly_p ();
    }
  return false;
}
#endif
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.

   For incoming args we set the number of arguments in the prototype large
   so we never return a PARALLEL.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
		      rtx libname ATTRIBUTE_UNUSED, int incoming,
		      int libcall, int n_named_args,
		      tree fndecl ATTRIBUTE_UNUSED,
		      machine_mode return_mode ATTRIBUTE_UNUSED)
{
  static CUMULATIVE_ARGS zero_cumulative;

  *cum = zero_cumulative;
  cum->words = 0;
  cum->fregno = FP_ARG_MIN_REG;
  cum->vregno = ALTIVEC_ARG_MIN_REG;
  cum->prototype = (fntype && prototype_p (fntype));
  cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
		      ? CALL_LIBCALL : CALL_NORMAL);
  cum->sysv_gregno = GP_ARG_MIN_REG;
  cum->stdarg = stdarg_p (fntype);
  cum->libcall = libcall;

  cum->nargs_prototype = 0;
  if (incoming || cum->prototype)
    cum->nargs_prototype = n_named_args;

  /* Check for a longcall attribute.  */
  if ((!fntype && rs6000_default_long_calls)
      || (fntype
	  && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
	  && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
    cum->call_cookie |= CALL_LONG;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args:");
      if (fntype)
	{
	  tree ret_type = TREE_TYPE (fntype);
	  fprintf (stderr, " ret code = %s,",
		   get_tree_code_name (TREE_CODE (ret_type)));
	}

      if (cum->call_cookie & CALL_LONG)
	fprintf (stderr, " longcall,");

      fprintf (stderr, " proto = %d, nargs = %d\n",
	       cum->prototype, cum->nargs_prototype);
    }

#ifdef HAVE_AS_GNU_ATTRIBUTE
  if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
    {
      cum->escapes = call_ABI_of_interest (fndecl);
      if (cum->escapes)
	{
	  tree return_type;

	  if (fntype)
	    {
	      return_type = TREE_TYPE (fntype);
	      return_mode = TYPE_MODE (return_type);
	    }
	  else
	    return_type = lang_hooks.types.type_for_mode (return_mode, 0);

	  if (return_type != NULL)
	    {
	      if (TREE_CODE (return_type) == RECORD_TYPE
		  && TYPE_TRANSPARENT_AGGR (return_type))
		{
		  return_type = TREE_TYPE (first_field (return_type));
		  return_mode = TYPE_MODE (return_type);
		}
	      if (AGGREGATE_TYPE_P (return_type)
		  && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
		      <= 8))
		rs6000_returns_struct = true;
	    }
	  if (SCALAR_FLOAT_MODE_P (return_mode))
	    {
	      rs6000_passes_float = true;
	      if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
		  && (FLOAT128_IBM_P (return_mode)
		      || FLOAT128_IEEE_P (return_mode)
		      || (return_type != NULL
			  && (TYPE_MAIN_VARIANT (return_type)
			      == long_double_type_node))))
		rs6000_passes_long_double = true;
	    }
	  if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
	      || SPE_VECTOR_MODE (return_mode))
	    rs6000_passes_vector = true;
	}
    }
#endif

  if (fntype
      && !TARGET_ALTIVEC
      && TARGET_ALTIVEC_ABI
      && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
    {
      error ("cannot return value in vector register because"
	     " altivec instructions are disabled, use -maltivec"
	     " to enable them");
    }
}
/* The mode the ABI uses for a word.  This is not the same as word_mode
   for -m32 -mpowerpc64.  This is used to implement various target hooks.  */

static machine_mode
rs6000_abi_word_mode (void)
{
  return TARGET_32BIT ? SImode : DImode;
}
/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
static char *
rs6000_offload_options (void)
{
  if (TARGET_64BIT)
    return xstrdup ("-foffload-abi=lp64");
  else
    return xstrdup ("-foffload-abi=ilp32");
}
/* On rs6000, function arguments are promoted, as are function return
   values.  */

static machine_mode
rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			      machine_mode mode,
			      int *punsignedp ATTRIBUTE_UNUSED,
			      const_tree, int)
{
  PROMOTE_MODE (mode, *punsignedp, type);

  return mode;
}
/* Return true if TYPE must be passed on the stack and not in registers.  */

static bool
rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
static bool
is_complex_IBM_long_double (machine_mode mode)
{
  return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
}
/* Whether ABI_V4 passes MODE args to a function in floating point
   registers.  */

static bool
abi_v4_pass_in_fpr (machine_mode mode)
{
  if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
    return false;
  if (TARGET_SINGLE_FLOAT && mode == SFmode)
    return true;
  if (TARGET_DOUBLE_FLOAT && mode == DFmode)
    return true;
  /* ABI_V4 passes complex IBM long double in 8 gprs.
     Stupid, but we can't change the ABI now.  */
  if (is_complex_IBM_long_double (mode))
    return false;
  if (FLOAT128_2REG_P (mode))
    return true;
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  return false;
}
/* If defined, a C expression which determines whether, and in which
   direction, to pad out an argument with extra space.  The value
   should be of type `enum direction': either `upward' to pad above
   the argument, `downward' to pad below, or `none' to inhibit
   padding.

   For the AIX ABI structs are always stored left shifted in their
   argument slot.  */

enum direction
function_arg_padding (machine_mode mode, const_tree type)
{
#ifndef AGGREGATE_PADDING_FIXED
#define AGGREGATE_PADDING_FIXED 0
#endif
#ifndef AGGREGATES_PAD_UPWARD_ALWAYS
#define AGGREGATES_PAD_UPWARD_ALWAYS 0
#endif

  if (!AGGREGATE_PADDING_FIXED)
    {
      /* GCC used to pass structures of the same size as integer types as
	 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
	 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
	 passed padded downward, except that -mstrict-align further
	 muddied the water in that multi-component structures of 2 and 4
	 bytes in size were passed padded upward.

	 The following arranges for best compatibility with previous
	 versions of gcc, but removes the -mstrict-align dependency.  */
      if (BYTES_BIG_ENDIAN)
	{
	  HOST_WIDE_INT size = 0;

	  if (mode == BLKmode)
	    {
	      if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
		size = int_size_in_bytes (type);
	    }
	  else
	    size = GET_MODE_SIZE (mode);

	  if (size == 1 || size == 2 || size == 4)
	    return downward;
	}
      return upward;
    }

  if (AGGREGATES_PAD_UPWARD_ALWAYS)
    {
      if (type != 0 && AGGREGATE_TYPE_P (type))
	return upward;
    }

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
/* If defined, a C expression that gives the alignment boundary, in bits,
   of an argument with the specified mode and type.  If it is not defined,
   PARM_BOUNDARY is used for all arguments.

   V.4 wants long longs and doubles to be double word aligned.  Just
   testing the mode size is a boneheaded way to do this as it means
   that other types such as complex int are also double word aligned.
   However, we're stuck with this because changing the ABI might break
   existing library interfaces.

   Doubleword align SPE vectors.
   Quadword align Altivec/VSX vectors.
   Quadword align large synthetic vector types.  */

static unsigned int
rs6000_function_arg_boundary (machine_mode mode, const_tree type)
{
  machine_mode elt_mode;
  int n_elts;

  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  if (DEFAULT_ABI == ABI_V4
      && (GET_MODE_SIZE (mode) == 8
	  || (TARGET_HARD_FLOAT
	      && TARGET_FPRS
	      && !is_complex_IBM_long_double (mode)
	      && FLOAT128_2REG_P (mode))))
    return 64;
  else if (FLOAT128_VECTOR_P (mode))
    return 128;
  else if (SPE_VECTOR_MODE (mode)
	   || (type && TREE_CODE (type) == VECTOR_TYPE
	       && int_size_in_bytes (type) >= 8
	       && int_size_in_bytes (type) < 16))
    return 64;
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
	   || (type && TREE_CODE (type) == VECTOR_TYPE
	       && int_size_in_bytes (type) >= 16))
    return 128;

  /* Aggregate types that need > 8 byte alignment are quadword-aligned
     in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
     -mcompat-align-parm is used.  */
  if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
       || DEFAULT_ABI == ABI_ELFv2)
      && type && TYPE_ALIGN (type) > 64)
    {
      /* "Aggregate" means any AGGREGATE_TYPE except for single-element
	 or homogeneous float/vector aggregates here.  We already handled
	 vector aggregates above, but still need to check for float here. */
      bool aggregate_p = (AGGREGATE_TYPE_P (type)
			  && !SCALAR_FLOAT_MODE_P (elt_mode));

      /* We used to check for BLKmode instead of the above aggregate type
	 check.  Warn when this results in any difference to the ABI.  */
      if (aggregate_p != (mode == BLKmode))
	{
	  static bool warned;
	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing aggregates with %d-byte alignment"
		      " has changed in GCC 5",
		      (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
	    }
	}

      if (aggregate_p)
	return 128;
    }

  /* Similar for the Darwin64 ABI.  Note that for historical reasons we
     implement the "aggregate type" check as a BLKmode check here; this
     means certain aggregate types are in fact not aligned.  */
  if (TARGET_MACHO && rs6000_darwin64_abi
      && mode == BLKmode
      && type && TYPE_ALIGN (type) > 64)
    return 128;

  return PARM_BOUNDARY;
}
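/* Illustrative sketch (not part of the original source): under ABI_V4 a
   `long long' or `double' argument (GET_MODE_SIZE == 8) gets a 64-bit
   boundary from the first test above, while a 16-byte Altivec vector gets
   a 128-bit boundary from the ALTIVEC_OR_VSX_VECTOR_MODE test.  */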
/* The offset in words to the start of the parameter save area.  */

static unsigned int
rs6000_parm_offset (void)
{
  return (DEFAULT_ABI == ABI_V4 ? 2
	  : DEFAULT_ABI == ABI_ELFv2 ? 4
	  : 6);
}
/* For a function parm of MODE and TYPE, return the starting word in
   the parameter area.  NWORDS of the parameter area are already used.  */

static unsigned int
rs6000_parm_start (machine_mode mode, const_tree type,
		   unsigned int nwords)
{
  unsigned int align;

  align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
  return nwords + (-(rs6000_parm_offset () + nwords) & align);
}

/* Compute the size (in words) of a function argument.  */

static unsigned long
rs6000_arg_size (machine_mode mode, const_tree type)
{
  unsigned long size;

  if (mode != BLKmode)
    size = GET_MODE_SIZE (mode);
  else
    size = int_size_in_bytes (type);

  if (TARGET_32BIT)
    return (size + 3) >> 2;
  else
    return (size + 7) >> 3;
}
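
/* For example (illustrative): a BLKmode struct of 9 bytes occupies
   (9 + 7) >> 3 = 2 words in 64-bit mode but (9 + 3) >> 2 = 3 words in
   32-bit mode.  */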

/* Use this to flush pending int fields.  */

static void
rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
					  HOST_WIDE_INT bitpos, int final)
{
  unsigned int startbit, endbit;
  int intregs, intoffset;
  machine_mode mode;

  /* Handle the situations where a float is taking up the first half
     of the GPR, and the other half is empty (typically due to
     alignment restrictions).  We can detect this by a 8-byte-aligned
     int field, or by seeing that this is the final flush for this
     argument.  Count the word and continue on.  */
  if (cum->floats_in_gpr == 1
      && (cum->intoffset % 64 == 0
	  || (cum->intoffset == -1 && final)))
    {
      cum->words++;
      cum->floats_in_gpr = 0;
    }

  if (cum->intoffset == -1)
    return;

  intoffset = cum->intoffset;
  cum->intoffset = -1;
  cum->floats_in_gpr = 0;

  if (intoffset % BITS_PER_WORD != 0)
    {
      mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
			    MODE_INT, 0);
      if (mode == BLKmode)
	{
	  /* We couldn't find an appropriate mode, which happens,
	     e.g., in packed structs when there are 3 bytes to load.
	     Back intoffset back to the beginning of the word in this
	     case.  */
	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
	}
    }

  startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
  endbit = ROUND_UP (bitpos, BITS_PER_WORD);
  intregs = (endbit - startbit) / BITS_PER_WORD;
  cum->words += intregs;
  /* words should be unsigned.  */
  if ((unsigned) cum->words < (endbit / BITS_PER_WORD))
    {
      int pad = (endbit / BITS_PER_WORD) - cum->words;
      cum->words += pad;
    }
}
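
/* Worked example (illustrative, not from the upstream sources):
   flushing pending int fields that start at intoffset == 0 with
   bitpos == 96 on a 64-bit target gives startbit = 0,
   endbit = ROUND_UP (96, 64) = 128, hence intregs = 2, and cum->words
   advances by two GPRs.  */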

/* The darwin64 ABI calls for us to recurse down through structs,
   looking for elements passed in registers.  Unfortunately, we have
   to track int register count here also because of misalignments
   in powerpc alignment mode.  */

static void
rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
					    const_tree type,
					    HOST_WIDE_INT startbitpos)
{
  tree f;

  for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
    if (TREE_CODE (f) == FIELD_DECL)
      {
	HOST_WIDE_INT bitpos = startbitpos;
	tree ftype = TREE_TYPE (f);
	machine_mode mode;
	if (ftype == error_mark_node)
	  continue;
	mode = TYPE_MODE (ftype);

	if (DECL_SIZE (f) != 0
	    && tree_fits_uhwi_p (bit_position (f)))
	  bitpos += int_bit_position (f);

	/* ??? FIXME: else assume zero offset.  */

	if (TREE_CODE (ftype) == RECORD_TYPE)
	  rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
	else if (USE_FP_FOR_ARG_P (cum, mode))
	  {
	    unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
	    cum->fregno += n_fpregs;
	    /* Single-precision floats present a special problem for
	       us, because they are smaller than an 8-byte GPR, and so
	       the structure-packing rules combined with the standard
	       varargs behavior mean that we want to pack float/float
	       and float/int combinations into a single register's
	       space.  This is complicated by the arg advance flushing,
	       which works on arbitrarily large groups of int-type
	       fields.  */
	    if (mode == SFmode)
	      {
		if (cum->floats_in_gpr == 1)
		  {
		    /* Two floats in a word; count the word and reset
		       the float count.  */
		    cum->words++;
		    cum->floats_in_gpr = 0;
		  }
		else if (bitpos % 64 == 0)
		  {
		    /* A float at the beginning of an 8-byte word;
		       count it and put off adjusting cum->words until
		       we see if an arg advance flush is going to do it
		       for us.  */
		    cum->floats_in_gpr++;
		  }
		else
		  {
		    /* The float is at the end of a word, preceded
		       by integer fields, so the arg advance flush
		       just above has already set cum->words and
		       everything is taken care of.  */
		  }
	      }
	    else
	      cum->words += n_fpregs;
	  }
	else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
	  {
	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
	    cum->vregno++;
	    cum->words += 2;
	  }
	else if (cum->intoffset == -1)
	  cum->intoffset = bitpos;
      }
}

/* Check for an item that needs to be considered specially under the darwin 64
   bit ABI.  These are record types where the mode is BLK or the structure is
   8 bytes in size.  */

static int
rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
{
  return rs6000_darwin64_abi
	 && ((mode == BLKmode
	      && TREE_CODE (type) == RECORD_TYPE
	      && int_size_in_bytes (type) > 0)
	     || (type && TREE_CODE (type) == RECORD_TYPE
		 && int_size_in_bytes (type) == 8)) ? 1 : 0;
}
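
/* Illustrative examples (not from the upstream sources): an 8-byte
   record such as "struct { int a; int b; }" is caught by the second
   arm whatever mode the middle end assigned it, while a larger record
   like "struct { char c[24]; }" is BLKmode and matches the first arm.
   Scalars and non-record types never qualify.  */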

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)

   Note that for args passed by reference, function_arg will be called
   with MODE and TYPE set to that of the pointer to the arg, not the arg
   itself.  */

static void
rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
			       const_tree type, bool named, int depth)
{
  machine_mode elt_mode;
  int n_elts;

  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  /* Only tick off an argument if we're not recursing.  */
  if (depth == 0)
    cum->nargs_prototype--;

#ifdef HAVE_AS_GNU_ATTRIBUTE
  if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && cum->escapes)
    {
      if (SCALAR_FLOAT_MODE_P (mode))
	{
	  rs6000_passes_float = true;
	  if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
	      && (FLOAT128_IBM_P (mode)
		  || FLOAT128_IEEE_P (mode)
		  || (type != NULL
		      && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
	    rs6000_passes_long_double = true;
	}
      if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
	  || (SPE_VECTOR_MODE (mode)
	      && !cum->stdarg
	      && cum->sysv_gregno <= GP_ARG_MAX_REG))
	rs6000_passes_vector = true;
    }
#endif

  if (TARGET_ALTIVEC_ABI
      && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
	  || (type && TREE_CODE (type) == VECTOR_TYPE
	      && int_size_in_bytes (type) == 16)))
    {
      bool stack = false;

      if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
	{
	  cum->vregno += n_elts;

	  if (!TARGET_ALTIVEC)
	    error ("cannot pass argument in vector register because"
		   " altivec instructions are disabled, use -maltivec"
		   " to enable them");

	  /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
	     even if it is going to be passed in a vector register.
	     Darwin does the same for variable-argument functions.  */
	  if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	       && TARGET_64BIT)
	      || (cum->stdarg && DEFAULT_ABI != ABI_V4))
	    stack = true;
	}
      else
	stack = true;

      if (stack)
	{
	  int align;

	  /* Vector parameters must be 16-byte aligned.  In 32-bit
	     mode this means we need to take into account the offset
	     to the parameter save area.  In 64-bit mode, they just
	     have to start on an even word, since the parameter save
	     area is 16-byte aligned.  */
	  if (TARGET_32BIT)
	    align = -(rs6000_parm_offset () + cum->words) & 3;
	  else
	    align = cum->words & 1;
	  cum->words += align + rs6000_arg_size (mode, type);

	  if (TARGET_DEBUG_ARG)
	    {
	      fprintf (stderr, "function_adv: words = %2d, align=%d, ",
		       cum->words, align);
	      fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
		       cum->nargs_prototype, cum->prototype,
		       GET_MODE_NAME (mode));
	    }
	}
    }
  else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
	   && !cum->stdarg
	   && cum->sysv_gregno <= GP_ARG_MAX_REG)
    cum->sysv_gregno++;

  else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
    {
      int size = int_size_in_bytes (type);
      /* Variable sized types have size == -1 and are
	 treated as if consisting entirely of ints.
	 Pad to 16 byte boundary if needed.  */
      if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
	  && (cum->words % 2) != 0)
	cum->words++;
      /* For varargs, we can just go up by the size of the struct.  */
      if (!named)
	cum->words += (size + 7) / 8;
      else
	{
	  /* It is tempting to say int register count just goes up by
	     sizeof(type)/8, but this is wrong in a case such as
	     { int; double; int; } [powerpc alignment].  We have to
	     grovel through the fields for these too.  */
	  cum->intoffset = 0;
	  cum->floats_in_gpr = 0;
	  rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
	  rs6000_darwin64_record_arg_advance_flush (cum,
						    size * BITS_PER_UNIT, 1);
	}
      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
		   cum->words, TYPE_ALIGN (type), size);
	  fprintf (stderr,
		   "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
		   cum->nargs_prototype, cum->prototype,
		   GET_MODE_NAME (mode));
	}
    }
  else if (DEFAULT_ABI == ABI_V4)
    {
      if (abi_v4_pass_in_fpr (mode))
	{
	  /* _Decimal128 must use an even/odd register pair.  This assumes
	     that the register number is odd when fregno is odd.  */
	  if (mode == TDmode && (cum->fregno % 2) == 1)
	    cum->fregno++;

	  if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
	      <= FP_ARG_V4_MAX_REG)
	    cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
	  else
	    {
	      cum->fregno = FP_ARG_V4_MAX_REG + 1;
	      if (mode == DFmode || FLOAT128_IBM_P (mode)
		  || mode == DDmode || mode == TDmode)
		cum->words += cum->words & 1;
	      cum->words += rs6000_arg_size (mode, type);
	    }
	}
      else
	{
	  int n_words = rs6000_arg_size (mode, type);
	  int gregno = cum->sysv_gregno;

	  /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
	     (r7,r8) or (r9,r10).  As does any other 2 word item such
	     as complex int due to a historical mistake.  */
	  if (n_words == 2)
	    gregno += (1 - gregno) & 1;

	  /* Multi-reg args are not split between registers and stack.  */
	  if (gregno + n_words - 1 > GP_ARG_MAX_REG)
	    {
	      /* Long long and SPE vectors are aligned on the stack.
		 So are other 2 word items such as complex int due to
		 a historical mistake.  */
	      if (n_words == 2)
		cum->words += cum->words & 1;
	      cum->words += n_words;
	    }

	  /* Note: continuing to accumulate gregno past when we've started
	     spilling to the stack indicates the fact that we've started
	     spilling to the stack to expand_builtin_saveregs.  */
	  cum->sysv_gregno = gregno + n_words;
	}

      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
		   cum->words, cum->fregno);
	  fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
		   cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
	  fprintf (stderr, "mode = %4s, named = %d\n",
		   GET_MODE_NAME (mode), named);
	}
    }
  else
    {
      int n_words = rs6000_arg_size (mode, type);
      int start_words = cum->words;
      int align_words = rs6000_parm_start (mode, type, start_words);

      cum->words = align_words + n_words;

      if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
	{
	  /* _Decimal128 must be passed in an even/odd float register pair.
	     This assumes that the register number is odd when fregno is
	     odd.  */
	  if (elt_mode == TDmode && (cum->fregno % 2) == 1)
	    cum->fregno++;
	  cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
	}

      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
		   cum->words, cum->fregno);
	  fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
		   cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
	  fprintf (stderr, "named = %d, align = %d, depth = %d\n",
		   named, align_words - start_words, depth);
	}
    }
}
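
/* Worked example (illustrative, not from the upstream sources): on V.4,
   advancing over a "long long" with cum->sysv_gregno == 4 (r4) first
   bumps gregno to 5 so the pair lands in (r5,r6), then leaves
   cum->sysv_gregno at 7; once the pair would run past r10, cum->words
   is instead padded to an even value and two stack words are used.  */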

static void
rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
			     const_tree type, bool named)
{
  rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
				 0);
}

static rtx
spe_build_register_parallel (machine_mode mode, int gregno)
{
  rtx r1, r3, r5, r7;

  switch (mode)
    {
    case DFmode:
      r1 = gen_rtx_REG (DImode, gregno);
      r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));

    case DCmode:
    case TFmode:
      r1 = gen_rtx_REG (DImode, gregno);
      r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
      r3 = gen_rtx_REG (DImode, gregno + 2);
      r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
      return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));

    case TCmode:
      r1 = gen_rtx_REG (DImode, gregno);
      r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
      r3 = gen_rtx_REG (DImode, gregno + 2);
      r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
      r5 = gen_rtx_REG (DImode, gregno + 4);
      r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
      r7 = gen_rtx_REG (DImode, gregno + 6);
      r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
      return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));

    default:
      gcc_unreachable ();
    }
}
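
/* For instance (illustrative, not from the upstream sources), for a
   TFmode value starting at r5 the DCmode/TFmode arm above builds
     (parallel [(expr_list (reg:DI 5) (const_int 0))
		(expr_list (reg:DI 7) (const_int 8))])
   describing two doubleword registers covering bytes 0-7 and 8-15.  */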

/* Determine where to put a SIMD argument on the SPE.  */
static rtx
rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type)
{
  int gregno = cum->sysv_gregno;

  /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
     are passed and returned in a pair of GPRs for ABI compatibility.  */
  if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
			     || mode == DCmode || mode == TCmode))
    {
      int n_words = rs6000_arg_size (mode, type);

      /* Doubles go in an odd/even register pair (r5/r6, etc).  */
      if (mode == DFmode)
	gregno += (1 - gregno) & 1;

      /* Multi-reg args are not split between registers and stack.  */
      if (gregno + n_words - 1 > GP_ARG_MAX_REG)
	return NULL_RTX;

      return spe_build_register_parallel (mode, gregno);
    }
  if (cum->stdarg)
    {
      int n_words = rs6000_arg_size (mode, type);

      /* SPE vectors are put in odd registers.  */
      if (n_words == 2 && (gregno & 1) == 0)
	gregno += 1;

      if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
	{
	  rtx r1, r2;
	  machine_mode m = SImode;

	  r1 = gen_rtx_REG (m, gregno);
	  r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
	  r2 = gen_rtx_REG (m, gregno + 1);
	  r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
	}
      else
	return NULL_RTX;
    }
  else
    {
      if (gregno <= GP_ARG_MAX_REG)
	return gen_rtx_REG (mode, gregno);
      else
	return NULL_RTX;
    }
}

/* A subroutine of rs6000_darwin64_record_arg.  Assign the bits of the
   structure between cum->intoffset and bitpos to integer registers.  */

static void
rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
				  HOST_WIDE_INT bitpos, rtx rvec[], int *k)
{
  machine_mode mode;
  unsigned int regno;
  unsigned int startbit, endbit;
  int this_regno, intregs, intoffset;
  rtx reg;

  if (cum->intoffset == -1)
    return;

  intoffset = cum->intoffset;
  cum->intoffset = -1;

  /* If this is the trailing part of a word, try to only load that
     much into the register.  Otherwise load the whole register.  Note
     that in the latter case we may pick up unwanted bits.  It's not a
     problem at the moment but may wish to revisit.  */

  if (intoffset % BITS_PER_WORD != 0)
    {
      mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
			    MODE_INT, 0);
      if (mode == BLKmode)
	{
	  /* We couldn't find an appropriate mode, which happens,
	     e.g., in packed structs when there are 3 bytes to load.
	     Back intoffset back to the beginning of the word in this
	     case.  */
	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
	  mode = word_mode;
	}
    }
  else
    mode = word_mode;

  startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
  endbit = ROUND_UP (bitpos, BITS_PER_WORD);
  intregs = (endbit - startbit) / BITS_PER_WORD;
  this_regno = cum->words + intoffset / BITS_PER_WORD;

  if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
    cum->use_stack = 1;

  intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
  if (intregs <= 0)
    return;

  intoffset /= BITS_PER_UNIT;
  do
    {
      regno = GP_ARG_MIN_REG + this_regno;
      reg = gen_rtx_REG (mode, regno);
      rvec[(*k)++] =
	gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));

      this_regno += 1;
      intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
      mode = word_mode;
      intregs -= 1;
    }
  while (intregs > 0);
}

/* Recursive workhorse for the following.  */

static void
rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
				    HOST_WIDE_INT startbitpos, rtx rvec[],
				    int *k)
{
  tree f;

  for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
    if (TREE_CODE (f) == FIELD_DECL)
      {
	HOST_WIDE_INT bitpos = startbitpos;
	tree ftype = TREE_TYPE (f);
	machine_mode mode;
	if (ftype == error_mark_node)
	  continue;
	mode = TYPE_MODE (ftype);

	if (DECL_SIZE (f) != 0
	    && tree_fits_uhwi_p (bit_position (f)))
	  bitpos += int_bit_position (f);

	/* ??? FIXME: else assume zero offset.  */

	if (TREE_CODE (ftype) == RECORD_TYPE)
	  rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
	else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
	  {
	    unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
#if 0
	    switch (mode)
	      {
	      case SCmode: mode = SFmode; break;
	      case DCmode: mode = DFmode; break;
	      case TCmode: mode = TFmode; break;
	      }
#endif
	    rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
	    if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
	      {
		gcc_assert (cum->fregno == FP_ARG_MAX_REG
			    && (mode == TFmode || mode == TDmode));
		/* Long double or _Decimal128 split over regs and memory.  */
		mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
		cum->use_stack = 1;
	      }
	    rvec[(*k)++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (mode, cum->fregno++),
				   GEN_INT (bitpos / BITS_PER_UNIT));
	    if (FLOAT128_2REG_P (mode))
	      cum->fregno++;
	  }
	else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
	  {
	    rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
	    rvec[(*k)++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (mode, cum->vregno++),
				   GEN_INT (bitpos / BITS_PER_UNIT));
	  }
	else if (cum->intoffset == -1)
	  cum->intoffset = bitpos;
      }
}

/* For the darwin64 ABI, we want to construct a PARALLEL consisting of
   the register(s) to be used for each field and subfield of a struct
   being passed by value, along with the offset of where the
   register's value may be found in the block.  FP fields go in FP
   register, vector fields go in vector registers, and everything
   else goes in int registers, packed as in memory.

   This code is also used for function return values.  RETVAL indicates
   whether this is the case.

   Much of this is taken from the SPARC V9 port, which has a similar
   calling convention.  */

static rtx
rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
			    bool named, bool retval)
{
  rtx rvec[FIRST_PSEUDO_REGISTER];
  int k = 1, kbase = 1;
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  /* This is a copy; modifications are not visible to our caller.  */
  CUMULATIVE_ARGS copy_cum = *orig_cum;
  CUMULATIVE_ARGS *cum = &copy_cum;

  /* Pad to 16 byte boundary if needed.  */
  if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
      && (cum->words % 2) != 0)
    cum->words++;

  cum->intoffset = 0;
  cum->use_stack = 0;
  cum->named = named;

  /* Put entries into rvec[] for individual FP and vector fields, and
     for the chunks of memory that go in int regs.  Note we start at
     element 1; 0 is reserved for an indication of using memory, and
     may or may not be filled in below.  */
  rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0,
				      rvec, &k);
  rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);

  /* If any part of the struct went on the stack put all of it there.
     This hack is because the generic code for
     FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
     parts of the struct are not at the beginning.  */
  if (cum->use_stack)
    {
      if (retval)
	return NULL_RTX;    /* doesn't go in registers at all */
      kbase = 0;
      rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
    }
  if (k > 1 || cum->use_stack)
    return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
  else
    return NULL_RTX;
}

/* Determine where to place an argument in 64-bit mode with 32-bit ABI.  */

static rtx
rs6000_mixed_function_arg (machine_mode mode, const_tree type,
			   int align_words)
{
  int n_units;
  int i, k;
  rtx rvec[GP_ARG_NUM_REG + 1];

  if (align_words >= GP_ARG_NUM_REG)
    return NULL_RTX;

  n_units = rs6000_arg_size (mode, type);

  /* Optimize the simple case where the arg fits in one gpr, except in
     the case of BLKmode due to assign_parms assuming that registers are
     BITS_PER_WORD wide.  */
  if (n_units == 0
      || (n_units == 1 && mode != BLKmode))
    return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);

  k = 0;
  if (align_words + n_units > GP_ARG_NUM_REG)
    /* Not all of the arg fits in gprs.  Say that it goes in memory too,
       using a magic NULL_RTX component.
       This is not strictly correct.  Only some of the arg belongs in
       memory, not all of it.  However, the normal scheme using
       function_arg_partial_nregs can result in unusual subregs, eg.
       (subreg:SI (reg:DF) 4), which are not handled well.  The code to
       store the whole arg to memory is often more efficient than code
       to store pieces, and we know that space is available in the right
       place for the whole arg.  */
    rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);

  i = 0;
  do
    {
      rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
      rtx off = GEN_INT (i++ * 4);
      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
    }
  while (++align_words < GP_ARG_NUM_REG && --n_units != 0);

  return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
}
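
/* Worked example (illustrative, not from the upstream sources): a
   DFmode argument with align_words == 7 on a 32-bit ABI with 64-bit
   registers occupies n_units == 2 words, of which only one GPR (r10)
   remains, so the result is
     (parallel:DF [(expr_list (nil) (const_int 0))
		   (expr_list (reg:SI 10) (const_int 0))])
   i.e. the whole value also goes to memory, and r10 carries the first
   word.  */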

/* We have an argument of MODE and TYPE that goes into FPRs or VRs,
   but must also be copied into the parameter save area starting at
   offset ALIGN_WORDS.  Fill in RVEC with the elements corresponding
   to the GPRs and/or memory.  Return the number of elements used.  */

static int
rs6000_psave_function_arg (machine_mode mode, const_tree type,
			   int align_words, rtx *rvec)
{
  int k = 0;

  if (align_words < GP_ARG_NUM_REG)
    {
      int n_words = rs6000_arg_size (mode, type);

      if (align_words + n_words > GP_ARG_NUM_REG
	  || mode == BLKmode
	  || (TARGET_32BIT && TARGET_POWERPC64))
	{
	  /* If this is partially on the stack, then we only
	     include the portion actually in registers here.  */
	  machine_mode rmode = TARGET_32BIT ? SImode : DImode;
	  int i = 0;

	  if (align_words + n_words > GP_ARG_NUM_REG)
	    {
	      /* Not all of the arg fits in gprs.  Say that it goes in memory
		 too, using a magic NULL_RTX component.  Also see comment in
		 rs6000_mixed_function_arg for why the normal
		 function_arg_partial_nregs scheme doesn't work in this
		 case.  */
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	    }

	  do
	    {
	      rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
	      rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	    }
	  while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
	}
      else
	{
	  /* The whole arg fits in gprs.  */
	  rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
	  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
	}
    }
  else
    {
      /* It's entirely in memory.  */
      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
    }

  return k;
}
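
/* Worked example (illustrative, not from the upstream sources): for a
   16-byte argument at align_words == 7 on a 64-bit target, one word
   still fits in r10, so RVEC receives the magic (nil) element followed
   by (expr_list (reg:DI 10) (const_int 0)) and the function returns
   2.  */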

/* RVEC is a vector of K components of an argument of mode MODE.
   Construct the final function_arg return value from it.  */

static rtx
rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
{
  gcc_assert (k >= 1);

  /* Avoid returning a PARALLEL in the trivial cases.  */
  if (k == 1)
    {
      if (XEXP (rvec[0], 0) == NULL_RTX)
	return NULL_RTX;

      if (GET_MODE (XEXP (rvec[0], 0)) == mode)
	return XEXP (rvec[0], 0);
    }

  return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
}

/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.  It is
    not modified in this routine.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On RS/6000 the first eight words of non-FP are normally in registers
   and the rest are pushed.  Under AIX, the first 13 FP args are in registers.
   Under V.4, the first 8 FP args are in registers.

   If this is floating-point and no prototype is specified, we use
   both an FP and integer register (or possibly FP reg and stack).  Library
   functions (when CALL_LIBCALL is set) always have the proper types for args,
   so we can pass the FP value just in one register.  emit_library_function
   doesn't support PARALLEL anyway.

   Note that for args passed by reference, function_arg will be called
   with MODE and TYPE set to that of the pointer to the arg, not the arg
   itself.  */

static rtx
rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
		     const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum rs6000_abi abi = DEFAULT_ABI;
  machine_mode elt_mode;
  int n_elts;

  /* Return a marker to indicate whether CR1 needs to set or clear the
     bit that V.4 uses to say fp args were passed in registers.
     Assume that we don't need the marker for software floating point,
     or compiler generated library calls.  */
  if (mode == VOIDmode)
    {
      if (abi == ABI_V4
	  && (cum->call_cookie & CALL_LIBCALL) == 0
	  && (cum->stdarg
	      || (cum->nargs_prototype < 0
		  && (cum->prototype || TARGET_NO_PROTOTYPE))))
	{
	  /* For the SPE, we need to crxor CR6 always.  */
	  if (TARGET_SPE_ABI)
	    return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
	  else if (TARGET_HARD_FLOAT && TARGET_FPRS)
	    return GEN_INT (cum->call_cookie
			    | ((cum->fregno == FP_ARG_MIN_REG)
			       ? CALL_V4_SET_FP_ARGS
			       : CALL_V4_CLEAR_FP_ARGS));
	}

      return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
    }

  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
    {
      rtx rslt = rs6000_darwin64_record_arg (cum, type, named,
					     /*retval= */false);
      if (rslt != NULL_RTX)
	return rslt;
      /* Else fall through to usual handling.  */
    }

  if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
    {
      rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
      rtx r, off;
      int i, k = 0;

      /* Do we also need to pass this argument in the parameter save area?
	 Library support functions for IEEE 128-bit are assumed to not need
	 the value passed both in GPRs and in vector registers.  */
      if (TARGET_64BIT && !cum->prototype
	  && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
	{
	  int align_words = ROUND_UP (cum->words, 2);
	  k = rs6000_psave_function_arg (mode, type, align_words, rvec);
	}

      /* Describe where this argument goes in the vector registers.  */
      for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
	{
	  r = gen_rtx_REG (elt_mode, cum->vregno + i);
	  off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
	  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	}

      return rs6000_finish_function_arg (mode, rvec, k);
    }
  else if (TARGET_ALTIVEC_ABI
	   && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	       || (type && TREE_CODE (type) == VECTOR_TYPE
		   && int_size_in_bytes (type) == 16)))
    {
      if (named || abi == ABI_V4)
	return NULL_RTX;
      else
	{
	  /* Vector parameters to varargs functions under AIX or Darwin
	     get passed in memory and possibly also in GPRs.  */
	  int align, align_words, n_words;
	  machine_mode part_mode;

	  /* Vector parameters must be 16-byte aligned.  In 32-bit
	     mode this means we need to take into account the offset
	     to the parameter save area.  In 64-bit mode, they just
	     have to start on an even word, since the parameter save
	     area is 16-byte aligned.  */
	  if (TARGET_32BIT)
	    align = -(rs6000_parm_offset () + cum->words) & 3;
	  else
	    align = cum->words & 1;
	  align_words = cum->words + align;

	  /* Out of registers?  Memory, then.  */
	  if (align_words >= GP_ARG_NUM_REG)
	    return NULL_RTX;

	  if (TARGET_32BIT && TARGET_POWERPC64)
	    return rs6000_mixed_function_arg (mode, type, align_words);

	  /* The vector value goes in GPRs.  Only the part of the
	     value in GPRs is reported here.  */
	  part_mode = mode;
	  n_words = rs6000_arg_size (mode, type);
	  if (align_words + n_words > GP_ARG_NUM_REG)
	    /* Fortunately, there are only two possibilities, the value
	       is either wholly in GPRs or half in GPRs and half not.  */
	    part_mode = DImode;

	  return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
	}
    }
  else if (TARGET_SPE_ABI && TARGET_SPE
	   && (SPE_VECTOR_MODE (mode)
	       || (TARGET_E500_DOUBLE && (mode == DFmode
					  || mode == DCmode
					  || mode == TFmode
					  || mode == TCmode))))
    return rs6000_spe_function_arg (cum, mode, type);

  else if (abi == ABI_V4)
    {
      if (abi_v4_pass_in_fpr (mode))
	{
	  /* _Decimal128 must use an even/odd register pair.  This assumes
	     that the register number is odd when fregno is odd.  */
	  if (mode == TDmode && (cum->fregno % 2) == 1)
	    cum->fregno++;

	  if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
	      <= FP_ARG_V4_MAX_REG)
	    return gen_rtx_REG (mode, cum->fregno);
	  else
	    return NULL_RTX;
	}
      else
	{
	  int n_words = rs6000_arg_size (mode, type);
	  int gregno = cum->sysv_gregno;

	  /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
	     (r7,r8) or (r9,r10).  As does any other 2 word item such
	     as complex int due to a historical mistake.  */
	  if (n_words == 2)
	    gregno += (1 - gregno) & 1;

	  /* Multi-reg args are not split between registers and stack.  */
	  if (gregno + n_words - 1 > GP_ARG_MAX_REG)
	    return NULL_RTX;

	  if (TARGET_32BIT && TARGET_POWERPC64)
	    return rs6000_mixed_function_arg (mode, type,
					      gregno - GP_ARG_MIN_REG);
	  return gen_rtx_REG (mode, gregno);
	}
    }
  else
    {
      int align_words = rs6000_parm_start (mode, type, cum->words);

      /* _Decimal128 must be passed in an even/odd float register pair.
	 This assumes that the register number is odd when fregno is odd.  */
      if (elt_mode == TDmode && (cum->fregno % 2) == 1)
	cum->fregno++;

      if (USE_FP_FOR_ARG_P (cum, elt_mode))
	{
	  rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
	  rtx r, off;
	  int i, k = 0;
	  unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	  int fpr_words;

	  /* Do we also need to pass this argument in the parameter
	     save area?  */
	  if (type && (cum->nargs_prototype <= 0
		       || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
			   && TARGET_XL_COMPAT
			   && align_words >= GP_ARG_NUM_REG)))
	    k = rs6000_psave_function_arg (mode, type, align_words, rvec);

	  /* Describe where this argument goes in the fprs.  */
	  for (i = 0; i < n_elts
		      && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
	    {
	      /* Check if the argument is split over registers and memory.
		 This can only ever happen for long double or _Decimal128;
		 complex types are handled via split_complex_arg.  */
	      machine_mode fmode = elt_mode;
	      if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
		{
		  gcc_assert (FLOAT128_2REG_P (fmode));
		  fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
		}

	      r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
	      off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	    }

	  /* If there were not enough FPRs to hold the argument, the rest
	     usually goes into memory.  However, if the current position
	     is still within the register parameter area, a portion may
	     actually have to go into GPRs.

	     Note that it may happen that the portion of the argument
	     passed in the first "half" of the first GPR was already
	     passed in the last FPR as well.

	     For unnamed arguments, we already set up GPRs to cover the
	     whole argument in rs6000_psave_function_arg, so there is
	     nothing further to do at this point.  */
	  fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
	  if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
	      && cum->nargs_prototype > 0)
	    {
	      static bool warned;

	      machine_mode rmode = TARGET_32BIT ? SImode : DImode;
	      int n_words = rs6000_arg_size (mode, type);

	      align_words += fpr_words;
	      n_words -= fpr_words;

	      do
		{
		  r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
		  off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
		  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
		}
	      while (++align_words < GP_ARG_NUM_REG && --n_words != 0);

	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing homogeneous float aggregates"
			  " has changed in GCC 5");
		}
	    }

	  return rs6000_finish_function_arg (mode, rvec, k);
	}
      else if (align_words < GP_ARG_NUM_REG)
	{
	  if (TARGET_32BIT && TARGET_POWERPC64)
	    return rs6000_mixed_function_arg (mode, type, align_words);

	  return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
	}
      else
	return NULL_RTX;
    }
}
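
/* Worked example (illustrative, not from the upstream sources): on
   ELFv2, a named "struct { double a, b; }" is discovered as a
   homogeneous DFmode aggregate with n_elts == 2, so with fregno at
   FP_ARG_MIN_REG the loop above yields a PARALLEL of two DFmode
   registers (f1 and f2) whose EXPR_LIST offsets are 0 and 8, i.e. each
   element is tagged with its byte offset within the argument.  */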

/* For an arg passed partly in registers and partly in memory, this is
   the number of bytes passed in registers.  For args passed entirely in
   registers or entirely in memory, zero.  When an arg is described by a
   PARALLEL, perhaps using more than one register type, this function
   returns the number of bytes used by the first element of the PARALLEL.  */

static int
rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
			  tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  bool passed_in_gprs = true;
  int ret = 0;
  int align_words;
  machine_mode elt_mode;
  int n_elts;

  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  if (DEFAULT_ABI == ABI_V4)
    return 0;

  if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
    {
      /* If we are passing this arg in the fixed parameter save area (gprs or
	 memory) as well as VRs, we do not use the partial bytes mechanism;
	 instead, rs6000_function_arg will return a PARALLEL including a memory
	 element as necessary.  Library support functions for IEEE 128-bit are
	 assumed to not need the value passed both in GPRs and in vector
	 registers.  */
      if (TARGET_64BIT && !cum->prototype
	  && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
	return 0;

      /* Otherwise, we pass in VRs only.  Check for partial copies.  */
      passed_in_gprs = false;
      if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
	ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
    }

  /* In this complicated case we just disable the partial_nregs code.  */
  if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
    return 0;

  align_words = rs6000_parm_start (mode, type, cum->words);

  if (USE_FP_FOR_ARG_P (cum, elt_mode))
    {
      unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;

      /* If we are passing this arg in the fixed parameter save area
	 (gprs or memory) as well as FPRs, we do not use the partial
	 bytes mechanism; instead, rs6000_function_arg will return a
	 PARALLEL including a memory element as necessary.  */
      if (type
	  && (cum->nargs_prototype <= 0
	      || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
		  && TARGET_XL_COMPAT
		  && align_words >= GP_ARG_NUM_REG)))
	return 0;

      /* Otherwise, we pass in FPRs only.  Check for partial copies.  */
      passed_in_gprs = false;
      if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
	{
	  /* Compute number of bytes / words passed in FPRs.  If there
	     is still space available in the register parameter area
	     *after* that amount, a part of the argument will be passed
	     in GPRs.  In that case, the total amount passed in any
	     registers is equal to the amount that would have been passed
	     in GPRs if everything were passed there, so we fall back to
	     the GPR code below to compute the appropriate value.  */
	  int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
		     * MIN (8, GET_MODE_SIZE (elt_mode)));
	  int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);

	  if (align_words + fpr_words < GP_ARG_NUM_REG)
	    passed_in_gprs = true;
	  else
	    ret = fpr;
	}
    }

  if (passed_in_gprs
      && align_words < GP_ARG_NUM_REG
      && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
    ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);

  if (ret != 0 && TARGET_DEBUG_ARG)
    fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);

  return ret;
}
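
/* Worked example (illustrative, not from the upstream sources): an
   AIX 64-bit argument starting at align_words == 7 that needs two
   words but qualifies for neither FPRs nor VRs has only r10 left, so
   ret = (8 - 7) * 8 = 8 bytes are reported as passed in registers and
   the remainder goes to memory.  */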

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.

   Under V.4, aggregates and long double are passed by reference.

   As an extension to all 32-bit ABIs, AltiVec vectors are passed by
   reference unless the AltiVec vector extension ABI is in force.

   As an extension to all ABIs, variable sized types are passed by
   reference.  */

static bool
rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
			  machine_mode mode, const_tree type,
			  bool named ATTRIBUTE_UNUSED)
{
  if (!type)
    return 0;

  if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
      && FLOAT128_IEEE_P (TYPE_MODE (type)))
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
      return 1;
    }

  if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
      return 1;
    }

  if (int_size_in_bytes (type) < 0)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
      return 1;
    }

  /* Allow -maltivec -mabi=no-altivec without warning.  Altivec vector
     modes only exist for GCC vector types if -maltivec.  */
  if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
      return 1;
    }

  /* Pass synthetic vectors in memory.  */
  if (TREE_CODE (type) == VECTOR_TYPE
      && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
    {
      static bool warned_for_pass_big_vectors = false;
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
      if (!warned_for_pass_big_vectors)
	{
	  warning (OPT_Wpsabi, "GCC vector passed by reference: "
		   "non-standard ABI extension with no compatibility "
		   "guarantee");
	  warned_for_pass_big_vectors = true;
	}
      return 1;
    }

  return 0;
}
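
/* Illustrative examples (not from the upstream sources): under V.4 a
   "struct { int x; }" argument takes the aggregate arm above, any
   variable-sized type is passed by reference on every ABI, and a
   32-byte GCC "synthetic" vector is passed by reference when the
   hardware only supports 16-byte vectors.  */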

/* Process parameter of type TYPE after ARGS_SO_FAR parameters were
   already processed.  Return true if the parameter must be passed
   (fully or partially) on the stack.  */

static bool
rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
{
  machine_mode mode;
  int unsignedp;
  rtx entry_parm;

  /* Catch errors.  */
  if (type == NULL || type == error_mark_node)
    return true;

  /* Handle types with no storage requirement.  */
  if (TYPE_MODE (type) == VOIDmode)
    return false;

  /* Handle complex types.  */
  if (TREE_CODE (type) == COMPLEX_TYPE)
    return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
	    || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));

  /* Handle transparent aggregates.  */
  if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
      && TYPE_TRANSPARENT_AGGR (type))
    type = TREE_TYPE (first_field (type));

  /* See if this arg was passed by invisible reference.  */
  if (pass_by_reference (get_cumulative_args (args_so_far),
			 TYPE_MODE (type), type, true))
    type = build_pointer_type (type);

  /* Find mode as it is passed by the ABI.  */
  unsignedp = TYPE_UNSIGNED (type);
  mode = promote_mode (type, TYPE_MODE (type), &unsignedp);

  /* If we must pass in stack, we need a stack.  */
  if (rs6000_must_pass_in_stack (mode, type))
    return true;

  /* If there is no incoming register, we need a stack.  */
  entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
  if (entry_parm == NULL)
    return true;

  /* Likewise if we need to pass both in registers and on the stack.  */
  if (GET_CODE (entry_parm) == PARALLEL
      && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
    return true;

  /* Also true if we're partially in registers and partially not.  */
  if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
    return true;

  /* Update info on where next arg arrives in registers.  */
  rs6000_function_arg_advance (args_so_far, mode, type, true);
  return false;
}

/* Return true if FUN has no prototype, has a variable argument
   list, or passes any parameter in memory.  */

static bool
rs6000_function_parms_need_stack (tree fun, bool incoming)
{
  tree fntype, result;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;

  if (!fun)
    /* Must be a libcall, all of which only use reg parms.  */
    return false;

  fntype = fun;
  if (!TYPE_P (fun))
    fntype = TREE_TYPE (fun);

  /* Varargs functions need the parameter save area.  */
  if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
    return true;

  INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
  args_so_far = pack_cumulative_args (&args_so_far_v);

  /* When incoming, we will have been passed the function decl.
     It is necessary to use the decl to handle K&R style functions,
     where TYPE_ARG_TYPES may not be available.  */
  if (incoming)
    {
      gcc_assert (DECL_P (fun));
      result = DECL_RESULT (fun);
    }
  else
    result = TREE_TYPE (fntype);

  if (result && aggregate_value_p (result, fntype))
    {
      if (!TYPE_P (result))
	result = TREE_TYPE (result);
      result = build_pointer_type (result);
      rs6000_parm_needs_stack (args_so_far, result);
    }

  if (incoming)
    {
      tree parm;

      for (parm = DECL_ARGUMENTS (fun);
	   parm && parm != void_list_node;
	   parm = TREE_CHAIN (parm))
	if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
	  return true;
    }
  else
    {
      function_args_iterator args_iter;
      tree arg_type;

      FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	if (rs6000_parm_needs_stack (args_so_far, arg_type))
	  return true;
    }

  return false;
}

/* Return the size of the REG_PARM_STACK_SPACE area for FUN.  This is
   usually a constant depending on the ABI.  However, in the ELFv2 ABI
   the register parameter area is optional when calling a function that
   has a prototype in scope, has no variable argument list, and passes
   all parameters in registers.  */

int
rs6000_reg_parm_stack_space (tree fun, bool incoming)
{
  int reg_parm_stack_space;

  switch (DEFAULT_ABI)
    {
    default:
      reg_parm_stack_space = 0;
      break;

    case ABI_AIX:
    case ABI_DARWIN:
      reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
      break;

    case ABI_ELFv2:
      /* ??? Recomputing this every time is a bit expensive.  Is there
	 a place to cache this information?  */
      if (rs6000_function_parms_need_stack (fun, incoming))
	reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
      else
	reg_parm_stack_space = 0;
      break;
    }

  return reg_parm_stack_space;
}
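
/* For example (illustrative): 64-bit AIX and Darwin always reserve
   64 bytes (eight doublewords) of register parameter area and the
   32-bit variants reserve 32, while an ELFv2 call to a fully
   prototyped function whose arguments all fit in registers reserves
   none.  */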

static void
rs6000_move_block_from_reg (int regno, rtx x, int nregs)
{
  int i;
  machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;

  if (nregs == 0)
    return;

  for (i = 0; i < nregs; i++)
    {
      rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
      if (reload_completed)
	{
	  if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
	    tem = NULL_RTX;
	  else
	    tem = simplify_gen_subreg (reg_mode, x, BLKmode,
				       i * GET_MODE_SIZE (reg_mode));
	}
      else
	tem = replace_equiv_address (tem, XEXP (tem, 0));

      gcc_assert (tem);

      emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
    }
}

/* Perform any actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.  */

static void
setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
			tree type, int *pretend_size ATTRIBUTE_UNUSED,
			int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  int reg_size = TARGET_32BIT ? 4 : 8;
  rtx save_area = NULL_RTX, mem;
  int first_reg_offset;
  alias_set_type set;

  /* Skip the last named argument.  */
  next_cum = *get_cumulative_args (cum);
  rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);

  if (DEFAULT_ABI == ABI_V4)
    {
      first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;

      if (! no_rtl)
	{
	  int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
	  HOST_WIDE_INT offset = 0;

	  /* Try to optimize the size of the varargs save area.
	     The ABI requires that ap.reg_save_area is doubleword
	     aligned, but we don't need to allocate space for all
	     the bytes, only those to which we actually will save
	     anything.  */
	  if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
	    gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
	  if (TARGET_HARD_FLOAT && TARGET_FPRS
	      && next_cum.fregno <= FP_ARG_V4_MAX_REG
	      && cfun->va_list_fpr_size)
	    {
	      if (gpr_reg_num)
		fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
			   * UNITS_PER_FP_WORD;
	      if (cfun->va_list_fpr_size
		  < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
		fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
	      else
		fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
			    * UNITS_PER_FP_WORD;
	    }
	  if (gpr_reg_num)
	    {
	      offset = -((first_reg_offset * reg_size) & ~7);
	      if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
		{
		  gpr_reg_num = cfun->va_list_gpr_size;
		  if (reg_size == 4 && (first_reg_offset & 1))
		    gpr_reg_num++;
		}
	      gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
	    }
	  else if (fpr_size)
	    offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
		       * UNITS_PER_FP_WORD
		     - (int) (GP_ARG_NUM_REG * reg_size);

	  if (gpr_size + fpr_size)
	    {
	      rtx reg_save_area
		= assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
	      gcc_assert (GET_CODE (reg_save_area) == MEM);
	      reg_save_area = XEXP (reg_save_area, 0);
	      if (GET_CODE (reg_save_area) == PLUS)
		{
		  gcc_assert (XEXP (reg_save_area, 0)
			      == virtual_stack_vars_rtx);
		  gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
		  offset += INTVAL (XEXP (reg_save_area, 1));
		}
	      else
		gcc_assert (reg_save_area == virtual_stack_vars_rtx);
	    }

	  cfun->machine->varargs_save_offset = offset;
	  save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
	}
    }
  else
    {
      first_reg_offset = next_cum.words;
      save_area = crtl->args.internal_arg_pointer;

      if (targetm.calls.must_pass_in_stack (mode, type))
	first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
    }

  set = get_varargs_alias_set ();
  if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
      && cfun->va_list_gpr_size)
    {
      int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;

      if (va_list_gpr_counter_field)
	/* V4 va_list_gpr_size counts number of registers needed.  */
	n_gpr = cfun->va_list_gpr_size;
      else
	/* char * va_list instead counts number of bytes needed.  */
	n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;

      if (nregs > n_gpr)
	nregs = n_gpr;

      mem = gen_rtx_MEM (BLKmode,
			 plus_constant (Pmode, save_area,
					first_reg_offset * reg_size));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
				  nregs);
    }

  /* Save FP registers if needed.  */
  if (DEFAULT_ABI == ABI_V4
      && TARGET_HARD_FLOAT && TARGET_FPRS
      && ! no_rtl
      && next_cum.fregno <= FP_ARG_V4_MAX_REG
      && cfun->va_list_fpr_size)
    {
      int fregno = next_cum.fregno, nregs;
      rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
      rtx lab = gen_label_rtx ();
      int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
					       * UNITS_PER_FP_WORD);

      emit_jump_insn
	(gen_rtx_SET (pc_rtx,
		      gen_rtx_IF_THEN_ELSE (VOIDmode,
					    gen_rtx_NE (VOIDmode, cr1,
							const0_rtx),
					    gen_rtx_LABEL_REF (VOIDmode, lab),
					    pc_rtx)));

      for (nregs = 0;
	   fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
	   fregno++, off += UNITS_PER_FP_WORD, nregs++)
	{
	  mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
			     ? DFmode : SFmode,
			     plus_constant (Pmode, save_area, off));
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (
			 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
			  ? DFmode : SFmode));
	  emit_move_insn (mem, gen_rtx_REG (
			  (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
			   ? DFmode : SFmode, fregno));
	}

      emit_label (lab);
    }
}

/* Create the va_list data type.  */

static tree
rs6000_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;

  /* For AIX, prefer 'char *' because that's what the system
     header files like.  */
  if (DEFAULT_ABI != ABI_V4)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
			  get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
		      unsigned_char_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
		      unsigned_char_type_node);
  /* Give the two bytes of padding a name, so that -Wpadded won't warn on
     every user file.  */
  f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
		      get_identifier ("reserved"), short_unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
		      get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
		      get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_res) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_res;
  DECL_CHAIN (f_res) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
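
/* For reference, the record built above amounts to the following C
   declaration (a sketch, not part of the upstream sources; the real
   type is constructed through the tree machinery above, and va_list
   itself is an array of one such record):

     typedef struct __va_list_tag
     {
       unsigned char gpr;		(index of next GPR, from r3)
       unsigned char fpr;		(index of next FPR, from f1)
       unsigned short reserved;		(named padding, see above)
       void *overflow_arg_area;		(arguments passed on the stack)
       void *reg_save_area;		(block holding saved GPRs/FPRs)
     } __va_list_tag;  */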

/* Implement va_start.  */

static void
rs6000_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only SVR4 needs something special.  */
  if (DEFAULT_ABI != ABI_V4)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_res = DECL_CHAIN (f_fpr);
  f_ovf = DECL_CHAIN (f_res);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
	       GP_ARG_NUM_REG);
  n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
	       FP_ARG_NUM_REG);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
	     HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
	     words, n_gpr, n_fpr);

  if (cfun->va_list_gpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
		  build_int_cst (NULL_TREE, n_gpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
		  build_int_cst (NULL_TREE, n_fpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

#ifdef HAVE_AS_GNU_ATTRIBUTE
      if (call_ABI_of_interest (cfun->decl))
	rs6000_passes_float = true;
#endif
    }

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* If there were no va_arg invocations, don't set up the register
     save area.  */
  if (!cfun->va_list_gpr_size
      && !cfun->va_list_fpr_size
      && n_gpr < GP_ARG_NUM_REG
      && n_fpr < FP_ARG_V4_MAX_REG)
    return;

  /* Find the register save area.  */
  t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
  if (cfun->machine->varargs_save_offset)
    t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
13759 /* Implement va_arg. */
13762 rs6000_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
13763 gimple_seq
*post_p
)
13765 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
;
13766 tree gpr
, fpr
, ovf
, sav
, reg
, t
, u
;
13767 int size
, rsize
, n_reg
, sav_ofs
, sav_scale
;
13768 tree lab_false
, lab_over
, addr
;
13770 tree ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
13774 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
13776 t
= rs6000_gimplify_va_arg (valist
, ptrtype
, pre_p
, post_p
);
13777 return build_va_arg_indirect_ref (t
);
13780 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13781 earlier version of gcc, with the property that it always applied alignment
13782 adjustments to the va-args (even for zero-sized types). The cheapest way
13783 to deal with this is to replicate the effect of the part of
13784 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13786 We don't need to check for pass-by-reference because of the test above.
13787 We can return a simplifed answer, since we know there's no offset to add. */
13790 && rs6000_darwin64_abi
)
13791 || DEFAULT_ABI
== ABI_ELFv2
13792 || (DEFAULT_ABI
== ABI_AIX
&& !rs6000_compat_align_parm
))
13793 && integer_zerop (TYPE_SIZE (type
)))
13795 unsigned HOST_WIDE_INT align
, boundary
;
13796 tree valist_tmp
= get_initialized_tmp_var (valist
, pre_p
, NULL
);
13797 align
= PARM_BOUNDARY
/ BITS_PER_UNIT
;
13798 boundary
= rs6000_function_arg_boundary (TYPE_MODE (type
), type
);
13799 if (boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
13800 boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
13801 boundary
/= BITS_PER_UNIT
;
13802 if (boundary
> align
)
13805 /* This updates arg ptr by the amount that would be necessary
13806 to align the zero-sized (but not zero-alignment) item. */
13807 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist_tmp
,
13808 fold_build_pointer_plus_hwi (valist_tmp
, boundary
- 1));
13809 gimplify_and_add (t
, pre_p
);
13811 t
= fold_convert (sizetype
, valist_tmp
);
13812 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist_tmp
,
13813 fold_convert (TREE_TYPE (valist
),
13814 fold_build2 (BIT_AND_EXPR
, sizetype
, t
,
13815 size_int (-boundary
))));
13816 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
13817 gimplify_and_add (t
, pre_p
);
13819 /* Since it is zero-sized there's no increment for the item itself. */
13820 valist_tmp
= fold_convert (build_pointer_type (type
), valist_tmp
);
13821 return build_va_arg_indirect_ref (valist_tmp
);
13824 if (DEFAULT_ABI
!= ABI_V4
)
13826 if (targetm
.calls
.split_complex_arg
&& TREE_CODE (type
) == COMPLEX_TYPE
)
13828 tree elem_type
= TREE_TYPE (type
);
13829 machine_mode elem_mode
= TYPE_MODE (elem_type
);
13830 int elem_size
= GET_MODE_SIZE (elem_mode
);
13832 if (elem_size
< UNITS_PER_WORD
)
13834 tree real_part
, imag_part
;
13835 gimple_seq post
= NULL
;
13837 real_part
= rs6000_gimplify_va_arg (valist
, elem_type
, pre_p
,
13839 /* Copy the value into a temporary, lest the formal temporary
13840 be reused out from under us. */
13841 real_part
= get_initialized_tmp_var (real_part
, pre_p
, &post
);
13842 gimple_seq_add_seq (pre_p
, post
);
13844 imag_part
= rs6000_gimplify_va_arg (valist
, elem_type
, pre_p
,
13847 return build2 (COMPLEX_EXPR
, type
, real_part
, imag_part
);
13851 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_res = DECL_CHAIN (f_fpr);
  f_ovf = DECL_CHAIN (f_res);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
                f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
                f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
                f_sav, NULL_TREE);

  size = int_size_in_bytes (type);
  rsize = (size + 3) / 4;
  int pad = 4 * rsize - size;
  align = 1;

  machine_mode mode = TYPE_MODE (type);
  if (abi_v4_pass_in_fpr (mode))
    {
      /* FP args go in FP registers, if present.  */
      reg = fpr;
      n_reg = (size + 7) / 8;
      sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
      sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
      if (mode != SFmode && mode != SDmode)
        align = 8;
    }
  else
    {
      /* Otherwise into GP registers.  */
      reg = gpr;
      n_reg = rsize;
      sav_ofs = 0;
      sav_scale = 4;
      if (n_reg == 2)
        align = 8;
    }
  /* Pull the value out of the saved registers....  */

  lab_over = NULL;
  addr = create_tmp_var (ptr_type_node, "addr");

  /* AltiVec vectors never go in registers when -mabi=altivec.  */
  if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
    align = 16;
  else
    {
      lab_false = create_artificial_label (input_location);
      lab_over = create_artificial_label (input_location);

      /* Long long and SPE vectors are aligned in the registers.
         As are any other 2 gpr item such as complex int due to a
         historical mistake.  */
      u = reg;
      if (n_reg == 2 && reg == gpr)
        {
          regalign = 1;
          u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
                      build_int_cst (TREE_TYPE (reg), n_reg - 1));
          u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
                      unshare_expr (reg), u);
        }
      /* _Decimal128 is passed in even/odd fpr pairs; the stored
         reg number is 0 for f1, so we want to make it odd.  */
      else if (reg == fpr && mode == TDmode)
        {
          t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
                      build_int_cst (TREE_TYPE (reg), 1));
          u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
        }

      t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
      t = build2 (GE_EXPR, boolean_type_node, u, t);
      u = build1 (GOTO_EXPR, void_type_node, lab_false);
      t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
      gimplify_and_add (t, pre_p);

      t = sav;
      if (sav_ofs)
        t = fold_build_pointer_plus_hwi (sav, sav_ofs);

      u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
                  build_int_cst (TREE_TYPE (reg), n_reg));
      u = fold_convert (sizetype, u);
      u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
      t = fold_build_pointer_plus (t, u);

      /* _Decimal32 varargs are located in the second word of the 64-bit
         FP register for 32-bit binaries.  */
      if (TARGET_32BIT
          && TARGET_HARD_FLOAT && TARGET_FPRS
          && mode == SDmode)
        t = fold_build_pointer_plus_hwi (t, size);

      /* Args are passed right-aligned.  */
      if (BYTES_BIG_ENDIAN)
        t = fold_build_pointer_plus_hwi (t, pad);

      gimplify_assign (addr, t, pre_p);

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      stmt = gimple_build_label (lab_false);
      gimple_seq_add_stmt (pre_p, stmt);

      if ((n_reg == 2 && !regalign) || n_reg > 2)
        {
          /* Ensure that we don't find any more args in regs.
             Alignment has taken care of the special cases.  */
          gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
        }
    }
  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  t = ovf;
  if (align != 1)
    {
      t = fold_build_pointer_plus_hwi (t, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }

  /* Args are passed right-aligned.  */
  if (BYTES_BIG_ENDIAN)
    t = fold_build_pointer_plus_hwi (t, pad);

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  gimplify_assign (unshare_expr (addr), t, pre_p);

  t = fold_build_pointer_plus_hwi (t, size);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (lab_over)
    {
      stmt = gimple_build_label (lab_over);
      gimple_seq_add_stmt (pre_p, stmt);
    }

  if (STRICT_ALIGNMENT
      && (TYPE_ALIGN (type)
          > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
    {
      /* The value (of type complex double, for example) may not be
         aligned in memory in the saved registers, so copy via a
         temporary.  (This is the same code as used for SPARC.)  */
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);
      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
                                   3, dest_addr, addr, size_int (rsize * 4));

      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  addr = fold_convert (ptrtype, addr);
  return build_va_arg_indirect_ref (addr);
}
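
/* Illustrative note (editorial addition, not from the original source):
   the routine above lowers user-level va_arg reads for the 32-bit SVR4
   ABI, where va_list carries separate GP- and FP-register cursors plus
   overflow-area and save-area pointers.  A hypothetical caller that
   exercises the FP-register path looks like:

       double second_double (int n, ...)
       {
         va_list ap;
         double d;
         va_start (ap, n);
         va_arg (ap, double);
         d = va_arg (ap, double);
         va_end (ap);
         return d;
       }

   Each va_arg is rewritten into the register-save-area versus
   overflow-area selection emitted by rs6000_gimplify_va_arg; the names
   in the snippet are invented for the example.  */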
static void
def_builtin (const char *name, tree type, enum rs6000_builtins code)
{
  tree t;
  unsigned classify = rs6000_builtin_info[(int)code].attr;
  const char *attr_string = "";

  gcc_assert (name != NULL);
  gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));

  if (rs6000_builtin_decls[(int)code])
    fatal_error (input_location,
                 "internal error: builtin function %s already processed", name);

  rs6000_builtin_decls[(int)code] = t =
    add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);

  /* Set any special attributes.  */
  if ((classify & RS6000_BTC_CONST) != 0)
    {
      /* const function, function only depends on the inputs.  */
      TREE_READONLY (t) = 1;
      TREE_NOTHROW (t) = 1;
      attr_string = ", const";
    }
  else if ((classify & RS6000_BTC_PURE) != 0)
    {
      /* pure function, function can read global memory, but does not set any
         external state.  */
      DECL_PURE_P (t) = 1;
      TREE_NOTHROW (t) = 1;
      attr_string = ", pure";
    }
  else if ((classify & RS6000_BTC_FP) != 0)
    {
      /* Function is a math function.  If rounding mode is on, then treat the
         function as not reading global memory, but it can have arbitrary side
         effects.  If it is off, then assume the function is a const function.
         This mimics the ATTR_MATHFN_FPROUNDING attribute in
         builtin-attribute.def that is used for the math functions.  */
      TREE_NOTHROW (t) = 1;
      if (flag_rounding_math)
        {
          DECL_PURE_P (t) = 1;
          DECL_IS_NOVOPS (t) = 1;
          attr_string = ", fp, pure";
        }
      else
        {
          TREE_READONLY (t) = 1;
          attr_string = ", fp, const";
        }
    }
  else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
    gcc_unreachable ();

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
             (int)code, name, attr_string);
}
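
/* Usage sketch (editorial addition; the builtin name and type node below
   are hypothetical): a typical registration made during target-builtin
   initialization would be

       def_builtin ("__builtin_altivec_example",
                    v4si_ftype_v4si_v4si,
                    ALTIVEC_BUILTIN_EXAMPLE);

   which records the decl in rs6000_builtin_decls and applies the const,
   pure, or fp attributes encoded in rs6000_builtin_info[code].attr.  */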
/* Simple ternary operations: VECd = foo (VECa, VECb, VECc).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_3arg[] =
{
#include "rs6000-builtin.def"
};
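
/* Editorial note: the table above is built with the X-macro technique.
   Every builtin appears exactly once in rs6000-builtin.def as one of the
   RS6000_BUILTIN_* invocations; each table selects one class by defining
   that macro to emit an initializer and all the others to emit nothing.
   A minimal self-contained sketch of the same trick (hypothetical names):

       #define MY_BUILTIN_2(ENUM, NAME) { NAME, ENUM },
       #define MY_BUILTIN_3(ENUM, NAME)
       static const struct { const char *name; int code; } two_arg[] =
       {
         MY_BUILTIN_2 (CODE_FOO, "foo")
         MY_BUILTIN_3 (CODE_BAR, "bar")
       };

   Here two_arg ends up holding only the "foo" entry; in the real file the
   invocations come from the #include rather than being written inline.  */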
/* DST operations: void foo (void *, const int, const char).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_dst[] =
{
#include "rs6000-builtin.def"
};
/* Simple binary operations: VECc = foo (VECa, VECb).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_2arg[] =
{
#include "rs6000-builtin.def"
};
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

/* AltiVec predicates.  */

static const struct builtin_description bdesc_altivec_preds[] =
{
#include "rs6000-builtin.def"
};
/* SPE predicates.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_spe_predicates[] =
{
#include "rs6000-builtin.def"
};
/* SPE evsel predicates.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_spe_evsel[] =
{
#include "rs6000-builtin.def"
};
/* PAIRED predicates.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_paired_preds[] =
{
#include "rs6000-builtin.def"
};
/* ABS* operations.  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_abs[] =
{
#include "rs6000-builtin.def"
};
/* Simple unary operations: VECb = foo (unsigned literal) or VECb =
   foo (VECa).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_1arg[] =
{
#include "rs6000-builtin.def"
};
/* Simple no-argument operations: result = __builtin_darn_32 ()  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_0arg[] =
{
#include "rs6000-builtin.def"
};
/* HTM builtins.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_htm[] =
{
#include "rs6000-builtin.def"
};
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Return true if a builtin function is overloaded.  */
bool
rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
{
  return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
}

const char *
rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
{
  return rs6000_builtin_info[(int)fncode].name;
}
/* Expand an expression EXP that calls a builtin without arguments.  */
static rtx
rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
{
  rtx pat;
  machine_mode tmode = insn_data[icode].operand[0].mode;

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  pat = GEN_FCN (icode) (target);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
static rtx
rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[icode].operand[0].mode;
  machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (GET_CODE (op0) != CONST_INT
      || INTVAL (op0) > 255
      || INTVAL (op0) < 0)
    {
      error ("argument 1 must be an 8-bit field value");
      return const0_rtx;
    }

  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (! pat)
    return const0_rtx;
  emit_insn (pat);

  return NULL_RTX;
}
static rtx
rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node)
    return const0_rtx;

  if (icode == CODE_FOR_altivec_vspltisb
      || icode == CODE_FOR_altivec_vspltish
      || icode == CODE_FOR_altivec_vspltisw
      || icode == CODE_FOR_spe_evsplatfi
      || icode == CODE_FOR_spe_evsplati)
    {
      /* Only allow 5-bit *signed* literals.  */
      if (GET_CODE (op0) != CONST_INT
          || INTVAL (op0) > 15
          || INTVAL (op0) < -16)
        {
          error ("argument 1 must be a 5-bit signed literal");
          return CONST0_RTX (tmode);
        }
    }

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
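
/* Editorial example of the 5-bit literal check above (user-level code,
   not part of this file): the vspltis* builtins require a compile-time
   constant in [-16, 15], so

       vector signed int ok  = __builtin_altivec_vspltisw (15);
       vector signed int bad = __builtin_altivec_vspltisw (99);

   compiles the first call but rejects the second with "argument 1 must
   be a 5-bit signed literal".  */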
static rtx
altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch1, scratch2;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;

  /* If we have invalid arguments, bail out before generating bad rtl.  */
  if (arg0 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  scratch1 = gen_reg_rtx (mode0);
  scratch2 = gen_reg_rtx (mode0);

  pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
static rtx
rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (icode == CODE_FOR_altivec_vcfux
      || icode == CODE_FOR_altivec_vcfsx
      || icode == CODE_FOR_altivec_vctsxs
      || icode == CODE_FOR_altivec_vctuxs
      || icode == CODE_FOR_altivec_vspltb
      || icode == CODE_FOR_altivec_vsplth
      || icode == CODE_FOR_altivec_vspltw
      || icode == CODE_FOR_spe_evaddiw
      || icode == CODE_FOR_spe_evldd
      || icode == CODE_FOR_spe_evldh
      || icode == CODE_FOR_spe_evldw
      || icode == CODE_FOR_spe_evlhhesplat
      || icode == CODE_FOR_spe_evlhhossplat
      || icode == CODE_FOR_spe_evlhhousplat
      || icode == CODE_FOR_spe_evlwhe
      || icode == CODE_FOR_spe_evlwhos
      || icode == CODE_FOR_spe_evlwhou
      || icode == CODE_FOR_spe_evlwhsplat
      || icode == CODE_FOR_spe_evlwwsplat
      || icode == CODE_FOR_spe_evrlwi
      || icode == CODE_FOR_spe_evslwi
      || icode == CODE_FOR_spe_evsrwis
      || icode == CODE_FOR_spe_evsubifw
      || icode == CODE_FOR_spe_evsrwiu)
    {
      /* Only allow 5-bit unsigned literals.  */
      STRIP_NOPS (arg1);
      if (TREE_CODE (arg1) != INTEGER_CST
          || TREE_INT_CST_LOW (arg1) & ~0x1f)
        {
          error ("argument 2 must be a 5-bit unsigned literal");
          return CONST0_RTX (tmode);
        }
    }
  else if (icode == CODE_FOR_dfptstsfi_eq_dd
           || icode == CODE_FOR_dfptstsfi_lt_dd
           || icode == CODE_FOR_dfptstsfi_gt_dd
           || icode == CODE_FOR_dfptstsfi_unordered_dd
           || icode == CODE_FOR_dfptstsfi_eq_td
           || icode == CODE_FOR_dfptstsfi_lt_td
           || icode == CODE_FOR_dfptstsfi_gt_td
           || icode == CODE_FOR_dfptstsfi_unordered_td)
    {
      /* Only allow 6-bit unsigned literals.  */
      STRIP_NOPS (arg0);
      if (TREE_CODE (arg0) != INTEGER_CST
          || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
        {
          error ("argument 1 must be a 6-bit unsigned literal");
          return CONST0_RTX (tmode);
        }
    }
  else if (icode == CODE_FOR_xststdcdp
           || icode == CODE_FOR_xststdcsp
           || icode == CODE_FOR_xvtstdcdp
           || icode == CODE_FOR_xvtstdcsp)
    {
      /* Only allow 7-bit unsigned literals.  */
      STRIP_NOPS (arg1);
      if (TREE_CODE (arg1) != INTEGER_CST
          || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
        {
          error ("argument 2 must be a 7-bit unsigned literal");
          return CONST0_RTX (tmode);
        }
    }

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
static rtx
altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch;
  tree cr6_form = CALL_EXPR_ARG (exp, 0);
  tree arg0 = CALL_EXPR_ARG (exp, 1);
  tree arg1 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = SImode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  int cr6_form_int;

  if (TREE_CODE (cr6_form) != INTEGER_CST)
    {
      error ("argument 1 of __builtin_altivec_predicate must be a constant");
      return const0_rtx;
    }
  else
    cr6_form_int = TREE_INT_CST_LOW (cr6_form);

  gcc_assert (mode0 == mode1);

  /* If we have invalid arguments, bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Note that for many of the relevant operations (e.g. cmpne or
     cmpeq) with float or double operands, it makes more sense for the
     mode of the allocated scratch register to select a vector of
     integer.  But the choice to copy the mode of operand 0 was made
     long ago and there are no plans to change it.  */
  scratch = gen_reg_rtx (mode0);

  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  /* The vec_any* and vec_all* predicates use the same opcodes for two
     different operations, but the bits in CR6 will be different
     depending on what information we want.  So we have to play tricks
     with CR6 to get the right bits out.

     If you think this is disgusting, look at the specs for the
     AltiVec predicates.  */

  switch (cr6_form_int)
    {
    case 0:
      emit_insn (gen_cr6_test_for_zero (target));
      break;
    case 1:
      emit_insn (gen_cr6_test_for_zero_reverse (target));
      break;
    case 2:
      emit_insn (gen_cr6_test_for_lt (target));
      break;
    case 3:
      emit_insn (gen_cr6_test_for_lt_reverse (target));
      break;
    default:
      error ("argument 1 of __builtin_altivec_predicate is out of range");
      break;
    }

  return target;
}
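
/* Editorial sketch of the CR6 convention handled above (the exact
   cr6_form values emitted by altivec.h are stated here as an assumption):
   vec_all_eq and vec_any_ne both expand to the same dot-form compare and
   differ only in which CR6 test is emitted:

       int all_equal   = vec_all_eq (a, b);
       int any_unequal = vec_any_ne (a, b);

   with a and b of type vector int, one maps to the "test for lt" form
   and the other to its reverse.  That is why four gen_cr6_test_* emitters
   are needed to cover the vec_all*/vec_any* predicate families.  */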
static rtx
paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, addr;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = Pmode;
  machine_mode mode1 = Pmode;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  op1 = copy_to_mode_reg (mode1, op1);

  if (op0 == const0_rtx)
    {
      addr = gen_rtx_MEM (tmode, op1);
    }
  else
    {
      op0 = copy_to_mode_reg (mode0, op0);
      addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
    }

  pat = GEN_FCN (icode) (target, addr);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
/* Return a constant vector for use as a little-endian permute control vector
   to reverse the order of elements of the given vector mode.  */
static rtx
swap_selector_for_mode (machine_mode mode)
{
  /* These are little endian vectors, so their elements are reversed
     from what you would normally expect for a permute control vector.  */
  unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
  unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
  unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
  unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
  unsigned int *swaparray, i;
  rtx perm[16];

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
      swaparray = swap2;
      break;
    case V4SFmode:
    case V4SImode:
      swaparray = swap4;
      break;
    case V8HImode:
      swaparray = swap8;
      break;
    case V16QImode:
      swaparray = swap16;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < 16; ++i)
    perm[i] = GEN_INT (swaparray[i]);

  return force_reg (V16QImode,
                    gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
}
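
/* Editorial observation: each swap array above is an involution (applying
   the same permutation twice restores the original order; e.g. swap4
   exchanges indices 0<->3 and 1<->2 within every word).  That is why one
   selector per mode serves both directions:

       rtx sel = swap_selector_for_mode (V8HImode);

   can be used after a load (to permute the just-loaded value) and before
   a store (to permute the value about to be stored), as the expanders
   below do.  */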
/* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
   with -maltivec=be specified.  Issue the load followed by an element-
   reversing permute.  */
void
altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
  rtx tmp = gen_reg_rtx (mode);
  rtx load = gen_rtx_SET (tmp, op1);
  rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
  rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
  rtx sel = swap_selector_for_mode (mode);
  rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);

  gcc_assert (REG_P (op0));
  emit_insn (par);
  emit_insn (gen_rtx_SET (op0, vperm));
}
/* Generate code for a "stvxl" built-in for a little endian target with
   -maltivec=be specified.  Issue the store preceded by an element-reversing
   permute.  */
void
altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
  rtx tmp = gen_reg_rtx (mode);
  rtx store = gen_rtx_SET (op0, tmp);
  rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
  rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
  rtx sel = swap_selector_for_mode (mode);
  rtx vperm;

  gcc_assert (REG_P (op1));
  vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
  emit_insn (gen_rtx_SET (tmp, vperm));
  emit_insn (par);
}
/* Generate code for a "stve*x" built-in for a little endian target with
   -maltivec=be specified.  Issue the store preceded by an element-reversing
   permute.  */
void
altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx tmp = gen_reg_rtx (mode);
  rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
  rtx sel = swap_selector_for_mode (mode);
  rtx vperm;

  gcc_assert (REG_P (op1));
  vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
  emit_insn (gen_rtx_SET (tmp, vperm));
  emit_insn (gen_rtx_SET (op0, stvx));
}
static rtx
altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target,
                           bool blk)
{
  rtx pat, addr, rawaddr;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = Pmode;
  machine_mode mode1 = Pmode;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  op1 = copy_to_mode_reg (mode1, op1);

  /* For LVX, express the RTL accurately by ANDing the address with -16.
     LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
     so the raw address is fine.  */
  if (icode == CODE_FOR_altivec_lvx_v2df_2op
      || icode == CODE_FOR_altivec_lvx_v2di_2op
      || icode == CODE_FOR_altivec_lvx_v4sf_2op
      || icode == CODE_FOR_altivec_lvx_v4si_2op
      || icode == CODE_FOR_altivec_lvx_v8hi_2op
      || icode == CODE_FOR_altivec_lvx_v16qi_2op)
    {
      if (op0 == const0_rtx)
        rawaddr = op1;
      else
        {
          op0 = copy_to_mode_reg (mode0, op0);
          rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
        }
      addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
      addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);

      /* For -maltivec=be, emit the load and follow it up with a
         permute to swap the elements.  */
      if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
        {
          rtx temp = gen_reg_rtx (tmode);
          emit_insn (gen_rtx_SET (temp, addr));

          rtx sel = swap_selector_for_mode (tmode);
          rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
                                      UNSPEC_VPERM);
          emit_insn (gen_rtx_SET (target, vperm));
        }
      else
        emit_insn (gen_rtx_SET (target, addr));
    }
  else
    {
      if (op0 == const0_rtx)
        addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
      else
        {
          op0 = copy_to_mode_reg (mode0, op0);
          addr = gen_rtx_MEM (blk ? BLKmode : tmode,
                              gen_rtx_PLUS (Pmode, op1, op0));
        }

      pat = GEN_FCN (icode) (target, addr);
      if (! pat)
        return 0;
      emit_insn (pat);
    }

  return target;
}
static rtx
spe_expand_stv_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);

  machine_mode mode0 = insn_data[icode].operand[0].mode;
  machine_mode mode1 = insn_data[icode].operand[1].mode;
  machine_mode mode2 = insn_data[icode].operand[2].mode;

  /* Invalid arguments.  Bail before doing anything stoopid!  */
  if (arg0 == error_mark_node
      || arg1 == error_mark_node
      || arg2 == error_mark_node)
    return const0_rtx;

  if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
    op0 = copy_to_mode_reg (mode2, op0);
  if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);
  if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
    op2 = copy_to_mode_reg (mode1, op2);

  pat = GEN_FCN (icode) (op1, op2, op0);
  if (pat)
    emit_insn (pat);
  return NULL_RTX;
}
static rtx
paired_expand_stv_builtin (enum insn_code icode, tree exp)
{
  rtx pat, addr;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);

  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode1 = Pmode;
  machine_mode mode2 = Pmode;

  /* Invalid arguments.  Bail before doing anything stoopid!  */
  if (arg0 == error_mark_node
      || arg1 == error_mark_node
      || arg2 == error_mark_node)
    return const0_rtx;

  if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
    op0 = copy_to_mode_reg (tmode, op0);

  op2 = copy_to_mode_reg (mode2, op2);

  if (op1 == const0_rtx)
    {
      addr = gen_rtx_MEM (tmode, op2);
    }
  else
    {
      op1 = copy_to_mode_reg (mode1, op1);
      addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
    }

  pat = GEN_FCN (icode) (addr, op0);
  if (pat)
    emit_insn (pat);
  return NULL_RTX;
}
static rtx
altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  machine_mode mode0 = insn_data[icode].operand[0].mode;
  machine_mode mode1 = insn_data[icode].operand[1].mode;
  machine_mode mode2 = insn_data[icode].operand[2].mode;

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return NULL_RTX;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node
      || arg1 == error_mark_node
      || arg2 == error_mark_node)
    return NULL_RTX;

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
    op2 = copy_to_mode_reg (mode2, op2);

  pat = GEN_FCN (icode) (op0, op1, op2);
  if (pat)
    emit_insn (pat);

  return NULL_RTX;
}
static rtx
altivec_expand_stv_builtin (enum insn_code icode, tree exp)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx pat, addr, rawaddr;
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode smode = insn_data[icode].operand[1].mode;
  machine_mode mode1 = Pmode;
  machine_mode mode2 = Pmode;

  /* Invalid arguments.  Bail before doing anything stoopid!  */
  if (arg0 == error_mark_node
      || arg1 == error_mark_node
      || arg2 == error_mark_node)
    return const0_rtx;

  op2 = copy_to_mode_reg (mode2, op2);

  /* For STVX, express the RTL accurately by ANDing the address with -16.
     STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
     so the raw address is fine.  */
  if (icode == CODE_FOR_altivec_stvx_v2df_2op
      || icode == CODE_FOR_altivec_stvx_v2di_2op
      || icode == CODE_FOR_altivec_stvx_v4sf_2op
      || icode == CODE_FOR_altivec_stvx_v4si_2op
      || icode == CODE_FOR_altivec_stvx_v8hi_2op
      || icode == CODE_FOR_altivec_stvx_v16qi_2op)
    {
      if (op1 == const0_rtx)
        rawaddr = op2;
      else
        {
          op1 = copy_to_mode_reg (mode1, op1);
          rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
        }

      addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
      addr = gen_rtx_MEM (tmode, addr);

      op0 = copy_to_mode_reg (tmode, op0);

      /* For -maltivec=be, emit a permute to swap the elements, followed
         by the store.  */
      if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
        {
          rtx temp = gen_reg_rtx (tmode);
          rtx sel = swap_selector_for_mode (tmode);
          rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
                                      UNSPEC_VPERM);
          emit_insn (gen_rtx_SET (temp, vperm));
          emit_insn (gen_rtx_SET (addr, temp));
        }
      else
        emit_insn (gen_rtx_SET (addr, op0));
    }
  else
    {
      if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
        op0 = copy_to_mode_reg (smode, op0);

      if (op1 == const0_rtx)
        addr = gen_rtx_MEM (tmode, op2);
      else
        {
          op1 = copy_to_mode_reg (mode1, op1);
          addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
        }

      pat = GEN_FCN (icode) (addr, op0);
      if (pat)
        emit_insn (pat);
    }

  return NULL_RTX;
}
/* Return the appropriate SPR number associated with the given builtin.  */
static inline HOST_WIDE_INT
htm_spr_num (enum rs6000_builtins code)
{
  if (code == HTM_BUILTIN_GET_TFHAR
      || code == HTM_BUILTIN_SET_TFHAR)
    return TFHAR_SPR;
  else if (code == HTM_BUILTIN_GET_TFIAR
           || code == HTM_BUILTIN_SET_TFIAR)
    return TFIAR_SPR;
  else if (code == HTM_BUILTIN_GET_TEXASR
           || code == HTM_BUILTIN_SET_TEXASR)
    return TEXASR_SPR;
  gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
              || code == HTM_BUILTIN_SET_TEXASRU);
  return TEXASRU_SPR;
}
/* Return the appropriate SPR regno associated with the given builtin.  */
static inline HOST_WIDE_INT
htm_spr_regno (enum rs6000_builtins code)
{
  if (code == HTM_BUILTIN_GET_TFHAR
      || code == HTM_BUILTIN_SET_TFHAR)
    return TFHAR_REGNO;
  else if (code == HTM_BUILTIN_GET_TFIAR
           || code == HTM_BUILTIN_SET_TFIAR)
    return TFIAR_REGNO;
  gcc_assert (code == HTM_BUILTIN_GET_TEXASR
              || code == HTM_BUILTIN_SET_TEXASR
              || code == HTM_BUILTIN_GET_TEXASRU
              || code == HTM_BUILTIN_SET_TEXASRU);
  return TEXASR_REGNO;
}
/* Return the correct ICODE value depending on whether we are
   setting or reading the HTM SPRs.  */
static inline enum insn_code
rs6000_htm_spr_icode (bool nonvoid)
{
  if (nonvoid)
    return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
  else
    return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
}
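
/* Editorial note: whether we read or write the SPR is keyed off the
   builtin's return type, which the caller computes as the "nonvoid"
   flag.  At the source level (illustrative, 64-bit target assumed):

       unsigned long t = __builtin_get_texasr ();
       __builtin_set_texasr (t);

   The first call is nonvoid and expands through htm_mfspr_di; the
   second is void and expands through htm_mtspr_di.  */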
/* Expand the HTM builtin in EXP and store the result in TARGET.
   Store true in *EXPANDEDP if we found a builtin to expand.  */
static rtx
htm_expand_builtin (tree exp, rtx target, bool * expandedp)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  enum rs6000_builtins fcode
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d;
  size_t i;

  *expandedp = true;

  if (!TARGET_POWERPC64
      && (fcode == HTM_BUILTIN_TABORTDC
          || fcode == HTM_BUILTIN_TABORTDCI))
    {
      size_t uns_fcode = (size_t)fcode;
      const char *name = rs6000_builtin_info[uns_fcode].name;
      error ("builtin %s is only valid in 64-bit mode", name);
      return const0_rtx;
    }

  /* Expand the HTM builtins.  */
  d = bdesc_htm;
  for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
    if (d->code == fcode)
      {
        rtx op[MAX_HTM_OPERANDS], pat;
        int nopnds = 0;
        tree arg;
        call_expr_arg_iterator iter;
        unsigned attr = rs6000_builtin_info[fcode].attr;
        enum insn_code icode = d->icode;
        const struct insn_operand_data *insn_op;
        bool uses_spr = (attr & RS6000_BTC_SPR);
        rtx cr = NULL_RTX;

        if (uses_spr)
          icode = rs6000_htm_spr_icode (nonvoid);
        insn_op = &insn_data[icode].operand[0];

        if (nonvoid)
          {
            machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
            if (!target
                || GET_MODE (target) != tmode
                || (uses_spr && !(*insn_op->predicate) (target, tmode)))
              target = gen_reg_rtx (tmode);
            if (uses_spr)
              op[nopnds++] = target;
          }

        FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
          {
            if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
              return const0_rtx;

            insn_op = &insn_data[icode].operand[nopnds];

            op[nopnds] = expand_normal (arg);

            if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
              {
                if (!strcmp (insn_op->constraint, "n"))
                  {
                    int arg_num = (nonvoid) ? nopnds : nopnds + 1;
                    if (!CONST_INT_P (op[nopnds]))
                      error ("argument %d must be an unsigned literal",
                             arg_num);
                    else
                      error ("argument %d is an unsigned literal that is "
                             "out of range", arg_num);
                    return const0_rtx;
                  }
                op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
              }

            nopnds++;
          }

        /* Handle the builtins for extended mnemonics.  These accept
           no arguments, but map to builtins that take arguments.  */
        switch (fcode)
          {
          case HTM_BUILTIN_TENDALL:  /* Alias for: tend. 1  */
          case HTM_BUILTIN_TRESUME:  /* Alias for: tsr. 1  */
            op[nopnds++] = GEN_INT (1);
            if (flag_checking)
              attr |= RS6000_BTC_UNARY;
            break;
          case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0  */
            op[nopnds++] = GEN_INT (0);
            if (flag_checking)
              attr |= RS6000_BTC_UNARY;
            break;
          default:
            break;
          }

        /* If this builtin accesses SPRs, then pass in the appropriate
           SPR number and SPR regno as the last two operands.  */
        if (uses_spr)
          {
            machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
            op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
            op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
          }
        /* If this builtin accesses a CR, then pass in a scratch
           CR as the last operand.  */
        else if (attr & RS6000_BTC_CR)
          {
            cr = gen_reg_rtx (CCmode);
            op[nopnds++] = cr;
          }

        if (flag_checking)
          {
            int expected_nopnds = 0;
            if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
              expected_nopnds = 1;
            else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
              expected_nopnds = 2;
            else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
              expected_nopnds = 3;
            if (!(attr & RS6000_BTC_VOID))
              expected_nopnds += 1;
            if (uses_spr)
              expected_nopnds += 2;

            gcc_assert (nopnds == expected_nopnds
                        && nopnds <= MAX_HTM_OPERANDS);
          }

        switch (nopnds)
          {
          case 1:
            pat = GEN_FCN (icode) (op[0]);
            break;
          case 2:
            pat = GEN_FCN (icode) (op[0], op[1]);
            break;
          case 3:
            pat = GEN_FCN (icode) (op[0], op[1], op[2]);
            break;
          case 4:
            pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
            break;
          default:
            gcc_unreachable ();
          }
        if (!pat)
          return NULL_RTX;
        emit_insn (pat);

        if (attr & RS6000_BTC_CR)
          {
            if (fcode == HTM_BUILTIN_TBEGIN)
              {
                /* Emit code to set TARGET to true or false depending on
                   whether the tbegin. instruction succeeded or failed
                   to start a transaction.  We do this by placing the 1's
                   complement of CR's EQ bit into TARGET.  */
                rtx scratch = gen_reg_rtx (SImode);
                emit_insn (gen_rtx_SET (scratch,
                                        gen_rtx_EQ (SImode, cr,
                                                    const0_rtx)));
                emit_insn (gen_rtx_SET (target,
                                        gen_rtx_XOR (SImode, scratch,
                                                     GEN_INT (1))));
              }
            else
              {
                /* Emit code to copy the 4-bit condition register field
                   CR into the least significant end of register TARGET.  */
                rtx scratch1 = gen_reg_rtx (SImode);
                rtx scratch2 = gen_reg_rtx (SImode);
                rtx subreg = simplify_gen_subreg (CCmode, scratch1,
                                                  SImode, 0);
                emit_insn (gen_movcc (subreg, cr));
                emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
                emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
              }
          }

        if (nonvoid)
          return target;
        return const0_rtx;
      }

  *expandedp = false;
  return NULL_RTX;
}
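
/* Editorial sketch of the TBEGIN special case above (user-level code;
   do_transactional_work is a placeholder):

       if (__builtin_tbegin (0))
         {
           do_transactional_work ();
           __builtin_tend (0);
         }

   tbegin. sets CR0, and the EQ-bit complement computed above is what
   makes the builtin return nonzero exactly when the transaction started
   successfully.  */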
/* Expand the CPU builtin in FCODE and store the result in TARGET.  */

static rtx
cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
                    rtx target)
{
  /* __builtin_cpu_init () is a nop, so expand to nothing.  */
  if (fcode == RS6000_BUILTIN_CPU_INIT)
    return const0_rtx;

  if (target == 0 || GET_MODE (target) != SImode)
    target = gen_reg_rtx (SImode);

#ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
  tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("builtin %s only accepts a string argument",
             rs6000_builtin_info[(size_t) fcode].name);
      return const0_rtx;
    }

  if (fcode == RS6000_BUILTIN_CPU_IS)
    {
      const char *cpu = TREE_STRING_POINTER (arg);
      rtx cpuid = NULL_RTX;
      for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
        if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
          {
            /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM.  */
            cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
            break;
          }
      if (cpuid == NULL_RTX)
        {
          /* Invalid CPU argument.  */
          error ("cpu %s is an invalid argument to builtin %s",
                 cpu, rs6000_builtin_info[(size_t) fcode].name);
          return const0_rtx;
        }

      rtx platform = gen_reg_rtx (SImode);
      rtx tcbmem = gen_const_mem (SImode,
                                  gen_rtx_PLUS (Pmode,
                                                gen_rtx_REG (Pmode, TLS_REGNUM),
                                                GEN_INT (TCB_PLATFORM_OFFSET)));
      emit_move_insn (platform, tcbmem);
      emit_insn (gen_eqsi3 (target, platform, cpuid));
    }
  else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
    {
      const char *hwcap = TREE_STRING_POINTER (arg);
      rtx mask = NULL_RTX;
      int hwcap_offset;
      for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
        if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
          {
            mask = GEN_INT (cpu_supports_info[i].mask);
            hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
            break;
          }
      if (mask == NULL_RTX)
        {
          /* Invalid HWCAP argument.  */
          error ("hwcap %s is an invalid argument to builtin %s",
                 hwcap, rs6000_builtin_info[(size_t) fcode].name);
          return const0_rtx;
        }

      rtx tcb_hwcap = gen_reg_rtx (SImode);
      rtx tcbmem = gen_const_mem (SImode,
                                  gen_rtx_PLUS (Pmode,
                                                gen_rtx_REG (Pmode, TLS_REGNUM),
                                                GEN_INT (hwcap_offset)));
      emit_move_insn (tcb_hwcap, tcbmem);
      rtx scratch1 = gen_reg_rtx (SImode);
      emit_insn (gen_rtx_SET (scratch1,
                              gen_rtx_AND (SImode, tcb_hwcap, mask)));
      rtx scratch2 = gen_reg_rtx (SImode);
      emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
      emit_insn (gen_rtx_SET (target,
                              gen_rtx_XOR (SImode, scratch2, const1_rtx)));
    }

  /* Record that we have expanded a CPU builtin, so that we can later
     emit a reference to the special symbol exported by LIBC to ensure we
     do not link against an old LIBC that doesn't support this feature.  */
  cpu_builtin_p = true;

#else
  /* For old LIBCs, always return FALSE.  */
  emit_move_insn (target, GEN_INT (0));
#endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */

  return target;
}
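
/* Editorial sketch of what the expansion above implements (illustrative;
   use_power9_path and use_fallback_path are placeholders):

       if (__builtin_cpu_is ("power9"))
         use_power9_path ();
       else if (__builtin_cpu_supports ("arch_2_07"))
         use_fallback_path ();

   Both builtins compile to an inline load from the glibc TCB rather than
   a library call: CPU_IS compares the cached platform word, while
   CPU_SUPPORTS masks a hwcap word and normalizes the result to 0/1 with
   the eq/xor sequence emitted above.  */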
15571 rs6000_expand_ternop_builtin (enum insn_code icode
, tree exp
, rtx target
)
15574 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15575 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15576 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15577 rtx op0
= expand_normal (arg0
);
15578 rtx op1
= expand_normal (arg1
);
15579 rtx op2
= expand_normal (arg2
);
15580 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15581 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15582 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
15583 machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
15585 if (icode
== CODE_FOR_nothing
)
15586 /* Builtin not supported on this processor. */
15589 /* If we got invalid arguments bail out before generating bad rtl. */
15590 if (arg0
== error_mark_node
15591 || arg1
== error_mark_node
15592 || arg2
== error_mark_node
)
15595 /* Check and prepare argument depending on the instruction code.
15597 Note that a switch statement instead of the sequence of tests
15598 would be incorrect as many of the CODE_FOR values could be
15599 CODE_FOR_nothing and that would yield multiple alternatives
15600 with identical values. We'd never reach here at runtime in
15602 if (icode
== CODE_FOR_altivec_vsldoi_v4sf
15603 || icode
== CODE_FOR_altivec_vsldoi_v2df
15604 || icode
== CODE_FOR_altivec_vsldoi_v4si
15605 || icode
== CODE_FOR_altivec_vsldoi_v8hi
15606 || icode
== CODE_FOR_altivec_vsldoi_v16qi
)
15608 /* Only allow 4-bit unsigned literals. */
15610 if (TREE_CODE (arg2
) != INTEGER_CST
15611 || TREE_INT_CST_LOW (arg2
) & ~0xf)
15613 error ("argument 3 must be a 4-bit unsigned literal");
15614 return CONST0_RTX (tmode
);
15617 else if (icode
== CODE_FOR_vsx_xxpermdi_v2df
15618 || icode
== CODE_FOR_vsx_xxpermdi_v2di
15619 || icode
== CODE_FOR_vsx_xxpermdi_v2df_be
15620 || icode
== CODE_FOR_vsx_xxpermdi_v2di_be
15621 || icode
== CODE_FOR_vsx_xxpermdi_v1ti
15622 || icode
== CODE_FOR_vsx_xxpermdi_v4sf
15623 || icode
== CODE_FOR_vsx_xxpermdi_v4si
15624 || icode
== CODE_FOR_vsx_xxpermdi_v8hi
15625 || icode
== CODE_FOR_vsx_xxpermdi_v16qi
15626 || icode
== CODE_FOR_vsx_xxsldwi_v16qi
15627 || icode
== CODE_FOR_vsx_xxsldwi_v8hi
15628 || icode
== CODE_FOR_vsx_xxsldwi_v4si
15629 || icode
== CODE_FOR_vsx_xxsldwi_v4sf
15630 || icode
== CODE_FOR_vsx_xxsldwi_v2di
15631 || icode
== CODE_FOR_vsx_xxsldwi_v2df
)
15633 /* Only allow 2-bit unsigned literals. */
15635 if (TREE_CODE (arg2
) != INTEGER_CST
15636 || TREE_INT_CST_LOW (arg2
) & ~0x3)
15638 error ("argument 3 must be a 2-bit unsigned literal");
15639 return CONST0_RTX (tmode
);
15642 else if (icode
== CODE_FOR_vsx_set_v2df
15643 || icode
== CODE_FOR_vsx_set_v2di
15644 || icode
== CODE_FOR_bcdadd
15645 || icode
== CODE_FOR_bcdadd_lt
15646 || icode
== CODE_FOR_bcdadd_eq
15647 || icode
== CODE_FOR_bcdadd_gt
15648 || icode
== CODE_FOR_bcdsub
15649 || icode
== CODE_FOR_bcdsub_lt
15650 || icode
== CODE_FOR_bcdsub_eq
15651 || icode
== CODE_FOR_bcdsub_gt
)
15653 /* Only allow 1-bit unsigned literals. */
15655 if (TREE_CODE (arg2
) != INTEGER_CST
15656 || TREE_INT_CST_LOW (arg2
) & ~0x1)
15658 error ("argument 3 must be a 1-bit unsigned literal");
15659 return CONST0_RTX (tmode
);
15662 else if (icode
== CODE_FOR_dfp_ddedpd_dd
15663 || icode
== CODE_FOR_dfp_ddedpd_td
)
15665 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15667 if (TREE_CODE (arg0
) != INTEGER_CST
15668 || TREE_INT_CST_LOW (arg2
) & ~0x3)
15670 error ("argument 1 must be 0 or 2");
15671 return CONST0_RTX (tmode
);
  else if (icode == CODE_FOR_dfp_denbcd_dd
	   || icode == CODE_FOR_dfp_denbcd_td)
    {
      /* Only allow 1-bit unsigned literals.  */
      STRIP_NOPS (arg0);
      if (TREE_CODE (arg0) != INTEGER_CST
	  || TREE_INT_CST_LOW (arg0) & ~0x1)
	{
	  error ("argument 1 must be a 1-bit unsigned literal");
	  return CONST0_RTX (tmode);
	}
    }
  else if (icode == CODE_FOR_dfp_dscli_dd
	   || icode == CODE_FOR_dfp_dscli_td
	   || icode == CODE_FOR_dfp_dscri_dd
	   || icode == CODE_FOR_dfp_dscri_td)
    {
      /* Only allow 6-bit unsigned literals.  */
      STRIP_NOPS (arg1);
      if (TREE_CODE (arg1) != INTEGER_CST
	  || TREE_INT_CST_LOW (arg1) & ~0x3f)
	{
	  error ("argument 2 must be a 6-bit unsigned literal");
	  return CONST0_RTX (tmode);
	}
    }
  else if (icode == CODE_FOR_crypto_vshasigmaw
	   || icode == CODE_FOR_crypto_vshasigmad)
    {
      /* Check whether the 2nd and 3rd arguments are integer constants and in
	 range and prepare arguments.  */
      STRIP_NOPS (arg1);
      if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
	{
	  error ("argument 2 must be 0 or 1");
	  return CONST0_RTX (tmode);
	}

      STRIP_NOPS (arg2);
      if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
	{
	  error ("argument 3 must be in the range 0..15");
	  return CONST0_RTX (tmode);
	}
    }

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
    op2 = copy_to_mode_reg (mode2, op2);

  if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
    pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
  else
    pat = GEN_FCN (icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
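
/* Illustrative sketch, not part of the compiler: the literal-range checks
   above are what a user trips at the source level.  Assuming AltiVec is
   enabled, the first call below expands through the vsldoi path, while the
   second is rejected with "argument 3 must be a 4-bit unsigned literal"
   because the shift count is not a compile-time constant in 0..15:

     #include <altivec.h>

     vector signed int
     shift_example (vector signed int a, vector signed int b, int n)
     {
       vector signed int ok = vec_sld (a, b, 4);   // literal 0..15: accepted
       return vec_sld (ok, b, n);                  // non-literal: error
     }
*/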
/* Expand the lvx builtins.  */
static rtx
altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arg0;
  machine_mode tmode, mode0;
  rtx pat, op0;
  enum insn_code icode;

  switch (fcode)
    {
    case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
      icode = CODE_FOR_vector_altivec_load_v16qi;
      break;
    case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
      icode = CODE_FOR_vector_altivec_load_v8hi;
      break;
    case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
      icode = CODE_FOR_vector_altivec_load_v4si;
      break;
    case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
      icode = CODE_FOR_vector_altivec_load_v4sf;
      break;
    case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
      icode = CODE_FOR_vector_altivec_load_v2df;
      break;
    case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
      icode = CODE_FOR_vector_altivec_load_v2di;
      break;
    case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
      icode = CODE_FOR_vector_altivec_load_v1ti;
      break;
    default:
      *expandedp = false;
      return NULL_RTX;
    }

  *expandedp = true;

  arg0 = CALL_EXPR_ARG (exp, 0);
  op0 = expand_normal (arg0);
  tmode = insn_data[icode].operand[0].mode;
  mode0 = insn_data[icode].operand[1].mode;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Expand the stvx builtins.  */
static rtx
altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
			   bool *expandedp)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arg0, arg1;
  machine_mode mode0, mode1;
  rtx pat, op0, op1;
  enum insn_code icode;

  switch (fcode)
    {
    case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
      icode = CODE_FOR_vector_altivec_store_v16qi;
      break;
    case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
      icode = CODE_FOR_vector_altivec_store_v8hi;
      break;
    case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
      icode = CODE_FOR_vector_altivec_store_v4si;
      break;
    case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
      icode = CODE_FOR_vector_altivec_store_v4sf;
      break;
    case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
      icode = CODE_FOR_vector_altivec_store_v2df;
      break;
    case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
      icode = CODE_FOR_vector_altivec_store_v2di;
      break;
    case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
      icode = CODE_FOR_vector_altivec_store_v1ti;
      break;
    default:
      *expandedp = false;
      return NULL_RTX;
    }

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  op0 = expand_normal (arg0);
  op1 = expand_normal (arg1);
  mode0 = insn_data[icode].operand[0].mode;
  mode1 = insn_data[icode].operand[1].mode;

  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);

  *expandedp = true;
  return NULL_RTX;
}
/* Expand the dst builtins.  */
static rtx
altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
			    bool *expandedp)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum rs6000_builtins fcode
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  tree arg0, arg1, arg2;
  machine_mode mode0, mode1;
  rtx pat, op0, op1, op2;
  const struct builtin_description *d;
  size_t i;

  *expandedp = false;

  /* Handle DST variants.  */
  d = bdesc_dst;
  for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
    if (d->code == fcode)
      {
	arg0 = CALL_EXPR_ARG (exp, 0);
	arg1 = CALL_EXPR_ARG (exp, 1);
	arg2 = CALL_EXPR_ARG (exp, 2);
	op0 = expand_normal (arg0);
	op1 = expand_normal (arg1);
	op2 = expand_normal (arg2);
	mode0 = insn_data[d->icode].operand[0].mode;
	mode1 = insn_data[d->icode].operand[1].mode;

	/* Invalid arguments, bail out before generating bad rtl.  */
	if (arg0 == error_mark_node
	    || arg1 == error_mark_node
	    || arg2 == error_mark_node)
	  return const0_rtx;

	*expandedp = true;
	STRIP_NOPS (arg2);
	if (TREE_CODE (arg2) != INTEGER_CST
	    || TREE_INT_CST_LOW (arg2) & ~0x3)
	  {
	    error ("argument to %qs must be a 2-bit unsigned literal", d->name);
	    return const0_rtx;
	  }

	if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
	  op0 = copy_to_mode_reg (Pmode, op0);
	if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
	  op1 = copy_to_mode_reg (mode1, op1);

	pat = GEN_FCN (d->icode) (op0, op1, op2);
	if (pat != 0)
	  emit_insn (pat);

	return NULL_RTX;
      }

  return NULL_RTX;
}
/* Expand vec_init builtin.  */
static rtx
altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  machine_mode tmode = TYPE_MODE (type);
  machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (n_elt == call_expr_nargs (exp));

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  /* If we have a vector composed of a single element, such as V1TImode, do
     the initialization directly.  */
  if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
      emit_move_insn (target, gen_lowpart (tmode, x));
    }
  else
    {
      rtvec v = rtvec_alloc (n_elt);

      for (i = 0; i < n_elt; ++i)
	{
	  rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
	  RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
	}

      rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
    }

  return target;
}
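
/* Illustrative sketch, not part of the compiler: the two paths above
   correspond to initializations such as the following (assuming a VSX
   target where these vector types exist).  The single-element V1TI case
   becomes one register move; the V4SI case builds a PARALLEL and goes
   through rs6000_expand_vector_init:

     vector __int128 one  = (vector __int128) { x };       // direct move
     vector int      four = (vector int) { a, b, c, d };   // PARALLEL path
*/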
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!tree_fits_uhwi_p (arg)
      || (elt = tree_to_uhwi (arg), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* Expand vec_set builtin.  */
static rtx
altivec_expand_vec_set_builtin (tree exp)
{
  machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  rs6000_expand_vector_set (op0, op1, elt);

  return op0;
}
/* Expand vec_ext builtin.  */
static rtx
altivec_expand_vec_ext_builtin (tree exp, rtx target)
{
  machine_mode tmode, mode0;
  tree arg0, arg1;
  rtx op0, op1;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  op1 = expand_normal (arg1);

  /* Call get_element_number to validate arg1 if it is a constant.  */
  if (TREE_CODE (arg1) == INTEGER_CST)
    (void) get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  rs6000_expand_vector_extract (target, op0, op1);

  return target;
}
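
/* Illustrative sketch, not part of the compiler: for vec_extract, only a
   constant selector can be validated here; a variable selector is left to
   rs6000_expand_vector_extract:

     int c = vec_extract (v, 3);   // 3 checked against the subpart count
     int d = vec_extract (v, i);   // variable index resolved at expand time
*/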
/* Expand the builtin in EXP and store the result in TARGET.  Store
   true in *EXPANDEDP if we found a builtin to expand.  */
static rtx
altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2;
  rtx op0, pat;
  machine_mode tmode, mode0;
  enum rs6000_builtins fcode
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);

  if (rs6000_overloaded_builtin_p (fcode))
    {
      *expandedp = true;
      error ("unresolved overload for Altivec builtin %qF", fndecl);

      /* Given it is invalid, just generate a normal call.  */
      return expand_call (exp, target, false);
    }

  target = altivec_expand_ld_builtin (exp, target, expandedp);
  if (*expandedp)
    return target;

  target = altivec_expand_st_builtin (exp, target, expandedp);
  if (*expandedp)
    return target;

  target = altivec_expand_dst_builtin (exp, target, expandedp);
  if (*expandedp)
    return target;

  *expandedp = true;

  switch (fcode)
    {
    case ALTIVEC_BUILTIN_STVX_V2DF:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
    case ALTIVEC_BUILTIN_STVX_V2DI:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
    case ALTIVEC_BUILTIN_STVX_V4SF:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
    case ALTIVEC_BUILTIN_STVX:
    case ALTIVEC_BUILTIN_STVX_V4SI:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
    case ALTIVEC_BUILTIN_STVX_V8HI:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
    case ALTIVEC_BUILTIN_STVX_V16QI:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
    case ALTIVEC_BUILTIN_STVEBX:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
    case ALTIVEC_BUILTIN_STVEHX:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
    case ALTIVEC_BUILTIN_STVEWX:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
    case ALTIVEC_BUILTIN_STVXL_V2DF:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
    case ALTIVEC_BUILTIN_STVXL_V2DI:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
    case ALTIVEC_BUILTIN_STVXL_V4SF:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
    case ALTIVEC_BUILTIN_STVXL:
    case ALTIVEC_BUILTIN_STVXL_V4SI:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
    case ALTIVEC_BUILTIN_STVXL_V8HI:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
    case ALTIVEC_BUILTIN_STVXL_V16QI:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);

    case ALTIVEC_BUILTIN_STVLX:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
    case ALTIVEC_BUILTIN_STVLXL:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
    case ALTIVEC_BUILTIN_STVRX:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
    case ALTIVEC_BUILTIN_STVRXL:
      return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);

    case P9V_BUILTIN_STXVL:
      return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);

    case VSX_BUILTIN_STXVD2X_V1TI:
      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
    case VSX_BUILTIN_STXVD2X_V2DF:
      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
    case VSX_BUILTIN_STXVD2X_V2DI:
      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
    case VSX_BUILTIN_STXVW4X_V4SF:
      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
    case VSX_BUILTIN_STXVW4X_V4SI:
      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
    case VSX_BUILTIN_STXVW4X_V8HI:
      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
    case VSX_BUILTIN_STXVW4X_V16QI:
      return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);

    /* For the following on big endian, it's ok to use any appropriate
       unaligned-supporting store, so use a generic expander.  For
       little-endian, the exact element-reversing instruction must
       be used.  */
    case VSX_BUILTIN_ST_ELEMREV_V2DF:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
			       : CODE_FOR_vsx_st_elemrev_v2df);
	return altivec_expand_stv_builtin (code, exp);
      }
    case VSX_BUILTIN_ST_ELEMREV_V2DI:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
			       : CODE_FOR_vsx_st_elemrev_v2di);
	return altivec_expand_stv_builtin (code, exp);
      }
    case VSX_BUILTIN_ST_ELEMREV_V4SF:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
			       : CODE_FOR_vsx_st_elemrev_v4sf);
	return altivec_expand_stv_builtin (code, exp);
      }
    case VSX_BUILTIN_ST_ELEMREV_V4SI:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
			       : CODE_FOR_vsx_st_elemrev_v4si);
	return altivec_expand_stv_builtin (code, exp);
      }
    case VSX_BUILTIN_ST_ELEMREV_V8HI:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
			       : CODE_FOR_vsx_st_elemrev_v8hi);
	return altivec_expand_stv_builtin (code, exp);
      }
    case VSX_BUILTIN_ST_ELEMREV_V16QI:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
			       : CODE_FOR_vsx_st_elemrev_v16qi);
	return altivec_expand_stv_builtin (code, exp);
      }

    case ALTIVEC_BUILTIN_MFVSCR:
      icode = CODE_FOR_altivec_mfvscr;
      tmode = insn_data[icode].operand[0].mode;

      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      pat = GEN_FCN (icode) (target);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ALTIVEC_BUILTIN_MTVSCR:
      icode = CODE_FOR_altivec_mtvscr;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;

      /* If we got invalid arguments bail out before generating bad rtl.  */
      if (arg0 == error_mark_node)
	return const0_rtx;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);

      pat = GEN_FCN (icode) (op0);
      if (pat)
	emit_insn (pat);
      return NULL_RTX;

    case ALTIVEC_BUILTIN_DSSALL:
      emit_insn (gen_altivec_dssall ());
      return NULL_RTX;

    case ALTIVEC_BUILTIN_DSS:
      icode = CODE_FOR_altivec_dss;
      arg0 = CALL_EXPR_ARG (exp, 0);
      STRIP_NOPS (arg0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;

      /* If we got invalid arguments bail out before generating bad rtl.  */
      if (arg0 == error_mark_node)
	return const0_rtx;

      if (TREE_CODE (arg0) != INTEGER_CST
	  || TREE_INT_CST_LOW (arg0) & ~0x3)
	{
	  error ("argument to dss must be a 2-bit unsigned literal");
	  return const0_rtx;
	}

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);

      emit_insn (gen_altivec_dss (op0));
      return NULL_RTX;

    case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
    case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
    case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
    case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
    case VSX_BUILTIN_VEC_INIT_V2DF:
    case VSX_BUILTIN_VEC_INIT_V2DI:
    case VSX_BUILTIN_VEC_INIT_V1TI:
      return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case ALTIVEC_BUILTIN_VEC_SET_V4SI:
    case ALTIVEC_BUILTIN_VEC_SET_V8HI:
    case ALTIVEC_BUILTIN_VEC_SET_V16QI:
    case ALTIVEC_BUILTIN_VEC_SET_V4SF:
    case VSX_BUILTIN_VEC_SET_V2DF:
    case VSX_BUILTIN_VEC_SET_V2DI:
    case VSX_BUILTIN_VEC_SET_V1TI:
      return altivec_expand_vec_set_builtin (exp);

    case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
    case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
    case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
    case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
    case VSX_BUILTIN_VEC_EXT_V2DF:
    case VSX_BUILTIN_VEC_EXT_V2DI:
    case VSX_BUILTIN_VEC_EXT_V1TI:
      return altivec_expand_vec_ext_builtin (exp, target);

    case P9V_BUILTIN_VEXTRACT4B:
    case P9V_BUILTIN_VEC_VEXTRACT4B:
      arg1 = CALL_EXPR_ARG (exp, 1);
      STRIP_NOPS (arg1);

      /* Generate a normal call if it is invalid.  */
      if (arg1 == error_mark_node)
	return expand_call (exp, target, false);

      if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
	{
	  error ("second argument to vec_vextract4b must be 0..12");
	  return expand_call (exp, target, false);
	}
      break;

    case P9V_BUILTIN_VINSERT4B:
    case P9V_BUILTIN_VINSERT4B_DI:
    case P9V_BUILTIN_VEC_VINSERT4B:
      arg2 = CALL_EXPR_ARG (exp, 2);
      STRIP_NOPS (arg2);

      /* Generate a normal call if it is invalid.  */
      if (arg2 == error_mark_node)
	return expand_call (exp, target, false);

      if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
	{
	  error ("third argument to vec_vinsert4b must be 0..12");
	  return expand_call (exp, target, false);
	}
      break;

    default:
      break;
      /* Fall through.  */
    }

  /* Expand abs* operations.  */
  d = bdesc_abs;
  for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
    if (d->code == fcode)
      return altivec_expand_abs_builtin (d->icode, exp, target);

  /* Expand the AltiVec predicates.  */
  d = bdesc_altivec_preds;
  for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
    if (d->code == fcode)
      return altivec_expand_predicate_builtin (d->icode, exp, target);

  /* LV* are funky.  We initialized them differently.  */
  switch (fcode)
    {
    case ALTIVEC_BUILTIN_LVSL:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVSR:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVEBX:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVEHX:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVEWX:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVXL_V2DF:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVXL_V2DI:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVXL_V4SF:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVXL:
    case ALTIVEC_BUILTIN_LVXL_V4SI:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVXL_V8HI:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVXL_V16QI:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVX_V2DF:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVX_V2DI:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVX_V4SF:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVX:
    case ALTIVEC_BUILTIN_LVX_V4SI:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVX_V8HI:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVX_V16QI:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
					exp, target, false);
    case ALTIVEC_BUILTIN_LVLX:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
					exp, target, true);
    case ALTIVEC_BUILTIN_LVLXL:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
					exp, target, true);
    case ALTIVEC_BUILTIN_LVRX:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
					exp, target, true);
    case ALTIVEC_BUILTIN_LVRXL:
      return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
					exp, target, true);
    case VSX_BUILTIN_LXVD2X_V1TI:
      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
					exp, target, false);
    case VSX_BUILTIN_LXVD2X_V2DF:
      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
					exp, target, false);
    case VSX_BUILTIN_LXVD2X_V2DI:
      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
					exp, target, false);
    case VSX_BUILTIN_LXVW4X_V4SF:
      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
					exp, target, false);
    case VSX_BUILTIN_LXVW4X_V4SI:
      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
					exp, target, false);
    case VSX_BUILTIN_LXVW4X_V8HI:
      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
					exp, target, false);
    case VSX_BUILTIN_LXVW4X_V16QI:
      return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
					exp, target, false);
    /* For the following on big endian, it's ok to use any appropriate
       unaligned-supporting load, so use a generic expander.  For
       little-endian, the exact element-reversing instruction must
       be used.  */
    case VSX_BUILTIN_LD_ELEMREV_V2DF:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
			       : CODE_FOR_vsx_ld_elemrev_v2df);
	return altivec_expand_lv_builtin (code, exp, target, false);
      }
    case VSX_BUILTIN_LD_ELEMREV_V2DI:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
			       : CODE_FOR_vsx_ld_elemrev_v2di);
	return altivec_expand_lv_builtin (code, exp, target, false);
      }
    case VSX_BUILTIN_LD_ELEMREV_V4SF:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
			       : CODE_FOR_vsx_ld_elemrev_v4sf);
	return altivec_expand_lv_builtin (code, exp, target, false);
      }
    case VSX_BUILTIN_LD_ELEMREV_V4SI:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
			       : CODE_FOR_vsx_ld_elemrev_v4si);
	return altivec_expand_lv_builtin (code, exp, target, false);
      }
    case VSX_BUILTIN_LD_ELEMREV_V8HI:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
			       : CODE_FOR_vsx_ld_elemrev_v8hi);
	return altivec_expand_lv_builtin (code, exp, target, false);
      }
    case VSX_BUILTIN_LD_ELEMREV_V16QI:
      {
	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
			       : CODE_FOR_vsx_ld_elemrev_v16qi);
	return altivec_expand_lv_builtin (code, exp, target, false);
      }
    default:
      break;
      /* Fall through.  */
    }

  *expandedp = false;
  return NULL_RTX;
}
/* Expand the builtin in EXP and store the result in TARGET.  Store
   true in *EXPANDEDP if we found a builtin to expand.  */
static rtx
paired_expand_builtin (tree exp, rtx target, bool * expandedp)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum rs6000_builtins fcode
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d;
  size_t i;

  *expandedp = true;

  switch (fcode)
    {
    case PAIRED_BUILTIN_STX:
      return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
    case PAIRED_BUILTIN_LX:
      return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
    default:
      break;
      /* Fall through.  */
    }

  /* Expand the paired predicates.  */
  d = bdesc_paired_preds;
  for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
    if (d->code == fcode)
      return paired_expand_predicate_builtin (d->icode, exp, target);

  *expandedp = false;
  return NULL_RTX;
}
/* Binops that need to be initialized manually, but can be expanded
   automagically by rs6000_expand_binop_builtin.  */
static const struct builtin_description bdesc_2arg_spe[] =
{
  { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
};
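
/* Illustrative sketch, not part of the compiler: because expansion of
   these binops is table-driven, wiring up another two-operand SPE builtin
   would only need one more row here (the names below are hypothetical):

     { RS6000_BTM_SPE, CODE_FOR_spe_evldfoo, "__builtin_spe_evldfoo",
       SPE_BUILTIN_EVLDFOO },

   after which rs6000_expand_binop_builtin picks it up with no new code.  */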
/* Expand the builtin in EXP and store the result in TARGET.  Store
   true in *EXPANDEDP if we found a builtin to expand.

   This expands the SPE builtins that are not simple unary and binary
   operations.  */
static rtx
spe_expand_builtin (tree exp, rtx target, bool *expandedp)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg1, arg0;
  enum rs6000_builtins fcode
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  machine_mode tmode, mode0;
  rtx pat, op0;
  const struct builtin_description *d;
  size_t i;

  *expandedp = true;

  /* Syntax check for a 5-bit unsigned immediate.  */
  switch (fcode)
    {
    case SPE_BUILTIN_EVSTDD:
    case SPE_BUILTIN_EVSTDH:
    case SPE_BUILTIN_EVSTDW:
    case SPE_BUILTIN_EVSTWHE:
    case SPE_BUILTIN_EVSTWHO:
    case SPE_BUILTIN_EVSTWWE:
    case SPE_BUILTIN_EVSTWWO:
      arg1 = CALL_EXPR_ARG (exp, 2);
      if (TREE_CODE (arg1) != INTEGER_CST
	  || TREE_INT_CST_LOW (arg1) & ~0x1f)
	{
	  error ("argument 2 must be a 5-bit unsigned literal");
	  return const0_rtx;
	}
      break;
    default:
      break;
    }

  /* The evsplat*i instructions are not quite generic.  */
  switch (fcode)
    {
    case SPE_BUILTIN_EVSPLATFI:
      return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
					 exp, target);
    case SPE_BUILTIN_EVSPLATI:
      return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
					 exp, target);
    default:
      break;
    }

  d = bdesc_2arg_spe;
  for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
    if (d->code == fcode)
      return rs6000_expand_binop_builtin (d->icode, exp, target);

  d = bdesc_spe_predicates;
  for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
    if (d->code == fcode)
      return spe_expand_predicate_builtin (d->icode, exp, target);

  d = bdesc_spe_evsel;
  for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
    if (d->code == fcode)
      return spe_expand_evsel_builtin (d->icode, exp, target);

  switch (fcode)
    {
    case SPE_BUILTIN_EVSTDDX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
    case SPE_BUILTIN_EVSTDHX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
    case SPE_BUILTIN_EVSTDWX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
    case SPE_BUILTIN_EVSTWHEX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
    case SPE_BUILTIN_EVSTWHOX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
    case SPE_BUILTIN_EVSTWWEX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
    case SPE_BUILTIN_EVSTWWOX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
    case SPE_BUILTIN_EVSTDD:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
    case SPE_BUILTIN_EVSTDH:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
    case SPE_BUILTIN_EVSTDW:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
    case SPE_BUILTIN_EVSTWHE:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
    case SPE_BUILTIN_EVSTWHO:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
    case SPE_BUILTIN_EVSTWWE:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
    case SPE_BUILTIN_EVSTWWO:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
    case SPE_BUILTIN_MFSPEFSCR:
      icode = CODE_FOR_spe_mfspefscr;
      tmode = insn_data[icode].operand[0].mode;

      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      pat = GEN_FCN (icode) (target);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case SPE_BUILTIN_MTSPEFSCR:
      icode = CODE_FOR_spe_mtspefscr;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;

      if (arg0 == error_mark_node)
	return const0_rtx;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);

      pat = GEN_FCN (icode) (op0);
      if (pat)
	emit_insn (pat);
      return NULL_RTX;
    default:
      break;
    }

  *expandedp = false;
  return NULL_RTX;
}
static rtx
paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch, tmp;
  tree form = CALL_EXPR_ARG (exp, 0);
  tree arg0 = CALL_EXPR_ARG (exp, 1);
  tree arg1 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  int form_int;
  enum rtx_code code;

  if (TREE_CODE (form) != INTEGER_CST)
    {
      error ("argument 1 of __builtin_paired_predicate must be a constant");
      return const0_rtx;
    }
  else
    form_int = TREE_INT_CST_LOW (form);

  gcc_assert (mode0 == mode1);

  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != SImode
      || !(*insn_data[icode].operand[0].predicate) (target, SImode))
    target = gen_reg_rtx (SImode);
  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  scratch = gen_reg_rtx (CCFPmode);

  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (!pat)
    return const0_rtx;

  emit_insn (pat);

  switch (form_int)
    {
      /* LT bit.  */
    case 0:
      code = LT;
      break;
      /* GT bit.  */
    case 1:
      code = GT;
      break;
      /* EQ bit.  */
    case 2:
      code = EQ;
      break;
      /* UN bit.  */
    case 3:
      emit_insn (gen_move_from_CR_ov_bit (target, scratch));
      return target;
    default:
      error ("argument 1 of __builtin_paired_predicate is out of range");
      return const0_rtx;
    }

  tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
  emit_move_insn (target, tmp);
  return target;
}
static rtx
spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch, tmp;
  tree form = CALL_EXPR_ARG (exp, 0);
  tree arg0 = CALL_EXPR_ARG (exp, 1);
  tree arg1 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  int form_int;
  enum rtx_code code;

  if (TREE_CODE (form) != INTEGER_CST)
    {
      error ("argument 1 of __builtin_spe_predicate must be a constant");
      return const0_rtx;
    }
  else
    form_int = TREE_INT_CST_LOW (form);

  gcc_assert (mode0 == mode1);

  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != SImode
      || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
    target = gen_reg_rtx (SImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  scratch = gen_reg_rtx (CCmode);

  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (! pat)
    return const0_rtx;
  emit_insn (pat);

  /* There are 4 variants for each predicate: _any_, _all_, _upper_,
     _lower_.  We use one compare, but look in different bits of the
     CR for each variant.

     There are 2 elements in each SPE simd type (upper/lower).  The CR
     bits are set as follows:

     BIT0  | BIT 1  | BIT 2   | BIT 3
     U     |   L    | (U | L) | (U & L)

     So, for an "all" relationship, BIT 3 would be set.
     For an "any" relationship, BIT 2 would be set.  Etc.

     Following traditional nomenclature, these bits map to:

     BIT0  | BIT 1  | BIT 2   | BIT 3
     LT    | GT     | EQ      | OV

     Later, we will generate rtl to look in the LT/EQ/EQ/OV bits.  */

  switch (form_int)
    {
      /* All variant.  OV bit.  */
    case 0:
      /* We need to get to the OV bit, which is the ORDERED bit.  We
	 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
	 that's ugly and will make validate_condition_mode die.
	 So let's just use another pattern.  */
      emit_insn (gen_move_from_CR_ov_bit (target, scratch));
      return target;
      /* Any variant.  EQ bit.  */
    case 1:
      code = EQ;
      break;
      /* Upper variant.  LT bit.  */
    case 2:
      code = LT;
      break;
      /* Lower variant.  GT bit.  */
    case 3:
      code = GT;
      break;
    default:
      error ("argument 1 of __builtin_spe_predicate is out of range");
      return const0_rtx;
    }

  tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
  emit_move_insn (target, tmp);

  return target;
}
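
/* Illustrative sketch, not part of the compiler: a worked example of the
   bit layout documented above.  Suppose a greater-than predicate compares
   a = {5, 0} against b = {1, 3}, so the upper element satisfies the
   relation and the lower does not (U = 1, L = 0).  The four forms then
   read the one compare result as:

     form 0 (all)   -> BIT 3 = U & L = 0
     form 1 (any)   -> BIT 2 = U | L = 1
     form 2 (upper) -> BIT 0 = U     = 1
     form 3 (lower) -> BIT 1 = L     = 0
*/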
/* The evsel builtins look like this:

     e = __builtin_spe_evsel_OP (a, b, c, d);

   and work like this:

     e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
     e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
*/

static rtx
spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  gcc_assert (mode0 == mode1);

  if (arg0 == error_mark_node || arg1 == error_mark_node
      || arg2 == error_mark_node || arg3 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != mode0
      || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
    target = gen_reg_rtx (mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode0, op1);
  if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
    op2 = copy_to_mode_reg (mode0, op2);
  if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
    op3 = copy_to_mode_reg (mode0, op3);

  /* Generate the compare.  */
  scratch = gen_reg_rtx (CCmode);
  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (! pat)
    return const0_rtx;
  emit_insn (pat);

  if (mode0 == V2SImode)
    emit_insn (gen_spe_evsel (target, op2, op3, scratch));
  else
    emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));

  return target;
}
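
/* Illustrative sketch, not part of the compiler: a worked evsel example.
   With a = {1, 9}, b = {5, 2}, c = {10, 20}, d = {30, 40} and a
   greater-than comparison, the per-element selection described above
   computes:

     e[upper] = (1 > 5) ? 10 : 30;   // e[upper] == 30
     e[lower] = (9 > 2) ? 20 : 40;   // e[lower] == 20
*/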
/* Raise an error message for a builtin function that is called without the
   appropriate target options being set.  */

static void
rs6000_invalid_builtin (enum rs6000_builtins fncode)
{
  size_t uns_fncode = (size_t) fncode;
  const char *name = rs6000_builtin_info[uns_fncode].name;
  HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;

  gcc_assert (name != NULL);
  if ((fnmask & RS6000_BTM_CELL) != 0)
    error ("Builtin function %s is only valid for the cell processor", name);
  else if ((fnmask & RS6000_BTM_VSX) != 0)
    error ("Builtin function %s requires the -mvsx option", name);
  else if ((fnmask & RS6000_BTM_HTM) != 0)
    error ("Builtin function %s requires the -mhtm option", name);
  else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
    error ("Builtin function %s requires the -maltivec option", name);
  else if ((fnmask & RS6000_BTM_PAIRED) != 0)
    error ("Builtin function %s requires the -mpaired option", name);
  else if ((fnmask & RS6000_BTM_SPE) != 0)
    error ("Builtin function %s requires the -mspe option", name);
  else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
	   == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
    error ("Builtin function %s requires the -mhard-dfp and"
	   " -mpower8-vector options", name);
  else if ((fnmask & RS6000_BTM_DFP) != 0)
    error ("Builtin function %s requires the -mhard-dfp option", name);
  else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
    error ("Builtin function %s requires the -mpower8-vector option", name);
  else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
	   == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
    error ("Builtin function %s requires the -mcpu=power9 and"
	   " -m64 options", name);
  else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
    error ("Builtin function %s requires the -mcpu=power9 option", name);
  else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
	   == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
    error ("Builtin function %s requires the -mcpu=power9 and"
	   " -m64 options", name);
  else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
    error ("Builtin function %s requires the -mcpu=power9 option", name);
  else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
	   == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
    error ("Builtin function %s requires the -mhard-float and"
	   " -mlong-double-128 options", name);
  else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
    error ("Builtin function %s requires the -mhard-float option", name);
  else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
    error ("Builtin function %s requires the -mfloat128 option", name);
  else
    error ("Builtin function %s is not supported with the current options",
	   name);
}
/* Target hook for early folding of built-ins, shamelessly stolen
   from ia64.c.  */

static tree
rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
		     tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum rs6000_builtins fn_code
	= (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
      switch (fn_code)
	{
	case RS6000_BUILTIN_NANQ:
	case RS6000_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == RS6000_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    return NULL_TREE;
	  }
	case RS6000_BUILTIN_INFQ:
	case RS6000_BUILTIN_HUGE_VALQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    REAL_VALUE_TYPE inf;
	    real_inf (&inf);
	    return build_real (type, inf);
	  }
	default:
	  break;
	}
    }
#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
/* Fold a machine-dependent built-in in GIMPLE.  (For folding into
   a constant, use rs6000_fold_builtin.)  */

bool
rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
  enum rs6000_builtins fn_code
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  tree arg0, arg1, lhs;

  switch (fn_code)
    {
    /* Flavors of vec_add.  We deliberately don't expand
       P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
       TImode, resulting in much poorer code generation.  */
    case ALTIVEC_BUILTIN_VADDUBM:
    case ALTIVEC_BUILTIN_VADDUHM:
    case ALTIVEC_BUILTIN_VADDUWM:
    case P8V_BUILTIN_VADDUDM:
    case ALTIVEC_BUILTIN_VADDFP:
    case VSX_BUILTIN_XVADDDP:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_sub.  We deliberately don't expand
       P8V_BUILTIN_VSUBUQM.  */
    case ALTIVEC_BUILTIN_VSUBUBM:
    case ALTIVEC_BUILTIN_VSUBUHM:
    case ALTIVEC_BUILTIN_VSUBUWM:
    case P8V_BUILTIN_VSUBUDM:
    case ALTIVEC_BUILTIN_VSUBFP:
    case VSX_BUILTIN_XVSUBDP:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Even element flavors of vec_mul (signed).  */
    case ALTIVEC_BUILTIN_VMULESB:
    case ALTIVEC_BUILTIN_VMULESH:
    /* Even element flavors of vec_mul (unsigned).  */
    case ALTIVEC_BUILTIN_VMULEUB:
    case ALTIVEC_BUILTIN_VMULEUH:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR,
					 arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Odd element flavors of vec_mul (signed).  */
    case ALTIVEC_BUILTIN_VMULOSB:
    case ALTIVEC_BUILTIN_VMULOSH:
    /* Odd element flavors of vec_mul (unsigned).  */
    case ALTIVEC_BUILTIN_VMULOUB:
    case ALTIVEC_BUILTIN_VMULOUH:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR,
					 arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    default:
      break;
    }

  return false;
}
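
/* Illustrative sketch, not part of the compiler: the effect of the GIMPLE
   folding above is to turn an opaque builtin call into generic tree code
   the rest of the middle end understands, e.g. for a vec_add flavor:

     before:  lhs = __builtin_altivec_vadduwm (a, b);
     after:   lhs = a + b;   // PLUS_EXPR on the vector operands
*/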
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum rs6000_builtins fcode
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  size_t uns_fcode = (size_t) fcode;
  const struct builtin_description *d;
  size_t i;
  rtx ret;
  bool success;
  HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
  bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);

  if (TARGET_DEBUG_BUILTIN)
    {
      enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
      const char *name1 = rs6000_builtin_info[uns_fcode].name;
      const char *name2 = ((icode != CODE_FOR_nothing)
			   ? get_insn_name ((int) icode)
			   : "nothing");
      const char *name3;

      switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
	{
	default:		   name3 = "unknown";	break;
	case RS6000_BTC_SPECIAL:   name3 = "special";	break;
	case RS6000_BTC_UNARY:	   name3 = "unary";	break;
	case RS6000_BTC_BINARY:	   name3 = "binary";	break;
	case RS6000_BTC_TERNARY:   name3 = "ternary";	break;
	case RS6000_BTC_PREDICATE: name3 = "predicate";	break;
	case RS6000_BTC_ABS:	   name3 = "abs";	break;
	case RS6000_BTC_EVSEL:	   name3 = "evsel";	break;
	case RS6000_BTC_DST:	   name3 = "dst";	break;
	}

      fprintf (stderr,
	       "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
	       (name1) ? name1 : "---", fcode,
	       (name2) ? name2 : "---", (int) icode,
	       name3,
	       func_valid_p ? "" : ", not valid");
    }

  if (!func_valid_p)
    {
      rs6000_invalid_builtin (fcode);

      /* Given it is invalid, just generate a normal call.  */
      return expand_call (exp, target, ignore);
    }

  switch (fcode)
    {
    case RS6000_BUILTIN_RECIP:
      return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);

    case RS6000_BUILTIN_RECIPF:
      return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);

    case RS6000_BUILTIN_RSQRTF:
      return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);

    case RS6000_BUILTIN_RSQRT:
      return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);

    case POWER7_BUILTIN_BPERMD:
      return rs6000_expand_binop_builtin (((TARGET_64BIT)
					   ? CODE_FOR_bpermd_di
					   : CODE_FOR_bpermd_si), exp, target);

    case RS6000_BUILTIN_GET_TB:
      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
					   target);

    case RS6000_BUILTIN_MFTB:
      return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
					    ? CODE_FOR_rs6000_mftb_di
					    : CODE_FOR_rs6000_mftb_si),
					   target);

    case RS6000_BUILTIN_MFFS:
      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);

    case RS6000_BUILTIN_MTFSF:
      return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);

    case RS6000_BUILTIN_CPU_INIT:
    case RS6000_BUILTIN_CPU_IS:
    case RS6000_BUILTIN_CPU_SUPPORTS:
      return cpu_expand_builtin (fcode, exp, target);

    case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
    case ALTIVEC_BUILTIN_MASK_FOR_STORE:
      {
	int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
		     : (int) CODE_FOR_altivec_lvsl_direct);
	machine_mode tmode = insn_data[icode].operand[0].mode;
	machine_mode mode = insn_data[icode].operand[1].mode;
	tree arg;
	rtx op, addr, pat;

	gcc_assert (TARGET_ALTIVEC);

	arg = CALL_EXPR_ARG (exp, 0);
	gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
	op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
	addr = memory_address (mode, op);
	if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
	  op = addr;
	else
	  {
	    /* For the load case need to negate the address.  */
	    op = gen_reg_rtx (GET_MODE (addr));
	    emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
	  }
	op = gen_rtx_MEM (mode, op);

	if (target == 0
	    || GET_MODE (target) != tmode
	    || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	  target = gen_reg_rtx (tmode);

	pat = GEN_FCN (icode) (target, op);
	if (!pat)
	  return 0;
	emit_insn (pat);

	return target;
      }

    case ALTIVEC_BUILTIN_VCFUX:
    case ALTIVEC_BUILTIN_VCFSX:
    case ALTIVEC_BUILTIN_VCTUXS:
    case ALTIVEC_BUILTIN_VCTSXS:
      /* FIXME: There's got to be a nicer way to handle this case than
	 constructing a new CALL_EXPR.  */
      if (call_expr_nargs (exp) == 1)
	{
	  exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
				 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
	}
      break;

    default:
      break;
    }

  if (TARGET_ALTIVEC)
    {
      ret = altivec_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }
  if (TARGET_SPE)
    {
      ret = spe_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }
  if (TARGET_PAIRED_FLOAT)
    {
      ret = paired_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }
  if (TARGET_HTM)
    {
      ret = htm_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }

  unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
  /* RS6000_BTC_SPECIAL represents no-operand operators.  */
  gcc_assert (attr == RS6000_BTC_UNARY
	      || attr == RS6000_BTC_BINARY
	      || attr == RS6000_BTC_TERNARY
	      || attr == RS6000_BTC_SPECIAL);

  /* Handle simple unary operations.  */
  d = bdesc_1arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_unop_builtin (d->icode, exp, target);

  /* Handle simple binary operations.  */
  d = bdesc_2arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_binop_builtin (d->icode, exp, target);

  /* Handle simple ternary operations.  */
  d = bdesc_3arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_ternop_builtin (d->icode, exp, target);

  /* Handle simple no-argument operations.  */
  d = bdesc_0arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_zeroop_builtin (d->icode, target);

  gcc_unreachable ();
}
/* Create a builtin vector type with a name.  Taking care not to give
   the canonical type a name.  */

static tree
rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
{
  tree result = build_vector_type (elt_type, num_elts);

  /* Copy so we don't give the canonical type a name.  */
  result = build_variant_type_copy (result);

  add_builtin_type (name, result);

  return result;
}
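
/* Illustrative sketch, not part of the compiler: rs6000_init_builtins
   below uses this helper along the lines of

     V4SF_type_node = rs6000_vector_type ("__vector float",
					  float_type_node, 4);

   the variant copy keeps the name off the canonical vector type, so
   structural type equivalence elsewhere is unaffected.  */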
static void
rs6000_init_builtins (void)
{
  tree tdecl;
  tree ftype;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
	     (TARGET_PAIRED_FLOAT) ? ", paired"	 : "",
	     (TARGET_SPE)	   ? ", spe"	 : "",
	     (TARGET_ALTIVEC)	   ? ", altivec" : "",
	     (TARGET_VSX)	   ? ", vsx"	 : "");

  V2SI_type_node = build_vector_type (intSI_type_node, 2);
  V2SF_type_node = build_vector_type (float_type_node, 2);
  V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
				       : "__vector long long",
				       intDI_type_node, 2);
  V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
  V4HI_type_node = build_vector_type (intHI_type_node, 4);
  V4SI_type_node = rs6000_vector_type ("__vector signed int",
				       intSI_type_node, 4);
  V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
  V8HI_type_node = rs6000_vector_type ("__vector signed short",
				       intHI_type_node, 8);
  V16QI_type_node = rs6000_vector_type ("__vector signed char",
					intQI_type_node, 16);

  unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
						 unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
						unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
						unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
						? "__vector unsigned long"
						: "__vector unsigned long long",
						unsigned_intDI_type_node, 2);

  opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
  opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
  opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
  opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);

  const_str_type_node
    = build_pointer_type (build_qualified_type (char_type_node,
						TYPE_QUAL_CONST));

  /* We use V1TI mode as a special container to hold __int128_t items that
     must live in VSX registers.  */
  if (intTI_type_node)
    {
      V1TI_type_node = rs6000_vector_type ("__vector __int128",
					   intTI_type_node, 1);
      unsigned_V1TI_type_node
	= rs6000_vector_type ("__vector unsigned __int128",
			      unsigned_intTI_type_node, 1);
    }

  /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
     types, especially in C++ land.  Similarly, 'vector pixel' is distinct from
     'vector unsigned short'.  */
  bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
  bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
  bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
  bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
  pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);

  long_integer_type_internal_node = long_integer_type_node;
  long_unsigned_type_internal_node = long_unsigned_type_node;
  long_long_integer_type_internal_node = long_long_integer_type_node;
  long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
  intQI_type_internal_node = intQI_type_node;
  uintQI_type_internal_node = unsigned_intQI_type_node;
  intHI_type_internal_node = intHI_type_node;
  uintHI_type_internal_node = unsigned_intHI_type_node;
  intSI_type_internal_node = intSI_type_node;
  uintSI_type_internal_node = unsigned_intSI_type_node;
  intDI_type_internal_node = intDI_type_node;
  uintDI_type_internal_node = unsigned_intDI_type_node;
  intTI_type_internal_node = intTI_type_node;
  uintTI_type_internal_node = unsigned_intTI_type_node;
  float_type_internal_node = float_type_node;
  double_type_internal_node = double_type_node;
  long_double_type_internal_node = long_double_type_node;
  dfloat64_type_internal_node = dfloat64_type_node;
  dfloat128_type_internal_node = dfloat128_type_node;
  void_type_internal_node = void_type_node;
  /* 128-bit floating point support.  KFmode is IEEE 128-bit floating point.
     IFmode is the IBM extended 128-bit format that is a pair of doubles.
     TFmode will be either IEEE 128-bit floating point or the IBM double-double
     format that uses a pair of doubles, depending on the switches and
     defaults.

     We do not enable the actual __float128 keyword unless the user explicitly
     asks for it, because the library support is not yet complete.

     If we don't have support for either 128-bit IBM double double or IEEE
     128-bit floating point, we need to make sure the type is non-zero or else
     the self-test fails during bootstrap.

     We don't register a built-in type for __ibm128 if the type is the same as
     long double.  Instead we add a #define for __ibm128 in
     rs6000_cpu_cpp_builtins to long double.  */
  if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
    {
      ibm128_float_type_node = make_node (REAL_TYPE);
      TYPE_PRECISION (ibm128_float_type_node) = 128;
      SET_TYPE_MODE (ibm128_float_type_node, IFmode);
      layout_type (ibm128_float_type_node);

      lang_hooks.types.register_builtin_type (ibm128_float_type_node,
					      "__ibm128");
    }
  else
    ibm128_float_type_node = long_double_type_node;

  if (TARGET_FLOAT128_KEYWORD)
    {
      ieee128_float_type_node = float128_type_node;
      lang_hooks.types.register_builtin_type (ieee128_float_type_node,
					      "__float128");
    }
  else if (TARGET_FLOAT128_TYPE)
    {
      ieee128_float_type_node = make_node (REAL_TYPE);
      TYPE_PRECISION (ieee128_float_type_node) = 128;
      SET_TYPE_MODE (ieee128_float_type_node, KFmode);
      layout_type (ieee128_float_type_node);

      /* If we are not exporting the __float128/_Float128 keywords, we need a
	 keyword to get the types created.  Use __ieee128 as the dummy
	 keyword.  */
      lang_hooks.types.register_builtin_type (ieee128_float_type_node,
					      "__ieee128");
    }
  else
    ieee128_float_type_node = long_double_type_node;
  /* Initialize the modes for builtin_function_type, mapping a machine mode to
     tree type node.  */
  builtin_mode_to_type[QImode][0] = integer_type_node;
  builtin_mode_to_type[HImode][0] = integer_type_node;
  builtin_mode_to_type[SImode][0] = intSI_type_node;
  builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
  builtin_mode_to_type[DImode][0] = intDI_type_node;
  builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
  builtin_mode_to_type[TImode][0] = intTI_type_node;
  builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
  builtin_mode_to_type[SFmode][0] = float_type_node;
  builtin_mode_to_type[DFmode][0] = double_type_node;
  builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
  builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
  builtin_mode_to_type[TFmode][0] = long_double_type_node;
  builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
  builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
  builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
  builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
  builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
  builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
  builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
  builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
  builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
  builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
  builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
  builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
  builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
  builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
  builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
  builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
  builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
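/* Illustrative example, not part of GCC itself: the table above is indexed
   by machine mode and a signedness flag, so a later query such as
     tree t = builtin_mode_to_type[SImode][1];
   yields unsigned_intSI_type_node.  This is how builtin_function_type
   (below) maps an insn operand mode to a tree type; a null entry means no
   builtin type is registered for that mode.  */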
  tdecl = add_builtin_type ("__bool char", bool_char_type_node);
  TYPE_NAME (bool_char_type_node) = tdecl;

  tdecl = add_builtin_type ("__bool short", bool_short_type_node);
  TYPE_NAME (bool_short_type_node) = tdecl;

  tdecl = add_builtin_type ("__bool int", bool_int_type_node);
  TYPE_NAME (bool_int_type_node) = tdecl;

  tdecl = add_builtin_type ("__pixel", pixel_type_node);
  TYPE_NAME (pixel_type_node) = tdecl;

  bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
                                             bool_char_type_node, 16);
  bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
                                            bool_short_type_node, 8);
  bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
                                            bool_int_type_node, 4);
  bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
                                            ? "__vector __bool long"
                                            : "__vector __bool long long",
                                            bool_long_type_node, 2);
  pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
                                             pixel_type_node, 8);

  /* Paired and SPE builtins are only available if you build a compiler with
     the appropriate options, so only create those builtins with the
     appropriate compiler option.  Create Altivec and VSX builtins on machines
     with at least the general purpose extensions (970 and newer) to allow the
     use of the target attribute.  */
  if (TARGET_PAIRED_FLOAT)
    paired_init_builtins ();
  if (TARGET_SPE)
    spe_init_builtins ();
  if (TARGET_EXTRA_BUILTINS)
    altivec_init_builtins ();
  if (TARGET_HTM)
    htm_init_builtins ();

  if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
    rs6000_common_init_builtins ();

  ftype = build_function_type_list (ieee128_float_type_node,
                                    const_str_type_node, NULL_TREE);
  def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
  def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);

  ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
  def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
  def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);

  ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
                                 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
  def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);

  ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
                                 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
  def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);

  ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
                                 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
  def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);

  ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
                                 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
  def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);

  mode = (TARGET_64BIT) ? DImode : SImode;
  ftype = builtin_function_type (mode, mode, mode, VOIDmode,
                                 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
  def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
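/* Illustrative example, not part of GCC itself: on a 64-bit power7 target
   the builtin defined above is directly usable from C; each of the eight
   index bytes in MASK selects one bit of SOURCE.  gather_bits is a
   hypothetical user function:

     unsigned long
     gather_bits (unsigned long mask, unsigned long source)
     {
       return __builtin_bpermd (mask, source);
     }
*/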
  ftype = build_function_type_list (unsigned_intDI_type_node,
                                    NULL_TREE);
  def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);

  if (TARGET_64BIT)
    ftype = build_function_type_list (unsigned_intDI_type_node,
                                      NULL_TREE);
  else
    ftype = build_function_type_list (unsigned_intSI_type_node,
                                      NULL_TREE);
  def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);

  ftype = build_function_type_list (double_type_node, NULL_TREE);
  def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);

  ftype = build_function_type_list (void_type_node,
                                    intSI_type_node, double_type_node,
                                    NULL_TREE);
  def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);

  ftype = build_function_type_list (void_type_node, NULL_TREE);
  def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);

  ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
                                    NULL_TREE);
  def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
  def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
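/* Illustrative example, not part of GCC itself: the three builtins just
   defined support runtime CPU dispatch in user code; use_power9_path,
   use_vsx_path and use_generic_path are hypothetical user functions:

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("power9"))
       use_power9_path ();
     else if (__builtin_cpu_supports ("vsx"))
       use_vsx_path ();
     else
       use_generic_path ();
*/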
  /* AIX libm provides clog as __clog.  */
  if (TARGET_XCOFF &&
      (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
    set_user_assembler_name (tdecl, "__clog");

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Returns the rs6000 builtin decl for CODE.  */

static tree
rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT fnmask;

  if (code >= RS6000_BUILTIN_COUNT)
    return error_mark_node;

  fnmask = rs6000_builtin_info[code].mask;
  if ((fnmask & rs6000_builtin_mask) != fnmask)
    {
      rs6000_invalid_builtin ((enum rs6000_builtins)code);
      return error_mark_node;
    }

  return rs6000_builtin_decls[code];
}
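/* Worked example for the mask test above, not part of GCC itself: if a
   builtin requires two features, fnmask = 0x6, but only one is enabled,
   rs6000_builtin_mask = 0x4, then (fnmask & rs6000_builtin_mask) == 0x4,
   which differs from fnmask, so the builtin is diagnosed as invalid and
   error_mark_node is returned.  */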
static void
spe_init_builtins (void)
{
  tree puint_type_node = build_pointer_type (unsigned_type_node);
  tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
  const struct builtin_description *d;
  size_t i;
  HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;

  tree v2si_ftype_4_v2si
    = build_function_type_list (opaque_V2SI_type_node,
                                opaque_V2SI_type_node,
                                opaque_V2SI_type_node,
                                opaque_V2SI_type_node,
                                opaque_V2SI_type_node,
                                NULL_TREE);

  tree v2sf_ftype_4_v2sf
    = build_function_type_list (opaque_V2SF_type_node,
                                opaque_V2SF_type_node,
                                opaque_V2SF_type_node,
                                opaque_V2SF_type_node,
                                opaque_V2SF_type_node,
                                NULL_TREE);

  tree int_ftype_int_v2si_v2si
    = build_function_type_list (integer_type_node,
                                integer_type_node,
                                opaque_V2SI_type_node,
                                opaque_V2SI_type_node,
                                NULL_TREE);

  tree int_ftype_int_v2sf_v2sf
    = build_function_type_list (integer_type_node,
                                integer_type_node,
                                opaque_V2SF_type_node,
                                opaque_V2SF_type_node,
                                NULL_TREE);

  tree void_ftype_v2si_puint_int
    = build_function_type_list (void_type_node,
                                opaque_V2SI_type_node,
                                puint_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree void_ftype_v2si_puint_char
    = build_function_type_list (void_type_node,
                                opaque_V2SI_type_node,
                                puint_type_node,
                                char_type_node,
                                NULL_TREE);

  tree void_ftype_v2si_pv2si_int
    = build_function_type_list (void_type_node,
                                opaque_V2SI_type_node,
                                opaque_p_V2SI_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree void_ftype_v2si_pv2si_char
    = build_function_type_list (void_type_node,
                                opaque_V2SI_type_node,
                                opaque_p_V2SI_type_node,
                                char_type_node,
                                NULL_TREE);

  tree void_ftype_int
    = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);

  tree int_ftype_void
    = build_function_type_list (integer_type_node, NULL_TREE);

  tree v2si_ftype_pv2si_int
    = build_function_type_list (opaque_V2SI_type_node,
                                opaque_p_V2SI_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree v2si_ftype_puint_int
    = build_function_type_list (opaque_V2SI_type_node,
                                puint_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree v2si_ftype_pushort_int
    = build_function_type_list (opaque_V2SI_type_node,
                                pushort_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree v2si_ftype_signed_char
    = build_function_type_list (opaque_V2SI_type_node,
                                signed_char_type_node,
                                NULL_TREE);

  add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);

  /* Initialize irregular SPE builtins.  */
  def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
  def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
  def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
  def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
  def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
  def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
  def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
  def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
  def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
  def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
  def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
  def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
  def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
  def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
  def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
  def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
  def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
  def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);

  /* Loads.  */
  def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
  def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
  def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
  def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
  def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
  def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
  def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
  def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
  def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
  def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
  def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
  def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
  def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
  def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
  def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
  def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
  def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
  def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
  def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
  def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
  def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
  def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);

  /* Predicates.  */
  d = bdesc_spe_predicates;
  for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
    {
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
                     d->name);
          continue;
        }

      /* Cannot define builtin if the instruction is disabled.  */
      gcc_assert (d->icode != CODE_FOR_nothing);
      switch (insn_data[d->icode].operand[1].mode)
        {
        case V2SImode:
          type = int_ftype_int_v2si_v2si;
          break;
        case V2SFmode:
          type = int_ftype_int_v2sf_v2sf;
          break;
        default:
          gcc_unreachable ();
        }

      def_builtin (d->name, type, d->code);
    }

  /* Evsel predicates.  */
  d = bdesc_spe_evsel;
  for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
    {
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
                     d->name);
          continue;
        }

      /* Cannot define builtin if the instruction is disabled.  */
      gcc_assert (d->icode != CODE_FOR_nothing);
      switch (insn_data[d->icode].operand[1].mode)
        {
        case V2SImode:
          type = v2si_ftype_4_v2si;
          break;
        case V2SFmode:
          type = v2sf_ftype_4_v2sf;
          break;
        default:
          gcc_unreachable ();
        }

      def_builtin (d->name, type, d->code);
    }
}
static void
paired_init_builtins (void)
{
  const struct builtin_description *d;
  size_t i;
  HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;

  tree int_ftype_int_v2sf_v2sf
    = build_function_type_list (integer_type_node,
                                integer_type_node,
                                V2SF_type_node,
                                V2SF_type_node,
                                NULL_TREE);
  tree pcfloat_type_node =
    build_pointer_type (build_qualified_type
                        (float_type_node, TYPE_QUAL_CONST));

  tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
                                                           long_integer_type_node,
                                                           pcfloat_type_node,
                                                           NULL_TREE);
  tree void_ftype_v2sf_long_pcfloat =
    build_function_type_list (void_type_node,
                              V2SF_type_node,
                              long_integer_type_node,
                              pcfloat_type_node,
                              NULL_TREE);

  def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
               PAIRED_BUILTIN_LX);

  def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
               PAIRED_BUILTIN_STX);

  /* Predicates.  */
  d = bdesc_paired_preds;
  for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
    {
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
                     d->name);
          continue;
        }

      /* Cannot define builtin if the instruction is disabled.  */
      gcc_assert (d->icode != CODE_FOR_nothing);

      if (TARGET_DEBUG_BUILTIN)
        fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
                 (int)i, get_insn_name (d->icode), (int)d->icode,
                 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));

      switch (insn_data[d->icode].operand[1].mode)
        {
        case V2SFmode:
          type = int_ftype_int_v2sf_v2sf;
          break;
        default:
          gcc_unreachable ();
        }

      def_builtin (d->name, type, d->code);
    }
}
static void
altivec_init_builtins (void)
{
  const struct builtin_description *d;
  size_t i;
  tree ftype;
  tree decl;
  HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;

  tree pvoid_type_node = build_pointer_type (void_type_node);

  tree pcvoid_type_node
    = build_pointer_type (build_qualified_type (void_type_node,
                                                TYPE_QUAL_CONST));

  tree int_ftype_opaque
    = build_function_type_list (integer_type_node,
                                opaque_V4SI_type_node, NULL_TREE);
  tree opaque_ftype_opaque
    = build_function_type_list (integer_type_node, NULL_TREE);
  tree opaque_ftype_opaque_int
    = build_function_type_list (opaque_V4SI_type_node,
                                opaque_V4SI_type_node, integer_type_node,
                                NULL_TREE);
  tree opaque_ftype_opaque_opaque_int
    = build_function_type_list (opaque_V4SI_type_node,
                                opaque_V4SI_type_node, opaque_V4SI_type_node,
                                integer_type_node, NULL_TREE);
  tree opaque_ftype_opaque_opaque_opaque
    = build_function_type_list (opaque_V4SI_type_node,
                                opaque_V4SI_type_node, opaque_V4SI_type_node,
                                opaque_V4SI_type_node, NULL_TREE);
  tree opaque_ftype_opaque_opaque
    = build_function_type_list (opaque_V4SI_type_node,
                                opaque_V4SI_type_node, opaque_V4SI_type_node,
                                NULL_TREE);
  tree int_ftype_int_opaque_opaque
    = build_function_type_list (integer_type_node,
                                integer_type_node, opaque_V4SI_type_node,
                                opaque_V4SI_type_node, NULL_TREE);
  tree int_ftype_int_v4si_v4si
    = build_function_type_list (integer_type_node,
                                integer_type_node, V4SI_type_node,
                                V4SI_type_node, NULL_TREE);
  tree int_ftype_int_v2di_v2di
    = build_function_type_list (integer_type_node,
                                integer_type_node, V2DI_type_node,
                                V2DI_type_node, NULL_TREE);
  tree void_ftype_v4si
    = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
  tree v8hi_ftype_void
    = build_function_type_list (V8HI_type_node, NULL_TREE);
  tree void_ftype_void
    = build_function_type_list (void_type_node, NULL_TREE);
  tree void_ftype_int
    = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);

  tree opaque_ftype_long_pcvoid
    = build_function_type_list (opaque_V4SI_type_node,
                                long_integer_type_node, pcvoid_type_node,
                                NULL_TREE);
  tree v16qi_ftype_long_pcvoid
    = build_function_type_list (V16QI_type_node,
                                long_integer_type_node, pcvoid_type_node,
                                NULL_TREE);
  tree v8hi_ftype_long_pcvoid
    = build_function_type_list (V8HI_type_node,
                                long_integer_type_node, pcvoid_type_node,
                                NULL_TREE);
  tree v4si_ftype_long_pcvoid
    = build_function_type_list (V4SI_type_node,
                                long_integer_type_node, pcvoid_type_node,
                                NULL_TREE);
  tree v4sf_ftype_long_pcvoid
    = build_function_type_list (V4SF_type_node,
                                long_integer_type_node, pcvoid_type_node,
                                NULL_TREE);
  tree v2df_ftype_long_pcvoid
    = build_function_type_list (V2DF_type_node,
                                long_integer_type_node, pcvoid_type_node,
                                NULL_TREE);
  tree v2di_ftype_long_pcvoid
    = build_function_type_list (V2DI_type_node,
                                long_integer_type_node, pcvoid_type_node,
                                NULL_TREE);

  tree void_ftype_opaque_long_pvoid
    = build_function_type_list (void_type_node,
                                opaque_V4SI_type_node, long_integer_type_node,
                                pvoid_type_node, NULL_TREE);
  tree void_ftype_v4si_long_pvoid
    = build_function_type_list (void_type_node,
                                V4SI_type_node, long_integer_type_node,
                                pvoid_type_node, NULL_TREE);
  tree void_ftype_v16qi_long_pvoid
    = build_function_type_list (void_type_node,
                                V16QI_type_node, long_integer_type_node,
                                pvoid_type_node, NULL_TREE);

  tree void_ftype_v16qi_pvoid_long
    = build_function_type_list (void_type_node,
                                V16QI_type_node, pvoid_type_node,
                                long_integer_type_node, NULL_TREE);

  tree void_ftype_v8hi_long_pvoid
    = build_function_type_list (void_type_node,
                                V8HI_type_node, long_integer_type_node,
                                pvoid_type_node, NULL_TREE);
  tree void_ftype_v4sf_long_pvoid
    = build_function_type_list (void_type_node,
                                V4SF_type_node, long_integer_type_node,
                                pvoid_type_node, NULL_TREE);
  tree void_ftype_v2df_long_pvoid
    = build_function_type_list (void_type_node,
                                V2DF_type_node, long_integer_type_node,
                                pvoid_type_node, NULL_TREE);
  tree void_ftype_v2di_long_pvoid
    = build_function_type_list (void_type_node,
                                V2DI_type_node, long_integer_type_node,
                                pvoid_type_node, NULL_TREE);
  tree int_ftype_int_v8hi_v8hi
    = build_function_type_list (integer_type_node,
                                integer_type_node, V8HI_type_node,
                                V8HI_type_node, NULL_TREE);
  tree int_ftype_int_v16qi_v16qi
    = build_function_type_list (integer_type_node,
                                integer_type_node, V16QI_type_node,
                                V16QI_type_node, NULL_TREE);
  tree int_ftype_int_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                integer_type_node, V4SF_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_int_v2df_v2df
    = build_function_type_list (integer_type_node,
                                integer_type_node, V2DF_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pcvoid_int_int
    = build_function_type_list (void_type_node,
                                pcvoid_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);
  def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
  def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
  def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
  def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
  def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
  def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
  def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
  def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
  def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
  def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
  def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVXL_V2DF);
  def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVXL_V2DI);
  def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVXL_V4SF);
  def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVXL_V4SI);
  def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVXL_V8HI);
  def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVXL_V16QI);
  def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
  def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVX_V2DF);
  def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVX_V2DI);
  def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVX_V4SF);
  def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVX_V4SI);
  def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVX_V8HI);
  def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
               ALTIVEC_BUILTIN_LVX_V16QI);
  def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
  def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
               ALTIVEC_BUILTIN_STVX_V2DF);
  def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
               ALTIVEC_BUILTIN_STVX_V2DI);
  def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
               ALTIVEC_BUILTIN_STVX_V4SF);
  def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
               ALTIVEC_BUILTIN_STVX_V4SI);
  def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
               ALTIVEC_BUILTIN_STVX_V8HI);
  def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
               ALTIVEC_BUILTIN_STVX_V16QI);
  def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
  def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
  def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
               ALTIVEC_BUILTIN_STVXL_V2DF);
  def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
               ALTIVEC_BUILTIN_STVXL_V2DI);
  def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
               ALTIVEC_BUILTIN_STVXL_V4SF);
  def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
               ALTIVEC_BUILTIN_STVXL_V4SI);
  def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
               ALTIVEC_BUILTIN_STVXL_V8HI);
  def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
               ALTIVEC_BUILTIN_STVXL_V16QI);
  def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
  def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
  def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
  def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
  def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
  def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
  def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
  def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
  def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
  def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
  def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
  def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
  def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
  def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
  def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
  def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);

  def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
               VSX_BUILTIN_LXVD2X_V2DF);
  def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
               VSX_BUILTIN_LXVD2X_V2DI);
  def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
               VSX_BUILTIN_LXVW4X_V4SF);
  def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
               VSX_BUILTIN_LXVW4X_V4SI);
  def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
               VSX_BUILTIN_LXVW4X_V8HI);
  def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
               VSX_BUILTIN_LXVW4X_V16QI);
  def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
               VSX_BUILTIN_STXVD2X_V2DF);
  def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
               VSX_BUILTIN_STXVD2X_V2DI);
  def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
               VSX_BUILTIN_STXVW4X_V4SF);
  def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
               VSX_BUILTIN_STXVW4X_V4SI);
  def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
               VSX_BUILTIN_STXVW4X_V8HI);
  def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
               VSX_BUILTIN_STXVW4X_V16QI);

  def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
               VSX_BUILTIN_LD_ELEMREV_V2DF);
  def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
               VSX_BUILTIN_LD_ELEMREV_V2DI);
  def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
               VSX_BUILTIN_LD_ELEMREV_V4SF);
  def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
               VSX_BUILTIN_LD_ELEMREV_V4SI);
  def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
               VSX_BUILTIN_ST_ELEMREV_V2DF);
  def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
               VSX_BUILTIN_ST_ELEMREV_V2DI);
  def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
               VSX_BUILTIN_ST_ELEMREV_V4SF);
  def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
               VSX_BUILTIN_ST_ELEMREV_V4SI);
  if (TARGET_P9_VECTOR)
    {
      def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
                   VSX_BUILTIN_LD_ELEMREV_V8HI);
      def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
                   VSX_BUILTIN_LD_ELEMREV_V16QI);
      def_builtin ("__builtin_vsx_st_elemrev_v8hi",
                   void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
      def_builtin ("__builtin_vsx_st_elemrev_v16qi",
                   void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
    }

  def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
               VSX_BUILTIN_VEC_LD);
  def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
               VSX_BUILTIN_VEC_ST);
  def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
               VSX_BUILTIN_VEC_XL);
  def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
               VSX_BUILTIN_VEC_XST);
  def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
  def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
  def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);

  def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
  def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
  def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
  def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
  def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
  def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
  def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
  def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
  def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
  def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
  def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
  def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);

  def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
               ALTIVEC_BUILTIN_VEC_ADDE);
  def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
               ALTIVEC_BUILTIN_VEC_ADDEC);
  def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
               ALTIVEC_BUILTIN_VEC_CMPNE);
  def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
               ALTIVEC_BUILTIN_VEC_MUL);

  /* Cell builtins.  */
  def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
  def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
  def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
  def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);

  def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
  def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
  def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
  def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);

  def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
  def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
  def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
  def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);

  def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
  def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
  def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
  def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);

  if (TARGET_P9_VECTOR)
    def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
                 P9V_BUILTIN_STXVL);
  /* Add the DST variants.  */
  d = bdesc_dst;
  for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
    {
      HOST_WIDE_INT mask = d->mask;

      /* It is expected that these dst built-in functions may have
         d->icode equal to CODE_FOR_nothing.  */
      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
                     d->name);
          continue;
        }

      def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
    }

  /* Initialize the predicates.  */
  d = bdesc_altivec_preds;
  for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
    {
      machine_mode mode1;
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
                     d->name);
          continue;
        }

      if (rs6000_overloaded_builtin_p (d->code))
        mode1 = VOIDmode;
      else
        {
          /* Cannot define builtin if the instruction is disabled.  */
          gcc_assert (d->icode != CODE_FOR_nothing);
          mode1 = insn_data[d->icode].operand[1].mode;
        }

      switch (mode1)
        {
        case VOIDmode:
          type = int_ftype_int_opaque_opaque;
          break;
        case V2DImode:
          type = int_ftype_int_v2di_v2di;
          break;
        case V4SImode:
          type = int_ftype_int_v4si_v4si;
          break;
        case V8HImode:
          type = int_ftype_int_v8hi_v8hi;
          break;
        case V16QImode:
          type = int_ftype_int_v16qi_v16qi;
          break;
        case V4SFmode:
          type = int_ftype_int_v4sf_v4sf;
          break;
        case V2DFmode:
          type = int_ftype_int_v2df_v2df;
          break;
        default:
          gcc_unreachable ();
        }

      def_builtin (d->name, type, d->code);
    }

  /* Initialize the abs* operators.  */
  d = bdesc_abs;
  for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
    {
      machine_mode mode0;
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
                     d->name);
          continue;
        }

      /* Cannot define builtin if the instruction is disabled.  */
      gcc_assert (d->icode != CODE_FOR_nothing);
      mode0 = insn_data[d->icode].operand[0].mode;

      switch (mode0)
        {
        case V2DImode:
          type = v2di_ftype_v2di;
          break;
        case V4SImode:
          type = v4si_ftype_v4si;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi;
          break;
        case V16QImode:
          type = v16qi_ftype_v16qi;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df;
          break;
        default:
          gcc_unreachable ();
        }

      def_builtin (d->name, type, d->code);
    }
  /* Initialize target builtin that implements
     targetm.vectorize.builtin_mask_for_load.  */

  decl = add_builtin_function ("__builtin_altivec_mask_for_load",
                               v16qi_ftype_long_pcvoid,
                               ALTIVEC_BUILTIN_MASK_FOR_LOAD,
                               BUILT_IN_MD, NULL, NULL_TREE);
  TREE_READONLY (decl) = 1;
  /* Record the decl. Will be used by rs6000_builtin_mask_for_load.  */
  altivec_builtin_mask_for_load = decl;

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V4SI_type_node, integer_type_node,
                                    integer_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);

  ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);

  ftype = build_function_type_list (V16QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v16qi", ftype,
               ALTIVEC_BUILTIN_VEC_INIT_V16QI);

  ftype = build_function_type_list (V4SF_type_node, float_type_node,
                                    float_type_node, float_type_node,
                                    float_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);

  /* VSX builtins.  */
  ftype = build_function_type_list (V2DF_type_node, double_type_node,
                                    double_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);

  ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
                                    intDI_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
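/* Illustrative example, not part of GCC itself: the vec_init builtins
   defined above let element-wise initialization be written directly;
   make_v4si is a hypothetical user function:

     vector int
     make_v4si (int a, int b, int c, int d)
     {
       return __builtin_vec_init_v4si (a, b, c, d);
     }

   The expander lowers this through rs6000_expand_vector_init.  */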
  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
                                    intSI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);

  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
                                    intQI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);

  ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
                                    float_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);

  ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
                                    double_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);

  ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                    intDI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
  if (V1TI_type_node)
    {
      tree v1ti_ftype_long_pcvoid
        = build_function_type_list (V1TI_type_node,
                                    long_integer_type_node, pcvoid_type_node,
                                    NULL_TREE);
      tree void_ftype_v1ti_long_pvoid
        = build_function_type_list (void_type_node,
                                    V1TI_type_node, long_integer_type_node,
                                    pvoid_type_node, NULL_TREE);
      def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
                   VSX_BUILTIN_LXVD2X_V1TI);
      def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
                   VSX_BUILTIN_STXVD2X_V1TI);
      ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
                                        NULL_TREE, NULL_TREE);
      def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
      ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
                                        intTI_type_node,
                                        integer_type_node, NULL_TREE);
      def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
      ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
                                        integer_type_node, NULL_TREE);
      def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
    }
}
static void
htm_init_builtins (void)
{
  HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
  const struct builtin_description *d;
  size_t i;

  d = bdesc_htm;
  for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
    {
      tree op[MAX_HTM_OPERANDS], type;
      HOST_WIDE_INT mask = d->mask;
      unsigned attr = rs6000_builtin_info[d->code].attr;
      bool void_func = (attr & RS6000_BTC_VOID);
      int attr_args = (attr & RS6000_BTC_TYPE_MASK);
      int nopnds = 0;
      tree gpr_type_node;
      tree rettype;
      tree argtype;

      /* It is expected that these htm built-in functions may have
         d->icode equal to CODE_FOR_nothing.  */

      if (TARGET_32BIT && TARGET_POWERPC64)
        gpr_type_node = long_long_unsigned_type_node;
      else
        gpr_type_node = long_unsigned_type_node;

      if (attr & RS6000_BTC_SPR)
        {
          rettype = gpr_type_node;
          argtype = gpr_type_node;
        }
      else if (d->code == HTM_BUILTIN_TABORTDC
               || d->code == HTM_BUILTIN_TABORTDCI)
        {
          rettype = unsigned_type_node;
          argtype = gpr_type_node;
        }
      else
        {
          rettype = unsigned_type_node;
          argtype = unsigned_type_node;
        }

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
          continue;
        }

      if (d->name == 0)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
                     (long unsigned) i);
          continue;
        }

      op[nopnds++] = (void_func) ? void_type_node : rettype;

      if (attr_args == RS6000_BTC_UNARY)
        op[nopnds++] = argtype;
      else if (attr_args == RS6000_BTC_BINARY)
        {
          op[nopnds++] = argtype;
          op[nopnds++] = argtype;
        }
      else if (attr_args == RS6000_BTC_TERNARY)
        {
          op[nopnds++] = argtype;
          op[nopnds++] = argtype;
          op[nopnds++] = argtype;
        }

      switch (nopnds)
        {
        case 1:
          type = build_function_type_list (op[0], NULL_TREE);
          break;
        case 2:
          type = build_function_type_list (op[0], op[1], NULL_TREE);
          break;
        case 3:
          type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
          break;
        case 4:
          type = build_function_type_list (op[0], op[1], op[2], op[3],
                                           NULL_TREE);
          break;
        default:
          gcc_unreachable ();
        }

      def_builtin (d->name, type, d->code);
    }
}
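/* Illustrative example, not part of GCC itself: bdesc_htm supplies names
   such as __builtin_tbegin and __builtin_tend, so the loop above yields
   user code like the following, where counter is a hypothetical variable:

     if (__builtin_tbegin (0))
       {
         counter++;
         __builtin_tend (0);
       }

   Both builtins take and return unsigned int under the typing rules
   above.  */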
/* Hash function for builtin functions with up to 3 arguments and a return
   type.  */
hashval_t
builtin_hasher::hash (builtin_hash_struct *bh)
{
  unsigned ret = 0;
  int i;

  for (i = 0; i < 4; i++)
    {
      ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
      ret = (ret * 2) + bh->uns_p[i];
    }

  return ret;
}
/* Compare builtin hash entries H1 and H2 for equivalence.  */
bool
builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
{
  return ((p1->mode[0] == p2->mode[0])
          && (p1->mode[1] == p2->mode[1])
          && (p1->mode[2] == p2->mode[2])
          && (p1->mode[3] == p2->mode[3])
          && (p1->uns_p[0] == p2->uns_p[0])
          && (p1->uns_p[1] == p2->uns_p[1])
          && (p1->uns_p[2] == p2->uns_p[2])
          && (p1->uns_p[3] == p2->uns_p[3]));
}
/* Map types for builtin functions with an explicit return type and up to 3
   arguments.  Functions with fewer than 3 arguments use VOIDmode as the type
   of the argument.  */
static tree
builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
                       machine_mode mode_arg1, machine_mode mode_arg2,
                       enum rs6000_builtins builtin, const char *name)
{
  struct builtin_hash_struct h;
  struct builtin_hash_struct *h2;
  int num_args = 3;
  int i;
  tree ret_type = NULL_TREE;
  tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };

  /* Create builtin_hash_table.  */
  if (builtin_hash_table == NULL)
    builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);

  h.type = NULL_TREE;
  h.mode[0] = mode_ret;
  h.mode[1] = mode_arg0;
  h.mode[2] = mode_arg1;
  h.mode[3] = mode_arg2;
  h.uns_p[0] = 0;
  h.uns_p[1] = 0;
  h.uns_p[2] = 0;
  h.uns_p[3] = 0;

  /* If the builtin is a type that produces unsigned results or takes unsigned
     arguments, and it is returned as a decl for the vectorizer (such as
     widening multiplies, permute), make sure the arguments and return value
     are type correct.  */
  switch (builtin)
    {
      /* unsigned 1 argument functions.  */
    case CRYPTO_BUILTIN_VSBOX:
    case P8V_BUILTIN_VGBBD:
    case MISC_BUILTIN_CDTBCD:
    case MISC_BUILTIN_CBCDTD:
      h.uns_p[0] = 1;
      h.uns_p[1] = 1;
      break;

      /* unsigned 2 argument functions.  */
    case ALTIVEC_BUILTIN_VMULEUB:
    case ALTIVEC_BUILTIN_VMULEUH:
    case ALTIVEC_BUILTIN_VMULOUB:
    case ALTIVEC_BUILTIN_VMULOUH:
    case CRYPTO_BUILTIN_VCIPHER:
    case CRYPTO_BUILTIN_VCIPHERLAST:
    case CRYPTO_BUILTIN_VNCIPHER:
    case CRYPTO_BUILTIN_VNCIPHERLAST:
    case CRYPTO_BUILTIN_VPMSUMB:
    case CRYPTO_BUILTIN_VPMSUMH:
    case CRYPTO_BUILTIN_VPMSUMW:
    case CRYPTO_BUILTIN_VPMSUMD:
    case CRYPTO_BUILTIN_VPMSUM:
    case MISC_BUILTIN_ADDG6S:
    case MISC_BUILTIN_DIVWEU:
    case MISC_BUILTIN_DIVWEUO:
    case MISC_BUILTIN_DIVDEU:
    case MISC_BUILTIN_DIVDEUO:
      h.uns_p[0] = 1;
      h.uns_p[1] = 1;
      h.uns_p[2] = 1;
      break;

      /* unsigned 3 argument functions.  */
    case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
    case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
    case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
    case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
    case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
    case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
    case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
    case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
    case VSX_BUILTIN_VPERM_16QI_UNS:
    case VSX_BUILTIN_VPERM_8HI_UNS:
    case VSX_BUILTIN_VPERM_4SI_UNS:
    case VSX_BUILTIN_VPERM_2DI_UNS:
    case VSX_BUILTIN_XXSEL_16QI_UNS:
    case VSX_BUILTIN_XXSEL_8HI_UNS:
    case VSX_BUILTIN_XXSEL_4SI_UNS:
    case VSX_BUILTIN_XXSEL_2DI_UNS:
    case CRYPTO_BUILTIN_VPERMXOR:
    case CRYPTO_BUILTIN_VPERMXOR_V2DI:
    case CRYPTO_BUILTIN_VPERMXOR_V4SI:
    case CRYPTO_BUILTIN_VPERMXOR_V8HI:
    case CRYPTO_BUILTIN_VPERMXOR_V16QI:
    case CRYPTO_BUILTIN_VSHASIGMAW:
    case CRYPTO_BUILTIN_VSHASIGMAD:
    case CRYPTO_BUILTIN_VSHASIGMA:
      h.uns_p[0] = 1;
      h.uns_p[1] = 1;
      h.uns_p[2] = 1;
      h.uns_p[3] = 1;
      break;

      /* signed permute functions with unsigned char mask.  */
    case ALTIVEC_BUILTIN_VPERM_16QI:
    case ALTIVEC_BUILTIN_VPERM_8HI:
    case ALTIVEC_BUILTIN_VPERM_4SI:
    case ALTIVEC_BUILTIN_VPERM_4SF:
    case ALTIVEC_BUILTIN_VPERM_2DI:
    case ALTIVEC_BUILTIN_VPERM_2DF:
    case VSX_BUILTIN_VPERM_16QI:
    case VSX_BUILTIN_VPERM_8HI:
    case VSX_BUILTIN_VPERM_4SI:
    case VSX_BUILTIN_VPERM_4SF:
    case VSX_BUILTIN_VPERM_2DI:
    case VSX_BUILTIN_VPERM_2DF:
      h.uns_p[3] = 1;
      break;

      /* unsigned args, signed return.  */
    case VSX_BUILTIN_XVCVUXDSP:
    case VSX_BUILTIN_XVCVUXDDP_UNS:
    case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
      h.uns_p[1] = 1;
      break;

      /* signed args, unsigned return.  */
    case VSX_BUILTIN_XVCVDPUXDS_UNS:
    case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
    case MISC_BUILTIN_UNPACK_TD:
    case MISC_BUILTIN_UNPACK_V1TI:
      h.uns_p[0] = 1;
      break;

      /* unsigned arguments for 128-bit pack instructions.  */
    case MISC_BUILTIN_PACK_TD:
    case MISC_BUILTIN_PACK_V1TI:
      h.uns_p[1] = 1;
      h.uns_p[2] = 1;
      break;

    default:
      break;
    }

  /* Figure out how many args are present.  */
  while (num_args > 0 && h.mode[num_args] == VOIDmode)
    num_args--;

  ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
  if (!ret_type && h.uns_p[0])
    ret_type = builtin_mode_to_type[h.mode[0]][0];

  if (!ret_type)
    fatal_error (input_location,
                 "internal error: builtin function %s had an unexpected "
                 "return type %s", name, GET_MODE_NAME (h.mode[0]));

  for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
    arg_type[i] = NULL_TREE;

  for (i = 0; i < num_args; i++)
    {
      int m = (int) h.mode[i+1];
      int uns_p = h.uns_p[i+1];

      arg_type[i] = builtin_mode_to_type[m][uns_p];
      if (!arg_type[i] && uns_p)
        arg_type[i] = builtin_mode_to_type[m][0];

      if (!arg_type[i])
        fatal_error (input_location,
                     "internal error: builtin function %s, argument %d "
                     "had unexpected argument type %s", name, i,
                     GET_MODE_NAME (m));
    }

  builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
  if (*found == NULL)
    {
      h2 = ggc_alloc<builtin_hash_struct> ();
      *h2 = h;
      *found = h2;

      h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
                                           arg_type[2], NULL_TREE);
    }

  return (*found)->type;
}
static void
rs6000_common_init_builtins (void)
{
  const struct builtin_description *d;
  size_t i;

  tree opaque_ftype_opaque = NULL_TREE;
  tree opaque_ftype_opaque_opaque = NULL_TREE;
  tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
  tree v2si_ftype = NULL_TREE;
  tree v2si_ftype_qi = NULL_TREE;
  tree v2si_ftype_v2si_qi = NULL_TREE;
  tree v2si_ftype_int_qi = NULL_TREE;
  HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;

  if (!TARGET_PAIRED_FLOAT)
    {
      builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
      builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
    }

  /* Paired and SPE builtins are only available if you build a compiler with
     the appropriate options, so only create those builtins with the
     appropriate compiler option.  Create Altivec and VSX builtins on machines
     with at least the general purpose extensions (970 and newer) to allow the
     use of the target attribute.  */
  if (TARGET_EXTRA_BUILTINS)
    builtin_mask |= RS6000_BTM_COMMON;

  /* Add the ternary operators.  */
  d = bdesc_3arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
    {
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
          continue;
        }

      if (rs6000_overloaded_builtin_p (d->code))
        {
          if (! (type = opaque_ftype_opaque_opaque_opaque))
            type = opaque_ftype_opaque_opaque_opaque
              = build_function_type_list (opaque_V4SI_type_node,
                                          opaque_V4SI_type_node,
                                          opaque_V4SI_type_node,
                                          opaque_V4SI_type_node,
                                          NULL_TREE);
        }
      else
        {
          enum insn_code icode = d->icode;
          if (d->name == 0)
            {
              if (TARGET_DEBUG_BUILTIN)
                fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
                         (long unsigned)i);
              continue;
            }

          if (icode == CODE_FOR_nothing)
            {
              if (TARGET_DEBUG_BUILTIN)
                fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
                         d->name);
              continue;
            }

          type = builtin_function_type (insn_data[icode].operand[0].mode,
                                        insn_data[icode].operand[1].mode,
                                        insn_data[icode].operand[2].mode,
                                        insn_data[icode].operand[3].mode,
                                        d->code, d->name);
        }

      def_builtin (d->name, type, d->code);
    }

  /* Add the binary operators.  */
  d = bdesc_2arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      machine_mode mode0, mode1, mode2;
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
          continue;
        }

      if (rs6000_overloaded_builtin_p (d->code))
        {
          if (! (type = opaque_ftype_opaque_opaque))
            type = opaque_ftype_opaque_opaque
              = build_function_type_list (opaque_V4SI_type_node,
                                          opaque_V4SI_type_node,
                                          opaque_V4SI_type_node,
                                          NULL_TREE);
        }
      else
        {
          enum insn_code icode = d->icode;
          if (d->name == 0)
            {
              if (TARGET_DEBUG_BUILTIN)
                fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
                         (long unsigned)i);
              continue;
            }

          if (icode == CODE_FOR_nothing)
            {
              if (TARGET_DEBUG_BUILTIN)
                fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
                         d->name);
              continue;
            }

          mode0 = insn_data[icode].operand[0].mode;
          mode1 = insn_data[icode].operand[1].mode;
          mode2 = insn_data[icode].operand[2].mode;

          if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
            {
              if (! (type = v2si_ftype_v2si_qi))
                type = v2si_ftype_v2si_qi
                  = build_function_type_list (opaque_V2SI_type_node,
                                              opaque_V2SI_type_node,
                                              char_type_node,
                                              NULL_TREE);
            }

          else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
                   && mode2 == QImode)
            {
              if (! (type = v2si_ftype_int_qi))
                type = v2si_ftype_int_qi
                  = build_function_type_list (opaque_V2SI_type_node,
                                              integer_type_node,
                                              char_type_node,
                                              NULL_TREE);
            }

          else
            type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
                                          d->code, d->name);
        }

      def_builtin (d->name, type, d->code);
    }

  /* Add the simple unary operators.  */
  d = bdesc_1arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      machine_mode mode0, mode1;
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
          continue;
        }

      if (rs6000_overloaded_builtin_p (d->code))
        {
          if (! (type = opaque_ftype_opaque))
            type = opaque_ftype_opaque
              = build_function_type_list (opaque_V4SI_type_node,
                                          opaque_V4SI_type_node,
                                          NULL_TREE);
        }
      else
        {
          enum insn_code icode = d->icode;
          if (d->name == 0)
            {
              if (TARGET_DEBUG_BUILTIN)
                fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
                         (long unsigned)i);
              continue;
            }

          if (icode == CODE_FOR_nothing)
            {
              if (TARGET_DEBUG_BUILTIN)
                fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
                         d->name);
              continue;
            }

          mode0 = insn_data[icode].operand[0].mode;
          mode1 = insn_data[icode].operand[1].mode;

          if (mode0 == V2SImode && mode1 == QImode)
            {
              if (! (type = v2si_ftype_qi))
                type = v2si_ftype_qi
                  = build_function_type_list (opaque_V2SI_type_node,
                                              char_type_node,
                                              NULL_TREE);
            }

          else
            type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
                                          d->code, d->name);
        }

      def_builtin (d->name, type, d->code);
    }

  /* Add the simple no-argument operators.  */
  d = bdesc_0arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
    {
      machine_mode mode0;
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
        {
          if (TARGET_DEBUG_BUILTIN)
            fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
          continue;
        }
      if (rs6000_overloaded_builtin_p (d->code))
        {
          if (!opaque_ftype_opaque)
            opaque_ftype_opaque
              = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
          type = opaque_ftype_opaque;
        }
      else
        {
          enum insn_code icode = d->icode;
          if (d->name == 0)
            {
              if (TARGET_DEBUG_BUILTIN)
                fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
                         (long unsigned) i);
              continue;
            }
          if (icode == CODE_FOR_nothing)
            {
              if (TARGET_DEBUG_BUILTIN)
                fprintf (stderr,
                         "rs6000_builtin, skip no-argument %s (no code)\n",
                         d->name);
              continue;
            }
          mode0 = insn_data[icode].operand[0].mode;
          if (mode0 == V2SImode)
            {
              /* code for SPE */
              if (! (type = v2si_ftype))
                {
                  v2si_ftype
                    = build_function_type_list (opaque_V2SI_type_node,
                                                NULL_TREE);
                  type = v2si_ftype;
                }
            }
          else
            type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
                                          d->code, d->name);
        }
      def_builtin (d->name, type, d->code);
    }
}
/* Set up AIX/Darwin/64-bit Linux quad floating point routines.  */

static void
init_float128_ibm (machine_mode mode)
{
  if (!TARGET_XL_COMPAT)
    {
      set_optab_libfunc (add_optab, mode, "__gcc_qadd");
      set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
      set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
      set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");

      if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
        {
          set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
          set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
          set_optab_libfunc (ne_optab, mode, "__gcc_qne");
          set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
          set_optab_libfunc (ge_optab, mode, "__gcc_qge");
          set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
          set_optab_libfunc (le_optab, mode, "__gcc_qle");

          set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
          set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
          set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
          set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
          set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
          set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
          set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
          set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
        }

      if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
        set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
    }
  else
    {
      set_optab_libfunc (add_optab, mode, "_xlqadd");
      set_optab_libfunc (sub_optab, mode, "_xlqsub");
      set_optab_libfunc (smul_optab, mode, "_xlqmul");
      set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
    }

  /* Add various conversions for IFmode to use the traditional TFmode
     names.  */
  if (mode == IFmode)
    {
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");

      if (TARGET_POWERPC64)
        {
          set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
          set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
          set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
          set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
        }
    }
}
/* Set up IEEE 128-bit floating point routines.  Use different names if the
   arguments can be passed in a vector register.  The historical PowerPC
   implementation of IEEE 128-bit floating point used _q_<op> for the names, so
   continue to use that if we aren't using vector registers to pass IEEE
   128-bit floating point.  */

static void
init_float128_ieee (machine_mode mode)
{
  if (FLOAT128_VECTOR_P (mode))
    {
      set_optab_libfunc (add_optab, mode, "__addkf3");
      set_optab_libfunc (sub_optab, mode, "__subkf3");
      set_optab_libfunc (neg_optab, mode, "__negkf2");
      set_optab_libfunc (smul_optab, mode, "__mulkf3");
      set_optab_libfunc (sdiv_optab, mode, "__divkf3");
      set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
      set_optab_libfunc (abs_optab, mode, "__abstkf2");

      set_optab_libfunc (eq_optab, mode, "__eqkf2");
      set_optab_libfunc (ne_optab, mode, "__nekf2");
      set_optab_libfunc (gt_optab, mode, "__gtkf2");
      set_optab_libfunc (ge_optab, mode, "__gekf2");
      set_optab_libfunc (lt_optab, mode, "__ltkf2");
      set_optab_libfunc (le_optab, mode, "__lekf2");
      set_optab_libfunc (unord_optab, mode, "__unordkf2");

      set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
      set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
      set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
      set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");

      set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");

      set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");

      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");

      set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
      set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
      set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");

      set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
      set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");

      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
	}
    }
  else
    {
      set_optab_libfunc (add_optab, mode, "_q_add");
      set_optab_libfunc (sub_optab, mode, "_q_sub");
      set_optab_libfunc (neg_optab, mode, "_q_neg");
      set_optab_libfunc (smul_optab, mode, "_q_mul");
      set_optab_libfunc (sdiv_optab, mode, "_q_div");
      if (TARGET_PPC_GPOPT)
	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");

      set_optab_libfunc (eq_optab, mode, "_q_feq");
      set_optab_libfunc (ne_optab, mode, "_q_fne");
      set_optab_libfunc (gt_optab, mode, "_q_fgt");
      set_optab_libfunc (ge_optab, mode, "_q_fge");
      set_optab_libfunc (lt_optab, mode, "_q_flt");
      set_optab_libfunc (le_optab, mode, "_q_fle");

      set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
      set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
      set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
      set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
      set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
    }
}
static void
rs6000_init_libfuncs (void)
{
  /* __float128 support.  */
  if (TARGET_FLOAT128_TYPE)
    {
      init_float128_ibm (IFmode);
      init_float128_ieee (KFmode);
    }

  /* AIX/Darwin/64-bit Linux quad floating point routines.  */
  if (TARGET_LONG_DOUBLE_128)
    {
      if (!TARGET_IEEEQUAD)
	init_float128_ibm (TFmode);
      else
	/* IEEE 128-bit including 32-bit SVR4 quad floating point routines.  */
	init_float128_ieee (TFmode);
    }
}
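/* Example (illustrative, not part of the original source): when KFmode is
   passed in vector registers, a __float128 addition lowers to a call to the
   __addkf3 routine registered above, while an IBM double-double long double
   addition (without -mxl-compat) goes to __gcc_qadd.  */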
/* Expand a block clear operation, and return 1 if successful.  Return 0
   if we should let the compiler generate normal code.

   operands[0] is the destination
   operands[1] is the length
   operands[3] is the alignment */

int
expand_block_clear (rtx operands[])
{
  rtx orig_dest = operands[0];
  rtx bytes_rtx	= operands[1];
  rtx align_rtx = operands[3];
  bool constp	= (GET_CODE (bytes_rtx) == CONST_INT);
  HOST_WIDE_INT align;
  HOST_WIDE_INT bytes;
  int offset;
  int clear_bytes;
  int clear_step;

  /* If this is not a fixed size clear, let the compiler call memset.  */
  if (! constp)
    return 0;

  /* This must be a fixed size alignment.  */
  gcc_assert (GET_CODE (align_rtx) == CONST_INT);
  align = INTVAL (align_rtx) * BITS_PER_UNIT;

  /* Anything to clear? */
  bytes = INTVAL (bytes_rtx);
  if (bytes <= 0)
    return 1;

  /* Use the builtin memset after a point, to avoid huge code bloat.
     When optimize_size, avoid any significant code bloat; calling
     memset is about 4 instructions, so allow for one instruction to
     load zero and three to do clearing.  */
  if (TARGET_ALTIVEC && align >= 128)
    clear_step = 16;
  else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
    clear_step = 8;
  else if (TARGET_SPE && align >= 64)
    clear_step = 8;
  else
    clear_step = 4;

  if (optimize_size && bytes > 3 * clear_step)
    return 0;
  if (! optimize_size && bytes > 8 * clear_step)
    return 0;

  for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
    {
      machine_mode mode = BLKmode;
      rtx dest;

      if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
	{
	  clear_bytes = 16;
	  mode = V4SImode;
	}
      else if (bytes >= 8 && TARGET_SPE && align >= 64)
	{
	  clear_bytes = 8;
	  mode = V2SImode;
	}
      else if (bytes >= 8 && TARGET_POWERPC64
	       && (align >= 64 || !STRICT_ALIGNMENT))
	{
	  clear_bytes = 8;
	  mode = DImode;
	  if (offset == 0 && align < 64)
	    {
	      rtx addr;

	      /* If the address form is reg+offset with offset not a
		 multiple of four, reload into reg indirect form here
		 rather than waiting for reload.  This way we get one
		 reload, not one per store.  */
	      addr = XEXP (orig_dest, 0);
	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
		{
		  addr = copy_addr_to_reg (addr);
		  orig_dest = replace_equiv_address (orig_dest, addr);
		}
	    }
	}
      else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
	{			/* move 4 bytes */
	  clear_bytes = 4;
	  mode = SImode;
	}
      else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
	{			/* move 2 bytes */
	  clear_bytes = 2;
	  mode = HImode;
	}
      else /* move 1 byte at a time */
	{
	  clear_bytes = 1;
	  mode = QImode;
	}

      dest = adjust_address (orig_dest, mode, offset);

      emit_move_insn (dest, CONST0_RTX (mode));
    }

  return 1;
}
/* Emit a potentially record-form instruction, setting DST from SRC.
   If DOT is 0, that is all; otherwise, set CCREG to the result of the
   signed comparison of DST with zero.  If DOT is 1, the generated RTL
   doesn't care about the DST result; if DOT is 2, it does.  If CCREG
   is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
   a separate COMPARE.  */

void
rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
{
  if (dot == 0)
    {
      emit_move_insn (dst, src);
      return;
    }

  if (cc_reg_not_cr0_operand (ccreg, CCmode))
    {
      emit_move_insn (dst, src);
      emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
      return;
    }

  rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
  if (dot == 1)
    {
      rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
    }
  else
    {
      rtx set = gen_rtx_SET (dst, src);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
    }
}
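/* For reference (illustrative, not part of the original source): with
   DOT == 1 and CCREG == CR0 the emitted RTL has the shape
     (parallel [(set ccreg (compare:CC src (const_int 0)))
		(clobber dst)])
   while DOT == 2 replaces the clobber with (set dst src), which is the
   form a record-form ("dot") instruction such as subf. sets both the GPR
   and CR0.  */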
/* Figure out the correct instructions to generate to load data for
   block compare.  MODE is used for the read from memory, and
   data is zero extended if REG is wider than MODE.  If LE code
   is being generated, bswap loads are used.

   REG is the destination register to move the data into.
   MEM is the memory block being read.
   MODE is the mode of memory to use for the read.  */
static void
do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
{
  switch (GET_MODE (reg))
    {
    case DImode:
      switch (mode)
	{
	case QImode:
	  emit_insn (gen_zero_extendqidi2 (reg, mem));
	  break;
	case HImode:
	  {
	    rtx src = mem;
	    if (!BYTES_BIG_ENDIAN)
	      {
		src = gen_reg_rtx (HImode);
		emit_insn (gen_bswaphi2 (src, mem));
	      }
	    emit_insn (gen_zero_extendhidi2 (reg, src));
	    break;
	  }
	case SImode:
	  {
	    rtx src = mem;
	    if (!BYTES_BIG_ENDIAN)
	      {
		src = gen_reg_rtx (SImode);
		emit_insn (gen_bswapsi2 (src, mem));
	      }
	    emit_insn (gen_zero_extendsidi2 (reg, src));
	  }
	  break;
	case DImode:
	  if (!BYTES_BIG_ENDIAN)
	    emit_insn (gen_bswapdi2 (reg, mem));
	  else
	    emit_insn (gen_movdi (reg, mem));
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case SImode:
      switch (mode)
	{
	case QImode:
	  emit_insn (gen_zero_extendqisi2 (reg, mem));
	  break;
	case HImode:
	  {
	    rtx src = mem;
	    if (!BYTES_BIG_ENDIAN)
	      {
		src = gen_reg_rtx (HImode);
		emit_insn (gen_bswaphi2 (src, mem));
	      }
	    emit_insn (gen_zero_extendhisi2 (reg, src));
	    break;
	  }
	case SImode:
	  if (!BYTES_BIG_ENDIAN)
	    emit_insn (gen_bswapsi2 (reg, mem));
	  else
	    emit_insn (gen_movsi (reg, mem));
	  break;
	case DImode:
	  /* DImode is larger than the destination reg so is not expected.  */
	  gcc_unreachable ();
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
      break;
    }
}
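/* Example (illustrative, not part of the original source): on a
   little-endian target with a DImode REG and an HImode MEM, the
   bswaphi2/zero_extendhidi2 pair above is typically matched to a single
   byte-reversed halfword load (lhbrx), so the loaded bytes compare in
   memory order just as they would on a big-endian target.  */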
/* Select the mode to be used for reading the next chunk of bytes
   in the compare.

   OFFSET is the current read offset from the beginning of the block.
   BYTES is the number of bytes remaining to be read.
   ALIGN is the minimum alignment of the memory blocks being compared in bytes.
   WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
   the largest allowable mode.  */
static machine_mode
select_block_compare_mode (unsigned HOST_WIDE_INT offset,
			   unsigned HOST_WIDE_INT bytes,
			   unsigned HOST_WIDE_INT align, bool word_mode_ok)
{
  /* First see if we can do a whole load unit
     as that will be more efficient than a larger load + shift.  */

  /* If big, use biggest chunk.
     If exactly chunk size, use that size.
     If remainder can be done in one piece with shifting, do that.
     Do largest chunk possible without violating alignment rules.  */

  /* The most we can read without potential page crossing.  */
  unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);

  if (word_mode_ok && bytes >= UNITS_PER_WORD)
    return word_mode;
  else if (bytes == GET_MODE_SIZE (SImode))
    return SImode;
  else if (bytes == GET_MODE_SIZE (HImode))
    return HImode;
  else if (bytes == GET_MODE_SIZE (QImode))
    return QImode;
  else if (bytes < GET_MODE_SIZE (SImode)
	   && offset >= GET_MODE_SIZE (SImode) - bytes)
    /* This matches the case where we have SImode and 3 bytes
       and offset >= 1 and permits us to move back one and overlap
       with the previous read, thus avoiding having to shift
       unwanted bytes off of the input.  */
    return SImode;
  else if (word_mode_ok && bytes < UNITS_PER_WORD
	   && offset >= UNITS_PER_WORD - bytes)
    /* Similarly, if we can use DImode it will get matched here and
       can do an overlapping read that ends at the end of the block.  */
    return word_mode;
  else if (word_mode_ok && maxread >= UNITS_PER_WORD)
    /* It is safe to do all remaining in one load of largest size,
       possibly with a shift to get rid of unwanted bytes.  */
    return word_mode;
  else if (maxread >= GET_MODE_SIZE (SImode))
    /* It is safe to do all remaining in one SImode load,
       possibly with a shift to get rid of unwanted bytes.  */
    return SImode;
  else if (bytes > GET_MODE_SIZE (SImode))
    return SImode;
  else if (bytes > GET_MODE_SIZE (HImode))
    return HImode;

  /* final fallback is do one byte */
  return QImode;
}
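/* Worked example (illustrative, not part of the original source): for an
   18-byte block with an 8-byte word_mode allowed, successive calls return
   DImode (18 >= 8), DImode again (10 >= 8), and then HImode
   (2 == GET_MODE_SIZE (HImode)), giving the 8/8/2 chunking shown in the
   expand_block_compare example below.  */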
/* Compute the alignment of pointer+OFFSET where the original alignment
   of pointer was BASE_ALIGN.  */
static unsigned HOST_WIDE_INT
compute_current_alignment (unsigned HOST_WIDE_INT base_align,
			   unsigned HOST_WIDE_INT offset)
{
  if (offset == 0)
    return base_align;
  return min (base_align, offset & -offset);
}
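/* Worked example (illustrative, not part of the original source):
   OFFSET & -OFFSET isolates the lowest set bit of OFFSET, i.e. the largest
   power of two dividing it.  With BASE_ALIGN == 8 and OFFSET == 12,
   12 & -12 == 4, so pointer+12 is only guaranteed 4-byte alignment:
   min (8, 4) == 4.  */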
/* Expand a block compare operation, and return true if successful.
   Return false if we should let the compiler generate normal code,
   probably a memcmp call.

   OPERANDS[0] is the target (result).
   OPERANDS[1] is the first source.
   OPERANDS[2] is the second source.
   OPERANDS[3] is the length.
   OPERANDS[4] is the alignment.  */
bool
expand_block_compare (rtx operands[])
{
  rtx target = operands[0];
  rtx orig_src1 = operands[1];
  rtx orig_src2 = operands[2];
  rtx bytes_rtx = operands[3];
  rtx align_rtx = operands[4];
  HOST_WIDE_INT cmp_bytes = 0;
  rtx src1 = orig_src1;
  rtx src2 = orig_src2;

  /* This case is complicated to handle because the subtract
     with carry instructions do not generate the 64-bit
     carry and so we must emit code to calculate it ourselves.
     We choose not to implement this yet.  */
  if (TARGET_32BIT && TARGET_POWERPC64)
    return false;

  /* If this is not a fixed size compare, just call memcmp.  */
  if (!CONST_INT_P (bytes_rtx))
    return false;

  /* This must be a fixed size alignment.  */
  if (!CONST_INT_P (align_rtx))
    return false;

  unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;

  /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff.  */
  if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
      || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
    return false;

  gcc_assert (GET_MODE (target) == SImode);

  /* Anything to move?  */
  unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
  if (bytes == 0)
    return true;

  /* The code generated for p7 and older is not faster than glibc
     memcmp if alignment is small and length is not short, so bail
     out to avoid those conditions.  */
  if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
      && ((base_align == 1 && bytes > 16)
	  || (base_align == 2 && bytes > 32)))
    return false;

  rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
  rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
  /* P7/P8 code uses cond for subfc. but P9 uses
     it for cmpld which needs CCUNSmode.  */
  rtx cond;
  if (TARGET_P9_MISC)
    cond = gen_reg_rtx (CCUNSmode);
  else
    cond = gen_reg_rtx (CCmode);

  /* If we have an LE target without ldbrx and word_mode is DImode,
     then we must avoid using word_mode.  */
  int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
		       && word_mode == DImode);

  /* Strategy phase.  How many ops will this take and should we expand it?  */

  unsigned HOST_WIDE_INT offset = 0;
  machine_mode load_mode =
    select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
  unsigned int load_mode_size = GET_MODE_SIZE (load_mode);

  /* We don't want to generate too much code.  */
  unsigned HOST_WIDE_INT max_bytes =
    load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit;
  if (!IN_RANGE (bytes, 1, max_bytes))
    return false;

  bool generate_6432_conversion = false;
  rtx convert_label = NULL;
  rtx final_label = NULL;

  /* Example of generated code for 18 bytes aligned 1 byte.
     Compiled with -fno-reorder-blocks for clarity.
	...
     .L6487: #convert_label
	...
     .L6488: #final_label
	...

     We start off with DImode for two blocks that jump to the DI->SI conversion
     if the difference is found there, then a final block of HImode that skips
     the DI->SI conversion.  */

  while (bytes > 0)
    {
      unsigned int align = compute_current_alignment (base_align, offset);
      if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
	load_mode = select_block_compare_mode (offset, bytes, align,
					       word_mode_ok);
      else
	load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
      load_mode_size = GET_MODE_SIZE (load_mode);
      if (bytes >= load_mode_size)
	cmp_bytes = load_mode_size;
      else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
	{
	  /* Move this load back so it doesn't go past the end.
	     P8/P9 can do this efficiently.  */
	  unsigned int extra_bytes = load_mode_size - bytes;
	  cmp_bytes = bytes;
	  if (extra_bytes < offset)
	    {
	      offset -= extra_bytes;
	      cmp_bytes = load_mode_size;
	      bytes = cmp_bytes;
	    }
	}
      else
	/* P7 and earlier can't do the overlapping load trick fast,
	   so this forces a non-overlapping load and a shift to get
	   rid of the extra bytes.  */
	cmp_bytes = bytes;

      src1 = adjust_address (orig_src1, load_mode, offset);
      src2 = adjust_address (orig_src2, load_mode, offset);

      if (!REG_P (XEXP (src1, 0)))
	{
	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
	  src1 = replace_equiv_address (src1, src1_reg);
	}
      set_mem_size (src1, cmp_bytes);

      if (!REG_P (XEXP (src2, 0)))
	{
	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
	  src2 = replace_equiv_address (src2, src2_reg);
	}
      set_mem_size (src2, cmp_bytes);

      do_load_for_compare (tmp_reg_src1, src1, load_mode);
      do_load_for_compare (tmp_reg_src2, src2, load_mode);

      if (cmp_bytes < load_mode_size)
	{
	  /* Shift unneeded bytes off.  */
	  rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
	  if (word_mode == DImode)
	    {
	      emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
	      emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
	    }
	  else
	    {
	      emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
	      emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
	    }
	}

      int remain = bytes - cmp_bytes;
      if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode))
	{
	  /* Target is larger than load size so we don't need to
	     reduce result size.  */

	  /* We previously did a block that need 64->32 conversion but
	     the current block does not, so a label is needed to jump
	     to the end.  */
	  if (generate_6432_conversion && !final_label)
	    final_label = gen_label_rtx ();

	  if (remain > 0)
	    {
	      /* This is not the last block, branch to the end if the result
		 of this subtract is not zero.  */
	      if (!final_label)
		final_label = gen_label_rtx ();
	      rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
	      rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
	      rtx cr = gen_reg_rtx (CCmode);
	      rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr);
	      emit_insn (gen_movsi (target,
				    gen_lowpart (SImode, tmp_reg_src2)));
	      rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
	      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
						 fin_ref, pc_rtx);
	      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
	      JUMP_LABEL (j) = final_label;
	      LABEL_NUSES (final_label) += 1;
	    }
	  else
	    {
	      if (word_mode == DImode)
		{
		  emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
					 tmp_reg_src2));
		  emit_insn (gen_movsi (target,
					gen_lowpart (SImode, tmp_reg_src2)));
		}
	      else
		emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));

	      if (final_label)
		{
		  rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
		  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
		  JUMP_LABEL(j) = final_label;
		  LABEL_NUSES (final_label) += 1;
		  emit_barrier ();
		}
	    }
	}
      else
	{
	  /* Do we need a 64->32 conversion block? We need the 64->32
	     conversion even if target size == load_mode size because
	     the subtract generates one extra bit.  */
	  generate_6432_conversion = true;

	  if (remain > 0)
	    {
	      if (!convert_label)
		convert_label = gen_label_rtx ();

	      /* Compare to zero and branch to convert_label if not zero.  */
	      rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
	      if (TARGET_P9_MISC)
		{
		  /* Generate a compare, and convert with a setb later.  */
		  rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
					     tmp_reg_src2);
		  emit_insn (gen_rtx_SET (cond, cmp));
		}
	      else
		{
		  /* Generate a subfc. and use the longer
		     sequence for conversion.  */
		  if (TARGET_64BIT)
		    emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
						       tmp_reg_src1, cond));
		  else
		    emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
						       tmp_reg_src1, cond));
		}
	      rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
	      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
						 cvt_ref, pc_rtx);
	      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
	      JUMP_LABEL(j) = convert_label;
	      LABEL_NUSES (convert_label) += 1;
	    }
	  else
	    {
	      /* Just do the subtract/compare.  Since this is the last block
		 the convert code will be generated immediately following.  */
	      if (TARGET_P9_MISC)
		{
		  rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
					     tmp_reg_src2);
		  emit_insn (gen_rtx_SET (cond, cmp));
		}
	      else
		{
		  if (TARGET_64BIT)
		    emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2,
						  tmp_reg_src1));
		  else
		    emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2,
						  tmp_reg_src1));
		}
	    }
	}

      offset += cmp_bytes;
      bytes -= cmp_bytes;
    }

  if (generate_6432_conversion)
    {
      if (convert_label)
	emit_label (convert_label);

      /* We need to produce DI result from sub, then convert to target SI
	 while maintaining <0 / ==0 / >0 properties.  This sequence works:
	 subfc L,A,B
	 subfe H,H,H
	 popcntd L,L
	 rldimi L,H,6,0

	 This is an alternate one Segher cooked up if somebody
	 wants to expand this for something that doesn't have popcntd:
	 subfc L,a,b
	 subfe H,x,x
	 addic t,L,-1
	 subfe v,t,L
	 or z,v,H

	 And finally, p9 can just do this:
	 cmpld A,B
	 setb r */

      if (TARGET_P9_MISC)
	{
	  emit_insn (gen_setb_unsigned (target, cond));
	}
      else
	{
	  if (TARGET_64BIT)
	    {
	      rtx tmp_reg_ca = gen_reg_rtx (DImode);
	      emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
	      emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2));
	      emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca));
	      emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
	    }
	  else
	    {
	      rtx tmp_reg_ca = gen_reg_rtx (SImode);
	      emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
	      emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2));
	      emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca));
	    }
	}
    }

  if (final_label)
    emit_label (final_label);

  gcc_assert (bytes == 0);
  return true;
}
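/* Why the popcntd/subfe/or sequence preserves the sign (illustrative, not
   part of the original source): after the subfc, CA is set iff
   src1 >= src2 unsigned, so subfe H,H,H yields 0 or -1.  popcntd of a
   nonzero difference is in [1,64], so or-ing it with H gives a negative
   value when src1 < src2, zero only when the chunks are equal, and a small
   positive value otherwise.  */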
/* Generate alignment check and branch code to set up for
   strncmp when we don't have DI alignment.
   STRNCMP_LABEL is the label to branch if there is a page crossing.
   SRC is the string pointer to be examined.
   BYTES is the max number of bytes to compare.  */
static void
expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
{
  rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
  rtx src_check = copy_addr_to_reg (XEXP (src, 0));
  if (GET_MODE (src_check) == SImode)
    emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
  else
    emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
  rtx cond = gen_reg_rtx (CCmode);
  emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
					 GEN_INT (4096 - bytes)));

  rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);

  rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
				     pc_rtx, lab_ref);
  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
  JUMP_LABEL (j) = strncmp_label;
  LABEL_NUSES (strncmp_label) += 1;
}
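/* Example (illustrative, not part of the original source): for BYTES == 16
   the branch to STRNCMP_LABEL is taken when (addr & 0xfff) >= 4096 - 16,
   i.e. whenever a 16-byte load starting at ADDR might run past a 4 KiB page
   boundary and so could fault beyond the end of the string.  */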
/* Expand a string compare operation with length, and return
   true if successful.  Return false if we should let the
   compiler generate normal code, probably a strncmp call.

   OPERANDS[0] is the target (result).
   OPERANDS[1] is the first source.
   OPERANDS[2] is the second source.
   If NO_LENGTH is zero, then:
   OPERANDS[3] is the length.
   OPERANDS[4] is the alignment in bytes.
   If NO_LENGTH is nonzero, then:
   OPERANDS[3] is the alignment in bytes.  */
bool
expand_strn_compare (rtx operands[], int no_length)
{
  rtx target = operands[0];
  rtx orig_src1 = operands[1];
  rtx orig_src2 = operands[2];
  rtx bytes_rtx, align_rtx;
  if (no_length)
    {
      bytes_rtx = NULL;
      align_rtx = operands[3];
    }
  else
    {
      bytes_rtx = operands[3];
      align_rtx = operands[4];
    }
  unsigned HOST_WIDE_INT cmp_bytes = 0;
  rtx src1 = orig_src1;
  rtx src2 = orig_src2;

  /* If we have a length, it must be constant.  This simplifies things
     a bit as we don't have to generate code to check if we've exceeded
     the length.  Later this could be expanded to handle this case.  */
  if (!no_length && !CONST_INT_P (bytes_rtx))
    return false;

  /* This must be a fixed size alignment.  */
  if (!CONST_INT_P (align_rtx))
    return false;

  unsigned int base_align = UINTVAL (align_rtx);
  int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
  int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;

  /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff.  */
  if (SLOW_UNALIGNED_ACCESS (word_mode, align1)
      || SLOW_UNALIGNED_ACCESS (word_mode, align2))
    return false;

  gcc_assert (GET_MODE (target) == SImode);

  /* If we have an LE target without ldbrx and word_mode is DImode,
     then we must avoid using word_mode.  */
  int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
		       && word_mode == DImode);

  unsigned int word_mode_size = GET_MODE_SIZE (word_mode);

  unsigned HOST_WIDE_INT offset = 0;
  unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available.  */
  unsigned HOST_WIDE_INT compare_length; /* How much to compare inline.  */
  if (no_length)
    /* Use this as a standin to determine the mode to use.  */
    bytes = rs6000_string_compare_inline_limit * word_mode_size;
  else
    bytes = UINTVAL (bytes_rtx);

  machine_mode load_mode =
    select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
  unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
  compare_length = rs6000_string_compare_inline_limit * load_mode_size;

  /* If we have equality at the end of the last compare and we have not
     found the end of the string, we need to call strcmp/strncmp to
     compare the remainder.  */
  bool equality_compare_rest = false;

  if (no_length)
    {
      bytes = compare_length;
      equality_compare_rest = true;
    }
  else
    {
      if (bytes <= compare_length)
	compare_length = bytes;
      else
	equality_compare_rest = true;
    }

  rtx result_reg = gen_reg_rtx (word_mode);
  rtx final_move_label = gen_label_rtx ();
  rtx final_label = gen_label_rtx ();
  rtx begin_compare_label = NULL;

  if (base_align < 8)
    {
      /* Generate code that checks distance to 4k boundary for this case.  */
      begin_compare_label = gen_label_rtx ();
      rtx strncmp_label = gen_label_rtx ();
      rtx jmp;

      /* Strncmp for power8 in glibc does this:
	 rldicl r8,r3,0,52
	 cmpldi cr7,r8,4096-16
	 bgt cr7,L(pagecross) */

      /* Make sure that the length we use for the alignment test and
	 the subsequent code generation are in agreement so we do not
	 go past the length we tested for a 4k boundary crossing.  */
      unsigned HOST_WIDE_INT align_test = compare_length;
      if (align_test < 8)
	{
	  align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
	  base_align = align_test;
	}
      else
	{
	  align_test = ROUND_UP (align_test, 8);
	  base_align = 8;
	}

      if (align1 < 8)
	expand_strncmp_align_check (strncmp_label, src1, align_test);
      if (align2 < 8)
	expand_strncmp_align_check (strncmp_label, src2, align_test);

      /* Now generate the following sequence:
	 - branch to begin_compare
	 - strncmp_label
	 - call to strncmp
	 - branch to final_label
	 - begin_compare_label */

      rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
      jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
      JUMP_LABEL (jmp) = begin_compare_label;
      LABEL_NUSES (begin_compare_label) += 1;
      emit_barrier ();

      emit_label (strncmp_label);

      if (!REG_P (XEXP (src1, 0)))
	{
	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
	  src1 = replace_equiv_address (src1, src1_reg);
	}

      if (!REG_P (XEXP (src2, 0)))
	{
	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
	  src2 = replace_equiv_address (src2, src2_reg);
	}

      if (no_length)
	{
	  tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
				   target, LCT_NORMAL, GET_MODE (target), 2,
				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
				   force_reg (Pmode, XEXP (src2, 0)), Pmode);
	}
      else
	{
	  /* -m32 -mpowerpc64 results in word_mode being DImode even
	     though otherwise it is 32-bit.  The length arg to strncmp
	     is a size_t which will be the same size as pointers.  */
	  rtx len_rtx;
	  if (TARGET_64BIT)
	    len_rtx = gen_reg_rtx (DImode);
	  else
	    len_rtx = gen_reg_rtx (SImode);

	  emit_move_insn (len_rtx, bytes_rtx);

	  tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
				   target, LCT_NORMAL, GET_MODE (target), 3,
				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
				   force_reg (Pmode, XEXP (src2, 0)), Pmode,
				   len_rtx, GET_MODE (len_rtx));
	}

      rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
      jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
      JUMP_LABEL (jmp) = final_label;
      LABEL_NUSES (final_label) += 1;
      emit_barrier ();
      emit_label (begin_compare_label);
    }

  rtx cleanup_label = NULL;
  rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
  rtx tmp_reg_src2 = gen_reg_rtx (word_mode);

  /* Generate sequence of ld/ldbrx, cmpb to compare out
     to the length specified.  */
  unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
  while (bytes_to_compare > 0)
    {
      /* Compare sequence:
	 check each 8B with: ld/ld cmpd bne
	 If equal, use rldicr/cmpb to check for zero byte.
	 cleanup code at end:
	 cmpb		get byte that differs
	 cmpb		look for zero byte
	 orc		combine
	 cntlzd		get bit of first zero/diff byte
	 subfic		convert for rldcl use
	 rldcl rldcl	extract diff/zero byte
	 subf		subtract for final result

	 The last compare can branch around the cleanup code if the
	 result is zero because the strings are exactly equal.  */
      unsigned int align = compute_current_alignment (base_align, offset);
      if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
	load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
					       word_mode_ok);
      else
	load_mode = select_block_compare_mode (0, bytes_to_compare, align,
					       word_mode_ok);
      load_mode_size = GET_MODE_SIZE (load_mode);
      if (bytes_to_compare >= load_mode_size)
	cmp_bytes = load_mode_size;
      else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
	{
	  /* Move this load back so it doesn't go past the end.
	     P8/P9 can do this efficiently.  */
	  unsigned int extra_bytes = load_mode_size - bytes_to_compare;
	  cmp_bytes = bytes_to_compare;
	  if (extra_bytes < offset)
	    {
	      offset -= extra_bytes;
	      cmp_bytes = load_mode_size;
	      bytes_to_compare = cmp_bytes;
	    }
	}
      else
	/* P7 and earlier can't do the overlapping load trick fast,
	   so this forces a non-overlapping load and a shift to get
	   rid of the extra bytes.  */
	cmp_bytes = bytes_to_compare;

      src1 = adjust_address (orig_src1, load_mode, offset);
      src2 = adjust_address (orig_src2, load_mode, offset);

      if (!REG_P (XEXP (src1, 0)))
	{
	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
	  src1 = replace_equiv_address (src1, src1_reg);
	}
      set_mem_size (src1, cmp_bytes);

      if (!REG_P (XEXP (src2, 0)))
	{
	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
	  src2 = replace_equiv_address (src2, src2_reg);
	}
      set_mem_size (src2, cmp_bytes);

      do_load_for_compare (tmp_reg_src1, src1, load_mode);
      do_load_for_compare (tmp_reg_src2, src2, load_mode);

      /* We must always left-align the data we read, and
	 clear any bytes to the right that are beyond the string.
	 Otherwise the cmpb sequence won't produce the correct
	 results.  The beginning of the compare will be done
	 with word_mode so will not have any extra shifts or
	 clear rights.  */

      if (load_mode_size < word_mode_size)
	{
	  /* Rotate left first.  */
	  rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
	  if (word_mode == DImode)
	    {
	      emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
	      emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
	    }
	  else
	    {
	      emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
	      emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
	    }
	}

      if (cmp_bytes < word_mode_size)
	{
	  /* Now clear right.  This plus the rotate can be
	     turned into a rldicr instruction.  */
	  HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
	  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
	  if (word_mode == DImode)
	    {
	      emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
	      emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
	    }
	  else
	    {
	      emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
	      emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
	    }
	}

      /* Cases to handle.  A and B are chunks of the two strings.
	 1: Not end of comparison:
	 A != B: branch to cleanup code to compute result.
	 A == B: check for 0 byte, next block if not found.
	 2: End of the inline comparison:
	 A != B: branch to cleanup code to compute result.
	 A == B: check for 0 byte, call strcmp/strncmp
	 3: compared requested N bytes:
	 A == B: branch to result 0.
	 A != B: cleanup code to compute result.  */

      unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;

      rtx dst_label;
      if (remain > 0 || equality_compare_rest)
	{
	  /* Branch to cleanup code, otherwise fall through to do
	     more compares.  */
	  if (!cleanup_label)
	    cleanup_label = gen_label_rtx ();
	  dst_label = cleanup_label;
	}
      else
	/* Branch to end and produce result of 0.  */
	dst_label = final_move_label;

      rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
      rtx cond = gen_reg_rtx (CCmode);

      /* Always produce the 0 result, it is needed if
	 cmpb finds a 0 byte in this chunk.  */
      rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
      rs6000_emit_dot_insn (result_reg, tmp, 1, cond);

      rtx cmp_rtx;
      if (remain == 0 && !equality_compare_rest)
	cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
      else
	cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);

      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
					 lab_ref, pc_rtx);
      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
      JUMP_LABEL (j) = dst_label;
      LABEL_NUSES (dst_label) += 1;

      if (remain > 0 || equality_compare_rest)
	{
	  /* Generate a cmpb to test for a 0 byte and branch
	     to final result if found.  */
	  rtx cmpb_zero = gen_reg_rtx (word_mode);
	  rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
	  rtx condz = gen_reg_rtx (CCmode);
	  rtx zero_reg = gen_reg_rtx (word_mode);
	  if (word_mode == SImode)
	    {
	      emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
	      emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
	      if (cmp_bytes < word_mode_size)
		{
		  /* Don't want to look at zero bytes past end.  */
		  HOST_WIDE_INT mb =
		    BITS_PER_UNIT * (word_mode_size - cmp_bytes);
		  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
		  emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
		}
	    }
	  else
	    {
	      emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
	      emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
	      if (cmp_bytes < word_mode_size)
		{
		  /* Don't want to look at zero bytes past end.  */
		  HOST_WIDE_INT mb =
		    BITS_PER_UNIT * (word_mode_size - cmp_bytes);
		  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
		  emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
		}
	    }

	  emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
	  rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
	  rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
					     lab_ref_fin, pc_rtx);
	  rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
	  JUMP_LABEL (j2) = final_move_label;
	  LABEL_NUSES (final_move_label) += 1;
	}

      offset += cmp_bytes;
      bytes_to_compare -= cmp_bytes;
    }

  if (equality_compare_rest)
    {
      /* Update pointers past what has been compared already.  */
      src1 = adjust_address (orig_src1, load_mode, offset);
      src2 = adjust_address (orig_src2, load_mode, offset);

      if (!REG_P (XEXP (src1, 0)))
	{
	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
	  src1 = replace_equiv_address (src1, src1_reg);
	}
      set_mem_size (src1, cmp_bytes);

      if (!REG_P (XEXP (src2, 0)))
	{
	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
	  src2 = replace_equiv_address (src2, src2_reg);
	}
      set_mem_size (src2, cmp_bytes);

      /* Construct call to strcmp/strncmp to compare the rest of the string.  */
      if (no_length)
	{
	  tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
				   target, LCT_NORMAL, GET_MODE (target), 2,
				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
				   force_reg (Pmode, XEXP (src2, 0)), Pmode);
	}
      else
	{
	  rtx len_rtx;
	  if (TARGET_64BIT)
	    len_rtx = gen_reg_rtx (DImode);
	  else
	    len_rtx = gen_reg_rtx (SImode);

	  emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
	  tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
				   target, LCT_NORMAL, GET_MODE (target), 3,
				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
				   force_reg (Pmode, XEXP (src2, 0)), Pmode,
				   len_rtx, GET_MODE (len_rtx));
	}

      rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
      rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
      JUMP_LABEL (jmp) = final_label;
      LABEL_NUSES (final_label) += 1;
      emit_barrier ();
    }

  if (cleanup_label)
    emit_label (cleanup_label);

  /* Generate the final sequence that identifies the differing
     byte and generates the final result, taking into account
     zero bytes:

     cmpb		cmpb_result1, src1, src2
     cmpb		cmpb_result2, src1, zero
     orc		cmpb_result1, cmpb_result1, cmpb_result2
     cntlzd		get bit of first zero/diff byte
     addi		convert for rldcl use
     rldcl rldcl	extract diff/zero byte
     subf		subtract for final result
  */

  rtx cmpb_diff = gen_reg_rtx (word_mode);
  rtx cmpb_zero = gen_reg_rtx (word_mode);
  rtx rot_amt = gen_reg_rtx (word_mode);
  rtx zero_reg = gen_reg_rtx (word_mode);

  rtx rot1_1 = gen_reg_rtx (word_mode);
  rtx rot1_2 = gen_reg_rtx (word_mode);
  rtx rot2_1 = gen_reg_rtx (word_mode);
  rtx rot2_2 = gen_reg_rtx (word_mode);

  if (word_mode == SImode)
    {
      emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
      emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
      emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
      emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
      emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
      emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
      emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
      emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
			      gen_lowpart (SImode, rot_amt)));
      emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
      emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
			      gen_lowpart (SImode, rot_amt)));
      emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
      emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
    }
  else
    {
      emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
      emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
      emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
      emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
      emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
      emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
      emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
      emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
			      gen_lowpart (SImode, rot_amt)));
      emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
      emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
			      gen_lowpart (SImode, rot_amt)));
      emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
      emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
    }

  emit_label (final_move_label);
  emit_insn (gen_movsi (target,
			gen_lowpart (SImode, result_reg)));
  emit_label (final_label);
  return true;
}
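/* Worked example for the cleanup sequence (illustrative, not part of the
   original source): if the first differing or zero byte is byte k counting
   from the most significant end, cntlzd of the combined cmpb result is
   8*k; adding 8 makes the rotate-left bring exactly that byte into the low
   8 bits of each operand, so the final subtract has the same sign as the
   strncmp return value.  */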
/* Expand a block move operation, and return 1 if successful.  Return 0
   if we should let the compiler generate normal code.

   operands[0] is the destination
   operands[1] is the source
   operands[2] is the length
   operands[3] is the alignment */

#define MAX_MOVE_REG 4

int
expand_block_move (rtx operands[])
{
  rtx orig_dest = operands[0];
  rtx orig_src	= operands[1];
  rtx bytes_rtx	= operands[2];
  rtx align_rtx = operands[3];
  int constp	= (GET_CODE (bytes_rtx) == CONST_INT);
  int align;
  int bytes;
  int offset;
  int move_bytes;
  rtx stores[MAX_MOVE_REG];
  int num_reg = 0;

  /* If this is not a fixed size move, just call memcpy */
  if (! constp)
    return 0;

  /* This must be a fixed size alignment */
  gcc_assert (GET_CODE (align_rtx) == CONST_INT);
  align = INTVAL (align_rtx) * BITS_PER_UNIT;

  /* Anything to move? */
  bytes = INTVAL (bytes_rtx);
  if (bytes <= 0)
    return 1;

  if (bytes > rs6000_block_move_inline_limit)
    return 0;

  for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
    {
      union {
	rtx (*movmemsi) (rtx, rtx, rtx, rtx);
	rtx (*mov) (rtx, rtx);
      } gen_func;
      machine_mode mode = BLKmode;
      rtx src, dest;

      /* Altivec first, since it will be faster than a string move
	 when it applies, and usually not significantly larger.  */
      if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
	{
	  move_bytes = 16;
	  mode = V4SImode;
	  gen_func.mov = gen_movv4si;
	}
      else if (TARGET_SPE && bytes >= 8 && align >= 64)
	{
	  move_bytes = 8;
	  mode = V2SImode;
	  gen_func.mov = gen_movv2si;
	}
      else if (TARGET_STRING
	       && bytes > 24	/* move up to 32 bytes at a time */
	       && ! fixed_regs[5]
	       && ! fixed_regs[6]
	       && ! fixed_regs[7]
	       && ! fixed_regs[8]
	       && ! fixed_regs[9]
	       && ! fixed_regs[10]
	       && ! fixed_regs[11]
	       && ! fixed_regs[12])
	{
	  move_bytes = (bytes > 32) ? 32 : bytes;
	  gen_func.movmemsi = gen_movmemsi_8reg;
	}
      else if (TARGET_STRING
	       && bytes > 16	/* move up to 24 bytes at a time */
	       && ! fixed_regs[5]
	       && ! fixed_regs[6]
	       && ! fixed_regs[7]
	       && ! fixed_regs[8]
	       && ! fixed_regs[9]
	       && ! fixed_regs[10])
	{
	  move_bytes = (bytes > 24) ? 24 : bytes;
	  gen_func.movmemsi = gen_movmemsi_6reg;
	}
      else if (TARGET_STRING
	       && bytes > 8	/* move up to 16 bytes at a time */
	       && ! fixed_regs[5]
	       && ! fixed_regs[6]
	       && ! fixed_regs[7]
	       && ! fixed_regs[8])
	{
	  move_bytes = (bytes > 16) ? 16 : bytes;
	  gen_func.movmemsi = gen_movmemsi_4reg;
	}
      else if (bytes >= 8 && TARGET_POWERPC64
	       && (align >= 64 || !STRICT_ALIGNMENT))
	{
	  move_bytes = 8;
	  mode = DImode;
	  gen_func.mov = gen_movdi;
	  if (offset == 0 && align < 64)
	    {
	      rtx addr;

	      /* If the address form is reg+offset with offset not a
		 multiple of four, reload into reg indirect form here
		 rather than waiting for reload.  This way we get one
		 reload, not one per load and/or store.  */
	      addr = XEXP (orig_dest, 0);
	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
		{
		  addr = copy_addr_to_reg (addr);
		  orig_dest = replace_equiv_address (orig_dest, addr);
		}
	      addr = XEXP (orig_src, 0);
	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
		{
		  addr = copy_addr_to_reg (addr);
		  orig_src = replace_equiv_address (orig_src, addr);
		}
	    }
	}
      else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
	{			/* move up to 8 bytes at a time */
	  move_bytes = (bytes > 8) ? 8 : bytes;
	  gen_func.movmemsi = gen_movmemsi_2reg;
	}
      else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
	{			/* move 4 bytes */
	  move_bytes = 4;
	  mode = SImode;
	  gen_func.mov = gen_movsi;
	}
      else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
	{			/* move 2 bytes */
	  move_bytes = 2;
	  mode = HImode;
	  gen_func.mov = gen_movhi;
	}
      else if (TARGET_STRING && bytes > 1)
	{			/* move up to 4 bytes at a time */
	  move_bytes = (bytes > 4) ? 4 : bytes;
	  gen_func.movmemsi = gen_movmemsi_1reg;
	}
      else /* move 1 byte at a time */
	{
	  move_bytes = 1;
	  mode = QImode;
	  gen_func.mov = gen_movqi;
	}

      src = adjust_address (orig_src, mode, offset);
      dest = adjust_address (orig_dest, mode, offset);

      if (mode != BLKmode)
	{
	  rtx tmp_reg = gen_reg_rtx (mode);

	  emit_insn ((*gen_func.mov) (tmp_reg, src));
	  stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
	}

      if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
	{
	  int i;
	  for (i = 0; i < num_reg; i++)
	    emit_insn (stores[i]);
	  num_reg = 0;
	}

      if (mode == BLKmode)
	{
	  /* Move the address into scratch registers.  The movmemsi
	     patterns require zero offset.  */
	  if (!REG_P (XEXP (src, 0)))
	    {
	      rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
	      src = replace_equiv_address (src, src_reg);
	    }
	  set_mem_size (src, move_bytes);

	  if (!REG_P (XEXP (dest, 0)))
	    {
	      rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
	      dest = replace_equiv_address (dest, dest_reg);
	    }
	  set_mem_size (dest, move_bytes);

	  emit_insn ((*gen_func.movmemsi) (dest, src,
					   GEN_INT (move_bytes & 31),
					   align_rtx));
	}
    }

  return 1;
}
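/* Example (illustrative, not part of the original source): a 33-byte move
   with -mstring on a 32-bit target typically expands to one 32-byte
   movmemsi_8reg block (load/store string insns using r5..r12) followed by
   a single QImode move for the remaining byte.  */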
/* Return a string to perform a load_multiple operation.
   operands[0] is the vector.
   operands[1] is the source address.
   operands[2] is the first destination register.  */

const char *
rs6000_output_load_multiple (rtx operands[3])
{
  /* We have to handle the case where the pseudo used to contain the address
     is assigned to one of the output registers.  */
  int i, j;
  int words = XVECLEN (operands[0], 0);
  rtx xop[10];

  if (XVECLEN (operands[0], 0) == 1)
    return "lwz %2,0(%1)";

  for (i = 0; i < words; i++)
    if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
      {
	if (i == words-1)
	  {
	    xop[0] = GEN_INT (4 * (words-1));
	    xop[1] = operands[1];
	    xop[2] = operands[2];
	    output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
	    return "";
	  }
	else if (i == 0)
	  {
	    xop[0] = GEN_INT (4 * (words-1));
	    xop[1] = operands[1];
	    xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
	    output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
	    return "";
	  }
	else
	  {
	    for (j = 0; j < words; j++)
	      if (j != i)
		{
		  xop[0] = GEN_INT (j * 4);
		  xop[1] = operands[1];
		  xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
		  output_asm_insn ("lwz %2,%0(%1)", xop);
		}
	    xop[0] = GEN_INT (i * 4);
	    xop[1] = operands[1];
	    output_asm_insn ("lwz %1,%0(%1)", xop);
	    return "";
	  }
      }

  return "lswi %2,%1,%N0";
}
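/* Usage note (illustrative, not part of the original source): when the
   address register does not overlap the destinations, a 3-word load emits
   a single load-string-word-immediate of 12 bytes via "lswi %2,%1,%N0";
   the special cases above exist only so that the address register, when it
   is itself one of the destinations, is loaded last.  */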
/* A validation routine: say whether CODE, a condition code, and MODE
   match.  The other alternatives either don't make sense or should
   never be generated.  */

void
validate_condition_mode (enum rtx_code code, machine_mode mode)
{
  gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
	       || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
	      && GET_MODE_CLASS (mode) == MODE_CC);

  /* These don't make sense.  */
  gcc_assert ((code != GT && code != LT && code != GE && code != LE)
	      || mode != CCUNSmode);

  gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
	      || mode == CCUNSmode);

  gcc_assert (mode == CCFPmode
	      || (code != ORDERED && code != UNORDERED
		  && code != UNEQ && code != LTGT
		  && code != UNGT && code != UNLT
		  && code != UNGE && code != UNLE));

  /* These should never be generated except for
     flag_finite_math_only.  */
  gcc_assert (mode != CCFPmode
	      || flag_finite_math_only
	      || (code != LE && code != GE
		  && code != UNEQ && code != LTGT
		  && code != UNGT && code != UNLT));

  /* These are invalid; the information is not there.  */
  gcc_assert (mode != CCEQmode || code == EQ || code == NE);
}
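/* Example (illustrative, not part of the original source):
   validate_condition_mode (GTU, CCmode) fails the assertion requiring
   unsigned comparisons to carry CCUNSmode, and
   validate_condition_mode (GT, CCUNSmode) fails the preceding one.  */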
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
   not zero, store there the bit offset (counted from the right) where
   the single stretch of 1 bits begins; and similarly for B, the bit
   offset where it ends.  */

bool
rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
{
  unsigned HOST_WIDE_INT val = INTVAL (mask);
  unsigned HOST_WIDE_INT bit;
  int nb, ne;
  int n = GET_MODE_PRECISION (mode);

  if (mode != DImode && mode != SImode)
    return false;

  if (INTVAL (mask) >= 0)
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      nb = exact_log2 (val + bit);
    }
  else if (val + 1 == 0)
    {
      nb = n;
      ne = 0;
    }
  else if (val & 1)
    {
      val = ~val;
      bit = val & -val;
      nb = exact_log2 (bit);
      ne = exact_log2 (val + bit);
    }
  else
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      if (val + bit == 0)
	nb = n;
      else
	nb = 0;
    }

  nb--;

  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
    return false;

  if (b)
    *b = nb;
  if (e)
    *e = ne;

  return true;
}
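/* Worked example (illustrative, not part of the original source):
   MASK == 0x0ff0 in SImode.  val & -val == 0x10, so ne == 4;
   val + 0x10 == 0x1000, so exact_log2 gives 12 and the nb-- leaves
   nb == 11: the single run of ones occupies bits 4..11.  */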
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
   or rldicr instruction, to implement an AND with it in mode MODE.  */

bool
rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
     does not wrap.  */
  if (mode == DImode)
    return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));

  /* For SImode, rlwinm can do everything.  */
  if (mode == SImode)
    return (nb < 32 && ne < 32);

  return false;
}
/* Return the instruction template for an AND with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldicl. %0,%1,0,%3";
      return "rldicl %0,%1,0,%3";
    }

  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rldicr. %0,%1,0,%3";
      return "rldicr %0,%1,0,%3";
    }

  if (nb < 32 && ne < 32)
    {
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwinm. %0,%1,0,%3,%4";
      return "rlwinm %0,%1,0,%3,%4";
    }

  gcc_unreachable ();
}
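/* Example (illustrative, not part of the original source): for the SImode
   mask 0x0ff0 (nb == 11, ne == 4) the rlwinm case applies and yields
   "rlwinm %0,%1,0,20,27", since the MB/ME fields count from the most
   significant bit: MB == 31-11 == 20 and ME == 31-4 == 27.  */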
/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
   rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
   shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);
  int sh = -1;

  if (CONST_INT_P (XEXP (shift, 1)))
    {
      sh = INTVAL (XEXP (shift, 1));
      if (sh < 0 || sh >= n)
	return false;
    }

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rld*.  */
  if (mode == DImode && code == ROTATE)
    return (nb == 63 || ne == 0 || ne == sh);

  /* SImode rotates need rlw*.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Variable shifts are only okay for rotates.  */
  if (sh < 0)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (nb == 63 || ne == 0 || ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
/* Return the instruction template for a shift with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2cl. %0,%1,%2,%3";
      return "rld%I2cl %0,%1,%2,%3";
    }

  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rld%I2cr. %0,%1,%2,%3";
      return "rld%I2cr %0,%1,%2,%3";
    }

  if (mode == DImode
      && GET_CODE (operands[4]) != LSHIFTRT
      && CONST_INT_P (operands[2])
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2c. %0,%1,%2,%3";
      return "rld%I2c %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      /* This insn can also be a 64-bit rotate with mask that really makes
	 it just a shift right (with mask); the %h below are to adjust for
	 that situation (shift count is >= 32 in that case).  */
      if (dot)
	return "rlw%I2nm. %0,%1,%h2,%3,%4";
      return "rlw%I2nm %0,%1,%h2,%3,%4";
    }

  gcc_unreachable ();
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
   rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
   ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  int sh = INTVAL (XEXP (shift, 1));
  if (sh < 0 || sh >= n)
    return false;

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rldimi.  */
  if (mode == DImode && code == ROTATE)
    return (ne == sh);

  /* SImode rotates need rlwimi.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
/* Return the instruction template for an insert with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  /* Prefer rldimi because rlwimi is cracked.  */
  if (TARGET_POWERPC64
      && (!dot || mode == DImode)
      && GET_CODE (operands[4]) != LSHIFTRT
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldimi. %0,%1,%2,%3";
      return "rldimi %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwimi. %0,%1,%2,%3,%4";
      return "rlwimi %0,%1,%2,%3,%4";
    }

  gcc_unreachable ();
}
/* Return whether an AND with C (a CONST_INT) in mode MODE can be done
   using two machine instructions.  */

bool
rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
{
  /* There are two kinds of AND we can handle with two insns:
     1) those we can do with two rl* insn;
     2) ori[s];xori[s].

     We do not handle that last case yet.  */

  /* If there is just one stretch of ones, we can do it.  */
  if (rs6000_is_valid_mask (c, NULL, NULL, mode))
    return true;

  /* Otherwise, fill in the lowest "hole"; if we can do the result with
     one insn, we can do the whole thing with two.  */
  unsigned HOST_WIDE_INT val = INTVAL (c);
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
}
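/* Worked example of the hole-filling arithmetic above (example values):
     val  = 0x66 = 0110 0110
     bit1 = val & -val          = 0x02  (lowest set bit)
     bit2 = (val + bit1) & ~val = 0x08  (bottom of the lowest "hole")
     val1 = (val + bit1) & val  = 0x60  (val with its lowest run of ones gone)
     bit3 = val1 & -val1        = 0x20  (first set bit above the hole)
   val + bit3 - bit2 = 0x7e = 0111 1110, a single stretch of ones, so an
   AND with 0x66 is doable as two rl* instructions.  */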
/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
   If EXPAND is true, split rotate-and-mask instructions we generate to
   their constituent parts as well (this is used during expand); if DOT
   is 1, make the last insn a record-form instruction clobbering the
   destination GPR and setting the CC reg (from operands[3]); if 2, set
   that GPR as well as the CC reg.  */

void
rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
{
  gcc_assert (!(expand && dot));

  unsigned HOST_WIDE_INT val = INTVAL (operands[2]);

  /* If it is one stretch of ones, it is DImode; shift left, mask, then
     shift right.  This generates better code than doing the masks without
     shifts, or shifting first right and then left.  */
  int nb, ne;
  if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
    {
      gcc_assert (mode == DImode);

      int shift = 63 - nb;
      if (expand)
	{
	  rtx tmp1 = gen_reg_rtx (DImode);
	  rtx tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
	  emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
	  emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
	}
      else
	{
	  rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
	  tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
	  emit_move_insn (operands[0], tmp);
	  tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
	  rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
	}
      return;
    }

  /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
     that does the rest.  */
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
  unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;

  gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));

  /* Two "no-rotate"-and-mask instructions, for SImode.  */
  if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
    {
      gcc_assert (mode == SImode);

      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
      emit_move_insn (reg, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  gcc_assert (mode == DImode);

  /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
     insns; we have to do the first in SImode, because it wraps.  */
  if (mask2 <= 0xffffffff
      && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
    {
      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
			     GEN_INT (mask1));
      rtx reg_low = gen_lowpart (SImode, reg);
      emit_move_insn (reg_low, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  /* Two rld* insns: rotate, clear the hole in the middle (which now is
     at the top end), rotate back and clear the other hole.  */
  int right = exact_log2 (bit3);
  int left = 64 - right;

  /* Rotate the mask too.  */
  mask1 = (mask1 >> right) | ((bit2 - 1) << left);

  if (expand)
    {
      rtx tmp1 = gen_reg_rtx (DImode);
      rtx tmp2 = gen_reg_rtx (DImode);
      rtx tmp3 = gen_reg_rtx (DImode);
      emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
      emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
      emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
      emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
    }
  else
    {
      rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
      emit_move_insn (operands[0], tmp);
      tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
    }
}
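/* A concrete SImode instance of the mask1/mask2 split (example values):
   val = 0x00ff00ff gives bit1 = 0x00000001, bit2 = 0x00000100,
   val1 = 0x00ff0000, bit3 = 0x00010000, hence
   mask1 = -bit3 + bit2 - 1 = 0xffff00ff (a wrap-around rlwinm mask) and
   mask2 = val + bit3 - bit2 = 0x00ffffff; mask1 & mask2 == val and each
   mask is a single rlwinm, so the AND takes exactly two insns.  */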
/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
   for lfq and stfq insns iff the registers are hard registers.  */

int
registers_ok_for_quad_peep (rtx reg1, rtx reg2)
{
  /* We might have been passed a SUBREG.  */
  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
    return 0;

  /* We might have been passed non floating point registers.  */
  if (!FP_REGNO_P (REGNO (reg1))
      || !FP_REGNO_P (REGNO (reg2)))
    return 0;

  return (REGNO (reg1) == REGNO (reg2) - 1);
}
/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
   addr1 and addr2 must be in consecutive memory locations
   (addr2 == addr1 + 8).  */

int
mems_ok_for_quad_peep (rtx mem1, rtx mem2)
{
  rtx addr1, addr2;
  unsigned int reg1, reg2;
  int offset1, offset2;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract an offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (GET_CODE (XEXP (addr1, 0)) != REG)
	return 0;
      else
	{
	  reg1 = REGNO (XEXP (addr1, 0));
	  /* The offset must be constant!  */
	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
	    return 0;
	  offset1 = INTVAL (XEXP (addr1, 1));
	}
    }
  else if (GET_CODE (addr1) != REG)
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* And now for the second addr.  */
  if (GET_CODE (addr2) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (GET_CODE (XEXP (addr2, 0)) != REG)
	return 0;
      else
	{
	  reg2 = REGNO (XEXP (addr2, 0));
	  /* The offset must be constant.  */
	  if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
	    return 0;
	  offset2 = INTVAL (XEXP (addr2, 1));
	}
    }
  else if (GET_CODE (addr2) != REG)
    return 0;
  else
    {
      reg2 = REGNO (addr2);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset2 = 0;
    }

  /* Both of these must have the same base register.  */
  if (reg1 != reg2)
    return 0;

  /* The offset for the second addr must be 8 more than the first addr.  */
  if (offset2 != offset1 + 8)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for lfq or stfq
     instructions.  */
  return 1;
}
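/* For example, (mem (plus (reg 3) (const_int 8))) followed by
   (mem (plus (reg 3) (const_int 16))) passes all of the checks above:
   the same base register with offsets differing by 8, so the pair is a
   candidate for a single lfq or stfq.  */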
/* Return the rtx of the memory location to use when a secondary memory
   location is needed for MODE; SDmode values go through the preallocated
   sdmode stack slot.  */

static rtx
rs6000_secondary_memory_needed_rtx (machine_mode mode)
{
  static bool eliminated = false;
  rtx ret;

  if (mode != SDmode || TARGET_NO_SDMODE_STACK)
    ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
  else
    {
      rtx mem = cfun->machine->sdmode_stack_slot;
      gcc_assert (mem != NULL_RTX);

      if (!eliminated)
	{
	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
	  cfun->machine->sdmode_stack_slot = mem;
	  eliminated = true;
	}
      ret = mem;
    }

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
	       GET_MODE_NAME (mode));
      if (ret)
	debug_rtx (ret);
      else
	fprintf (stderr, "\tNULL_RTX\n");
    }

  return ret;
}
/* Return the mode to be used for memory when a secondary memory
   location is needed.  For SDmode values we need to use DDmode, in
   all other cases we can use the same mode.  */

machine_mode
rs6000_secondary_memory_needed_mode (machine_mode mode)
{
  if (lra_in_progress && mode == SDmode)
    return DDmode;
  return mode;
}
/* Walk the statements of the current function looking for an SDmode
   reference; helper for rs6000_alloc_sdmode_stack_slot below.  */

static tree
rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
  /* Don't walk into types.  */
  if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
    {
      *walk_subtrees = 0;
      return NULL_TREE;
    }

  switch (TREE_CODE (*tp))
    {
    case VAR_DECL:
    case PARM_DECL:
    case FIELD_DECL:
    case RESULT_DECL:
    case SSA_NAME:
    case REAL_CST:
    case MEM_REF:
    case VIEW_CONVERT_EXPR:
      if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
	return *tp;
      break;
    default:
      break;
    }

  return NULL_TREE;
}
/* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
   on traditional floating point registers, and the VMRGOW/VMRGEW instructions
   only work on the traditional altivec registers, note if an altivec register
   was chosen.  */

static enum rs6000_reg_type
register_to_reg_type (rtx reg, bool *is_altivec)
{
  HOST_WIDE_INT regno;
  enum reg_class rclass;

  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return NO_REG_TYPE;

  regno = REGNO (reg);
  if (regno >= FIRST_PSEUDO_REGISTER)
    {
      if (!lra_in_progress && !reload_in_progress && !reload_completed)
	return PSEUDO_REG_TYPE;

      regno = true_regnum (reg);
      if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
	return PSEUDO_REG_TYPE;
    }

  gcc_assert (regno >= 0);

  if (is_altivec && ALTIVEC_REGNO_P (regno))
    *is_altivec = true;

  rclass = rs6000_regno_regclass[regno];
  return reg_class_to_reg_type[(int)rclass];
}
/* Helper function to return the cost of adding a TOC entry address.  */

static inline int
rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
{
  int ret;

  if (TARGET_CMODEL != CMODEL_SMALL)
    ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
  else
    ret = (TARGET_MINIMAL_TOC) ? 6 : 3;

  return ret;
}
/* Helper function for rs6000_secondary_reload to determine whether the memory
   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
   needs reloading.  Return negative if the memory is not handled by the memory
   helper functions and to try a different reload method, 0 if no additional
   instructions are needed, and positive to give the extra cost for the
   memory.  */

static int
rs6000_secondary_reload_memory (rtx addr,
				enum reg_class rclass,
				machine_mode mode)
{
  int extra_cost = 0;
  rtx reg, and_arg, plus_arg0, plus_arg1;
  addr_mask_type addr_mask;
  const char *type = NULL;
  const char *fail_msg = NULL;

  if (GPR_REG_CLASS_P (rclass))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (rclass == FLOAT_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (rclass == ALTIVEC_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  /* For the combined VSX_REGS, turn off Altivec AND -16.  */
  else if (rclass == VSX_REGS)
    addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
		 & ~RELOAD_REG_AND_M16);

  /* If the register allocator hasn't made up its mind yet on the register
     class to use, settle on defaults to use.  */
  else if (rclass == NO_REGS)
    {
      addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
		   & ~RELOAD_REG_AND_M16);

      if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
	addr_mask &= ~(RELOAD_REG_INDEXED
		       | RELOAD_REG_PRE_INCDEC
		       | RELOAD_REG_PRE_MODIFY);
    }

  else
    addr_mask = 0;

  /* If the register isn't valid in this register class, just return now.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    {
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr,
		   "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		   "not valid in class\n",
		   GET_MODE_NAME (mode), reg_class_names[rclass]);
	  debug_rtx (addr);
	}

      return -1;
    }

  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  We
	 don't need a scratch register, since the powerpc only supports
	 PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      reg = XEXP (addr, 0);
      if (!base_reg_operand (addr, GET_MODE (reg)))
	{
	  fail_msg = "no base register #1";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

    case PRE_MODIFY:
      reg = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);
      if (!base_reg_operand (reg, GET_MODE (reg))
	  || GET_CODE (plus_arg1) != PLUS
	  || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
	{
	  fail_msg = "bad PRE_MODIFY";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  Only allow the AND for vector sizes.  */
    case AND:
      and_arg = XEXP (addr, 0);
      if (GET_MODE_SIZE (mode) != 16
	  || GET_CODE (XEXP (addr, 1)) != CONST_INT
	  || INTVAL (XEXP (addr, 1)) != -16)
	{
	  fail_msg = "bad Altivec AND #1";
	  extra_cost = -1;
	}

      if (rclass != ALTIVEC_REGS)
	{
	  if (legitimate_indirect_address_p (and_arg, false))
	    extra_cost = 1;

	  else if (legitimate_indexed_address_p (and_arg, false))
	    extra_cost = 2;

	  else
	    {
	      fail_msg = "bad Altivec AND #2";
	      extra_cost = -1;
	    }

	  type = "and";
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!legitimate_indirect_address_p (addr, false))
	{
	  extra_cost = 1;
	  type = "move";
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      plus_arg0 = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);

      /* (plus (plus (reg) (constant)) (constant)) is generated during
	 push_reload processing, so handle it now.  */
      if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset";
	    }
	}

      /* (plus (plus (reg) (constant)) (reg)) is also generated during
	 push_reload processing, so handle it now.  */
      else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      extra_cost = 1;
	      type = "indexed #2";
	    }
	}

      else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
	{
	  fail_msg = "no base register #2";
	  extra_cost = -1;
	}

      else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0
	      || !legitimate_indexed_address_p (addr, false))
	    {
	      extra_cost = 1;
	      type = "indexed";
	    }
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
	       && CONST_INT_P (plus_arg1))
	{
	  if (!quad_address_offset_p (INTVAL (plus_arg1)))
	    {
	      extra_cost = 1;
	      type = "vector d-form offset";
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset #2";
	    }
	}

      else
	{
	  fail_msg = "bad PLUS";
	  extra_cost = -1;
	}
      break;

    case LO_SUM:
      /* Quad offsets are restricted and can't handle normal addresses.  */
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum";
	}

      else if (!legitimate_lo_sum_address_p (mode, addr, false))
	{
	  fail_msg = "bad LO_SUM";
	  extra_cost = -1;
	}

      if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "lo_sum";
	}
      break;

      /* Static addresses need to create a TOC entry.  */
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #2";
	}

      else
	{
	  type = "address";
	  extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
	}
      break;

      /* TOC references look like offsetable memory.  */
    case UNSPEC:
      if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
	{
	  fail_msg = "bad UNSPEC";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #3";
	}

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "toc reference";
	}
      break;

    default:
      {
	fail_msg = "bad address";
	extra_cost = -1;
      }
    }

  if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
    {
      if (extra_cost < 0)
	fprintf (stderr,
		 "rs6000_secondary_reload_memory error: mode = %s, "
		 "class = %s, addr_mask = '%s', %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 (fail_msg != NULL) ? fail_msg : "<bad address>");

      else
	fprintf (stderr,
		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		 "addr_mask = '%s', extra cost = %d, %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 extra_cost,
		 (type) ? type : "<none>");

      debug_rtx (addr);
    }

  return extra_cost;
}
/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */

static bool
rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode)
{
  int size = GET_MODE_SIZE (mode);

  /* Add support for various direct moves available.  In this function, we only
     look at cases where we don't need any extra registers, and one or more
     simple move insns are issued.  Originally small integers are not allowed
     in FPR/VSX registers.  Single precision binary floating is not a simple
     move because we need to convert to the single precision memory layout.
     The 4-byte SDmode can be moved.  TDmode values are disallowed since they
     need special direct move handling, which we do not support yet.  */
  if (TARGET_DIRECT_MOVE
      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
    {
      if (TARGET_POWERPC64)
	{
	  /* ISA 2.07: MTVSRD or MFVSRD.  */
	  if (size == 8)
	    return true;

	  /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
	  if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (TARGET_VSX_SMALL_INTEGER)
	{
	  if (mode == SImode)
	    return true;

	  if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (mode == SDmode)
	return true;
    }

  /* Power6+: MFTGPR or MFFGPR.  */
  else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
	   && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
	       || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
    return true;

  /* Move to/from SPR.  */
  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
    return true;

  return false;
}
/* Direct move helper function for rs6000_secondary_reload, handle all of the
   special direct moves that involve allocating an extra register, return the
   insn code of the helper function if there is such a function or
   CODE_FOR_nothing if not.  */

static bool
rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode,
				     secondary_reload_info *sri,
				     bool altivec_p)
{
  bool ret = false;
  enum insn_code icode = CODE_FOR_nothing;
  int cost = 0;
  int size = GET_MODE_SIZE (mode);

  if (TARGET_POWERPC64 && size == 16)
    {
      /* Handle moving 128-bit values from GPRs to VSX registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}

      /* Handle moving 128-bit values from VSX registers to GPRs on
	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to
	 the bottom 64-bit value.  */
      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}
    }

  else if (TARGET_POWERPC64 && mode == SFmode)
    {
      if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* xscvdpspn, mfvsrd, and.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}

      else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 2;			/* mtvsrz, xscvspdpn.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}
    }

  else if (!TARGET_POWERPC64 && size == 8)
    {
      /* Handle moving 64-bit values from GPRs to floating point registers on
	 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
	 32-bit values back together.  Altivec register classes must be handled
	 specially since a different instruction is used, and the secondary
	 reload support requires a single instruction class in the scratch
	 register constraint.  However, right now TFmode is not allowed in
	 Altivec registers, so the pattern will never match.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
	{
	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
	  icode = reg_addr[mode].reload_fpr_gpr;
	}
    }

  if (icode != CODE_FOR_nothing)
    {
      ret = true;
      if (sri)
	{
	  sri->icode = icode;
	  sri->extra_cost = cost;
	}
    }

  return ret;
}
/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case).  */

static bool
rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
			      enum rs6000_reg_type from_type,
			      machine_mode mode,
			      secondary_reload_info *sri,
			      bool altivec_p)
{
  /* Fall back to load/store reloads if either type is not a register.  */
  if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
    return false;

  /* If we haven't allocated registers yet, assume the move can be done for the
     standard register types.  */
  if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
      || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
      || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
    return true;

  /* Moves to the same set of registers is a simple move for non-specialized
     registers.  */
  if (to_type == from_type && IS_STD_REG_TYPE (to_type))
    return true;

  /* Check whether a simple move can be done directly.  */
  if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
    {
      if (sri)
	{
	  sri->icode = CODE_FOR_nothing;
	  sri->extra_cost = 0;
	}
      return true;
    }

  /* Now check if we can do it in a few steps.  */
  return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
					      altivec_p);
}
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.

   For VSX and Altivec, we may need a register to convert sp+offset into
   reg+sp.

   For misaligned 64-bit gpr loads and stores we need a register to
   convert an offset address to indirect.  */

static reg_class_t
rs6000_secondary_reload (bool in_p,
			 rtx x,
			 reg_class_t rclass_i,
			 machine_mode mode,
			 secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;
  reg_class_t ret = ALL_REGS;
  enum insn_code icode;
  bool default_p = false;
  bool done_p = false;

  /* Allow subreg of memory before/during reload.  */
  bool memory_p = (MEM_P (x)
		   || (!reload_completed && GET_CODE (x) == SUBREG
		       && MEM_P (SUBREG_REG (x))));

  sri->icode = CODE_FOR_nothing;
  sri->t_icode = CODE_FOR_nothing;
  sri->extra_cost = 0;
  icode = ((in_p)
	   ? reg_addr[mode].reload_load
	   : reg_addr[mode].reload_store);

  if (REG_P (x) || register_operand (x, mode))
    {
      enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
      bool altivec_p = (rclass == ALTIVEC_REGS);
      enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);

      if (!in_p)
	std::swap (to_type, from_type);

      /* Can we do a direct move of some sort?  */
      if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
					altivec_p))
	{
	  icode = (enum insn_code)sri->icode;
	  default_p = false;
	  done_p = true;
	  ret = NO_REGS;
	}
    }

  /* Make sure 0.0 is not reloaded or forced into memory.  */
  if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
    {
      ret = NO_REGS;
      default_p = false;
      done_p = true;
    }

  /* If this is a scalar floating point value and we want to load it into the
     traditional Altivec registers, do it via a move via a traditional floating
     point register, unless we have D-form addressing.  Also make sure that
     non-zero constants use a FPR.  */
  if (!done_p && reg_addr[mode].scalar_in_vmx_p
      && !mode_supports_vmx_dform (mode)
      && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
      && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
    {
      ret = FLOAT_REGS;
      default_p = false;
      done_p = true;
    }

  /* Handle reload of load/stores if we have reload helper functions.  */
  if (!done_p && icode != CODE_FOR_nothing && memory_p)
    {
      int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
						       mode);

      if (extra_cost >= 0)
	{
	  done_p = true;
	  ret = NO_REGS;
	  if (extra_cost > 0)
	    {
	      sri->extra_cost = extra_cost;
	      sri->icode = icode;
	    }
	}
    }

  /* Handle unaligned loads and stores of integer registers.  */
  if (!done_p && TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and the offset is not a multiple of
	     four or we have an address wrap.  Address wrap will only
	     occur for LO_SUMs since legitimate_offset_address_p
	     rejects addresses for 16-byte mems that will wrap.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
		 && ((offset & 3) != 0
		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
		 && (offset & 3) != 0))
	    {
	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
	      if (in_p)
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
			      : CODE_FOR_reload_di_load);
	      else
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
			      : CODE_FOR_reload_di_store);
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	}
    }

  if (!done_p && !TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and we have a wrap.

	     legitimate_lo_sum_address_p allows LO_SUM addresses to
	     have any offset so test for wrap in the low 16 bits.

	     legitimate_offset_address_p checks for the range
	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
	     [0x7ff4,0x7fff] respectively, so test for the
	     intersection of these ranges, [0x7ffc,0x7fff] and
	     [0x7ff4,0x7ff7] respectively.

	     Note that the address we see here may have been
	     manipulated by legitimize_reload_address.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
	    {
	      if (in_p)
		sri->icode = CODE_FOR_reload_si_load;
	      else
		sri->icode = CODE_FOR_reload_si_store;
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	}
    }

  if (!done_p)
    default_p = true;

  if (default_p)
    ret = default_secondary_reload (in_p, x, rclass, mode, sri);

  gcc_assert (ret != ALL_REGS);

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
	       "mode = %s",
	       reg_class_names[ret],
	       in_p ? "true" : "false",
	       reg_class_names[rclass],
	       GET_MODE_NAME (mode));

      if (reload_completed)
	fputs (", after reload", stderr);

      if (!done_p)
	fputs (", done_p not set", stderr);

      if (default_p)
	fputs (", default secondary reload", stderr);

      if (sri->icode != CODE_FOR_nothing)
	fprintf (stderr, ", reload func = %s, extra cost = %d",
		 insn_data[sri->icode].name, sri->extra_cost);

      else if (sri->extra_cost > 0)
	fprintf (stderr, ", extra cost = %d", sri->extra_cost);

      fputs ("\n", stderr);
      debug_rtx (x);
    }

  return ret;
}
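/* A worked instance of the 32-bit wrap test above: a DFmode access with
   -m32 has extra = 8 - 4 = 4, so a LO_SUM offset of 0x7ffe gives
   ((0x7ffe & 0xffff) ^ 0x8000) = 0xfffe >= 0x10000 - 4 = 0xfffc; the
   second word at +4 would wrap the 16-bit displacement, so a scratch
   register and CODE_FOR_reload_si_load are requested.  */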
/* Better tracing for rs6000_secondary_reload_inner.  */

static void
rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
			       bool store_p)
{
  rtx set, clobber;

  gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);

  fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
	   store_p ? "store" : "load");

  if (store_p)
    set = gen_rtx_SET (mem, reg);
  else
    set = gen_rtx_SET (reg, mem);

  clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
  debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
}

static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
			      bool store_p)
{
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
}
/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
   reload helper functions.  These were identified in
   rs6000_secondary_reload_memory, and if reload decided to use the secondary
   reload, it calls the insns:
	reload_<RELOAD:mode>_<P:mptrsize>_store
	reload_<RELOAD:mode>_<P:mptrsize>_load

   which in turn calls this function, to do whatever is necessary to create
   valid addresses.  */

void
rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  machine_mode mode = GET_MODE (reg);
  addr_mask_type addr_mask;
  rtx addr;
  rtx new_addr;
  rtx op_reg, op0, op1;
  rtx and_op;
  rtx cc_clobber;
  rtvec rv;

  if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
      || !base_reg_operand (scratch, GET_MODE (scratch)))
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  else
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Make sure the mode is valid in this register class.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (TARGET_DEBUG_ADDR)
    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);

  new_addr = addr = XEXP (mem, 0);
  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  If
	 not, do the update now.  We don't need a scratch register, since the
	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      op_reg = XEXP (addr, 0);
      if (!base_reg_operand (op_reg, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
	  new_addr = op_reg;
	}
      break;

    case PRE_MODIFY:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode)
	  || GET_CODE (op1) != PLUS
	  || !rtx_equal_p (op0, XEXP (op1, 0)))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  emit_insn (gen_rtx_SET (op0, op1));
	  new_addr = op0;
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  */
    case AND:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
	{
	  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
	    op_reg = op0;

	  else if (GET_CODE (op1) == PLUS)
	    {
	      emit_insn (gen_rtx_SET (scratch, op1));
	      op_reg = scratch;
	    }

	  else
	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch,
					  store_p);

	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
	  new_addr = scratch;
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!base_reg_operand (addr, GET_MODE (addr)))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
	{
	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
	      || !quad_address_p (addr, mode, false))
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case LO_SUM:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets are restricted and can't handle normal addresses.  */
      else if (mode_supports_vsx_dform_quad (mode))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (legitimate_lo_sum_address_p (mode, addr, false))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      rs6000_emit_move (scratch, addr, Pmode);
      new_addr = scratch;
      break;

    default:
      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
    }

  /* Adjust the address if it changed.  */
  if (addr != new_addr)
    {
      mem = replace_equiv_address_nv (mem, new_addr);
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
    }

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
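/* For example, when the register class lacks update forms for MODE, the
   PRE_INC case above rewrites (pre_inc (reg rB)) for a 16-byte access into
   an explicit add of 16 to rB (via gen_add2_insn) followed by a plain
   (mem (reg rB)) access.  */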
/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.  */

void
rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx addr;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
	       store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      debug_rtx (reg);
      fprintf (stderr, "mem:\n");
      debug_rtx (mem);
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);
    }

  gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
  gcc_assert (GET_CODE (mem) == MEM);
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
    {
      gcc_assert (REG_P (XEXP (addr, 0))
		  && GET_CODE (XEXP (addr, 1)) == PLUS
		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      scratch_or_premodify = XEXP (addr, 0);
      if (!HARD_REGISTER_P (scratch_or_premodify))
	/* If we have a pseudo here then reload will have arranged
	   to have it replaced, but only in the original insn.
	   Use the replacement here too.  */
	scratch_or_premodify = find_replacement (&XEXP (addr, 0));

      /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
	 expressions from the original insn, without unsharing them.
	 Any RTL that points into the original insn will of course
	 have register replacements applied.  That is why we don't
	 need to look for replacements under the PLUS.  */
      addr = XEXP (addr, 1);
    }
  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
/* Allocate a 64-bit stack slot to be used for copying SDmode values through if
   this function has any SDmode references.  If we are on a power7 or later, we
   don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
   can load/store the value.  */

static void
rs6000_alloc_sdmode_stack_slot (void)
{
  tree t;
  basic_block bb;
  gimple_stmt_iterator gsi;

  gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
  /* We use a different approach for dealing with the secondary
     memory in LRA.  */
  if (ira_use_lra_p)
    return;

  if (TARGET_NO_SDMODE_STACK)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
	if (ret)
	  {
	    rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
	    cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
								  SDmode, 0);
	    return;
	  }
      }

  /* Check for any SDmode parameters of the function.  */
  for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
    {
      if (TREE_TYPE (t) == error_mark_node)
	continue;

      if (TYPE_MODE (TREE_TYPE (t)) == SDmode
	  || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
	{
	  rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
	  cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
								SDmode, 0);
	  return;
	}
    }
}
static void
rs6000_instantiate_decls (void)
{
  if (cfun->machine->sdmode_stack_slot != NULL_RTX)
    instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
}
/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?
 */

static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
     registers.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
    {
      if (is_constant)
	{
	  /* Zero is always allowed in all VSX registers.  */
	  if (x == CONST0_RTX (mode))
	    return rclass;

	  /* If this is a vector constant that can be formed with a few Altivec
	     instructions, we want altivec registers.  */
	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	    return ALTIVEC_REGS;

	  /* If this is an integer constant that can easily be loaded into
	     vector registers, allow it.  */
	  if (CONST_INT_P (x))
	    {
	      HOST_WIDE_INT value = INTVAL (x);

	      /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
		 2.06 can generate it in the Altivec registers with
		 VSPLTI<x>.  */
	      if (value == -1)
		{
		  if (TARGET_P8_VECTOR)
		    return rclass;
		  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
		    return ALTIVEC_REGS;
		  else
		    return NO_REGS;
		}

	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
		 a sign extend in the Altivec registers.  */
	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
		  && TARGET_VSX_SMALL_INTEGER
		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
		return ALTIVEC_REGS;
	    }

	  /* Force constant to memory.  */
	  return NO_REGS;
	}

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_vsx_dform_quad (mode))
	return rclass;

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
	return FLOAT_REGS;

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
	 loads.  */
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

      return rclass;
    }

  if (is_constant || GET_CODE (x) == PLUS)
    {
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))
	return BASE_REGS;
      return NO_REGS;
    }

  if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
    return GENERAL_REGS;

  return rclass;
}
/* Debug version of rs6000_preferred_reload_class.  */
static enum reg_class
rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
{
  enum reg_class ret = rs6000_preferred_reload_class (x, rclass);

  fprintf (stderr,
	   "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
	   "mode = %s, x:\n",
	   reg_class_names[ret], reg_class_names[rclass],
	   GET_MODE_NAME (GET_MODE (x)));
  debug_rtx (x);

  return ret;
}
/* If we are copying between FP or AltiVec registers and anything else, we need
   a memory location.  The exception is when we are targeting ppc64 and the
   move to/from fpr to gpr instructions are available.  Also, under VSX, you
   can copy vector registers from the FP register set to the Altivec register
   set and vice versa.  */

bool
rs6000_secondary_memory_needed (enum reg_class from_class,
				enum reg_class to_class,
				machine_mode mode)
{
  enum rs6000_reg_type from_type, to_type;
  bool altivec_p = ((from_class == ALTIVEC_REGS)
		    || (to_class == ALTIVEC_REGS));

  /* If a simple/direct move is available, we don't need secondary memory  */
  from_type = reg_class_to_reg_type[(int)from_class];
  to_type = reg_class_to_reg_type[(int)to_class];

  if (rs6000_secondary_reload_move (to_type, from_type, mode,
				    (secondary_reload_info *)0, altivec_p))
    return false;

  /* If we have a floating point or vector register class, we need to use
     memory to transfer the data.  */
  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
    return true;

  return false;
}
/* Debug version of rs6000_secondary_memory_needed.  */
bool
rs6000_debug_secondary_memory_needed (enum reg_class from_class,
				      enum reg_class to_class,
				      machine_mode mode)
{
  bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);

  fprintf (stderr,
	   "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
	   "to_class = %s, mode = %s\n",
	   ret ? "true" : "false",
	   reg_class_names[from_class],
	   reg_class_names[to_class],
	   GET_MODE_NAME (mode));

  return ret;
}
/* Return the register class of a scratch register needed to copy IN into
   or out of a register in RCLASS in MODE.  If it can be done directly,
   NO_REGS is returned.  */

static enum reg_class
rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
			       rtx in)
{
  int regno;

  if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
#if TARGET_MACHO
		     && MACHOPIC_INDIRECT
#endif
		     ))
    {
      /* We cannot copy a symbolic operand directly into anything
	 other than BASE_REGS for TARGET_ELF.  So indicate that a
	 register from BASE_REGS is needed as an intermediate
	 register.

	 On Darwin, pic addresses require a load from memory, which
	 needs a base register.  */
      if (rclass != BASE_REGS
	  && (GET_CODE (in) == SYMBOL_REF
	      || GET_CODE (in) == HIGH
	      || GET_CODE (in) == LABEL_REF
	      || GET_CODE (in) == CONST))
	return BASE_REGS;
    }

  if (GET_CODE (in) == REG)
    {
      regno = REGNO (in);
      if (regno >= FIRST_PSEUDO_REGISTER)
	{
	  regno = true_regnum (in);
	  if (regno >= FIRST_PSEUDO_REGISTER)
	    regno = -1;
	}
    }
  else if (GET_CODE (in) == SUBREG)
    {
      regno = true_regnum (in);
      if (regno >= FIRST_PSEUDO_REGISTER)
	regno = -1;
    }
  else
    regno = -1;

  /* If we have VSX register moves, prefer moving scalar values between
     Altivec registers and GPR by going via an FPR (and then via memory)
     instead of reloading the secondary memory address for Altivec moves.  */
  if (TARGET_VSX
      && GET_MODE_SIZE (mode) < 16
      && !mode_supports_vmx_dform (mode)
      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
	   && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
	  || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
	      && (regno >= 0 && INT_REGNO_P (regno)))))
    return FLOAT_REGS;

  /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
     into anything.  */
  if (rclass == GENERAL_REGS || rclass == BASE_REGS
      || (regno >= 0 && INT_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and VSX registers can go into VSX registers (both the
     traditional floating point and the altivec registers).  */
  if (rclass == VSX_REGS
      && (regno == -1 || VSX_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and FP registers can go into FP registers.  */
  if ((regno == -1 || FP_REGNO_P (regno))
      && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;

  /* Memory, and AltiVec registers can go into AltiVec registers.  */
  if ((regno == -1 || ALTIVEC_REGNO_P (regno))
      && rclass == ALTIVEC_REGS)
    return NO_REGS;

  /* We can copy among the CR registers.  */
  if ((rclass == CR_REGS || rclass == CR0_REGS)
      && regno >= 0 && CR_REGNO_P (regno))
    return NO_REGS;

  /* Otherwise, we need GENERAL_REGS.  */
  return GENERAL_REGS;
}
/* Debug version of rs6000_secondary_reload_class.  */
static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class rclass,
				     machine_mode mode, rtx in)
{
  enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
  fprintf (stderr,
	   "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
	   "mode = %s, input rtx:\n",
	   reg_class_names[ret], reg_class_names[rclass],
	   GET_MODE_NAME (mode));
  debug_rtx (in);

  return ret;
}
/* Return nonzero if for CLASS a mode change from FROM to TO is invalid.  */

static bool
rs6000_cannot_change_mode_class (machine_mode from,
				 machine_mode to,
				 enum reg_class rclass)
{
  unsigned from_size = GET_MODE_SIZE (from);
  unsigned to_size = GET_MODE_SIZE (to);

  if (from_size != to_size)
    {
      enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;

      if (reg_classes_intersect_p (xclass, rclass))
	{
	  unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
	  unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);

	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
	     single register under VSX because the scalar part of the register
	     is in the upper 64-bits, and not the lower 64-bits.  Types like
	     TFmode/TDmode that take 2 scalar register can overlap.  128-bit
	     IEEE floating point can't overlap, and neither can small
	     values.  */

	  if (to_float128_vector_p && from_float128_vector_p)
	    return false;

	  else if (to_float128_vector_p || from_float128_vector_p)
	    return true;

	  /* TDmode in floating-mode registers must always go into a register
	     pair with the most significant word in the even-numbered register
	     to match ISA requirements.  In little-endian mode, this does not
	     match subreg numbering, so we cannot allow subregs.  */
	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
	    return true;

	  if (from_size < 8 || to_size < 8)
	    return true;

	  if (from_size == 8 && (8 * to_nregs) != to_size)
	    return true;

	  if (to_size == 8 && (8 * from_nregs) != from_size)
	    return true;

	  return false;
	}
      else
	return false;
    }

  if (TARGET_E500_DOUBLE
      && ((((to) == DFmode) + ((from) == DFmode)) == 1
	  || (((to) == TFmode) + ((from) == TFmode)) == 1
	  || (((to) == IFmode) + ((from) == IFmode)) == 1
	  || (((to) == KFmode) + ((from) == KFmode)) == 1
	  || (((to) == DDmode) + ((from) == DDmode)) == 1
	  || (((to) == TDmode) + ((from) == TDmode)) == 1
	  || (((to) == DImode) + ((from) == DImode)) == 1))
    return true;

  /* Since the VSX register set includes traditional floating point registers
     and altivec registers, just check for the size being different instead of
     trying to check whether the modes are vector modes.  Otherwise it won't
     allow say DF and DI to change classes.  For types like TFmode and TDmode
     that take 2 64-bit registers, rather than a single 128-bit register, don't
     allow subregs of those types to other 128 bit types.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
    {
      unsigned num_regs = (from_size + 15) / 16;
      if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
	  || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
	return true;

      return (from_size != 8 && from_size != 16);
    }

  if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
    return true;

  if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
      && reg_classes_intersect_p (GENERAL_REGS, rclass))
    return true;

  return false;
}
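/* Two data points for the VSX size check above: TFmode (16 bytes in two
   FPRs) to V2DImode (16 bytes in one VSX register) is rejected because
   hard_regno_nregs[FIRST_FPR_REGNO][TFmode] = 2 exceeds num_regs = 1,
   while DFmode <-> DImode (one 8-byte register each) falls through to the
   from_size == 8 test and is allowed.  */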
/* Debug version of rs6000_cannot_change_mode_class.  */
static bool
rs6000_debug_cannot_change_mode_class (machine_mode from,
				       machine_mode to,
				       enum reg_class rclass)
{
  bool ret = rs6000_cannot_change_mode_class (from, to, rclass);

  fprintf (stderr,
	   "rs6000_cannot_change_mode_class, return %s, from = %s, "
	   "to = %s, rclass = %s\n",
	   ret ? "true" : "false",
	   GET_MODE_NAME (from), GET_MODE_NAME (to),
	   reg_class_names[rclass]);

  return ret;
}
/* Return a string to do a move operation of 128 bits of data.  */

const char *
rs6000_output_move_128bit (rtx operands[])
{
  rtx dest = operands[0];
  rtx src = operands[1];
  machine_mode mode = GET_MODE (dest);
  int dest_regno;
  int src_regno;
  bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
  bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;

  if (REG_P (dest))
    {
      dest_regno = REGNO (dest);
      dest_gpr_p = INT_REGNO_P (dest_regno);
      dest_fp_p = FP_REGNO_P (dest_regno);
      dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
      dest_vsx_p = dest_fp_p | dest_vmx_p;
    }
  else
    {
      dest_regno = -1;
      dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
    }

  if (REG_P (src))
    {
      src_regno = REGNO (src);
      src_gpr_p = INT_REGNO_P (src_regno);
      src_fp_p = FP_REGNO_P (src_regno);
      src_vmx_p = ALTIVEC_REGNO_P (src_regno);
      src_vsx_p = src_fp_p | src_vmx_p;
    }
  else
    {
      src_regno = -1;
      src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
    }

  /* Register moves.  */
  if (dest_regno >= 0 && src_regno >= 0)
    {
      if (dest_gpr_p)
	{
	  if (src_gpr_p)
	    return "#";

	  if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
		    : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");

	  else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
	    return "#";
	}

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (src_vsx_p)
	    return "xxlor %x0,%x1,%x1";

	  else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mtvsrdd %x0,%1,%L1"
		    : "mtvsrdd %x0,%L1,%1");

	  else if (TARGET_DIRECT_MOVE && src_gpr_p)
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
	return "vor %0,%1,%1";

      else if (dest_fp_p && src_fp_p)
	return "#";
    }

  /* Loads.  */
  else if (dest_regno >= 0 && MEM_P (src))
    {
      if (dest_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "lq %0,%1";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p
	       && altivec_indexed_or_indirect_operand (src, mode))
	return "lvx %0,%y1";

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (mode_supports_vsx_dform_quad (mode)
	      && quad_address_p (XEXP (src, 0), mode, true))
	    return "lxv %x0,%1";

	  else if (TARGET_P9_VECTOR)
	    return "lxvx %x0,%y1";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "lxvw4x %x0,%y1";

	  else
	    return "lxvd2x %x0,%y1";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p)
	return "lvx %0,%y1";

      else if (dest_fp_p)
	return "#";
    }

  /* Stores.  */
  else if (src_regno >= 0 && MEM_P (dest))
    {
      if (src_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "stq %1,%0";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && src_vmx_p
	       && altivec_indexed_or_indirect_operand (src, mode))
	return "stvx %1,%y0";

      else if (TARGET_VSX && src_vsx_p)
	{
	  if (mode_supports_vsx_dform_quad (mode)
	      && quad_address_p (XEXP (dest, 0), mode, true))
	    return "stxv %x1,%0";

	  else if (TARGET_P9_VECTOR)
	    return "stxvx %x1,%y0";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "stxvw4x %x1,%y0";

	  else
	    return "stxvd2x %x1,%y0";
	}

      else if (TARGET_ALTIVEC && src_vmx_p)
	return "stvx %1,%y0";

      else if (src_fp_p)
	return "#";
    }

  /* Constants.  */
  else if (dest_regno >= 0
	   && (GET_CODE (src) == CONST_INT
	       || GET_CODE (src) == CONST_WIDE_INT
	       || GET_CODE (src) == CONST_DOUBLE
	       || GET_CODE (src) == CONST_VECTOR))
    {
      if (dest_gpr_p)
	return "#";

      else if ((dest_vmx_p && TARGET_ALTIVEC)
	       || (dest_vsx_p && TARGET_VSX))
	return output_vec_const_move (operands);
    }

  fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}
/* Validate a 128-bit move.  */
bool
rs6000_move_128bit_ok_p (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  return (gpc_reg_operand (operands[0], mode)
	  || gpc_reg_operand (operands[1], mode));
}

/* Return true if a 128-bit move needs to be split.  */
bool
rs6000_split_128bit_ok_p (rtx operands[])
{
  if (!reload_completed)
    return false;

  if (!gpr_or_gpr_p (operands[0], operands[1]))
    return false;

  if (quad_load_store_p (operands[0], operands[1]))
    return false;

  return true;
}
/* Given a comparison operation, return the bit number in CCR to test.  We
   know this is a valid comparison.

   SCC_P is 1 if this is for an scc.  That means that %D will have been
   used instead of %C, so the bits will be in different places.

   Return -1 if OP isn't a valid comparison for some reason.  */

int
ccr_bit (rtx op, int scc_p)
{
  enum rtx_code code = GET_CODE (op);
  machine_mode cc_mode;
  int cc_regnum;
  int base_bit;
  rtx reg;

  if (!COMPARISON_P (op))
    return -1;

  reg = XEXP (op, 0);

  gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));

  cc_mode = GET_MODE (reg);
  cc_regnum = REGNO (reg);
  base_bit = 4 * (cc_regnum - CR0_REGNO);

  validate_condition_mode (code, cc_mode);

  /* When generating a sCOND operation, only positive conditions are
     allowed.  */
  gcc_assert (!scc_p
	      || code == EQ || code == GT || code == LT || code == UNORDERED
	      || code == GTU || code == LTU);

  switch (code)
    {
    case NE:
      return scc_p ? base_bit + 3 : base_bit + 2;
    case EQ:
      return base_bit + 2;
    case GT:  case GTU:  case UNLE:
      return base_bit + 1;
    case LT:  case LTU:  case UNGE:
      return base_bit;
    case ORDERED:  case UNORDERED:
      return base_bit + 3;

    case GE:  case GEU:
      /* If scc, we will have done a cror to put the bit in the
	 unordered position.  So test that bit.  For integer, this is ! LT
	 unless this is an scc insn.  */
      return scc_p ? base_bit + 3 : base_bit;

    case LE:  case LEU:
      return scc_p ? base_bit + 3 : base_bit + 1;

    default:
      gcc_unreachable ();
    }
}
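/* Example: a GT comparison in CR field 2 has base_bit = 4 * 2 = 8, and GT
   sits one bit above LT, so ccr_bit returns 8 + 1 = 9 -- the CR bit that a
   branch on cr2.gt tests.  */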
/* Return the GOT register.  */

rtx
rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
{
  /* The second flow pass currently (June 1999) can't update
     regs_ever_live without disturbing other parts of the compiler, so
     update it here to make the prolog/epilogue code happy.  */
  if (!can_create_pseudo_p ()
      && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
    df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);

  crtl->uses_pic_offset_table = 1;

  return pic_offset_table_rtx;
}

static rs6000_stack_t stack_info;

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
rs6000_init_machine_status (void)
{
  stack_info.reload_completed = 0;
  return ggc_cleared_alloc<machine_function> ();
}
#define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)

/* Write out a function code label.  */

void
rs6000_output_function_entry (FILE *file, const char *fname)
{
  if (fname[0] != '.')
    {
      switch (DEFAULT_ABI)
	{
	default:
	  gcc_unreachable ();

	case ABI_AIX:
	  if (DOT_SYMBOLS)
	    putc ('.', file);
	  else
	    ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
	  break;

	case ABI_ELFv2:
	case ABI_V4:
	case ABI_DARWIN:
	  break;
	}
    }

  RS6000_OUTPUT_BASENAME (file, fname);
}
/* Print an operand.  Recognize special options, documented below.  */

#if TARGET_ELF
#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
#else
#define SMALL_DATA_RELOC "sda21"
#define SMALL_DATA_REG 0
#endif

void
print_operand (FILE *file, rtx x, int code)
{
  int i;
  unsigned HOST_WIDE_INT uval;

  switch (code)
    {
      /* %a is output_address.  */

      /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
	 output_operand.  */

    case 'D':
      /* Like 'J' but get to the GT bit only.  */
      gcc_assert (REG_P (x));

      /* Bit 1 is GT bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 1;

      /* Add one for shift count in rlinm for scc.  */
      fprintf (file, "%d", i + 1);
      return;

    case 'e':
      /* If the low 16 bits are 0, but some other bit is set, write 's'.  */
      if (! INT_P (x))
	{
	  output_operand_lossage ("invalid %%e value");
	  return;
	}

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0 && uval != 0)
	putc ('s', file);
      return;

    case 'E':
      /* X is a CR register.  Print the number of the EQ bit of the CR */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%E value");
      else
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
      return;

    case 'f':
      /* X is a CR register.  Print the shift count needed to move it
	 to the high-order four bits.  */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%f value");
      else
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'F':
      /* Similar, but print the count for the rotate in the opposite
	 direction.  */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%F value");
      else
	fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'G':
      /* X is a constant integer.  If it is negative, print "m",
	 otherwise print "z".  This is to make an aze or ame insn.  */
      if (GET_CODE (x) != CONST_INT)
	output_operand_lossage ("invalid %%G value");
      else if (INTVAL (x) >= 0)
	putc ('z', file);
      else
	putc ('m', file);
      return;

    case 'h':
      /* If constant, output low-order five bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
      else
	print_operand (file, x, 0);
      return;

    case 'H':
      /* If constant, output low-order six bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
      else
	print_operand (file, x, 0);
      return;

    case 'I':
      /* Print `i' if this is a constant, else nothing.  */
      if (INT_P (x))
	putc ('i', file);
      return;

    case 'j':
      /* Write the bit number in CCR for jump.  */
      i = ccr_bit (x, 0);
      if (i == -1)
	output_operand_lossage ("invalid %%j code");
      else
	fprintf (file, "%d", i);
      return;

    case 'J':
      /* Similar, but add one for shift count in rlinm for scc and pass
	 scc flag to `ccr_bit'.  */
      i = ccr_bit (x, 1);
      if (i == -1)
	output_operand_lossage ("invalid %%J code");
      else
	/* If we want bit 31, write a shift count of zero, not 32.  */
	fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'k':
      /* X must be a constant.  Write the 1's complement of the
	 constant.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%k value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
      return;

    case 'K':
      /* X must be a symbolic constant on ELF.  Write an
	 expression suitable for an 'addi' that adds in the low 16
	 bits of the MEM.  */
      if (GET_CODE (x) == CONST)
	{
	  if (GET_CODE (XEXP (x, 0)) != PLUS
	      || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
		  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
	    output_operand_lossage ("invalid %%K value");
	}
      print_operand_address (file, x);
      fputs ("@l", file);
      return;

      /* %l is output_asm_label.  */

    case 'L':
      /* Write second word of DImode or DFmode reference.  Works on register
	 or non-indexed memory only.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 1], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  /* Handle possible auto-increment.  Since it is pre-increment and
	     we have already done it, we can just use an offset of word.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode,
							   UNITS_PER_WORD),
					0));

	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'N':
      /* Write the number of elements in the vector times 4.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%N value");
      else
	fprintf (file, "%d", XVECLEN (x, 0) * 4);
      return;

    case 'O':
      /* Similar, but subtract 1 first.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%O value");
      else
	fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
      return;

    case 'p':
      /* X is a CONST_INT that is a power of two.  Output the logarithm.  */
      if (! INT_P (x)
	  || INTVAL (x) < 0
	  || (i = exact_log2 (INTVAL (x))) < 0)
	output_operand_lossage ("invalid %%p value");
      else
	fprintf (file, "%d", i);
      return;

    case 'P':
      /* The operand must be an indirect memory reference.  The result
	 is the register name.  */
      if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
	  || REGNO (XEXP (x, 0)) >= 32)
	output_operand_lossage ("invalid %%P value");
      else
	fputs (reg_names[REGNO (XEXP (x, 0))], file);
      return;

    case 'q':
      /* This outputs the logical code corresponding to a boolean
	 expression.  The expression may have one or both operands
	 negated (if one, only the first one).  For condition register
	 logical operations, it will also treat the negated
	 CR codes as NOTs, but not handle NOTs of them.  */
      {
	const char *const *t = 0;
	const char *s;
	enum rtx_code code = GET_CODE (x);
	static const char * const tbl[3][3] = {
	  { "and", "andc", "nor" },
	  { "or", "orc", "nand" },
	  { "xor", "eqv", "xor" } };

	if (code == AND)
	  t = tbl[0];
	else if (code == IOR)
	  t = tbl[1];
	else if (code == XOR)
	  t = tbl[2];
	else
	  output_operand_lossage ("invalid %%q value");

	if (GET_CODE (XEXP (x, 0)) != NOT)
	  s = t[0];
	else
	  {
	    if (GET_CODE (XEXP (x, 1)) == NOT)
	      s = t[2];
	    else
	      s = t[1];
	  }

	fputs (s, file);
      }
      return;

    case 'Q':
      if (! TARGET_MFCRF)
	return;
      fputc (',', file);
      /* FALLTHRU */

    case 'R':
      /* X is a CR register.  Print the mask for `mtcrf'.  */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%R value");
      else
	fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
      return;

    case 's':
      /* Low 5 bits of 32 - value */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%s value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
      return;

    case 't':
      /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
      gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);

      /* Bit 3 is OV bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 3;

      /* If we want bit 31, write a shift count of zero, not 32.  */
      fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'T':
      /* Print the symbolic name of a branch target register.  */
      if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
				  && REGNO (x) != CTR_REGNO))
	output_operand_lossage ("invalid %%T value");
      else if (REGNO (x) == LR_REGNO)
	fputs ("lr", file);
      else
	fputs ("ctr", file);
      return;

    case 'u':
      /* High-order or low-order 16 bits of constant, whichever is non-zero,
	 for use in unsigned operand.  */
      if (! INT_P (x))
	{
	  output_operand_lossage ("invalid %%u value");
	  return;
	}

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0)
	uval >>= 16;

      fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
      return;

    case 'v':
      /* High-order 16 bits of constant for use in signed operand.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%v value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_HEX,
		 (INTVAL (x) >> 16) & 0xffff);
      return;

    case 'U':
      /* Print `u' if this has an auto-increment or auto-decrement.  */
      if (MEM_P (x)
	  && (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
	putc ('u', file);
      return;

    case 'V':
      /* Print the trap code for this operand.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("eq", file);   /* 4 */
	  break;
	case NE:
	  fputs ("ne", file);   /* 24 */
	  break;
	case LT:
	  fputs ("lt", file);   /* 16 */
	  break;
	case LE:
	  fputs ("le", file);   /* 20 */
	  break;
	case GT:
	  fputs ("gt", file);   /* 8 */
	  break;
	case GE:
	  fputs ("ge", file);   /* 12 */
	  break;
	case LTU:
	  fputs ("llt", file);  /* 2 */
	  break;
	case LEU:
	  fputs ("lle", file);  /* 6 */
	  break;
	case GTU:
	  fputs ("lgt", file);  /* 1 */
	  break;
	case GEU:
	  fputs ("lge", file);  /* 5 */
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case 'w':
      /* If constant, low-order 16 bits of constant, signed.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
      else
	print_operand (file, x, 0);
      return;

    case 'x':
      /* X is a FPR or Altivec register used in a VSX context.  */
      if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%x value");
      else
	{
	  int reg = REGNO (x);
	  int vsx_reg = (FP_REGNO_P (reg)
			 ? reg - 32
			 : reg - FIRST_ALTIVEC_REGNO + 32);

#ifdef TARGET_REGNAMES
	  if (TARGET_REGNAMES)
	    fprintf (file, "%%vs%d", vsx_reg);
	  else
#endif
	    fprintf (file, "%d", vsx_reg);
	}
      return;

    case 'X':
      if (MEM_P (x)
	  && (legitimate_indexed_address_p (XEXP (x, 0), 0)
	      || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
		  && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
	putc ('x', file);
      return;

    case 'Y':
      /* Like 'L', for third word of TImode/PTImode  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 2], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'z':
      /* X is a SYMBOL_REF.  Write out the name preceded by a
	 period and without any trailing data in brackets.  Used for function
	 names.  If we are configured for System V (or the embedded ABI) on
	 the PowerPC, do not emit the period, since those systems do not use
	 TOCs and the like.  */
      gcc_assert (GET_CODE (x) == SYMBOL_REF);

      /* For macho, check to see if we need a stub.  */
      if (TARGET_MACHO)
	{
	  const char *name = XSTR (x, 0);
#if TARGET_MACHO
	  if (darwin_emit_branch_islands
	      && MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      else if (!DOT_SYMBOLS)
	assemble_name (file, XSTR (x, 0));
      else
	rs6000_output_function_entry (file, XSTR (x, 0));
      return;

    case 'Z':
      /* Like 'L', for last word of TImode/PTImode.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 3], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'y':
      /* Print AltiVec or SPE memory operand.  */
      {
	rtx tmp;

	gcc_assert (MEM_P (x));

	tmp = XEXP (x, 0);

	/* Ugly hack because %y is overloaded.  */
	if ((TARGET_SPE || TARGET_E500_DOUBLE)
	    && (GET_MODE_SIZE (GET_MODE (x)) == 8
		|| FLOAT128_2REG_P (GET_MODE (x))
		|| GET_MODE (x) == TImode
		|| GET_MODE (x) == PTImode))
	  {
	    /* Handle [reg].  */
	    if (REG_P (tmp))
	      {
		fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
		break;
	      }
	    /* Handle [reg+UIMM].  */
	    else if (GET_CODE (tmp) == PLUS &&
		     GET_CODE (XEXP (tmp, 1)) == CONST_INT)
	      {
		int x;

		gcc_assert (REG_P (XEXP (tmp, 0)));

		x = INTVAL (XEXP (tmp, 1));
		fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
		break;
	      }

	    /* Fall through.  Must be [reg+reg].  */
	  }
	if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
	    && GET_CODE (tmp) == AND
	    && GET_CODE (XEXP (tmp, 1)) == CONST_INT
	    && INTVAL (XEXP (tmp, 1)) == -16)
	  tmp = XEXP (tmp, 0);
	else if (VECTOR_MEM_VSX_P (GET_MODE (x))
		 && GET_CODE (tmp) == PRE_MODIFY)
	  tmp = XEXP (tmp, 1);
	if (REG_P (tmp))
	  fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
	else
	  {
	    if (GET_CODE (tmp) != PLUS
		|| !REG_P (XEXP (tmp, 0))
		|| !REG_P (XEXP (tmp, 1)))
	      {
		output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
		break;
	      }

	    if (REGNO (XEXP (tmp, 0)) == 0)
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
		       reg_names[ REGNO (XEXP (tmp, 0)) ]);
	    else
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
		       reg_names[ REGNO (XEXP (tmp, 1)) ]);
	  }
	break;
      }

    case 0:
      if (REG_P (x))
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (MEM_P (x))
	{
	  /* We need to handle PRE_INC and PRE_DEC here, since we need to
	     know the width from the mode.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC)
	    fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
	  else
	    output_address (GET_MODE (x), XEXP (x, 0));
	}
      else
	{
	  if (toc_relative_expr_p (x, false))
	    /* This hack along with a corresponding hack in
	       rs6000_output_addr_const_extra arranges to output addends
	       where the assembler expects to find them.  eg.
	       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
	       without this hack would be output as "x@toc+4".  We
	       want "x+4@toc".  */
	    output_addr_const (file, CONST_CAST_RTX (tocrel_base));
	  else
	    output_addr_const (file, x);
	}
      return;

    case '&':
      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
      else
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
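/* Standalone sketch of the sign-extension trick used by case 'w' above:
   ((v & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits of V.
   Kept out of the build; a hypothetical self-test harness, not compiler
   code.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int64_t
low16_signed (int64_t v)
{
  /* XOR flips the sign bit of the 16-bit field; the subtract restores
     it with the correct sign extension.  */
  return ((v & 0xffff) ^ 0x8000) - 0x8000;
}

static void
low16_signed_selftest (void)
{
  assert (low16_signed (0x12348000) == -32768);
  assert (low16_signed (0x00007fff) == 32767);
  assert (low16_signed (-1) == -1);
}
#endif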
/* Print the address of an operand.  */

void
print_operand_address (FILE *file, rtx x)
{
  if (REG_P (x))
    fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    {
      output_addr_const (file, x);
      if (small_data_operand (x, GET_MODE (x)))
	fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		 reg_names[SMALL_DATA_REG]);
      else
	gcc_assert (!TARGET_TOC);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && REG_P (XEXP (x, 1)))
    {
      if (REGNO (XEXP (x, 0)) == 0)
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
		 reg_names[ REGNO (XEXP (x, 0)) ]);
      else
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
		 reg_names[ REGNO (XEXP (x, 1)) ]);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && GET_CODE (XEXP (x, 1)) == CONST_INT)
    fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
	     INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
#if TARGET_MACHO
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      fprintf (file, "lo16(");
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
#if TARGET_ELF
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
  else if (toc_relative_expr_p (x, false))
    {
      /* This hack along with a corresponding hack in
	 rs6000_output_addr_const_extra arranges to output addends
	 where the assembler expects to find them.  eg.
	 (lo_sum (reg 9)
	 .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
	 without this hack would be output as "x@toc+8@l(9)".  We
	 want "x+8@toc@l(9)".  */
      output_addr_const (file, CONST_CAST_RTX (tocrel_base));
      if (GET_CODE (x) == LO_SUM)
	fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
      else
	fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
    }
  else
    gcc_unreachable ();
}
/* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
rs6000_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      {
      case UNSPEC_TOCREL:
	gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
			     && REG_P (XVECEXP (x, 0, 1))
			     && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
	output_addr_const (file, XVECEXP (x, 0, 0));
	if (x == tocrel_base && tocrel_offset != const0_rtx)
	  {
	    if (INTVAL (tocrel_offset) >= 0)
	      fprintf (file, "+");
	    output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
	  }
	if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
	  {
	    putc ('-', file);
	    assemble_name (file, toc_label_name);
	  }
	else if (TARGET_ELF)
	  fputs ("@toc", file);
	return true;

#if TARGET_MACHO
      case UNSPEC_MACHOPIC_OFFSET:
	output_addr_const (file, XVECEXP (x, 0, 0));
	putc ('-', file);
	machopic_output_function_base_name (file);
	return true;
#endif
      }
  return false;
}
/* Target hook for assembling integer objects.  The PowerPC version has
   to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
   is defined.  It also needs to handle DI-mode objects on 64-bit
   targets.  */

static bool
rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
#ifdef RELOCATABLE_NEEDS_FIXUP
  /* Special handling for SI values.  */
  if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
    {
      static int recurse = 0;

      /* For -mrelocatable, we mark all addresses that need to be fixed up in
	 the .fixup section.  Since the TOC section is already relocated, we
	 don't need to mark it here.  We used to skip the text section, but it
	 should never be valid for relocated addresses to be placed in the text
	 section.  */
      if (DEFAULT_ABI == ABI_V4
	  && (TARGET_RELOCATABLE || flag_pic > 1)
	  && in_section != toc_section
	  && !recurse
	  && !CONST_SCALAR_INT_P (x)
	  && CONSTANT_P (x))
	{
	  char buf[256];

	  recurse = 1;
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
	  fixuplabelno++;
	  ASM_OUTPUT_LABEL (asm_out_file, buf);
	  fprintf (asm_out_file, "\t.long\t(");
	  output_addr_const (asm_out_file, x);
	  fprintf (asm_out_file, ")@fixup\n");
	  fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
	  ASM_OUTPUT_ALIGN (asm_out_file, 2);
	  fprintf (asm_out_file, "\t.long\t");
	  assemble_name (asm_out_file, buf);
	  fprintf (asm_out_file, "\n\t.previous\n");
	  recurse = 0;
	  return true;
	}
      /* Remove initial .'s to turn a -mcall-aixdesc function
	 address into the address of the descriptor, not the function
	 itself.  */
      else if (GET_CODE (x) == SYMBOL_REF
	       && XSTR (x, 0)[0] == '.'
	       && DEFAULT_ABI == ABI_AIX)
	{
	  const char *name = XSTR (x, 0);
	  while (*name == '.')
	    name++;

	  fprintf (asm_out_file, "\t.long\t%s\n", name);
	  return true;
	}
    }
#endif /* RELOCATABLE_NEEDS_FIXUP */
  return default_assemble_integer (x, size, aligned_p);
}
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */

static void
rs6000_assemble_visibility (tree decl, int vis)
{
  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && DOT_SYMBOLS
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      static const char * const visibility_types[] = {
	NULL, "protected", "hidden", "internal"
      };

      const char *name, *type;

      name = ((* targetm.strip_name_encoding)
	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];

      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    }
  else
    default_assemble_visibility (decl, vis);
}
#endif
enum rtx_code
rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
{
  /* Reversal of FP compares takes care -- an ordered compare
     becomes an unordered compare and vice versa.  */
  if (mode == CCFPmode
      && (!flag_finite_math_only
	  || code == UNLT || code == UNLE || code == UNGT || code == UNGE
	  || code == UNEQ || code == LTGT))
    return reverse_condition_maybe_unordered (code);
  else
    return reverse_condition (code);
}
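/* Sketch of why the maybe_unordered variant matters here: over floats
   with possible NaNs, !(a < b) is (a >= b || unordered (a, b)), i.e.
   UNGE, not GE.  Plain reverse_condition (LT) == GE would drop the NaN
   case, so CCFPmode compares must go through
   reverse_condition_maybe_unordered unless -ffinite-math-only says
   NaNs cannot occur.  */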
/* Generate a compare for CODE.  Return a brand-new rtx that
   represents the result of the compare.  */

static rtx
rs6000_generate_compare (rtx cmp, machine_mode mode)
{
  machine_mode comp_mode;
  rtx compare_result;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);

  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    comp_mode = CCmode;
  else if (FLOAT_MODE_P (mode))
    comp_mode = CCFPmode;
  else if (code == GTU || code == LTU
	   || code == GEU || code == LEU)
    comp_mode = CCUNSmode;
  else if ((code == EQ || code == NE)
	   && unsigned_reg_p (op0)
	   && (unsigned_reg_p (op1)
	       || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
    /* These are unsigned values, perhaps there will be a later
       ordering compare that can be shared with this one.  */
    comp_mode = CCUNSmode;
  else
    comp_mode = CCmode;

  /* If we have an unsigned compare, make sure we don't have a signed value as
     an immediate.  */
  if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
      && INTVAL (op1) < 0)
    {
      op0 = copy_rtx_if_shared (op0);
      op1 = force_reg (GET_MODE (op0), op1);
      cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
    }

  /* First, the compare.  */
  compare_result = gen_reg_rtx (comp_mode);

  /* E500 FP compare instructions on the GPRs.  Yuck!  */
  if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
      && FLOAT_MODE_P (mode))
    {
      rtx cmp, or_result, compare_result2;
      machine_mode op_mode = GET_MODE (op0);
      bool reverse_p;

      if (op_mode == VOIDmode)
	op_mode = GET_MODE (op1);

      /* First reverse the condition codes that aren't directly supported.  */
      switch (code)
	{
	case NE:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	  code = reverse_condition_maybe_unordered (code);
	  reverse_p = true;
	  break;

	case EQ:
	case LT:
	case LE:
	case GT:
	case GE:
	  reverse_p = false;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
	 This explains the following mess.  */

      switch (code)
	{
	case EQ:
	  switch (op_mode)
	    {
	    case SFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tstsfeq_gpr (compare_result, op0, op1)
		: gen_cmpsfeq_gpr (compare_result, op0, op1);
	      break;

	    case DFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tstdfeq_gpr (compare_result, op0, op1)
		: gen_cmpdfeq_gpr (compare_result, op0, op1);
	      break;

	    case TFmode:
	    case IFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tsttfeq_gpr (compare_result, op0, op1)
		: gen_cmptfeq_gpr (compare_result, op0, op1);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  break;

	case GT:
	case GE:
	  switch (op_mode)
	    {
	    case SFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tstsfgt_gpr (compare_result, op0, op1)
		: gen_cmpsfgt_gpr (compare_result, op0, op1);
	      break;

	    case DFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tstdfgt_gpr (compare_result, op0, op1)
		: gen_cmpdfgt_gpr (compare_result, op0, op1);
	      break;

	    case TFmode:
	    case IFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tsttfgt_gpr (compare_result, op0, op1)
		: gen_cmptfgt_gpr (compare_result, op0, op1);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  break;

	case LT:
	case LE:
	  switch (op_mode)
	    {
	    case SFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tstsflt_gpr (compare_result, op0, op1)
		: gen_cmpsflt_gpr (compare_result, op0, op1);
	      break;

	    case DFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tstdflt_gpr (compare_result, op0, op1)
		: gen_cmpdflt_gpr (compare_result, op0, op1);
	      break;

	    case TFmode:
	    case IFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tsttflt_gpr (compare_result, op0, op1)
		: gen_cmptflt_gpr (compare_result, op0, op1);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Synthesize LE and GE from LT/GT || EQ.  */
      if (code == LE || code == GE)
	{
	  emit_insn (cmp);

	  compare_result2 = gen_reg_rtx (CCFPmode);

	  /* Do the EQ.  */
	  switch (op_mode)
	    {
	    case SFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tstsfeq_gpr (compare_result2, op0, op1)
		: gen_cmpsfeq_gpr (compare_result2, op0, op1);
	      break;

	    case DFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tstdfeq_gpr (compare_result2, op0, op1)
		: gen_cmpdfeq_gpr (compare_result2, op0, op1);
	      break;

	    case TFmode:
	    case IFmode:
	      cmp = (flag_finite_math_only && !flag_trapping_math)
		? gen_tsttfeq_gpr (compare_result2, op0, op1)
		: gen_cmptfeq_gpr (compare_result2, op0, op1);
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  emit_insn (cmp);

	  /* OR them together.  */
	  or_result = gen_reg_rtx (CCFPmode);
	  cmp = gen_e500_cr_ior_compare (or_result, compare_result,
					 compare_result2);
	  compare_result = or_result;
	}

      code = reverse_p ? NE : EQ;

      emit_insn (cmp);
    }

  /* IEEE 128-bit support in VSX registers when we do not have hardware
     support.  */
  else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    {
      rtx libfunc = NULL_RTX;
      bool check_nan = false;
      rtx dest;

      switch (code)
	{
	case EQ:
	case NE:
	  libfunc = optab_libfunc (eq_optab, mode);
	  break;

	case GT:
	case GE:
	  libfunc = optab_libfunc (ge_optab, mode);
	  break;

	case LT:
	case LE:
	  libfunc = optab_libfunc (le_optab, mode);
	  break;

	case UNORDERED:
	case ORDERED:
	  libfunc = optab_libfunc (unord_optab, mode);
	  code = (code == UNORDERED) ? NE : EQ;
	  break;

	case UNGE:
	case UNGT:
	  check_nan = true;
	  libfunc = optab_libfunc (ge_optab, mode);
	  code = (code == UNGE) ? GE : GT;
	  break;

	case UNLE:
	case UNLT:
	  check_nan = true;
	  libfunc = optab_libfunc (le_optab, mode);
	  code = (code == UNLE) ? LE : LT;
	  break;

	case UNEQ:
	case LTGT:
	  check_nan = true;
	  libfunc = optab_libfunc (eq_optab, mode);
	  code = (code == UNEQ) ? EQ : NE;
	  break;

	default:
	  gcc_unreachable ();
	}

      gcc_assert (libfunc);

      if (!check_nan)
	dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					SImode, 2, op0, mode, op1, mode);

      /* The library signals an exception for signalling NaNs, so we need to
	 handle isgreater, etc. by first checking isordered.  */
      else
	{
	  rtx ne_rtx, normal_dest, unord_dest;
	  rtx unord_func = optab_libfunc (unord_optab, mode);
	  rtx join_label = gen_label_rtx ();
	  rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
	  rtx unord_cmp = gen_reg_rtx (comp_mode);

	  /* Test for either value being a NaN.  */
	  gcc_assert (unord_func);
	  unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
						SImode, 2, op0, mode, op1,
						mode);

	  /* Set value (0) if either value is a NaN, and jump to the join
	     label.  */
	  dest = gen_reg_rtx (SImode);
	  emit_move_insn (dest, const1_rtx);
	  emit_insn (gen_rtx_SET (unord_cmp,
				  gen_rtx_COMPARE (comp_mode, unord_dest,
						   const0_rtx)));

	  ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
	  emit_jump_insn (gen_rtx_SET (pc_rtx,
				       gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
							     join_ref,
							     pc_rtx)));

	  /* Do the normal comparison, knowing that the values are not
	     NaNs.  */
	  normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
						 SImode, 2, op0, mode, op1,
						 mode);

	  emit_insn (gen_cstoresi4 (dest,
				    gen_rtx_fmt_ee (code, SImode, normal_dest,
						    const0_rtx),
				    normal_dest, const0_rtx));

	  /* Join NaN and non-Nan paths.  Compare dest against 0.  */
	  emit_label (join_label);
	  code = NE;
	}

      emit_insn (gen_rtx_SET (compare_result,
			      gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
    }

  else
    {
      /* Generate XLC-compatible TFmode compare as PARALLEL with extra
	 CLOBBERs to match cmptf_internal2 pattern.  */
      if (comp_mode == CCFPmode && TARGET_XL_COMPAT
	  && FLOAT128_IBM_P (GET_MODE (op0))
	  && TARGET_HARD_FLOAT && TARGET_FPRS)
	emit_insn (gen_rtx_PARALLEL (VOIDmode,
	  gen_rtvec (10,
		     gen_rtx_SET (compare_result,
				  gen_rtx_COMPARE (comp_mode, op0, op1)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
      else if (GET_CODE (op1) == UNSPEC
	       && XINT (op1, 1) == UNSPEC_SP_TEST)
	{
	  rtx op1b = XVECEXP (op1, 0, 0);
	  comp_mode = CCEQmode;
	  compare_result = gen_reg_rtx (CCEQmode);
	  if (TARGET_64BIT)
	    emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
	  else
	    emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
	}
      else
	emit_insn (gen_rtx_SET (compare_result,
				gen_rtx_COMPARE (comp_mode, op0, op1)));
    }

  /* Some kinds of FP comparisons need an OR operation;
     under flag_finite_math_only we don't bother.  */
  if (FLOAT_MODE_P (mode)
      && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
      && !flag_finite_math_only
      && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
      && (code == LE || code == GE
	  || code == UNEQ || code == LTGT
	  || code == UNGT || code == UNLT))
    {
      enum rtx_code or1, or2;
      rtx or1_rtx, or2_rtx, compare2_rtx;
      rtx or_result = gen_reg_rtx (CCEQmode);

      switch (code)
	{
	case LE: or1 = LT; or2 = EQ; break;
	case GE: or1 = GT; or2 = EQ; break;
	case UNEQ: or1 = UNORDERED; or2 = EQ; break;
	case LTGT: or1 = LT; or2 = GT; break;
	case UNGT: or1 = UNORDERED; or2 = GT; break;
	case UNLT: or1 = UNORDERED; or2 = LT; break;
	default: gcc_unreachable ();
	}
      validate_condition_mode (or1, comp_mode);
      validate_condition_mode (or2, comp_mode);
      or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
      or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
      compare2_rtx = gen_rtx_COMPARE (CCEQmode,
				      gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
				      const_true_rtx);
      emit_insn (gen_rtx_SET (or_result, compare2_rtx));

      compare_result = or_result;
      code = EQ;
    }

  validate_condition_mode (code, GET_MODE (compare_result));

  return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
}
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
			  const_tree type1,
			  const_tree type2)
{
  enum machine_mode mode1 = TYPE_MODE (type1);
  enum machine_mode mode2 = TYPE_MODE (type2);

  /* For complex modes, use the inner type.  */
  if (COMPLEX_MODE_P (mode1))
    mode1 = GET_MODE_INNER (mode1);

  if (COMPLEX_MODE_P (mode2))
    mode2 = GET_MODE_INNER (mode2);

  /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
     double to intermix unless -mfloat128-convert.  */
  if (mode1 == mode2)
    return NULL;

  if (!TARGET_FLOAT128_CVT)
    {
      if ((mode1 == KFmode && mode2 == IFmode)
	  || (mode1 == IFmode && mode2 == KFmode))
	return N_("__float128 and __ibm128 cannot be used in the same "
		  "expression");

      if (TARGET_IEEEQUAD
	  && ((mode1 == IFmode && mode2 == TFmode)
	      || (mode1 == TFmode && mode2 == IFmode)))
	return N_("__ibm128 and long double cannot be used in the same "
		  "expression");

      if (!TARGET_IEEEQUAD
	  && ((mode1 == KFmode && mode2 == TFmode)
	      || (mode1 == TFmode && mode2 == KFmode)))
	return N_("__float128 and long double cannot be used in the same "
		  "expression");
    }

  return NULL;
}
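/* For example (a sketch of the kind of user code this hook rejects when
   -mfloat128-convert is not in effect):

     __float128 a;
     __ibm128 b;
     ... a + b ...

   draws "__float128 and __ibm128 cannot be used in the same expression",
   since the two 128-bit formats have no implicit conversion between
   them.  */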
/* Expand floating point conversion to/from __float128 and __ibm128.  */

void
rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode src_mode = GET_MODE (src);
  convert_optab cvt = unknown_optab;
  bool do_move = false;
  rtx libfunc = NULL_RTX;
  rtx dest2;
  size_t kf_or_tf;
  typedef rtx (*rtx_2func_t) (rtx, rtx);
  rtx_2func_t hw_convert = (rtx_2func_t)0;

  struct hw_conversion
  {
    rtx_2func_t from_df;
    rtx_2func_t from_sf;
    rtx_2func_t from_si_sign;
    rtx_2func_t from_si_uns;
    rtx_2func_t from_di_sign;
    rtx_2func_t from_di_uns;
    rtx_2func_t to_df;
    rtx_2func_t to_sf;
    rtx_2func_t to_si_sign;
    rtx_2func_t to_si_uns;
    rtx_2func_t to_di_sign;
    rtx_2func_t to_di_uns;
  } hw_conversions[2] = {
    /* conversions to/from KFmode */
    {
      gen_extenddfkf2_hw,	/* KFmode <- DFmode.  */
      gen_extendsfkf2_hw,	/* KFmode <- SFmode.  */
      gen_float_kfsi2_hw,	/* KFmode <- SImode (signed).  */
      gen_floatuns_kfsi2_hw,	/* KFmode <- SImode (unsigned).  */
      gen_float_kfdi2_hw,	/* KFmode <- DImode (signed).  */
      gen_floatuns_kfdi2_hw,	/* KFmode <- DImode (unsigned).  */
      gen_trunckfdf2_hw,	/* DFmode <- KFmode.  */
      gen_trunckfsf2_hw,	/* SFmode <- KFmode.  */
      gen_fix_kfsi2_hw,		/* SImode <- KFmode (signed).  */
      gen_fixuns_kfsi2_hw,	/* SImode <- KFmode (unsigned).  */
      gen_fix_kfdi2_hw,		/* DImode <- KFmode (signed).  */
      gen_fixuns_kfdi2_hw,	/* DImode <- KFmode (unsigned).  */
    },

    /* conversions to/from TFmode */
    {
      gen_extenddftf2_hw,	/* TFmode <- DFmode.  */
      gen_extendsftf2_hw,	/* TFmode <- SFmode.  */
      gen_float_tfsi2_hw,	/* TFmode <- SImode (signed).  */
      gen_floatuns_tfsi2_hw,	/* TFmode <- SImode (unsigned).  */
      gen_float_tfdi2_hw,	/* TFmode <- DImode (signed).  */
      gen_floatuns_tfdi2_hw,	/* TFmode <- DImode (unsigned).  */
      gen_trunctfdf2_hw,	/* DFmode <- TFmode.  */
      gen_trunctfsf2_hw,	/* SFmode <- TFmode.  */
      gen_fix_tfsi2_hw,		/* SImode <- TFmode (signed).  */
      gen_fixuns_tfsi2_hw,	/* SImode <- TFmode (unsigned).  */
      gen_fix_tfdi2_hw,		/* DImode <- TFmode (signed).  */
      gen_fixuns_tfdi2_hw,	/* DImode <- TFmode (unsigned).  */
    },
  };

  if (dest_mode == src_mode)
    gcc_unreachable ();

  /* Eliminate memory operations.  */
  if (MEM_P (src))
    src = force_reg (src_mode, src);

  if (MEM_P (dest))
    {
      rtx tmp = gen_reg_rtx (dest_mode);
      rs6000_expand_float128_convert (tmp, src, unsigned_p);
      rs6000_emit_move (dest, tmp, dest_mode);
      return;
    }

  /* Convert to IEEE 128-bit floating point.  */
  if (FLOAT128_IEEE_P (dest_mode))
    {
      if (dest_mode == KFmode)
	kf_or_tf = 0;
      else if (dest_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (src_mode)
	{
	case DFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_df;
	  break;

	case SFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_sf;
	  break;

	case KFmode:
	case IFmode:
	case TFmode:
	  if (FLOAT128_IBM_P (src_mode))
	    cvt = sext_optab;
	  else
	    do_move = true;
	  break;

	case SImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_sign;
	    }
	  break;

	case DImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Convert from IEEE 128-bit floating point.  */
  else if (FLOAT128_IEEE_P (src_mode))
    {
      if (src_mode == KFmode)
	kf_or_tf = 0;
      else if (src_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (dest_mode)
	{
	case DFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_df;
	  break;

	case SFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_sf;
	  break;

	case KFmode:
	case IFmode:
	case TFmode:
	  if (FLOAT128_IBM_P (dest_mode))
	    cvt = trunc_optab;
	  else
	    do_move = true;
	  break;

	case SImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_sign;
	    }
	  break;

	case DImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Both IBM format.  */
  else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
    do_move = true;

  else
    gcc_unreachable ();

  /* Handle conversion between TFmode/KFmode.  */
  if (do_move)
    emit_move_insn (dest, gen_lowpart (dest_mode, src));

  /* Handle conversion if we have hardware support.  */
  else if (TARGET_FLOAT128_HW && hw_convert)
    emit_insn ((hw_convert) (dest, src));

  /* Call an external function to do the conversion.  */
  else if (cvt != unknown_optab)
    {
      libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
      gcc_assert (libfunc != NULL_RTX);

      dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
				       1, src, src_mode);

      gcc_assert (dest2 != NULL_RTX);
      if (!rtx_equal_p (dest, dest2))
	emit_move_insn (dest, dest2);
    }

  else
    gcc_unreachable ();

  return;
}
/* Emit the RTL for an sISEL pattern.  */

void
rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
{
  rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
}
/* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
   can be used as that dest register.  Return the dest register.  */

rtx
rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
{
  if (op2 == const0_rtx)
    return op1;

  if (GET_CODE (scratch) == SCRATCH)
    scratch = gen_reg_rtx (mode);

  if (logical_operand (op2, mode))
    emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
  else
    emit_insn (gen_rtx_SET (scratch,
			    gen_rtx_PLUS (mode, op1,
					  negate_rtx (mode, op2))));

  return scratch;
}
void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
  rtx condition_rtx;
  machine_mode op_mode;
  enum rtx_code cond_code;
  rtx result = operands[0];

  condition_rtx = rs6000_generate_compare (operands[1], mode);
  cond_code = GET_CODE (condition_rtx);

  if (FLOAT_MODE_P (mode)
      && !TARGET_FPRS && TARGET_HARD_FLOAT)
    {
      rtx t;

      PUT_MODE (condition_rtx, SImode);
      t = XEXP (condition_rtx, 0);

      gcc_assert (cond_code == NE || cond_code == EQ);

      if (cond_code == NE)
	emit_insn (gen_e500_flip_gt_bit (t, t));

      emit_insn (gen_move_from_CR_gt_bit (result, t));
      return;
    }

  if (cond_code == NE
      || cond_code == GE || cond_code == LE
      || cond_code == GEU || cond_code == LEU
      || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
    {
      rtx not_result = gen_reg_rtx (CCEQmode);
      rtx not_op, rev_cond_rtx;
      machine_mode cc_mode;

      cc_mode = GET_MODE (XEXP (condition_rtx, 0));

      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode,
							       cond_code),
				     SImode, XEXP (condition_rtx, 0),
				     const0_rtx);
      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
      emit_insn (gen_rtx_SET (not_result, not_op));
      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
    }

  op_mode = GET_MODE (XEXP (operands[1], 0));
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (XEXP (operands[1], 1));

  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
    {
      PUT_MODE (condition_rtx, DImode);
      convert_move (result, condition_rtx, 0);
    }
  else
    {
      PUT_MODE (condition_rtx, SImode);
      emit_insn (gen_rtx_SET (result, condition_rtx));
    }
}
/* Emit a branch of kind CODE to location LOC.  */

void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
  rtx condition_rtx, loc_ref;

  condition_rtx = rs6000_generate_compare (operands[0], mode);
  loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  emit_jump_insn (gen_rtx_SET (pc_rtx,
			       gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
						     loc_ref, pc_rtx)));
}
/* Return the string to output a conditional branch to LABEL, which is
   the operand template of the label, or NULL if the branch is really a
   conditional return.

   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
   condition code register and its mode specifies what kind of
   comparison we made.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   INSN is the insn.  */

char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
  int really_reversed = reversed ^ need_longbranch;
  char *s = string;
  const char *ccode;
  const char *pred;
  rtx note;

  validate_condition_mode (code, mode);

  /* Work out which way this really branches.  We could use
     reverse_condition_maybe_unordered here always but this
     makes the resulting assembler clearer.  */
  if (really_reversed)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
    {
      /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
	 to the GT bit.  */
      switch (code)
	{
	case EQ:
	  /* Opposite of GT.  */
	  code = GT;
	  break;

	case NE:
	  code = UNLE;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  switch (code)
    {
      /* Not all of these are actually distinct opcodes, but
	 we distinguish them for clarity of the resulting assembler.  */
    case NE: case LTGT:
      ccode = "ne"; break;
    case EQ: case UNEQ:
      ccode = "eq"; break;
    case GE: case GEU:
      ccode = "ge"; break;
    case GT: case GTU: case UNGT:
      ccode = "gt"; break;
    case LE: case LEU:
      ccode = "le"; break;
    case LT: case LTU: case UNLT:
      ccode = "lt"; break;
    case UNORDERED: ccode = "un"; break;
    case ORDERED: ccode = "nu"; break;
    case UNGE: ccode = "nl"; break;
    case UNLE: ccode = "ng"; break;
    default:
      gcc_unreachable ();
    }

  /* Maybe we have a guess as to how likely the branch is.  */
  pred = "";
  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
  if (note != NULL_RTX)
    {
      /* PROB is the difference from 50%.  */
      int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;

      /* Only hint for highly probable/improbable branches on newer cpus when
	 we have real profile data, as static prediction overrides processor
	 dynamic prediction.  For older cpus we may as well always hint, but
	 assume not taken for branches that are very close to 50% as a
	 mispredicted taken branch is more expensive than a
	 mispredicted not-taken branch.  */
      if (rs6000_always_hint
	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
	      && br_prob_note_reliable_p (note)))
	{
	  if (abs (prob) > REG_BR_PROB_BASE / 20
	      && ((prob > 0) ^ need_longbranch))
	    pred = "+";
	  else
	    pred = "-";
	}
    }

  if (label == NULL)
    s += sprintf (s, "b%slr%s ", ccode, pred);
  else
    s += sprintf (s, "b%s%s ", ccode, pred);

  /* We need to escape any '%' characters in the reg_names string.
     Assume they'd only be the first character....  */
  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
    *s++ = '%';
  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);

  if (label != NULL)
    {
      /* If the branch distance was too far, we may have to use an
	 unconditional branch to go the distance.  */
      if (need_longbranch)
	s += sprintf (s, ",$+8\n\tb %s", label);
      else
	s += sprintf (s, ",%s", label);
    }

  return string;
}
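/* For example, a likely-taken eq branch on cr0 comes out as

     beq+ 0,.L2

   while a branch whose target is too distant is inverted around an
   unconditional branch:

     bne 0,$+8
     b .L2

   (a sketch; the exact register spelling depends on -mregnames).  */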
/* Return the string to flip the GT bit on a CR.  */
const char *
output_e500_flip_gt_bit (rtx dst, rtx src)
{
  static char string[64];
  int a, b;

  gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
	      && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));

  /* GT bit.  */
  a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
  b = 4 * (REGNO (src) - CR0_REGNO) + 1;

  sprintf (string, "crnot %d,%d", a, b);
  return string;
}
/* Return insn for VSX or Altivec comparisons.  */

static rtx
rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
{
  rtx mask;
  machine_mode mode = GET_MODE (op0);

  switch (code)
    {
    default:
      break;

    case GE:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	return NULL_RTX;
      /* FALLTHRU */

    case EQ:
    case GT:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case LTGT:
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return mask;
    }

  return NULL_RTX;
}
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */

static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
			    rtx op0, rtx op1,
			    machine_mode dmode)
{
  rtx mask;
  bool swap_operands = false;
  bool try_again = false;

  gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* See if the comparison works as is.  */
  mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
  if (mask)
    return mask;

  switch (rcode)
    {
    case LT:
      rcode = GT;
      swap_operands = true;
      try_again = true;
      break;
    case LTU:
      rcode = GTU;
      swap_operands = true;
      try_again = true;
      break;
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A != B becomes ~(A==B).  */
      {
	enum rtx_code rev_code;
	enum insn_code nor_code;
	rtx mask2;

	rev_code = reverse_condition_maybe_unordered (rcode);
	if (rev_code == UNKNOWN)
	  return NULL_RTX;

	nor_code = optab_handler (one_cmpl_optab, dmode);
	if (nor_code == CODE_FOR_nothing)
	  return NULL_RTX;

	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
	if (!mask2)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (nor_code) (mask, mask2));
	return mask;
      }
      break;
    case GE:
    case GEU:
    case LE:
    case LEU:
      /* Try GT/GTU/LT/LTU OR EQ */
      {
	rtx c_rtx, eq_rtx;
	enum insn_code ior_code;
	enum rtx_code new_code;

	switch (rcode)
	  {
	  case GE:
	    new_code = GT;
	    break;

	  case GEU:
	    new_code = GTU;
	    break;

	  case LE:
	    new_code = LT;
	    break;

	  case LEU:
	    new_code = LTU;
	    break;

	  default:
	    gcc_unreachable ();
	  }

	ior_code = optab_handler (ior_optab, dmode);
	if (ior_code == CODE_FOR_nothing)
	  return NULL_RTX;

	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
	if (!c_rtx)
	  return NULL_RTX;

	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
	if (!eq_rtx)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	return mask;
      }
      break;
    default:
      return NULL_RTX;
    }

  if (try_again)
    {
      if (swap_operands)
	std::swap (op0, op1);

      mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
      if (mask)
	return mask;
    }

  /* You only get two chances.  */
  return NULL_RTX;
}
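/* The rewrites above implement the usual lattice of vector-compare
   identities (a sketch):

     A LT B  ->  B GT A                 (swap operands)
     A NE B  ->  ~(A EQ B)              (one_cmpl of the mask)
     A LE B  ->  (A LT B) | (A EQ B)    (ior of two masks)

   each step recursing until the target has a direct instruction for
   the rewritten comparison.  */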
/* Emit vector conditional expression.  DEST is destination.  OP_TRUE and
   OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  */

int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
			      rtx cond, rtx cc_op0, rtx cc_op1)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode mask_mode = GET_MODE (cc_op0);
  enum rtx_code rcode = GET_CODE (cond);
  machine_mode cc_mode = CCmode;
  rtx mask;
  rtx cond2;
  bool invert_move = false;

  if (VECTOR_UNIT_NONE_P (dest_mode))
    return 0;

  gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));

  switch (rcode)
    {
      /* Swap operands if we can, and fall back to doing the operation as
	 specified, and doing a NOR to invert the test.  */
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
      invert_move = true;
      rcode = reverse_condition_maybe_unordered (rcode);
      if (rcode == UNKNOWN)
	return 0;
      break;

    case GE:
    case LE:
      if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
	{
	  /* Invert condition to avoid compound test.  */
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    case GTU:
    case GEU:
    case LTU:
    case LEU:
      /* Mark unsigned tests with CCUNSmode.  */
      cc_mode = CCUNSmode;

      /* Invert condition to avoid compound test if necessary.  */
      if (rcode == GEU || rcode == LEU)
	{
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    default:
      break;
    }

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

  if (invert_move)
    std::swap (op_true, op_false);

  /* Optimize vec1 == vec2, to know the mask generates -1/0.  */
  if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
      && (GET_CODE (op_true) == CONST_VECTOR
	  || GET_CODE (op_false) == CONST_VECTOR))
    {
      rtx constant_0 = CONST0_RTX (dest_mode);
      rtx constant_m1 = CONSTM1_RTX (dest_mode);

      if (op_true == constant_m1 && op_false == constant_0)
	{
	  emit_move_insn (dest, mask);
	  return 1;
	}

      else if (op_true == constant_0 && op_false == constant_m1)
	{
	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
	  return 1;
	}

      /* If we can't use the vector comparison directly, perhaps we can use
	 the mask for the true or false fields, instead of loading up a
	 constant.  */
      if (op_true == constant_m1)
	op_true = mask;

      if (op_false == constant_0)
	op_false = mask;
    }

  if (!REG_P (op_true) && !SUBREG_P (op_true))
    op_true = force_reg (dest_mode, op_true);

  if (!REG_P (op_false) && !SUBREG_P (op_false))
    op_false = force_reg (dest_mode, op_false);

  cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
			  CONST0_RTX (dest_mode));
  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (dest_mode,
						cond2,
						op_true,
						op_false)));
  return 1;
}
/* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
   for SF/DF scalars.  Move TRUE_COND to DEST if OP of the operands of the last
   comparison is nonzero/true, FALSE_COND if it is zero/false.  Return 0 if the
   hardware has no such operation.  */

static int
rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  bool max_p = false;

  if (result_mode != compare_mode)
    return 0;

  if (code == GE || code == GT)
    max_p = true;
  else if (code == LE || code == LT)
    max_p = false;
  else
    return 0;

  if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
    ;

  else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
    max_p = !max_p;

  else
    return 0;

  rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
  return 1;
}
/* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
   XXSEL instructions for SF/DF scalars.  Move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it is
   zero/false.  Return 0 if the hardware has no such operation.  */

static int
rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode result_mode = GET_MODE (dest);
  rtx compare_rtx;
  rtx cmove_rtx;
  rtx clobber_rtx;

  if (!can_create_pseudo_p ())
    return 0;

  switch (code)
    {
    case EQ:
    case GE:
    case GT:
      break;

    case NE:
    case LT:
    case LE:
      code = swap_condition (code);
      std::swap (op0, op1);
      break;

    default:
      return 0;
    }

  /* Generate:	[(parallel [(set (dest)
				 (if_then_else (op (cmp1) (cmp2))
					       (true)
					       (false)))
			    (clobber (scratch))])].  */

  compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
  cmove_rtx = gen_rtx_SET (dest,
			   gen_rtx_IF_THEN_ELSE (result_mode,
						 compare_rtx,
						 true_cond,
						 false_cond));

  clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2, cmove_rtx, clobber_rtx)));

  return 1;
}
/* Emit a conditional move: move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it
   is zero/false.  Return 0 if the hardware has no such operation.  */

int
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  rtx temp;
  bool is_against_zero;

  /* These modes should always match.  */
  if (GET_MODE (op1) != compare_mode
      /* In the isel case however, we can use a compare immediate, so
	 op1 may be a small constant.  */
      && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
    return 0;
  if (GET_MODE (true_cond) != result_mode)
    return 0;
  if (GET_MODE (false_cond) != result_mode)
    return 0;

  /* See if we can use the ISA 3.0 (power9) min/max/compare functions.  */
  if (TARGET_P9_MINMAX
      && (compare_mode == SFmode || compare_mode == DFmode)
      && (result_mode == SFmode || result_mode == DFmode))
    {
      if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
	return 1;

      if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
	return 1;
    }

  /* Don't allow using floating point comparisons for integer results for
     now.  */
  if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
    return 0;

  /* First, work out if the hardware can do this at all, or
     if it's too slow....  */
  if (!FLOAT_MODE_P (compare_mode))
    {
      if (TARGET_ISEL)
	return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
      return 0;
    }
  else if (TARGET_HARD_FLOAT && !TARGET_FPRS
	   && SCALAR_FLOAT_MODE_P (compare_mode))
    return 0;

  is_against_zero = op1 == CONST0_RTX (compare_mode);

  /* A floating-point subtract might overflow, underflow, or produce
     an inexact result, thus changing the floating-point flags, so it
     can't be generated if we care about that.  It's safe if one side
     of the construct is zero, since then no subtract will be
     generated.  */
  if (SCALAR_FLOAT_MODE_P (compare_mode)
      && flag_trapping_math && ! is_against_zero)
    return 0;

  /* Eliminate half of the comparisons by switching operands, this
     makes the remaining code simpler.  */
  if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
      || code == LTGT || code == LT || code == UNLE)
    {
      code = reverse_condition_maybe_unordered (code);
      temp = true_cond;
      true_cond = false_cond;
      false_cond = temp;
    }

  /* UNEQ and LTGT take four instructions for a comparison with zero,
     it'll probably be faster to use a branch here too.  */
  if (code == UNEQ && HONOR_NANS (compare_mode))
    return 0;

  /* We're going to try to implement comparisons by performing
     a subtract, then comparing against zero.  Unfortunately,
     Inf - Inf is NaN which is not zero, and so if we don't
     know that the operand is finite and the comparison
     would treat EQ different to UNORDERED, we can't do it.  */
  if (HONOR_INFINITIES (compare_mode)
      && code != GT && code != UNGE
      && (GET_CODE (op1) != CONST_DOUBLE
	  || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
      /* Constructs of the form (a OP b ? a : b) are safe.  */
      && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
	  || (! rtx_equal_p (op0, true_cond)
	      && ! rtx_equal_p (op1, true_cond))))
    return 0;

  /* At this point we know we can use fsel.  */

  /* Reduce the comparison to a comparison against zero.  */
  if (! is_against_zero)
    {
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
      op0 = temp;
      op1 = CONST0_RTX (compare_mode);
    }

  /* If we don't care about NaNs we can reduce some of the comparisons
     down to faster ones.  */
  if (! HONOR_NANS (compare_mode))
    switch (code)
      {
      case GT:
	code = LE;
	temp = true_cond;
	true_cond = false_cond;
	false_cond = temp;
	break;
      case UNGE:
	code = GE;
	break;
      case UNEQ:
	code = EQ;
	break;
      default:
	break;
      }

  /* Now, reduce everything down to a GE.  */
  switch (code)
    {
    case GE:
      break;

    case LE:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case ORDERED:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
      op0 = temp;
      break;

    case EQ:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_NEG (compare_mode,
					   gen_rtx_ABS (compare_mode, op0))));
      op0 = temp;
      break;

    case UNGE:
      /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      false_cond = true_cond;
      true_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case GT:
      /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      true_cond = false_cond;
      false_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (result_mode,
						gen_rtx_GE (VOIDmode,
							    op0, op1),
						true_cond, false_cond)));

  return 1;
}
/* Same as above, but for ints (isel).  */

static int
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  rtx condition_rtx, cr;
  machine_mode mode = GET_MODE (dest);
  enum rtx_code cond_code;
  rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
  bool signedp;

  if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
    return 0;

  /* We still have to do the compare, because isel doesn't do a
     compare, it just looks at the CRx bits set by a previous compare
     instruction.  */
  condition_rtx = rs6000_generate_compare (op, mode);
  cond_code = GET_CODE (condition_rtx);
  cr = XEXP (condition_rtx, 0);
  signedp = GET_MODE (cr) == CCmode;

  isel_func = (mode == SImode
	       ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
	       : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));

  switch (cond_code)
    {
    case LT: case GT: case LTU: case GTU: case EQ:
      /* isel handles these directly.  */
      break;

    default:
      /* We need to swap the sense of the comparison.  */
      {
	std::swap (false_cond, true_cond);
	PUT_CODE (condition_rtx, reverse_condition (cond_code));
      }
      break;
    }

  false_cond = force_reg (mode, false_cond);
  if (true_cond != const0_rtx)
    true_cond = force_reg (mode, true_cond);

  emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));

  return 1;
}
const char *
output_isel (rtx *operands)
{
  enum rtx_code code;

  code = GET_CODE (operands[1]);

  if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
    {
      gcc_assert (GET_CODE (operands[2]) == REG
		  && GET_CODE (operands[3]) == REG);
      PUT_CODE (operands[1], reverse_condition (code));
      return "isel %0,%3,%2,%j1";
    }

  return "isel %0,%2,%3,%j1";
}
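/* For example, "isel %0,%2,%3,%j1" with operands[1] == (eq cr0 0)
   selects %2 when the EQ bit of cr0 is set and %3 otherwise.  isel can
   only test a single CR bit, so for GE and friends the code above
   rewrites to the complementary condition (here LT) and swaps the two
   source operands.  (A sketch of the emitted template.)  */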
void
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  enum rtx_code c;
  rtx target;

  /* VSX/altivec have direct min/max insns.  */
  if ((code == SMAX || code == SMIN)
      && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
	  || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
    {
      emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return;
    }

  if (code == SMAX || code == SMIN)
    c = GE;
  else
    c = GEU;

  if (code == SMAX || code == UMAX)
    target = emit_conditional_move (dest, c, op0, op1, mode,
				    op0, op1, mode, 0);
  else
    target = emit_conditional_move (dest, c, op0, op1, mode,
				    op1, op0, mode, 0);
  gcc_assert (target);
  if (target != dest)
    emit_move_insn (dest, target);
}
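/* Illustrative note (not part of the build; added for exposition): when no
   direct min/max instruction exists, the fallback above expresses the
   operation as a conditional move:

       smax (a, b)  ==>  (a >= b) ? a : b
       smin (a, b)  ==>  (a >= b) ? b : a

   using GEU instead of GE for the unsigned variants.  */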
/* Split a signbit operation on 64-bit machines with direct move.  Also allow
   for the value to come from memory or if it is already loaded into a GPR.  */

void
rs6000_split_signbit (rtx dest, rtx src)
{
  machine_mode d_mode = GET_MODE (dest);
  machine_mode s_mode = GET_MODE (src);
  rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
  rtx shift_reg = dest_di;

  gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);

  if (MEM_P (src))
    {
      rtx mem = (WORDS_BIG_ENDIAN
		 ? adjust_address (src, DImode, 0)
		 : adjust_address (src, DImode, 8));
      emit_insn (gen_rtx_SET (dest_di, mem));
    }

  else
    {
      unsigned int r = reg_or_subregno (src);

      if (INT_REGNO_P (r))
	shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));

      else
	{
	  /* Generate the special mfvsrd instruction to get it in a GPR.  */
	  gcc_assert (VSX_REGNO_P (r));
	  if (s_mode == KFmode)
	    emit_insn (gen_signbitkf2_dm2 (dest_di, src));
	  else
	    emit_insn (gen_signbittf2_dm2 (dest_di, src));
	}
    }

  emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
  return;
}
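/* Illustrative note (not part of the build; added for exposition): the
   sign of an IEEE 128-bit value is the most significant bit of its
   high-order doubleword, so once that word is in a GPR the result is
   just a logical right shift, roughly

       signbit = high_word >> 63;

   The "+ (BYTES_BIG_ENDIAN == 0)" adjustment above picks the high half
   of a GPR pair on little-endian, where the halves are stored in the
   opposite order.  */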
/* A subroutine of the atomic operation splitters.  Jump to LABEL if
   COND is true.  Mark the jump as unlikely to be taken.  */

static void
emit_unlikely_jump (rtx cond, rtx label)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
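/* Illustrative note (not part of the build; added for exposition):
   REG_BR_PROB_BASE is 10000, so the note added above records a taken
   probability of 10000 / 100 - 1 = 99, i.e. roughly 1%, which steers
   basic-block reordering to keep the fall-through (success) path of the
   atomic loops inline.  */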
/* A subroutine of the atomic operation splitters.  Emit a load-locked
   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
   the zero_extend operation.  */

static void
emit_load_locked (machine_mode mode, rtx reg, rtx mem)
{
  rtx (*fn) (rtx, rtx) = NULL;

  switch (mode)
    {
    case QImode:
      fn = gen_load_lockedqi;
      break;
    case HImode:
      fn = gen_load_lockedhi;
      break;
    case SImode:
      if (GET_MODE (mem) == QImode)
	fn = gen_load_lockedqi_si;
      else if (GET_MODE (mem) == HImode)
	fn = gen_load_lockedhi_si;
      else
	fn = gen_load_lockedsi;
      break;
    case DImode:
      fn = gen_load_lockeddi;
      break;
    case TImode:
      fn = gen_load_lockedti;
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (fn (reg, mem));
}
/* A subroutine of the atomic operation splitters.  Emit a store-conditional
   instruction in MODE.  */

static void
emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
{
  rtx (*fn) (rtx, rtx, rtx) = NULL;

  switch (mode)
    {
    case QImode:
      fn = gen_store_conditionalqi;
      break;
    case HImode:
      fn = gen_store_conditionalhi;
      break;
    case SImode:
      fn = gen_store_conditionalsi;
      break;
    case DImode:
      fn = gen_store_conditionaldi;
      break;
    case TImode:
      fn = gen_store_conditionalti;
      break;
    default:
      gcc_unreachable ();
    }

  /* Emit sync before stwcx. to address PPC405 Erratum.  */
  if (PPC405_ERRATUM77)
    emit_insn (gen_hwsync ());

  emit_insn (fn (res, mem, val));
}
/* Expand barriers before and after a load_locked/store_cond sequence.  */

static rtx
rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
{
  rtx addr = XEXP (mem, 0);
  int strict_p = (reload_in_progress || reload_completed);

  if (!legitimate_indirect_address_p (addr, strict_p)
      && !legitimate_indexed_address_p (addr, strict_p))
    {
      addr = force_reg (Pmode, addr);
      mem = replace_equiv_address_nv (mem, addr);
    }

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_ACQUIRE:
      break;
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
      emit_insn (gen_lwsync ());
      break;
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_hwsync ());
      break;
    default:
      gcc_unreachable ();
    }
  return mem;
}

static void
rs6000_post_atomic_barrier (enum memmodel model)
{
  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_RELEASE:
      break;
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_isync ());
      break;
    default:
      gcc_unreachable ();
    }
}
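/* Illustrative note (not part of the build; added for exposition): the
   two helpers above map the C11 memory models onto PowerPC fences
   roughly as follows:

       model             before the larx/stcx. loop    after it
       relaxed/consume   nothing                       nothing
       acquire           nothing                       isync
       release           lwsync                        nothing
       acq_rel           lwsync                        isync
       seq_cst           hwsync (sync)                 isync

   i.e. release ordering is enforced before the loop and acquire
   ordering after it.  */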
/* A subroutine of the various atomic expanders.  For sub-word operations,
   we must adjust things to operate on SImode.  Given the original MEM,
   return a new aligned memory.  Also build and return the quantities by
   which to shift and mask.  */

static rtx
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
{
  rtx addr, align, shift, mask, mem;
  HOST_WIDE_INT shift_mask;
  machine_mode mode = GET_MODE (orig_mem);

  /* For smaller modes, we have to implement this via SImode.  */
  shift_mask = (mode == QImode ? 0x18 : 0x10);

  addr = XEXP (orig_mem, 0);
  addr = force_reg (GET_MODE (addr), addr);

  /* Aligned memory containing subword.  Generate a new memory.  We
     do not want any of the existing MEM_ATTR data, as we're now
     accessing memory outside the original object.  */
  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  mem = gen_rtx_MEM (SImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  /* Shift amount for subword relative to aligned word.  */
  shift = gen_reg_rtx (SImode);
  addr = gen_lowpart (SImode, addr);
  rtx tmp = gen_reg_rtx (SImode);
  emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
  emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
  if (BYTES_BIG_ENDIAN)
    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
				 shift, 1, OPTAB_LIB_WIDEN);
  *pshift = shift;

  /* Mask for insertion.  */
  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
  *pmask = mask;

  return mem;
}
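/* Worked example (not part of the build; added for exposition): for a
   QImode access at address A = 0x1003 on little-endian, the code above
   computes

       align = A & -4            = 0x1000      (SImode container)
       shift = (A << 3) & 0x18   = 24          (bit offset of the byte)
       mask  = 0xFF << shift     = 0xFF000000

   On big-endian the shift is XORed with 0x18, giving 0 for the same
   address, since byte 3 is then at the least significant end of the
   aligned word.  */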
/* A subroutine of the various atomic expanders.  For sub-word operands,
   combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */

static rtx
rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
{
  rtx x;

  x = gen_reg_rtx (SImode);
  emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
					  gen_rtx_NOT (SImode, mask),
					  oldval)));

  x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);

  return x;
}
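/* Illustrative note (not part of the build; added for exposition): the
   combination computed above is the usual read-modify-write merge

       result = (oldval & ~mask) | newval;

   where NEWVAL has already been shifted into position and masked, so
   only the bytes selected by MASK are replaced within the aligned
   word.  */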
/* A subroutine of the various atomic expanders.  For sub-word operands,
   extract WIDE to NARROW via SHIFT.  */

static void
rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
{
  wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
			      wide, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
}
/* Expand an atomic compare and swap operation.  */

void
rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
  rtx boolval, retval, mem, oldval, newval, cond;
  rtx label1, label2, x, mask, shift;
  machine_mode mode, orig_mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;

  boolval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (INTVAL (operands[5]) != 0);
  mod_s = memmodel_base (INTVAL (operands[6]));
  mod_f = memmodel_base (INTVAL (operands[7]));
  orig_mode = mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (mode == QImode || mode == HImode)
    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx and shift/mask operations.  With power8, we need to do the
	 comparison in SImode, but the store is still done in QI/HImode.  */
      oldval = convert_modes (SImode, mode, oldval, 1);

      if (!TARGET_SYNC_HI_QI)
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask OLDVAL into position with the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode != TImode && !reg_or_short_operand (oldval, mode))
    oldval = copy_to_mode_reg (mode, oldval);

  if (reg_overlap_mentioned_p (retval, newval))
    newval = copy_to_reg (newval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    x = expand_simple_binop (SImode, AND, retval, mask,
			     NULL_RTX, 1, OPTAB_LIB_WIDEN);

  cond = gen_reg_rtx (CCmode);
  /* If we have TImode, synthesize a comparison.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (cond, x));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (boolval, x));
}
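/* Illustrative sketch (not part of the build; added for exposition): for
   a word-sized strong compare-and-swap, the expansion above corresponds
   roughly to

     .L1:   lwarx   r, 0, mem      # load-reserve
            cmpw    cr0, r, old
            bne-    cr0, .L2       # unlikely: value differs
            stwcx.  new, 0, mem    # store-conditional
            bne-    cr0, .L1       # unlikely: reservation lost
     .L2:

   with the fences from rs6000_pre/post_atomic_barrier around the loop
   and CR0 left holding EQ on success.  */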
/* Expand an atomic exchange operation.  */

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = memmodel_base (INTVAL (operands[3]));
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position with the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = memmodel_base (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On previous systems,
     use the operation in a subword and shift/mask to get the proper byte or
     halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position with the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (val, x));
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    {
      after = expand_simple_binop (mode, code, before, val,
				   after, 1, OPTAB_LIB_WIDEN);
    }

  x = after;
  if (mask)
    {
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
	 then do the calculations in a SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx where we do the native
	 operation and then do the calculations in a SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
/* Emit instructions to move SRC to DST.  Called by splitters for
   multi-register moves.  It will emit at most one instruction for
   each register that is accessed; that is, it won't emit li/lis pairs
   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
   register.  */

void
rs6000_split_multireg_move (rtx dst, rtx src)
{
  /* The register number of the first register being moved.  */
  int reg;
  /* The mode that is to be moved.  */
  machine_mode mode;
  /* The mode that the move is being done in, and its size.  */
  machine_mode reg_mode;
  int reg_mode_size;
  /* The number of registers that will be moved.  */
  int nregs;

  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
  mode = GET_MODE (dst);
  nregs = hard_regno_nregs[reg][mode];
  if (FP_REGNO_P (reg))
    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
	((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
  else if (ALTIVEC_REGNO_P (reg))
    reg_mode = V16QImode;
  else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
    reg_mode = DFmode;
  else
    reg_mode = word_mode;
  reg_mode_size = GET_MODE_SIZE (reg_mode);

  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));

  /* TDmode residing in FP registers is special, since the ISA requires that
     the lower-numbered word of a register pair is always the most significant
     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
     the appropriate constituent registers "by hand" in little-endian mode.

     Note we do not need to check for destructive overlap here since TDmode
     can only reside in even/odd register pairs.  */
  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
    {
      rtx p_src, p_dst;
      int i;

      for (i = 0; i < nregs; i++)
	{
	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
	  else
	    p_src = simplify_gen_subreg (reg_mode, src, mode,
					 i * reg_mode_size);

	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
	  else
	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
					 i * reg_mode_size);

	  emit_insn (gen_rtx_SET (p_dst, p_src));
	}

      return;
    }

  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
    {
      /* Move register range backwards, if we might have destructive
	 overlap.  */
      int i;
      for (i = nregs - 1; i >= 0; i--)
	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
						     i * reg_mode_size),
				simplify_gen_subreg (reg_mode, src, mode,
						     i * reg_mode_size)));
    }
  else
    {
      int i;
      int j = -1;
      bool used_update = false;
      rtx restore_basereg = NULL_RTX;

      if (MEM_P (src) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (src, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      src = replace_equiv_address (src, breg);
	    }
	  else if (! rs6000_offsettable_memref_p (src, reg_mode))
	    {
	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (src, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
		      emit_insn (gen_rtx_SET (ndst,
					      gen_rtx_MEM (reg_mode,
							   XEXP (src, 0))));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (src, 0), 1)));
		  src = replace_equiv_address (src, basereg);
		}
	      else
		{
		  rtx basereg = gen_rtx_REG (Pmode, reg);
		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
		  src = replace_equiv_address (src, basereg);
		}
	    }

	  breg = XEXP (src, 0);
	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
	    breg = XEXP (breg, 0);

	  /* If the base register we are using to address memory is
	     also a destination reg, then change that register last.  */
	  if (REG_P (breg)
	      && REGNO (breg) >= REGNO (dst)
	      && REGNO (breg) < REGNO (dst) + nregs)
	    j = REGNO (breg) - REGNO (dst);
	}
      else if (MEM_P (dst) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (dst, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));

	      /* We have to update the breg before doing the store.
		 Use store with update, if available.  */

	      if (TARGET_UPDATE)
		{
		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		  emit_insn (TARGET_32BIT
			     ? (TARGET_POWERPC64
				? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
				: gen_movsi_update (breg, breg, delta_rtx, nsrc))
			     : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
		  used_update = true;
		}
	      else
		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      dst = replace_equiv_address (dst, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (dst, reg_mode)
		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    {
	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
							   XEXP (dst, 0)),
					      nsrc));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (dst, 0), 1)));
		  dst = replace_equiv_address (dst, basereg);
		}
	      else
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
			      && REG_P (basereg)
			      && REG_P (offsetreg)
			      && REGNO (basereg) != REGNO (offsetreg));
		  if (REGNO (basereg) == 0)
		    {
		      rtx tmp = offsetreg;
		      offsetreg = basereg;
		      basereg = tmp;
		    }
		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
		  restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
		  dst = replace_equiv_address (dst, basereg);
		}
	    }
	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
	}

      for (i = 0; i < nregs; i++)
	{
	  /* Calculate index to next subword.  */
	  ++j;
	  if (j == nregs)
	    j = 0;

	  /* If compiler already emitted move of first word by
	     store with update, no need to do anything.  */
	  if (j == 0 && used_update)
	    continue;

	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
						       j * reg_mode_size),
				  simplify_gen_subreg (reg_mode, src, mode,
						       j * reg_mode_size)));
	}
      if (restore_basereg != NULL_RTX)
	emit_insn (restore_basereg);
    }
}
/* This page contains routines that are used to determine what the
   function prologue and epilogue code will do and write them out.  */

static inline bool
save_reg_p (int r)
{
  return !call_used_regs[r] && df_regs_ever_live_p (r);
}
/* Determine whether the gp REG is really used.  */

static bool
rs6000_reg_live_or_pic_offset_p (int reg)
{
  /* We need to mark the PIC offset register live for the same conditions
     as it is set up, or otherwise it won't be saved before we clobber it.  */
  if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_TOC && TARGET_MINIMAL_TOC
	  && (crtl->calls_eh_return
	      || df_regs_ever_live_p (reg)
	      || !constant_pool_empty_p ()))
	return true;

      if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
	  && flag_pic)
	return true;
    }

  /* If the function calls eh_return, claim used all the registers that would
     be checked for liveness otherwise.  */
  return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
	  && !call_used_regs[reg]);
}
/* Return the first fixed-point register that is required to be
   saved.  32 if none.  */

int
first_reg_to_save (void)
{
  int first_reg;

  /* Find lowest numbered live register.  */
  for (first_reg = 13; first_reg <= 31; first_reg++)
    if (save_reg_p (first_reg))
      break;

  if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
      && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
	  || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
	  || (TARGET_TOC && TARGET_MINIMAL_TOC))
      && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
    first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;

#if TARGET_MACHO
  if (flag_pic
      && crtl->uses_pic_offset_table
      && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
    return RS6000_PIC_OFFSET_TABLE_REGNUM;
#endif

  return first_reg;
}
/* Similar, for FP regs.  */

int
first_fp_reg_to_save (void)
{
  int first_reg;

  /* Find lowest numbered live register.  */
  for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
    if (save_reg_p (first_reg))
      break;

  return first_reg;
}
/* Similar, for AltiVec regs.  */

static int
first_altivec_reg_to_save (void)
{
  int i;

  /* Stack frame remains as is unless we are in AltiVec ABI.  */
  if (! TARGET_ALTIVEC_ABI)
    return LAST_ALTIVEC_REGNO + 1;

  /* On Darwin, the unwind routines are compiled without
     TARGET_ALTIVEC, and use save_world to save/restore the
     altivec registers when necessary.  */
  if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
      && ! TARGET_ALTIVEC)
    return FIRST_ALTIVEC_REGNO + 20;

  /* Find lowest numbered live register.  */
  for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
    if (save_reg_p (i))
      break;

  return i;
}
/* Return a 32-bit mask of the AltiVec registers we need to set in
   VRSAVE.  Bit n of the return value is 1 if Vn is live.  The MSB in
   the 32-bit word is 0.  */

static unsigned int
compute_vrsave_mask (void)
{
  unsigned int i, mask = 0;

  /* On Darwin, the unwind routines are compiled without
     TARGET_ALTIVEC, and use save_world to save/restore the
     call-saved altivec registers when necessary.  */
  if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
      && ! TARGET_ALTIVEC)
    mask |= 0xFFF;

  /* First, find out if we use _any_ altivec registers.  */
  for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
    if (df_regs_ever_live_p (i))
      mask |= ALTIVEC_REG_BIT (i);

  if (mask == 0)
    return mask;

  /* Next, remove the argument registers from the set.  These must
     be in the VRSAVE mask set by the caller, so we don't need to add
     them in again.  More importantly, the mask we compute here is
     used to generate CLOBBERs in the set_vrsave insn, and we do not
     wish the argument registers to die.  */
  for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
    mask &= ~ALTIVEC_REG_BIT (i);

  /* Similarly, remove the return value from the set.  */
  {
    bool yes = false;
    diddle_return_value (is_altivec_return_reg, &yes);
    if (yes)
      mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
  }

  return mask;
}
/* For a very restricted set of circumstances, we can cut down the
   size of prologues/epilogues by calling our own save/restore-the-world
   routines.  */

static void
compute_save_world_info (rs6000_stack_t *info)
{
  info->world_save_p = 1;
  info->world_save_p
    = (WORLD_SAVE_P (info)
       && DEFAULT_ABI == ABI_DARWIN
       && !cfun->has_nonlocal_label
       && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
       && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
       && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
       && info->cr_save_p);

  /* This will not work in conjunction with sibcalls.  Make sure there
     are none.  (This check is expensive, but seldom executed.)  */
  if (WORLD_SAVE_P (info))
    {
      rtx_insn *insn;
      for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
	if (CALL_P (insn) && SIBLING_CALL_P (insn))
	  {
	    info->world_save_p = 0;
	    break;
	  }
    }

  if (WORLD_SAVE_P (info))
    {
      /* Even if we're not touching VRsave, make sure there's room on the
	 stack for it, if it looks like we're calling SAVE_WORLD, which
	 will attempt to save it.  */
      info->vrsave_size = 4;

      /* If we are going to save the world, we need to save the link
	 register too.  */
      info->lr_save_p = 1;

      /* "Save" the VRsave register too if we're saving the world.  */
      if (info->vrsave_mask == 0)
	info->vrsave_mask = compute_vrsave_mask ();

      /* Because the Darwin register save/restore routines only handle
	 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
	 check.  */
      gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
		  && (info->first_altivec_reg_save
		      >= FIRST_SAVED_ALTIVEC_REGNO));
    }

  return;
}
static void
is_altivec_return_reg (rtx reg, void *xyes)
{
  bool *yes = (bool *) xyes;
  if (REGNO (reg) == ALTIVEC_ARG_RETURN)
    *yes = true;
}
/* Return whether REG is a global user reg or has been specified by
   -ffixed-REG.  We should not restore these, and so cannot use
   lmw or out-of-line restore functions if there are any.  We also
   can't save them (well, emit frame notes for them), because frame
   unwinding during exception handling will restore saved registers.  */

static bool
fixed_reg_p (int reg)
{
  /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
     backend sets it, overriding anything the user might have given.  */
  if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
      && ((DEFAULT_ABI == ABI_V4 && flag_pic)
	  || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
	  || (TARGET_TOC && TARGET_MINIMAL_TOC)))
    return false;

  return fixed_regs[reg];
}
/* Determine the strategy for saving/restoring registers.  */

enum {
  SAVE_MULTIPLE = 0x1,
  SAVE_INLINE_GPRS = 0x2,
  SAVE_INLINE_FPRS = 0x4,
  SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
  SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
  SAVE_INLINE_VRS = 0x20,
  REST_MULTIPLE = 0x100,
  REST_INLINE_GPRS = 0x200,
  REST_INLINE_FPRS = 0x400,
  REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
  REST_INLINE_VRS = 0x1000
};
static int
rs6000_savres_strategy (rs6000_stack_t *info,
			bool using_static_chain_p)
{
  int strategy = 0;

  /* Select between in-line and out-of-line save and restore of regs.
     First, all the obvious cases where we don't use out-of-line.  */
  if (crtl->calls_eh_return
      || cfun->machine->ra_need_lr)
    strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
		 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
		 | SAVE_INLINE_VRS | REST_INLINE_VRS);

  if (info->first_gp_reg_save == 32)
    strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;

  if (info->first_fp_reg_save == 64
      /* The out-of-line FP routines use double-precision stores;
	 we can't use those routines if we don't have such stores.  */
      || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
    strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;

  if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
    strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;

  /* Define cutoff for using out-of-line functions to save registers.  */
  if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
    {
      if (!optimize_size)
	{
	  strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
	  strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
	  strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
	}
      else
	{
	  /* Prefer out-of-line restore if it will exit.  */
	  if (info->first_fp_reg_save > 61)
	    strategy |= SAVE_INLINE_FPRS;
	  if (info->first_gp_reg_save > 29)
	    {
	      if (info->first_fp_reg_save == 64)
		strategy |= SAVE_INLINE_GPRS;
	      else
		strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
	    }
	  if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
	    strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
	}
    }
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      if (info->first_fp_reg_save > 60)
	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
      if (info->first_gp_reg_save > 29)
	strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
      strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
    }
  else
    {
      gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
      if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
	  || info->first_fp_reg_save > 61)
	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
      strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
      strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
    }

  /* Don't bother to try to save things out-of-line if r11 is occupied
     by the static chain.  It would require too much fiddling and the
     static chain is rarely used anyway.  FPRs are saved w.r.t the stack
     pointer on Darwin, and AIX uses r1 or r12.  */
  if (using_static_chain_p
      && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
    strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
		 | SAVE_INLINE_GPRS
		 | SAVE_INLINE_VRS);

  /* Saving CR interferes with the exit routines used on the SPE, so
     just punt here.  */
  if (TARGET_SPE_ABI
      && info->spe_64bit_regs_used
      && info->cr_save_p)
    strategy |= REST_INLINE_GPRS;

  /* We can only use the out-of-line routines to restore fprs if we've
     saved all the registers from first_fp_reg_save in the prologue.
     Otherwise, we risk loading garbage.  Of course, if we have saved
     out-of-line then we know we haven't skipped any fprs.  */
  if ((strategy & SAVE_INLINE_FPRS)
      && !(strategy & REST_INLINE_FPRS))
    {
      int i;

      for (i = info->first_fp_reg_save; i < 64; i++)
	if (fixed_regs[i] || !save_reg_p (i))
	  {
	    strategy |= REST_INLINE_FPRS;
	    break;
	  }
    }

  /* Similarly, for altivec regs.  */
  if ((strategy & SAVE_INLINE_VRS)
      && !(strategy & REST_INLINE_VRS))
    {
      int i;

      for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
	if (fixed_regs[i] || !save_reg_p (i))
	  {
	    strategy |= REST_INLINE_VRS;
	    break;
	  }
    }

  /* info->lr_save_p isn't yet set if the only reason lr needs to be
     saved is an out-of-line save or restore.  Set up the value for
     the next test (excluding out-of-line gprs).  */
  bool lr_save_p = (info->lr_save_p
		    || !(strategy & SAVE_INLINE_FPRS)
		    || !(strategy & SAVE_INLINE_VRS)
		    || !(strategy & REST_INLINE_FPRS)
		    || !(strategy & REST_INLINE_VRS));

  if (TARGET_MULTIPLE
      && !TARGET_POWERPC64
      && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
      && info->first_gp_reg_save < 31
      && !(flag_shrink_wrap
	   && flag_shrink_wrap_separate
	   && optimize_function_for_speed_p (cfun)))
    {
      /* Prefer store multiple for saves over out-of-line routines,
	 since the store-multiple instruction will always be smaller.  */
      strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;

      /* The situation is more complicated with load multiple.  We'd
	 prefer to use the out-of-line routines for restores, since the
	 "exit" out-of-line routines can handle the restore of LR and the
	 frame teardown.  However it doesn't make sense to use the
	 out-of-line routine if that is the only reason we'd need to save
	 LR, and we can't use the "exit" out-of-line gpr restore if we
	 have saved some fprs; in those cases it is advantageous to use
	 load multiple when available.  */
      if (info->first_fp_reg_save != 64 || !lr_save_p)
	strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
    }

  /* Using the "exit" out-of-line routine does not improve code size
     if using it would require lr to be saved and if only saving one
     or two gprs.  */
  else if (!lr_save_p && info->first_gp_reg_save > 29)
    strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;

  /* We can only use load multiple or the out-of-line routines to
     restore gprs if we've saved all the registers from
     first_gp_reg_save.  Otherwise, we risk loading garbage.
     Of course, if we have saved out-of-line or used stmw then we know
     we haven't skipped any gprs.  */
  if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
      && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
    {
      int i;

      for (i = info->first_gp_reg_save; i < 32; i++)
	if (fixed_reg_p (i) || !save_reg_p (i))
	  {
	    strategy |= REST_INLINE_GPRS;
	    strategy &= ~REST_MULTIPLE;
	    break;
	  }
    }

  if (TARGET_ELF && TARGET_64BIT)
    {
      if (!(strategy & SAVE_INLINE_FPRS))
	strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
      else if (!(strategy & SAVE_INLINE_GPRS)
	       && info->first_fp_reg_save == 64)
	strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
    }
  else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
    strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;

  if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
    strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;

  return strategy;
}
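/* Illustrative note (not part of the build; added for exposition): the
   strategy word is a bit set, so a typical -Os V.4 function that can use
   the out-of-line "exit" restore might end up with, e.g.,

       strategy = SAVE_MULTIPLE | SAVE_INLINE_GPRS;

   i.e. stmw for the save (always smaller) while REST_INLINE_GPRS stays
   clear, letting the epilogue tail-call the out-of-line restore routine,
   which also tears down the frame and restores LR.  */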
/* Calculate the stack information for the current function.  This is
   complicated by having two separate calling sequences, the AIX calling
   sequence and the V.4 calling sequence.

   AIX (and Darwin/Mac OS X) stack frames look like:
							  32-bit  64-bit
	SP---->	+---------------------------------------+
		| back chain to caller			| 0	  0
		+---------------------------------------+
		| saved CR				| 4	  8 (8-11)
		+---------------------------------------+
		| saved LR				| 8	  16
		+---------------------------------------+
		| reserved for compilers		| 12	  24
		+---------------------------------------+
		| reserved for binders			| 16	  32
		+---------------------------------------+
		| saved TOC pointer			| 20	  40
		+---------------------------------------+
		| Parameter save area (+padding*) (P)	| 24	  48
		+---------------------------------------+
		| Alloca space (A)			| 24+P	  etc.
		+---------------------------------------+
		| Local variable space (L)		| 24+P+A
		+---------------------------------------+
		| Float/int conversion temporary (X)	| 24+P+A+L
		+---------------------------------------+
		| Save area for AltiVec registers (W)	| 24+P+A+L+X
		+---------------------------------------+
		| AltiVec alignment padding (Y)		| 24+P+A+L+X+W
		+---------------------------------------+
		| Save area for VRSAVE register (Z)	| 24+P+A+L+X+W+Y
		+---------------------------------------+
		| Save area for GP registers (G)	| 24+P+A+X+L+X+W+Y+Z
		+---------------------------------------+
		| Save area for FP registers (F)	| 24+P+A+X+L+X+W+Y+Z+G
		+---------------------------------------+
	old SP->| back chain to caller's caller		|
		+---------------------------------------+

   * If the alloca area is present, the parameter save area is
     padded so that the former starts 16-byte aligned.

   The required alignment for AIX configurations is two words (i.e., 8
   or 16 bytes).

   The ELFv2 ABI is a variant of the AIX ABI.  Stack frames look like:

	SP---->	+---------------------------------------+
		| Back chain to caller			|  0
		+---------------------------------------+
		| Save area for CR			|  8
		+---------------------------------------+
		| Saved LR				|  16
		+---------------------------------------+
		| Saved TOC pointer			|  24
		+---------------------------------------+
		| Parameter save area (+padding*) (P)	|  32
		+---------------------------------------+
		| Alloca space (A)			|  32+P
		+---------------------------------------+
		| Local variable space (L)		|  32+P+A
		+---------------------------------------+
		| Save area for AltiVec registers (W)	|  32+P+A+L
		+---------------------------------------+
		| AltiVec alignment padding (Y)		|  32+P+A+L+W
		+---------------------------------------+
		| Save area for GP registers (G)	|  32+P+A+L+W+Y
		+---------------------------------------+
		| Save area for FP registers (F)	|  32+P+A+L+W+Y+G
		+---------------------------------------+
	old SP->| back chain to caller's caller		|  32+P+A+L+W+Y+G+F
		+---------------------------------------+

   * If the alloca area is present, the parameter save area is
     padded so that the former starts 16-byte aligned.

   V.4 stack frames look like:

	SP---->	+---------------------------------------+
		| back chain to caller			| 0
		+---------------------------------------+
		| caller's saved LR			| 4
		+---------------------------------------+
		| Parameter save area (+padding*) (P)	| 8
		+---------------------------------------+
		| Alloca space (A)			| 8+P
		+---------------------------------------+
		| Varargs save area (V)			| 8+P+A
		+---------------------------------------+
		| Local variable space (L)		| 8+P+A+V
		+---------------------------------------+
		| Float/int conversion temporary (X)	| 8+P+A+V+L
		+---------------------------------------+
		| Save area for AltiVec registers (W)	| 8+P+A+V+L+X
		+---------------------------------------+
		| AltiVec alignment padding (Y)		| 8+P+A+V+L+X+W
		+---------------------------------------+
		| Save area for VRSAVE register (Z)	| 8+P+A+V+L+X+W+Y
		+---------------------------------------+
		| SPE: area for 64-bit GP registers	|
		+---------------------------------------+
		| SPE alignment padding			|
		+---------------------------------------+
		| saved CR (C)				| 8+P+A+V+L+X+W+Y+Z
		+---------------------------------------+
		| Save area for GP registers (G)	| 8+P+A+V+L+X+W+Y+Z+C
		+---------------------------------------+
		| Save area for FP registers (F)	| 8+P+A+V+L+X+W+Y+Z+C+G
		+---------------------------------------+
	old SP->| back chain to caller's caller		|
		+---------------------------------------+

   * If the alloca area is present and the required alignment is
     16 bytes, the parameter save area is padded so that the
     alloca area starts 16-byte aligned.

   The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
   given.  (But note below and in sysv4.h that we require only 8 and
   may round up the size of our stack frame anyways.  The historical
   reason is early versions of powerpc-linux which didn't properly
   align the stack at program startup.  A happy side-effect is that
   -mno-eabi libraries can be used with -meabi programs.)

   The EABI configuration defaults to the V.4 layout.  However,
   the stack alignment requirements may differ.  If -mno-eabi is not
   given, the required stack alignment is 8 bytes; if -mno-eabi is
   given, the required alignment is 16 bytes.  (But see V.4 comment
   above.)  */

#ifndef ABI_STACK_BOUNDARY
#define ABI_STACK_BOUNDARY STACK_BOUNDARY
#endif
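/* Worked example (not part of the build; added for exposition): on
   32-bit AIX, a function that saves GPRs r28..r31 and FPRs f30..f31 and
   needs no other save areas gets, per the AIX diagram above and the
   computation below,

       fp_size = 8 * 2 = 16      fp_save_offset = -16
       gp_size = 4 * 4 = 16      gp_save_offset = -32

   relative to the incoming stack pointer, with the fixed 24-byte link
   area and the parameter area sitting below the new SP.  */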
static rs6000_stack_t *
rs6000_stack_info (void)
{
  /* We should never be called for thunks, we are not set up for that.  */
  gcc_assert (!cfun->is_thunk);

  rs6000_stack_t *info = &stack_info;
  int reg_size = TARGET_32BIT ? 4 : 8;
  int ehrd_size;
  int ehcr_size;
  int save_align;
  int first_gp;
  HOST_WIDE_INT non_fixed_size;
  bool using_static_chain_p;

  if (reload_completed && info->reload_completed)
    return info;

  memset (info, 0, sizeof (*info));
  info->reload_completed = reload_completed;

  if (TARGET_SPE)
    {
      /* Cache value so we don't rescan instruction chain over and over.  */
      if (cfun->machine->spe_insn_chain_scanned_p == 0)
	cfun->machine->spe_insn_chain_scanned_p
	  = spe_func_has_64bit_regs_p () + 1;
      info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
    }

  /* Select which calling sequence.  */
  info->abi = DEFAULT_ABI;

  /* Calculate which registers need to be saved & save area size.  */
  info->first_gp_reg_save = first_reg_to_save ();
  /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
     even if it currently looks like we won't.  Reload may need it to
     get at a constant; if so, it will have already created a constant
     pool entry for it.  */
  if (((TARGET_TOC && TARGET_MINIMAL_TOC)
       || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
       || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
      && crtl->uses_const_pool
      && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
    first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
  else
    first_gp = info->first_gp_reg_save;

  info->gp_size = reg_size * (32 - first_gp);

  /* For the SPE, we have an additional upper 32-bits on each GPR.
     Ideally we should save the entire 64-bits only when the upper
     half is used in SIMD instructions.  Since we only record
     registers live (not the size they are used in), this proves
     difficult because we'd have to traverse the instruction chain at
     the right time, taking reload into account.  This is a real pain,
     so we opt to save the GPRs in 64-bits always if but one register
     gets used in 64-bits.  Otherwise, all the registers in the frame
     get saved in 32-bits.

     So...  since when we save all GPRs (except the SP) in 64-bits, the
     traditional GP save area will be empty.  */
  if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
    info->gp_size = 0;

  info->first_fp_reg_save = first_fp_reg_to_save ();
  info->fp_size = 8 * (64 - info->first_fp_reg_save);

  info->first_altivec_reg_save = first_altivec_reg_to_save ();
  info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
			     - info->first_altivec_reg_save);

  /* Does this function call anything?  */
  info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);

  /* Determine if we need to save the condition code registers.  */
  if (save_reg_p (CR2_REGNO)
      || save_reg_p (CR3_REGNO)
      || save_reg_p (CR4_REGNO))
    {
      info->cr_save_p = 1;
      if (DEFAULT_ABI == ABI_V4)
	info->cr_size = reg_size;
    }

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;
      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
	continue;

      /* SPE saves EH registers in 64-bits.  */
      ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
		       ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
    }
  else
    ehrd_size = 0;

  /* In the ELFv2 ABI, we also need to allocate space for separate
     CR field save areas if the function calls __builtin_eh_return.  */
  if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
    {
      /* This hard-codes that we have three call-saved CR fields.  */
      ehcr_size = 3 * reg_size;
      /* We do *not* use the regular CR save mechanism.  */
      info->cr_save_p = 0;
    }
  else
    ehcr_size = 0;

  /* Determine various sizes.  */
  info->reg_size = reg_size;
  info->fixed_size = RS6000_SAVE_AREA;
  info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
  if (cfun->calls_alloca)
    info->parm_size =
      RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
		    STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
  else
    info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
				    TARGET_ALTIVEC ? 16 : 8);
  if (FRAME_GROWS_DOWNWARD)
    info->vars_size
      += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
		       ABI_STACK_BOUNDARY / BITS_PER_UNIT)
	 - (info->fixed_size + info->vars_size + info->parm_size);

  if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
    info->spe_gp_size = 8 * (32 - first_gp);

  if (TARGET_ALTIVEC_ABI)
    info->vrsave_mask = compute_vrsave_mask ();

  if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
    info->vrsave_size = 4;

  compute_save_world_info (info);

  /* Calculate the offsets.  */
  switch (DEFAULT_ABI)
    {
    case ABI_NONE:
    default:
      gcc_unreachable ();

    case ABI_AIX:
    case ABI_ELFv2:
    case ABI_DARWIN:
      info->fp_save_offset = -info->fp_size;
      info->gp_save_offset = info->fp_save_offset - info->gp_size;

      if (TARGET_ALTIVEC_ABI)
	{
	  info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;

	  /* Align stack so vector save area is on a quadword boundary.
	     The padding goes above the vectors.  */
	  if (info->altivec_size != 0)
	    info->altivec_padding_size = info->vrsave_save_offset & 0xF;

	  info->altivec_save_offset = info->vrsave_save_offset
				      - info->altivec_padding_size
				      - info->altivec_size;
	  gcc_assert (info->altivec_size == 0
		      || info->altivec_save_offset % 16 == 0);

	  /* Adjust for AltiVec case.  */
	  info->ehrd_offset = info->altivec_save_offset - ehrd_size;
	}
      else
	info->ehrd_offset = info->gp_save_offset - ehrd_size;

      info->ehcr_offset = info->ehrd_offset - ehcr_size;
      info->cr_save_offset = reg_size; /* first word when 64-bit.  */
      info->lr_save_offset = 2*reg_size;
      break;

    case ABI_V4:
      info->fp_save_offset = -info->fp_size;
      info->gp_save_offset = info->fp_save_offset - info->gp_size;
      info->cr_save_offset = info->gp_save_offset - info->cr_size;

      if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
	{
	  /* Align stack so SPE GPR save area is aligned on a
	     double-word boundary.  */
	  if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
	    info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
	  else
	    info->spe_padding_size = 0;

	  info->spe_gp_save_offset = info->cr_save_offset
				     - info->spe_padding_size
				     - info->spe_gp_size;

	  /* Adjust for SPE case.  */
	  info->ehrd_offset = info->spe_gp_save_offset;
	}
      else if (TARGET_ALTIVEC_ABI)
	{
	  info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;

	  /* Align stack so vector save area is on a quadword boundary.  */
	  if (info->altivec_size != 0)
	    info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);

	  info->altivec_save_offset = info->vrsave_save_offset
				      - info->altivec_padding_size
				      - info->altivec_size;

	  /* Adjust for AltiVec case.  */
	  info->ehrd_offset = info->altivec_save_offset;
	}
      else
	info->ehrd_offset = info->cr_save_offset;

      info->ehrd_offset -= ehrd_size;
      info->lr_save_offset = reg_size;
      break;
    }

  save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
  info->save_size = RS6000_ALIGN (info->fp_size
				  + info->gp_size
				  + info->altivec_size
				  + info->altivec_padding_size
				  + info->spe_gp_size
				  + info->spe_padding_size
				  + ehrd_size
				  + ehcr_size
				  + info->cr_size
				  + info->vrsave_size,
				  save_align);

  non_fixed_size = info->vars_size + info->parm_size + info->save_size;

  info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
				   ABI_STACK_BOUNDARY / BITS_PER_UNIT);

  /* Determine if we need to save the link register.  */
  if (info->calls_p
      || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	  && crtl->profile
	  && !TARGET_PROFILE_KERNEL)
      || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
#ifdef TARGET_RELOCATABLE
      || (DEFAULT_ABI == ABI_V4
	  && (TARGET_RELOCATABLE || flag_pic > 1)
	  && !constant_pool_empty_p ())
#endif
      || rs6000_ra_ever_killed ())
    info->lr_save_p = 1;

  using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
			  && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
			  && call_used_regs[STATIC_CHAIN_REGNUM]);
  info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);

  if (!(info->savres_strategy & SAVE_INLINE_GPRS)
      || !(info->savres_strategy & SAVE_INLINE_FPRS)
      || !(info->savres_strategy & SAVE_INLINE_VRS)
      || !(info->savres_strategy & REST_INLINE_GPRS)
      || !(info->savres_strategy & REST_INLINE_FPRS)
      || !(info->savres_strategy & REST_INLINE_VRS))
    info->lr_save_p = 1;

  if (info->lr_save_p)
    df_set_regs_ever_live (LR_REGNO, true);

  /* Determine if we need to allocate any stack frame:

     For AIX we need to push the stack if a frame pointer is needed
     (because the stack might be dynamically adjusted), if we are
     debugging, if we make calls, or if the sum of fp_save, gp_save,
     and local variables are more than the space needed to save all
     non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
     + 18*8 = 288 (GPR13 reserved).

     For V.4 we don't have the stack cushion that AIX uses, but assume
     that the debugger can handle stackless frames.  */

  if (info->calls_p)
    info->push_p = 1;

  else if (DEFAULT_ABI == ABI_V4)
    info->push_p = non_fixed_size != 0;

  else if (frame_pointer_needed)
    info->push_p = 1;

  else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
    info->push_p = 1;

  else
    info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);

  return info;
}
/* Return true if the current function uses any GPRs in 64-bit SIMD
   mode.  */

static bool
spe_func_has_64bit_regs_p (void)
{
  rtx_insn *insns, *insn;

  /* Functions that save and restore all the call-saved registers will
     need to save/restore the registers in 64-bits.  */
  if (crtl->calls_eh_return
      || cfun->calls_setjmp
      || crtl->has_nonlocal_goto)
    return true;

  insns = get_insns ();

  for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	{
	  rtx i;

	  /* FIXME: This should be implemented with attributes...

		 (set_attr "spe64" "true")....then,
		 if (get_spe64 (insn)) return true;

	     It's the only reliable way to do the stuff below.  */

	  i = PATTERN (insn);
	  if (GET_CODE (i) == SET)
	    {
	      machine_mode mode = GET_MODE (SET_SRC (i));

	      if (SPE_VECTOR_MODE (mode))
		return true;
	      if (TARGET_E500_DOUBLE
		  && (mode == DFmode || FLOAT128_2REG_P (mode)))
		return true;
	    }
	}
    }

  return false;
}
static void
debug_stack_info (rs6000_stack_t *info)
{
  const char *abi_string;

  if (! info)
    info = rs6000_stack_info ();

  fprintf (stderr, "\nStack information for function %s:\n",
	   ((current_function_decl && DECL_NAME (current_function_decl))
	    ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
	    : "<unknown>"));

  switch (info->abi)
    {
    default:		abi_string = "Unknown";	break;
    case ABI_NONE:	abi_string = "NONE";	break;
    case ABI_AIX:	abi_string = "AIX";	break;
    case ABI_ELFv2:	abi_string = "ELFv2";	break;
    case ABI_DARWIN:	abi_string = "Darwin";	break;
    case ABI_V4:	abi_string = "V.4";	break;
    }

  fprintf (stderr, "\tABI                 = %5s\n", abi_string);

  if (TARGET_ALTIVEC_ABI)
    fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");

  if (TARGET_SPE_ABI)
    fprintf (stderr, "\tSPE ABI extensions enabled.\n");

  if (info->first_gp_reg_save != 32)
    fprintf (stderr, "\tfirst_gp_reg_save   = %5d\n", info->first_gp_reg_save);

  if (info->first_fp_reg_save != 64)
    fprintf (stderr, "\tfirst_fp_reg_save   = %5d\n", info->first_fp_reg_save);

  if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
    fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
	     info->first_altivec_reg_save);

  if (info->lr_save_p)
    fprintf (stderr, "\tlr_save_p           = %5d\n", info->lr_save_p);

  if (info->cr_save_p)
    fprintf (stderr, "\tcr_save_p           = %5d\n", info->cr_save_p);

  if (info->vrsave_mask)
    fprintf (stderr, "\tvrsave_mask         = 0x%x\n", info->vrsave_mask);

  if (info->push_p)
    fprintf (stderr, "\tpush_p              = %5d\n", info->push_p);

  if (info->calls_p)
    fprintf (stderr, "\tcalls_p             = %5d\n", info->calls_p);

  if (info->gp_save_offset)
    fprintf (stderr, "\tgp_save_offset      = %5d\n", info->gp_save_offset);

  if (info->fp_save_offset)
    fprintf (stderr, "\tfp_save_offset      = %5d\n", info->fp_save_offset);

  if (info->altivec_size)
    fprintf (stderr, "\taltivec_save_offset = %5d\n",
	     info->altivec_save_offset);

  if (info->spe_gp_size)
    fprintf (stderr, "\tspe_gp_save_offset  = %5d\n",
	     info->spe_gp_save_offset);

  if (info->vrsave_size)
    fprintf (stderr, "\tvrsave_save_offset  = %5d\n",
	     info->vrsave_save_offset);

  if (info->lr_save_p)
    fprintf (stderr, "\tlr_save_offset      = %5d\n", info->lr_save_offset);

  if (info->cr_save_p)
    fprintf (stderr, "\tcr_save_offset      = %5d\n", info->cr_save_offset);

  if (info->varargs_save_offset)
    fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);

  if (info->total_size)
    fprintf (stderr, "\ttotal_size          = " HOST_WIDE_INT_PRINT_DEC "\n",
	     info->total_size);

  if (info->vars_size)
    fprintf (stderr, "\tvars_size           = " HOST_WIDE_INT_PRINT_DEC "\n",
	     info->vars_size);

  if (info->parm_size)
    fprintf (stderr, "\tparm_size           = %5d\n", info->parm_size);

  if (info->fixed_size)
    fprintf (stderr, "\tfixed_size          = %5d\n", info->fixed_size);

  if (info->gp_size)
    fprintf (stderr, "\tgp_size             = %5d\n", info->gp_size);

  if (info->spe_gp_size)
    fprintf (stderr, "\tspe_gp_size         = %5d\n", info->spe_gp_size);

  if (info->fp_size)
    fprintf (stderr, "\tfp_size             = %5d\n", info->fp_size);

  if (info->altivec_size)
    fprintf (stderr, "\taltivec_size        = %5d\n", info->altivec_size);

  if (info->vrsave_size)
    fprintf (stderr, "\tvrsave_size         = %5d\n", info->vrsave_size);

  if (info->altivec_padding_size)
    fprintf (stderr, "\taltivec_padding_size= %5d\n",
	     info->altivec_padding_size);

  if (info->spe_padding_size)
    fprintf (stderr, "\tspe_padding_size    = %5d\n",
	     info->spe_padding_size);

  if (info->cr_size)
    fprintf (stderr, "\tcr_size             = %5d\n", info->cr_size);

  if (info->save_size)
    fprintf (stderr, "\tsave_size           = %5d\n", info->save_size);

  if (info->reg_size != 4)
    fprintf (stderr, "\treg_size            = %5d\n", info->reg_size);

  fprintf (stderr, "\tsave-strategy       = %04x\n", info->savres_strategy);

  fprintf (stderr, "\n");
}
rtx
rs6000_return_addr (int count, rtx frame)
{
  /* Currently we don't optimize very well between prolog and body
     code and for PIC code the code can be actually quite bad, so
     don't try to be too clever here.  */
  if (count != 0
      || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
    {
      cfun->machine->ra_needs_full_frame = 1;

      return
	gen_rtx_MEM
	  (Pmode,
	   memory_address
	     (Pmode,
	      plus_constant (Pmode,
			     copy_to_reg
			       (gen_rtx_MEM (Pmode,
					     memory_address (Pmode, frame))),
			     RETURN_ADDRESS_OFFSET)));
    }

  cfun->machine->ra_need_lr = 1;
  return get_hard_reg_initial_val (Pmode, LR_REGNO);
}
/* Say whether a function is a candidate for sibcall handling or not.  */

static bool
rs6000_function_ok_for_sibcall (tree decl, tree exp)
{
  tree fntype;

  if (decl)
    fntype = TREE_TYPE (decl);
  else
    fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

  /* We can't do it if the called function has more vector parameters
     than the current function; there's nowhere to put the VRsave code.  */
  if (TARGET_ALTIVEC_ABI
      && TARGET_ALTIVEC_VRSAVE
      && !(decl && decl == current_function_decl))
    {
      function_args_iterator args_iter;
      tree type;
      int nvreg = 0;

      /* Functions with vector parameters are required to have a
	 prototype, so the argument type info must be available
	 here.  */
      FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
	if (TREE_CODE (type) == VECTOR_TYPE
	    && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
	  nvreg++;

      FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
	if (TREE_CODE (type) == VECTOR_TYPE
	    && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
	  nvreg--;

      if (nvreg > 0)
	return false;
    }

  /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
     functions, because the callee may have a different TOC pointer to
     the caller and there's no way to ensure we restore the TOC when
     we return.  With the secure-plt SYSV ABI we can't make non-local
     calls when -fpic/PIC because the plt call stubs use r30.  */
  if (DEFAULT_ABI == ABI_DARWIN
      || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	  && decl
	  && !DECL_EXTERNAL (decl)
	  && !DECL_WEAK (decl)
	  && (*targetm.binds_local_p) (decl))
      || (DEFAULT_ABI == ABI_V4
	  && (!TARGET_SECURE_PLT
	      || !flag_pic
	      || (decl
		  && (*targetm.binds_local_p) (decl)))))
    {
      tree attr_list = TYPE_ATTRIBUTES (fntype);

      if (!lookup_attribute ("longcall", attr_list)
	  || lookup_attribute ("shortcall", attr_list))
	return true;
    }

  return false;
}
static int
rs6000_ra_ever_killed (void)
{
  rtx_insn *top;
  rtx reg;
  rtx_insn *insn;

  if (cfun->is_thunk)
    return 0;

  if (cfun->machine->lr_save_state)
    return cfun->machine->lr_save_state - 1;

  /* regs_ever_live has LR marked as used if any sibcalls are present,
     but this should not force saving and restoring in the
     pro/epilogue.  Likewise, reg_set_between_p thinks a sibcall
     clobbers LR, so that is inappropriate.  */

  /* Also, the prologue can generate a store into LR that
     doesn't really count, like this:

        move LR->R0
        bcl to set PIC register
        move LR->R31
        move R0->LR

     When we're called from the epilogue, we need to avoid counting
     this as a store.  */

  push_topmost_sequence ();
  top = get_insns ();
  pop_topmost_sequence ();
  reg = gen_rtx_REG (Pmode, LR_REGNO);

  for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	{
	  if (CALL_P (insn))
	    {
	      if (!SIBLING_CALL_P (insn))
		return 1;
	    }
	  else if (find_regno_note (insn, REG_INC, LR_REGNO))
	    return 1;
	  else if (set_of (reg, insn) != NULL_RTX
		   && !prologue_epilogue_contains (insn))
	    return 1;
	}
    }
  return 0;
}
/* Emit instructions needed to load the TOC register.
   This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
   a constant pool; or for SVR4 -fpic.  */

void
rs6000_emit_load_toc_table (int fromprolog)
{
  rtx dest;
  dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
    {
      char buf[30];
      rtx lab, tmp1, tmp2, got;

      lab = gen_label_rtx ();
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
      lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
      if (flag_pic == 2)
	{
	  got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
	  need_toc_init = 1;
	}
      else
	got = rs6000_got_sym ();
      tmp1 = tmp2 = dest;
      if (!fromprolog)
	{
	  tmp1 = gen_reg_rtx (Pmode);
	  tmp2 = gen_reg_rtx (Pmode);
	}
      emit_insn (gen_load_toc_v4_PIC_1 (lab));
      emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
      emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
      emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
    }
  else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
    {
      emit_insn (gen_load_toc_v4_pic_si ());
      emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
    }
  else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
    {
      char buf[30];
      rtx temp0 = (fromprolog
		   ? gen_rtx_REG (Pmode, 0)
		   : gen_reg_rtx (Pmode));

      if (fromprolog)
	{
	  rtx symF, symL;

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
	  symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));

	  emit_insn (gen_load_toc_v4_PIC_1 (symF));
	  emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
	  emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
	}
      else
	{
	  rtx tocsym, lab;

	  tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
	  need_toc_init = 1;
	  lab = gen_label_rtx ();
	  emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
	  emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
	  if (TARGET_LINK_STACK)
	    emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
	  emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
	}
      emit_insn (gen_addsi3 (dest, temp0, dest));
    }
  else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
    {
      /* This is for AIX code running in non-PIC ELF32.  */
      rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));

      need_toc_init = 1;
      emit_insn (gen_elf_high (dest, realsym));
      emit_insn (gen_elf_low (dest, dest, realsym));
    }
  else
    {
      gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);

      if (TARGET_32BIT)
	emit_insn (gen_load_toc_aix_si (dest));
      else
	emit_insn (gen_load_toc_aix_di (dest));
    }
}
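
/* As a rough illustration (not verbatim from the machine description):
   the SVR4 PIC cases above all boil down to the classic get-PC idiom,
   e.g. for -fPIC something like

	bcl 20,31,.LCF0		# branch-and-link to the next insn
   .LCF0:
	mflr 30			# PC into the PIC register
	addis 30,30,(.LCTOC1-.LCF0)@ha
	addi 30,30,(.LCTOC1-.LCF0)@l

   The precise sequence is determined by the load_toc_* patterns in
   rs6000.md and varies with flag_pic and TARGET_SECURE_PLT.  */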
/* Emit instructions to restore the link register after determining where
   its value has been stored.  */

void
rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  rtx operands[2];

  operands[0] = source;
  operands[1] = scratch;

  if (info->lr_save_p)
    {
      rtx frame_rtx = stack_pointer_rtx;
      HOST_WIDE_INT sp_offset = 0;
      rtx tmp;

      if (frame_pointer_needed
	  || cfun->calls_alloca
	  || info->total_size > 32767)
	{
	  tmp = gen_frame_mem (Pmode, frame_rtx);
	  emit_move_insn (operands[1], tmp);
	  frame_rtx = operands[1];
	}
      else if (info->push_p)
	sp_offset = info->total_size;

      tmp = plus_constant (Pmode, frame_rtx,
			   info->lr_save_offset + sp_offset);
      tmp = gen_frame_mem (Pmode, tmp);
      emit_move_insn (tmp, operands[0]);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);

  /* Freeze lr_save_p.  We've just emitted rtl that depends on the
     state of lr_save_p so any change from here on would be a bug.  In
     particular, stop rs6000_ra_ever_killed from considering the SET
     of lr we may have added just above.  */
  cfun->machine->lr_save_state = info->lr_save_p + 1;
}
static GTY(()) alias_set_type set = -1;

alias_set_type
get_TOC_alias_set (void)
{
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* This returns nonzero if the current function uses the TOC.  This is
   determined by the presence of (use (unspec ... UNSPEC_TOC)), which
   is generated by the ABI_V4 load_toc_* patterns.  */
#if TARGET_ELF
static int
uses_TOC (void)
{
  rtx_insn *insn;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	rtx pat = PATTERN (insn);
	int i;

	if (GET_CODE (pat) == PARALLEL)
	  for (i = 0; i < XVECLEN (pat, 0); i++)
	    {
	      rtx sub = XVECEXP (pat, 0, i);
	      if (GET_CODE (sub) == USE)
		{
		  sub = XEXP (sub, 0);
		  if (GET_CODE (sub) == UNSPEC
		      && XINT (sub, 1) == UNSPEC_TOC)
		    return 1;
		}
	    }
      }
  return 0;
}
#endif
rtx
create_TOC_reference (rtx symbol, rtx largetoc_reg)
{
  rtx tocrel, tocreg, hi;

  if (TARGET_DEBUG_ADDR)
    {
      if (GET_CODE (symbol) == SYMBOL_REF)
	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
		 XSTR (symbol, 0));
      else
	{
	  fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
		   GET_RTX_NAME (GET_CODE (symbol)));
	  debug_rtx (symbol);
	}
    }

  if (!can_create_pseudo_p ())
    df_set_regs_ever_live (TOC_REGISTER, true);

  tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
  tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
  if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
    return tocrel;

  hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
  if (largetoc_reg != NULL)
    {
      emit_move_insn (largetoc_reg, hi);
      hi = largetoc_reg;
    }
  return gen_rtx_LO_SUM (Pmode, hi, tocrel);
}
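
/* Illustrative note: for -mcmodel=medium/large on 64-bit ELF, the
   HIGH/LO_SUM pair built above typically ends up as

	addis 9,2,var@toc@ha
	ld    9,var@toc@l(9)

   i.e. the HIGH becomes an addis off the TOC pointer (r2) and the
   LO_SUM folds into the displacement of the memory access.  Under
   CMODEL_SMALL the bare tocrel UNSPEC is returned instead and the
   reference stays a single 16-bit TOC-relative displacement.  */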
/* Issue assembly directives that create a reference to the given DWARF
   FRAME_TABLE_LABEL from the current function section.  */

void
rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
{
  fprintf (asm_out_file, "\t.ref %s\n",
	   (* targetm.strip_name_encoding) (frame_table_label));
}
/* This ties together stack memory (MEM with an alias set of frame_alias_set)
   and the change to the stack pointer.  */

static void
rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
{
  rtvec p;
  int i;
  rtx regs[3];

  i = 0;
  regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  if (hard_frame_needed)
    regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
  if (!(REGNO (fp) == STACK_POINTER_REGNUM
	|| (hard_frame_needed
	    && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
    regs[i++] = fp;

  p = rtvec_alloc (i);
  while (--i >= 0)
    {
      rtx mem = gen_frame_mem (BLKmode, regs[i]);
      RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
    }

  emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
}
/* Emit the correct code for allocating stack space, as insns.
   If COPY_REG, make sure a copy of the old frame is left there.
   The generated code may use hard register 0 as a temporary.  */

static rtx_insn *
rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
{
  rtx_insn *insn;
  rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx tmp_reg = gen_rtx_REG (Pmode, 0);
  rtx todec = gen_int_mode (-size, Pmode);
  rtx par, set, mem;

  if (INTVAL (todec) != -size)
    {
      warning (0, "stack frame too large");
      emit_insn (gen_trap ());
      return 0;
    }

  if (crtl->limit_stack)
    {
      if (REG_P (stack_limit_rtx)
	  && REGNO (stack_limit_rtx) > 1
	  && REGNO (stack_limit_rtx) <= 31)
	{
	  emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
	  emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
				    const0_rtx));
	}
      else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
	       && TARGET_32BIT
	       && DEFAULT_ABI == ABI_V4
	       && !flag_pic)
	{
	  rtx toload = gen_rtx_CONST (VOIDmode,
				      gen_rtx_PLUS (Pmode,
						    stack_limit_rtx,
						    GEN_INT (size)));

	  emit_insn (gen_elf_high (tmp_reg, toload));
	  emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
	  emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
				    const0_rtx));
	}
      else
	warning (0, "stack limit expression is not supported");
    }

  if (copy_reg)
    {
      if (copy_off != 0)
	emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
      else
	emit_move_insn (copy_reg, stack_reg);
    }

  if (size > 32767)
    {
      /* Need a note here so that try_split doesn't get confused.  */
      if (get_last_insn () == NULL_RTX)
	emit_note (NOTE_INSN_DELETED);
      insn = emit_move_insn (tmp_reg, todec);
      try_split (PATTERN (insn), insn, 0);
      todec = tmp_reg;
    }

  insn = emit_insn (TARGET_32BIT
		    ? gen_movsi_update_stack (stack_reg, stack_reg,
					      todec, stack_reg)
		    : gen_movdi_di_update_stack (stack_reg, stack_reg,
						 todec, stack_reg));
  /* Since we didn't use gen_frame_mem to generate the MEM, grab
     it now and set the alias set/attributes.  The above gen_*_update
     calls will generate a PARALLEL with the MEM set being the first
     operation.  */
  par = PATTERN (insn);
  gcc_assert (GET_CODE (par) == PARALLEL);
  set = XVECEXP (par, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = SET_DEST (set);
  gcc_assert (MEM_P (mem));
  MEM_NOTRAP_P (mem) = 1;
  set_mem_alias_set (mem, get_frame_alias_set ());

  RTX_FRAME_RELATED_P (insn) = 1;
  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
						      GEN_INT (-size))));
  return insn;
}
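
/* In the common case the update insn emitted above is a single
   store-with-update that both allocates the frame and saves the back
   chain, e.g. (illustrative)

	stwu 1,-SIZE(1)		# 32-bit
	stdu 1,-SIZE(1)		# 64-bit

   while frames larger than 32767 bytes first load -SIZE into r0 and
   use the indexed forms stwux/stdux.  */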
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 32768
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (first + size <= 32768)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      rtx r12 = gen_rtx_REG (Pmode, 12);
      rtx r0 = gen_rtx_REG (Pmode, 0);

      /* Sanity check for the addressing mode we're going to use.  */
      gcc_assert (first <= 32768);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
						  -first)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      if (rounded_size > 32768)
	{
	  emit_move_insn (r0, GEN_INT (-rounded_size));
	  emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
	}
      else
	emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
						   -rounded_size)));

      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      if (TARGET_64BIT)
	emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
      else
	emit_insn (gen_probe_stack_rangesi (r12, r12, r0));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
    }
}
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("addi %0,%0,%1", xops);

  /* Probe at TEST_ADDR.  */
  xops[1] = gen_rtx_REG (Pmode, 0);
  output_asm_insn ("stw %1,0(%0)", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  if (TARGET_64BIT)
    output_asm_insn ("cmpd 0,%0,%1", xops);
  else
    output_asm_insn ("cmpw 0,%0,%1", xops);

  /* Branch.  */
  fputs ("\tbne 0,", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
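
/* With the default PROBE_INTERVAL of 4096, the emitted loop looks
   roughly like this (illustrative 64-bit output):

   .LPSRL0:
	addi 12,12,-4096	# TEST_ADDR -= PROBE_INTERVAL
	stw  0,0(12)		# probe at TEST_ADDR
	cmpd 0,12,0		# TEST_ADDR == LAST_ADDR ?
	bne  0,.LPSRL0  */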
/* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
   with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
   is not NULL.  It would be nice if dwarf2out_frame_debug_expr could
   deduce these equivalences by itself so it wasn't necessary to hold
   its hand so much.  Don't be tempted to always supply d2_f_d_e with
   the actual cfa register, ie. r31 when we are using a hard frame
   pointer.  That fails when saving regs off r1, and sched moves the
   r31 setup past the reg saves.  */

static rtx_insn *
rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
		      rtx reg2, rtx repl2)
{
  rtx repl;

  if (REGNO (reg) == STACK_POINTER_REGNUM)
    {
      gcc_checking_assert (val == 0);
      repl = NULL_RTX;
    }
  else
    repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
			 GEN_INT (val));

  rtx pat = PATTERN (insn);
  if (!repl && !reg2)
    {
      /* No need for any replacement.  Just set RTX_FRAME_RELATED_P.  */
      if (GET_CODE (pat) == PARALLEL)
	for (int i = 0; i < XVECLEN (pat, 0); i++)
	  if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
	    {
	      rtx set = XVECEXP (pat, 0, i);

	      /* If this PARALLEL has been emitted for out-of-line
		 register save functions, or store multiple, then omit
		 eh_frame info for any user-defined global regs.  If
		 eh_frame info is supplied, frame unwinding will
		 restore a user reg.  */
	      if (!REG_P (SET_SRC (set))
		  || !fixed_reg_p (REGNO (SET_SRC (set))))
		RTX_FRAME_RELATED_P (set) = 1;
	    }
      RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  /* We expect that 'pat' is either a SET or a PARALLEL containing
     SETs (and possibly other stuff).  In a PARALLEL, all the SETs
     are important so they all have to be marked RTX_FRAME_RELATED_P.
     Call simplify_replace_rtx on the SETs rather than the whole insn
     so as to leave the other stuff alone (for example USE of r12).  */

  set_used_flags (pat);
  if (GET_CODE (pat) == SET)
    {
      if (repl)
	pat = simplify_replace_rtx (pat, reg, repl);
      if (reg2)
	pat = simplify_replace_rtx (pat, reg2, repl2);
    }
  else if (GET_CODE (pat) == PARALLEL)
    {
      pat = shallow_copy_rtx (pat);
      XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));

      for (int i = 0; i < XVECLEN (pat, 0); i++)
	if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
	  {
	    rtx set = XVECEXP (pat, 0, i);

	    if (repl)
	      set = simplify_replace_rtx (set, reg, repl);
	    if (reg2)
	      set = simplify_replace_rtx (set, reg2, repl2);
	    XVECEXP (pat, 0, i) = set;

	    /* Omit eh_frame info for any user-defined global regs.  */
	    if (!REG_P (SET_SRC (set))
		|| !fixed_reg_p (REGNO (SET_SRC (set))))
	      RTX_FRAME_RELATED_P (set) = 1;
	  }
    }
  else
    gcc_unreachable ();

  RTX_FRAME_RELATED_P (insn) = 1;
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));

  return insn;
}
/* Returns an insn that has a vrsave set operation with the
   appropriate CLOBBERs.  */

static rtx
generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
{
  int nclobs, i;
  rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
  rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);

  clobs[0]
    = gen_rtx_SET (vrsave,
		   gen_rtx_UNSPEC_VOLATILE (SImode,
					    gen_rtvec (2, reg, vrsave),
					    UNSPECV_SET_VRSAVE));

  nclobs = 1;

  /* We need to clobber the registers in the mask so the scheduler
     does not move sets to VRSAVE before sets of AltiVec registers.

     However, if the function receives nonlocal gotos, reload will set
     all call saved registers live.  We will end up with:

     	(set (reg 999) (mem))
     	(parallel [ (set (reg vrsave) (unspec blah))
     		    (clobber (reg 999))])

     The clobber will cause the store into reg 999 to be dead, and
     flow will attempt to delete an epilogue insn.  In this case, we
     need an unspec use/set of the register.  */

  for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
    if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
      {
	if (!epiloguep || call_used_regs[i])
	  clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
					     gen_rtx_REG (V4SImode, i));
	else
	  {
	    rtx reg = gen_rtx_REG (V4SImode, i);

	    clobs[nclobs++]
	      = gen_rtx_SET (reg,
			     gen_rtx_UNSPEC (V4SImode,
					     gen_rtvec (1, reg), 27));
	  }
      }

  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));

  for (i = 0; i < nclobs; ++i)
    XVECEXP (insn, 0, i) = clobs[i];

  return insn;
}
static rtx
gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
{
  rtx addr, mem;

  addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
  mem = gen_frame_mem (GET_MODE (reg), addr);
  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
}

static rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, false);
}

static rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, true);
}
/* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
   Save REGNO into [FRAME_REG + OFFSET] in mode MODE.  */

static rtx_insn *
emit_frame_save (rtx frame_reg, machine_mode mode,
		 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
{
  rtx reg;

  /* Some cases that need register indexed addressing.  */
  gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
			 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
			 || (TARGET_E500_DOUBLE && mode == DFmode)
			 || (TARGET_SPE_ABI
			     && SPE_VECTOR_MODE (mode)
			     && !SPE_CONST_OFFSET_OK (offset))));

  reg = gen_rtx_REG (mode, regno);
  rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
  return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
			       NULL_RTX, NULL_RTX);
}
/* Emit an offset memory reference suitable for a frame store, while
   converting to a valid addressing mode.  */

static rtx
gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
{
  rtx int_rtx, offset_rtx;

  int_rtx = GEN_INT (offset);

  if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
      || (TARGET_E500_DOUBLE && mode == DFmode))
    {
      offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
      emit_move_insn (offset_rtx, int_rtx);
    }
  else
    offset_rtx = int_rtx;

  return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
}
#ifndef TARGET_FIX_AND_CONTINUE
#define TARGET_FIX_AND_CONTINUE 0
#endif

/* It's really GPR 13 or 14, FPR 14 and VR 20.  We need the smallest.  */
#define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
#define LAST_SAVRES_REGISTER 31
#define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)

enum {
  SAVRES_LR = 0x1,
  SAVRES_SAVE = 0x2,
  SAVRES_REG = 0x0c,
  SAVRES_GPR = 0,
  SAVRES_FPR = 4,
  SAVRES_VR  = 8
};

static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];

/* Temporary holding space for an out-of-line register save/restore
   routine name.  */
static char savres_routine_name[30];

/* Return the name for an out-of-line register save/restore routine.
   We are saving/restoring GPRs if GPR is true.  */

static char *
rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
{
  const char *prefix = "";
  const char *suffix = "";

  /* Different targets are supposed to define
     {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
     routine name could be defined with:

     sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)

     This is a nice idea in practice, but in reality, things are
     complicated in several ways:

     - ELF targets have save/restore routines for GPRs.

     - SPE targets use different prefixes for 32/64-bit registers, and
       neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.

     - PPC64 ELF targets have routines for save/restore of GPRs that
       differ in what they do with the link register, so having a set
       prefix doesn't work.  (We only use one of the save routines at
       the moment, though.)

     - PPC32 elf targets have "exit" versions of the restore routines
       that restore the link register and can save some extra space.
       These require an extra suffix.  (There are also "tail" versions
       of the restore routines and "GOT" versions of the save routines,
       but we don't generate those at present.  Same problems apply,
       though.)

     We deal with all this by synthesizing our own prefix/suffix and
     using that for the simple sprintf call shown above.  */
  if (TARGET_SPE)
    {
      /* No floating point saves on the SPE.  */
      gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);

      if ((sel & SAVRES_SAVE))
	prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
      else
	prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";

      if ((sel & SAVRES_LR))
	suffix = "_x";
    }
  else if (DEFAULT_ABI == ABI_V4)
    {
      if (TARGET_64BIT)
	goto aix_names;

      if ((sel & SAVRES_REG) == SAVRES_GPR)
	prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
      else if ((sel & SAVRES_REG) == SAVRES_FPR)
	prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
      else if ((sel & SAVRES_REG) == SAVRES_VR)
	prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
      else
	abort ();

      if ((sel & SAVRES_LR))
	suffix = "_x";
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
      /* No out-of-line save/restore routines for GPRs on AIX.  */
      gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
#endif

    aix_names:
      if ((sel & SAVRES_REG) == SAVRES_GPR)
	prefix = ((sel & SAVRES_SAVE)
		  ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
		  : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
      else if ((sel & SAVRES_REG) == SAVRES_FPR)
	{
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
	  if ((sel & SAVRES_LR))
	    prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
	  else
#endif
	    {
	      prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
	      suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
	    }
	}
      else if ((sel & SAVRES_REG) == SAVRES_VR)
	prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
      else
	abort ();
    }

   if (DEFAULT_ABI == ABI_DARWIN)
    {
      /* The Darwin approach is (slightly) different, in order to be
	 compatible with code generated by the system toolchain.  There is a
	 single symbol for the start of save sequence, and the code here
	 embeds an offset into that code on the basis of the first register
	 to be saved.  */
      prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
      if ((sel & SAVRES_REG) == SAVRES_GPR)
	sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
		 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
		 (regno - 13) * 4, prefix, regno);
      else if ((sel & SAVRES_REG) == SAVRES_FPR)
	sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
		 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
      else if ((sel & SAVRES_REG) == SAVRES_VR)
	sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
		 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
      else
	abort ();
    }
  else
    sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);

  return savres_routine_name;
}
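
/* For example (illustrative), on 64-bit ELF this synthesizes names such
   as "_savegpr0_29" (save r29..r31, LR-saving variant) or "_restgpr1_14",
   while 32-bit SVR4 produces "_savegpr_29" or "_restgpr_29_x" (the "exit"
   variant that also reloads the link register).  */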
/* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
   We are saving/restoring GPRs if GPR is true.  */

static rtx
rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
{
  int regno = ((sel & SAVRES_REG) == SAVRES_GPR
	       ? info->first_gp_reg_save
	       : (sel & SAVRES_REG) == SAVRES_FPR
	       ? info->first_fp_reg_save - 32
	       : (sel & SAVRES_REG) == SAVRES_VR
	       ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
	       : -1);
  rtx sym;
  int select = sel;

  /* On the SPE, we never have any FPRs, but we do have 32/64-bit
     versions of the gpr routines.  */
  if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
      && info->spe_64bit_regs_used)
    select ^= SAVRES_FPR ^ SAVRES_GPR;

  /* Don't generate bogus routine names.  */
  gcc_assert (FIRST_SAVRES_REGISTER <= regno
	      && regno <= LAST_SAVRES_REGISTER
	      && select >= 0 && select <= 12);

  sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];

  if (sym == NULL)
    {
      char *name;

      name = rs6000_savres_routine_name (info, regno, sel);

      sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
	= gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
    }

  return sym;
}
/* Emit a sequence of insns, including a stack tie if needed, for
   resetting the stack pointer.  If UPDT_REGNO is not 1, then don't
   reset the stack pointer, but move the base of the frame into
   reg UPDT_REGNO for use by out-of-line register restore routines.  */

static rtx_insn *
rs6000_emit_stack_reset (rs6000_stack_t *info,
			 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
			 unsigned updt_regno)
{
  /* If there is nothing to do, don't do anything.  */
  if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
    return NULL;

  rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);

  /* This blockage is needed so that sched doesn't decide to move
     the sp change before the register restores.  */
  if (DEFAULT_ABI == ABI_V4
      || (TARGET_SPE_ABI
	  && info->spe_64bit_regs_used != 0
	  && info->first_gp_reg_save != 32))
    return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
					     GEN_INT (frame_off)));

  /* If we are restoring registers out-of-line, we will be using the
     "exit" variants of the restore routines, which will reset the
     stack for us.  But we do need to point updt_reg into the
     right place for those routines.  */
  if (frame_off != 0)
    return emit_insn (gen_add3_insn (updt_reg_rtx,
				     frame_reg_rtx, GEN_INT (frame_off)));
  else
    return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
}
/* Return the register number used as a pointer by out-of-line
   save/restore functions.  */

static inline unsigned
ptr_regno_for_savres (int sel)
{
  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
  return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
}
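
/* That is: AIX and ELFv2 use r1 for the FPR routines and for any
   LR-saving/restoring variant, r12 otherwise; Darwin uses r1 for the
   FPR routines and r11 otherwise; V.4 uses r11 throughout.  */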
/* Construct a parallel rtx describing the effect of a call to an
   out-of-line register save/restore routine, and emit the insn
   or jump_insn as appropriate.  */

static rtx_insn *
rs6000_emit_savres_rtx (rs6000_stack_t *info,
			rtx frame_reg_rtx, int save_area_offset, int lr_offset,
			machine_mode reg_mode, int sel)
{
  int i;
  int offset, start_reg, end_reg, n_regs, use_reg;
  int reg_size = GET_MODE_SIZE (reg_mode);
  rtx sym;
  rtvec p;
  rtx par;
  rtx_insn *insn;

  offset = 0;
  start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
	       ? info->first_gp_reg_save
	       : (sel & SAVRES_REG) == SAVRES_FPR
	       ? info->first_fp_reg_save
	       : (sel & SAVRES_REG) == SAVRES_VR
	       ? info->first_altivec_reg_save
	       : -1);
  end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
	     ? 32
	     : (sel & SAVRES_REG) == SAVRES_FPR
	     ? 64
	     : (sel & SAVRES_REG) == SAVRES_VR
	     ? LAST_ALTIVEC_REGNO + 1
	     : -1);
  n_regs = end_reg - start_reg;
  p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
		   + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
		   + n_regs);

  if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
    RTVEC_ELT (p, offset++) = ret_rtx;

  RTVEC_ELT (p, offset++)
    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));

  sym = rs6000_savres_routine_sym (info, sel);
  RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);

  use_reg = ptr_regno_for_savres (sel);
  if ((sel & SAVRES_REG) == SAVRES_VR)
    {
      /* Vector regs are saved/restored using [reg+reg] addressing.  */
      RTVEC_ELT (p, offset++)
	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
      RTVEC_ELT (p, offset++)
	= gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
    }
  else
    RTVEC_ELT (p, offset++)
      = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));

  for (i = 0; i < end_reg - start_reg; i++)
    RTVEC_ELT (p, i + offset)
      = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
		       frame_reg_rtx, save_area_offset + reg_size * i,
		       (sel & SAVRES_SAVE) != 0);

  if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
    RTVEC_ELT (p, i + offset)
      = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);

  par = gen_rtx_PARALLEL (VOIDmode, p);

  if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
    {
      insn = emit_jump_insn (par);
      JUMP_LABEL (insn) = ret_rtx;
    }
  else
    insn = emit_insn (par);
  return insn;
}
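
/* Schematically, the PARALLEL built above looks like this for a 64-bit
   ELF GPR save with the LR variant (illustrative):

	(parallel [(clobber (reg:DI LR_REGNO))
		   (use (symbol_ref "_savegpr0_29"))
		   (use (reg:DI 1))			; pointer register
		   (set (mem:DI (plus r1 off)) (reg:DI 29))
		   ...
		   (set (mem:DI (plus r1 lr_off)) (reg:DI 0))])

   For a restore with SAVRES_LR, a return rtx is prepended instead and
   the PARALLEL is emitted as a jump_insn.  */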
/* Emit code to store CR fields that need to be saved into REG.  */

static void
rs6000_emit_move_from_cr (rtx reg)
{
  /* Only the ELFv2 ABI allows storing only selected fields.  */
  if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
    {
      int i, cr_reg[8], count = 0;

      /* Collect CR fields that must be saved.  */
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  cr_reg[count++] = i;

      /* If it's just a single one, use mfcrf.  */
      if (count == 1)
	{
	  rtvec p = rtvec_alloc (1);
	  rtvec r = rtvec_alloc (2);
	  RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
	  RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
	  RTVEC_ELT (p, 0)
	    = gen_rtx_SET (reg,
			   gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));

	  emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
	  return;
	}

      /* ??? It might be better to handle count == 2 / 3 cases here
	 as well, using logical operations to combine the values.  */
    }

  emit_insn (gen_movesi_from_cr (reg));
}
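
/* The single-field case above relies on mfcrf (mfocrf): FXM mask bit
   (1 << (7 - n)) selects CR field n, so only the one field that needs
   saving is copied into REG, instead of the whole condition register
   as a plain mfcr would do.  */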
/* Return whether the split-stack arg pointer (r12) is used.  */

static bool
split_stack_arg_pointer_used_p (void)
{
  /* If the pseudo holding the arg pointer is no longer a pseudo,
     then the arg pointer is used.  */
  if (cfun->machine->split_stack_arg_pointer != NULL_RTX
      && (!REG_P (cfun->machine->split_stack_arg_pointer)
	  || (REGNO (cfun->machine->split_stack_arg_pointer)
	      < FIRST_PSEUDO_REGISTER)))
    return true;

  /* Unfortunately we also need to do some code scanning, since
     r12 may have been substituted for the pseudo.  */
  rtx_insn *insn;
  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
  FOR_BB_INSNS (bb, insn)
    if (NONDEBUG_INSN_P (insn))
      {
	/* A call destroys r12.  */
	if (CALL_P (insn))
	  return false;

	df_ref use;
	FOR_EACH_INSN_USE (use, insn)
	  {
	    rtx x = DF_REF_REG (use);
	    if (REG_P (x) && REGNO (x) == 12)
	      return true;
	  }

	df_ref def;
	FOR_EACH_INSN_DEF (def, insn)
	  {
	    rtx x = DF_REF_REG (def);
	    if (REG_P (x) && REGNO (x) == 12)
	      return false;
	  }
      }
  return bitmap_bit_p (DF_LR_OUT (bb), 12);
}
/* Return whether we need to emit an ELFv2 global entry point prologue.  */

static bool
rs6000_global_entry_point_needed_p (void)
{
  /* Only needed for the ELFv2 ABI.  */
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  /* With -msingle-pic-base, we assume the whole program shares the same
     TOC, so no global entry point prologues are needed anywhere.  */
  if (TARGET_SINGLE_PIC_BASE)
    return false;

  /* Ensure we have a global entry point for thunks.  ??? We could
     avoid that if the target routine doesn't need a global entry point,
     but we do not know whether this is the case at this point.  */
  if (cfun->is_thunk)
    return true;

  /* For regular functions, rs6000_emit_prologue sets this flag if the
     routine ever uses the TOC pointer.  */
  return cfun->machine->r2_setup_needed;
}
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
static sbitmap
rs6000_get_separate_components (void)
{
  rs6000_stack_t *info = rs6000_stack_info ();

  if (WORLD_SAVE_P (info))
    return NULL;

  if (TARGET_SPE_ABI)
    return NULL;

  sbitmap components = sbitmap_alloc (32);
  bitmap_clear (components);

  gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
	      && !(info->savres_strategy & REST_MULTIPLE));

  /* The GPRs we need saved to the frame.  */
  if ((info->savres_strategy & SAVE_INLINE_GPRS)
      && (info->savres_strategy & REST_INLINE_GPRS))
    {
      int reg_size = TARGET_32BIT ? 4 : 8;
      int offset = info->gp_save_offset;
      if (info->push_p)
	offset += info->total_size;

      for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
	{
	  if (IN_RANGE (offset, -0x8000, 0x7fff)
	      && rs6000_reg_live_or_pic_offset_p (regno))
	    bitmap_set_bit (components, regno);

	  offset += reg_size;
	}
    }

  /* Don't mess with the hard frame pointer.  */
  if (frame_pointer_needed)
    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);

  /* Don't mess with the fixed TOC register.  */
  if ((TARGET_TOC && TARGET_MINIMAL_TOC)
      || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
      || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
    bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);

  /* Optimize LR save and restore if we can.  This is component 0.  Any
     out-of-line register save/restore routines need LR.  */
  if (info->lr_save_p
      && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
      && (info->savres_strategy & SAVE_INLINE_GPRS)
      && (info->savres_strategy & REST_INLINE_GPRS)
      && (info->savres_strategy & SAVE_INLINE_FPRS)
      && (info->savres_strategy & REST_INLINE_FPRS)
      && (info->savres_strategy & SAVE_INLINE_VRS)
      && (info->savres_strategy & REST_INLINE_VRS))
    {
      int offset = info->lr_save_offset;
      if (info->push_p)
	offset += info->total_size;
      if (IN_RANGE (offset, -0x8000, 0x7fff))
	bitmap_set_bit (components, 0);
    }

  return components;
}
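
/* Note the component numbering used by this and the following
   shrink-wrap hooks: bit N (from info->first_gp_reg_save up to 31)
   stands for the save/restore of GPR N, and bit 0, which can never
   denote a saved GPR, stands for the LR save/restore.  */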
/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.  */
static sbitmap
rs6000_components_for_bb (basic_block bb)
{
  rs6000_stack_t *info = rs6000_stack_info ();

  bitmap in = DF_LIVE_IN (bb);
  bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
  bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;

  sbitmap components = sbitmap_alloc (32);
  bitmap_clear (components);

  /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets.  */
  for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
    if (bitmap_bit_p (in, regno)
	|| bitmap_bit_p (gen, regno)
	|| bitmap_bit_p (kill, regno))
      bitmap_set_bit (components, regno);

  /* LR needs to be saved around a bb if it is killed in that bb.  */
  if (bitmap_bit_p (in, LR_REGNO)
      || bitmap_bit_p (gen, LR_REGNO)
      || bitmap_bit_p (kill, LR_REGNO))
    bitmap_set_bit (components, 0);

  return components;
}
/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.  */
static void
rs6000_disqualify_components (sbitmap components, edge e,
			      sbitmap edge_components, bool /*is_prologue*/)
{
  /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
     live where we want to place that code.  */
  if (bitmap_bit_p (edge_components, 0)
      && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
    {
      if (dump_file)
	fprintf (dump_file, "Disqualifying LR because GPR0 is live "
		 "on entry to bb %d\n", e->dest->index);
      bitmap_clear_bit (components, 0);
    }
}
/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.  */
static void
rs6000_emit_prologue_components (sbitmap components)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
			     ? HARD_FRAME_POINTER_REGNUM
			     : STACK_POINTER_REGNUM);
  int reg_size = TARGET_32BIT ? 4 : 8;

  /* Prologue for LR.  */
  if (bitmap_bit_p (components, 0))
    {
      rtx reg = gen_rtx_REG (Pmode, 0);
      rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      int offset = info->lr_save_offset;
      if (info->push_p)
	offset += info->total_size;

      insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
      RTX_FRAME_RELATED_P (insn) = 1;
      rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
      rtx mem = copy_rtx (SET_DEST (single_set (insn)));
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
    }

  /* Prologue for the GPRs.  */
  int offset = info->gp_save_offset;
  if (info->push_p)
    offset += info->total_size;

  for (int i = info->first_gp_reg_save; i < 32; i++)
    {
      if (bitmap_bit_p (components, i))
	{
	  rtx reg = gen_rtx_REG (Pmode, i);
	  rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  rtx set = copy_rtx (single_set (insn));
	  add_reg_note (insn, REG_CFA_OFFSET, set);
	}

      offset += reg_size;
    }
}
/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.  */
static void
rs6000_emit_epilogue_components (sbitmap components)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
			     ? HARD_FRAME_POINTER_REGNUM
			     : STACK_POINTER_REGNUM);
  int reg_size = TARGET_32BIT ? 4 : 8;

  /* Epilogue for the GPRs.  */
  int offset = info->gp_save_offset;
  if (info->push_p)
    offset += info->total_size;

  for (int i = info->first_gp_reg_save; i < 32; i++)
    {
      if (bitmap_bit_p (components, i))
	{
	  rtx reg = gen_rtx_REG (Pmode, i);
	  rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_RESTORE, reg);
	}

      offset += reg_size;
    }

  /* Epilogue for LR.  */
  if (bitmap_bit_p (components, 0))
    {
      int offset = info->lr_save_offset;
      if (info->push_p)
	offset += info->total_size;

      rtx reg = gen_rtx_REG (Pmode, 0);
      rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));

      rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
      insn = emit_move_insn (lr, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, lr);
    }
}
/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  */
static void
rs6000_set_handled_components (sbitmap components)
{
  rs6000_stack_t *info = rs6000_stack_info ();

  for (int i = info->first_gp_reg_save; i < 32; i++)
    if (bitmap_bit_p (components, i))
      cfun->machine->gpr_is_wrapped_separately[i] = true;

  if (bitmap_bit_p (components, 0))
    cfun->machine->lr_is_wrapped_separately = true;
}
/* Emit function prologue as insns.  */

void
rs6000_emit_prologue (void)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  machine_mode reg_mode = Pmode;
  int reg_size = TARGET_32BIT ? 4 : 8;
  rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx frame_reg_rtx = sp_reg_rtx;
  unsigned int cr_save_regno;
  rtx cr_save_rtx = NULL_RTX;
  rtx_insn *insn;
  int strategy;
  int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
			      && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
			      && call_used_regs[STATIC_CHAIN_REGNUM]);
  int using_split_stack = (flag_split_stack
			   && (lookup_attribute ("no_split_stack",
						 DECL_ATTRIBUTES (cfun->decl))
			       == NULL));

  /* Offset to top of frame for frame_reg and sp respectively.  */
  HOST_WIDE_INT frame_off = 0;
  HOST_WIDE_INT sp_off = 0;
  /* sp_adjust is the stack adjusting instruction, tracked so that the
     insn setting up the split-stack arg pointer can be emitted just
     prior to it, when r12 is not used here for other purposes.  */
  rtx_insn *sp_adjust = 0;

#if CHECKING_P
  /* Track and check usage of r0, r11, r12.  */
  int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
#define START_USE(R) do \
  {						\
    gcc_assert ((reg_inuse & (1 << (R))) == 0);	\
    reg_inuse |= 1 << (R);			\
  } while (0)
#define END_USE(R) do \
  {						\
    gcc_assert ((reg_inuse & (1 << (R))) != 0);	\
    reg_inuse &= ~(1 << (R));			\
  } while (0)
#define NOT_INUSE(R) do \
  {						\
    gcc_assert ((reg_inuse & (1 << (R))) == 0);	\
  } while (0)
#else
#define START_USE(R) do {} while (0)
#define END_USE(R) do {} while (0)
#define NOT_INUSE(R) do {} while (0)
#endif

  if (DEFAULT_ABI == ABI_ELFv2
      && !TARGET_SINGLE_PIC_BASE)
    {
      cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);

      /* With -mminimal-toc we may generate an extra use of r2 below.  */
      if (TARGET_TOC && TARGET_MINIMAL_TOC
	  && !constant_pool_empty_p ())
	cfun->machine->r2_setup_needed = true;
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size = info->total_size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      HOST_WIDE_INT size = info->total_size;

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
					   size - STACK_CHECK_PROTECT);
	}
      else if (size > 0)
	rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
    }

  if (TARGET_FIX_AND_CONTINUE)
    {
      /* gdb on darwin arranges to forward a function from the old
	 address by modifying the first 5 instructions of the function
	 to branch to the overriding function.  This is necessary to
	 permit function pointers that point to the old function to
	 actually forward to the new function.  */
      emit_insn (gen_nop ());
      emit_insn (gen_nop ());
      emit_insn (gen_nop ());
      emit_insn (gen_nop ());
      emit_insn (gen_nop ());
    }

  if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
    {
      reg_mode = V2SImode;
      reg_size = 8;
    }

  /* Handle world saves specially here.  */
  if (WORLD_SAVE_P (info))
    {
      int i, j, sz;
      rtx treg;
      rtvec p;
      rtx reg0;

      /* save_world expects lr in r0. */
      reg0 = gen_rtx_REG (Pmode, 0);
      if (info->lr_save_p)
	{
	  insn = emit_move_insn (reg0,
				 gen_rtx_REG (Pmode, LR_REGNO));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
	 assumptions about the offsets of various bits of the stack
	 frame.  */
      gcc_assert (info->gp_save_offset == -220
		  && info->fp_save_offset == -144
		  && info->lr_save_offset == 8
		  && info->cr_save_offset == 4
		  && info->push_p
		  && info->lr_save_p
		  && (!crtl->calls_eh_return
		      || info->ehrd_offset == -432)
		  && info->vrsave_save_offset == -224
		  && info->altivec_save_offset == -416);

      treg = gen_rtx_REG (SImode, 11);
      emit_move_insn (treg, GEN_INT (-info->total_size));

      /* SAVE_WORLD takes the caller's LR in R0 and the frame size
	 in R11.  It also clobbers R12, so beware!  */

      /* Preserve CR2 for save_world prologues */
      sz = 5;
      sz += 32 - info->first_gp_reg_save;
      sz += 64 - info->first_fp_reg_save;
      sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
      p = rtvec_alloc (sz);
      j = 0;
      RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
					    gen_rtx_REG (SImode,
							 LR_REGNO));
      RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
					gen_rtx_SYMBOL_REF (Pmode,
							    "*save_world"));
      /* We do floats first so that the instruction pattern matches
	 properly.  */
      for (i = 0; i < 64 - info->first_fp_reg_save; i++)
	RTVEC_ELT (p, j++)
	  = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
					  ? DFmode : SFmode,
					  info->first_fp_reg_save + i),
			     frame_reg_rtx,
			     info->fp_save_offset + frame_off + 8 * i);
      for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
	RTVEC_ELT (p, j++)
	  = gen_frame_store (gen_rtx_REG (V4SImode,
					  info->first_altivec_reg_save + i),
			     frame_reg_rtx,
			     info->altivec_save_offset + frame_off + 16 * i);
      for (i = 0; i < 32 - info->first_gp_reg_save; i++)
	RTVEC_ELT (p, j++)
	  = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
			     frame_reg_rtx,
			     info->gp_save_offset + frame_off + reg_size * i);

      /* CR register traditionally saved as CR2.  */
      RTVEC_ELT (p, j++)
	= gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
			   frame_reg_rtx, info->cr_save_offset + frame_off);
      /* Explain about use of R0.  */
      if (info->lr_save_p)
	RTVEC_ELT (p, j++)
	  = gen_frame_store (reg0,
			     frame_reg_rtx, info->lr_save_offset + frame_off);
      /* Explain what happens to the stack pointer.  */
      {
	rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
	RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
      }

      insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
      rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
			    treg, GEN_INT (-info->total_size));
      sp_off = frame_off = info->total_size;
= info
->savres_strategy
;
29352 /* For V.4, update stack before we do any saving and set back pointer. */
29353 if (! WORLD_SAVE_P (info
)
29355 && (DEFAULT_ABI
== ABI_V4
29356 || crtl
->calls_eh_return
))
29358 bool need_r11
= (TARGET_SPE
29359 ? (!(strategy
& SAVE_INLINE_GPRS
)
29360 && info
->spe_64bit_regs_used
== 0)
29361 : (!(strategy
& SAVE_INLINE_FPRS
)
29362 || !(strategy
& SAVE_INLINE_GPRS
)
29363 || !(strategy
& SAVE_INLINE_VRS
)));
29364 int ptr_regno
= -1;
29365 rtx ptr_reg
= NULL_RTX
;
29368 if (info
->total_size
< 32767)
29369 frame_off
= info
->total_size
;
29372 else if (info
->cr_save_p
29374 || info
->first_fp_reg_save
< 64
29375 || info
->first_gp_reg_save
< 32
29376 || info
->altivec_size
!= 0
29377 || info
->vrsave_size
!= 0
29378 || crtl
->calls_eh_return
)
29382 /* The prologue won't be saving any regs so there is no need
29383 to set up a frame register to access any frame save area.
29384 We also won't be using frame_off anywhere below, but set
29385 the correct value anyway to protect against future
29386 changes to this function. */
29387 frame_off
= info
->total_size
;
29389 if (ptr_regno
!= -1)
29391 /* Set up the frame offset to that needed by the first
29392 out-of-line save function. */
29393 START_USE (ptr_regno
);
29394 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
29395 frame_reg_rtx
= ptr_reg
;
29396 if (!(strategy
& SAVE_INLINE_FPRS
) && info
->fp_size
!= 0)
29397 gcc_checking_assert (info
->fp_save_offset
+ info
->fp_size
== 0);
29398 else if (!(strategy
& SAVE_INLINE_GPRS
) && info
->first_gp_reg_save
< 32)
29399 ptr_off
= info
->gp_save_offset
+ info
->gp_size
;
29400 else if (!(strategy
& SAVE_INLINE_VRS
) && info
->altivec_size
!= 0)
29401 ptr_off
= info
->altivec_save_offset
+ info
->altivec_size
;
29402 frame_off
= -ptr_off
;
29404 sp_adjust
= rs6000_emit_allocate_stack (info
->total_size
,
29406 if (REGNO (frame_reg_rtx
) == 12)
29408 sp_off
= info
->total_size
;
29409 if (frame_reg_rtx
!= sp_reg_rtx
)
29410 rs6000_emit_stack_tie (frame_reg_rtx
, false);
  /* If we use the link register, get it into r0.  */
  if (!WORLD_SAVE_P (info) && info->lr_save_p
      && !cfun->machine->lr_is_wrapped_separately)
    {
      rtx addr, reg, mem;

      reg = gen_rtx_REG (Pmode, 0);
      START_USE (0);
      insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
			| SAVE_NOINLINE_FPRS_SAVES_LR)))
	{
	  addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
			       GEN_INT (info->lr_save_offset + frame_off));
	  mem = gen_rtx_MEM (Pmode, addr);
	  /* This should not be of rs6000_sr_alias_set, because of
	     __builtin_return_address.  */

	  insn = emit_move_insn (mem, reg);
	  rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
				NULL_RTX, NULL_RTX);
	  END_USE (0);
	}
    }

  /* If we need to save CR, put it into r12 or r11.  Choose r12 except when
     r12 will be needed by out-of-line gpr restore.  */
  cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
		   && !(strategy & (SAVE_INLINE_GPRS
				    | SAVE_NOINLINE_GPRS_SAVES_LR))
		   ? 11 : 12);
  if (!WORLD_SAVE_P (info)
      && info->cr_save_p
      && REGNO (frame_reg_rtx) != cr_save_regno
      && !(using_static_chain_p && cr_save_regno == 11)
      && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
    {
      cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
      START_USE (cr_save_regno);
      rs6000_emit_move_from_cr (cr_save_rtx);
    }
  /* Do any required saving of fpr's.  If only one or two to save, do
     it ourselves.  Otherwise, call function.  */
  if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
    {
      int i;
      for (i = 0; i < 64 - info->first_fp_reg_save; i++)
	if (save_reg_p (info->first_fp_reg_save + i))
	  emit_frame_save (frame_reg_rtx,
			   (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
			    ? DFmode : SFmode),
			   info->first_fp_reg_save + i,
			   info->fp_save_offset + frame_off + 8 * i,
			   sp_off - frame_off);
    }
  else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
    {
      bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
      int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
      unsigned ptr_regno = ptr_regno_for_savres (sel);
      rtx ptr_reg = frame_reg_rtx;

      if (REGNO (frame_reg_rtx) == ptr_regno)
	gcc_checking_assert (frame_off == 0);
      else
	{
	  ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
	  NOT_INUSE (ptr_regno);
	  emit_insn (gen_add3_insn (ptr_reg,
				    frame_reg_rtx, GEN_INT (frame_off)));
	}
      insn = rs6000_emit_savres_rtx (info, ptr_reg,
				     info->fp_save_offset,
				     info->lr_save_offset,
				     DFmode, sel);
      rs6000_frame_related (insn, ptr_reg, sp_off,
			    NULL_RTX, NULL_RTX);
      if (lr)
	END_USE (0);
    }
29497 /* Save GPRs. This is done as a PARALLEL if we are using
29498 the store-multiple instructions. */
29499 if (!WORLD_SAVE_P (info
)
29501 && info
->spe_64bit_regs_used
!= 0
29502 && info
->first_gp_reg_save
!= 32)
29505 rtx spe_save_area_ptr
;
29506 HOST_WIDE_INT save_off
;
29507 int ool_adjust
= 0;
29509 /* Determine whether we can address all of the registers that need
29510 to be saved with an offset from frame_reg_rtx that fits in
29511 the small const field for SPE memory instructions. */
29512 int spe_regs_addressable
29513 = (SPE_CONST_OFFSET_OK (info
->spe_gp_save_offset
+ frame_off
29514 + reg_size
* (32 - info
->first_gp_reg_save
- 1))
29515 && (strategy
& SAVE_INLINE_GPRS
));
29517 if (spe_regs_addressable
)
29519 spe_save_area_ptr
= frame_reg_rtx
;
29520 save_off
= frame_off
;
29524 /* Make r11 point to the start of the SPE save area. We need
29525 to be careful here if r11 is holding the static chain. If
29526 it is, then temporarily save it in r0. */
29527 HOST_WIDE_INT offset
;
29529 if (!(strategy
& SAVE_INLINE_GPRS
))
29530 ool_adjust
= 8 * (info
->first_gp_reg_save
- FIRST_SAVED_GP_REGNO
);
29531 offset
= info
->spe_gp_save_offset
+ frame_off
- ool_adjust
;
29532 spe_save_area_ptr
= gen_rtx_REG (Pmode
, 11);
29533 save_off
= frame_off
- offset
;
29535 if (using_static_chain_p
)
29537 rtx r0
= gen_rtx_REG (Pmode
, 0);
29540 gcc_assert (info
->first_gp_reg_save
> 11);
29542 emit_move_insn (r0
, spe_save_area_ptr
);
29544 else if (REGNO (frame_reg_rtx
) != 11)
29547 emit_insn (gen_addsi3 (spe_save_area_ptr
,
29548 frame_reg_rtx
, GEN_INT (offset
)));
29549 if (!using_static_chain_p
&& REGNO (frame_reg_rtx
) == 11)
29550 frame_off
= -info
->spe_gp_save_offset
+ ool_adjust
;
29553 if ((strategy
& SAVE_INLINE_GPRS
))
29555 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
29556 if (rs6000_reg_live_or_pic_offset_p (info
->first_gp_reg_save
+ i
))
29557 emit_frame_save (spe_save_area_ptr
, reg_mode
,
29558 info
->first_gp_reg_save
+ i
,
29559 (info
->spe_gp_save_offset
+ save_off
29561 sp_off
- save_off
);
29565 insn
= rs6000_emit_savres_rtx (info
, spe_save_area_ptr
,
29566 info
->spe_gp_save_offset
+ save_off
,
29568 SAVRES_SAVE
| SAVRES_GPR
);
29570 rs6000_frame_related (insn
, spe_save_area_ptr
, sp_off
- save_off
,
29571 NULL_RTX
, NULL_RTX
);
29574 /* Move the static chain pointer back. */
29575 if (!spe_regs_addressable
)
29577 if (using_static_chain_p
)
29579 emit_move_insn (spe_save_area_ptr
, gen_rtx_REG (Pmode
, 0));
29582 else if (REGNO (frame_reg_rtx
) != 11)
29586 else if (!WORLD_SAVE_P (info
) && !(strategy
& SAVE_INLINE_GPRS
))
29588 bool lr
= (strategy
& SAVE_NOINLINE_GPRS_SAVES_LR
) != 0;
29589 int sel
= SAVRES_SAVE
| SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
29590 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
29591 rtx ptr_reg
= frame_reg_rtx
;
29592 bool ptr_set_up
= REGNO (ptr_reg
) == ptr_regno
;
29593 int end_save
= info
->gp_save_offset
+ info
->gp_size
;
29596 if (ptr_regno
== 12)
29599 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
29601 /* Need to adjust r11 (r12) if we saved any FPRs. */
29602 if (end_save
+ frame_off
!= 0)
29604 rtx offset
= GEN_INT (end_save
+ frame_off
);
29607 frame_off
= -end_save
;
29609 NOT_INUSE (ptr_regno
);
29610 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
29612 else if (!ptr_set_up
)
29614 NOT_INUSE (ptr_regno
);
29615 emit_move_insn (ptr_reg
, frame_reg_rtx
);
29617 ptr_off
= -end_save
;
29618 insn
= rs6000_emit_savres_rtx (info
, ptr_reg
,
29619 info
->gp_save_offset
+ ptr_off
,
29620 info
->lr_save_offset
+ ptr_off
,
29622 rs6000_frame_related (insn
, ptr_reg
, sp_off
- ptr_off
,
29623 NULL_RTX
, NULL_RTX
);
  else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
    {
      rtvec p;

      p = rtvec_alloc (32 - info->first_gp_reg_save);
      for (i = 0; i < 32 - info->first_gp_reg_save; i++)
	RTVEC_ELT (p, i)
	  = gen_frame_store (gen_rtx_REG (reg_mode,
					  info->first_gp_reg_save + i),
			     frame_reg_rtx,
			     info->gp_save_offset + frame_off + reg_size * i);
      insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
      rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
			    NULL_RTX, NULL_RTX);
    }
  else if (!WORLD_SAVE_P (info))
    {
      int offset = info->gp_save_offset + frame_off;
      for (int i = info->first_gp_reg_save; i < 32; i++)
	{
	  if (rs6000_reg_live_or_pic_offset_p (i)
	      && !cfun->machine->gpr_is_wrapped_separately[i])
	    emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
			     sp_off - frame_off);

	  offset += reg_size;
	}
    }
  if (crtl->calls_eh_return)
    {
      unsigned int i;
      rtvec p;

      for (i = 0; ; ++i)
	{
	  unsigned int regno = EH_RETURN_DATA_REGNO (i);
	  if (regno == INVALID_REGNUM)
	    break;
	}

      p = rtvec_alloc (i);

      for (i = 0; ; ++i)
	{
	  unsigned int regno = EH_RETURN_DATA_REGNO (i);
	  if (regno == INVALID_REGNUM)
	    break;

	  rtx set
	    = gen_frame_store (gen_rtx_REG (reg_mode, regno),
			       sp_reg_rtx,
			       info->ehrd_offset + sp_off
			       + reg_size * (int) i);
	  RTVEC_ELT (p, i) = set;
	  RTX_FRAME_RELATED_P (set) = 1;
	}

      insn = emit_insn (gen_blockage ());
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		    gen_rtx_PARALLEL (VOIDmode, p));
    }
  /* In AIX ABI we need to make sure r2 is really saved.  */
  if (TARGET_AIX && crtl->calls_eh_return)
    {
      rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
      rtx join_insn, note;
      rtx_insn *save_insn;
      long toc_restore_insn;

      tmp_reg = gen_rtx_REG (Pmode, 11);
      tmp_reg_si = gen_rtx_REG (SImode, 11);
      if (using_static_chain_p)
	{
	  START_USE (0);
	  emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
	}
      else
	START_USE (11);

      emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
      /* Peek at instruction to which this function returns.  If it's
	 restoring r2, then we know we've already saved r2.  We can't
	 unconditionally save r2 because the value we have will already
	 be updated if we arrived at this function via a plt call or
	 toc adjusting stub.  */
      emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
      toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
			  + RS6000_TOC_SAVE_SLOT);
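      /* Added note (not in the original source): the constants above are
	 the TOC reload instruction with a zero displacement field;
	 adding the save-slot offset completes the encoding.  Assuming
	 the conventional slot offsets, the 32-bit case 0x80410000 + 20
	 gives 0x80410014, i.e. "lwz 2,20(1)", and a 64-bit ELFv2 slot
	 of 24 gives 0xE8410018, i.e. "ld 2,24(1)".  */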
      hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
      emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
      compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
      validate_condition_mode (EQ, CCUNSmode);
      lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
      emit_insn (gen_rtx_SET (compare_result,
			      gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
      toc_save_done = gen_label_rtx ();
      jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
				   gen_rtx_EQ (VOIDmode, compare_result,
					       const0_rtx),
				   gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
				   pc_rtx);
      jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
      JUMP_LABEL (jump) = toc_save_done;
      LABEL_NUSES (toc_save_done) += 1;

      save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
				   TOC_REGNUM,
				   frame_off + RS6000_TOC_SAVE_SLOT,
				   sp_off - frame_off);

      emit_label (toc_save_done);

      /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
	 have a CFG that has different saves along different paths.
	 Move the note to a dummy blockage insn, which describes that
	 R2 is unconditionally saved after the label.  */
      /* ??? An alternate representation might be a special insn pattern
	 containing both the branch and the store.  That might let the
	 code that minimizes the number of DW_CFA_advance opcodes better
	 freedom in placing the annotations.  */
      note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
      if (note)
	remove_note (save_insn, note);
      else
	note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
			       copy_rtx (PATTERN (save_insn)), NULL_RTX);
      RTX_FRAME_RELATED_P (save_insn) = 0;

      join_insn = emit_insn (gen_blockage ());
      REG_NOTES (join_insn) = note;
      RTX_FRAME_RELATED_P (join_insn) = 1;

      if (using_static_chain_p)
	{
	  emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
	  END_USE (0);
	}
      else
	END_USE (11);
    }
  /* Save CR if we use any that must be preserved.  */
  if (!WORLD_SAVE_P (info) && info->cr_save_p)
    {
      rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
			       GEN_INT (info->cr_save_offset + frame_off));
      rtx mem = gen_frame_mem (SImode, addr);

      /* If we didn't copy cr before, do so now using r0.  */
      if (cr_save_rtx == NULL_RTX)
	{
	  START_USE (0);
	  cr_save_rtx = gen_rtx_REG (SImode, 0);
	  rs6000_emit_move_from_cr (cr_save_rtx);
	}

      /* Saving CR requires a two-instruction sequence: one instruction
	 to move the CR to a general-purpose register, and a second
	 instruction that stores the GPR to memory.

	 We do not emit any DWARF CFI records for the first of these,
	 because we cannot properly represent the fact that CR is saved in
	 a register.  One reason is that we cannot express that multiple
	 CR fields are saved; another reason is that on 64-bit, the size
	 of the CR register in DWARF (4 bytes) differs from the size of
	 a general-purpose register.

	 This means if any intervening instruction were to clobber one of
	 the call-saved CR fields, we'd have incorrect CFI.  To prevent
	 this from happening, we mark the store to memory as a use of
	 those CR fields, which prevents any such instruction from being
	 scheduled in between the two instructions.  */
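      /* Added sketch (not in the original source): for a function that
	 preserves CR2..CR4, the store emitted below has roughly the form

	     (parallel [(set (mem:SI <cr save slot>) (reg:SI 0))
			(use (reg:CC <cr2>))
			(use (reg:CC <cr3>))
			(use (reg:CC <cr4>))])

	 so no CR-clobbering insn can be scheduled between the mfcr and
	 the store.  */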
      rtx crsave_v[9];
      int n_crsave = 0;
      int i;

      crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  crsave_v[n_crsave++]
	    = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));

      insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
					  gen_rtvec_v (n_crsave, crsave_v)));
      END_USE (REGNO (cr_save_rtx));

      /* Now, there's no way that dwarf2out_frame_debug_expr is going to
	 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
	 so we need to construct a frame expression manually.  */
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Update address to be stack-pointer relative, like
	 rs6000_frame_related would do.  */
      addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
			   GEN_INT (info->cr_save_offset + sp_off));
      mem = gen_frame_mem (SImode, addr);

      if (DEFAULT_ABI == ABI_ELFv2)
	{
	  /* In the ELFv2 ABI we generate separate CFI records for each
	     CR field that was actually saved.  They all point to the
	     same 32-bit stack slot.  */

	  rtx crframe[8];
	  int n_crframe = 0;

	  for (i = 0; i < 8; i++)
	    if (save_reg_p (CR0_REGNO + i))
	      {
		crframe[n_crframe]
		  = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));

		RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
		n_crframe++;
	      }

	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode,
					  gen_rtvec_v (n_crframe, crframe)));
	}
      else
	{
	  /* In other ABIs, by convention, we use a single CR regnum to
	     represent the fact that all call-saved CR fields are saved.
	     We use CR2_REGNO to be compatible with gcc-2.95 on Linux.  */
	  rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
	}
    }
  /* In the ELFv2 ABI we need to save all call-saved CR fields into
     *separate* slots if the routine calls __builtin_eh_return, so
     that they can be independently restored by the unwinder.  */
  if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
    {
      int i, cr_off = info->ehcr_offset;
      rtx crsave;

      /* ??? We might get better performance by using multiple mfocrf
	 instructions.  */
      crsave = gen_rtx_REG (SImode, 0);
      emit_insn (gen_movesi_from_cr (crsave));

      for (i = 0; i < 8; i++)
	if (!call_used_regs[CR0_REGNO + i])
	  {
	    rtvec p = rtvec_alloc (2);
	    RTVEC_ELT (p, 0)
	      = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
	    RTVEC_ELT (p, 1)
	      = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));

	    insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));

	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			  gen_frame_store (gen_rtx_REG (SImode,
							CR0_REGNO + i),
					   sp_reg_rtx, cr_off + sp_off));

	    cr_off += reg_size;
	  }
    }
  /* Update stack and set back pointer unless this is V.4,
     for which it was done previously.  */
  if (!WORLD_SAVE_P (info) && info->push_p
      && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
    {
      rtx ptr_reg = NULL;
      int ptr_off = 0;

      /* If saving altivec regs we need to be able to address all save
	 locations using a 16-bit offset.  */
      if ((strategy & SAVE_INLINE_VRS) == 0
	  || (info->altivec_size != 0
	      && (info->altivec_save_offset + info->altivec_size - 16
		  + info->total_size - frame_off) > 32767)
	  || (info->vrsave_size != 0
	      && (info->vrsave_save_offset
		  + info->total_size - frame_off) > 32767))
	{
	  int sel = SAVRES_SAVE | SAVRES_VR;
	  unsigned ptr_regno = ptr_regno_for_savres (sel);

	  if (using_static_chain_p
	      && ptr_regno == STATIC_CHAIN_REGNUM)
	    ptr_regno = 12;
	  if (REGNO (frame_reg_rtx) != ptr_regno)
	    START_USE (ptr_regno);
	  ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
	  frame_reg_rtx = ptr_reg;
	  ptr_off = info->altivec_save_offset + info->altivec_size;
	  frame_off = -ptr_off;
	}
      else if (REGNO (frame_reg_rtx) == 1)
	frame_off = info->total_size;
      sp_adjust = rs6000_emit_allocate_stack (info->total_size,
					      ptr_reg, ptr_off);
      if (REGNO (frame_reg_rtx) == 12)
	sp_adjust = 0;
      sp_off = info->total_size;
      if (frame_reg_rtx != sp_reg_rtx)
	rs6000_emit_stack_tie (frame_reg_rtx, false);
    }
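  /* Added note (not in the original source): the "16-bit offset" above
     is the signed displacement of D-form memory instructions, at most
     32767.  For a frame larger than that, the AltiVec and VRSAVE slots
     near the top of the frame cannot be reached from the stack pointer
     directly, so frame_reg_rtx is switched to a pointer register
     anchored at the end of the AltiVec save area instead.  */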
  /* Set frame pointer, if needed.  */
  if (frame_pointer_needed)
    {
      insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
			     sp_reg_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Save AltiVec registers if needed.  Save here because the red zone does
     not always include AltiVec registers.  */
  if (!WORLD_SAVE_P (info)
      && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
    {
      int end_save = info->altivec_save_offset + info->altivec_size;
      int ptr_off;
      /* Oddly, the vector save/restore functions point r0 at the end
	 of the save area, then use r11 or r12 to load offsets for
	 [reg+reg] addressing.  */
      rtx ptr_reg = gen_rtx_REG (Pmode, 0);
      int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
      rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);

      gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
      NOT_INUSE (0);
      if (scratch_regno == 12)
	sp_adjust = 0;
      if (end_save + frame_off != 0)
	{
	  rtx offset = GEN_INT (end_save + frame_off);

	  emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
	}
      else
	emit_move_insn (ptr_reg, frame_reg_rtx);

      ptr_off = -end_save;
      insn = rs6000_emit_savres_rtx (info, scratch_reg,
				     info->altivec_save_offset + ptr_off,
				     0, V4SImode, SAVRES_SAVE | SAVRES_VR);
      rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
			    NULL_RTX, NULL_RTX);
      if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
	{
	  /* The oddity mentioned above clobbered our frame reg.  */
	  emit_move_insn (frame_reg_rtx, ptr_reg);
	  frame_off = ptr_off;
	}
    }
  else if (!WORLD_SAVE_P (info)
	   && info->altivec_size != 0)
    {
      int i;

      for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
	if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
	  {
	    rtx areg, savereg, mem;
	    HOST_WIDE_INT offset;

	    offset = (info->altivec_save_offset + frame_off
		      + 16 * (i - info->first_altivec_reg_save));

	    savereg = gen_rtx_REG (V4SImode, i);

	    if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
	      {
		mem = gen_frame_mem (V4SImode,
				     gen_rtx_PLUS (Pmode, frame_reg_rtx,
						   GEN_INT (offset)));
		insn = emit_insn (gen_rtx_SET (mem, savereg));
		areg = NULL_RTX;
	      }
	    else
	      {
		NOT_INUSE (0);
		areg = gen_rtx_REG (Pmode, 0);
		emit_move_insn (areg, GEN_INT (offset));

		/* AltiVec addressing mode is [reg+reg].  */
		mem = gen_frame_mem (V4SImode,
				     gen_rtx_PLUS (Pmode, frame_reg_rtx,
						   areg));

		/* Rather than emitting a generic move, force use of the
		   stvx instruction, which we always want on ISA 2.07
		   (power8) systems.  In particular we don't want
		   xxpermdi/stxvd2x for little endian.  */
		insn = emit_insn (gen_altivec_stvx_v4si_internal (mem,
								  savereg));
	      }

	    rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
				  areg, GEN_INT (offset));
	  }
    }
  /* VRSAVE is a bit vector representing which AltiVec registers
     are used.  The OS uses this to determine which vector
     registers to save on a context switch.  We need to save
     VRSAVE on the stack frame, add whatever AltiVec registers we
     used in this function, and do the corresponding magic in the
     epilogue.  */

  if (!WORLD_SAVE_P (info)
      && info->vrsave_size != 0)
    {
      rtx reg, vrsave;
      int offset;
      int save_regno = 12;

      /* Get VRSAVE onto a GPR.  Note that ABI_V4 and ABI_DARWIN might
	 be using r12 as frame_reg_rtx and r11 as the static chain
	 pointer for nested functions.  */
      if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	  && !using_static_chain_p)
	save_regno = 11;
      else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
	{
	  save_regno = 11;
	  if (using_static_chain_p)
	    save_regno = 0;
	}

      NOT_INUSE (save_regno);
      reg = gen_rtx_REG (SImode, save_regno);
      vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
      if (TARGET_MACHO)
	emit_insn (gen_get_vrsave_internal (reg));
      else
	emit_insn (gen_rtx_SET (reg, vrsave));

      /* Save VRSAVE.  */
      offset = info->vrsave_save_offset + frame_off;
      insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));

      /* Include the registers in the mask.  */
      emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));

      insn = emit_insn (generate_set_vrsave (reg, info, 0));
    }
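  /* Added note (not in the original source): VRSAVE assigns one bit per
     vector register, most-significant bit first (see ALTIVEC_REG_BIT),
     so a function using only the call-saved registers v20..v31 would
     OR in the mask 0x00000fff here.  */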
  /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up.  */
  if (!TARGET_SINGLE_PIC_BASE
      && ((TARGET_TOC && TARGET_MINIMAL_TOC
	   && !constant_pool_empty_p ())
	  || (DEFAULT_ABI == ABI_V4
	      && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
	      && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
    {
      /* If emit_load_toc_table will use the link register, we need to save
	 it.  We use R12 for this purpose because emit_load_toc_table
	 can use register 0.  This allows us to use a plain 'blr' to return
	 from the procedure more often.  */
      int save_LR_around_toc_setup
	= (TARGET_ELF
	   && DEFAULT_ABI == ABI_V4
	   && flag_pic
	   && ! info->lr_save_p
	   && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
      if (save_LR_around_toc_setup)
	{
	  rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
	  rtx tmp = gen_rtx_REG (Pmode, 12);

	  sp_adjust = 0;
	  insn = emit_move_insn (tmp, lr);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  rs6000_emit_load_toc_table (TRUE);

	  insn = emit_move_insn (lr, tmp);
	  add_reg_note (insn, REG_CFA_RESTORE, lr);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	rs6000_emit_load_toc_table (TRUE);
    }
#if TARGET_MACHO
  if (!TARGET_SINGLE_PIC_BASE
      && DEFAULT_ABI == ABI_DARWIN
      && flag_pic && crtl->uses_pic_offset_table)
    {
      rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
      rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);

      /* Save and restore LR locally around this call (in R0).  */
      if (!info->lr_save_p)
	emit_move_insn (gen_rtx_REG (Pmode, 0), lr);

      emit_insn (gen_load_macho_picbase (src));

      emit_move_insn (gen_rtx_REG (Pmode,
				   RS6000_PIC_OFFSET_TABLE_REGNUM),
		      lr);

      if (!info->lr_save_p)
	emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
    }
#endif
  /* If we need to, save the TOC register after doing the stack setup.
     Do not emit eh frame info for this save.  The unwinder wants info,
     conceptually attached to instructions in this function, about
     register values in the caller of this function.  This R2 may have
     already been changed from the value in the caller.
     We don't attempt to write accurate DWARF EH frame info for R2
     because code emitted by gcc for a (non-pointer) function call
     doesn't save and restore R2.  Instead, R2 is managed out-of-line
     by a linker generated plt call stub when the function resides in
     a shared library.  This behavior is costly to describe in DWARF,
     both in terms of the size of DWARF info and the time taken in the
     unwinder to interpret it.  R2 changes, apart from the
     calls_eh_return case earlier in this function, are handled by
     linux-unwind.h frob_update_context.  */
  if (rs6000_save_toc_in_prologue_p ())
    {
      rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
      emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
    }
  if (using_split_stack && split_stack_arg_pointer_used_p ())
    {
      /* Set up the arg pointer (r12) for -fsplit-stack code.  If
	 __morestack was called, it left the arg pointer to the old
	 stack in r29.  Otherwise, the arg pointer is the top of the
	 current frame.  */
      cfun->machine->split_stack_argp_used = true;
      if (sp_adjust)
	{
	  rtx r12 = gen_rtx_REG (Pmode, 12);
	  rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
	  emit_insn_before (set_r12, sp_adjust);
	}
      else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
	{
	  rtx r12 = gen_rtx_REG (Pmode, 12);
	  if (frame_off == 0)
	    emit_move_insn (r12, frame_reg_rtx);
	  else
	    emit_insn (gen_add3_insn (r12, frame_reg_rtx,
				      GEN_INT (frame_off)));
	}
      if (info->push_p)
	{
	  rtx r12 = gen_rtx_REG (Pmode, 12);
	  rtx r29 = gen_rtx_REG (Pmode, 29);
	  rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
	  rtx not_more = gen_label_rtx ();
	  rtx jump;

	  jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
				       gen_rtx_GEU (VOIDmode, cr7,
						    const0_rtx),
				       gen_rtx_LABEL_REF (VOIDmode, not_more),
				       pc_rtx);
	  jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
	  JUMP_LABEL (jump) = not_more;
	  LABEL_NUSES (not_more) += 1;
	  emit_move_insn (r12, r29);
	  emit_label (not_more);
	}
    }
}
/* Output .extern statements for the save/restore routines we use.  */

static void
rs6000_output_savres_externs (FILE *file)
{
  rs6000_stack_t *info = rs6000_stack_info ();

  if (TARGET_DEBUG_STACK)
    debug_stack_info (info);

  /* Write .extern for any function we will call to save and restore
     fp values.  */
  if (info->first_fp_reg_save < 64
      && !TARGET_MACHO && !TARGET_ELF)
    {
      char *name;
      int regno = info->first_fp_reg_save - 32;

      if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
	{
	  bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
	  int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
	  name = rs6000_savres_routine_name (info, regno, sel);
	  fprintf (file, "\t.extern %s\n", name);
	}
      if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
	{
	  bool lr = (info->savres_strategy
		     & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
	  int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
	  name = rs6000_savres_routine_name (info, regno, sel);
	  fprintf (file, "\t.extern %s\n", name);
	}
    }
}
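/* Example (added; not in the original source): for a function whose
   first saved FPR is f14 and whose strategy uses the out-of-line
   millicode, this would print something along the lines of

	.extern _savef14
	.extern _restf14

   with the exact names chosen by rs6000_savres_routine_name.  */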
/* Write function prologue.  */

static void
rs6000_output_function_prologue (FILE *file,
				 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (!cfun->is_thunk)
    rs6000_output_savres_externs (file);

  /* ELFv2 ABI r2 setup code and local entry point.  This must follow
     immediately after the global entry point label.  */
  if (rs6000_global_entry_point_needed_p ())
    {
      const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

      (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);

      if (TARGET_CMODEL != CMODEL_LARGE)
	{
	  /* In the small and medium code models, we assume the TOC is less
	     than 2 GB away from the text section, so it can be computed
	     via the following two-instruction sequence.  */
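	  /* Added note (not in the original source): together with the
	     .LCF label emitted just above, the printed sequence is

		0:	addis 2,12,.TOC.-.LCF0@ha
			addi 2,2,.TOC.-.LCF0@l

	     where r12 holds the global entry point address on entry, so
	     r2 ends up pointing at the TOC.  */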
	  char buf[256];

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  fprintf (file, "0:\taddis 2,12,.TOC.-");
	  assemble_name (file, buf);
	  fprintf (file, "@ha\n");
	  fprintf (file, "\taddi 2,2,.TOC.-");
	  assemble_name (file, buf);
	  fprintf (file, "@l\n");
	}
      else
	{
	  /* In the large code model, we allow arbitrary offsets between the
	     TOC and the text section, so we have to load the offset from
	     memory.  The data field is emitted directly before the global
	     entry point in rs6000_elf_declare_function_name.  */
	  char buf[256];

#ifdef HAVE_AS_ENTRY_MARKERS
	  /* If supported by the linker, emit a marker relocation.  If the
	     total code size of the final executable or shared library
	     happens to fit into 2 GB after all, the linker will replace
	     this code sequence with the sequence for the small or medium
	     code model.  */
	  fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
#endif
	  fprintf (file, "\tld 2,");
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
	  assemble_name (file, buf);
	  fprintf (file, "-");
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  assemble_name (file, buf);
	  fprintf (file, "(12)\n");
	  fprintf (file, "\tadd 2,2,12\n");
	}

      fputs ("\t.localentry\t", file);
      assemble_name (file, name);
      fputs (",.-", file);
      assemble_name (file, name);
      fputs ("\n", file);
    }

  /* Output -mprofile-kernel code.  This needs to be done here instead of
     in output_function_profile since it must go after the ELFv2 ABI
     local entry point.  */
  if (TARGET_PROFILE_KERNEL && crtl->profile)
    {
      gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
      gcc_assert (!TARGET_32BIT);

      asm_fprintf (file, "\tmflr %s\n", reg_names[0]);

      /* In the ELFv2 ABI we have no compiler stack word.  It must be
	 the responsibility of _mcount to preserve the static chain
	 register if required.  */
      if (DEFAULT_ABI != ABI_ELFv2
	  && cfun->static_chain_decl != NULL)
	{
	  asm_fprintf (file, "\tstd %s,24(%s)\n",
		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
	  fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
	  asm_fprintf (file, "\tld %s,24(%s)\n",
		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
	}
      else
	fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
    }

  rs6000_pic_labelno++;
}
/* -mprofile-kernel code calls mcount before the function prolog,
   so a profiled leaf function should stay a leaf function.  */

static bool
rs6000_keep_leaf_when_profiled ()
{
  return TARGET_PROFILE_KERNEL;
}

/* Non-zero if vmx regs are restored before the frame pop, zero if
   we restore after the pop when possible.  */
#define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
/* Restoring cr is a two step process: loading a reg from the frame
   save, then moving the reg to cr.  For ABI_V4 we must let the
   unwinder know that the stack location is no longer valid at or
   before the stack deallocation, but we can't emit a cfa_restore for
   cr at the stack deallocation like we do for other registers.
   The trouble is that it is possible for the move to cr to be
   scheduled after the stack deallocation.  So say exactly where cr
   is located on each of the two insns.  */

static rtx
load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
{
  rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
  rtx reg = gen_rtx_REG (SImode, regno);
  rtx_insn *insn = emit_move_insn (reg, mem);

  if (!exit_func && DEFAULT_ABI == ABI_V4)
    {
      rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
      rtx set = gen_rtx_SET (reg, cr);

      add_reg_note (insn, REG_CFA_REGISTER, set);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  return reg;
}
/* Reload CR from REG.  */

static void
restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
{
  int count = 0;
  int i;

  if (using_mfcr_multiple)
    {
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  count++;
      gcc_assert (count);
    }

  if (using_mfcr_multiple && count > 1)
    {
      rtx_insn *insn;
      rtvec p;
      int ndx;

      p = rtvec_alloc (count);

      ndx = 0;
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  {
	    rtvec r = rtvec_alloc (2);
	    RTVEC_ELT (r, 0) = reg;
	    RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
	    RTVEC_ELT (p, ndx) =
	      gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
			   gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
	    ndx++;
	  }
      insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
      gcc_assert (ndx == count);

      /* For the ELFv2 ABI we generate a CFA_RESTORE for each
	 CR field separately.  */
      if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
	{
	  for (i = 0; i < 8; i++)
	    if (save_reg_p (CR0_REGNO + i))
	      add_reg_note (insn, REG_CFA_RESTORE,
			    gen_rtx_REG (SImode, CR0_REGNO + i));

	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    for (i = 0; i < 8; i++)
      if (save_reg_p (CR0_REGNO + i))
	{
	  rtx insn = emit_insn (gen_movsi_to_cr_one
				(gen_rtx_REG (CCmode, CR0_REGNO + i), reg));

	  /* For the ELFv2 ABI we generate a CFA_RESTORE for each
	     CR field separately, attached to the insn that in fact
	     restores this particular CR field.  */
	  if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
	    {
	      add_reg_note (insn, REG_CFA_RESTORE,
			    gen_rtx_REG (SImode, CR0_REGNO + i));

	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

  /* For other ABIs, we just generate a single CFA_RESTORE for CR2.  */
  if (!exit_func && DEFAULT_ABI != ABI_ELFv2
      && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
    {
      rtx_insn *insn = get_last_insn ();
      rtx cr = gen_rtx_REG (SImode, CR2_REGNO);

      add_reg_note (insn, REG_CFA_RESTORE, cr);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
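/* Added note (not in the original source): in the multiple-field case
   above, GEN_INT (1 << (7-i)) is the FXM field-mask operand of mtcrf;
   e.g. restoring only CR2 would use mask 0x20, as in "mtcrf 0x20,reg".  */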
/* Like cr, the move to lr instruction can be scheduled after the
   stack deallocation, but unlike cr, its stack frame save is still
   valid.  So we only need to emit the cfa_restore on the correct
   instruction.  */

static void
load_lr_save (int regno, rtx frame_reg_rtx, int offset)
{
  rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
  rtx reg = gen_rtx_REG (Pmode, regno);

  emit_move_insn (reg, mem);
}
static void
restore_saved_lr (int regno, bool exit_func)
{
  rtx reg = gen_rtx_REG (Pmode, regno);
  rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
  rtx_insn *insn = emit_move_insn (lr, reg);

  if (!exit_func && flag_shrink_wrap)
    {
      add_reg_note (insn, REG_CFA_RESTORE, lr);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
static rtx
add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
{
  if (DEFAULT_ABI == ABI_ELFv2)
    {
      int i;
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  {
	    rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
					   cfa_restores);
	  }
    }
  else if (info->cr_save_p)
    cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
				   gen_rtx_REG (SImode, CR2_REGNO),
				   cfa_restores);

  if (info->lr_save_p)
    cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
				   gen_rtx_REG (Pmode, LR_REGNO),
				   cfa_restores);
  return cfa_restores;
}
/* Return true if OFFSET from stack pointer can be clobbered by signals.
   V.4 doesn't have any stack cushion, AIX ABIs have 220 or 288 bytes
   below stack pointer not clobbered by signals.  */

static bool
offset_below_red_zone_p (HOST_WIDE_INT offset)
{
  return offset < (DEFAULT_ABI == ABI_V4
		   ? 0
		   : TARGET_32BIT ? -220 : -288);
}
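/* Worked example (added; not in the original source): on 64-bit
   AIX-style ABIs the red zone is 288 bytes, so a slot at offset -280
   from the incoming stack pointer is protected and
   offset_below_red_zone_p (-280) is false, while a slot at -300 lies
   below the red zone and must be reloaded before the frame is popped.  */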
/* Append CFA_RESTORES to any existing REG_NOTES on the last insn.  */

static void
emit_cfa_restores (rtx cfa_restores)
{
  rtx_insn *insn = get_last_insn ();
  rtx *loc = &REG_NOTES (insn);

  while (*loc)
    loc = &XEXP (*loc, 1);
  *loc = cfa_restores;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Emit function epilogue as insns.  */

void
rs6000_emit_epilogue (int sibcall)
{
  rs6000_stack_t *info;
  int restoring_GPRs_inline;
  int restoring_FPRs_inline;
  int using_load_multiple;
  int using_mtcr_multiple;
  int use_backchain_to_restore_sp;
  int restore_lr;
  int strategy;
  HOST_WIDE_INT frame_off = 0;
  rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
  rtx frame_reg_rtx = sp_reg_rtx;
  rtx cfa_restores = NULL_RTX;
  rtx insn;
  rtx cr_save_reg = NULL_RTX;
  machine_mode reg_mode = Pmode;
  int reg_size = TARGET_32BIT ? 4 : 8;
  int i;
  bool exit_func;
  unsigned ptr_regno;

  info = rs6000_stack_info ();

  if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
    {
      reg_mode = V2SImode;
      reg_size = 8;
    }

  strategy = info->savres_strategy;
  using_load_multiple = strategy & REST_MULTIPLE;
  restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
  restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
  using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
			 || rs6000_cpu == PROCESSOR_PPC603
			 || rs6000_cpu == PROCESSOR_PPC750
			 || rs6000_cpu == PROCESSOR_PPC7400);
  /* Restore via the backchain when we have a large frame, since this
     is more efficient than an addis, addi pair.  The second condition
     here will not trigger at the moment;  We don't actually need a
     frame pointer for alloca, but the generic parts of the compiler
     give us one anyway.  */
  use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
						     ? info->lr_save_offset
						     : 0) > 32767
				 || (cfun->calls_alloca
				     && !frame_pointer_needed));
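  /* Added note (not in the original source): for a large frame, say
     total_size == 70000, the epilogue can reload the old stack pointer
     with a single back-chain load, "ld 1,0(1)" (or "lwz 1,0(1)" on
     32-bit), instead of materializing 70000 with an addis/addi pair.  */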
  restore_lr = (info->lr_save_p
		&& (restoring_FPRs_inline
		    || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
		&& (restoring_GPRs_inline
		    || info->first_fp_reg_save < 64)
		&& !cfun->machine->lr_is_wrapped_separately);
  if (WORLD_SAVE_P (info))
    {
      int i, j;
      char rname[30];
      const char *alloc_rname;
      rtvec p;

      /* eh_rest_world_r10 will return to the location saved in the LR
	 stack slot (which is not likely to be our caller.)
	 Input: R10 -- stack adjustment.  Clobbers R0, R11, R12, R7, R8.
	 rest_world is similar, except any R10 parameter is ignored.
	 The exception-handling stuff that was here in 2.95 is no
	 longer necessary.  */

      p = rtvec_alloc (9
		       + 32 - info->first_gp_reg_save
		       + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
		       + 63 + 1 - info->first_fp_reg_save);

      strcpy (rname, ((crtl->calls_eh_return) ?
		      "*eh_rest_world_r10" : "*rest_world"));
      alloc_rname = ggc_strdup (rname);

      j = 0;
      RTVEC_ELT (p, j++) = ret_rtx;
      RTVEC_ELT (p, j++)
	= gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
      /* The instruction pattern requires a clobber here;
	 it is shared with the restVEC helper. */
      RTVEC_ELT (p, j++)
	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));

      {
	/* CR register traditionally saved as CR2.  */
	rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
	RTVEC_ELT (p, j++)
	  = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
	if (flag_shrink_wrap)
	  {
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
					   gen_rtx_REG (Pmode, LR_REGNO),
					   cfa_restores);
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
	  }
      }

      for (i = 0; i < 32 - info->first_gp_reg_save; i++)
	{
	  rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
	  RTVEC_ELT (p, j++)
	    = gen_frame_load (reg,
			      frame_reg_rtx,
			      info->gp_save_offset + reg_size * i);
	  if (flag_shrink_wrap)
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
	}
      for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
	{
	  rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
	  RTVEC_ELT (p, j++)
	    = gen_frame_load (reg,
			      frame_reg_rtx,
			      info->altivec_save_offset + 16 * i);
	  if (flag_shrink_wrap)
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
	}
      for (i = 0; info->first_fp_reg_save + i <= 63; i++)
	{
	  rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
				  ? DFmode : SFmode),
				 info->first_fp_reg_save + i);
	  RTVEC_ELT (p, j++)
	    = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
	  if (flag_shrink_wrap)
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
	}
      RTVEC_ELT (p, j++)
	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
      RTVEC_ELT (p, j++)
	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
      RTVEC_ELT (p, j++)
	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
      RTVEC_ELT (p, j++)
	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
      RTVEC_ELT (p, j++)
	= gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
      insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));

      if (flag_shrink_wrap)
	{
	  REG_NOTES (insn) = cfa_restores;
	  add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return;
    }
  /* frame_reg_rtx + frame_off points to the top of this stack frame.  */
  if (info->push_p)
    frame_off = info->total_size;
  /* Restore AltiVec registers if we must do so before adjusting the
     stack.  */
  if (info->altivec_size != 0
      && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
	  || (DEFAULT_ABI != ABI_V4
	      && offset_below_red_zone_p (info->altivec_save_offset))))
    {
      int i;
      int scratch_regno = ptr_regno_for_savres (SAVRES_VR);

      gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
      if (use_backchain_to_restore_sp)
	{
	  int frame_regno = 11;

	  if ((strategy & REST_INLINE_VRS) == 0)
	    {
	      /* Of r11 and r12, select the one not clobbered by an
		 out-of-line restore function for the frame register.  */
	      frame_regno = 11 + 12 - scratch_regno;
	    }
	  frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
	  emit_move_insn (frame_reg_rtx,
			  gen_rtx_MEM (Pmode, sp_reg_rtx));
	  frame_off = 0;
	}
      else if (frame_pointer_needed)
	frame_reg_rtx = hard_frame_pointer_rtx;

      if ((strategy & REST_INLINE_VRS) == 0)
	{
	  int end_save = info->altivec_save_offset + info->altivec_size;
	  int ptr_off;
	  rtx ptr_reg = gen_rtx_REG (Pmode, 0);
	  rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);

	  if (end_save + frame_off != 0)
	    {
	      rtx offset = GEN_INT (end_save + frame_off);

	      emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
	    }
	  else
	    emit_move_insn (ptr_reg, frame_reg_rtx);

	  ptr_off = -end_save;
	  insn = rs6000_emit_savres_rtx (info, scratch_reg,
					 info->altivec_save_offset + ptr_off,
					 0, V4SImode, SAVRES_VR);
	}
      else
	{
	  for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
	    if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
	      {
		rtx addr, areg, mem, insn;
		rtx reg = gen_rtx_REG (V4SImode, i);
		HOST_WIDE_INT offset
		  = (info->altivec_save_offset + frame_off
		     + 16 * (i - info->first_altivec_reg_save));

		if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
		  {
		    mem = gen_frame_mem (V4SImode,
					 gen_rtx_PLUS (Pmode, frame_reg_rtx,
						       GEN_INT (offset)));
		    insn = gen_rtx_SET (reg, mem);
		  }
		else
		  {
		    areg = gen_rtx_REG (Pmode, 0);
		    emit_move_insn (areg, GEN_INT (offset));

		    /* AltiVec addressing mode is [reg+reg].  */
		    addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
		    mem = gen_frame_mem (V4SImode, addr);

		    /* Rather than emitting a generic move, force use of the
		       lvx instruction, which we always want.  In particular
		       we don't want lxvd2x/xxpermdi for little endian.  */
		    insn = gen_altivec_lvx_v4si_internal (reg, mem);
		  }

		(void) emit_insn (insn);
	      }
	}

      for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
	if (((strategy & REST_INLINE_VRS) == 0
	     || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
	    && (flag_shrink_wrap
		|| (offset_below_red_zone_p
		    (info->altivec_save_offset
		     + 16 * (i - info->first_altivec_reg_save)))))
	  {
	    rtx reg = gen_rtx_REG (V4SImode, i);
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
	  }
    }
  /* Restore VRSAVE if we must do so before adjusting the stack.  */
  if (info->vrsave_size != 0
      && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
	  || (DEFAULT_ABI != ABI_V4
	      && offset_below_red_zone_p (info->vrsave_save_offset))))
    {
      rtx reg;

      if (frame_reg_rtx == sp_reg_rtx)
	{
	  if (use_backchain_to_restore_sp)
	    {
	      frame_reg_rtx = gen_rtx_REG (Pmode, 11);
	      emit_move_insn (frame_reg_rtx,
			      gen_rtx_MEM (Pmode, sp_reg_rtx));
	      frame_off = 0;
	    }
	  else if (frame_pointer_needed)
	    frame_reg_rtx = hard_frame_pointer_rtx;
	}

      reg = gen_rtx_REG (SImode, 12);
      emit_insn (gen_frame_load (reg, frame_reg_rtx,
				 info->vrsave_save_offset + frame_off));

      emit_insn (generate_set_vrsave (reg, info, 1));
    }
  /* If we have a large stack frame, restore the old stack pointer
     using the backchain.  */
  if (use_backchain_to_restore_sp)
    {
      if (frame_reg_rtx == sp_reg_rtx)
	{
	  /* Under V.4, don't reset the stack pointer until after we're done
	     loading the saved registers.  */
	  if (DEFAULT_ABI == ABI_V4)
	    frame_reg_rtx = gen_rtx_REG (Pmode, 11);

	  insn = emit_move_insn (frame_reg_rtx,
				 gen_rtx_MEM (Pmode, sp_reg_rtx));
	  frame_off = 0;
	}
      else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
	       && DEFAULT_ABI == ABI_V4)
	/* frame_reg_rtx has been set up by the altivec restore.  */
	;
      else
	{
	  insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
	  frame_reg_rtx = sp_reg_rtx;
	}
    }
  /* If we have a frame pointer, we can restore the old stack pointer
     from it.  */
  else if (frame_pointer_needed)
    {
      frame_reg_rtx = sp_reg_rtx;
      if (DEFAULT_ABI == ABI_V4)
	frame_reg_rtx = gen_rtx_REG (Pmode, 11);
      /* Prevent reordering memory accesses against stack pointer restore.  */
      else if (cfun->calls_alloca
	       || offset_below_red_zone_p (-info->total_size))
	rs6000_emit_stack_tie (frame_reg_rtx, true);

      insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
				       GEN_INT (info->total_size)));
      frame_off = 0;
    }
  else if (info->push_p
	   && DEFAULT_ABI != ABI_V4
	   && !crtl->calls_eh_return)
    {
      /* Prevent reordering memory accesses against stack pointer restore.  */
      if (cfun->calls_alloca
	  || offset_below_red_zone_p (-info->total_size))
	rs6000_emit_stack_tie (frame_reg_rtx, false);
      insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
				       GEN_INT (info->total_size)));
      frame_off = 0;
    }
  if (insn && frame_reg_rtx == sp_reg_rtx)
    {
      if (cfa_restores)
	{
	  REG_NOTES (insn) = cfa_restores;
	  cfa_restores = NULL_RTX;
	}
      add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Restore AltiVec registers if we have not done so already.  */
  if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
      && info->altivec_size != 0
      && (DEFAULT_ABI == ABI_V4
	  || !offset_below_red_zone_p (info->altivec_save_offset)))
    {
      int i;

      if ((strategy & REST_INLINE_VRS) == 0)
	{
	  int end_save = info->altivec_save_offset + info->altivec_size;
	  int ptr_off;
	  rtx ptr_reg = gen_rtx_REG (Pmode, 0);
	  int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
	  rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);

	  if (end_save + frame_off != 0)
	    {
	      rtx offset = GEN_INT (end_save + frame_off);

	      emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
	    }
	  else
	    emit_move_insn (ptr_reg, frame_reg_rtx);

	  ptr_off = -end_save;
	  insn = rs6000_emit_savres_rtx (info, scratch_reg,
					 info->altivec_save_offset + ptr_off,
					 0, V4SImode, SAVRES_VR);
	  if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
	    {
	      /* Frame reg was clobbered by out-of-line save.  Restore it
		 from ptr_reg, and if we are calling out-of-line gpr or
		 fpr restore set up the correct pointer and offset.  */
	      unsigned newptr_regno = 1;
	      if (!restoring_GPRs_inline)
		{
		  bool lr = info->gp_save_offset + info->gp_size == 0;
		  int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
		  newptr_regno = ptr_regno_for_savres (sel);
		  end_save = info->gp_save_offset + info->gp_size;
		}
	      else if (!restoring_FPRs_inline)
		{
		  bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
		  int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
		  newptr_regno = ptr_regno_for_savres (sel);
		  end_save = info->fp_save_offset + info->fp_size;
		}

	      if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
		frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);

	      if (end_save + ptr_off != 0)
		{
		  rtx offset = GEN_INT (end_save + ptr_off);

		  frame_off = -end_save;
		  if (TARGET_32BIT)
		    emit_insn (gen_addsi3_carry (frame_reg_rtx,
						 ptr_reg, offset));
		  else
		    emit_insn (gen_adddi3_carry (frame_reg_rtx,
						 ptr_reg, offset));
		}
	      else
		{
		  frame_off = ptr_off;
		  emit_move_insn (frame_reg_rtx, ptr_reg);
		}
	    }
	}
      else
	{
	  for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
	    if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
	      {
		rtx addr, areg, mem, insn;
		rtx reg = gen_rtx_REG (V4SImode, i);
		HOST_WIDE_INT offset
		  = (info->altivec_save_offset + frame_off
		     + 16 * (i - info->first_altivec_reg_save));

		if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
		  {
		    mem = gen_frame_mem (V4SImode,
					 gen_rtx_PLUS (Pmode, frame_reg_rtx,
						       GEN_INT (offset)));
		    insn = gen_rtx_SET (reg, mem);
		  }
		else
		  {
		    areg = gen_rtx_REG (Pmode, 0);
		    emit_move_insn (areg, GEN_INT (offset));

		    /* AltiVec addressing mode is [reg+reg].  */
		    addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
		    mem = gen_frame_mem (V4SImode, addr);

		    /* Rather than emitting a generic move, force use of the
		       lvx instruction, which we always want.  In particular
		       we don't want lxvd2x/xxpermdi for little endian.  */
		    insn = gen_altivec_lvx_v4si_internal (reg, mem);
		  }

		(void) emit_insn (insn);
	      }
	}

      for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
	if (((strategy & REST_INLINE_VRS) == 0
	     || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
	    && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
	  {
	    rtx reg = gen_rtx_REG (V4SImode, i);
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
	  }
    }
  /* Restore VRSAVE if we have not done so already.  */
  if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
      && info->vrsave_size != 0
      && (DEFAULT_ABI == ABI_V4
	  || !offset_below_red_zone_p (info->vrsave_save_offset)))
    {
      rtx reg;

      reg = gen_rtx_REG (SImode, 12);
      emit_insn (gen_frame_load (reg, frame_reg_rtx,
				 info->vrsave_save_offset + frame_off));

      emit_insn (generate_set_vrsave (reg, info, 1));
    }
  /* If we exit by an out-of-line restore function on ABI_V4 then that
     function will deallocate the stack, so we don't need to worry
     about the unwinder restoring cr from an invalid stack frame
     location.  */
  exit_func = (!restoring_FPRs_inline
	       || (!restoring_GPRs_inline
		   && info->first_fp_reg_save == 64));
  /* In the ELFv2 ABI we need to restore all call-saved CR fields from
     *separate* slots if the routine calls __builtin_eh_return, so
     that they can be independently restored by the unwinder.  */
  if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
    {
      int i, cr_off = info->ehcr_offset;

      for (i = 0; i < 8; i++)
	if (!call_used_regs[CR0_REGNO + i])
	  {
	    rtx reg = gen_rtx_REG (SImode, 0);
	    emit_insn (gen_frame_load (reg, frame_reg_rtx,
				       cr_off + frame_off));

	    insn = emit_insn (gen_movsi_to_cr_one
			      (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));

	    if (!exit_func && flag_shrink_wrap)
	      {
		add_reg_note (insn, REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, CR0_REGNO + i));

		RTX_FRAME_RELATED_P (insn) = 1;
	      }

	    cr_off += reg_size;
	  }
    }
  /* Get the old lr if we saved it.  If we are restoring registers
     out-of-line, then the out-of-line routines can do this for us.  */
  if (restore_lr && restoring_GPRs_inline)
    load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);

  /* Get the old cr if we saved it.  */
  if (info->cr_save_p)
    {
      unsigned cr_save_regno = 12;

      if (!restoring_GPRs_inline)
	{
	  /* Ensure we don't use the register used by the out-of-line
	     gpr register restore below.  */
	  bool lr = info->gp_save_offset + info->gp_size == 0;
	  int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
	  int gpr_ptr_regno = ptr_regno_for_savres (sel);

	  if (gpr_ptr_regno == 12)
	    cr_save_regno = 11;
	  gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
	}
      else if (REGNO (frame_reg_rtx) == 12)
	cr_save_regno = 11;

      cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
				  info->cr_save_offset + frame_off,
				  exit_func);
    }

  /* Set LR here to try to overlap restores below.  */
  if (restore_lr && restoring_GPRs_inline)
    restore_saved_lr (0, exit_func);
  /* Load exception handler data registers, if needed.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i, regno;

      if (TARGET_AIX)
	{
	  rtx reg = gen_rtx_REG (reg_mode, 2);
	  emit_insn (gen_frame_load (reg, frame_reg_rtx,
				     frame_off + RS6000_TOC_SAVE_SLOT));
	}

      for (i = 0; ; ++i)
	{
	  rtx mem;

	  regno = EH_RETURN_DATA_REGNO (i);
	  if (regno == INVALID_REGNUM)
	    break;

	  /* Note: possible use of r0 here to address SPE regs.  */
	  mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
				      info->ehrd_offset + frame_off
				      + reg_size * (int) i);

	  emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
	}
    }
  /* Restore GPRs.  This is done as a PARALLEL if we are using
     the load-multiple instructions.  */
  if (TARGET_SPE_ABI
      && info->spe_64bit_regs_used
      && info->first_gp_reg_save != 32)
    {
      /* Determine whether we can address all of the registers that need
	 to be saved with an offset from frame_reg_rtx that fits in
	 the small const field for SPE memory instructions.  */
      int spe_regs_addressable
	= (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
				+ reg_size * (32 - info->first_gp_reg_save - 1))
	   && restoring_GPRs_inline);

      if (!spe_regs_addressable)
	{
	  int ool_adjust = 0;
	  rtx old_frame_reg_rtx = frame_reg_rtx;
	  /* Make r11 point to the start of the SPE save area.  We worried
	     about not clobbering it when we were saving registers in the
	     prologue.  There's no need to worry here because the static
	     chain is passed anew to every function.  */

	  if (!restoring_GPRs_inline)
	    ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
	  frame_reg_rtx = gen_rtx_REG (Pmode, 11);
	  emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
				 GEN_INT (info->spe_gp_save_offset
					  + frame_off
					  - ool_adjust)));
	  /* Keep the invariant that frame_reg_rtx + frame_off points
	     at the top of the stack frame.  */
	  frame_off = -info->spe_gp_save_offset + ool_adjust;
	}

      if (restoring_GPRs_inline)
	{
	  HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;

	  for (i = 0; i < 32 - info->first_gp_reg_save; i++)
	    if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
	      {
		rtx offset, addr, mem, reg;

		/* We're doing all this to ensure that the immediate offset
		   fits into the immediate field of 'evldd'.  */
		gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));

		offset = GEN_INT (spe_offset + reg_size * i);
		addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
		mem = gen_rtx_MEM (V2SImode, addr);
		reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);

		emit_move_insn (reg, mem);
	      }
	}
      else
	rs6000_emit_savres_rtx (info, frame_reg_rtx,
				info->spe_gp_save_offset + frame_off,
				info->lr_save_offset + frame_off,
				reg_mode,
				SAVRES_GPR | SAVRES_LR);
    }
  else if (!restoring_GPRs_inline)
    {
      /* We are jumping to an out-of-line function.  */
      rtx ptr_reg;
      int end_save = info->gp_save_offset + info->gp_size;
      bool can_use_exit = end_save == 0;
      int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
      int ptr_off;

      /* Emit stack reset code if we need it.  */
      ptr_regno = ptr_regno_for_savres (sel);
      ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
      if (can_use_exit)
	rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
      else if (end_save + frame_off != 0)
	emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
				  GEN_INT (end_save + frame_off)));
      else if (REGNO (frame_reg_rtx) != ptr_regno)
	emit_move_insn (ptr_reg, frame_reg_rtx);
      if (REGNO (frame_reg_rtx) == ptr_regno)
	frame_off = -end_save;

      if (can_use_exit && info->cr_save_p)
	restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);

      ptr_off = -end_save;
      rs6000_emit_savres_rtx (info, ptr_reg,
			      info->gp_save_offset + ptr_off,
			      info->lr_save_offset + ptr_off,
			      reg_mode, sel);
    }
  else if (using_load_multiple)
    {
      rtvec p;

      p = rtvec_alloc (32 - info->first_gp_reg_save);
      for (i = 0; i < 32 - info->first_gp_reg_save; i++)
	RTVEC_ELT (p, i)
	  = gen_frame_load (gen_rtx_REG (reg_mode,
					 info->first_gp_reg_save + i),
			    frame_reg_rtx,
			    info->gp_save_offset + frame_off + reg_size * i);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
    }
  else
    {
      int offset = info->gp_save_offset + frame_off;
      for (i = info->first_gp_reg_save; i < 32; i++)
	{
	  if (rs6000_reg_live_or_pic_offset_p (i)
	      && !cfun->machine->gpr_is_wrapped_separately[i])
	    {
	      rtx reg = gen_rtx_REG (reg_mode, i);
	      emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
	    }

	  offset += reg_size;
	}
    }
  if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
    {
      /* If the frame pointer was used then we can't delay emitting
	 a REG_CFA_DEF_CFA note.  This must happen on the insn that
	 restores the frame pointer, r31.  We may have already emitted
	 a REG_CFA_DEF_CFA note, but that's OK;  A duplicate is
	 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
	 be harmless if emitted.  */
      if (frame_pointer_needed)
	{
	  insn = get_last_insn ();
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, frame_reg_rtx, frame_off));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Set up cfa_restores.  We always need these when
	 shrink-wrapping.  If not shrink-wrapping then we only need
	 the cfa_restore when the stack location is no longer valid.
	 The cfa_restores must be emitted on or before the insn that
	 invalidates the stack, and of course must not be emitted
	 before the insn that actually does the restore.  The latter
	 is why it is a bad idea to emit the cfa_restores as a group
	 on the last instruction here that actually does a restore:
	 That insn may be reordered with respect to others doing
	 restores.  */
      if (flag_shrink_wrap
	  && !restoring_GPRs_inline
	  && info->first_fp_reg_save == 64)
	cfa_restores = add_crlr_cfa_restore (info, cfa_restores);

      for (i = info->first_gp_reg_save; i < 32; i++)
	if (!restoring_GPRs_inline
	    || using_load_multiple
	    || rs6000_reg_live_or_pic_offset_p (i))
	  {
	    if (cfun->machine->gpr_is_wrapped_separately[i])
	      continue;

	    rtx reg = gen_rtx_REG (reg_mode, i);
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
	  }
    }

  if (!restoring_GPRs_inline
      && info->first_fp_reg_save == 64)
    {
      /* We are jumping to an out-of-line function.  */
      if (cfa_restores)
	emit_cfa_restores (cfa_restores);
      return;
    }
  if (restore_lr && !restoring_GPRs_inline)
    {
      load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
      restore_saved_lr (0, exit_func);
    }

  /* Restore fpr's if we need to do it without calling a function.  */
  if (restoring_FPRs_inline)
    for (i = 0; i < 64 - info->first_fp_reg_save; i++)
      if (save_reg_p (info->first_fp_reg_save + i))
	{
	  rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
				  ? DFmode : SFmode),
				 info->first_fp_reg_save + i);
	  emit_insn (gen_frame_load (reg, frame_reg_rtx,
				     info->fp_save_offset + frame_off + 8 * i));
	  if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
	}
  /* If we saved cr, restore it here.  Just those that were used.  */
  if (info->cr_save_p)
    restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);

  /* If this is V.4, unwind the stack pointer after all of the loads
     have been done, or set up r11 if we are restoring fp out of line.  */
  ptr_regno = 1;
  if (!restoring_FPRs_inline)
    {
      bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
      int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
      ptr_regno = ptr_regno_for_savres (sel);
    }

  insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
  if (REGNO (frame_reg_rtx) == ptr_regno)
    frame_off = 0;

  if (insn && restoring_FPRs_inline)
    {
      if (cfa_restores)
	{
	  REG_NOTES (insn) = cfa_restores;
	  cfa_restores = NULL_RTX;
	}
      add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (crtl->calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;
      emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
    }

  if (!sibcall && restoring_FPRs_inline)
    {
      if (cfa_restores)
	{
	  /* We can't hang the cfa_restores off a simple return,
	     since the shrink-wrap code sometimes uses an existing
	     return.  This means there might be a path from
	     pre-prologue code to this return, and dwarf2cfi code
	     wants the eh_frame unwinder state to be the same on
	     all paths to any point.  So we need to emit the
	     cfa_restores before the return.  For -m64 we really
	     don't need epilogue cfa_restores at all, except for
	     this irritating dwarf2cfi with shrink-wrap
	     requirement;  The stack red-zone means eh_frame info
	     from the prologue telling the unwinder to restore
	     from the stack is perfectly good right to the end of
	     the function.  */
	  emit_insn (gen_blockage ());
	  emit_cfa_restores (cfa_restores);
	  cfa_restores = NULL_RTX;
	}

      emit_jump_insn (targetm.gen_simple_return ());
    }
  if (!sibcall && !restoring_FPRs_inline)
    {
      bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
      rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
      int elt = 0;
      RTVEC_ELT (p, elt++) = ret_rtx;
      if (lr)
	RTVEC_ELT (p, elt++)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));

      /* We have to restore more than two FP registers, so branch to the
	 restore function.  It will return to our caller.  */
      int i;
      int reg;
      rtx sym;

      if (flag_shrink_wrap)
	cfa_restores = add_crlr_cfa_restore (info, cfa_restores);

      sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
      RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
      reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
      RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));

      for (i = 0; i < 64 - info->first_fp_reg_save; i++)
	{
	  rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);

	  RTVEC_ELT (p, elt++)
	    = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
	  if (flag_shrink_wrap)
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
	}

      emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
    }

  if (cfa_restores)
    {
      if (sibcall)
	/* Ensure the cfa_restores are hung off an insn that won't
	   be reordered above other restores.  */
	emit_insn (gen_blockage ());

      emit_cfa_restores (cfa_restores);
    }
}
/* Write function epilogue.  */

static void
rs6000_output_function_epilogue (FILE *file,
				 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
#if TARGET_MACHO
  macho_branch_islands ();

  {
    rtx_insn *insn = get_last_insn ();
    rtx_insn *deleted_debug_label = NULL;

    /* Mach-O doesn't support labels at the end of objects, so if
       it looks like we might want one, take special action.

       First, collect any sequence of deleted debug labels.  */
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only, instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   in between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }

    /* Second, if we have:
       label:
	 barrier
       then this needs to be detected, so skip past the barrier.  */

    if (insn && BARRIER_P (insn))
      insn = PREV_INSN (insn);

    /* Up to now we've only seen notes or barriers.  */
    if (insn)
      {
	if (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	  /* Trailing label: <barrier>.  */
	  fputs ("\tnop\n", file);
	else
	  {
	    /* Lastly, see if we have a completely empty function body.  */
	    while (insn && ! INSN_P (insn))
	      insn = PREV_INSN (insn);
	    /* If we don't find any insns, we've got an empty function body;
	       I.e. completely empty - without a return or branch.  This is
	       taken as the case where a function body has been removed
	       because it contains an inline __builtin_unreachable().  GCC
	       states that reaching __builtin_unreachable() means UB so
	       we're not obliged to do anything special; however, we want
	       non-zero-sized function bodies.  To meet this, and help the
	       user out, let's trap the case.  */
	    if (insn == NULL)
	      fputs ("\ttrap\n", file);
	  }
      }
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
  /* Output a traceback table here.  See /usr/include/sys/debug.h for info
     on its format.

     We don't output a traceback table if -finhibit-size-directive was
     used.  The documentation for -finhibit-size-directive reads
     ``don't output a @code{.size} assembler directive, or anything
     else that would cause trouble if the function is split in the
     middle, and the two halves are placed at locations far apart in
     memory.''  The traceback table has this property, since it
     includes the offset from the start of the function to the
     traceback table itself.

     System V.4 Powerpc's (and the embedded ABI derived from it) use a
     different traceback table.  */
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && ! flag_inhibit_size_directive
      && rs6000_traceback != traceback_none
      && !cfun->is_thunk)
    {
      const char *fname = NULL;
      const char *language_string = lang_hooks.name;
      int fixed_parms = 0, float_parms = 0, parm_info = 0;
      int i;
      int optional_tbtab;
      rs6000_stack_t *info = rs6000_stack_info ();

      if (rs6000_traceback == traceback_full)
	optional_tbtab = 1;
      else if (rs6000_traceback == traceback_part)
	optional_tbtab = 0;
      else
	optional_tbtab = !optimize_size && !TARGET_ELF;

      if (optional_tbtab)
	{
	  fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
	  while (*fname == '.')	/* V.4 encodes . in the name */
	    fname++;

	  /* Need label immediately before tbtab, so we can compute
	     its offset from the function start.  */
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
	  ASM_OUTPUT_LABEL (file, fname);
	}

      /* The .tbtab pseudo-op can only be used for the first eight
	 expressions, since it can't handle the possibly variable
	 length fields that follow.  However, if you omit the optional
	 fields, the assembler outputs zeros for all optional fields
	 anyway, giving each variable length field its minimum length
	 (as defined in sys/debug.h).  Thus we cannot use the .tbtab
	 pseudo-op at all.  */

      /* An all-zero word flags the start of the tbtab, for debuggers
	 that have to find it by searching forward from the entry
	 point or from the current pc.  */
      fputs ("\t.long 0\n", file);

      /* Tbtab format type.  Use format type 0.  */
      fputs ("\t.byte 0,", file);

      /* Language type.  Unfortunately, there does not seem to be any
	 official way to discover the language being compiled, so we
	 use language_string.
	 C is 0.  Fortran is 1.  Pascal is 2.  Ada is 3.  C++ is 9.
	 Java is 13.  Objective-C is 14.  Objective-C++ isn't assigned
	 a number, so for now use 9.  LTO, Go and JIT aren't assigned
	 numbers either, so for now use 0.  */
      if (lang_GNU_C ()
	  || ! strcmp (language_string, "GNU GIMPLE")
	  || ! strcmp (language_string, "GNU Go")
	  || ! strcmp (language_string, "libgccjit"))
	i = 0;
      else if (! strcmp (language_string, "GNU F77")
	       || lang_GNU_Fortran ())
	i = 1;
      else if (! strcmp (language_string, "GNU Pascal"))
	i = 2;
      else if (! strcmp (language_string, "GNU Ada"))
	i = 3;
      else if (lang_GNU_CXX ()
	       || ! strcmp (language_string, "GNU Objective-C++"))
	i = 9;
      else if (! strcmp (language_string, "GNU Java"))
	i = 13;
      else if (! strcmp (language_string, "GNU Objective-C"))
	i = 14;
      else
	gcc_unreachable ();
      fprintf (file, "%d,", i);

      /* 8 single bit fields: global linkage (not set for C extern linkage,
	 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
	 from start of procedure stored in tbtab, internal function, function
	 has controlled storage, function has no toc, function uses fp,
	 function logs/aborts fp operations.  */
      /* Assume that fp operations are used if any fp reg must be saved.  */
      fprintf (file, "%d,",
	       (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));

      /* 6 bitfields: function is interrupt handler, name present in
	 proc table, function calls alloca, on condition directives
	 (controls stack walks, 3 bits), saves condition reg, saves
	 link reg.  */
      /* The `function calls alloca' bit seems to be set whenever reg 31 is
	 set up as a frame pointer, even when there is no alloca call.  */
      fprintf (file, "%d,",
	       ((optional_tbtab << 6)
		| ((optional_tbtab & frame_pointer_needed) << 5)
		| (info->cr_save_p << 1)
		| (info->lr_save_p)));

      /* 3 bitfields: saves backchain, fixup code, number of fpr saved
	 (6 bits).  */
      fprintf (file, "%d,",
	       (info->push_p << 7) | (64 - info->first_fp_reg_save));

      /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits).  */
      fprintf (file, "%d,", (32 - first_reg_to_save ()));

      if (optional_tbtab)
	{
	  /* Compute the parameter info from the function decl argument
	     list.  */
	  tree decl;
	  int next_parm_info_bit = 31;

	  for (decl = DECL_ARGUMENTS (current_function_decl);
	       decl; decl = DECL_CHAIN (decl))
	    {
	      rtx parameter = DECL_INCOMING_RTL (decl);
	      machine_mode mode = GET_MODE (parameter);

	      if (GET_CODE (parameter) == REG)
		{
		  if (SCALAR_FLOAT_MODE_P (mode))
		    {
		      int bits;

		      float_parms++;

		      switch (mode)
			{
			case SFmode:
			case SDmode:
			  bits = 0x2;
			  break;

			case DFmode:
			case DDmode:
			case TFmode:
			case TDmode:
			case IFmode:
			case KFmode:
			  bits = 0x3;
			  break;

			default:
			  gcc_unreachable ();
			}

		      /* If only one bit will fit, don't or in this entry.  */
		      if (next_parm_info_bit > 0)
			parm_info |= (bits << (next_parm_info_bit - 1));
		      next_parm_info_bit -= 2;
		    }
		  else
		    {
		      fixed_parms += ((GET_MODE_SIZE (mode)
				       + (UNITS_PER_WORD - 1))
				      / UNITS_PER_WORD);
		      next_parm_info_bit -= 1;
		    }
		}
	    }
	}

      /* Number of fixed point parameters.  */
      /* This is actually the number of words of fixed point parameters; thus
	 an 8 byte struct counts as 2; and thus the maximum value is 8.  */
      fprintf (file, "%d,", fixed_parms);

      /* 2 bitfields: number of floating point parameters (7 bits), parameters
	 on stack (1 bit).  */
      /* This is actually the number of fp registers that hold parameters;
	 and thus the maximum value is 13.  */
      /* Set parameters on stack bit if parameters are not in their original
	 registers, regardless of whether they are on the stack?  Xlc
	 seems to set the bit when not optimizing.  */
      fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));

      if (optional_tbtab)
	{
	  /* Optional fields follow.  Some are variable length.  */

	  /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
	     float, 11 double float.  */
	  /* There is an entry for each parameter in a register, in the order
	     that they occur in the parameter list.  Any intervening arguments
	     on the stack are ignored.  If the list overflows a long (max
	     possible length 34 bits) then completely leave off all elements
	     that don't fit.  */
	  /* Only emit this long if there was at least one parameter.  */
	  if (fixed_parms || float_parms)
	    fprintf (file, "\t.long %d\n", parm_info);

	  /* Offset from start of code to tb table.  */
	  fputs ("\t.long ", file);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
	  RS6000_OUTPUT_BASENAME (file, fname);
	  putc ('-', file);
	  rs6000_output_function_entry (file, fname);
	  putc ('\n', file);

	  /* Interrupt handler mask.  */
	  /* Omit this long, since we never set the interrupt handler bit
	     above.  */

	  /* Number of CTL (controlled storage) anchors.  */
	  /* Omit this long, since the has_ctl bit is never set above.  */

	  /* Displacement into stack of each CTL anchor.  */
	  /* Omit this list of longs, because there are no CTL anchors.  */

	  /* Length of function name.  */
	  if (*fname == '*')
	    ++fname;
	  fprintf (file, "\t.short %d\n", (int) strlen (fname));

	  /* Function name.  */
	  assemble_string (fname, strlen (fname));

	  /* Register for alloca automatic storage; this is always reg 31.
	     Only emit this if the alloca bit was set above.  */
	  if (frame_pointer_needed)
	    fputs ("\t.byte 31\n", file);

	  fputs ("\t.align 2\n", file);
	}
    }
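  /* A minimal sketch of the table laid out above (illustrative only; the
     actual byte values depend on the frame info computed above):

	.long 0			# all-zero word marking the tbtab start
	.byte 0,0		# format type 0, language C (0)
	.byte ...		# the bitfield bytes described above
	.short ...		# optional fields, when optional_tbtab  */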
  /* Arrange to define .LCTOC1 label, if not already done.  */
  if (need_toc_init)
    {
      need_toc_init = 0;
      if (!toc_initialized)
	{
	  switch_to_section (toc_section);
	  switch_to_section (current_function_section ());
	}
    }
}
/* -fsplit-stack support.  */

/* A SYMBOL_REF for __morestack.  */
static GTY(()) rtx morestack_ref;

static rtx
gen_add3_const (rtx rt, rtx ra, long c)
{
  if (TARGET_64BIT)
    return gen_adddi3 (rt, ra, GEN_INT (c));
  else
    return gen_addsi3 (rt, ra, GEN_INT (c));
}
/* Emit -fsplit-stack prologue, which goes before the regular function
   prologue (at local entry point in the case of ELFv2).  */

void
rs6000_expand_split_stack_prologue (void)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  unsigned HOST_WIDE_INT allocate;
  long alloc_hi, alloc_lo;
  rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
  rtx_insn *insn;

  gcc_assert (flag_split_stack && reload_completed);

  if (!info->push_p)
    return;

  if (global_regs[29])
    {
      error ("-fsplit-stack uses register r29");
      inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
	      "conflicts with %qD", global_regs_decl[29]);
    }

  allocate = info->total_size;
  if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
    {
      sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
      return;
    }

  if (morestack_ref == NULL_RTX)
    {
      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
					   | SYMBOL_FLAG_FUNCTION);
    }

  r0 = gen_rtx_REG (Pmode, 0);
  r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  r12 = gen_rtx_REG (Pmode, 12);
  emit_insn (gen_load_split_stack_limit (r0));
  /* Always emit two insns here to calculate the requested stack,
     so that the linker can edit them when adjusting size for calling
     non-split-stack code.  */
  alloc_hi = (-allocate + 0x8000) & ~0xffffL;
  alloc_lo = -allocate - alloc_hi;
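  /* Worked example (illustrative): for allocate == 0x12345,
     -allocate == -0x12345, so alloc_hi == -0x10000 and alloc_lo == -0x2345;
     alloc_hi + alloc_lo == -allocate, with alloc_lo in [-0x8000, 0x7fff],
     so the pair becomes "addis r12,r1,-1" followed by "addi r12,r12,-9029".  */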
  if (alloc_hi != 0)
    {
      emit_insn (gen_add3_const (r12, r1, alloc_hi));
      if (alloc_lo != 0)
	emit_insn (gen_add3_const (r12, r12, alloc_lo));
      else
	emit_insn (gen_nop ());
    }
  else
    {
      emit_insn (gen_add3_const (r12, r1, alloc_lo));
      emit_insn (gen_nop ());
    }

  compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
  emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
  ok_label = gen_label_rtx ();
  jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
			       gen_rtx_GEU (VOIDmode, compare, const0_rtx),
			       gen_rtx_LABEL_REF (VOIDmode, ok_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
  JUMP_LABEL (insn) = ok_label;
  /* Mark the jump as very likely to be taken.  */
  add_int_reg_note (insn, REG_BR_PROB,
		    REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);

  lr = gen_rtx_REG (Pmode, LR_REGNO);
  insn = emit_move_insn (r0, lr);
  RTX_FRAME_RELATED_P (insn) = 1;
  insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
  RTX_FRAME_RELATED_P (insn) = 1;

  insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
				   const0_rtx, const0_rtx));
  call_fusage = NULL_RTX;
  use_reg (&call_fusage, r12);
  /* Say the call uses r0, even though it doesn't, to stop regrename
     from twiddling with the insns saving lr, trashing args for cfun.
     The insns restoring lr are similarly protected by making
     split_stack_return use r0.  */
  use_reg (&call_fusage, r0);
  add_function_usage_to (insn, call_fusage);
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (insn);
  emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
  insn = emit_move_insn (lr, r0);
  add_reg_note (insn, REG_CFA_RESTORE, lr);
  RTX_FRAME_RELATED_P (insn) = 1;
  emit_insn (gen_split_stack_return ());

  emit_label (ok_label);
  LABEL_NUSES (ok_label) = 1;
}
/* Return the internal arg pointer used for function incoming
   arguments.  When -fsplit-stack, the arg pointer is r12 so we need
   to copy it to a pseudo in order for it to be preserved over calls
   and suchlike.  We'd really like to use a pseudo here for the
   internal arg pointer but data-flow analysis is not prepared to
   accept pseudos as live at the beginning of a function.  */

static rtx
rs6000_internal_arg_pointer (void)
{
  if (flag_split_stack
      && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
	  == NULL))
    {
      if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
	{
	  rtx pat;

	  cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
	  REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;

	  /* Put the pseudo initialization right after the note at the
	     beginning of the function.  */
	  pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
			     gen_rtx_REG (Pmode, 12));
	  push_topmost_sequence ();
	  emit_insn_after (pat, get_insns ());
	  pop_topmost_sequence ();
	}
      return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
			    FIRST_PARM_OFFSET (current_function_decl));
    }
  return virtual_incoming_args_rtx;
}
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a register.  */

static void
rs6000_live_on_entry (bitmap regs)
{
  if (flag_split_stack)
    bitmap_set_bit (regs, 12);
}
/* Emit -fsplit-stack dynamic stack allocation space check.  */

void
rs6000_split_stack_space_check (rtx size, rtx label)
{
  rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx limit = gen_reg_rtx (Pmode);
  rtx requested = gen_reg_rtx (Pmode);
  rtx cmp = gen_reg_rtx (CCUNSmode);
  rtx jump;

  emit_insn (gen_load_split_stack_limit (limit));
  if (CONST_INT_P (size))
    emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
  else
    {
      size = force_reg (Pmode, size);
      emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
    }
  emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
  jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
			       gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
  JUMP_LABEL (jump) = label;
}
/* A C compound statement that outputs the assembler code for a thunk
   function, used to implement C++ virtual function calls with
   multiple inheritance.  The thunk acts as a wrapper around a virtual
   function, adjusting the implicit object parameter before handing
   control off to the real function.

   First, emit code to add the integer DELTA to the location that
   contains the incoming first argument.  Assume that this argument
   contains a pointer, and is the one used to pass the `this' pointer
   in C++.  This is the incoming argument *before* the function
   prologue, e.g. `%o0' on a sparc.  The addition must preserve the
   values of all other incoming arguments.

   After the addition, emit code to jump to FUNCTION, which is a
   `FUNCTION_DECL'.  This is a direct pure jump, not a call, and does
   not touch the return address.  Hence returning from FUNCTION will
   return to whoever called the current `thunk'.

   The effect must be as if FUNCTION had been called directly with the
   adjusted first argument.  This macro is responsible for emitting
   all of the code for a thunk function; output_function_prologue()
   and output_function_epilogue() are not invoked.

   The THUNK_FNDECL is redundant.  (DELTA and FUNCTION have already
   been extracted from it.)  It might possibly be useful on some
   targets, but probably not.

   If you do not define this macro, the target-independent code in the
   C++ frontend will generate a less efficient heavyweight thunk that
   calls FUNCTION instead of jumping to it.  The generic approach does
   not support varargs.  */

static void
rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
			HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
			tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  If the function returns a structure,
     the structure return pointer is in r3.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, 4);
  else
    this_rtx = gen_rtx_REG (Pmode, 3);

  /* Apply the constant offset, if required.  */
  if (delta)
    emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 12);

      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
      if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
	{
	  emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
	  emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
	}
      else
	{
	  rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);

	  emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
	}
      emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);

#if TARGET_MACHO
  if (MACHOPIC_INDIRECT)
    funexp = machopic_indirect_call_target (funexp);
#endif

  /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
     generate sibcall RTL explicitly.  */
  insn = emit_call_insn (
	   gen_rtx_PARALLEL (VOIDmode,
	     gen_rtvec (3,
			gen_rtx_CALL (VOIDmode,
				      funexp, const0_rtx),
			gen_rtx_USE (VOIDmode, const0_rtx),
			simple_return_rtx)));
  SIBLING_CALL_P (insn) = 1;
  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
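/* For a simple thunk (illustrative only - delta == 16, no vcall offset),
   the code emitted by the above boils down to two instructions:

	addi 3,3,16		# adjust the incoming `this' pointer
	b <target>		# direct tail jump, LR untouched  */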
/* A quick summary of the various types of 'constant-pool tables'
   used by the rs6000 back end:

   Target	Flags		Name		One table per
   AIX		(none)		AIX TOC		object file
   AIX		-mfull-toc	AIX TOC		object file
   AIX		-mminimal-toc	AIX minimal TOC	translation unit
   SVR4/EABI	(none)		SVR4 SDATA	object file
   SVR4/EABI	-fpic		SVR4 pic	object file
   SVR4/EABI	-fPIC		SVR4 PIC	translation unit
   SVR4/EABI	-mrelocatable	EABI TOC	function
   SVR4/EABI	-maix		AIX TOC		object file
   SVR4/EABI	-maix -mminimal-toc
				AIX minimal TOC	translation unit

   Name			Reg.	Set by	entries	contains:
				made by	addrs?	fp?	sum?

   AIX TOC		2	crt0	as	Y	option	option
   AIX minimal TOC	30	prolog	gcc	Y	Y	option
   SVR4 SDATA		13	crt0	gcc	N	Y	N
   SVR4 pic		30	prolog	ld	Y	not yet	N
   SVR4 PIC		30	prolog	gcc	Y	option	option
   EABI TOC		30	prolog	gcc	Y	option	option  */
/* Hash functions for the hash table.  */

static unsigned
rs6000_hash_constant (rtx k)
{
  enum rtx_code code = GET_CODE (k);
  machine_mode mode = GET_MODE (k);
  unsigned result = (code << 3) ^ mode;
  const char *format;
  int flen, fidx;

  format = GET_RTX_FORMAT (code);
  flen = strlen (format);
  fidx = 0;

  switch (code)
    {
    case LABEL_REF:
      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));

    case CONST_WIDE_INT:
      {
	int i;
	flen = CONST_WIDE_INT_NUNITS (k);
	for (i = 0; i < flen; i++)
	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
	return result;
      }

    case CONST_DOUBLE:
      if (mode != VOIDmode)
	return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
      flen = 2;
      break;

    case CODE_LABEL:
      fidx = 3;
      break;

    default:
      break;
    }

  for (; fidx < flen; fidx++)
    switch (format[fidx])
      {
      case 's':
	{
	  unsigned i, len;
	  const char *str = XSTR (k, fidx);
	  len = strlen (str);
	  result = result * 613 + len;
	  for (i = 0; i < len; i++)
	    result = result * 613 + (unsigned) str[i];
	  break;
	}
      case 'u':
      case 'e':
	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
	break;
      case 'i':
      case 'n':
	result = result * 613 + (unsigned) XINT (k, fidx);
	break;
      case 'w':
	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
	  result = result * 613 + (unsigned) XWINT (k, fidx);
	else
	  {
	    size_t i;
	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
	      result = result * 613 + (unsigned) (XWINT (k, fidx)
						  >> CHAR_BIT * i);
	  }
	break;
      case '0':
	break;
      default:
	gcc_unreachable ();
      }

  return result;
}
hashval_t
toc_hasher::hash (toc_hash_struct *thc)
{
  return rs6000_hash_constant (thc->key) ^ thc->key_mode;
}

/* Compare H1 and H2 for equivalence.  */

bool
toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
{
  rtx r1 = h1->key;
  rtx r2 = h2->key;

  if (h1->key_mode != h2->key_mode)
    return 0;

  return rtx_equal_p (r1, r2);
}
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.  */

#define VTABLE_NAME_P(NAME)				\
  (strncmp ("_vt.", name, strlen ("_vt.")) == 0		\
   || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0	\
   || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0	\
   || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0	\
   || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
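/* For reference, the Itanium C++ ABI prefixes matched above are:
   _ZTV = vtable, _ZTT = VTT, _ZTI = typeinfo, _ZTC = construction
   vtable; "_vt." is the old GNU v2 vtable prefix.  */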
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
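/* Example: rs6000_xcoff_strip_dollar ("pkg$mod$sym") returns
   "pkg_mod_sym"; a name with no '$', or one whose first character
   is '$', is returned unchanged.  */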
void
rs6000_output_symbol_ref (FILE *file, rtx x)
{
  const char *name = XSTR (x, 0);

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the identifier and not the
     symbol.  */
  if (VTABLE_NAME_P (name))
    RS6000_OUTPUT_BASENAME (file, name);
  else
    assemble_name (file, name);
}
/* Output a TOC entry.  We derive the entry name from what is being
   written.  */

void
output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
{
  char buf[256];
  const char *name = buf;
  rtx base = x;
  HOST_WIDE_INT offset = 0;

  gcc_assert (!TARGET_NO_TOC);

  /* When the linker won't eliminate them, don't output duplicate
     TOC entries (this happens on AIX if there is any kind of TOC,
     and on SVR4 under -fPIC or -mrelocatable).  Don't do this for
     CODE_LABELs.  */
  if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
    {
      struct toc_hash_struct *h;

      /* Create toc_hash_table.  This can't be done at TARGET_OPTION_OVERRIDE
	 time because GGC is not initialized at that point.  */
      if (toc_hash_table == NULL)
	toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);

      h = ggc_alloc<toc_hash_struct> ();
      h->key = x;
      h->key_mode = mode;
      h->labelno = labelno;

      toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
      if (*found == NULL)
	*found = h;
      else  /* This is indeed a duplicate.
	       Set this label equal to that label.  */
	{
	  fputs ("\t.set ", file);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d,", labelno);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d\n", ((*found)->labelno));

#ifdef HAVE_AS_TLS
	  if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
	      && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
		  || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
	    {
	      fputs ("\t.set ", file);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d,", labelno);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d\n", ((*found)->labelno));
	    }
#endif
	  return;
	}
    }

  /* If we're going to put a double constant in the TOC, make sure it's
     aligned properly when strict alignment is on.  */
  if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
      && STRICT_ALIGNMENT
      && GET_MODE_BITSIZE (mode) >= 64
      && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
    ASM_OUTPUT_ALIGN (file, 3);

  (*targetm.asm_out.internal_label) (file, "LC", labelno);

  /* Handle FP constants specially.  Note that if we have a minimal
     TOC, things we put here aren't actually in the TOC, so we can allow
     FP constants.  */
  if (GET_CODE (x) == CONST_DOUBLE &&
      (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
       || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
    {
      long k[4];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff,
		   k[2] & 0xffffffff, k[3] & 0xffffffff);
	  return;
	}
    }
  else if (GET_CODE (x) == CONST_DOUBLE &&
	   (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
    {
      long k[2];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff);
	  return;
	}
    }
  else if (GET_CODE (x) == CONST_DOUBLE &&
	   (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
    {
      long l;

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
      else
	REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  if (WORDS_BIG_ENDIAN)
	    fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
	  else
	    fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
    }
  else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
    {
      unsigned HOST_WIDE_INT low;
      HOST_WIDE_INT high;

      low = INTVAL (x) & 0xffffffff;
      high = (HOST_WIDE_INT) INTVAL (x) >> 32;

      /* TOC entries are always Pmode-sized, so when big-endian
	 smaller integer constants in the TOC need to be padded.
	 (This is still a win over putting the constants in
	 a separate constant pool, because then we'd have
	 to have both a TOC entry _and_ the actual constant.)

	 For a 32-bit target, CONST_INT values are loaded and shifted
	 entirely within `low' and can be stored in one TOC entry.  */

      /* It would be easy to make this work, but it doesn't now.  */
      gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));

      if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
	{
	  low |= high << 32;
	  low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
	  high = (HOST_WIDE_INT) low >> 32;
	  low &= 0xffffffff;
	}

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc ID_%lx_%lx[TC],",
		     (long) high & 0xffffffff, (long) low & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   (long) high & 0xffffffff, (long) low & 0xffffffff);
	  return;
	}
      else
	{
	  if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc ID_%lx_%lx[TC],",
			 (long) high & 0xffffffff, (long) low & 0xffffffff);
	      fprintf (file, "0x%lx,0x%lx\n",
		       (long) high & 0xffffffff, (long) low & 0xffffffff);
	      return;
	    }
	  else
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
	      fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
	      return;
	    }
	}
    }

  if (GET_CODE (x) == CONST)
    {
      gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);

      base = XEXP (XEXP (x, 0), 0);
      offset = INTVAL (XEXP (XEXP (x, 0), 1));
    }

  switch (GET_CODE (base))
    {
    case SYMBOL_REF:
      name = XSTR (base, 0);
      break;

    case LABEL_REF:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L",
				   CODE_LABEL_NUMBER (XEXP (base, 0)));
      break;

    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ELF || TARGET_MINIMAL_TOC)
    fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
  else
    {
      fputs ("\t.tc ", file);
      RS6000_OUTPUT_BASENAME (file, name);

      if (offset < 0)
	fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
      else if (offset)
	fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);

      /* Mark large TOC symbols on AIX with [TE] so they are mapped
	 after other TOC symbols, reducing overflow of small TOC access
	 to [TC] symbols.  */
      fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
	     ? "[TE]," : "[TC],", file);
    }

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a TOC reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the symbol and not the
     section.  */
  if (VTABLE_NAME_P (name))
    {
      RS6000_OUTPUT_BASENAME (file, name);
      if (offset < 0)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
      else if (offset > 0)
	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
    }
  else
    output_addr_const (file, x);

#if HAVE_AS_TLS
  if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
    {
      switch (SYMBOL_REF_TLS_MODEL (base))
	{
	case 0:
	  break;
	case TLS_MODEL_LOCAL_EXEC:
	  fputs ("@le", file);
	  break;
	case TLS_MODEL_INITIAL_EXEC:
	  fputs ("@ie", file);
	  break;
	/* Use global-dynamic for local-dynamic.  */
	case TLS_MODEL_GLOBAL_DYNAMIC:
	case TLS_MODEL_LOCAL_DYNAMIC:
	  putc ('\n', file);
	  (*targetm.asm_out.internal_label) (file, "LCM", labelno);
	  fputs ("\t.tc .", file);
	  RS6000_OUTPUT_BASENAME (file, name);
	  fputs ("[TC],", file);
	  output_addr_const (file, x);
	  fputs ("@m", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }
#endif

  putc ('\n', file);
}
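/* Example (illustrative): on 32-bit AIX with a full TOC, the SFmode
   constant 1.0f (bit pattern 0x3f800000) produces the entry

	.tc FS_3f800000[TC],0x3f800000  */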
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
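/* Example: output_ascii (file, "hi\n", 3) emits

	.byte "hi"
	.byte 10

   - printable characters go inside one quoted string, the newline is
   written as a decimal byte.  */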
/* Generate a unique section name for FILENAME for a section type
   represented by SECTION_DESC.  Output goes into BUF.

   SECTION_DESC can be any string, as long as it is different for each
   possible section type.

   We name the section in the same manner as xlc.  The name begins with an
   underscore followed by the filename (after stripping any leading directory
   names) with the last period replaced by the string SECTION_DESC.  If
   FILENAME does not contain a period, SECTION_DESC is appended to the end of
   FILENAME.  */

void
rs6000_gen_section_name (char **buf, const char *filename,
			 const char *section_desc)
{
  const char *q, *after_last_slash, *last_period = 0;
  char *p;
  int len;

  after_last_slash = filename;
  for (q = filename; *q; q++)
    {
      if (*q == '/')
	after_last_slash = q + 1;
      else if (*q == '.')
	last_period = q;
    }

  len = strlen (after_last_slash) + strlen (section_desc) + 2;
  *buf = (char *) xmalloc (len);

  p = *buf;
  *p++ = '_';

  for (q = after_last_slash; *q; q++)
    {
      if (q == last_period)
	{
	  strcpy (p, section_desc);
	  p += strlen (section_desc);
	  break;
	}
      else if (ISALNUM (*q))
	*p++ = *q;
    }

  if (last_period == 0)
    strcpy (p, section_desc);
  else
    *p = '\0';
}
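/* Example: rs6000_gen_section_name (&buf, "src/foo.c", "bss_") sets buf to
   "_foobss_" - the directory part and the suffix after the last period
   are dropped, and SECTION_DESC takes the period's place.  */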
/* Emit profile function.  */

void
output_profile_hook (int labelno ATTRIBUTE_UNUSED)
{
  /* Non-standard profiling for kernels, which just saves LR then calls
     _mcount without worrying about arg saves.  The idea is to change
     the function prologue as little as possible as it isn't easy to
     account for arg save/restore code added just for _mcount.  */
  if (TARGET_PROFILE_KERNEL)
    return;

  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
#ifndef NO_PROFILE_COUNTERS
# define NO_PROFILE_COUNTERS 0
#endif
      if (NO_PROFILE_COUNTERS)
	emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			   LCT_NORMAL, VOIDmode, 0);
      else
	{
	  char buf[30];
	  const char *label_name;
	  rtx fun;

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
	  label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
	  fun = gen_rtx_SYMBOL_REF (Pmode, label_name);

	  emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			     LCT_NORMAL, VOIDmode, 1, fun, Pmode);
	}
    }
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      const char *mcount_name = RS6000_MCOUNT;
      int caller_addr_regno = LR_REGNO;

      /* Be conservative and always set this, at least for now.  */
      crtl->uses_pic_offset_table = 1;

#if TARGET_MACHO
      /* For PIC code, set up a stub and collect the caller's address
	 from r0, which is where the prologue puts it.  */
      if (MACHOPIC_INDIRECT
	  && crtl->uses_pic_offset_table)
	caller_addr_regno = 0;
#endif
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
			 LCT_NORMAL, VOIDmode, 1,
			 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
    }
}
/* Write function profiler code.  */

void
output_function_profiler (FILE *file, int labelno)
{
  char buf[100];

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_V4:
      if (!TARGET_32BIT)
	{
	  warning (0, "no profiling of 64-bit code for this ABI");
	  return;
	}
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      fprintf (file, "\tmflr %s\n", reg_names[0]);
      if (NO_PROFILE_COUNTERS)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	}
      else if (TARGET_SECURE_PLT && flag_pic)
	{
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n", name);
	    }
	  else
	    asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\taddis %s,%s,",
		       reg_names[12], reg_names[12]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
	}
      else if (flag_pic == 1)
	{
	  fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\tlwz %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@got(%s)\n", reg_names[12]);
	}
      else if (flag_pic > 1)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  /* Now, we need to get the address of the label.  */
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	      asm_fprintf (file, "\taddi %s,%s,4\n",
			   reg_names[11], reg_names[11]);
	    }
	  else
	    {
	      fputs ("\tbcl 20,31,1f\n\t.long ", file);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	    }
	  asm_fprintf (file, "\tlwz %s,0(%s)\n",
		       reg_names[0], reg_names[11]);
	  asm_fprintf (file, "\tadd %s,%s,%s\n",
		       reg_names[0], reg_names[0], reg_names[11]);
	}
      else
	{
	  asm_fprintf (file, "\tlis %s,", reg_names[12]);
	  assemble_name (file, buf);
	  fputs ("@ha\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@l(%s)\n", reg_names[12]);
	}

      /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH.  */
      fprintf (file, "\tbl %s%s\n",
	       RS6000_MCOUNT, flag_pic ? "@plt" : "");
      break;

    case ABI_AIX:
    case ABI_ELFv2:
    case ABI_DARWIN:
      /* Don't do anything, done in output_profile_hook ().  */
      break;
    }
}
/* The following variable holds the last insn that was issued.  */
static rtx_insn *last_scheduled_insn;

/* The following variable helps to balance issuing of load and
   store instructions.  */
static int load_store_pendulum;

/* The following variable helps pair divide insns during scheduling.  */
static int divide_cnt;
/* The following variable helps pair and alternate vector and vector load
   insns during scheduling.  */
static int vec_pairing;
/* Power4 load update and store update instructions are cracked into a
   load or store and an integer insn which are executed in the same cycle.
   Branches have their own dispatch slot which does not count against the
   GCC issue rate, but it changes the program flow so there are no other
   instructions to issue in this cycle.  */

static int
rs6000_variable_issue_1 (rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    {
      cached_can_issue_more = more;
      return cached_can_issue_more;
    }

  if (insn_terminates_group_p (insn, current_group))
    {
      cached_can_issue_more = 0;
      return cached_can_issue_more;
    }

  /* If there is no reservation, but we reach here anyway.  */
  if (recog_memoized (insn) < 0)
    return more;

  if (rs6000_sched_groups)
    {
      if (is_microcoded_insn (insn))
	cached_can_issue_more = 0;
      else if (is_cracked_insn (insn))
	cached_can_issue_more = more > 2 ? more - 2 : 0;
      else
	cached_can_issue_more = more - 1;

      return cached_can_issue_more;
    }

  if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
    return 0;

  cached_can_issue_more = more - 1;
  return cached_can_issue_more;
}

static int
rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
{
  int r = rs6000_variable_issue_1 (insn, more);
  if (verbose)
    fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
  return r;
}
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		    unsigned int)
{
  enum attr_type attr_type;

  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  switch (dep_type)
    {
    case REG_DEP_TRUE:
      {
	/* Data dependency; DEP_INSN writes a register that INSN reads
	   some cycles later.  */

	/* Separate a load from a narrower, dependent store.  */
	if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
	    && GET_CODE (PATTERN (insn)) == SET
	    && GET_CODE (PATTERN (dep_insn)) == SET
	    && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
	    && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
	    && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
		> GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
	  return cost + 14;

	attr_type = get_attr_type (insn);

	switch (attr_type)
	  {
	  case TYPE_JMPREG:
	    /* Tell the first scheduling pass about the latency between
	       a mtctr and bctr (and mtlr and br/blr).  The first
	       scheduling pass will not know about this latency since
	       the mtctr instruction, which has the latency associated
	       to it, will be generated by reload.  */
	    return 4;
	  case TYPE_BRANCH:
	    /* Leave some extra cycles between a compare and its
	       dependent branch, to inhibit expensive mispredicts.  */
	    if ((rs6000_cpu_attr == CPU_PPC603
		 || rs6000_cpu_attr == CPU_PPC604
		 || rs6000_cpu_attr == CPU_PPC604E
		 || rs6000_cpu_attr == CPU_PPC620
		 || rs6000_cpu_attr == CPU_PPC630
		 || rs6000_cpu_attr == CPU_PPC750
		 || rs6000_cpu_attr == CPU_PPC7400
		 || rs6000_cpu_attr == CPU_PPC7450
		 || rs6000_cpu_attr == CPU_PPCE5500
		 || rs6000_cpu_attr == CPU_PPCE6500
		 || rs6000_cpu_attr == CPU_POWER4
		 || rs6000_cpu_attr == CPU_POWER5
		 || rs6000_cpu_attr == CPU_POWER7
		 || rs6000_cpu_attr == CPU_POWER8
		 || rs6000_cpu_attr == CPU_POWER9
		 || rs6000_cpu_attr == CPU_CELL)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))

	      switch (get_attr_type (dep_insn))
		{
		case TYPE_CMP:
		case TYPE_FPCOMPARE:
		case TYPE_CR_LOGICAL:
		case TYPE_DELAYED_CR:
		  return cost + 2;
		case TYPE_EXTS:
		case TYPE_MUL:
		  if (get_attr_dot (dep_insn) == DOT_YES)
		    return cost + 2;
		  else
		    break;
		case TYPE_SHIFT:
		  if (get_attr_dot (dep_insn) == DOT_YES
		      && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
		    return cost + 2;
		  else
		    break;
		default:
		  break;
		}
	    break;

	  case TYPE_STORE:
	  case TYPE_FPSTORE:
	    if ((rs6000_cpu == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {
		if (GET_CODE (PATTERN (insn)) != SET)
		  /* If this happens, we have to extend this to schedule
		     optimally.  Return default for now.  */
		  return cost;

		/* Adjust the cost for the case where the value written
		   by a fixed point operation is used as the address
		   gen value on a store.  */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		    if (! store_data_bypass_p (dep_insn, insn))
		      return get_attr_sign_extend (dep_insn)
			     == SIGN_EXTEND_YES ? 6 : 4;
		    break;
		  case TYPE_SHIFT:
		    if (! store_data_bypass_p (dep_insn, insn))
		      return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			     6 : 3;
		    break;
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    if (! store_data_bypass_p (dep_insn, insn))
		      return 3;
		    break;
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    if (get_attr_update (dep_insn) == UPDATE_YES
			&& ! store_data_bypass_p (dep_insn, insn))
		      return 3;
		    break;
		  case TYPE_MUL:
		    if (! store_data_bypass_p (dep_insn, insn))
		      return 17;
		    break;
		  case TYPE_DIV:
		    if (! store_data_bypass_p (dep_insn, insn))
		      return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		    break;
		  default:
		    break;
		  }
	      }
	    break;

	  case TYPE_LOAD:
	    if ((rs6000_cpu == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {
		/* Adjust the cost for the case where the value written
		   by a fixed point instruction is used within the address
		   gen portion of a subsequent load(u)(x).  */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		    if (set_to_load_agen (dep_insn, insn))
		      return get_attr_sign_extend (dep_insn)
			     == SIGN_EXTEND_YES ? 6 : 4;
		    break;
		  case TYPE_SHIFT:
		    if (set_to_load_agen (dep_insn, insn))
		      return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			     6 : 3;
		    break;
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    if (set_to_load_agen (dep_insn, insn))
		      return 3;
		    break;
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    if (get_attr_update (dep_insn) == UPDATE_YES
			&& set_to_load_agen (dep_insn, insn))
		      return 3;
		    break;
		  case TYPE_MUL:
		    if (set_to_load_agen (dep_insn, insn))
		      return 17;
		    break;
		  case TYPE_DIV:
		    if (set_to_load_agen (dep_insn, insn))
		      return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		    break;
		  default:
		    break;
		  }
	      }
	    break;

	  case TYPE_FPLOAD:
	    if ((rs6000_cpu == PROCESSOR_POWER6)
		&& get_attr_update (insn) == UPDATE_NO
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0)
		&& (get_attr_type (dep_insn) == TYPE_MFFGPR))
	      return 2;

	  default:
	    break;
	  }

	/* Fall out to return default cost.  */
      }
      break;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if ((rs6000_cpu == PROCESSOR_POWER6)
	  && recog_memoized (dep_insn)
	  && (INSN_CODE (dep_insn) >= 0))
	{
	  attr_type = get_attr_type (insn);

	  switch (attr_type)
	    {
	    case TYPE_FP:
	    case TYPE_FPSIMPLE:
	      if (get_attr_type (dep_insn) == TYPE_FP
		  || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
		return 1;
	      break;
	    case TYPE_FPLOAD:
	      if (get_attr_update (insn) == UPDATE_NO
		  && get_attr_type (dep_insn) == TYPE_MFFGPR)
		return 2;
	      break;
	    default:
	      break;
	    }
	}
      /* Fall through, no cost for output dependency.  */

    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
	 cycles later.  */
      return 0;

    default:
      gcc_unreachable ();
    }

  return cost;
}

/* Debug version of rs6000_adjust_cost.  */

static int
rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
			  int cost, unsigned int dw)
{
  int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);

  if (ret != cost)
    {
      const char *dep;

      switch (dep_type)
	{
	default:	     dep = "unknown dependency"; break;
	case REG_DEP_TRUE:   dep = "data dependency";	 break;
	case REG_DEP_OUTPUT: dep = "output dependency";	 break;
	case REG_DEP_ANTI:   dep = "anti dependency";	 break;
	}

      fprintf (stderr,
	       "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
	       "%s, insn:\n", ret, cost, dep);

      debug_rtx (insn);
    }

  return ret;
}
/* The function returns true if INSN is microcoded.
   Return false otherwise.  */

static bool
is_microcoded_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_cpu_attr == CPU_CELL)
    return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;

  if (rs6000_sched_groups
      && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      if ((type == TYPE_LOAD
	   && get_attr_update (insn) == UPDATE_YES
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
	  || ((type == TYPE_LOAD || type == TYPE_STORE)
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	  || type == TYPE_MFCR)
	return true;
    }

  return false;
}

/* The function returns true if INSN is cracked into 2 instructions
   by the processor (and therefore occupies 2 issue slots).  */

static bool
is_cracked_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_sched_groups
      && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      if ((type == TYPE_LOAD
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	   && get_attr_update (insn) == UPDATE_NO)
	  || (type == TYPE_LOAD
	      && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || (type == TYPE_STORE
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
	      && get_attr_update (insn) == UPDATE_YES)
	  || type == TYPE_DELAYED_CR
	  || (type == TYPE_EXTS
	      && get_attr_dot (insn) == DOT_YES)
	  || (type == TYPE_SHIFT
	      && get_attr_dot (insn) == DOT_YES
	      && get_attr_var_shift (insn) == VAR_SHIFT_NO)
	  || (type == TYPE_MUL
	      && get_attr_dot (insn) == DOT_YES)
	  || type == TYPE_DIV
	  || (type == TYPE_INSERT
	      && get_attr_size (insn) == SIZE_32))
	return true;
    }

  return false;
}
/* The function returns true if INSN can be issued only from
   the branch slot.  */

static bool
is_branch_slot_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_sched_groups)
    {
      enum attr_type type = get_attr_type (insn);
      if (type == TYPE_BRANCH || type == TYPE_JMPREG)
	return true;
      return false;
    }

  return false;
}
/* The function returns true if OUT_INSN sets a value that is
   used in the address generation computation of IN_INSN.  */

static bool
set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;

  /* For performance reasons, only handle the simple case where
     both loads are a single_set.  */
  out_set = single_set (out_insn);
  if (out_set)
    {
      in_set = single_set (in_insn);
      if (in_set)
	return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
    }

  return false;
}
/* Try to determine base/offset/size parts of the given MEM.
   Return true if successful, false if all the values couldn't
   be determined.

   This function only looks for REG or REG+CONST address forms.
   REG+REG address form will return false.  */

static bool
get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
		  HOST_WIDE_INT *size)
{
  rtx addr_rtx;
  if (MEM_SIZE_KNOWN_P (mem))
    *size = MEM_SIZE (mem);
  else
    return false;

  addr_rtx = (XEXP (mem, 0));
  if (GET_CODE (addr_rtx) == PRE_MODIFY)
    addr_rtx = XEXP (addr_rtx, 1);

  *offset = 0;
  while (GET_CODE (addr_rtx) == PLUS
	 && CONST_INT_P (XEXP (addr_rtx, 1)))
    {
      *offset += INTVAL (XEXP (addr_rtx, 1));
      addr_rtx = XEXP (addr_rtx, 0);
    }
  if (!REG_P (addr_rtx))
    return false;

  *base = addr_rtx;
  return true;
}
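/* Example: for a MEM like (mem:SI (plus:DI (reg:DI 9) (const_int 16))),
   this returns base == r9, offset == 16 and size == 4; a reg+reg form
   such as (mem:SI (plus:DI (reg:DI 9) (reg:DI 10))) returns false.  */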
/* Return true if the target storage location of MEM1 is adjacent to
   the target storage location of MEM2.  */

static bool
adjacent_mem_locations (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2))
    return ((REGNO (reg1) == REGNO (reg2))
	    && ((off1 + size1 == off2)
		|| (off2 + size2 == off1)));

  return false;
}
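/* Example: a 4-byte access at 0(r9) and another at 4(r9) are adjacent
   (off1 + size1 == off2); the same pair based on different registers is
   not, even if the registers happen to hold equal values.  */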
/* This function returns true if it can be determined that the two MEM
   locations overlap by at least 1 byte based on base reg/offset/size.  */

static bool
mem_locations_overlap (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2))
    return ((REGNO (reg1) == REGNO (reg2))
	    && (((off1 <= off2) && (off1 + size1 > off2))
		|| ((off2 <= off1) && (off2 + size2 > off1))));

  return false;
}
/* A C statement (sans semicolon) to update the integer scheduling
   priority INSN_PRIORITY (INSN).  Increase the priority to execute the
   INSN earlier, reduce the priority to execute INSN later.  Do not
   define this macro if you do not need to adjust the scheduling
   priorities of insns.  */

static int
rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
{
  rtx load_mem, str_mem;
  /* On machines (like the 750) which have asymmetric integer units,
     where one integer unit can do multiply and divides and the other
     can't, reduce the priority of multiply/divide so it is scheduled
     before other integer operations.  */

#if 0
  if (! INSN_P (insn))
    return priority;

  if (GET_CODE (PATTERN (insn)) == USE)
    return priority;

  switch (rs6000_cpu_attr) {
  case CPU_PPC750:
    switch (get_attr_type (insn))
      {
      default:
	break;

      case TYPE_MUL:
      case TYPE_DIV:
	fprintf (stderr, "priority was %#x (%d) before adjustment\n",
		 priority, priority);
	if (priority >= 0 && priority < 0x01000000)
	  priority >>= 3;
	break;
      }
  }
#endif

  if (insn_must_be_first_in_group (insn)
      && reload_completed
      && current_sched_info->sched_max_insns_priority
      && rs6000_sched_restricted_insns_priority)
    {
      /* Prioritize insns that can be dispatched only in the first
	 dispatch slot.  */
      if (rs6000_sched_restricted_insns_priority == 1)
	/* Attach highest priority to insn.  This means that in
	   haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
	   precede 'priority' (critical path) considerations.  */
	return current_sched_info->sched_max_insns_priority;
      else if (rs6000_sched_restricted_insns_priority == 2)
	/* Increase priority of insn by a minimal amount.  This means that in
	   haifa-sched.c:ready_sort(), only 'priority' (critical path)
	   considerations precede dispatch-slot restriction considerations.  */
	return (priority + 1);
    }

  if (rs6000_cpu == PROCESSOR_POWER6
      && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
	  || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
    /* Attach highest priority to insn if the scheduler has just issued two
       stores and this instruction is a load, or two loads and this
       instruction is a store.  Power6 wants loads and stores scheduled
       alternately when possible.  */
    return current_sched_info->sched_max_insns_priority;

  return priority;
}
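/* Note: as used above, a load_store_pendulum value of -2 means the last
   two memory ops issued were stores (so a load gets top priority), and 2
   means the last two were loads (so a store does).  */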
/* Return true if the instruction is nonpipelined on the Cell.  */
static bool
is_nonpipeline_insn (rtx_insn *insn)
{
  enum attr_type type;
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  type = get_attr_type (insn);
  if (type == TYPE_MUL
      || type == TYPE_DIV
      || type == TYPE_SDIV
      || type == TYPE_DDIV
      || type == TYPE_SSQRT
      || type == TYPE_DSQRT
      || type == TYPE_MFCR
      || type == TYPE_MFCRF
      || type == TYPE_MFJMPR)
    return true;

  return false;
}
33522 rs6000_issue_rate (void)
33524 /* Unless scheduling for register pressure, use issue rate of 1 for
33525 first scheduling pass to decrease degradation. */
33526 if (!reload_completed
&& !flag_sched_pressure
)
33529 switch (rs6000_cpu_attr
) {
33531 case CPU_PPC601
: /* ? */
33541 case CPU_PPCE300C2
:
33542 case CPU_PPCE300C3
:
33543 case CPU_PPCE500MC
:
33544 case CPU_PPCE500MC64
:
/* Return how many instructions to look ahead for better insn
   scheduling.  */

static int
rs6000_use_sched_lookahead (void)
{
  switch (rs6000_cpu_attr)
    {
    case CPU_PPC8540:
    case CPU_PPC8548:
      return 4;

    case CPU_CELL:
      return (reload_completed ? 8 : 0);

    default:
      return 0;
    }
}
/* We are choosing insn from the ready queue.  Return zero if INSN can be
   chosen.  */

static int
rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
{
  if (ready_index == 0)
    return 0;

  if (rs6000_cpu_attr != CPU_CELL)
    return 0;

  gcc_assert (insn != NULL_RTX && INSN_P (insn));

  if (!reload_completed
      || is_nonpipeline_insn (insn)
      || is_microcoded_insn (insn))
    return 1;

  return 0;
}
/* Determine if PAT refers to memory.  If so, set MEM_REF to the MEM rtx
   and return true.  */

static bool
find_mem_ref (rtx pat, rtx *mem_ref)
{
  const char *fmt;
  int i, j;

  /* stack_tie does not produce any real memory traffic.  */
  if (tie_operand (pat, VOIDmode))
    return false;

  if (GET_CODE (pat) == MEM)
    {
      *mem_ref = pat;
      return true;
    }

  /* Recursively process the pattern.  */
  fmt = GET_RTX_FORMAT (GET_CODE (pat));

  for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  if (find_mem_ref (XEXP (pat, i), mem_ref))
	    return true;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
	  {
	    if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
	      return true;
	  }
    }

  return false;
}
/* Determine if PAT is a PATTERN of a load insn.  */

static bool
is_load_insn1 (rtx pat, rtx *load_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    return find_mem_ref (SET_SRC (pat), load_mem);

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
	  return true;
    }

  return false;
}

/* Determine if INSN loads from memory.  */

static bool
is_load_insn (rtx insn, rtx *load_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  if (CALL_P (insn))
    return false;

  return is_load_insn1 (PATTERN (insn), load_mem);
}

/* Determine if PAT is a PATTERN of a store insn.  */

static bool
is_store_insn1 (rtx pat, rtx *str_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    return find_mem_ref (SET_DEST (pat), str_mem);

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
	  return true;
    }

  return false;
}

/* Determine if INSN stores to memory.  */

static bool
is_store_insn (rtx insn, rtx *str_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  return is_store_insn1 (PATTERN (insn), str_mem);
}
/* Return whether TYPE is a Power9 pairable vector instruction type.  */

static bool
is_power9_pairable_vec_type (enum attr_type type)
{
  switch (type)
    {
    case TYPE_VECSIMPLE:
    case TYPE_VECCOMPLEX:
    case TYPE_VECDIV:
    case TYPE_VECCMP:
    case TYPE_VECPERM:
    case TYPE_VECFLOAT:
    case TYPE_VECFUSED:
    case TYPE_VECDOUBLE:
      return true;
    default:
      break;
    }
  return false;
}
/* Returns whether the dependence between INSN and NEXT is considered
   costly by the given target.  */

static bool
rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
{
  rtx insn;
  rtx next;
  rtx load_mem, str_mem;

  /* If the flag is not enabled - no dependence is considered costly;
     allow all dependent insns in the same group.
     This is the most aggressive option.  */
  if (rs6000_sched_costly_dep == no_dep_costly)
    return false;

  /* If the flag is set to 1 - a dependence is always considered costly;
     do not allow dependent instructions in the same group.
     This is the most conservative option.  */
  if (rs6000_sched_costly_dep == all_deps_costly)
    return true;

  insn = DEP_PRO (dep);
  next = DEP_CON (dep);

  if (rs6000_sched_costly_dep == store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem))
    /* Prevent load after store in the same group.  */
    return true;

  if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem)
      && DEP_TYPE (dep) == REG_DEP_TRUE
      && mem_locations_overlap (str_mem, load_mem))
    /* Prevent load after store in the same group if it is a true
       dependence.  */
    return true;

  /* The flag is set to X; dependences with latency >= X are considered costly,
     and will not be scheduled in the same group.  */
  if (rs6000_sched_costly_dep <= max_dep_latency
      && ((cost - distance) >= (int) rs6000_sched_costly_dep))
    return true;

  return false;
}
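/* A minimal sketch (not part of GCC) of the numeric-threshold case
   handled last above: a dependence is costly once its remaining
   latency reaches N, where N stands in for rs6000_sched_costly_dep
   as set by -msched-costly-dep=N.  E.g. cost 5, distance 2, N 3:
   (5 - 2) >= 3, so the two insns are kept in separate groups.  */
static inline bool
latency_exceeds_threshold (int cost, int distance, int n)
{
  /* COST is the dependence latency; DISTANCE is the number of cycles
     already separating producer and consumer.  */
  return (cost - distance) >= n;
}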
/* Return the next insn after INSN that is found before TAIL is reached,
   skipping any "non-active" insns - insns that will not actually occupy
   an issue slot.  Return NULL_RTX if such an insn is not found.  */

static rtx_insn *
get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
{
  if (insn == NULL_RTX || insn == tail)
    return NULL;

  while (1)
    {
      insn = NEXT_INSN (insn);
      if (insn == NULL_RTX || insn == tail)
        return NULL;

      if (CALL_P (insn)
          || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
          || (NONJUMP_INSN_P (insn)
              && GET_CODE (PATTERN (insn)) != USE
              && GET_CODE (PATTERN (insn)) != CLOBBER
              && INSN_CODE (insn) != CODE_FOR_stack_tie))
        break;
    }
  return insn;
}
/* Do Power9 specific sched_reorder2 reordering of ready list.  */

static int
power9_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  int pos;
  int i;
  rtx_insn *tmp;
  enum attr_type type, type2;

  type = get_attr_type (last_scheduled_insn);

  /* Try to issue fixed point divides back-to-back in pairs so they will be
     routed to separate execution units and execute in parallel.  */
  if (type == TYPE_DIV && divide_cnt == 0)
    {
      /* First divide has been scheduled.  */
      divide_cnt = 1;

      /* Scan the ready list looking for another divide, if found move it
         to the end of the list so it is chosen next.  */
      pos = lastpos;
      while (pos >= 0)
        {
          if (recog_memoized (ready[pos]) >= 0
              && get_attr_type (ready[pos]) == TYPE_DIV)
            {
              tmp = ready[pos];
              for (i = pos; i < lastpos; i++)
                ready[i] = ready[i + 1];
              ready[lastpos] = tmp;
              break;
            }
          pos--;
        }
    }
  else
    {
      /* Last insn was the 2nd divide or not a divide, reset the counter.  */
      divide_cnt = 0;

      /* The best dispatch throughput for vector and vector load insns can be
         achieved by interleaving a vector and vector load such that they'll
         dispatch to the same superslice.  If this pairing cannot be achieved
         then it is best to pair vector insns together and vector load insns
         together.

         To aid in this pairing, vec_pairing maintains the current state with
         the following values:

         0 : Initial state, no vecload/vector pairing has been started.

         1 : A vecload or vector insn has been issued and a candidate for
             pairing has been found and moved to the end of the ready
             list.  */
      if (type == TYPE_VECLOAD)
        {
          /* Issued a vecload.  */
          if (vec_pairing == 0)
            {
              int vecload_pos = -1;
              /* We issued a single vecload, look for a vector insn to pair it
                 with.  If one isn't found, try to pair another vecload.  */
              pos = lastpos;
              while (pos >= 0)
                {
                  if (recog_memoized (ready[pos]) >= 0)
                    {
                      type2 = get_attr_type (ready[pos]);
                      if (is_power9_pairable_vec_type (type2))
                        {
                          /* Found a vector insn to pair with, move it to the
                             end of the ready list so it is scheduled next.  */
                          tmp = ready[pos];
                          for (i = pos; i < lastpos; i++)
                            ready[i] = ready[i + 1];
                          ready[lastpos] = tmp;
                          vec_pairing = 1;
                          return cached_can_issue_more;
                        }
                      else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
                        /* Remember position of first vecload seen.  */
                        vecload_pos = pos;
                    }
                  pos--;
                }
              if (vecload_pos >= 0)
                {
                  /* Didn't find a vector to pair with but did find a vecload,
                     move it to the end of the ready list.  */
                  tmp = ready[vecload_pos];
                  for (i = vecload_pos; i < lastpos; i++)
                    ready[i] = ready[i + 1];
                  ready[lastpos] = tmp;
                  vec_pairing = 1;
                  return cached_can_issue_more;
                }
            }
        }
      else if (is_power9_pairable_vec_type (type))
        {
          /* Issued a vector operation.  */
          if (vec_pairing == 0)
            {
              int vec_pos = -1;
              /* We issued a single vector insn, look for a vecload to pair it
                 with.  If one isn't found, try to pair another vector.  */
              pos = lastpos;
              while (pos >= 0)
                {
                  if (recog_memoized (ready[pos]) >= 0)
                    {
                      type2 = get_attr_type (ready[pos]);
                      if (type2 == TYPE_VECLOAD)
                        {
                          /* Found a vecload insn to pair with, move it to the
                             end of the ready list so it is scheduled next.  */
                          tmp = ready[pos];
                          for (i = pos; i < lastpos; i++)
                            ready[i] = ready[i + 1];
                          ready[lastpos] = tmp;
                          vec_pairing = 1;
                          return cached_can_issue_more;
                        }
                      else if (is_power9_pairable_vec_type (type2)
                               && vec_pos == -1)
                        /* Remember position of first vector insn seen.  */
                        vec_pos = pos;
                    }
                  pos--;
                }
              if (vec_pos >= 0)
                {
                  /* Didn't find a vecload to pair with but did find a vector
                     insn, move it to the end of the ready list.  */
                  tmp = ready[vec_pos];
                  for (i = vec_pos; i < lastpos; i++)
                    ready[i] = ready[i + 1];
                  ready[lastpos] = tmp;
                  vec_pairing = 1;
                  return cached_can_issue_more;
                }
            }
        }

      /* We've either finished a vec/vecload pair, couldn't find an insn to
         continue the current pair, or the last insn had nothing to do with
         pairing.  In any case, reset the state.  */
      vec_pairing = 0;
    }

  return cached_can_issue_more;
}
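/* Illustrative sketch (not part of GCC) of the vec_pairing state
   machine documented above.  State 0: no pairing in progress;
   state 1: a vecload or vector insn was issued and a partner was
   moved to the end of the ready list.  Anything that cannot extend
   the pair resets the state.  */
static inline int
vec_pairing_next_state (bool issued_vec_or_vecload, bool partner_moved)
{
  if (issued_vec_or_vecload && partner_moved)
    return 1;   /* candidate now at the end of the ready list */
  return 0;     /* pair completed or nothing found: reset */
}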
/* We are about to begin issuing insns for this clock cycle.  */

static int
rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
                      rtx_insn **ready ATTRIBUTE_UNUSED,
                      int *pn_ready ATTRIBUTE_UNUSED,
                      int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *pn_ready;

  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder :\n");

  /* Reorder the ready list, if the second to last ready insn
     is a non-pipelined insn.  */
  if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
    {
      if (is_nonpipeline_insn (ready[n_ready - 1])
          && (recog_memoized (ready[n_ready - 2]) > 0))
        /* Simply swap first two insns.  */
        std::swap (ready[n_ready - 1], ready[n_ready - 2]);
    }

  if (rs6000_cpu == PROCESSOR_POWER6)
    load_store_pendulum = 0;

  return rs6000_issue_rate ();
}
/* Like rs6000_sched_reorder, but called after issuing each insn.  */

static int
rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
                       int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder2 :\n");

  /* For Power6, we need to handle some special cases to try and keep the
     store queue from overflowing and triggering expensive flushes.

     This code monitors how load and store instructions are being issued
     and skews the ready list one way or the other to increase the likelihood
     that a desired instruction is issued at the proper time.

     A couple of things are done.  First, we maintain a "load_store_pendulum"
     to track the current state of load/store issue.

       - If the pendulum is at zero, then no loads or stores have been
         issued in the current cycle so we do nothing.

       - If the pendulum is 1, then a single load has been issued in this
         cycle and we attempt to locate another load in the ready list to
         issue with it.

       - If the pendulum is -2, then two stores have already been
         issued in this cycle, so we increase the priority of the first load
         in the ready list to increase its likelihood of being chosen first
         in the next cycle.

       - If the pendulum is -1, then a single store has been issued in this
         cycle and we attempt to locate another store in the ready list to
         issue with it, preferring a store to an adjacent memory location to
         facilitate store pairing in the store queue.

       - If the pendulum is 2, then two loads have already been
         issued in this cycle, so we increase the priority of the first store
         in the ready list to increase its likelihood of being chosen first
         in the next cycle.

       - If the pendulum < -2 or > 2, then do nothing.

     Note: This code covers the most common scenarios.  There exist non
           load/store instructions which make use of the LSU and which
           would need to be accounted for to strictly model the behavior
           of the machine.  Those instructions are currently unaccounted
           for to help minimize compile time overhead of this code.  */

  if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
    {
      int pos;
      int i;
      rtx_insn *tmp;
      rtx load_mem, str_mem;

      if (is_store_insn (last_scheduled_insn, &str_mem))
        /* Issuing a store, swing the load_store_pendulum to the left */
        load_store_pendulum--;
      else if (is_load_insn (last_scheduled_insn, &load_mem))
        /* Issuing a load, swing the load_store_pendulum to the right */
        load_store_pendulum++;
      else
        return cached_can_issue_more;

      /* If the pendulum is balanced, or there is only one instruction on
         the ready list, then all is well, so return.  */
      if ((load_store_pendulum == 0) || (*pn_ready <= 1))
        return cached_can_issue_more;

      if (load_store_pendulum == 1)
        {
          /* A load has been issued in this cycle.  Scan the ready list
             for another load to issue with it */
          pos = *pn_ready - 1;

          while (pos >= 0)
            {
              if (is_load_insn (ready[pos], &load_mem))
                {
                  /* Found a load.  Move it to the head of the ready list,
                     and adjust its priority so that it is more likely to
                     stay there */
                  tmp = ready[pos];
                  for (i = pos; i < *pn_ready - 1; i++)
                    ready[i] = ready[i + 1];
                  ready[*pn_ready - 1] = tmp;

                  if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
                    INSN_PRIORITY (tmp)++;
                  break;
                }
              pos--;
            }
        }
      else if (load_store_pendulum == -2)
        {
          /* Two stores have been issued in this cycle.  Increase the
             priority of the first load in the ready list to favor it for
             issuing in the next cycle.  */
          pos = *pn_ready - 1;

          while (pos >= 0)
            {
              if (is_load_insn (ready[pos], &load_mem)
                  && !sel_sched_p ()
                  && INSN_PRIORITY_KNOWN (ready[pos]))
                {
                  INSN_PRIORITY (ready[pos])++;

                  /* Adjust the pendulum to account for the fact that a load
                     was found and increased in priority.  This is to prevent
                     increasing the priority of multiple loads */
                  load_store_pendulum--;

                  break;
                }
              pos--;
            }
        }
      else if (load_store_pendulum == -1)
        {
          /* A store has been issued in this cycle.  Scan the ready list for
             another store to issue with it, preferring a store to an adjacent
             memory location */
          int first_store_pos = -1;

          pos = *pn_ready - 1;

          while (pos >= 0)
            {
              if (is_store_insn (ready[pos], &str_mem))
                {
                  rtx str_mem2;
                  /* Maintain the index of the first store found on the
                     list */
                  if (first_store_pos == -1)
                    first_store_pos = pos;

                  if (is_store_insn (last_scheduled_insn, &str_mem2)
                      && adjacent_mem_locations (str_mem, str_mem2))
                    {
                      /* Found an adjacent store.  Move it to the head of the
                         ready list, and adjust its priority so that it is
                         more likely to stay there */
                      tmp = ready[pos];
                      for (i = pos; i < *pn_ready - 1; i++)
                        ready[i] = ready[i + 1];
                      ready[*pn_ready - 1] = tmp;

                      if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
                        INSN_PRIORITY (tmp)++;

                      first_store_pos = -1;

                      break;
                    }
                }
              pos--;
            }

          if (first_store_pos >= 0)
            {
              /* An adjacent store wasn't found, but a non-adjacent store was,
                 so move the non-adjacent store to the front of the ready
                 list, and adjust its priority so that it is more likely to
                 stay there.  */
              tmp = ready[first_store_pos];
              for (i = first_store_pos; i < *pn_ready - 1; i++)
                ready[i] = ready[i + 1];
              ready[*pn_ready - 1] = tmp;
              if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
                INSN_PRIORITY (tmp)++;
            }
        }
      else if (load_store_pendulum == 2)
        {
          /* Two loads have been issued in this cycle.  Increase the priority
             of the first store in the ready list to favor it for issuing in
             the next cycle.  */
          pos = *pn_ready - 1;

          while (pos >= 0)
            {
              if (is_store_insn (ready[pos], &str_mem)
                  && !sel_sched_p ()
                  && INSN_PRIORITY_KNOWN (ready[pos]))
                {
                  INSN_PRIORITY (ready[pos])++;

                  /* Adjust the pendulum to account for the fact that a store
                     was found and increased in priority.  This is to prevent
                     increasing the priority of multiple stores */
                  load_store_pendulum++;

                  break;
                }
              pos--;
            }
        }
    }

  /* Do Power9 dependent reordering if necessary.  */
  if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
      && recog_memoized (last_scheduled_insn) >= 0)
    return power9_sched_reorder2 (ready, *pn_ready - 1);

  return cached_can_issue_more;
}
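/* A minimal sketch (not part of GCC) of the Power6 pendulum
   transitions implemented above: positive values count loads issued
   in the current cycle, negative values count stores, and zero means
   load/store issue is balanced.  */
static inline int
pendulum_after_issue (int pendulum, bool issued_store, bool issued_load)
{
  if (issued_store)
    return pendulum - 1;   /* swing left: one more store this cycle */
  if (issued_load)
    return pendulum + 1;   /* swing right: one more load this cycle */
  return pendulum;         /* not a memory op: no swing */
}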
/* Return whether the presence of INSN causes a dispatch group termination
   of group WHICH_GROUP.

   If WHICH_GROUP == current_group, this function will return true if INSN
   causes the termination of the current group (i.e., the dispatch group to
   which INSN belongs).  This means that INSN will be the last insn in the
   group it belongs to.

   If WHICH_GROUP == previous_group, this function will return true if INSN
   causes the termination of the previous group (i.e., the dispatch group that
   precedes the group to which INSN belongs).  This means that INSN will be
   the first insn in the group it belongs to.  */

static bool
insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
{
  bool first, last;

  if (! insn)
    return false;

  first = insn_must_be_first_in_group (insn);
  last = insn_must_be_last_in_group (insn);

  if (first && last)
    return true;

  if (which_group == current_group)
    return last;
  else if (which_group == previous_group)
    return first;

  return false;
}
static bool
insn_must_be_first_in_group (rtx_insn *insn)
{
  enum attr_type type;

  if (!insn
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_cpu)
    {
    case PROCESSOR_POWER5:
      if (is_cracked_insn (insn))
        return true;
      /* FALLTHRU */
    case PROCESSOR_POWER4:
      if (is_microcoded_insn (insn))
        return true;

      if (!rs6000_sched_groups)
        return false;

      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_DELAYED_CR:
        case TYPE_CR_LOGICAL:
          return true;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_FPCOMPARE:
          return true;
        case TYPE_SHIFT:
          if (get_attr_dot (insn) == DOT_NO
              || get_attr_var_shift (insn) == VAR_SHIFT_NO)
            return true;
          break;
        case TYPE_DIV:
          if (get_attr_size (insn) == SIZE_32)
            return true;
          break;
        case TYPE_LOAD:
        case TYPE_STORE:
        case TYPE_FPLOAD:
        case TYPE_FPSTORE:
          if (get_attr_update (insn) == UPDATE_YES)
            return true;
          break;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_CR_LOGICAL:
          return true;
        case TYPE_MUL:
        case TYPE_SHIFT:
        case TYPE_EXTS:
          if (get_attr_dot (insn) == DOT_YES)
            return true;
          break;
        case TYPE_LOAD:
          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
              || get_attr_update (insn) == UPDATE_YES)
            return true;
          break;
        case TYPE_STORE:
        case TYPE_FPLOAD:
        case TYPE_FPSTORE:
          if (get_attr_update (insn) == UPDATE_YES)
            return true;
          break;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_CR_LOGICAL:
        case TYPE_DELAYED_CR:
        case TYPE_VECSTORE:
          return true;
        case TYPE_SHIFT:
        case TYPE_EXTS:
        case TYPE_MUL:
          if (get_attr_dot (insn) == DOT_YES)
            return true;
          break;
        case TYPE_LOAD:
          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
              || get_attr_update (insn) == UPDATE_YES)
            return true;
          break;
        case TYPE_STORE:
          if (get_attr_update (insn) == UPDATE_YES
              && get_attr_indexed (insn) == INDEXED_YES)
            return true;
          break;
        default:
          break;
        }
      break;
    default:
      break;
    }

  return false;
}
static bool
insn_must_be_last_in_group (rtx_insn *insn)
{
  enum attr_type type;

  if (!insn
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_cpu) {
  case PROCESSOR_POWER4:
  case PROCESSOR_POWER5:
    if (is_microcoded_insn (insn))
      return true;

    if (is_branch_slot_insn (insn))
      return true;

    break;
  case PROCESSOR_POWER6:
    type = get_attr_type (insn);

    switch (type)
      {
      case TYPE_FPCOMPARE:
        return true;
      case TYPE_SHIFT:
        if (get_attr_dot (insn) == DOT_NO
            || get_attr_var_shift (insn) == VAR_SHIFT_NO)
          return true;
        break;
      case TYPE_DIV:
        if (get_attr_size (insn) == SIZE_32)
          return true;
        break;
      default:
        break;
      }
    break;
  case PROCESSOR_POWER7:
    type = get_attr_type (insn);

    switch (type)
      {
      case TYPE_LOAD:
        if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
            && get_attr_update (insn) == UPDATE_YES)
          return true;
        break;
      case TYPE_STORE:
        if (get_attr_update (insn) == UPDATE_YES
            && get_attr_indexed (insn) == INDEXED_YES)
          return true;
        break;
      default:
        break;
      }
    break;
  case PROCESSOR_POWER8:
    type = get_attr_type (insn);

    switch (type)
      {
      case TYPE_LOAD:
        if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
            && get_attr_update (insn) == UPDATE_YES)
          return true;
        break;
      case TYPE_STORE:
        if (get_attr_update (insn) == UPDATE_YES
            && get_attr_indexed (insn) == INDEXED_YES)
          return true;
        break;
      default:
        break;
      }
    break;
  default:
    break;
  }

  return false;
}
/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
   dispatch group) from the insns in GROUP_INSNS.  Return false otherwise.  */

static bool
is_costly_group (rtx *group_insns, rtx next_insn)
{
  int i;
  int issue_rate = rs6000_issue_rate ();

  for (i = 0; i < issue_rate; i++)
    {
      sd_iterator_def sd_it;
      dep_t dep;
      rtx insn = group_insns[i];

      if (!insn)
        continue;

      FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
        {
          rtx next = DEP_CON (dep);

          if (next == next_insn
              && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
            return true;
        }
    }

  return false;
}
/* Utility of the function redefine_groups.
   Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
   in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
   to keep it "far" (in a separate group) from GROUP_INSNS, following
   one of the following schemes, depending on the value of the flag
   -minsert_sched_nops = X:
   (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
       in order to force NEXT_INSN into a separate group.
   (2) X < sched_finish_regroup_exact: insert exactly X nops.
   GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
   insertion (has a group just ended, how many vacant issue slots remain in the
   last group, and how many dispatch groups were encountered so far).  */

static int
force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
                 rtx_insn *next_insn, bool *group_end, int can_issue_more,
                 int *group_count)
{
  rtx nop;
  bool force;
  int issue_rate = rs6000_issue_rate ();
  bool end = *group_end;
  int i;

  if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
    return can_issue_more;

  if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
    return can_issue_more;

  force = is_costly_group (group_insns, next_insn);
  if (!force)
    return can_issue_more;

  if (sched_verbose > 6)
    fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
             *group_count, can_issue_more);

  if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
    {
      if (*group_end)
        can_issue_more = 0;

      /* Since only a branch can be issued in the last issue_slot, it is
         sufficient to insert 'can_issue_more - 1' nops if next_insn is not
         a branch.  If next_insn is a branch, we insert 'can_issue_more' nops;
         in this case the last nop will start a new group and the branch
         will be forced to the new group.  */
      if (can_issue_more && !is_branch_slot_insn (next_insn))
        can_issue_more--;

      /* Do we have a special group ending nop? */
      if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
          || rs6000_cpu_attr == CPU_POWER8)
        {
          nop = gen_group_ending_nop ();
          emit_insn_before (nop, next_insn);
          can_issue_more = 0;
        }
      else
        while (can_issue_more > 0)
          {
            nop = gen_nop ();
            emit_insn_before (nop, next_insn);
            can_issue_more--;
          }

      *group_end = true;
      return 0;
    }

  if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
    {
      int n_nops = rs6000_sched_insert_nops;

      /* Nops can't be issued from the branch slot, so the effective
         issue_rate for nops is 'issue_rate - 1'.  */
      if (can_issue_more == 0)
        can_issue_more = issue_rate;
      can_issue_more--;
      if (can_issue_more == 0)
        {
          can_issue_more = issue_rate - 1;
          (*group_count)++;
          end = true;
          for (i = 0; i < issue_rate; i++)
            {
              group_insns[i] = 0;
            }
        }

      while (n_nops > 0)
        {
          nop = gen_nop ();
          emit_insn_before (nop, next_insn);
          if (can_issue_more == issue_rate - 1) /* new group begins */
            end = false;
          can_issue_more--;
          if (can_issue_more == 0)
            {
              can_issue_more = issue_rate - 1;
              (*group_count)++;
              end = true;
              for (i = 0; i < issue_rate; i++)
                {
                  group_insns[i] = 0;
                }
            }

          n_nops--;
        }

      /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1').  */
      can_issue_more++;

      /* Is next_insn going to start a new group?  */
      *group_end
        = (end
           || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
           || (can_issue_more <= 2 && is_cracked_insn (next_insn))
           || (can_issue_more < issue_rate &&
               insn_terminates_group_p (next_insn, previous_group)));
      if (*group_end && end)
        (*group_count)--;

      if (sched_verbose > 6)
        fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
                 *group_count, can_issue_more);

      return can_issue_more;
    }

  return can_issue_more;
}
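/* Illustrative only (not in GCC): the nop count chosen by the "exact"
   scheme above.  A branch may still occupy the final (branch) slot,
   so a non-branch NEXT_INSN needs one nop fewer than the number of
   vacant slots.  */
static inline int
exact_scheme_nop_count (int vacant_slots, bool next_is_branch)
{
  return next_is_branch ? vacant_slots : vacant_slots - 1;
}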
/* This function tries to synch the dispatch groups that the compiler "sees"
   with the dispatch groups that the processor dispatcher is expected to
   form in practice.  It tries to achieve this synchronization by forcing the
   estimated processor grouping on the compiler (as opposed to the function
   'pad_groups' which tries to force the scheduler's grouping on the
   processor).

   The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
   examines the (estimated) dispatch groups that will be formed by the
   processor dispatcher.  It marks these group boundaries to reflect the
   estimated processor grouping, overriding the grouping that the scheduler
   had marked.  Depending on the value of the flag '-minsert-sched-nops' this
   function can force certain insns into separate groups or force a certain
   distance between them by inserting nops, for example, if there exists a
   "costly dependence" between the insns.

   The function estimates the group boundaries that the processor will form as
   follows:  It keeps track of how many vacant issue slots are available after
   each insn.  A subsequent insn will start a new group if one of the following
   4 cases applies:
   - no more vacant issue slots remain in the current dispatch group.
   - only the last issue slot, which is the branch slot, is vacant, but the
     next insn is not a branch.
   - only the last 2 or less issue slots, including the branch slot, are
     vacant, which means that a cracked insn (which occupies two issue slots)
     can't be issued in this group.
   - less than 'issue_rate' slots are vacant, and the next insn always needs
     to start a new group.  */

static int
redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
                 rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  int issue_rate;
  int can_issue_more;
  int slot, i;
  bool group_end;
  int group_count = 0;
  rtx *group_insns;

  /* Initialize.  */
  issue_rate = rs6000_issue_rate ();
  group_insns = XALLOCAVEC (rtx, issue_rate);
  for (i = 0; i < issue_rate; i++)
    {
      group_insns[i] = 0;
    }
  can_issue_more = issue_rate;
  group_end = false;
  insn = get_next_active_insn (prev_head_insn, tail);

  while (insn != NULL_RTX)
    {
      slot = (issue_rate - can_issue_more);
      group_insns[slot] = insn;
      can_issue_more =
        rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
      if (insn_terminates_group_p (insn, current_group))
        can_issue_more = 0;

      next_insn = get_next_active_insn (insn, tail);
      if (next_insn == NULL_RTX)
        return group_count + 1;

      /* Is next_insn going to start a new group?  */
      group_end
        = (can_issue_more == 0
           || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
           || (can_issue_more <= 2 && is_cracked_insn (next_insn))
           || (can_issue_more < issue_rate &&
               insn_terminates_group_p (next_insn, previous_group)));

      can_issue_more = force_new_group (sched_verbose, dump, group_insns,
                                        next_insn, &group_end, can_issue_more,
                                        &group_count);

      if (group_end)
        {
          group_count++;
          can_issue_more = 0;
          for (i = 0; i < issue_rate; i++)
            {
              group_insns[i] = 0;
            }
        }

      if (GET_MODE (next_insn) == TImode && can_issue_more)
        PUT_MODE (next_insn, VOIDmode);
      else if (!can_issue_more && GET_MODE (next_insn) != TImode)
        PUT_MODE (next_insn, TImode);

      insn = next_insn;
      if (can_issue_more == 0)
        can_issue_more = issue_rate;
    } /* while */

  return group_count;
}
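/* Illustrative helper (not in GCC), stating the convention the
   PUT_MODE calls above rely on: an insn carrying TImode marks the
   start of a new dispatch group, while VOIDmode means it shares the
   previous insn's group.  */
static inline bool
starts_dispatch_group_p (rtx_insn *insn)
{
  return GET_MODE (insn) == TImode;
}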
/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
   dispatch group boundaries that the scheduler had marked.  Pad with nops
   any dispatch groups which have vacant issue slots, in order to force the
   scheduler's grouping on the processor dispatcher.  The function
   returns the number of dispatch groups found.  */

static int
pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
            rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  rtx nop;
  int issue_rate;
  int can_issue_more;
  int group_end;
  int group_count = 0;

  /* Initialize issue_rate.  */
  issue_rate = rs6000_issue_rate ();
  can_issue_more = issue_rate;

  insn = get_next_active_insn (prev_head_insn, tail);
  next_insn = get_next_active_insn (insn, tail);

  while (insn != NULL_RTX)
    {
      can_issue_more =
        rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);

      group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);

      if (next_insn == NULL_RTX)
        break;

      if (group_end)
        {
          /* If the scheduler had marked group termination at this location
             (between insn and next_insn), and neither insn nor next_insn will
             force group termination, pad the group with nops to force group
             termination.  */
          if (can_issue_more
              && (rs6000_sched_insert_nops == sched_finish_pad_groups)
              && !insn_terminates_group_p (insn, current_group)
              && !insn_terminates_group_p (next_insn, previous_group))
            {
              if (!is_branch_slot_insn (next_insn))
                can_issue_more--;

              while (can_issue_more)
                {
                  nop = gen_nop ();
                  emit_insn_before (nop, next_insn);
                  can_issue_more--;
                }
            }

          can_issue_more = issue_rate;
          group_count++;
        }

      insn = next_insn;
      next_insn = get_next_active_insn (insn, tail);
    }

  return group_count;
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
                   int sched_verbose ATTRIBUTE_UNUSED,
                   int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  load_store_pendulum = 0;
  divide_cnt = 0;
  vec_pairing = 0;
}
/* The following function is called at the end of scheduling BB.
   After reload, it inserts nops at insn group bundling.  */

static void
rs6000_sched_finish (FILE *dump, int sched_verbose)
{
  int n_groups;

  if (sched_verbose)
    fprintf (dump, "=== Finishing schedule.\n");

  if (reload_completed && rs6000_sched_groups)
    {
      /* Do not run sched_finish hook when selective scheduling enabled.  */
      if (sel_sched_p ())
        return;

      if (rs6000_sched_insert_nops == sched_finish_none)
        return;

      if (rs6000_sched_insert_nops == sched_finish_pad_groups)
        n_groups = pad_groups (dump, sched_verbose,
                               current_sched_info->prev_head,
                               current_sched_info->next_tail);
      else
        n_groups = redefine_groups (dump, sched_verbose,
                                    current_sched_info->prev_head,
                                    current_sched_info->next_tail);

      if (sched_verbose >= 6)
        {
          fprintf (dump, "ngroups = %d\n", n_groups);
          print_rtl (dump, current_sched_info->prev_head);
          fprintf (dump, "Done finish_sched\n");
        }
    }
}
struct rs6000_sched_context
{
  short cached_can_issue_more;
  rtx_insn *last_scheduled_insn;
  int load_store_pendulum;
  int divide_cnt;
  int vec_pairing;
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;
/* Allocate store for new scheduling context.  */

static void *
rs6000_alloc_sched_context (void)
{
  return xmalloc (sizeof (rs6000_sched_context_def));
}
/* If CLEAN_P is true then initializes _SC with clean data,
   and from the global context otherwise.  */

static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}
/* Sets the global scheduling context to the one pointed to by _SC.  */

static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}
/* Free _SC.  */

static void
rs6000_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}

static bool
rs6000_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_DIV:
    case TYPE_SDIV:
    case TYPE_DDIV:
    case TYPE_SSQRT:
    case TYPE_DSQRT:
      return false;

    default:
      return true;
    }
}
/* Length in units of the trampoline for entering a nested function.  */

static int
rs6000_trampoline_size (void)
{
  int ret = 0;

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_AIX:
      ret = (TARGET_32BIT) ? 12 : 24;
      break;

    case ABI_ELFv2:
      gcc_assert (!TARGET_32BIT);
      ret = 32;
      break;

    case ABI_DARWIN:
    case ABI_V4:
      ret = (TARGET_32BIT) ? 40 : 48;
      break;
    }

  return ret;
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

static void
rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  int regsize = (TARGET_32BIT) ? 4 : 8;
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx ctx_reg = force_reg (Pmode, cxt);
  rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    /* Under AIX, just build the 3 word function descriptor */
    case ABI_AIX:
      {
        rtx fnmem, fn_reg, toc_reg;

        if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
          error ("You cannot take the address of a nested function if you use "
                 "the -mno-pointers-to-nested-functions option.");

        fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
        fn_reg = gen_reg_rtx (Pmode);
        toc_reg = gen_reg_rtx (Pmode);

  /* Macro to shorten the code expansions below.  */
# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)

        m_tramp = replace_equiv_address (m_tramp, addr);

        emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
        emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
        emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
        emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
        emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);

# undef MEM_PLUS
      }
      break;

    /* Under V.4/eabi/darwin, __trampoline_setup does the real work.  */
    case ABI_ELFv2:
    case ABI_DARWIN:
    case ABI_V4:
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
                         LCT_NORMAL, VOIDmode, 4,
                         addr, Pmode,
                         GEN_INT (rs6000_trampoline_size ()), SImode,
                         fnaddr, Pmode,
                         ctx_reg, Pmode);
      break;
    }
}
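/* Illustrative layout (not in GCC) of the 3-word AIX function
   descriptor the ABI_AIX case above fills in; each word is regsize
   bytes (4 under -m32, 8 under -m64).  */
struct aix_function_descriptor_sketch {
  void *entry;         /* word 0: code address, copied from FNADDR */
  void *toc;           /* word 1: TOC pointer, from FNADDR + regsize */
  void *static_chain;  /* word 2: CXT, stored at offset 2*regsize */
};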
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
rs6000_attribute_takes_identifier_p (const_tree attr_id)
{
  return is_attribute_p ("altivec", attr_id);
}

/* Handle the "altivec" attribute.  The attribute may have
   arguments as follows:

     __attribute__((altivec(vector__)))
     __attribute__((altivec(pixel__)))  (always followed by 'unsigned short')
     __attribute__((altivec(bool__)))   (always followed by 'unsigned')

   and may appear more than once (e.g., 'vector bool char') in a
   given declaration.  */
static tree
rs6000_handle_altivec_attribute (tree *node,
                                 tree name ATTRIBUTE_UNUSED,
                                 tree args,
                                 int flags ATTRIBUTE_UNUSED,
                                 bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;
  int unsigned_p;
  char altivec_type
    = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
        && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
       ? *IDENTIFIER_POINTER (TREE_VALUE (args))
       : '?');

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE
         || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  /* Check for invalid AltiVec type qualifiers.  */
  if (type == long_double_type_node)
    error ("use of %<long double%> in AltiVec types is invalid");
  else if (type == boolean_type_node)
    error ("use of boolean types in AltiVec types is invalid");
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    error ("use of %<complex%> in AltiVec types is invalid");
  else if (DECIMAL_FLOAT_MODE_P (mode))
    error ("use of decimal floating point types in AltiVec types is invalid");
  else if (!TARGET_VSX)
    {
      if (type == long_unsigned_type_node || type == long_integer_type_node)
        {
          if (TARGET_64BIT)
            error ("use of %<long%> in AltiVec types is invalid for "
                   "64-bit code without -mvsx");
          else if (rs6000_warn_altivec_long)
            warning (0, "use of %<long%> in AltiVec types is deprecated; "
                     "use %<int%>");
        }
      else if (type == long_long_unsigned_type_node
               || type == long_long_integer_type_node)
        error ("use of %<long long%> in AltiVec types is invalid without "
               "-mvsx");
      else if (type == double_type_node)
        error ("use of %<double%> in AltiVec types is invalid without -mvsx");
    }

  switch (altivec_type)
    {
    case 'v':
      unsigned_p = TYPE_UNSIGNED (type);
      switch (mode)
        {
        case TImode:
          result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
          break;
        case DImode:
          result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
          break;
        case SImode:
          result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
          break;
        case HImode:
          result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
          break;
        case QImode:
          result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
          break;
        case SFmode: result = V4SF_type_node; break;
        case DFmode: result = V2DF_type_node; break;
          /* If the user says 'vector int bool', we may be handed the 'bool'
             attribute _before_ the 'vector' attribute, and so select the
             proper type in the 'b' case below.  */
        case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
        case V2DImode: case V2DFmode:
          result = type;
        default: break;
        }
      break;
    case 'b':
      switch (mode)
        {
        case DImode: case V2DImode: result = bool_V2DI_type_node; break;
        case SImode: case V4SImode: result = bool_V4SI_type_node; break;
        case HImode: case V8HImode: result = bool_V8HI_type_node; break;
        case QImode: case V16QImode: result = bool_V16QI_type_node;
        default: break;
        }
      break;
    case 'p':
      switch (mode)
        {
        case V8HImode: result = pixel_V8HI_type_node;
        default: break;
        }
    default: break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
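/* Usage sketch (illustrative): how the AltiVec keywords map onto the
   attribute forms the handler above accepts.  'vector' contributes
   altivec(vector__), 'bool' contributes altivec(bool__) (on an
   unsigned base type), and 'pixel' contributes altivec(pixel__) (on
   unsigned short), so e.g.

     vector unsigned int  ->  __attribute__((altivec(vector__))) unsigned int
     vector bool int      ->  both vector__ and bool__ applied to unsigned int
     vector pixel         ->  vector__ and pixel__ applied to unsigned short  */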
/* AltiVec defines four built-in scalar types that serve as vector
   elements; we must teach the compiler how to mangle them.  */

static const char *
rs6000_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  if (type == bool_char_type_node) return "U6__boolc";
  if (type == bool_short_type_node) return "U6__bools";
  if (type == pixel_type_node) return "u7__pixel";
  if (type == bool_int_type_node) return "U6__booli";
  if (type == bool_long_type_node) return "U6__booll";

  /* Use a unique name for __float128 rather than trying to use "e" or "g".
     Use "g" for IBM extended double, no matter whether it is long double
     (using -mabi=ibmlongdouble) or the distinct __ibm128 type.  */
  if (TARGET_FLOAT128_TYPE)
    {
      if (type == ieee128_float_type_node)
        return "U10__float128";

      if (type == ibm128_float_type_node)
        return "g";

      if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
        return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
    }

  /* Mangle IBM extended float long double as `g' (__float128) on
     powerpc*-linux where long-double-64 previously was the default.  */
  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_ELF
      && TARGET_LONG_DOUBLE_128
      && !TARGET_IEEEQUAD)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
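/* Example (illustrative): the AltiVec element type '__bool int' comes
   back as "U6__booli" above, i.e. the vendor-extended qualifier
   "__bool" (U6__bool) applied to plain int (i) in the Itanium C++
   mangling scheme; the surrounding vector type is then mangled by the
   generic code.  */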
/* Handle a "longcall" or "shortcall" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
rs6000_handle_longcall_attribute (tree *node, tree name,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Set longcall attributes on all functions declared when
   rs6000_default_long_calls is true.  */

static void
rs6000_set_default_type_attributes (tree type)
{
  if (rs6000_default_long_calls
      && (TREE_CODE (type) == FUNCTION_TYPE
          || TREE_CODE (type) == METHOD_TYPE))
    TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
                                        NULL_TREE,
                                        TYPE_ATTRIBUTES (type));

#if TARGET_MACHO
  darwin_set_default_type_attributes (type);
#endif
}
/* Return a reference suitable for calling a function with the
   longcall attribute.  */

rtx
rs6000_longcall_ref (rtx call_ref)
{
  const char *call_name;
  tree node;

  if (GET_CODE (call_ref) != SYMBOL_REF)
    return call_ref;

  /* System V adds '.' to the internal name, so skip them.  */
  call_name = XSTR (call_ref, 0);
  if (*call_name == '.')
    {
      while (*call_name == '.')
        call_name++;

      node = get_identifier (call_name);
      call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
    }

  return force_reg (Pmode, call_ref);
}
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
rs6000_handle_struct_attribute (tree *node, tree name,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
rs6000_ms_bitfield_layout_p (const_tree record_type)
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
          !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
#ifdef USING_ELFOS_H

/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && TARGET_MINIMAL_TOC)
    {
      if (!toc_initialized)
        {
          fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
          fprintf (asm_out_file, "\t.tc ");
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
          fprintf (asm_out_file, "\n");

          fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
          fprintf (asm_out_file, " = .+32768\n");
          toc_initialized = 1;
        }
      else
        fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
      fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
      if (!toc_initialized)
        {
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          toc_initialized = 1;
        }
    }
  else
    {
      fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      if (!toc_initialized)
        {
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
          fprintf (asm_out_file, " = .+32768\n");
          toc_initialized = 1;
        }
    }
}
/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_elf_asm_init_sections (void)
{
  toc_section
    = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);

  sdata2_section
    = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
                           SDATA2_SECTION_ASM_OP);
}

/* Implement TARGET_SELECT_RTX_SECTION.  */

static section *
rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
                               unsigned HOST_WIDE_INT align)
{
  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
    return toc_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}
/* For a SYMBOL_REF, set generic flags and then perform some
   target-specific processing.

   When the AIX ABI is requested on a non-AIX system, replace the
   function name with the real name (with a leading .) rather than the
   function descriptor name.  This saves a lot of overriding code to
   read the prefixes.  */

static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (first
      && TREE_CODE (decl) == FUNCTION_DECL
      && !TARGET_AIX
      && DEFAULT_ABI == ABI_AIX)
    {
      rtx sym_ref = XEXP (rtl, 0);
      size_t len = strlen (XSTR (sym_ref, 0));
      char *str = XALLOCAVEC (char, len + 2);
      str[0] = '.';
      memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
      XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
    }
}
static bool
compare_section_name (const char *section, const char *templ)
{
  int len;

  len = strlen (templ);
  return (strncmp (section, templ, len) == 0
          && (section[len] == 0 || section[len] == '.'));
}
bool
rs6000_elf_in_small_data_p (const_tree decl)
{
  if (rs6000_sdata == SDATA_NONE)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (decl) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
    {
      const char *section = DECL_SECTION_NAME (decl);
      if (compare_section_name (section, ".sdata")
          || compare_section_name (section, ".sdata2")
          || compare_section_name (section, ".gnu.linkonce.s")
          || compare_section_name (section, ".sbss")
          || compare_section_name (section, ".sbss2")
          || compare_section_name (section, ".gnu.linkonce.sb")
          || strcmp (section, ".PPC.EMB.sdata0") == 0
          || strcmp (section, ".PPC.EMB.sbss0") == 0)
        return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      if (size > 0
          && size <= g_switch_value
          /* If it's not public, and we're not going to reference it there,
             there's no need to put it in the small data section.  */
          && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
        return true;
    }

  return false;
}

#endif /* USING_ELFOS_H */
/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P.  */

static bool
rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
{
  return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
}

/* Do not place thread-local symbols refs in the object blocks.  */

static bool
rs6000_use_blocks_for_decl_p (const_tree decl)
{
  return !DECL_THREAD_LOCAL_P (decl);
}
/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.

   r0 is special and we must not select it as an address
   register by this routine since our caller will try to
   increment the returned register via an "la" instruction.  */

rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG
          && REGNO (XEXP (addr, 0)) != 0)
        addr = XEXP (addr, 0);
      else if (GET_CODE (XEXP (addr, 1)) == REG
               && REGNO (XEXP (addr, 1)) != 0)
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
        addr = XEXP (addr, 0);
      else
        gcc_unreachable ();
    }
  gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
  return addr;
}
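/* Worked example (illustrative): for ADDR = (plus (reg r9) (const_int 8))
   the loop above discards the constant and returns r9; for
   ADDR = (plus (reg r0) (reg r11)) it returns r11, because r0 must be
   skipped - as the base of an "la" instruction, r0 means the literal
   value 0 rather than the register's contents.  */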
void
rs6000_fatal_bad_address (rtx op)
{
  fatal_insn ("bad address", op);
}
#if TARGET_MACHO

typedef struct branch_island_d {
  tree function_name;
  tree label_name;
  int line_number;
} branch_island;

static vec<branch_island, va_gc> *branch_islands;

/* Remember to generate a branch island for far calls to the given
   function.  */

static void
add_compiler_branch_island (tree label_name, tree function_name,
                            int line_number)
{
  branch_island bi = {function_name, label_name, line_number};
  vec_safe_push (branch_islands, bi);
}
/* Generate far-jump branch islands for everything recorded in
   branch_islands.  Invoked immediately after the last instruction of
   the epilogue has been emitted; the branch islands must be appended
   to, and contiguous with, the function body.  Mach-O stubs are
   generated in machopic_output_stub().  */

static void
macho_branch_islands (void)
{
  char tmp_buf[512];

  while (!vec_safe_is_empty (branch_islands))
    {
      branch_island *bi = &branch_islands->last ();
      const char *label = IDENTIFIER_POINTER (bi->label_name);
      const char *name = IDENTIFIER_POINTER (bi->function_name);
      char name_buf[512];
      /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF().  */
      if (name[0] == '*' || name[0] == '&')
        strcpy (name_buf, name + 1);
      else
        {
          name_buf[0] = '_';
          strcpy (name_buf + 1, name);
        }
      strcpy (tmp_buf, "\n");
      strcat (tmp_buf, label);
#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
        dbxout_stabd (N_SLINE, bi->line_number);
#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
      if (flag_pic)
        {
          if (TARGET_LINK_STACK)
            {
              char name[32];
              get_ppc476_thunk_name (name);
              strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
              strcat (tmp_buf, name);
              strcat (tmp_buf, "\n");
              strcat (tmp_buf, label);
              strcat (tmp_buf, "_pic:\n\tmflr r11\n");
            }
          else
            {
              strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
              strcat (tmp_buf, label);
              strcat (tmp_buf, "_pic\n");
              strcat (tmp_buf, label);
              strcat (tmp_buf, "_pic:\n\tmflr r11\n");
            }

          strcat (tmp_buf, "\taddis r11,r11,ha16(");
          strcat (tmp_buf, name_buf);
          strcat (tmp_buf, " - ");
          strcat (tmp_buf, label);
          strcat (tmp_buf, "_pic)\n");

          strcat (tmp_buf, "\tmtlr r0\n");

          strcat (tmp_buf, "\taddi r12,r11,lo16(");
          strcat (tmp_buf, name_buf);
          strcat (tmp_buf, " - ");
          strcat (tmp_buf, label);
          strcat (tmp_buf, "_pic)\n");

          strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
        }
      else
        {
          strcat (tmp_buf, ":\nlis r12,hi16(");
          strcat (tmp_buf, name_buf);
          strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
          strcat (tmp_buf, name_buf);
          strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
        }
      output_asm_insn (tmp_buf, 0);
#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
        dbxout_stabd (N_SLINE, bi->line_number);
#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
      branch_islands->pop ();
    }
}
/* NO_PREVIOUS_DEF checks in the link list whether the function name is
   already there or not.  */

static bool
no_previous_def (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return false;
  return true;
}

/* GET_PREV_LABEL gets the label name from the previous definition of
   the function.  */

static tree
get_prev_label (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return bi->label_name;
  return NULL_TREE;
}
/* INSN is either a function call or a millicode call.  It may have an
   unconditional jump in its delay slot.

   CALL_DEST is the routine we are calling.  */

char *
output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
             int cookie_operand_number)
{
  static char buf[256];
  if (darwin_emit_branch_islands
      && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
      && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
    {
      tree labelname;
      tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));

      if (no_previous_def (funname))
        {
          rtx label_rtx = gen_label_rtx ();
          char *label_buf, temp_buf[256];
          ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
                                       CODE_LABEL_NUMBER (label_rtx));
          label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
          labelname = get_identifier (label_buf);
          add_compiler_branch_island (labelname, funname, insn_line (insn));
        }
      else
        labelname = get_prev_label (funname);

      /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
         instruction will reach 'foo', otherwise link as 'bl L42'".
         "L42" should be a 'branch island', that will do a far jump to
         'foo'.  Branch islands are generated in
         macho_branch_islands().  */
      sprintf (buf, "jbsr %%z%d,%.246s",
               dest_operand_number, IDENTIFIER_POINTER (labelname));
    }
  else
    sprintf (buf, "bl %%z%d", dest_operand_number);
  return buf;
}
/* Generate PIC and indirect symbol stubs.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *symbol_name, *lazy_ptr_name;
  char *local_label_0;
  static int label = 0;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  lazy_ptr_name = XALLOCAVEC (char, length + 32);
  GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);

  if (flag_pic == 2)
    switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub1_section]);

  if (flag_pic == 2)
    {
      fprintf (file, "\t.align 5\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      label++;
      local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
      sprintf (local_label_0, "\"L%011d$spb\"", label);

      fprintf (file, "\tmflr r0\n");
      if (TARGET_LINK_STACK)
        {
          char name[32];
          get_ppc476_thunk_name (name);
          fprintf (file, "\tbl %s\n", name);
          fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
        }
      else
        {
          fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
          fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
        }
      fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
               lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtlr r0\n");
      fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
               (TARGET_64BIT ? "ldu" : "lwzu"),
               lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }
  else
    {
      fprintf (file, "\t.align 4\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
      fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
               (TARGET_64BIT ? "ldu" : "lwzu"),
               lazy_ptr_name);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }

  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "%sdyld_stub_binding_helper\n",
           (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if non
   zero, otherwise we allocate register(s) as necessary.  */

#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)

rtx
rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
                                        rtx reg)
{
  rtx base, offset;

  if (reg == NULL && ! reload_in_progress && ! reload_completed)
    reg = gen_reg_rtx (Pmode);

  if (GET_CODE (orig) == CONST)
    {
      rtx reg_temp;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Use a different reg for the intermediate value, as
         it will be marked UNCHANGING.  */
      reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
      base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
                                                     Pmode, reg_temp);
      offset =
        rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
                                                Pmode, reg_temp);

      if (GET_CODE (offset) == CONST_INT)
        {
          if (SMALL_INT (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
          else if (! reload_in_progress && ! reload_completed)
            offset = force_reg (Pmode, offset);
          else
            {
              rtx mem = force_const_mem (Pmode, orig);
              return machopic_legitimize_pic_address (mem, Pmode, reg);
            }
        }
      return gen_rtx_PLUS (Pmode, base, offset);
    }

  /* Fall back on generic machopic code.  */
  return machopic_legitimize_pic_address (orig, mode, reg);
}
/* Output a .machine directive for the Darwin assembler, and call
   the generic start_file routine.  */

static void
rs6000_darwin_file_start (void)
{
  static const struct
  {
    const char *arg;
    const char *name;
    HOST_WIDE_INT if_set;
  } mapping[] = {
    { "ppc64", "ppc64", MASK_64BIT },
    { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
    { "power4", "ppc970", 0 },
    { "G5", "ppc970", 0 },
    { "7450", "ppc7450", 0 },
    { "7400", "ppc7400", MASK_ALTIVEC },
    { "G4", "ppc7400", 0 },
    { "750", "ppc750", 0 },
    { "740", "ppc750", 0 },
    { "G3", "ppc750", 0 },
    { "604e", "ppc604e", 0 },
    { "604", "ppc604", 0 },
    { "603e", "ppc603", 0 },
    { "603", "ppc603", 0 },
    { "601", "ppc601", 0 },
    { NULL, "ppc", 0 } };
  const char *cpu_id = "";
  size_t i;

  rs6000_file_start ();
  darwin_file_start ();

  /* Determine the argument to -mcpu=.  Default to G3 if not specified.  */

  if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
    cpu_id = rs6000_default_cpu;

  if (global_options_set.x_rs6000_cpu_index)
    cpu_id = processor_target_table[rs6000_cpu_index].name;

  /* Look through the mapping array.  Pick the first name that either
     matches the argument, has a bit set in IF_SET that is also set
     in the target flags, or has a NULL name.  */

  i = 0;
  while (mapping[i].arg != NULL
         && strcmp (mapping[i].arg, cpu_id) != 0
         && (mapping[i].if_set & rs6000_isa_flags) == 0)
    i++;

  fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
}

#endif /* TARGET_MACHO */
static int
rs6000_elf_reloc_rw_mask (void)
{
  if (flag_pic)
    return 3;
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    return 2;
  else
    return 0;
}
/* Record an element in the table of global constructors.  SYMBOL is
   a SYMBOL_REF of the function to be called; PRIORITY is a number
   between 0 and MAX_INIT_PRIORITY.

   This differs from default_named_section_asm_out_constructor in
   that we have special handling for -mrelocatable.  */

static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_asm_out_constructor (rtx symbol, int priority)
{
  const char *section = ".ctors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".ctors.%.5u",
               /* Invert the numbering so the linker puts us in the proper
                  order; constructors are run from right to left, and the
                  linker sorts in increasing order.  */
               MAX_INIT_PRIORITY - priority);
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}
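/* Worked example (illustrative, assuming the usual MAX_INIT_PRIORITY
   of 65535): a constructor with priority 100 is placed in section
   ".ctors.65435" and one with priority 200 in ".ctors.65335".  The
   linker's increasing sort plus the right-to-left execution of .ctors
   then runs the priority-100 constructor before the priority-200 one,
   matching init_priority semantics (lower values run first).  */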
static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_asm_out_destructor (rtx symbol, int priority)
{
  const char *section = ".dtors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".dtors.%.5u",
               /* Invert the numbering so the linker puts us in the proper
                  order; constructors are run from right to left, and the
                  linker sorts in increasing order.  */
               MAX_INIT_PRIORITY - priority);
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}
void
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
{
  if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
    {
      fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
      ASM_OUTPUT_LABEL (file, name);
      fputs (DOUBLE_INT_ASM_OP, file);
      rs6000_output_function_entry (file, name);
      fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
      if (DOT_SYMBOLS)
	{
	  fputs ("\t.size\t", file);
	  assemble_name (file, name);
	  fputs (",24\n\t.type\t.", file);
	  assemble_name (file, name);
	  fputs (",@function\n", file);
	  if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
	    {
	      fputs ("\t.globl\t.", file);
	      assemble_name (file, name);
	      putc ('\n', file);
	    }
	}
      else
	ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
      rs6000_output_function_entry (file, name);
      fputs (":\n", file);
      return;
    }

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1)
      && !TARGET_SECURE_PLT
      && (!constant_pool_empty_p () || crtl->profile))
    {
      char buf[256];

      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.long ");
      assemble_name (file, toc_label_name);
      putc ('-', file);
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
    }

  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));

  if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
    {
      char buf[256];

      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.quad .TOC.-");
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
    }

  if (DEFAULT_ABI == ABI_AIX)
    {
      const char *desc_name, *orig_name;

      orig_name = (*targetm.strip_name_encoding) (name);
      desc_name = orig_name;
      while (*desc_name == '.')
	desc_name++;

      if (TREE_PUBLIC (decl))
	fprintf (file, "\t.globl %s\n", desc_name);

      fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      fprintf (file, "%s:\n", desc_name);
      fprintf (file, "\t.long %s\n", orig_name);
      fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
      fputs ("\t.long 0\n", file);
      fprintf (file, "\t.previous\n");
    }
  ASM_OUTPUT_LABEL (file, name);
}
static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
static void
rs6000_elf_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* ??? The value emitted depends on options active at file end.
     Assume anyone using #pragma or attributes that might change
     options knows what they are doing.  */
  if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && rs6000_passes_float)
    {
      int fp;

      if (TARGET_DF_FPR | TARGET_DF_SPE)
	fp = 1;
      else if (TARGET_SF_FPR | TARGET_SF_SPE)
	fp = 3;
      else
	fp = 2;
      if (rs6000_passes_long_double)
	{
	  if (!TARGET_LONG_DOUBLE_128)
	    fp |= 2 * 4;
	  else if (TARGET_IEEEQUAD)
	    fp |= 3 * 4;
	  else
	    fp |= 1 * 4;
	}
      fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
    }
  if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
    {
      if (rs6000_passes_vector)
	fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
		 (TARGET_ALTIVEC_ABI ? 2
		  : TARGET_SPE_ABI ? 3
		  : 1));
      if (rs6000_returns_struct)
	fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
		 aix_struct_return ? 2 : 1);
    }
#endif
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
  if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
    file_end_indicate_exec_stack ();
#endif

  if (flag_split_stack)
    file_end_indicate_split_stack ();

  if (cpu_builtin_p)
    {
      /* We have expanded a CPU builtin, so we need to emit a reference to
	 the special symbol that LIBC uses to declare it supports the
	 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature.  */
      switch_to_section (data_section);
      fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
      fprintf (asm_out_file, "\t%s %s\n",
	       TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
    }
}
#if TARGET_XCOFF

#ifndef HAVE_XCOFF_DWARF_EXTRAS
#define HAVE_XCOFF_DWARF_EXTRAS 0
#endif

static enum unwind_info_type
rs6000_xcoff_debug_unwind_info (void)
{
  return UI_NONE;
}

static void
rs6000_xcoff_asm_output_anchor (rtx symbol)
{
  char buffer[100];

  sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
	   SYMBOL_REF_BLOCK_OFFSET (symbol));
  fprintf (asm_out_file, "%s", SET_ASM_OP);
  RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, ",");
  RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
  fprintf (asm_out_file, "\n");
}

static void
rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
{
  fputs (GLOBAL_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  putc ('\n', stream);
}
/* A get_unnamed_decl callback, used for read-only sections.  PTR
   points to the section string variable.  */

static void
rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

/* Likewise for read-write sections.  */

static void
rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

static void
rs6000_xcoff_output_tls_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if (TARGET_MINIMAL_TOC)
    {
      /* toc_section is always selected at least once from
	 rs6000_xcoff_file_start, so this is guaranteed to
	 always be defined once and only once in each file.  */
      if (!toc_initialized)
	{
	  fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
	  fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
	  toc_initialized = 1;
	}
      fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
	       (TARGET_32BIT ? "" : ",3"));
    }
  else
    fputs ("\t.toc\n", asm_out_file);
}
/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_xcoff_asm_init_sections (void)
{
  read_only_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
			   &xcoff_read_only_section_name);

  private_data_section
    = get_unnamed_section (SECTION_WRITE,
			   rs6000_xcoff_output_readwrite_section_asm_op,
			   &xcoff_private_data_section_name);

  tls_data_section
    = get_unnamed_section (SECTION_TLS,
			   rs6000_xcoff_output_tls_section_asm_op,
			   &xcoff_tls_data_section_name);

  tls_private_data_section
    = get_unnamed_section (SECTION_TLS,
			   rs6000_xcoff_output_tls_section_asm_op,
			   &xcoff_private_data_section_name);

  read_only_private_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
			   &xcoff_private_data_section_name);

  toc_section
    = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);

  readonly_data_section = read_only_data_section;
}
static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}

static void
rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
				tree decl ATTRIBUTE_UNUSED)
{
  int smclass;
  static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };

  if (flags & SECTION_EXCLUDE)
    smclass = 4;
  else if (flags & SECTION_DEBUG)
    {
      fprintf (asm_out_file, "\t.dwsect %s\n", name);
      return;
    }
  else if (flags & SECTION_CODE)
    smclass = 0;
  else if (flags & SECTION_TLS)
    smclass = 3;
  else if (flags & SECTION_WRITE)
    smclass = 2;
  else
    smclass = 1;

  fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
	   (flags & SECTION_CODE) ? "." : "",
	   name, suffix[smclass], flags & SECTION_ENTSIZE);
}
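/* Illustrative note (added): for a code section named "foo" this
   prints something like "\t.csect .foo[PR],2", where the leading dot
   comes from SECTION_CODE, "PR" is suffix[0], and the trailing number
   is the log2 alignment stashed in the SECTION_ENTSIZE bits by
   rs6000_xcoff_section_type_flags below.  */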
#define IN_NAMED_SECTION(DECL) \
  ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
   && DECL_SECTION_NAME (DECL) != NULL)

static section *
rs6000_xcoff_select_section (tree decl, int reloc,
			     unsigned HOST_WIDE_INT align)
{
  /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
     a named section.  */
  if (align > BIGGEST_ALIGNMENT)
    {
      resolve_unique_section (decl, reloc, true);
      if (IN_NAMED_SECTION (decl))
	return get_named_section (decl, NULL, reloc);
    }

  if (decl_readonly_section (decl, reloc))
    {
      if (TREE_PUBLIC (decl))
	return read_only_data_section;
      else
	return read_only_private_data_section;
    }
  else
    {
#if HAVE_AS_TLS
      if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
	{
	  if (TREE_PUBLIC (decl))
	    return tls_data_section;
	  else if (bss_initializer_p (decl))
	    {
	      /* Convert to COMMON to emit in BSS.  */
	      DECL_COMMON (decl) = 1;
	      return tls_comm_section;
	    }
	  else
	    return tls_private_data_section;
	}
      else
#endif
	if (TREE_PUBLIC (decl))
	  return data_section;
	else
	  return private_data_section;
    }
}
static void
rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
{
  const char *name;

  /* Use select_section for private data and uninitialized data with
     alignment <= BIGGEST_ALIGNMENT.  */
  if (!TREE_PUBLIC (decl)
      || DECL_COMMON (decl)
      || (DECL_INITIAL (decl) == NULL_TREE
	  && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
      || DECL_INITIAL (decl) == error_mark_node
      || (flag_zero_initialized_in_bss
	  && initializer_zerop (DECL_INITIAL (decl))))
    return;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = (*targetm.strip_name_encoding) (name);
  set_decl_section_name (decl, name);
}
/* Select section for constant in constant pool.

   On RS/6000, all constants are in the private read-only data area.
   However, if this is being placed in the TOC it must be output as a
   toc entry.  */

static section *
rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
				 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
    return toc_section;
  else
    return read_only_private_data_section;
}
/* Remove any trailing [DS] or the like from the symbol name.  */

static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;
  if (*name == '*')
    name++;
  len = strlen (name);
  if (name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}
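/* Illustrative example (added): "foo[DS]" has length 7 and ends in
   ']', so ggc_alloc_string returns the first 3 characters, "foo".
   The len - 4 arithmetic assumes the mapping class is always a
   two-letter bracketed suffix such as [DS], [RW] or [UA].  */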
/* Section attributes.  AIX is always PIC.  */

static unsigned int
rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int align;
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  /* Align to at least UNIT size.  */
  if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
    align = MIN_UNITS_PER_WORD;
  else
    /* Increase alignment of large objects if not already stricter.  */
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
		 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
		 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);

  return flags | (exact_log2 (align) & SECTION_ENTSIZE);
}
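/* Illustrative note (added): an 8-byte aligned decl yields align = 8,
   and exact_log2 (8) = 3 is folded into the SECTION_ENTSIZE bits of
   the returned flags; rs6000_xcoff_asm_named_section above later
   prints that value as the alignment operand of the .csect
   directive.  */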
/* Output at beginning of assembler file.

   Initialize the section names for the RS/6000 at this point.

   Specify filename, including full path, to assembler.

   We want to go into the TOC section so at least one .toc will be emitted.
   Also, in order to output proper .bs/.es pairs, we need at least one static
   [RW] section emitted.

   Finally, declare mcount when profiling to make the assembler happy.  */

static void
rs6000_xcoff_file_start (void)
{
  rs6000_gen_section_name (&xcoff_bss_section_name,
			   main_input_filename, ".bss_");
  rs6000_gen_section_name (&xcoff_private_data_section_name,
			   main_input_filename, ".rw_");
  rs6000_gen_section_name (&xcoff_read_only_section_name,
			   main_input_filename, ".ro_");
  rs6000_gen_section_name (&xcoff_tls_data_section_name,
			   main_input_filename, ".tls_");
  rs6000_gen_section_name (&xcoff_tbss_section_name,
			   main_input_filename, ".tbss_[UL]");

  fputs ("\t.file\t", asm_out_file);
  output_quoted_string (asm_out_file, main_input_filename);
  fputc ('\n', asm_out_file);
  if (write_symbols != NO_DEBUG)
    switch_to_section (private_data_section);
  switch_to_section (toc_section);
  switch_to_section (text_section);
  if (profile_flag)
    fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
  rs6000_file_start ();
}
/* Output at end of assembler file.
   On the RS/6000, referencing data should automatically pull in text.  */

static void
rs6000_xcoff_file_end (void)
{
  switch_to_section (text_section);
  fputs ("_section_.text:\n", asm_out_file);
  switch_to_section (data_section);
  fputs (TARGET_32BIT
	 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
	 asm_out_file);
}

struct declare_alias_data
{
  FILE *file;
  bool function_descriptor;
};
/* Declare alias N.  A helper function for for_node_and_aliases.  */

static bool
rs6000_declare_alias (struct symtab_node *n, void *d)
{
  struct declare_alias_data *data = (struct declare_alias_data *)d;
  /* Main symbol is output specially, because varasm machinery does part of
     the job for us - we do not need to declare .globl/lglobs and such.  */
  if (!n->alias || n->weakref)
    return false;

  if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
    return false;

  /* Prevent assemble_alias from trying to use .set pseudo operation
     that does not behave as expected by the middle-end.  */
  TREE_ASM_WRITTEN (n->decl) = true;

  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
  char *buffer = (char *) alloca (strlen (name) + 2);
  char *p;
  int dollar_inside = 0;

  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p) {
    *p = '_';
    dollar_inside++;
    p = strchr (p + 1, '$');
  }
  if (TREE_PUBLIC (n->decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (n->decl))
	{
	  if (dollar_inside) {
	      if (data->function_descriptor)
		fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	      fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
	  if (data->function_descriptor)
	    {
	      fputs ("\t.globl .", data->file);
	      RS6000_OUTPUT_BASENAME (data->file, buffer);
	      putc ('\n', data->file);
	    }
	  fputs ("\t.globl ", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
#ifdef ASM_WEAKEN_DECL
      else if (DECL_WEAK (n->decl) && !data->function_descriptor)
	ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
#endif
    }
  else
    {
      if (dollar_inside)
	{
	  if (data->function_descriptor)
	    fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	  fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	}
      if (data->function_descriptor)
	{
	  fputs ("\t.lglobl .", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
      fputs ("\t.lglobl ", data->file);
      RS6000_OUTPUT_BASENAME (data->file, buffer);
      putc ('\n', data->file);
    }
  if (data->function_descriptor)
    fputs (".", data->file);
  RS6000_OUTPUT_BASENAME (data->file, buffer);
  fputs (":\n", data->file);
  return false;
}
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && cgraph_node::get (decl)
      && cgraph_node::get (decl)->instrumentation_clone
      && cgraph_node::get (decl)->instrumented_version)
    vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);

  return visibility_types[vis];
}
#endif
/* This macro produces the initial definition of a function name.
   On the RS/6000, we need to place an extra '.' in the function name and
   output the function descriptor.
   Dollar signs are converted to underscores.

   The csect for the function will have already been created when
   text_section was selected.  We do have to go back to that csect, however.

   The third and fourth parameters to the .function pseudo-op (16 and 044)
   are placeholders which no longer have any use.

   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
{
  char *buffer = (char *) alloca (strlen (name) + 1);
  char *p;
  int dollar_inside = 0;
  struct declare_alias_data data = {file, false};

  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p) {
    *p = '_';
    dollar_inside++;
    p = strchr (p + 1, '$');
  }
  if (TREE_PUBLIC (decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (decl))
	{
	  if (dollar_inside) {
	      fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
	      fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
	  fputs ("\t.globl .", file);
	  RS6000_OUTPUT_BASENAME (file, buffer);
#ifdef HAVE_GAS_HIDDEN
	  fputs (rs6000_xcoff_visibility (decl), file);
#endif
	  putc ('\n', file);
	}
    }
  else
    {
      if (dollar_inside) {
	  fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
	  fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
	}
      fputs ("\t.lglobl .", file);
      RS6000_OUTPUT_BASENAME (file, buffer);
      putc ('\n', file);
    }
  fputs ("\t.csect ", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (":\n", file);
  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							&data, true);
  fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (", TOC[tc0], 0\n", file);
  in_section = NULL;
  switch_to_section (function_section (decl));
  putc ('.', file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (":\n", file);
  data.function_descriptor = true;
  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							&data, true);
  if (!DECL_IGNORED_P (decl))
    {
      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
	xcoffout_declare_function (file, decl, buffer);
      else if (write_symbols == DWARF2_DEBUG)
	{
	  name = (*targetm.strip_name_encoding) (name);
	  fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
	}
    }
  return;
}
/* Output assembly language to globalize a symbol from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  fputs (GLOBAL_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);
#ifdef HAVE_GAS_HIDDEN
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}
/* Output assembly language to define a symbol as COMMON from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
					     tree decl ATTRIBUTE_UNUSED,
					     const char *name,
					     unsigned HOST_WIDE_INT size,
					     unsigned HOST_WIDE_INT align)
{
  unsigned HOST_WIDE_INT align2 = 2;

  if (align > 32)
    align2 = floor_log2 (align / BITS_PER_UNIT);
  else if (size > 4)
    align2 = 3;

  fputs (COMMON_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);

  fprintf (stream,
	   "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
	   size, align2);

#ifdef HAVE_GAS_HIDDEN
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}
/* This macro produces the initial definition of an object (variable) name.
   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
{
  struct declare_alias_data data = {file, false};
  RS6000_OUTPUT_BASENAME (file, name);
  fputs (":\n", file);
  symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							       &data, true);
}
/* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'.  */

void
rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-$", file);
}

/* Output a symbol offset relative to the dbase for the current object.
   We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
   signed offsets.

   __gcc_unwind_dbase is embedded in all executables/libraries through
   libgcc/config/rs6000/crtdbase.S.  */

void
rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs("-__gcc_unwind_dbase", file);
}
#ifdef HAVE_AS_TLS
static void
rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
{
  rtx symbol;
  int flags;
  const char *symname;

  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (!MEM_P (rtl))
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  flags = SYMBOL_REF_FLAGS (symbol);

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
    flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;

  SYMBOL_REF_FLAGS (symbol) = flags;

  /* Append mapping class to extern decls.  */
  symname = XSTR (symbol, 0);
  if (decl /* sync condition with assemble_external () */
      && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
      && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
	  || TREE_CODE (decl) == FUNCTION_DECL)
      && symname[strlen (symname) - 1] != ']')
    {
      char *newname = (char *) alloca (strlen (symname) + 5);
      strcpy (newname, symname);
      strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
			? "[DS]" : "[UA]"));
      XSTR (symbol, 0) = ggc_strdup (newname);
    }
}
#endif /* HAVE_AS_TLS */
#endif /* TARGET_XCOFF */
void
rs6000_asm_weaken_decl (FILE *stream, tree decl,
			const char *name, const char *val)
{
  fputs ("\t.weak\t", stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  if (decl && TREE_CODE (decl) == FUNCTION_DECL
      && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
    {
      if (TARGET_XCOFF)
	fputs ("[DS]", stream);
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
      fputs (rs6000_xcoff_visibility (decl), stream);
#endif
      fputs ("\n\t.weak\t.", stream);
      RS6000_OUTPUT_BASENAME (stream, name);
    }
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  fputc ('\n', stream);
  if (val)
    {
#ifdef ASM_OUTPUT_DEF
      ASM_OUTPUT_DEF (stream, name, val);
#endif
      if (decl && TREE_CODE (decl) == FUNCTION_DECL
	  && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
	{
	  fputs ("\t.set\t.", stream);
	  RS6000_OUTPUT_BASENAME (stream, name);
	  fputs (",.", stream);
	  RS6000_OUTPUT_BASENAME (stream, val);
	  fputc ('\n', stream);
	}
    }
}
/* Return true if INSN should not be copied.  */

static bool
rs6000_cannot_copy_insn_p (rtx_insn *insn)
{
  return recog_memoized (insn) >= 0
	 && get_attr_cannot_copy (insn);
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
		  int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
      /* On the RS/6000, if it is valid in the insn, it is free.  */
    case CONST_INT:
      if (((outer_code == SET
	    || outer_code == PLUS
	    || outer_code == MINUS)
	   && (satisfies_constraint_I (x)
	       || satisfies_constraint_L (x)))
	  || (outer_code == AND
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || ((outer_code == IOR || outer_code == XOR)
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || outer_code == ASHIFT
	  || outer_code == ASHIFTRT
	  || outer_code == LSHIFTRT
	  || outer_code == ROTATE
	  || outer_code == ROTATERT
	  || outer_code == ZERO_EXTRACT
	  || (outer_code == MULT
	      && satisfies_constraint_I (x))
	  || ((outer_code == DIV || outer_code == UDIV
	       || outer_code == MOD || outer_code == UMOD)
	      && exact_log2 (INTVAL (x)) >= 0)
	  || (outer_code == COMPARE
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)))
	  || ((outer_code == EQ || outer_code == NE)
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || (outer_code == GTU
	      && satisfies_constraint_I (x))
	  || (outer_code == LTU
	      && satisfies_constraint_P (x)))
	{
	  *total = 0;
	  return true;
	}
      else if ((outer_code == PLUS
		&& reg_or_add_cint_operand (x, VOIDmode))
	       || (outer_code == MINUS
		   && reg_or_sub_cint_operand (x, VOIDmode))
	       || ((outer_code == SET
		    || outer_code == IOR
		    || outer_code == XOR)
		   && (INTVAL (x)
		       & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      /* FALLTHRU */

    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case CONST:
    case HIGH:
    case SYMBOL_REF:
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      return true;

    case MEM:
      /* When optimizing for size, MEM should be slightly more expensive
	 than generating address, e.g., (plus (reg) (const)).
	 L1 cache latency is about two instructions.  */
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
	*total += COSTS_N_INSNS (100);
      return true;

    case LABEL_REF:
      *total = 0;
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->fp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case MULT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && satisfies_constraint_I (XEXP (x, 1)))
	{
	  if (INTVAL (XEXP (x, 1)) >= -256
	      && INTVAL (XEXP (x, 1)) <= 255)
	    *total = rs6000_cost->mulsi_const9;
	  else
	    *total = rs6000_cost->mulsi_const;
	}
      else if (mode == SFmode)
	*total = rs6000_cost->fp;
      else if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->dmul;
      else if (mode == DImode)
	*total = rs6000_cost->muldi;
      else
	*total = rs6000_cost->mulsi;
      return false;

    case FMA:
      if (mode == SFmode)
	*total = rs6000_cost->fp;
      else
	*total = rs6000_cost->dmul;
      break;

    case DIV:
    case MOD:
      if (FLOAT_MODE_P (mode))
	{
	  *total = mode == DFmode ? rs6000_cost->ddiv
				  : rs6000_cost->sdiv;
	  return false;
	}
      /* FALLTHRU */

    case UDIV:
    case UMOD:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
	{
	  if (code == DIV || code == MOD)
	    /* Shift, addze */
	    *total = COSTS_N_INSNS (2);
	  else
	    /* Shift */
	    *total = COSTS_N_INSNS (1);
	}
      else
	{
	  if (GET_MODE (XEXP (x, 1)) == DImode)
	    *total = rs6000_cost->divdi;
	  else
	    *total = rs6000_cost->divsi;
	}
      /* Add in shift and subtract for MOD unless we have a mod instruction. */
      if (!TARGET_MODULO && (code == MOD || code == UMOD))
	*total += COSTS_N_INSNS (2);
      return false;

    case CTZ:
      *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
      return false;

    case FFS:
      *total = COSTS_N_INSNS (4);
      return false;

    case POPCOUNT:
      *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
      return false;

    case PARITY:
      *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
      return false;

    case NOT:
      if (outer_code == AND || outer_code == IOR || outer_code == XOR)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case AND:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  rtx left = XEXP (x, 0);
	  rtx_code left_code = GET_CODE (left);

	  /* rotate-and-mask: 1 insn.  */
	  if ((left_code == ROTATE
	       || left_code == ASHIFT
	       || left_code == LSHIFTRT)
	      && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
	    {
	      *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
	      if (!CONST_INT_P (XEXP (left, 1)))
		*total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	  if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
	      || (val & 0xffff) == val
	      || (val & 0xffff0000) == val
	      || ((val & 0xffff) == 0 && mode == SImode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* 2 insns.  */
	  if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (2);
	      return true;
	    }
	}

      *total = COSTS_N_INSNS (1);
      return false;

    case IOR:
      *total = COSTS_N_INSNS (1);
      return true;

    case CLZ:
    case XOR:
    case ZERO_EXTRACT:
      *total = COSTS_N_INSNS (1);
      return false;

    case ASHIFT:
      /* The EXTSWSLI instruction is a combined instruction.  Don't count both
	 the sign extend and shift separately within the insn.  */
      if (TARGET_EXTSWSLI && mode == DImode
	  && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
	{
	  *total = 0;
	  return false;
	}
      /* fall through */

    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATE:
    case ROTATERT:
      /* Handle mul_highpart.  */
      if (outer_code == TRUNCATE
	  && GET_CODE (XEXP (x, 0)) == MULT)
	{
	  if (mode == DImode)
	    *total = rs6000_cost->muldi;
	  else
	    *total = rs6000_cost->mulsi;
	  return true;
	}
      else if (outer_code == AND)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      if (GET_CODE (XEXP (x, 0)) == MEM)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case COMPARE:
    case NEG:
    case ABS:
      if (!FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = rs6000_cost->fp;
      return false;

    case FLOAT_EXTEND:
      if (mode == DFmode)
	*total = rs6000_cost->sfdf_convert;
      else
	*total = rs6000_cost->fp;
      return false;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_FRSP:
	  *total = rs6000_cost->fp;
	  return true;

	default:
	  break;
	}
      break;

    case CALL:
    case IF_THEN_ELSE:
      if (!speed)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (FLOAT_MODE_P (mode)
	       && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
	{
	  *total = rs6000_cost->fp;
	  return false;
	}
      break;

    case NE:
    case EQ:
    case GTU:
    case LTU:
      /* Carry bit requires mode == Pmode.
	 NEG or PLUS already counted so only add one.  */
      if (mode == Pmode
	  && (outer_code == NEG || outer_code == PLUS))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      if (outer_code == SET)
	{
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      if (TARGET_ISEL && !TARGET_MFCRF)
		*total = COSTS_N_INSNS (8);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else
	    {
	      *total = COSTS_N_INSNS (3);
	      return false;
	    }
	}
      /* FALLTHRU */

    case GT:
    case LT:
    case UNORDERED:
      if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
	{
	  if (TARGET_ISEL && !TARGET_MFCRF)
	    *total = COSTS_N_INSNS (8);
	  else
	    *total = COSTS_N_INSNS (2);
	  return true;
	}
      /* CC COMPARE.  */
      if (outer_code == COMPARE)
	{
	  *total = 0;
	  return true;
	}
      break;

    default:
      break;
    }

  return false;
}
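/* Illustrative note (added): COSTS_N_INSNS (n) expands to n * 4, so
   the size-mode MEM cost COSTS_N_INSNS (1) + 1 above evaluates to 5,
   deliberately one unit more than a plain single-instruction cost of
   4, so that reusing an addressing expression is preferred over an
   extra load when optimizing for size.  */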
/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */

static bool
rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
			int opno, int *total, bool speed)
{
  bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);

  fprintf (stderr,
	   "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
	   "opno = %d, total = %d, speed = %s, x:\n",
	   ret ? "complete" : "scan inner",
	   GET_MODE_NAME (mode),
	   GET_RTX_NAME (outer_code),
	   opno,
	   *total,
	   speed ? "true" : "false");

  debug_rtx (x);

  return ret;
}
/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  */

static int
rs6000_debug_address_cost (rtx x, machine_mode mode,
			   addr_space_t as, bool speed)
{
  int ret = TARGET_ADDRESS_COST (x, mode, as, speed);

  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
	   ret, speed ? "true" : "false");
  debug_rtx (x);

  return ret;
}
/* A C expression returning the cost of moving data from a register of class
   CLASS1 to one of CLASS2.  */

static int
rs6000_register_move_cost (machine_mode mode,
			   reg_class_t from, reg_class_t to)
{
  int ret;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  /* Moves from/to GENERAL_REGS.  */
  if (reg_classes_intersect_p (to, GENERAL_REGS)
      || reg_classes_intersect_p (from, GENERAL_REGS))
    {
      reg_class_t rclass = from;

      if (! reg_classes_intersect_p (to, GENERAL_REGS))
	rclass = to;

      if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
	ret = (rs6000_memory_move_cost (mode, rclass, false)
	       + rs6000_memory_move_cost (mode, GENERAL_REGS, false));

      /* It's more expensive to move CR_REGS than CR0_REGS because of the
	 shift.  */
      else if (rclass == CR_REGS)
	ret = 4;

      /* For those processors that have slow LR/CTR moves, make them more
	 expensive than memory in order to bias spills to memory.  */
      else if ((rs6000_cpu == PROCESSOR_POWER6
		|| rs6000_cpu == PROCESSOR_POWER7
		|| rs6000_cpu == PROCESSOR_POWER8
		|| rs6000_cpu == PROCESSOR_POWER9)
	       && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
	ret = 6 * hard_regno_nregs[0][mode];

      else
	/* A move will cost one instruction per GPR moved.  */
	ret = 2 * hard_regno_nregs[0][mode];
    }

  /* If we have VSX, we can easily move between FPR or Altivec registers.  */
  else if (VECTOR_MEM_VSX_P (mode)
	   && reg_classes_intersect_p (to, VSX_REGS)
	   && reg_classes_intersect_p (from, VSX_REGS))
    ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode];

  /* Moving between two similar registers is just one instruction.  */
  else if (reg_classes_intersect_p (to, from))
    ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;

  /* Everything else has to go through GENERAL_REGS.  */
  else
    ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
	   + rs6000_register_move_cost (mode, from, GENERAL_REGS));

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[from],
		 reg_class_names[to]);
      dbg_cost_ctrl--;
    }

  return ret;
}
/* A C expression returning the cost of moving data of MODE from a register
   to or from memory.  */

static int
rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
			 bool in ATTRIBUTE_UNUSED)
{
  int ret;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  if (reg_classes_intersect_p (rclass, GENERAL_REGS))
    ret = 4 * hard_regno_nregs[0][mode];
  else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
	    || reg_classes_intersect_p (rclass, VSX_REGS)))
    ret = 4 * hard_regno_nregs[32][mode];
  else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
    ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
  else
    ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
      dbg_cost_ctrl--;
    }

  return ret;
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
rs6000_builtin_reciprocal (tree fndecl)
{
  switch (DECL_FUNCTION_CODE (fndecl))
    {
    case VSX_BUILTIN_XVSQRTDP:
      if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
	return NULL_TREE;

      return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];

    case VSX_BUILTIN_XVSQRTSP:
      if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
	return NULL_TREE;

      return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];

    default:
      return NULL_TREE;
    }
}
/* Load up a constant.  If the mode is a vector mode, splat the value across
   all of the vector elements.  */

static rtx
rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
{
  rtx reg;

  if (mode == SFmode || mode == DFmode)
    {
      rtx d = const_double_from_real_value (dconst, mode);
      reg = force_reg (mode, d);
    }
  else if (mode == V4SFmode)
    {
      rtx d = const_double_from_real_value (dconst, SFmode);
      rtvec v = gen_rtvec (4, d, d, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else if (mode == V2DFmode)
    {
      rtx d = const_double_from_real_value (dconst, DFmode);
      rtvec v = gen_rtvec (2, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else
    gcc_unreachable ();

  return reg;
}
/* Generate an FMA instruction.  */

static void
rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (target);
  rtx dst;

  dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
  gcc_assert (dst != NULL);

  if (dst != target)
    emit_move_insn (target, dst);
}
/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a).  */

static void
rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (dst);
  rtx r;

  /* This is a tad more complicated, since the fnma_optab is for
     a different expression: fma(-m1, m2, a), which is the same
     thing except in the case of signed zeros.

     Fortunately we know that if FMA is supported that FNMSUB is
     also supported in the ISA.  Just expand it directly.  */

  gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);

  r = gen_rtx_NEG (mode, a);
  r = gen_rtx_FMA (mode, m1, m2, r);
  r = gen_rtx_NEG (mode, r);
  emit_insn (gen_rtx_SET (dst, r));
}
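/* Illustrative note (added): the emitted RTL computes -fma(m1, m2, -a),
   i.e. -(m1*m2 - a) = a - m1*m2.  The fnma_optab form fma(-m1, m2, a)
   yields the same value except for the sign of a zero result (when
   m1*m2 == a, the first form gives -0.0 and the second +0.0), which is
   why it is not used here.  */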
/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
   add a reg_note saying that this was a division.  Support both scalar and
   vector divide.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
  machine_mode mode = GET_MODE (dst);
  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
  int i;

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes += 2;

  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  one = rs6000_load_constant_and_splat (mode, dconst1);

  /* x0 = 1./d estimate */
  x0 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
					      UNSPEC_FRES)));

  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1) {

    /* e0 = 1. - d * x0  */
    e0 = gen_reg_rtx (mode);
    rs6000_emit_nmsub (e0, d, x0, one);

    /* x1 = x0 + e0 * x0  */
    x1 = gen_reg_rtx (mode);
    rs6000_emit_madd (x1, e0, x0, x0);

    for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
	 ++i, xprev = xnext, eprev = enext) {

      /* enext = eprev * eprev  */
      enext = gen_reg_rtx (mode);
      emit_insn (gen_mul (enext, eprev, eprev));

      /* xnext = xprev + enext * xprev  */
      xnext = gen_reg_rtx (mode);
      rs6000_emit_madd (xnext, enext, xprev, xprev);
    }

  } else
    xprev = x0;

  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */

  /* u = n * xprev  */
  u = gen_reg_rtx (mode);
  emit_insn (gen_mul (u, n, xprev));

  /* v = n - (d * u)  */
  v = gen_reg_rtx (mode);
  rs6000_emit_nmsub (v, d, u, n);

  /* dst = (v * xprev) + u  */
  rs6000_emit_madd (dst, v, xprev, u);

  if (note_p)
    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}
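/* Illustrative accuracy check (added, not from the original source):
   writing e_i = 1 - d*x_i, the update x_(i+1) = x_i + e_i*x_i gives
   e_(i+1) = e_i^2, so the number of correct bits doubles per pass.
   A 5-bit low-precision estimate therefore reaches >= 23 bits
   (SFmode) after 3 passes and >= 52 bits (DFmode) after the 3 + 2
   passes selected above; the 14-bit high-precision estimate needs
   only 1 and 3 passes respectively.  */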
/* Goldschmidt's Algorithm for single/double-precision floating point
   sqrt and rsqrt.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{
  machine_mode mode = GET_MODE (src);
  rtx e = gen_reg_rtx (mode);
  rtx g = gen_reg_rtx (mode);
  rtx h = gen_reg_rtx (mode);

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes += 2;

  int i;
  rtx mhalf;
  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);

  /* e = rsqrt estimate */
  emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
					     UNSPEC_RSQRT)));

  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));

      if (mode == SFmode)
	{
	  rtx target = emit_conditional_move (e, GT, src, zero, mode,
					      e, zero, mode, 0);
	  if (target != e)
	    emit_move_insn (e, target);
	}
      else
	{
	  rtx cond = gen_rtx_GT (VOIDmode, e, zero);
	  rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
	}
    }

  /* g = sqrt estimate.  */
  emit_insn (gen_mul (g, e, src));
  /* h = 1/(2*sqrt) estimate.  */
  emit_insn (gen_mul (h, e, mhalf));

  if (recip)
    {
      if (passes == 1)
	{
	  rtx t = gen_reg_rtx (mode);
	  rs6000_emit_nmsub (t, g, h, mhalf);
	  /* Apply correction directly to 1/rsqrt estimate.  */
	  rs6000_emit_madd (dst, e, t, e);
	}
      else
	{
	  for (i = 0; i < passes; i++)
	    {
	      rtx t1 = gen_reg_rtx (mode);
	      rtx g1 = gen_reg_rtx (mode);
	      rtx h1 = gen_reg_rtx (mode);

	      rs6000_emit_nmsub (t1, g, h, mhalf);
	      rs6000_emit_madd (g1, g, t1, g);
	      rs6000_emit_madd (h1, h, t1, h);

	      g = g1;
	      h = h1;
	    }
	  /* Multiply by 2 for 1/rsqrt.  */
	  emit_insn (gen_add3_insn (dst, h, h));
	}
    }
  else
    {
      rtx t = gen_reg_rtx (mode);
      rs6000_emit_nmsub (t, g, h, mhalf);
      rs6000_emit_madd (dst, g, t, g);
    }

  return;
}
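/* Illustrative note (added): the Goldschmidt iteration maintains
   g ~= sqrt(src) and h ~= 1/(2*sqrt(src)).  The residual
   t = 1/2 - g*h is zero exactly when g*h = 1/2, i.e. at the true
   root, and the updates g += t*g and h += t*h drive both estimates
   toward that fixed point quadratically.  */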
/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
   (Power7) targets.  DST is the target, and SRC is the argument operand.  */

void
rs6000_emit_popcount (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp1, tmp2;

  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
  if (TARGET_POPCNTD)
    {
      if (mode == SImode)
	emit_insn (gen_popcntdsi2 (dst, src));
      else
	emit_insn (gen_popcntddi2 (dst, src));
      return;
    }

  tmp1 = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_popcntbsi2 (tmp1, src));
      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (SImode, tmp2);
      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
    }
  else
    {
      emit_insn (gen_popcntbdi2 (tmp1, src));
      tmp2 = expand_mult (DImode, tmp1,
			  GEN_INT ((HOST_WIDE_INT)
				   0x01010101 << 32 | 0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (DImode, tmp2);
      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
    }
}
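/* Worked example (added, illustrative, SImode): for src = 0x000F00FF,
   popcntb leaves the per-byte counts 0x00 0x04 0x00 0x08.  The
   multiply by 0x01010101 sums all four byte counts into the most
   significant byte (0 + 4 + 0 + 8 = 12), and the final shift right by
   24 extracts it, giving dst = 12.  */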
/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
   target, and SRC is the argument operand.  */

void
rs6000_emit_parity (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp;

  tmp = gen_reg_rtx (mode);

  /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
  if (TARGET_CMPB)
    {
      if (mode == SImode)
	{
	  emit_insn (gen_popcntbsi2 (tmp, src));
	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
	}
      else
	{
	  emit_insn (gen_popcntbdi2 (tmp, src));
	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
	}
      return;
    }

  if (mode == SImode)
    {
      /* Is mult+shift >= shift+xor+shift+xor?  */
      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
	{
	  rtx tmp1, tmp2, tmp3, tmp4;

	  tmp1 = gen_reg_rtx (SImode);
	  emit_insn (gen_popcntbsi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
	  tmp3 = gen_reg_rtx (SImode);
	  emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
	  emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
    }
  else
    {
      /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
	{
	  rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;

	  tmp1 = gen_reg_rtx (DImode);
	  emit_insn (gen_popcntbdi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
	  tmp3 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
	  tmp5 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));

	  tmp6 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
	  emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
    }
}
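/* Illustrative note (added): the shift-and-xor ladder folds the
   per-byte counts from popcntb pairwise (halving from 32 to 16 to 8
   bits for SImode), so the low byte ends up holding the xor of all
   the byte counts; its least significant bit, extracted by the final
   AND with 1, is the parity of the original value.  */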
/* Expand an Altivec constant permutation for little endian mode.
   There are two issues: First, the two input operands must be
   swapped so that together they form a double-wide array in LE
   order.  Second, the vperm instruction has surprising behavior
   in LE mode: it interprets the elements of the source vectors
   in BE mode ("left to right") and interprets the elements of
   the destination vector in LE mode ("right to left").  To
   correct for this, we must subtract each element of the permute
   control vector from 31.

   For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
   with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
   We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
   serve as the permute control vector.  Then, in BE mode,

     vperm 9,10,11,12

   places the desired result in vr9.  However, in LE mode the
   vector contents will be

     vr10 = 00000003 00000002 00000001 00000000
     vr11 = 00000007 00000006 00000005 00000004

   The result of the vperm using the same permute control vector is

     vr9  = 05000000 07000000 01000000 03000000

   That is, the leftmost 4 bytes of vr10 are interpreted as the
   source for the rightmost 4 bytes of vr9, and so on.

   If we change the permute control vector to

     vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}

   and issue

     vperm 9,11,10,12

   we get the desired

     vr9  = 00000006 00000004 00000002 00000000.  */

void
altivec_expand_vec_perm_const_le (rtx operands[4])
{
  unsigned int i;
  rtx perm[16];
  rtx constv, unspec;
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];

  /* Unpack and adjust the constant selector.  */
  for (i = 0; i < 16; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      unsigned int elt = 31 - (INTVAL (e) & 31);
      perm[i] = GEN_INT (elt);
    }

  /* Expand to a permute, swapping the inputs and using the
     adjusted selector.  */
  if (!REG_P (op0))
    op0 = force_reg (V16QImode, op0);
  if (!REG_P (op1))
    op1 = force_reg (V16QImode, op1);

  constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
  constv = force_reg (V16QImode, constv);
  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
			   UNSPEC_VPERM);
  if (!REG_P (target))
    {
      rtx tmp = gen_reg_rtx (V16QImode);
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
   permute control vector.  But here it's not a constant, so we must
   generate a vector NAND or NOR to do the adjustment.  */

void
altivec_expand_vec_perm_le (rtx operands[4])
{
  rtx notx, iorx, unspec;
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];
  rtx tmp = target;
  rtx norreg = gen_reg_rtx (V16QImode);
  machine_mode mode = GET_MODE (target);

  /* Get everything in regs so the pattern matches.  */
  if (!REG_P (op0))
    op0 = force_reg (mode, op0);
  if (!REG_P (op1))
    op1 = force_reg (mode, op1);
  if (!REG_P (sel))
    sel = force_reg (V16QImode, sel);
  if (!REG_P (target))
    tmp = gen_reg_rtx (mode);

  if (TARGET_P9_VECTOR)
    {
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
			       UNSPEC_VPERMR);
    }
  else
    {
      /* Invert the selector with a VNAND if available, else a VNOR.
	 The VNAND is preferred for future fusion opportunities.  */
      notx = gen_rtx_NOT (V16QImode, sel);
      iorx = (TARGET_P8_VECTOR
	      ? gen_rtx_IOR (V16QImode, notx, notx)
	      : gen_rtx_AND (V16QImode, notx, notx));
      emit_insn (gen_rtx_SET (norreg, iorx));

      /* Permute with operands reversed and adjusted selector.  */
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
			       UNSPEC_VPERM);
    }

  /* Copy into target, possibly by way of a register.  */
  if (!REG_P (target))
    {
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
/* Expand an Altivec constant permutation.  Return true if we match
   an efficient implementation; false to fall back to VPERM.  */

bool
altivec_expand_vec_perm_const (rtx operands[4])
{
  struct altivec_perm_insn {
    HOST_WIDE_INT mask;
    enum insn_code impl;
    unsigned char perm[16];
  };
  static const struct altivec_perm_insn patterns[] = {
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
      {  1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
      {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
       : CODE_FOR_altivec_vmrglb_direct),
      {  0, 16,  1, 17,  2, 18,  3, 19,  4, 20,  5, 21,  6, 22,  7, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
       : CODE_FOR_altivec_vmrglh_direct),
      {  0,  1, 16, 17,  2,  3, 18, 19,  4,  5, 20, 21,  6,  7, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
       : CODE_FOR_altivec_vmrglw_direct),
      {  0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
       : CODE_FOR_altivec_vmrghb_direct),
      {  8, 24,  9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
       : CODE_FOR_altivec_vmrghh_direct),
      {  8,  9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
       : CODE_FOR_altivec_vmrghw_direct),
      {  8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
    { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
      {  0,  1,  2,  3, 16, 17, 18, 19,  8,  9, 10, 11, 24, 25, 26, 27 } },
    { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
      {  4,  5,  6,  7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
  };

  unsigned int i, j, elt, which;
  unsigned char perm[16];
  rtx target, op0, op1, sel, x;
  bool one_vec;

  target = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  sel = operands[3];

  /* Unpack the constant selector.  */
  for (i = which = 0; i < 16; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      elt = INTVAL (e) & 31;
      which |= (elt < 16 ? 1 : 2);
      perm[i] = elt;
    }

  /* Simplify the constant selector based on operands.  */
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      one_vec = false;
      if (!rtx_equal_p (op0, op1))
	break;
      /* FALLTHRU */

    case 2:
      for (i = 0; i < 16; ++i)
	perm[i] &= 15;
      op0 = op1;
      one_vec = true;
      break;

    case 1:
      op1 = op0;
      one_vec = true;
      break;
    }

  /* Look for splat patterns.  */
  if (one_vec)
    {
      elt = perm[0];

      for (i = 0; i < 16; ++i)
	if (perm[i] != elt)
	  break;
      if (i == 16)
	{
	  if (!BYTES_BIG_ENDIAN)
	    elt = 15 - elt;
	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
	  return true;
	}

      if (elt % 2 == 0)
	{
	  for (i = 0; i < 16; i += 2)
	    if (perm[i] != elt || perm[i + 1] != elt + 1)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
	      x = gen_reg_rtx (V8HImode);
	      emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}

      if (elt % 4 == 0)
	{
	  for (i = 0; i < 16; i += 4)
	    if (perm[i] != elt
		|| perm[i + 1] != elt + 1
		|| perm[i + 2] != elt + 2
		|| perm[i + 3] != elt + 3)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
	      x = gen_reg_rtx (V4SImode);
	      emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}
    }

  /* Look for merge and pack patterns.  */
  for (j = 0; j < ARRAY_SIZE (patterns); ++j)
    {
      bool swapped;

      if ((patterns[j].mask & rs6000_isa_flags) == 0)
	continue;

      elt = patterns[j].perm[0];
      if (perm[0] == elt)
	swapped = false;
      else if (perm[0] == elt + 16)
	swapped = true;
      else
	continue;
      for (i = 1; i < 16; ++i)
	{
	  elt = patterns[j].perm[i];
	  if (swapped)
	    elt = (elt >= 16 ? elt - 16 : elt + 16);
	  else if (one_vec && elt >= 16)
	    elt -= 16;
	  if (perm[i] != elt)
	    break;
	}
      if (i == 16)
	{
	  enum insn_code icode = patterns[j].impl;
	  machine_mode omode = insn_data[icode].operand[0].mode;
	  machine_mode imode = insn_data[icode].operand[1].mode;

	  /* For little-endian, don't use vpkuwum and vpkuhum if the
	     underlying vector type is not V4SI and V8HI, respectively.
	     For example, using vpkuwum with a V8HI picks up the even
	     halfwords (BE numbering) when the even halfwords (LE
	     numbering) are what we need.  */
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuwum_direct
	      && ((GET_CODE (op0) == REG
		   && GET_MODE (op0) != V4SImode)
		  || (GET_CODE (op0) == SUBREG
		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
	    continue;
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuhum_direct
	      && ((GET_CODE (op0) == REG
		   && GET_MODE (op0) != V8HImode)
		  || (GET_CODE (op0) == SUBREG
		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
	    continue;

	  /* For little-endian, the two input operands must be swapped
	     (or swapped back) to ensure proper right-to-left numbering
	     at runtime.  */
	  if (swapped ^ !BYTES_BIG_ENDIAN)
	    std::swap (op0, op1);
	  if (imode != V16QImode)
	    {
	      op0 = gen_lowpart (imode, op0);
	      op1 = gen_lowpart (imode, op1);
	    }
	  if (omode == V16QImode)
	    x = target;
	  else
	    x = gen_reg_rtx (omode);
	  emit_insn (GEN_FCN (icode) (x, op0, op1));
	  if (omode != V16QImode)
	    emit_move_insn (target, gen_lowpart (V16QImode, x));
	  return true;
	}
    }

  if (!BYTES_BIG_ENDIAN)
    {
      altivec_expand_vec_perm_const_le (operands);
      return true;
    }

  return false;
}
/* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
   Return true if we match an efficient implementation.  */

static bool
rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
				unsigned char perm0, unsigned char perm1)
{
  rtx x;

  /* If both selectors come from the same operand, fold to single op.  */
  if ((perm0 & 2) == (perm1 & 2))
    {
      if (perm0 & 2)
	op0 = op1;
      else
	op1 = op0;
    }
  /* If both operands are equal, fold to simpler permutation.  */
  if (rtx_equal_p (op0, op1))
    {
      perm0 = perm0 & 1;
      perm1 = (perm1 & 1) + 2;
    }
  /* If the first selector comes from the second operand, swap.  */
  else if (perm0 & 2)
    {
      if (perm1 & 2)
	return false;
      perm0 -= 2;
      perm1 += 2;
      std::swap (op0, op1);
    }
  /* If the second selector does not come from the second operand, fail.  */
  else if ((perm1 & 2) == 0)
    return false;

  /* Success! */
  if (target != NULL)
    {
      machine_mode vmode, dmode;
      rtvec v;

      vmode = GET_MODE (target);
      gcc_assert (GET_MODE_NUNITS (vmode) == 2);
      dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
      x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
      v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
      x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
      emit_insn (gen_rtx_SET (target, x));
    }
  return true;
}

bool
rs6000_expand_vec_perm_const (rtx operands[4])
{
  rtx target, op0, op1, sel;
  unsigned char perm0, perm1;

  target = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  sel = operands[3];

  /* Unpack the constant selector.  */
  perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
  perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;

  return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
}
38260 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode
,
38261 const unsigned char *sel
)
38263 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
38264 if (TARGET_ALTIVEC
)
38267 /* Check for ps_merge* or evmerge* insns. */
38268 if ((TARGET_PAIRED_FLOAT
&& vmode
== V2SFmode
)
38269 || (TARGET_SPE
&& vmode
== V2SImode
))
38271 rtx op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
38272 rtx op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
38273 return rs6000_expand_vec_perm_const_1 (NULL
, op0
, op1
, sel
[0], sel
[1]);
38279 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
38282 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
38283 machine_mode vmode
, unsigned nelt
, rtx perm
[])
38285 machine_mode imode
;
38289 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
)
38291 imode
= mode_for_size (GET_MODE_UNIT_BITSIZE (vmode
), MODE_INT
, 0);
38292 imode
= mode_for_vector (imode
, nelt
);
38295 x
= gen_rtx_CONST_VECTOR (imode
, gen_rtvec_v (nelt
, perm
));
38296 x
= expand_vec_perm (vmode
, op0
, op1
, x
, target
);
38298 emit_move_insn (target
, x
);
38301 /* Expand an extract even operation. */
38304 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
38306 machine_mode vmode
= GET_MODE (target
);
38307 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
38310 for (i
= 0; i
< nelt
; i
++)
38311 perm
[i
] = GEN_INT (i
* 2);
38313 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, nelt
, perm
);
/* Expand a vector interleave operation.  */

void
rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
  rtx perm[16];

  high = (highp ? 0 : nelt / 2);
  for (i = 0; i < nelt / 2; i++)
    {
      perm[i * 2] = GEN_INT (i + high);
      perm[i * 2 + 1] = GEN_INT (i + nelt + high);
    }

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
}
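
/* Example (editorial): for a 4-element vector, highp = true gives high = 0
   and perm = {0, 4, 1, 5} (the high halves of op0 and op1 interleaved),
   while highp = false gives high = 2 and perm = {2, 6, 3, 7}.  */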
/* Scale a V2DF vector SRC by two to the SCALE and place in TGT.  */

void
rs6000_scale_v2df (rtx tgt, rtx src, int scale)
{
  HOST_WIDE_INT hwi_scale (scale);
  REAL_VALUE_TYPE r_pow;
  rtvec v = rtvec_alloc (2);
  rtx elt;
  rtx scale_vec = gen_reg_rtx (V2DFmode);
  (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
  elt = const_double_from_real_value (r_pow, DFmode);
  RTVEC_ELT (v, 0) = elt;
  RTVEC_ELT (v, 1) = elt;
  rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
  emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
}
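
/* Usage sketch (editorial): rs6000_scale_v2df (tgt, src, 4) builds the
   constant vector {16.0, 16.0} (2^4 in each lane) and emits a single V2DF
   multiply, so both lanes of SRC are scaled by 16; a negative SCALE
   divides by the corresponding power of two.  */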
/* Return an RTX representing where to find the function value of a
   function returning MODE.  */

static rtx
rs6000_complex_function_value (machine_mode mode)
{
  unsigned int regno;
  rtx r1, r2;
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);

  if (TARGET_FLOAT128_TYPE
      && (mode == KCmode
	  || (mode == TCmode && TARGET_IEEEQUAD)))
    regno = ALTIVEC_ARG_RETURN;

  else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
    regno = FP_ARG_RETURN;

  else
    {
      regno = GP_ARG_RETURN;

      /* 32-bit is OK since it'll go in r3/r4.  */
      if (TARGET_32BIT && inner_bytes >= 4)
	return gen_rtx_REG (mode, regno);
    }

  if (inner_bytes >= 8)
    return gen_rtx_REG (mode, regno);

  r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
			  const0_rtx);
  r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
			  GEN_INT (inner_bytes));
  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
}
/* Return an rtx describing a return value of MODE as a PARALLEL
   in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
   stride REG_STRIDE.  */

static rtx
rs6000_parallel_return (machine_mode mode,
			int n_elts, machine_mode elt_mode,
			unsigned int regno, unsigned int reg_stride)
{
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i;

  for (i = 0; i < n_elts; i++)
    {
      rtx r = gen_rtx_REG (elt_mode, regno);
      rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
      XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
      regno += reg_stride;
    }

  return par;
}
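
/* Example (editorial): rs6000_parallel_return (DCmode, 4, SImode,
   GP_ARG_RETURN, 1) describes a 16-byte value returned in r3..r6:

     (parallel:DC [(expr_list (reg:SI 3) (const_int 0))
		   (expr_list (reg:SI 4) (const_int 4))
		   (expr_list (reg:SI 5) (const_int 8))
		   (expr_list (reg:SI 6) (const_int 12))])  */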
/* Target hook for TARGET_FUNCTION_VALUE.

   On the SPE, both FPs and vectors are returned in r3.

   On RS/6000 an integer value is in r3 and a floating-point value is in
   fp1, unless -msoft-float.  */

static rtx
rs6000_function_value (const_tree valtype,
		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
		       bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  unsigned int regno;
  machine_mode elt_mode;
  int n_elts;

  /* Special handling for structs in darwin64.  */
  if (TARGET_MACHO
      && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed as
	 an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
      if (valret)
	return valret;
      /* Otherwise fall through to standard ABI rules.  */
    }

  mode = TYPE_MODE (valtype);

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
  if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
    {
      int first_reg, n_regs;

      if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
	{
	  /* _Decimal128 must use even/odd register pairs.  */
	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	}
      else
	{
	  first_reg = ALTIVEC_ARG_RETURN;
	  n_regs = 1;
	}

      return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
    }

  /* Some return value types need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64)
    switch (mode)
      {
      default:
	break;
      case DImode:
      case SCmode:
      case DCmode:
      case TCmode:
	int count = GET_MODE_SIZE (mode) / 4;
	return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
      }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
      || POINTER_TYPE_P (valtype))
    mode = TARGET_32BIT ? SImode : DImode;

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
	   && !FLOAT128_VECTOR_P (mode)
	   && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
    regno = FP_ARG_RETURN;
  else if (TREE_CODE (valtype) == COMPLEX_TYPE
	   && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
    regno = ALTIVEC_ARG_RETURN;
  else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
	   && (mode == DFmode || mode == DCmode
	       || FLOAT128_IBM_P (mode) || mode == TCmode))
    return spe_build_register_parallel (mode, GP_ARG_RETURN);
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
rs6000_libcall_value (machine_mode mode)
{
  unsigned int regno;

  /* Long long return value need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
    return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
	   && TARGET_HARD_FLOAT && TARGET_FPRS
	   && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
    regno = FP_ARG_RETURN;
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
    regno = ALTIVEC_ARG_RETURN;
  else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
	   && (mode == DFmode || mode == DCmode
	       || FLOAT128_IBM_P (mode) || mode == TCmode))
    return spe_build_register_parallel (mode, GP_ARG_RETURN);
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
/* Return true if we use LRA instead of reload pass.  */

static bool
rs6000_lra_p (void)
{
  return TARGET_LRA;
}
/* Compute register pressure classes.  We implement the target hook to avoid
   IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
   lead to incorrect estimates of the number of available registers and
   therefore increased register pressure/spill.  */

static int
rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
{
  int n;

  n = 0;
  pressure_classes[n++] = GENERAL_REGS;
  if (TARGET_VSX)
    pressure_classes[n++] = VSX_REGS;
  else
    {
      if (TARGET_ALTIVEC)
	pressure_classes[n++] = ALTIVEC_REGS;
      if (TARGET_HARD_FLOAT && TARGET_FPRS)
	pressure_classes[n++] = FLOAT_REGS;
    }
  pressure_classes[n++] = CR_REGS;
  pressure_classes[n++] = SPECIAL_REGS;

  return n;
}
/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.

   For the RS/6000, if frame pointer elimination is being done, we would like
   to convert ap into fp, not sp.

   We need r30 if -mminimal-toc was specified, and there are constant pool
   references.  */

static bool
rs6000_can_eliminate (const int from, const int to)
{
  return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
	  ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
	    || constant_pool_empty_p ()
	  : true);
}
/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */

HOST_WIDE_INT
rs6000_initial_elimination_offset (int from, int to)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  HOST_WIDE_INT offset;

  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? 0 : -info->total_size;
  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    {
      offset = info->push_p ? 0 : -info->total_size;
      if (FRAME_GROWS_DOWNWARD)
	offset += info->fixed_size + info->vars_size + info->parm_size;
    }
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = FRAME_GROWS_DOWNWARD
	     ? info->fixed_size + info->vars_size + info->parm_size
	     : 0;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = info->total_size;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? info->total_size : 0;
  else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}
static rtx
rs6000_dwarf_register_span (rtx reg)
{
  rtx parts[8];
  int i, words;
  unsigned regno = REGNO (reg);
  machine_mode mode = GET_MODE (reg);

  if (TARGET_SPE
      && regno < 32
      && (SPE_VECTOR_MODE (GET_MODE (reg))
	  || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
	      && mode != SFmode && mode != SDmode && mode != SCmode)))
    ;
  else
    return NULL_RTX;

  regno = REGNO (reg);

  /* The duality of the SPE register size wreaks all kinds of havoc.
     This is a way of distinguishing r0 in 32-bits from r0 in
     64-bits.  */
  words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
  gcc_assert (words <= 4);
  for (i = 0; i < words; i++, regno++)
    {
      if (BYTES_BIG_ENDIAN)
	{
	  parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
	  parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
	}
      else
	{
	  parts[2 * i] = gen_rtx_REG (SImode, regno);
	  parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
	}
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
}
/* Fill in sizes for SPE register high parts in table used by unwinder.  */

static void
rs6000_init_dwarf_reg_sizes_extra (tree address)
{
  if (TARGET_SPE)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      rtx value = gen_int_mode (4, mode);

      for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
	{
	  int column = DWARF_REG_TO_UNWIND_COLUMN
		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

	  emit_move_insn (adjust_address (mem, mode, offset), value);
	}
    }

  if (TARGET_MACHO && ! TARGET_ALTIVEC)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      rtx value = gen_int_mode (16, mode);

      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
	 The unwinder still needs to know the size of Altivec registers.  */

      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
	{
	  int column = DWARF_REG_TO_UNWIND_COLUMN
		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

	  emit_move_insn (adjust_address (mem, mode, offset), value);
	}
    }
}
/* Map internal gcc register numbers to debug format register numbers.
   FORMAT specifies the type of debug register number to use:
     0 -- debug information, except for frame-related sections
     1 -- DWARF .debug_frame section
     2 -- DWARF .eh_frame section  */

unsigned int
rs6000_dbx_register_number (unsigned int regno, unsigned int format)
{
  /* We never use the GCC internal number for SPE high registers.
     Those are mapped to the 1200..1231 range for all debug formats.  */
  if (SPE_HIGH_REGNO_P (regno))
    return regno - FIRST_SPE_HIGH_REGNO + 1200;

  /* Except for the above, we use the internal number for non-DWARF
     debug information, and also for .eh_frame.  */
  if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
    return regno;

  /* On some platforms, we use the standard DWARF register
     numbering for .debug_info and .debug_frame.  */
#ifdef RS6000_USE_DWARF_NUMBERING
  if (regno <= 63)
    return regno;
  if (regno == LR_REGNO)
    return 108;
  if (regno == CTR_REGNO)
    return 109;
  /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
     translated any combination of CR2, CR3, CR4 saves to a save of CR2.
     The actual code emitted saves the whole of CR, so we map CR2_REGNO
     to the DWARF reg for CR.  */
  if (format == 1 && regno == CR2_REGNO)
    return 64;
  if (CR_REGNO_P (regno))
    return regno - CR0_REGNO + 86;
  if (regno == CA_REGNO)
    return 101;  /* XER */
  if (ALTIVEC_REGNO_P (regno))
    return regno - FIRST_ALTIVEC_REGNO + 1124;
  if (regno == VRSAVE_REGNO)
    return 356;
  if (regno == VSCR_REGNO)
    return 67;
  if (regno == SPE_ACC_REGNO)
    return 99;
  if (regno == SPEFSCR_REGNO)
    return 612;
#endif
  return regno;
}
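
/* Example (editorial; numbers per the RS6000_USE_DWARF_NUMBERING branch
   above): for .debug_frame (format == 1), r0..r31 and f0..f31 map to
   themselves, LR maps to 108, CTR to 109, a CR2 save to the DWARF number
   for the whole CR, and v0..v31 to 1124..1155.  */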
/* target hook eh_return_filter_mode */
static machine_mode
rs6000_eh_return_filter_mode (void)
{
  return TARGET_32BIT ? SImode : word_mode;
}
/* Target hook for scalar_mode_supported_p.  */
static bool
rs6000_scalar_mode_supported_p (machine_mode mode)
{
  /* -m32 does not support TImode.  This is the default, from
     default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
     same ABI as for -m32.  But default_scalar_mode_supported_p allows
     integer modes of precision 2 * BITS_PER_WORD, which matches TImode
     for -mpowerpc64.  */
  if (TARGET_32BIT && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Target hook for vector_mode_supported_p.  */
static bool
rs6000_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
    return true;

  if (TARGET_SPE && SPE_VECTOR_MODE (mode))
    return true;

  /* There is no vector form for IEEE 128-bit.  If we return true for IEEE
     128-bit, the compiler might try to widen IEEE 128-bit to IBM
     double-double.  */
  else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
    return true;

  else
    return false;
}
/* Target hook for floatn_mode.  */
static machine_mode
rs6000_floatn_mode (int n, bool extended)
{
  if (extended)
    {
      switch (n)
	{
	case 32:
	  return DFmode;

	case 64:
	  if (TARGET_FLOAT128_KEYWORD)
	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
	  else
	    return VOIDmode;

	case 128:
	  return VOIDmode;

	default:
	  /* Those are the only valid _FloatNx types.  */
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (n)
	{
	case 32:
	  return SFmode;

	case 64:
	  return DFmode;

	case 128:
	  if (TARGET_FLOAT128_KEYWORD)
	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
	  else
	    return VOIDmode;

	default:
	  return VOIDmode;
	}
    }
}
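
/* Example (editorial): with -mfloat128 on a target whose long double is
   IBM double-double, _Float128 maps to KFmode; if TFmode is already IEEE
   128-bit, it maps to TFmode instead.  _Float32x maps to DFmode, and
   _Float64x to the 128-bit IEEE mode when the keyword is enabled.  */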
/* Target hook for c_mode_for_suffix.  */
static machine_mode
rs6000_c_mode_for_suffix (char suffix)
{
  if (TARGET_FLOAT128_TYPE)
    {
      if (suffix == 'q' || suffix == 'Q')
	return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;

      /* At the moment, we are not defining a suffix for IBM extended double.
	 If/when the default for -mabi=ieeelongdouble is changed, and we want
	 to support __ibm128 constants in legacy library code, we may need to
	 re-evaluate this decision.  Currently, c-lex.c only supports 'w' and
	 'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
	 __float80 constants.  */
    }

  return VOIDmode;
}
/* Target hook for invalid_arg_for_unprototyped_fn.  */
static const char *
invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
{
  return (!rs6000_darwin64_abi
	  && typelist == 0
	  && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
	  && (funcdecl == NULL_TREE
	      || (TREE_CODE (funcdecl) == FUNCTION_DECL
		  && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
	 ? N_("AltiVec argument passed to unprototyped function")
	 : NULL;
}
/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
   setup by using __stack_chk_fail_local hidden function instead of
   calling __stack_chk_fail directly.  Otherwise it is better to call
   __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
rs6000_stack_protect_fail (void)
{
  return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
	 ? default_hidden_stack_protect_fail ()
	 : default_external_stack_protect_fail ();
}
void
rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
			   int num_operands ATTRIBUTE_UNUSED)
{
  if (rs6000_warn_cell_microcode)
    {
      const char *temp;
      int insn_code_number = recog_memoized (insn);
      location_t location = INSN_LOCATION (insn);

      /* Punt on insns we cannot recognize.  */
      if (insn_code_number < 0)
	return;

      /* get_insn_template can modify recog_data, so save and restore it.  */
      struct recog_data_d recog_data_save = recog_data;
      for (int i = 0; i < recog_data.n_operands; i++)
	recog_data.operand[i] = copy_rtx (recog_data.operand[i]);
      temp = get_insn_template (insn_code_number, insn);
      recog_data = recog_data_save;

      if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
	warning_at (location, OPT_mwarn_cell_microcode,
		    "emitting microcode insn %s\t[%s] #%d",
		    temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
      else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
	warning_at (location, OPT_mwarn_cell_microcode,
		    "emitting conditional microcode insn %s\t[%s] #%d",
		    temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}
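
/* Illustrative sketch (editorial, not compiled): how AddressSanitizer uses
   the offset returned above.  Eight application bytes share one shadow
   byte, so the shadow address is (addr >> 3) plus the target offset.  */
#if 0
static unsigned long long
example_asan_shadow_address (unsigned long long addr, int is_64bit)
{
  /* 1 << 41 for 64-bit PowerPC, 1 << 29 for 32-bit, as returned by
     rs6000_asan_shadow_offset above.  */
  unsigned long long offset = 1ULL << (is_64bit ? 41 : 29);
  return (addr >> 3) + offset;
}
#endif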
/* Mask options that we want to support inside of attribute((target)) and
   #pragma GCC target operations.  Note, we do not include things like
   64/32-bit, endianness, hard/soft floating point, etc. that would have
   different calling sequences.  */

struct rs6000_opt_mask {
  const char *name;		/* option name */
  HOST_WIDE_INT mask;		/* mask to set */
  bool invert;			/* invert sense of mask */
  bool valid_target;		/* option is a target option */
};

static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
  { "altivec", OPTION_MASK_ALTIVEC, false, true },
  { "cmpb", OPTION_MASK_CMPB, false, true },
  { "crypto", OPTION_MASK_CRYPTO, false, true },
  { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
  { "dlmzb", OPTION_MASK_DLMZB, false, true },
  { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
    false, true },
  { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
  { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
  { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
  { "fprnd", OPTION_MASK_FPRND, false, true },
  { "hard-dfp", OPTION_MASK_DFP, false, true },
  { "htm", OPTION_MASK_HTM, false, true },
  { "isel", OPTION_MASK_ISEL, false, true },
  { "mfcrf", OPTION_MASK_MFCRF, false, true },
  { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
  { "modulo", OPTION_MASK_MODULO, false, true },
  { "mulhw", OPTION_MASK_MULHW, false, true },
  { "multiple", OPTION_MASK_MULTIPLE, false, true },
  { "popcntb", OPTION_MASK_POPCNTB, false, true },
  { "popcntd", OPTION_MASK_POPCNTD, false, true },
  { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
  { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
  { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
  { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
  { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
  { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
  { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
  { "power9-misc", OPTION_MASK_P9_MISC, false, true },
  { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
  { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
  { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
  { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
  { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
  { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
  { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
  { "string", OPTION_MASK_STRING, false, true },
  { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
  { "update", OPTION_MASK_NO_UPDATE, true , true },
  { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
  { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
  { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
  { "vsx", OPTION_MASK_VSX, false, true },
  { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
  { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
#ifdef OPTION_MASK_64BIT
#if TARGET_AIX_OS
  { "aix64", OPTION_MASK_64BIT, false, false },
  { "aix32", OPTION_MASK_64BIT, true, false },
#else
  { "64", OPTION_MASK_64BIT, false, false },
  { "32", OPTION_MASK_64BIT, true, false },
#endif
#endif
#ifdef OPTION_MASK_EABI
  { "eabi", OPTION_MASK_EABI, false, false },
#endif
#ifdef OPTION_MASK_LITTLE_ENDIAN
  { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
  { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
#endif
#ifdef OPTION_MASK_RELOCATABLE
  { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
#endif
#ifdef OPTION_MASK_STRICT_ALIGN
  { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
#endif
  { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
  { "string", OPTION_MASK_STRING, false, false },
};
/* Builtin mask mapping for printing the flags.  */
static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{
  { "altivec", RS6000_BTM_ALTIVEC, false, false },
  { "vsx", RS6000_BTM_VSX, false, false },
  { "spe", RS6000_BTM_SPE, false, false },
  { "paired", RS6000_BTM_PAIRED, false, false },
  { "fre", RS6000_BTM_FRE, false, false },
  { "fres", RS6000_BTM_FRES, false, false },
  { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
  { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
  { "popcntd", RS6000_BTM_POPCNTD, false, false },
  { "cell", RS6000_BTM_CELL, false, false },
  { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
  { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
  { "power9-misc", RS6000_BTM_P9_MISC, false, false },
  { "crypto", RS6000_BTM_CRYPTO, false, false },
  { "htm", RS6000_BTM_HTM, false, false },
  { "hard-dfp", RS6000_BTM_DFP, false, false },
  { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
  { "long-double-128", RS6000_BTM_LDBL128, false, false },
  { "float128", RS6000_BTM_FLOAT128, false, false },
};
/* Option variables that we want to support inside attribute((target)) and
   #pragma GCC target operations.  */

struct rs6000_opt_var {
  const char *name;		/* option name */
  size_t global_offset;		/* offset of the option in global_options.  */
  size_t target_offset;		/* offset of the option in target options.  */
};

static struct rs6000_opt_var const rs6000_opt_vars[] =
{
  { "friz",
    offsetof (struct gcc_options, x_TARGET_FRIZ),
    offsetof (struct cl_target_option, x_TARGET_FRIZ), },
  { "avoid-indexed-addresses",
    offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
    offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
  { "paired",
    offsetof (struct gcc_options, x_rs6000_paired_float),
    offsetof (struct cl_target_option, x_rs6000_paired_float), },
  { "longcall",
    offsetof (struct gcc_options, x_rs6000_default_long_calls),
    offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
  { "optimize-swaps",
    offsetof (struct gcc_options, x_rs6000_optimize_swaps),
    offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
  { "allow-movmisalign",
    offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
  { "allow-df-permute",
    offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
  { "sched-groups",
    offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
    offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
  { "always-hint",
    offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
    offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
  { "align-branch-targets",
    offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
    offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
  { "vectorize-builtins",
    offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
    offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
  { "tls-markers",
    offsetof (struct gcc_options, x_tls_markers),
    offsetof (struct cl_target_option, x_tls_markers), },
  { "sched-prolog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "sched-epilog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "gen-cell-microcode",
    offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
    offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
  { "warn-cell-microcode",
    offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
    offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
};
/* Inner function to handle attribute((target("..."))) and #pragma GCC target
   parsing.  Return true if there were no errors.  */

static bool
rs6000_inner_target_options (tree args, bool attr_p)
{
  bool ret = true;

  if (args == NULL_TREE)
    ;

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
	{
	  bool error_p = false;
	  bool not_valid_p = false;
	  const char *cpu_opt = NULL;

	  p = NULL;
	  if (strncmp (q, "cpu=", 4) == 0)
	    {
	      int cpu_index = rs6000_cpu_name_lookup (q+4);
	      if (cpu_index >= 0)
		rs6000_cpu_index = cpu_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+4;
		}
	    }
	  else if (strncmp (q, "tune=", 5) == 0)
	    {
	      int tune_index = rs6000_cpu_name_lookup (q+5);
	      if (tune_index >= 0)
		rs6000_tune_index = tune_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+5;
		}
	    }
	  else
	    {
	      size_t i;
	      bool invert = false;
	      char *r = q;

	      error_p = true;
	      if (strncmp (r, "no-", 3) == 0)
		{
		  invert = true;
		  r += 3;
		}

	      for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
		if (strcmp (r, rs6000_opt_masks[i].name) == 0)
		  {
		    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;

		    if (!rs6000_opt_masks[i].valid_target)
		      not_valid_p = true;
		    else
		      {
			error_p = false;
			rs6000_isa_flags_explicit |= mask;

			/* VSX needs altivec, so -mvsx automagically sets
			   altivec and disables -mavoid-indexed-addresses.  */
			if (!invert)
			  {
			    if (mask == OPTION_MASK_VSX)
			      {
				mask |= OPTION_MASK_ALTIVEC;
				TARGET_AVOID_XFORM = 0;
			      }
			  }

			if (rs6000_opt_masks[i].invert)
			  invert = !invert;

			if (invert)
			  rs6000_isa_flags &= ~mask;
			else
			  rs6000_isa_flags |= mask;
		      }
		    break;
		  }

	      if (error_p && !not_valid_p)
		{
		  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
		    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
		      {
			size_t j = rs6000_opt_vars[i].global_offset;
			*((int *) ((char *)&global_options + j)) = !invert;
			error_p = false;
			not_valid_p = false;
			break;
		      }
		}
	    }

	  if (error_p)
	    {
	      const char *eprefix, *esuffix;

	      ret = false;
	      if (attr_p)
		{
		  eprefix = "__attribute__((__target__(";
		  esuffix = ")))";
		}
	      else
		{
		  eprefix = "#pragma GCC target ";
		  esuffix = "";
		}

	      if (cpu_opt)
		error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
		       q, esuffix);

	      else if (not_valid_p)
		error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);

	      else
		error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
	    }
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    {
	      bool ret2 = rs6000_inner_target_options (value, attr_p);
	      if (!ret2)
		ret = false;
	    }
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  return ret;
}
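
/* Usage example (editorial): the strings parsed above come from sources
   such as

     #pragma GCC target ("vsx,no-avoid-indexed-addresses")
     __attribute__((__target__("cpu=power8,htm"))) void f (void);

   Each comma-separated token is matched first against rs6000_opt_masks
   (ISA flag bits) and then against rs6000_opt_vars (plain option
   variables); a "no-" prefix inverts the option.  */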
/* Print out the target options as a list for -mdebug=target.  */

static void
rs6000_debug_target_options (tree args, const char *prefix)
{
  if (args == NULL_TREE)
    fprintf (stderr, "%s<NULL>", prefix);

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  fprintf (stderr, "%s\"%s\"", prefix, q);
	  prefix = ", ";
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    {
	      rs6000_debug_target_options (value, prefix);
	      prefix = ", ";
	    }
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    gcc_unreachable ();
}
/* Hook to validate attribute((target("..."))).  */

static bool
rs6000_valid_attribute_p (tree fndecl,
			  tree ARG_UNUSED (name),
			  tree args,
			  int flags)
{
  struct cl_target_option cur_target;
  bool ret;
  tree old_optimize = build_optimization_node (&global_options);
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  if (TARGET_DEBUG_TARGET)
    {
      tree tname = DECL_NAME (fndecl);
      fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
      if (tname)
	fprintf (stderr, "function: %.*s\n",
		 (int) IDENTIFIER_LENGTH (tname),
		 IDENTIFIER_POINTER (tname));
      else
	fprintf (stderr, "function: unknown\n");

      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (flags)
	fprintf (stderr, "flags: 0x%x\n", flags);

      fprintf (stderr, "--------------------\n");
    }

  old_optimize = build_optimization_node (&global_options);
  func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  rs6000_cpu_index = rs6000_tune_index = -1;
  ret = rs6000_inner_target_options (args, true);

  /* Set up any additional state.  */
  if (ret)
    {
      ret = rs6000_option_override_internal (false);
      new_target = build_target_option_node (&global_options);
    }
  else
    new_target = NULL;

  new_optimize = build_optimization_node (&global_options);

  if (!new_target)
    ret = false;

  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to validate the current #pragma GCC target and set the state, and
   update the macros based on what was changed.  If ARGS is NULL, then
   POP_TARGET is used to reset the options.  */

bool
rs6000_pragma_target_parse (tree args, tree pop_target)
{
  tree prev_tree = build_target_option_node (&global_options);
  tree cur_tree;
  struct cl_target_option *prev_opt, *cur_opt;
  HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
  HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;

  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (pop_target)
	{
	  fprintf (stderr, "pop_target:\n");
	  debug_tree (pop_target);
	}
      else
	fprintf (stderr, "pop_target: <NULL>\n");

      fprintf (stderr, "--------------------\n");
    }

  if (! args)
    {
      cur_tree = ((pop_target)
		  ? pop_target
		  : target_option_default_node);
      cl_target_option_restore (&global_options,
				TREE_TARGET_OPTION (cur_tree));
    }
  else
    {
      rs6000_cpu_index = rs6000_tune_index = -1;
      if (!rs6000_inner_target_options (args, false)
	  || !rs6000_option_override_internal (false)
	  || (cur_tree = build_target_option_node (&global_options))
	     == NULL_TREE)
	{
	  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
	    fprintf (stderr, "invalid pragma\n");

	  return false;
	}
    }

  target_option_current_node = cur_tree;

  /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
     change the macros that are defined.  */
  if (rs6000_target_modify_macros_ptr)
    {
      prev_opt    = TREE_TARGET_OPTION (prev_tree);
      prev_bumask = prev_opt->x_rs6000_builtin_mask;
      prev_flags  = prev_opt->x_rs6000_isa_flags;

      cur_opt     = TREE_TARGET_OPTION (cur_tree);
      cur_flags   = cur_opt->x_rs6000_isa_flags;
      cur_bumask  = cur_opt->x_rs6000_builtin_mask;

      diff_bumask = (prev_bumask ^ cur_bumask);
      diff_flags  = (prev_flags ^ cur_flags);

      if ((diff_flags != 0) || (diff_bumask != 0))
	{
	  /* Delete old macros.  */
	  rs6000_target_modify_macros_ptr (false,
					   prev_flags & diff_flags,
					   prev_bumask & diff_bumask);

	  /* Define new macros.  */
	  rs6000_target_modify_macros_ptr (true,
					   cur_flags & diff_flags,
					   cur_bumask & diff_bumask);
	}
    }

  return true;
}
/* Remember the last target of rs6000_set_current_function.  */
static GTY(()) tree rs6000_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
rs6000_set_current_function (tree fndecl)
{
  tree old_tree = (rs6000_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = (fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		   : NULL_TREE);

  if (TARGET_DEBUG_TARGET)
    {
      bool print_final = false;
      fprintf (stderr, "\n==================== rs6000_set_current_function");

      if (fndecl)
	fprintf (stderr, ", fndecl %s (%p)",
		 (DECL_NAME (fndecl)
		  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
		  : "<unknown>"), (void *)fndecl);

      if (rs6000_previous_fndecl)
	fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);

      fprintf (stderr, "\n");
      if (new_tree)
	{
	  fprintf (stderr, "\nnew fndecl target specific options:\n");
	  debug_tree (new_tree);
	  print_final = true;
	}

      if (old_tree)
	{
	  fprintf (stderr, "\nold fndecl target specific options:\n");
	  debug_tree (old_tree);
	  print_final = true;
	}

      if (print_final)
	fprintf (stderr, "--------------------\n");
    }

  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != rs6000_previous_fndecl)
    {
      rs6000_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree && new_tree != target_option_default_node)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  if (TREE_TARGET_GLOBALS (new_tree))
	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
	  else
	    TREE_TARGET_GLOBALS (new_tree)
	      = save_target_globals_default_opts ();
	}

      else if (old_tree && old_tree != target_option_default_node)
	{
	  new_tree = target_option_current_node;
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  if (TREE_TARGET_GLOBALS (new_tree))
	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
	  else if (new_tree == target_option_default_node)
	    restore_target_globals (&default_target_globals);
	  else
	    TREE_TARGET_GLOBALS (new_tree)
	      = save_target_globals_default_opts ();
	}
    }
}
/* Save the current options */

static void
rs6000_function_specific_save (struct cl_target_option *ptr,
			       struct gcc_options *opts)
{
  ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
  ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
}

/* Restore the current options */

static void
rs6000_function_specific_restore (struct gcc_options *opts,
				  struct cl_target_option *ptr)
{
  opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
  opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
  (void) rs6000_option_override_internal (false);
}

/* Print the current options */

static void
rs6000_function_specific_print (FILE *file, int indent,
				struct cl_target_option *ptr)
{
  rs6000_print_isa_options (file, indent, "Isa options set",
			    ptr->x_rs6000_isa_flags);

  rs6000_print_isa_options (file, indent, "Isa options explicit",
			    ptr->x_rs6000_isa_flags_explicit);
}
/* Helper function to print the current isa or misc options on a line.  */

static void
rs6000_print_options_internal (FILE *file,
			       int indent,
			       const char *string,
			       HOST_WIDE_INT flags,
			       const char *prefix,
			       const struct rs6000_opt_mask *opts,
			       size_t num_elements)
{
  size_t i;
  size_t start_column = 0;
  size_t cur_column;
  size_t max_column = 120;
  size_t prefix_len = strlen (prefix);
  size_t comma_len = 0;
  const char *comma = "";

  if (indent)
    start_column += fprintf (file, "%*s", indent, "");

  if (!flags)
    {
      fprintf (stderr, DEBUG_FMT_S, string, "<none>");
      return;
    }

  start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);

  /* Print the various mask options.  */
  cur_column = start_column;
  for (i = 0; i < num_elements; i++)
    {
      bool invert = opts[i].invert;
      const char *name = opts[i].name;
      const char *no_str = "";
      HOST_WIDE_INT mask = opts[i].mask;
      size_t len = comma_len + prefix_len + strlen (name);

      if (!invert)
	{
	  if ((flags & mask) == 0)
	    {
	      no_str = "no-";
	      len += sizeof ("no-") - 1;
	    }

	  flags &= ~mask;
	}

      else
	{
	  if ((flags & mask) != 0)
	    {
	      no_str = "no-";
	      len += sizeof ("no-") - 1;
	    }

	  flags |= mask;
	}

      cur_column += len;
      if (cur_column > max_column)
	{
	  fprintf (stderr, ", \\\n%*s", (int)start_column, "");
	  cur_column = start_column + len;
	  comma = "";
	}

      fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
      comma = ", ";
      comma_len = sizeof (", ") - 1;
    }

  fputs ("\n", file);
}
/* Helper function to print the current isa options on a line.  */

static void
rs6000_print_isa_options (FILE *file, int indent, const char *string,
			  HOST_WIDE_INT flags)
{
  rs6000_print_options_internal (file, indent, string, flags, "-m",
				 &rs6000_opt_masks[0],
				 ARRAY_SIZE (rs6000_opt_masks));
}

static void
rs6000_print_builtin_options (FILE *file, int indent, const char *string,
			      HOST_WIDE_INT flags)
{
  rs6000_print_options_internal (file, indent, string, flags, "",
				 &rs6000_builtin_mask_names[0],
				 ARRAY_SIZE (rs6000_builtin_mask_names));
}
/* Hook to determine if one function can safely inline another.  */

static bool
rs6000_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's options should be a subset of the caller's, i.e. a vsx
	 function can inline an altivec function but a non-vsx function
	 can't inline a vsx function.  */
      if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
	  == callee_opts->x_rs6000_isa_flags)
	ret = true;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
	     (DECL_NAME (caller)
	      ? IDENTIFIER_POINTER (DECL_NAME (caller))
	      : "<unknown>"),
	     (DECL_NAME (callee)
	      ? IDENTIFIER_POINTER (DECL_NAME (callee))
	      : "<unknown>"),
	     (ret ? "can" : "cannot"));

  return ret;
}
/* Allocate a stack temp and fixup the address so it meets the particular
   memory requirements (either offsettable or REG+REG addressing).  */

rtx
rs6000_allocate_stack_temp (machine_mode mode,
			    bool offsettable_p,
			    bool reg_reg_p)
{
  rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  rtx addr = XEXP (stack, 0);
  int strict_p = (reload_in_progress || reload_completed);

  if (!legitimate_indirect_address_p (addr, strict_p))
    {
      if (offsettable_p
	  && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));

      else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
    }

  return stack;
}
/* Given a memory reference, if it is not a reg or reg+reg addressing, convert
   to such a form to deal with memory reference instructions like STFIWX that
   only take reg+reg addressing.  */

rtx
rs6000_address_for_fpconvert (rtx x)
{
  int strict_p = (reload_in_progress || reload_completed);
  rtx addr;

  gcc_assert (MEM_P (x));
  addr = XEXP (x, 0);
  if (! legitimate_indirect_address_p (addr, strict_p)
      && ! legitimate_indexed_address_p (addr, strict_p))
    {
      if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx reg = XEXP (addr, 0);
	  HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
	  rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
	  gcc_assert (REG_P (reg));
	  emit_insn (gen_add3_insn (reg, reg, size_rtx));
	  addr = reg;
	}
      else if (GET_CODE (addr) == PRE_MODIFY)
	{
	  rtx reg = XEXP (addr, 0);
	  rtx expr = XEXP (addr, 1);
	  gcc_assert (REG_P (reg));
	  gcc_assert (GET_CODE (expr) == PLUS);
	  emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
	  addr = reg;
	}

      x = replace_equiv_address (x, copy_addr_to_reg (addr));
    }

  return x;
}
/* Given a memory reference, if it is not in the form for altivec memory
   reference instructions (i.e. reg or reg+reg addressing with AND of -16),
   convert to the altivec format.  */

rtx
rs6000_address_for_altivec (rtx x)
{
  gcc_assert (MEM_P (x));
  if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
    {
      rtx addr = XEXP (x, 0);
      int strict_p = (reload_in_progress || reload_completed);

      if (!legitimate_indexed_address_p (addr, strict_p)
	  && !legitimate_indirect_address_p (addr, strict_p))
	addr = copy_to_mode_reg (Pmode, addr);

      addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      x = change_address (x, GET_MODE (x), addr);
    }

  return x;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the RS/6000, all integer constants are acceptable, most won't be valid
   for particular insns, though.  Only easy FP constants are acceptable.  */

static bool
rs6000_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (TARGET_ELF && tls_referenced_p (x))
    return false;

  return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
	  || GET_MODE (x) == VOIDmode
	  || (TARGET_POWERPC64 && mode == DImode)
	  || easy_fp_constant (x, mode)
	  || easy_vector_constant (x, mode));
}
/* Return TRUE iff the sequence ending in LAST sets the static chain.  */

static bool
chain_already_loaded (rtx_insn *last)
{
  for (; last != NULL; last = PREV_INSN (last))
    {
      if (NONJUMP_INSN_P (last))
	{
	  rtx patt = PATTERN (last);

	  if (GET_CODE (patt) == SET)
	    {
	      rtx lhs = XEXP (patt, 0);

	      if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
		return true;
	    }
	}
    }
  return false;
}
/* Expand code to perform a call under the AIX or ELFv2 ABI.  */

void
rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
{
  const bool direct_call_p
    = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
  rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
  rtx toc_load = NULL_RTX;
  rtx toc_restore = NULL_RTX;
  rtx func_addr;
  rtx abi_reg = NULL_RTX;
  rtx call[4];
  int n_call;
  rtx insn;

  /* Handle longcall attributes.  */
  if (INTVAL (cookie) & CALL_LONG)
    func_desc = rs6000_longcall_ref (func_desc);

  /* Handle indirect calls.  */
  if (GET_CODE (func_desc) != SYMBOL_REF
      || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
    {
      /* Save the TOC into its reserved slot before the call,
	 and prepare to restore it after the call.  */
      rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
      rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
      rtx stack_toc_mem = gen_frame_mem (Pmode,
					 gen_rtx_PLUS (Pmode, stack_ptr,
						       stack_toc_offset));
      rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
					     gen_rtvec (1, stack_toc_offset),
					     UNSPEC_TOCSLOT);
      toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);

      /* Can we optimize saving the TOC in the prologue or
	 do we need to do it at every call?  */
      if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
	cfun->machine->save_toc_in_prologue = true;
      else
	{
	  MEM_VOLATILE_P (stack_toc_mem) = 1;
	  emit_move_insn (stack_toc_mem, toc_reg);
	}

      if (DEFAULT_ABI == ABI_ELFv2)
	{
	  /* A function pointer in the ELFv2 ABI is just a plain address, but
	     the ABI requires it to be loaded into r12 before the call.  */
	  func_addr = gen_rtx_REG (Pmode, 12);
	  emit_move_insn (func_addr, func_desc);
	  abi_reg = func_addr;
	}
      else
	{
	  /* A function pointer under AIX is a pointer to a data area whose
	     first word contains the actual address of the function, whose
	     second word contains a pointer to its TOC, and whose third word
	     contains a value to place in the static chain register (r11).
	     Note that if we load the static chain, our "trampoline" need
	     not have any executable code.  */

	  /* Load up address of the actual function.  */
	  func_desc = force_reg (Pmode, func_desc);
	  func_addr = gen_reg_rtx (Pmode);
	  emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));

	  /* Prepare to load the TOC of the called function.  Note that the
	     TOC load must happen immediately before the actual call so
	     that unwinding the TOC registers works correctly.  See the
	     comment in frob_update_context.  */
	  rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
	  rtx func_toc_mem = gen_rtx_MEM (Pmode,
					  gen_rtx_PLUS (Pmode, func_desc,
							func_toc_offset));
	  toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);

	  /* If we have a static chain, load it up.  But, if the call was
	     originally direct, the 3rd word has not been written since no
	     trampoline has been built, so we ought not to load it, lest we
	     override a static chain value.  */
	  if (!direct_call_p
	      && TARGET_POINTERS_TO_NESTED_FUNCTIONS
	      && !chain_already_loaded (get_current_sequence ()->next->last))
	    {
	      rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
	      rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
	      rtx func_sc_mem = gen_rtx_MEM (Pmode,
					     gen_rtx_PLUS (Pmode, func_desc,
							   func_sc_offset));
	      emit_move_insn (sc_reg, func_sc_mem);
	      abi_reg = sc_reg;
	    }
	}
    }
  else
    {
      /* Direct calls use the TOC: for local calls, the callee will
	 assume the TOC register is set; for non-local calls, the
	 PLT stub needs the TOC register.  */
      abi_reg = toc_reg;
      func_addr = func_desc;
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  n_call = 1;

  if (toc_load)
    call[n_call++] = toc_load;
  if (toc_restore)
    call[n_call++] = toc_restore;

  call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
  insn = emit_call_insn (insn);

  /* Mention all registers defined by the ABI to hold information
     as uses in CALL_INSN_FUNCTION_USAGE.  */
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
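
/* Illustrative layout (editorial) of the AIX function descriptor
   dereferenced by the indirect-call path above, with 8-byte Pmode:

     fd + 0 : entry-point address -> copied into func_addr
     fd + 8 : callee's TOC value  -> referenced by toc_load at the call
     fd + 16: static chain        -> loaded into r11 when present  */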
/* Expand code to perform a sibling call under the AIX or ELFv2 ABI.  */

void
rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
{
  rtx call[2];
  rtx insn;

  gcc_assert (INTVAL (cookie) == 0);

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
  insn = emit_call_insn (insn);

  /* Note use of the TOC register.  */
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
}
/* Return whether we need to always update the saved TOC pointer when we update
   the stack pointer.  */

static bool
rs6000_save_toc_in_prologue_p (void)
{
  return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
}
#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Fills in the label name that should be used for a 476 link stack thunk.  */

void
get_ppc476_thunk_name (char name[32])
{
  gcc_assert (TARGET_LINK_STACK);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__ppc476.get_thunk");
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
}
/* This function emits the simple thunk routine that is used to preserve
   the link stack on the 476 cpu.  */

static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
static void
rs6000_code_end (void)
{
  char name[32];
  tree decl;

  if (!TARGET_LINK_STACK)
    return;

  get_ppc476_thunk_name (name);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;

  if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));
      DECL_WEAK (decl) = 1;
      ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
      targetm.asm_out.globalize_label (asm_out_file, name);
      targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, name);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  fputs ("\tblr\n", asm_out_file);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
/* Add r30 to hard reg set if the prologue sets it up and it is not
   pic_offset_table_rtx.  */

static void
rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
{
  if (!TARGET_SINGLE_PIC_BASE
      && TARGET_TOC
      && TARGET_MINIMAL_TOC
      && !constant_pool_empty_p ())
    add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
  if (cfun->machine->split_stack_argp_used)
    add_to_hard_reg_set (&set->set, Pmode, 12);
}
/* Helper function for rs6000_split_logical to emit a logical instruction after
   splitting the operation to single GPR registers.

   DEST is the destination register.
   OP1 and OP2 are the input source registers.
   CODE is the base operation (AND, IOR, XOR, NOT).
   MODE is the machine mode.
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

static void
rs6000_split_logical_inner (rtx dest,
			    rtx op1,
			    rtx op2,
			    enum rtx_code code,
			    machine_mode mode,
			    bool complement_final_p,
			    bool complement_op1_p,
			    bool complement_op2_p)
{
  rtx bool_rtx;

  /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
  if (op2 && GET_CODE (op2) == CONST_INT
      && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      HOST_WIDE_INT mask = GET_MODE_MASK (mode);
      HOST_WIDE_INT value = INTVAL (op2) & mask;

      /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
      if (code == AND)
	{
	  if (value == 0)
	    {
	      emit_insn (gen_rtx_SET (dest, const0_rtx));
	      return;
	    }

	  else if (value == mask)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}

      /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
	 into separate ORI/ORIS or XORI/XORIS instructions.  */
      else if (code == IOR || code == XOR)
	{
	  if (value == 0)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}
    }

  if (code == AND && mode == SImode
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      emit_insn (gen_andsi3 (dest, op1, op2));
      return;
    }

  if (complement_op1_p)
    op1 = gen_rtx_NOT (mode, op1);

  if (complement_op2_p)
    op2 = gen_rtx_NOT (mode, op2);

  /* For canonical RTL, if only one arm is inverted it is the first.  */
  if (!complement_op1_p && complement_op2_p)
    std::swap (op1, op2);

  bool_rtx = ((code == NOT)
	      ? gen_rtx_NOT (mode, op1)
	      : gen_rtx_fmt_ee (code, mode, op1, op2));

  if (complement_final_p)
    bool_rtx = gen_rtx_NOT (mode, bool_rtx);

  emit_insn (gen_rtx_SET (dest, bool_rtx));
}
40241 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
40242 operations are split immediately during RTL generation to allow for more
40243 optimizations of the AND/IOR/XOR.
40245 OPERANDS is an array containing the destination and two input operands.
40246 CODE is the base operation (AND, IOR, XOR, NOT).
40247 MODE is the machine mode.
40248 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40249 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40250 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
40251 CLOBBER_REG is either NULL or a scratch register of type CC to allow
40252 formation of the AND instructions. */
static void
rs6000_split_logical_di (rtx operands[3],
                         enum rtx_code code,
                         bool complement_final_p,
                         bool complement_op1_p,
                         bool complement_op2_p)
{
  const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
  const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
  const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
  enum hi_lo { hi = 0, lo = 1 };
  rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
  size_t i;

  op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
  op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
  op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
  op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);

  if (code == NOT)
    op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
  else
    {
      if (GET_CODE (operands[2]) != CONST_INT)
        {
          op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
          op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
        }
      else
        {
          HOST_WIDE_INT value = INTVAL (operands[2]);
          HOST_WIDE_INT value_hi_lo[2];

          gcc_assert (!complement_final_p);
          gcc_assert (!complement_op1_p);
          gcc_assert (!complement_op2_p);

          value_hi_lo[hi] = value >> 32;
          value_hi_lo[lo] = value & lower_32bits;

          for (i = 0; i < 2; i++)
            {
              HOST_WIDE_INT sub_value = value_hi_lo[i];

              if (sub_value & sign_bit)
                sub_value |= upper_32bits;

              op2_hi_lo[i] = GEN_INT (sub_value);

              /* If this is an AND instruction, check to see if we need to load
                 the value in a register.  */
              if (code == AND && sub_value != -1 && sub_value != 0
                  && !and_operand (op2_hi_lo[i], SImode))
                op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
            }
        }
    }

  for (i = 0; i < 2; i++)
    {
      /* Split large IOR/XOR operations.  */
      if ((code == IOR || code == XOR)
          && GET_CODE (op2_hi_lo[i]) == CONST_INT
          && !complement_final_p
          && !complement_op1_p
          && !complement_op2_p
          && !logical_const_operand (op2_hi_lo[i], SImode))
        {
          HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
          HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
          HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
          rtx tmp = gen_reg_rtx (SImode);

          /* Make sure the constant is sign extended.  */
          if ((hi_16bits & sign_bit) != 0)
            hi_16bits |= upper_32bits;

          rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
                                      code, SImode, false, false, false);

          rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
                                      code, SImode, false, false, false);
        }
      else
        rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
                                    code, SImode, complement_final_p,
                                    complement_op1_p, complement_op2_p);
    }

  return;
}
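/* A standalone sketch (an editorial illustration, not part of GCC) of the
   constant split done above for large IOR/XOR values: the 32-bit constant is
   broken into a sign-extended high 16-bit piece (for ORIS/XORIS) and a low
   16-bit piece (for ORI/XORI); ORing the pieces back together reproduces the
   sign-extended original.  The name demo_split_ior_constant is hypothetical.
   Kept under "#if 0" so it is not compiled here; extracted on its own it
   builds as a plain C program.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static void
demo_split_ior_constant (int64_t value)
{
  int64_t hi_16bits = value & 0xffff0000;
  int64_t lo_16bits = value & 0x0000ffff;

  /* Mirror the sign extension above so hi_16bits remains a valid SImode
     value when held in a 64-bit host integer.  */
  if (hi_16bits & 0x80000000LL)
    hi_16bits |= ~(int64_t) 0xffffffff;

  printf ("%#llx -> oris piece %#llx, ori piece %#llx\n",
          (long long) value, (long long) hi_16bits, (long long) lo_16bits);
}

int
main (void)
{
  demo_split_ior_constant (0x12345678);  /* needs both ORIS and ORI.  */
  demo_split_ior_constant (0x87654321);  /* high bit set: sign extended.  */
  return 0;
}
#endif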
/* Split the insns that make up boolean operations operating on multiple GPR
   registers.  The boolean MD patterns ensure that the inputs either are
   exactly the same as the output registers, or there is no overlap.

   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
void
rs6000_split_logical (rtx operands[3],
                      enum rtx_code code,
                      bool complement_final_p,
                      bool complement_op1_p,
                      bool complement_op2_p)
{
  machine_mode mode = GET_MODE (operands[0]);
  machine_mode sub_mode;
  rtx op0, op1, op2;
  int sub_size, regno0, regno1, nregs, i;

  /* If this is DImode, use the specialized version that can run before
     register allocation.  */
  if (mode == DImode && !TARGET_POWERPC64)
    {
      rs6000_split_logical_di (operands, code, complement_final_p,
                               complement_op1_p, complement_op2_p);
      return;
    }

  op0 = operands[0];
  op1 = operands[1];
  op2 = (code == NOT) ? NULL_RTX : operands[2];
  sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
  sub_size = GET_MODE_SIZE (sub_mode);
  regno0 = REGNO (op0);
  regno1 = REGNO (op1);

  gcc_assert (reload_completed);
  gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
  gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));

  nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
  gcc_assert (nregs > 1);

  if (op2 && REG_P (op2))
    gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));

  for (i = 0; i < nregs; i++)
    {
      int offset = i * sub_size;
      rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
      rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
      rtx sub_op2 = ((code == NOT)
                     ? NULL_RTX
                     : simplify_subreg (sub_mode, op2, mode, offset));

      rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
                                  complement_final_p, complement_op1_p,
                                  complement_op2_p);
    }

  return;
}
/* Return true if the peephole2 can combine a load involving a combination of
   an addis instruction and a load with an offset that can be fused together on
   a power8.  */

bool
fusion_gpr_load_p (rtx addis_reg,      /* register set via addis.  */
                   rtx addis_value,    /* addis value.  */
                   rtx target,         /* target register that is loaded.  */
                   rtx mem)            /* bottom part of the memory addr.  */
{
  rtx addr;
  rtx base_reg;

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!base_reg_operand (target, GET_MODE (target)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Allow sign/zero extension.  */
  if (GET_CODE (mem) == ZERO_EXTEND
      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
    mem = XEXP (mem, 0);

  if (!MEM_P (mem))
    return false;

  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
    return false;

  addr = XEXP (mem, 0);                        /* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
    return false;

  /* Validate that the register used to load the high value is either the
     register being loaded, or we can safely replace its use.

     This function is only called from the peephole2 pass and we assume that
     there are 2 instructions in the peephole (addis and load), so we want to
     check if the target register was not used in the memory address and the
     register to hold the addis result is dead after the peephole.  */
  if (REGNO (addis_reg) != REGNO (target))
    {
      if (reg_mentioned_p (target, mem))
        return false;

      if (!peep2_reg_dead_p (2, addis_reg))
        return false;

      /* If the target register being loaded is the stack pointer, we must
         avoid loading any other value into it, even temporarily.  */
      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
        return false;
    }

  base_reg = XEXP (addr, 0);
  return REGNO (addis_reg) == REGNO (base_reg);
}
/* During the peephole2 pass, adjust and expand the insns for a load fusion
   sequence.  We adjust the addis register to use the target register.  If the
   load sign extends, we adjust the code to do the zero extending load, and an
   explicit sign extension later since the fusion only covers zero extending
   loads.

   The operands are:
        operands[0]     register set with addis (to be replaced with target)
        operands[1]     value set via addis
        operands[2]     target register being loaded
        operands[3]     D-form memory reference using operands[0].  */
void
expand_fusion_gpr_load (rtx *operands)
{
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  if (GET_CODE (orig_mem) == ZERO_EXTEND
      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);

  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
                            UNSPEC_FUSION_GPR);
  emit_insn (gen_rtx_SET (target, new_mem));

  if (extend == SIGN_EXTEND)
    {
      int sub_off = ((BYTES_BIG_ENDIAN)
                     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
                     : 0);
      rtx sign_reg
        = simplify_subreg (target_mode, target, extend_mode, sub_off);

      emit_insn (gen_rtx_SET (target,
                              gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
    }

  return;
}
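/* A standalone sketch (an editorial illustration, not part of GCC) of why
   the rewrite above is safe: a sign-extending load can be replaced by a
   zero-extending load followed by an explicit sign extension of the low
   part, which is exactly what the SIGN_EXTEND insn emitted above does.
   demo_sext_of_zext is a hypothetical name; kept under "#if 0" so it is not
   compiled as part of this file.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int64_t
demo_sext_of_zext (int16_t mem_value)
{
  uint64_t zext = (uint16_t) mem_value;  /* the zero-extending load.  */
  int64_t sext = (int16_t) zext;         /* the explicit sign_extend.  */
  return sext;
}

int
main (void)
{
  assert (demo_sext_of_zext (-123) == -123);
  assert (demo_sext_of_zext (456) == 456);
  return 0;
}
#endif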
/* Emit the addis instruction that will be part of a fused instruction
   sequence.  */

void
emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
                   const char *mode_name)
{
  char insn_template[80];
  rtx fuse_ops[10];
  const char *addis_str = NULL;
  const char *comment_str = ASM_COMMENT_START;

  if (*comment_str == ' ')
    comment_str++;

  /* Emit the addis instruction.  */
  fuse_ops[0] = target;
  if (satisfies_constraint_L (addis_value))
    {
      fuse_ops[1] = addis_value;
      addis_str = "lis %0,%v1";
    }

  else if (GET_CODE (addis_value) == PLUS)
    {
      rtx op0 = XEXP (addis_value, 0);
      rtx op1 = XEXP (addis_value, 1);

      if (REG_P (op0) && CONST_INT_P (op1)
          && satisfies_constraint_L (op1))
        {
          fuse_ops[1] = op0;
          fuse_ops[2] = op1;
          addis_str = "addis %0,%1,%v2";
        }
    }

  else if (GET_CODE (addis_value) == HIGH)
    {
      rtx value = XEXP (addis_value, 0);
      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
        {
          fuse_ops[1] = XVECEXP (value, 0, 0);  /* symbol ref.  */
          fuse_ops[2] = XVECEXP (value, 0, 1);  /* TOC register.  */
          if (TARGET_ELF)
            addis_str = "addis %0,%2,%1@toc@ha";

          else if (TARGET_XCOFF)
            addis_str = "addis %0,%1@u(%2)";

          else
            gcc_unreachable ();
        }

      else if (GET_CODE (value) == PLUS)
        {
          rtx op0 = XEXP (value, 0);
          rtx op1 = XEXP (value, 1);

          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_TOCREL
              && CONST_INT_P (op1))
            {
              fuse_ops[1] = XVECEXP (op0, 0, 0);  /* symbol ref.  */
              fuse_ops[2] = XVECEXP (op0, 0, 1);  /* TOC register.  */
              fuse_ops[3] = op1;
              if (TARGET_ELF)
                addis_str = "addis %0,%2,%1+%3@toc@ha";

              else if (TARGET_XCOFF)
                addis_str = "addis %0,%1+%3@u(%2)";

              else
                gcc_unreachable ();
            }
        }

      else if (satisfies_constraint_L (value))
        {
          fuse_ops[1] = value;
          addis_str = "lis %0,%v1";
        }

      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
        {
          fuse_ops[1] = value;
          addis_str = "lis %0,%1@ha";
        }
    }

  if (!addis_str)
    fatal_insn ("Could not generate addis value for fusion", addis_value);

  sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
           comment, mode_name);
  output_asm_insn (insn_template, fuse_ops);
}
/* Emit a D-form load or store instruction that is the second instruction
   of a fusion sequence.  */

void
emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
                        const char *insn_str)
{
  rtx fuse_ops[10];
  char insn_template[80];

  fuse_ops[0] = load_store_reg;
  fuse_ops[1] = addis_reg;

  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
    {
      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == UNSPEC
           && XINT (offset, 1) == UNSPEC_TOCREL)
    {
      if (TARGET_ELF)
        sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
        sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      else
        gcc_unreachable ();

      fuse_ops[2] = XVECEXP (offset, 0, 0);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == PLUS
           && GET_CODE (XEXP (offset, 0)) == UNSPEC
           && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
           && CONST_INT_P (XEXP (offset, 1)))
    {
      rtx tocrel_unspec = XEXP (offset, 0);
      if (TARGET_ELF)
        sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
        sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);

      else
        gcc_unreachable ();

      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
      fuse_ops[3] = XEXP (offset, 1);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
    {
      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else
    fatal_insn ("Unable to generate load/store offset for fusion", offset);
}
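/* A standalone sketch (an editorial illustration, not part of GCC) of the
   template expansion used above: the %% escapes survive sprintf, so the
   resulting string still carries the %0/%1/%2 operand placeholders that
   output_asm_insn later substitutes.  Kept under "#if 0"; builds as a plain
   C program on its own.  */
#if 0
#include <stdio.h>

int
main (void)
{
  char insn_template[80];
  const char *insn_str = "lwz";

  sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
  /* Prints "lwz %0,%2(%1)", ready for operand substitution.  */
  printf ("%s\n", insn_template);
  return 0;
}
#endif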
/* Wrap a TOC address that can be fused to indicate that special fusion
   processing is needed.  */

rtx
fusion_wrap_memory_address (rtx old_mem)
{
  rtx old_addr = XEXP (old_mem, 0);
  rtvec v = gen_rtvec (1, old_addr);
  rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
  return replace_equiv_address_nv (old_mem, new_addr, false);
}
/* Given an address, convert it into the addis and load offset parts.  Addresses
   created during the peephole2 process look like:
        (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
                (unspec [(...)] UNSPEC_TOCREL))

   Addresses created via toc fusion look like:
        (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS))  */

static void
fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
{
  rtx hi, lo;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
    {
      lo = XVECEXP (addr, 0, 0);
      hi = gen_rtx_HIGH (Pmode, lo);
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
    {
      hi = XEXP (addr, 0);
      lo = XEXP (addr, 1);
    }
  else
    gcc_unreachable ();

  *p_hi = hi;
  *p_lo = lo;
}
/* Return a string to fuse an addis instruction with a gpr load to the same
   register that we loaded up the addis instruction.  The address that is used
   is the logical address that was formed during peephole2:
        (lo_sum (high) (low-part))

   Or the address is the TOC address that is wrapped before register allocation:
        (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_gpr_load (rtx target, rtx mem)
{
  rtx addis_value;
  rtx load_offset;
  rtx addr;
  machine_mode mode;
  const char *load_str = NULL;
  const char *mode_name = NULL;

  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  gcc_assert (REG_P (target) && MEM_P (mem));

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &addis_value, &load_offset);

  /* Now emit the load instruction to the same register.  */
  mode = GET_MODE (mem);
  switch (mode)
    {
    case QImode:
      mode_name = "char";
      load_str = "lbz";
      break;

    case HImode:
      mode_name = "short";
      load_str = "lhz";
      break;

    case SImode:
    case SFmode:
      mode_name = (mode == SFmode) ? "float" : "int";
      load_str = "lwz";
      break;

    case DImode:
    case DFmode:
      gcc_assert (TARGET_POWERPC64);
      mode_name = (mode == DFmode) ? "double" : "long";
      load_str = "ld";
      break;

    default:
      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
    }

  /* Emit the addis instruction.  */
  emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);

  /* Emit the D-form load instruction.  */
  emit_fusion_load_store (target, target, load_offset, load_str);

  return "";
}
/* Return true if the peephole2 can combine a load/store involving a
   combination of an addis instruction and the memory operation.  This was
   added to the ISA 3.0 (power9) hardware.  */

bool
fusion_p9_p (rtx addis_reg,     /* register set via addis.  */
             rtx addis_value,   /* addis value.  */
             rtx dest,          /* destination (memory or register).  */
             rtx src)           /* source (register or memory).  */
{
  rtx addr, mem, offset;
  enum machine_mode mode = GET_MODE (src);

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Ignore extend operations that are part of the load.  */
  if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
    src = XEXP (src, 0);

  /* Test for memory<-register or register<-memory.  */
  if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
    {
      if (!MEM_P (dest))
        return false;

      mem = dest;
    }

  else if (MEM_P (src))
    {
      if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
        return false;

      mem = src;
    }

  else
    return false;

  addr = XEXP (mem, 0);                        /* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) == PLUS)
    {
      if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
        return false;

      return satisfies_constraint_I (XEXP (addr, 1));
    }

  else if (GET_CODE (addr) == LO_SUM)
    {
      if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
        return false;

      offset = XEXP (addr, 1);
      if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
        return small_toc_ref (offset, GET_MODE (offset));

      else if (TARGET_ELF && !TARGET_POWERPC64)
        return CONSTANT_P (offset);
    }

  return false;
}
/* During the peephole2 pass, adjust and expand the insns for an extended fusion
   load sequence.

   The operands are:
        operands[0]     register set with addis
        operands[1]     value set via addis
        operands[2]     target register being loaded
        operands[3]     D-form memory reference using operands[0].

   This is similar to the fusion introduced with power8, except it scales to
   both loads/stores and does not require the result register to be the same as
   the base register.  At the moment, we only do this if register set with addis
   is dead.  */
void
expand_fusion_p9_load (rtx *operands)
{
  rtx tmp_reg = operands[0];
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);

  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
                            UNSPEC_FUSION_P9);

  set = gen_rtx_SET (target, new_mem);
  clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
  emit_insn (insn);

  return;
}
/* During the peephole2 pass, adjust and expand the insns for an extended fusion
   store sequence.

   The operands are:
        operands[0]     register set with addis
        operands[1]     value set via addis
        operands[2]     target D-form memory being stored to
        operands[3]     register being stored

   This is similar to the fusion introduced with power8, except it scales to
   both loads/stores and does not require the result register to be the same as
   the base register.  At the moment, we only do this if register set with addis
   is dead.  */
void
expand_fusion_p9_store (rtx *operands)
{
  rtx tmp_reg = operands[0];
  rtx addis_value = operands[1];
  rtx orig_mem = operands[2];
  rtx src = operands[3];
  rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (orig_mem);
  machine_mode ptr_mode = Pmode;

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
                            UNSPEC_FUSION_P9);

  set = gen_rtx_SET (new_mem, new_src);
  clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
  emit_insn (insn);

  return;
}
/* Return a string to fuse an addis instruction with a load using extended
   fusion.  The address that is used is the logical address that was formed
   during peephole2: (lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
{
  enum machine_mode mode = GET_MODE (reg);
  rtx hi;
  rtx lo;
  rtx addr;
  const char *load_string;
  int r;

  if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
    {
      mem = XEXP (mem, 0);
      mode = GET_MODE (mem);
    }

  if (GET_CODE (reg) == SUBREG)
    {
      gcc_assert (SUBREG_BYTE (reg) == 0);
      reg = SUBREG_REG (reg);
    }

  if (!REG_P (reg))
    fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);

  r = REGNO (reg);
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode)
        load_string = "lfs";
      else if (mode == DFmode || mode == DImode)
        load_string = "lfd";
      else
        gcc_unreachable ();
    }
  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
    {
      if (mode == SFmode)
        load_string = "lxssp";
      else if (mode == DFmode || mode == DImode)
        load_string = "lxsd";
      else
        gcc_unreachable ();
    }
  else if (INT_REGNO_P (r))
    {
      switch (mode)
        {
        case QImode:
          load_string = "lbz";
          break;
        case HImode:
          load_string = "lhz";
          break;
        case SImode:
        case SFmode:
          load_string = "lwz";
          break;
        case DImode:
        case DFmode:
          if (!TARGET_POWERPC64)
            gcc_unreachable ();
          load_string = "ld";
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);

  if (!MEM_P (mem))
    fatal_insn ("emit_fusion_p9_load not MEM", mem);

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &hi, &lo);

  /* Emit the addis instruction.  */
  emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));

  /* Emit the D-form load instruction.  */
  emit_fusion_load_store (reg, tmp_reg, lo, load_string);

  return "";
}
/* Return a string to fuse an addis instruction with a store using extended
   fusion.  The address that is used is the logical address that was formed
   during peephole2: (lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
{
  enum machine_mode mode = GET_MODE (reg);
  rtx hi;
  rtx lo;
  rtx addr;
  const char *store_string;
  int r;

  if (GET_CODE (reg) == SUBREG)
    {
      gcc_assert (SUBREG_BYTE (reg) == 0);
      reg = SUBREG_REG (reg);
    }

  if (!REG_P (reg))
    fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);

  r = REGNO (reg);
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode)
        store_string = "stfs";
      else if (mode == DFmode)
        store_string = "stfd";
      else
        gcc_unreachable ();
    }
  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
    {
      if (mode == SFmode)
        store_string = "stxssp";
      else if (mode == DFmode || mode == DImode)
        store_string = "stxsd";
      else
        gcc_unreachable ();
    }
  else if (INT_REGNO_P (r))
    {
      switch (mode)
        {
        case QImode:
          store_string = "stb";
          break;
        case HImode:
          store_string = "sth";
          break;
        case SImode:
        case SFmode:
          store_string = "stw";
          break;
        case DImode:
        case DFmode:
          if (!TARGET_POWERPC64)
            gcc_unreachable ();
          store_string = "std";
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);

  if (!MEM_P (mem))
    fatal_insn ("emit_fusion_p9_store not MEM", mem);

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &hi, &lo);

  /* Emit the addis instruction.  */
  emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));

  /* Emit the D-form store instruction.  */
  emit_fusion_load_store (reg, tmp_reg, lo, store_string);

  return "";
}
/* Analyze vector computations and remove unnecessary doubleword
   swaps (xxswapdi instructions).  This pass is performed only
   for little-endian VSX code generation.

   For this specific case, loads and stores of 4x32 and 2x64 vectors
   are inefficient.  These are implemented using the lxvd2x and
   stxvd2x instructions, which invert the order of doublewords in
   a vector register.  Thus the code generation inserts an xxswapdi
   after each such load, and prior to each such store.  (For spill
   code after register assignment, an additional xxswapdi is inserted
   following each store in order to return a hard register to its
   unpermuted value.)

   The extra xxswapdi instructions reduce performance.  This can be
   particularly bad for vectorized code.  The purpose of this pass
   is to reduce the number of xxswapdi instructions required for
   correctness.

   The primary insight is that much code that operates on vectors
   does not care about the relative order of elements in a register,
   so long as the correct memory order is preserved.  If we have
   a computation where all input values are provided by lxvd2x/xxswapdi
   sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
   and all intermediate computations are pure SIMD (independent of
   element order), then all the xxswapdi's associated with the loads
   and stores may be removed.

   This pass uses some of the infrastructure and logical ideas from
   the "web" pass in web.c.  We create maximal webs of computations
   fitting the description above using union-find.  Each such web is
   then optimized by removing its unnecessary xxswapdi instructions.

   The pass is placed prior to global optimization so that we can
   perform the optimization in the safest and simplest way possible;
   that is, by replacing each xxswapdi insn with a register copy insn.
   Subsequent forward propagation will remove copies where possible.

   There are some operations sensitive to element order for which we
   can still allow the operation, provided we modify those operations.
   These include CONST_VECTORs, for which we must swap the first and
   second halves of the constant vector; and SUBREGs, for which we
   must adjust the byte offset to account for the swapped doublewords.
   A remaining opportunity would be non-immediate-form splats, for
   which we should adjust the selected lane of the input.  We should
   also make code generation adjustments for sum-across operations,
   since this is a common vectorizer reduction.

   Because we run prior to the first split, we can see loads and stores
   here that match *vsx_le_perm_{load,store}_<mode>.  These are vanilla
   vector loads and stores that have not yet been split into a permuting
   load/store and a swap.  (One way this can happen is with a builtin
   call to vec_vsx_{ld,st}.)  We can handle these as well, but rather
   than deleting a swap, we convert the load/store into a permuting
   load/store (which effectively removes the swap).  */
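/* A standalone sketch (an editorial illustration, not part of GCC) of the
   element reordering the pass reasons about: on little-endian, lxvd2x
   delivers the two doublewords in reverse, so a 4x32 vector's elements
   arrive in the order given by the mapping below, and applying the mapping
   twice restores the original order.  That involution is why a matched
   load-swap / swap-store pair can drop both swaps.  demo_swap_doublewords
   is a hypothetical name; kept under "#if 0" so it is not compiled here.  */
#if 0
#include <stdio.h>

static void
demo_swap_doublewords (const int *in, int *out, int n_elts)
{
  int half = n_elts / 2;
  for (int i = 0; i < n_elts; ++i)
    out[i] = in[(i + half) % n_elts];
}

int
main (void)
{
  int v[4] = { 0, 1, 2, 3 }, once[4], twice[4];
  demo_swap_doublewords (v, once, 4);      /* 2 3 0 1 */
  demo_swap_doublewords (once, twice, 4);  /* 0 1 2 3 again */
  for (int i = 0; i < 4; ++i)
    printf ("%d %d\n", once[i], twice[i]);
  return 0;
}
#endif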
/* Notes on Permutes

   We do not currently handle computations that contain permutes.  There
   is a general transformation that can be performed correctly, but it
   may introduce more expensive code than it replaces.  To handle these
   would require a cost model to determine when to perform the optimization.
   This commentary records how this could be done if desired.

   The most general permute is something like this (example for V16QI):

   (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
                     (parallel [(const_int a0) (const_int a1)
                                 ...
                                (const_int a14) (const_int a15)]))

   where a0,...,a15 are in [0,31] and select elements from op1 and op2
   to produce in the result.

   Regardless of mode, we can convert the PARALLEL to a mask of 16
   byte-element selectors.  Let's call this M, with M[i] representing
   the ith byte-element selector value.  Then if we swap doublewords
   throughout the computation, we can get correct behavior by replacing
   M with M' as follows:

    M'[i] = { (M[i]+8)%16         : M[i] in [0,15]
            { ((M[i]+8)%16)+16    : M[i] in [16,31]

   This seems promising at first, since we are just replacing one mask
   with another.  But certain masks are preferable to others.  If M
   is a mask that matches a vmrghh pattern, for example, M' certainly
   will not.  Instead of a single vmrghh, we would generate a load of
   M' and a vperm.  So we would need to know how many xxswapd's we can
   remove as a result of this transformation to determine if it's
   profitable; and preferably the logic would need to be aware of all
   the special preferable masks.

   Another form of permute is an UNSPEC_VPERM, in which the mask is
   already in a register.  In some cases, this mask may be a constant
   that we can discover with ud-chains, in which case the above
   transformation is ok.  However, the common usage here is for the
   mask to be produced by an UNSPEC_LVSL, in which case the mask
   cannot be known at compile time.  In such a case we would have to
   generate several instructions to compute M' as above at run time,
   and a cost model is needed again.

   However, when the mask M for an UNSPEC_VPERM is loaded from the
   constant pool, we can replace M with M' as above at no cost
   beyond adding a constant pool entry.  */
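/* A standalone sketch (an editorial illustration, not part of GCC)
   implementing the M -> M' rewrite described above, so the formula can be
   checked concretely: selectors below 16 rotate within op1's bytes, and
   selectors 16..31 rotate within op2's bytes.  demo_adjust_vperm_mask is a
   hypothetical name; kept under "#if 0" so it is not compiled here.  */
#if 0
#include <stdio.h>

static void
demo_adjust_vperm_mask (const unsigned char *m, unsigned char *m_prime)
{
  for (int i = 0; i < 16; ++i)
    if (m[i] < 16)
      m_prime[i] = (m[i] + 8) % 16;
    else
      m_prime[i] = ((m[i] + 8) % 16) + 16;
}

int
main (void)
{
  unsigned char m[16], m_prime[16];
  for (int i = 0; i < 16; ++i)
    m[i] = i;  /* an identity mask over op1.  */
  demo_adjust_vperm_mask (m, m_prime);
  for (int i = 0; i < 16; ++i)
    printf ("%2d -> %2d\n", m[i], m_prime[i]);
  return 0;
}
#endif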
/* This is based on the union-find logic in web.c.  web_entry_base is
   defined in df.h.  */
class swap_web_entry : public web_entry_base
{
 public:
  /* Pointer to the insn.  */
  rtx_insn *insn;
  /* Set if insn contains a mention of a vector register.  All other
     fields are undefined if this field is unset.  */
  unsigned int is_relevant : 1;
  /* Set if insn is a load.  */
  unsigned int is_load : 1;
  /* Set if insn is a store.  */
  unsigned int is_store : 1;
  /* Set if insn is a doubleword swap.  This can either be a register swap
     or a permuting load or store (test is_load and is_store for this).  */
  unsigned int is_swap : 1;
  /* Set if the insn has a live-in use of a parameter register.  */
  unsigned int is_live_in : 1;
  /* Set if the insn has a live-out def of a return register.  */
  unsigned int is_live_out : 1;
  /* Set if the insn contains a subreg reference of a vector register.  */
  unsigned int contains_subreg : 1;
  /* Set if the insn contains a 128-bit integer operand.  */
  unsigned int is_128_int : 1;
  /* Set if this is a call-insn.  */
  unsigned int is_call : 1;
  /* Set if this insn does not perform a vector operation for which
     element order matters, or if we know how to fix it up if it does.
     Undefined if is_swap is set.  */
  unsigned int is_swappable : 1;
  /* A nonzero value indicates what kind of special handling for this
     insn is required if doublewords are swapped.  Undefined if
     is_swappable is not set.  */
  unsigned int special_handling : 4;
  /* Set if the web represented by this entry cannot be optimized.  */
  unsigned int web_not_optimizable : 1;
  /* Set if this insn should be deleted.  */
  unsigned int will_delete : 1;
};

enum special_handling_values {
  SH_NONE = 0,
  SH_CONST_VECTOR,
  SH_SUBREG,
  SH_NOSWAP_LD,
  SH_NOSWAP_ST,
  SH_EXTRACT,
  SH_SPLAT,
  SH_XXPERMDI,
  SH_CONCAT,
  SH_VPERM
};
/* Union INSN with all insns containing definitions that reach USE.
   Detect whether USE is live-in to the current function.  */
static void
union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
{
  struct df_link *link = DF_REF_CHAIN (use);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_in = 1;

  while (link)
    {
      if (DF_REF_IS_ARTIFICIAL (link->ref))
        insn_entry[INSN_UID (insn)].is_live_in = 1;

      if (DF_REF_INSN_INFO (link->ref))
        {
          rtx def_insn = DF_REF_INSN (link->ref);
          (void)unionfind_union (insn_entry + INSN_UID (insn),
                                 insn_entry + INSN_UID (def_insn));
        }

      link = link->next;
    }
}
/* Union INSN with all insns containing uses reached from DEF.
   Detect whether DEF is live-out from the current function.  */
static void
union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
{
  struct df_link *link = DF_REF_CHAIN (def);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_out = 1;

  while (link)
    {
      /* This could be an eh use or some other artificial use;
         we treat these all the same (killing the optimization).  */
      if (DF_REF_IS_ARTIFICIAL (link->ref))
        insn_entry[INSN_UID (insn)].is_live_out = 1;

      if (DF_REF_INSN_INFO (link->ref))
        {
          rtx use_insn = DF_REF_INSN (link->ref);
          (void)unionfind_union (insn_entry + INSN_UID (insn),
                                 insn_entry + INSN_UID (use_insn));
        }

      link = link->next;
    }
}
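/* A standalone sketch (an editorial illustration, not part of GCC) of the
   union-find flavor the two routines above rely on: entries indexed by insn
   UID are merged so that every insn reachable through def-use links lands in
   one web.  The names demo_find and demo_union are hypothetical; the real
   implementation lives in web.c / df.h (web_entry_base).  Kept under
   "#if 0" so it is not compiled here.  */
#if 0
#include <stdio.h>

static int parent[8];

static int
demo_find (int x)
{
  while (parent[x] != x)
    x = parent[x] = parent[parent[x]];  /* path halving.  */
  return x;
}

static void
demo_union (int a, int b)
{
  parent[demo_find (a)] = demo_find (b);
}

int
main (void)
{
  for (int i = 0; i < 8; ++i)
    parent[i] = i;
  demo_union (0, 3);  /* e.g. a def in insn 0 reaches a use in insn 3.  */
  demo_union (3, 5);
  printf ("%d %d\n", demo_find (0), demo_find (5));  /* same root.  */
  return 0;
}
#endif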
/* Return 1 iff INSN is a load insn, including permuting loads that
   represent an lxvd2x instruction; else return 0.  */
static unsigned int
insn_is_load_p (rtx insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) == SET)
    {
      if (GET_CODE (SET_SRC (body)) == MEM)
        return 1;

      if (GET_CODE (SET_SRC (body)) == VEC_SELECT
          && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
        return 1;

      return 0;
    }

  if (GET_CODE (body) != PARALLEL)
    return 0;

  rtx set = XVECEXP (body, 0, 0);

  if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
    return 1;

  return 0;
}
/* Return 1 iff INSN is a store insn, including permuting stores that
   represent an stxvd2x instruction; else return 0.  */
static unsigned int
insn_is_store_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
    return 1;
  if (GET_CODE (body) != PARALLEL)
    return 0;
  rtx set = XVECEXP (body, 0, 0);
  if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
    return 1;
  return 0;
}
/* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
   a permuting load, or a permuting store.  */
static unsigned int
insn_is_swap_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return 0;
  rtx rhs = SET_SRC (body);
  if (GET_CODE (rhs) != VEC_SELECT)
    return 0;
  rtx parallel = XEXP (rhs, 1);
  if (GET_CODE (parallel) != PARALLEL)
    return 0;
  unsigned int len = XVECLEN (parallel, 0);
  if (len != 2 && len != 4 && len != 8 && len != 16)
    return 0;
  for (unsigned int i = 0; i < len / 2; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
        return 0;
    }
  for (unsigned int i = len / 2; i < len; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
        return 0;
    }
  return 1;
}
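/* A standalone sketch (an editorial illustration, not part of GCC) that
   prints the only selector vectors insn_is_swap_p accepts: the first half
   of the PARALLEL must count len/2..len-1 and the second half 0..len/2-1.
   demo_print_swap_selectors is a hypothetical name; kept under "#if 0" so
   it is not compiled here.  */
#if 0
#include <stdio.h>

static void
demo_print_swap_selectors (unsigned int len)
{
  printf ("len %2u:", len);
  for (unsigned int i = 0; i < len / 2; ++i)
    printf (" %u", len / 2 + i);
  for (unsigned int i = len / 2; i < len; ++i)
    printf (" %u", i - len / 2);
  printf ("\n");
}

int
main (void)
{
  demo_print_swap_selectors (2);   /* 1 0 */
  demo_print_swap_selectors (4);   /* 2 3 0 1 */
  demo_print_swap_selectors (8);
  demo_print_swap_selectors (16);
  return 0;
}
#endif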
/* Return TRUE if insn is a swap fed by a load from the constant pool.  */
static bool
const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  /* Find the unique use in the swap and locate its def.  If the def
     isn't unique, punt.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
        return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);
      if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
        return false;

      rtx body = PATTERN (def_insn);
      if (GET_CODE (body) != SET
          || GET_CODE (SET_SRC (body)) != VEC_SELECT
          || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
        return false;

      rtx mem = XEXP (SET_SRC (body), 0);
      rtx base_reg = XEXP (mem, 0);

      df_ref base_use;
      insn_info = DF_INSN_INFO_GET (def_insn);
      FOR_EACH_INSN_INFO_USE (base_use, insn_info)
        {
          if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
            continue;

          struct df_link *base_def_link = DF_REF_CHAIN (base_use);
          if (!base_def_link || base_def_link->next)
            return false;

          rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
          rtx tocrel_body = PATTERN (tocrel_insn);
          rtx base, offset;
          if (GET_CODE (tocrel_body) != SET)
            return false;

          /* There is an extra level of indirection for small/large
             code models.  */
          rtx tocrel_expr = SET_SRC (tocrel_body);
          if (GET_CODE (tocrel_expr) == MEM)
            tocrel_expr = XEXP (tocrel_expr, 0);
          if (!toc_relative_expr_p (tocrel_expr, false))
            return false;

          split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
          if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
            return false;
        }
    }
  return true;
}
/* Return TRUE iff OP matches a V2DF reduction pattern.  See the
   definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md.  */
static bool
v2df_reduction_p (rtx op)
{
  if (GET_MODE (op) != V2DFmode)
    return false;

  enum rtx_code code = GET_CODE (op);
  if (code != PLUS && code != SMIN && code != SMAX)
    return false;

  rtx concat = XEXP (op, 0);
  if (GET_CODE (concat) != VEC_CONCAT)
    return false;

  rtx select0 = XEXP (concat, 0);
  rtx select1 = XEXP (concat, 1);
  if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
    return false;

  rtx reg0 = XEXP (select0, 0);
  rtx reg1 = XEXP (select1, 0);
  if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
    return false;

  rtx parallel0 = XEXP (select0, 1);
  rtx parallel1 = XEXP (select1, 1);
  if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
    return false;

  if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
      || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
    return false;

  return true;
}
/* Return 1 iff OP is an operand that will not be affected by having
   vector doublewords swapped in memory.  */
static unsigned int
rtx_is_swappable_p (rtx op, unsigned int *special)
{
  enum rtx_code code = GET_CODE (op);
  int i, j;
  rtx parallel;

  switch (code)
    {
    case LABEL_REF:
    case SYMBOL_REF:
    case CLOBBER:
    case REG:
      return 1;

    case VEC_CONCAT:
    case ASM_INPUT:
    case ASM_OPERANDS:
      return 0;

    case CONST_VECTOR:
      {
        *special = SH_CONST_VECTOR;
        return 1;
      }

    case VEC_DUPLICATE:
      /* Opportunity: If XEXP (op, 0) has the same mode as the result,
         and XEXP (op, 1) is a PARALLEL with a single QImode const int,
         it represents a vector splat for which we can do special
         handling.  */
      if (GET_CODE (XEXP (op, 0)) == CONST_INT)
        return 1;
      else if (REG_P (XEXP (op, 0))
               && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
        /* This catches V2DF and V2DI splat, at a minimum.  */
        return 1;
      else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
               && REG_P (XEXP (XEXP (op, 0), 0))
               && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
        /* This catches splat of a truncated value.  */
        return 1;
      else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
        /* If the duplicated item is from a select, defer to the select
           processing to see if we can change the lane for the splat.  */
        return rtx_is_swappable_p (XEXP (op, 0), special);
      else
        return 0;

    case VEC_SELECT:
      /* A vec_extract operation is ok if we change the lane.  */
      if (GET_CODE (XEXP (op, 0)) == REG
          && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
          && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
          && XVECLEN (parallel, 0) == 1
          && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
        {
          *special = SH_EXTRACT;
          return 1;
        }
      /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
         XXPERMDI is a swap operation, it will be identified by
         insn_is_swap_p and therefore we won't get here.  */
      else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
               && (GET_MODE (XEXP (op, 0)) == V4DFmode
                   || GET_MODE (XEXP (op, 0)) == V4DImode)
               && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
               && XVECLEN (parallel, 0) == 2
               && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
               && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
        {
          *special = SH_XXPERMDI;
          return 1;
        }
      else if (v2df_reduction_p (op))
        return 1;
      else
        return 0;

    case UNSPEC:
      {
        /* Various operations are unsafe for this optimization, at least
           without significant additional work.  Permutes are obviously
           problematic, as both the permute control vector and the ordering
           of the target values are invalidated by doubleword swapping.
           Vector pack and unpack modify the number of vector lanes.
           Merge-high/low will not operate correctly on swapped operands.
           Vector shifts across element boundaries are clearly uncool,
           as are vector select and concatenate operations.  Vector
           sum-across instructions define one operand with a specific
           order-dependent element, so additional fixup code would be
           needed to make those work.  Vector set and non-immediate-form
           vector splat are element-order sensitive.  A few of these
           cases might be workable with special handling if required.
           Adding cost modeling would be appropriate in some cases.  */
        int val = XINT (op, 1);
        switch (val)
          {
          default:
            break;
          case UNSPEC_VMRGH_DIRECT:
          case UNSPEC_VMRGL_DIRECT:
          case UNSPEC_VPACK_SIGN_SIGN_SAT:
          case UNSPEC_VPACK_SIGN_UNS_SAT:
          case UNSPEC_VPACK_UNS_UNS_MOD:
          case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
          case UNSPEC_VPACK_UNS_UNS_SAT:
          case UNSPEC_VPERM:
          case UNSPEC_VPERM_UNS:
          case UNSPEC_VPERMHI:
          case UNSPEC_VPERMSI:
          case UNSPEC_VPKPX:
          case UNSPEC_VSLDOI:
          case UNSPEC_VSLO:
          case UNSPEC_VSRO:
          case UNSPEC_VSUM2SWS:
          case UNSPEC_VSUM4S:
          case UNSPEC_VSUM4UBS:
          case UNSPEC_VSUMSWS:
          case UNSPEC_VSUMSWS_DIRECT:
          case UNSPEC_VSX_CONCAT:
          case UNSPEC_VSX_SET:
          case UNSPEC_VSX_SLDWI:
          case UNSPEC_VUNPACK_HI_SIGN:
          case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
          case UNSPEC_VUNPACK_LO_SIGN:
          case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
          case UNSPEC_VUPKHPX:
          case UNSPEC_VUPKHS_V4SF:
          case UNSPEC_VUPKHU_V4SF:
          case UNSPEC_VUPKLPX:
          case UNSPEC_VUPKLS_V4SF:
          case UNSPEC_VUPKLU_V4SF:
          case UNSPEC_VSX_CVDPSPN:
          case UNSPEC_VSX_CVSPDP:
          case UNSPEC_VSX_CVSPDPN:
          case UNSPEC_VSX_EXTRACT:
          case UNSPEC_VSX_VSLO:
          case UNSPEC_VSX_VEC_INIT:
            return 0;
          case UNSPEC_VSPLT_DIRECT:
          case UNSPEC_VSX_XXSPLTD:
            *special = SH_SPLAT;
            return 1;
          case UNSPEC_REDUC_PLUS:
            return 1;
          }
        break;
      }

    default:
      break;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int ok = 1;

  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      {
        unsigned int special_op = SH_NONE;
        ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
        if (special_op == SH_NONE)
          continue;
        /* Ensure we never have two kinds of special handling
           for the same insn.  */
        if (*special != SH_NONE && *special != special_op)
          return 0;
        *special = special_op;
      }
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
        {
          unsigned int special_op = SH_NONE;
          ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
          if (special_op == SH_NONE)
            continue;
          /* Ensure we never have two kinds of special handling
             for the same insn.  */
          if (*special != SH_NONE && *special != special_op)
            return 0;
          *special = special_op;
        }

  return ok;
}
/* Return 1 iff INSN is an operand that will not be affected by
   having vector doublewords swapped in memory (in which case
   *SPECIAL is unchanged), or that can be modified to be correct
   if vector doublewords are swapped in memory (in which case
   *SPECIAL is changed to a value indicating how).  */
static unsigned int
insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
                     unsigned int *special)
{
  /* Calls are always bad.  */
  if (GET_CODE (insn) == CALL_INSN)
    return 0;

  /* Loads and stores seen here are not permuting, but we can still
     fix them up by converting them to permuting ones.  Exceptions:
     UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
     body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
     for the SET source.  Also we must now make an exception for lvx
     and stvx when they are not in the UNSPEC_LVX/STVX form (with the
     explicit "& -16") since this leads to unrecognizable insns.  */
  rtx body = PATTERN (insn);
  int i = INSN_UID (insn);

  if (insn_entry[i].is_load)
    {
      if (GET_CODE (body) == SET)
        {
          rtx rhs = SET_SRC (body);
          /* Even without a swap, the RHS might be a vec_select for, say,
             a byte-reversing load.  */
          if (GET_CODE (rhs) != MEM)
            return 0;
          if (GET_CODE (XEXP (rhs, 0)) == AND)
            return 0;

          *special = SH_NOSWAP_LD;
          return 1;
        }
      else
        return 0;
    }

  if (insn_entry[i].is_store)
    {
      if (GET_CODE (body) == SET
          && GET_CODE (SET_SRC (body)) != UNSPEC)
        {
          rtx lhs = SET_DEST (body);
          /* Even without a swap, the LHS might be a vec_select for, say,
             a byte-reversing store.  */
          if (GET_CODE (lhs) != MEM)
            return 0;
          if (GET_CODE (XEXP (lhs, 0)) == AND)
            return 0;

          *special = SH_NOSWAP_ST;
          return 1;
        }
      else
        return 0;
    }

  /* A convert to single precision can be left as is provided that
     all of its uses are in xxspltw instructions that splat BE element
     zero.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);
          if (!link)
            return 0;

          for (; link; link = link->next) {
            rtx use_insn = DF_REF_INSN (link->ref);
            rtx use_body = PATTERN (use_insn);
            if (GET_CODE (use_body) != SET
                || GET_CODE (SET_SRC (use_body)) != UNSPEC
                || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
                || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
              return 0;
          }
        }

      return 1;
    }

  /* A concatenation of two doublewords is ok if we reverse the
     order of the inputs.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == VEC_CONCAT
      && (GET_MODE (SET_SRC (body)) == V2DFmode
          || GET_MODE (SET_SRC (body)) == V2DImode))
    {
      *special = SH_CONCAT;
      return 1;
    }

  /* V2DF reductions are always swappable.  */
  if (GET_CODE (body) == PARALLEL)
    {
      rtx expr = XVECEXP (body, 0, 0);
      if (GET_CODE (expr) == SET
          && v2df_reduction_p (SET_SRC (expr)))
        return 1;
    }

  /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
     constant pool.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
      && XVECLEN (SET_SRC (body), 0) == 3
      && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
    {
      rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
        if (rtx_equal_p (DF_REF_REG (use), mask_reg))
          {
            struct df_link *def_link = DF_REF_CHAIN (use);
            /* Punt if multiple definitions for this reg.  */
            if (def_link && !def_link->next &&
                const_load_sequence_p (insn_entry,
                                       DF_REF_INSN (def_link->ref)))
              {
                *special = SH_VPERM;
                return 1;
              }
          }
    }

  /* Otherwise check the operands for vector lane violations.  */
  return rtx_is_swappable_p (body, special);
}
enum chain_purpose { FOR_LOADS, FOR_STORES };

/* Return true if the UD or DU chain headed by LINK is non-empty,
   and every entry on the chain references an insn that is a
   register swap.  Furthermore, if PURPOSE is FOR_LOADS, each such
   register swap must have only permuting loads as reaching defs.
   If PURPOSE is FOR_STORES, each such register swap must have only
   register swaps or permuting stores as reached uses.  */
static bool
chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
                           enum chain_purpose purpose)
{
  if (!link)
    return false;

  for (; link; link = link->next)
    {
      if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
        continue;

      if (DF_REF_IS_ARTIFICIAL (link->ref))
        return false;

      rtx reached_insn = DF_REF_INSN (link->ref);
      unsigned uid = INSN_UID (reached_insn);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);

      if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
          || insn_entry[uid].is_store)
        return false;

      if (purpose == FOR_LOADS)
        {
          df_ref use;
          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *swap_link = DF_REF_CHAIN (use);

              while (swap_link)
                {
                  if (DF_REF_IS_ARTIFICIAL (link->ref))
                    return false;

                  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
                  unsigned uid2 = INSN_UID (swap_def_insn);

                  /* Only permuting loads are allowed.  */
                  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
                    return false;

                  swap_link = swap_link->next;
                }
            }
        }
      else if (purpose == FOR_STORES)
        {
          df_ref def;
          FOR_EACH_INSN_INFO_DEF (def, insn_info)
            {
              struct df_link *swap_link = DF_REF_CHAIN (def);

              while (swap_link)
                {
                  if (DF_REF_IS_ARTIFICIAL (link->ref))
                    return false;

                  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
                  unsigned uid2 = INSN_UID (swap_use_insn);

                  /* Permuting stores or register swaps are allowed.  */
                  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
                    return false;

                  swap_link = swap_link->next;
                }
            }
        }
    }

  return true;
}
/* Mark the xxswapdi instructions associated with permuting loads and
   stores for removal.  Note that we only flag them for deletion here,
   as there is a possibility of a swap being reached from multiple
   loads, etc.  */
static void
mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
{
  rtx insn = insn_entry[i].insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  if (insn_entry[i].is_load)
    {
      df_ref def;
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);

          /* We know by now that these are swaps, so we can delete
             them confidently.  */
          while (link)
            {
              rtx use_insn = DF_REF_INSN (link->ref);
              insn_entry[INSN_UID (use_insn)].will_delete = 1;
              link = link->next;
            }
        }
    }
  else if (insn_entry[i].is_store)
    {
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
        {
          /* Ignore uses for addressability.  */
          machine_mode mode = GET_MODE (DF_REF_REG (use));
          if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
            continue;

          struct df_link *link = DF_REF_CHAIN (use);

          /* We know by now that these are swaps, so we can delete
             them confidently.  */
          while (link)
            {
              rtx def_insn = DF_REF_INSN (link->ref);
              insn_entry[INSN_UID (def_insn)].will_delete = 1;
              link = link->next;
            }
        }
    }
}
/* OP is either a CONST_VECTOR or an expression containing one.
   Swap the first half of the vector with the second in the first
   case.  Recurse to find it in the second.  */
static void
swap_const_vector_halves (rtx op)
{
  int i;
  enum rtx_code code = GET_CODE (op);
  if (GET_CODE (op) == CONST_VECTOR)
    {
      int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
      for (i = 0; i < half_units; ++i)
        {
          rtx temp = CONST_VECTOR_ELT (op, i);
          CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
          CONST_VECTOR_ELT (op, i + half_units) = temp;
        }
    }
  else
    {
      int j;
      const char *fmt = GET_RTX_FORMAT (code);
      for (i = 0; i < GET_RTX_LENGTH (code); ++i)
        if (fmt[i] == 'e' || fmt[i] == 'u')
          swap_const_vector_halves (XEXP (op, i));
        else if (fmt[i] == 'E')
          for (j = 0; j < XVECLEN (op, i); ++j)
            swap_const_vector_halves (XVECEXP (op, i, j));
    }
}
/* Find all subregs of a vector expression that perform a narrowing,
   and adjust the subreg index to account for doubleword swapping.  */
static void
adjust_subreg_index (rtx op)
{
  enum rtx_code code = GET_CODE (op);
  if (code == SUBREG
      && (GET_MODE_SIZE (GET_MODE (op))
          < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
    {
      unsigned int index = SUBREG_BYTE (op);
      if (index < 8)
        index += 8;
      else
        index -= 8;
      SUBREG_BYTE (op) = index;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int i, j;
  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      adjust_subreg_index (XEXP (op, i));
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
        adjust_subreg_index (XVECEXP (op, i, j));
}
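/* A standalone sketch (an editorial illustration, not part of GCC) of the
   byte-offset fixup applied above: in a 16-byte vector whose doublewords
   are swapped, a narrowing subreg that used to read bytes 0..7 must now
   read bytes 8..15 and vice versa.  demo_adjust_subreg_byte is a
   hypothetical name; kept under "#if 0" so it is not compiled here.  */
#if 0
#include <stdio.h>

static unsigned int
demo_adjust_subreg_byte (unsigned int index)
{
  return index < 8 ? index + 8 : index - 8;
}

int
main (void)
{
  printf ("%u %u\n", demo_adjust_subreg_byte (0),
          demo_adjust_subreg_byte (12));  /* prints "8 4".  */
  return 0;
}
#endif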
/* Convert the non-permuting load INSN to a permuting one.  */
static void
permute_load (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx mem_op = SET_SRC (body);
  rtx tgt_reg = SET_DEST (body);
  machine_mode mode = GET_MODE (tgt_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing load %d with permuted load\n",
             INSN_UID (insn));
}
/* Convert the non-permuting store INSN to a permuting one.  */
static void
permute_store (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx src_reg = SET_SRC (body);
  machine_mode mode = GET_MODE (src_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing store %d with permuted store\n",
             INSN_UID (insn));
}
/* Given OP that contains a vector extract operation, adjust the index
   of the extracted lane to account for the doubleword swap.  */
static void
adjust_extract (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) == PARALLEL)
    pattern = XVECEXP (pattern, 0, 0);
  rtx src = SET_SRC (pattern);
  /* The vec_select may be wrapped in a vec_duplicate for a splat, so
     account for that.  */
  rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
  rtx par = XEXP (sel, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
  int lane = INTVAL (XVECEXP (par, 0, 0));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (par, 0, 0) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
}
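/* A standalone sketch (an editorial illustration, not part of GCC) of the
   lane renumbering used by adjust_extract and adjust_splat below: a lane in
   one doubleword of the swapped register corresponds to the same position
   in the other doubleword of the unswapped value, so lanes move by half the
   element count in either direction.  demo_adjust_lane is a hypothetical
   name; kept under "#if 0" so it is not compiled here.  */
#if 0
#include <stdio.h>

static int
demo_adjust_lane (int lane, int half_elts)
{
  return lane >= half_elts ? lane - half_elts : lane + half_elts;
}

int
main (void)
{
  /* For a 4-element vector, lanes 0 1 2 3 become 2 3 0 1.  */
  for (int lane = 0; lane < 4; ++lane)
    printf ("%d -> %d\n", lane, demo_adjust_lane (lane, 2));
  return 0;
}
#endif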
/* Given OP that contains a vector direct-splat operation, adjust the index
   of the source lane to account for the doubleword swap.  */
static void
adjust_splat (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx unspec = XEXP (body, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
  int lane = INTVAL (XVECEXP (unspec, 0, 1));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (unspec, 0, 1) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}
/* Given OP that contains an XXPERMDI operation (that is not a doubleword
   swap), reverse the order of the source operands and adjust the indices
   of the source lanes to account for doubleword reversal.  */
static void
adjust_xxpermdi (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx select = XEXP (set, 1);
  rtx concat = XEXP (select, 0);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  rtx parallel = XEXP (select, 1);
  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
  int new_lane0 = 3 - lane1;
  int new_lane1 = 3 - lane0;
  XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
  XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
}
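/* A standalone sketch (an editorial illustration, not part of GCC) checking
   the xxpermdi lane rewrite above exhaustively: with both two-doubleword
   inputs swapped and the inputs exchanged, selecting lanes 3 - lane1 and
   3 - lane0 yields exactly the doubleword-swapped version of the original
   result.  The demo_* names are hypothetical; kept under "#if 0" so it is
   not compiled here.  */
#if 0
#include <assert.h>

/* Model a 4-doubleword concat: lanes 0-1 come from the first input,
   lanes 2-3 from the second.  */
static int
demo_xxpermdi_lane (const int *op0, const int *op1, int lane)
{
  return lane < 2 ? op0[lane] : op1[lane - 2];
}

int
main (void)
{
  int a[2] = { 10, 11 }, b[2] = { 20, 21 };
  int a_sw[2] = { 11, 10 }, b_sw[2] = { 21, 20 };

  for (int lane0 = 0; lane0 < 4; ++lane0)
    for (int lane1 = 0; lane1 < 4; ++lane1)
      {
        int orig0 = demo_xxpermdi_lane (a, b, lane0);
        int orig1 = demo_xxpermdi_lane (a, b, lane1);
        /* Swapped inputs, exchanged order, reflected lanes...  */
        int new0 = demo_xxpermdi_lane (b_sw, a_sw, 3 - lane1);
        int new1 = demo_xxpermdi_lane (b_sw, a_sw, 3 - lane0);
        /* ...produce the doubleword-swapped result.  */
        assert (new0 == orig1 && new1 == orig0);
      }
  return 0;
}
#endif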
/* Given OP that contains a VEC_CONCAT operation of two doublewords,
   reverse the order of those inputs.  */
static void
adjust_concat (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx concat = XEXP (set, 1);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
}

/* Given an UNSPEC_VPERM insn, modify the mask loaded from the
   constant pool to reflect swapped doublewords.  */
static void
adjust_vperm (rtx_insn *insn)
{
  /* We previously determined that the UNSPEC_VPERM was fed by a
     swap of a swapping load of a TOC-relative constant pool symbol.
     Find the MEM in the swapping load and replace it with a MEM for
     the adjusted mask constant.  */
  rtx set = PATTERN (insn);
  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);

  /* Find the swap.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx_insn *swap_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
      {
        struct df_link *def_link = DF_REF_CHAIN (use);
        gcc_assert (def_link && !def_link->next);
        swap_insn = DF_REF_INSN (def_link->ref);
        break;
      }
  gcc_assert (swap_insn);

  /* Find the load.  */
  insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      load_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  rtx_insn *tocrel_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      tocrel_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  /* There is an extra level of indirection for small/large code models.  */
  if (GET_CODE (tocrel_expr) == MEM)
    tocrel_expr = XEXP (tocrel_expr, 0);
  if (!toc_relative_expr_p (tocrel_expr, false))
    gcc_unreachable ();
  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);
  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (GET_CODE (const_vector) == SYMBOL_REF)
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  /* Create an adjusted mask from the initial mask.  */
  unsigned int new_mask[16], i, val;
  for (i = 0; i < 16; ++i)
    {
      val = INTVAL (XVECEXP (const_vector, 0, i));
      if (val < 16)
        new_mask[i] = (val + 8) % 16;
      else
        new_mask[i] = ((val + 8) % 16) + 16;
    }
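
  /* A worked example (hypothetical mask bytes, for illustration):
     element values 0-15 select bytes of the first input and 16-31
     select bytes of the second, so adding 8 modulo 16 within each
     half redirects a selector across the doubleword boundary:

        val =  0  ->  (0 + 8) % 16         =  8
        val = 12  ->  (12 + 8) % 16        =  4
        val = 17  ->  ((17 + 8) % 16) + 16 = 25
        val = 31  ->  ((31 + 8) % 16) + 16 = 23  */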

  /* Create a new CONST_VECTOR and a MEM that references it.  */
  rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
  rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
  rtx new_mem = force_const_mem (V16QImode, new_const_vector);
  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0)))
    {
      rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
      XEXP (new_mem, 0) = base_reg;
      /* Move the newly created insn ahead of the load insn.  */
      rtx_insn *force_insn = get_last_insn ();
      remove_insn (force_insn);
      rtx_insn *before_load_insn = PREV_INSN (load_insn);
      add_insn_after (force_insn, before_load_insn,
                      BLOCK_FOR_INSN (load_insn));
      df_insn_rescan (before_load_insn);
      df_insn_rescan (force_insn);
    }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  if (dump_file)
    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
}

/* The insn described by INSN_ENTRY[I] can be swapped, but only
   with special handling.  Take care of that here.  */
static void
handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  switch (insn_entry[i].special_handling)
    {
    default:
      gcc_unreachable ();
    case SH_CONST_VECTOR:
      {
        /* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
        gcc_assert (GET_CODE (body) == SET);
        rtx rhs = SET_SRC (body);
        swap_const_vector_halves (rhs);
        if (dump_file)
          fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
        break;
      }
    case SH_SUBREG:
      /* A subreg of the same size is already safe.  For subregs that
         select a smaller portion of a reg, adjust the index for
         swapped doublewords.  */
      adjust_subreg_index (body);
      if (dump_file)
        fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
      break;
    case SH_NOSWAP_LD:
      /* Convert a non-permuting load to a permuting one.  */
      permute_load (insn);
      break;
    case SH_NOSWAP_ST:
      /* Convert a non-permuting store to a permuting one.  */
      permute_store (insn);
      break;
    case SH_EXTRACT:
      /* Change the lane on an extract operation.  */
      adjust_extract (insn);
      break;
    case SH_SPLAT:
      /* Change the lane on a direct-splat operation.  */
      adjust_splat (insn);
      break;
    case SH_XXPERMDI:
      /* Change the lanes on an XXPERMDI operation.  */
      adjust_xxpermdi (insn);
      break;
    case SH_CONCAT:
      /* Reverse the order of a concatenation operation.  */
      adjust_concat (insn);
      break;
    case SH_VPERM:
      /* Change the mask loaded from the constant pool for a VPERM.  */
      adjust_vperm (insn);
      break;
    }
}

/* Find the insn from the Ith table entry, which is known to be a
   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
static void
replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);
  rtx src_reg = XEXP (SET_SRC (body), 0);
  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
  rtx_insn *new_insn = emit_insn_before (copy, insn);
  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
  df_insn_rescan (new_insn);

  if (dump_file)
    {
      unsigned int new_uid = INSN_UID (new_insn);
      fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
    }

  df_insn_delete (insn);
  remove_insn (insn);
  insn->set_deleted ();
}
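
/* As an illustration (hypothetical register numbers): a doubleword
   swap such as

     (set (reg:V2DI 125)
          (vec_select:V2DI (reg:V2DI 124)
                           (parallel [(const_int 1) (const_int 0)])))

   is replaced by the plain copy

     (set (reg:V2DI 125) (reg:V2DI 124))

   which is valid because every register in an optimizable web holds
   its doublewords in swapped order.  */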

/* Dump the swap table to DUMP_FILE.  */
static void
dump_swap_insn_table (swap_web_entry *insn_entry)
{
  int e = get_max_uid ();
  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");

  for (int i = 0; i < e; ++i)
    if (insn_entry[i].is_relevant)
      {
        swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
        fprintf (dump_file, "%6d %6d ", i,
                 pred_entry && pred_entry->insn
                 ? INSN_UID (pred_entry->insn) : 0);
        if (insn_entry[i].is_load)
          fputs ("load ", dump_file);
        if (insn_entry[i].is_store)
          fputs ("store ", dump_file);
        if (insn_entry[i].is_swap)
          fputs ("swap ", dump_file);
        if (insn_entry[i].is_live_in)
          fputs ("live-in ", dump_file);
        if (insn_entry[i].is_live_out)
          fputs ("live-out ", dump_file);
        if (insn_entry[i].contains_subreg)
          fputs ("subreg ", dump_file);
        if (insn_entry[i].is_128_int)
          fputs ("int128 ", dump_file);
        if (insn_entry[i].is_call)
          fputs ("call ", dump_file);
        if (insn_entry[i].is_swappable)
          {
            fputs ("swappable ", dump_file);
            if (insn_entry[i].special_handling == SH_CONST_VECTOR)
              fputs ("special:constvec ", dump_file);
            else if (insn_entry[i].special_handling == SH_SUBREG)
              fputs ("special:subreg ", dump_file);
            else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
              fputs ("special:load ", dump_file);
            else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
              fputs ("special:store ", dump_file);
            else if (insn_entry[i].special_handling == SH_EXTRACT)
              fputs ("special:extract ", dump_file);
            else if (insn_entry[i].special_handling == SH_SPLAT)
              fputs ("special:splat ", dump_file);
            else if (insn_entry[i].special_handling == SH_XXPERMDI)
              fputs ("special:xxpermdi ", dump_file);
            else if (insn_entry[i].special_handling == SH_CONCAT)
              fputs ("special:concat ", dump_file);
            else if (insn_entry[i].special_handling == SH_VPERM)
              fputs ("special:vperm ", dump_file);
          }
        if (insn_entry[i].web_not_optimizable)
          fputs ("unoptimizable ", dump_file);
        if (insn_entry[i].will_delete)
          fputs ("delete ", dump_file);
        fputs ("\n", dump_file);
      }
  fputs ("\n", dump_file);
}

/* Return RTX with its address canonicalized to (reg) or (+ reg reg).
   Here RTX is an (& addr (const_int -16)).  Always return a new copy
   to avoid problems with combine.  */
static rtx
alignment_with_canonical_addr (rtx align)
{
  rtx canon;
  rtx addr = XEXP (align, 0);

  if (REG_P (addr))
    canon = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx addrop0 = XEXP (addr, 0);
      rtx addrop1 = XEXP (addr, 1);

      if (!REG_P (addrop0))
        addrop0 = force_reg (GET_MODE (addrop0), addrop0);

      if (!REG_P (addrop1))
        addrop1 = force_reg (GET_MODE (addrop1), addrop1);

      canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
    }

  else
    canon = force_reg (GET_MODE (addr), addr);

  return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
}
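
/* For example (illustrative operands): given ALIGN of the form

     (and:DI (plus:DI (reg:DI 3) (const_int 48)) (const_int -16))

   the (const_int 48) term is forced into a fresh register, so the
   returned rtx looks like

     (and:DI (plus:DI (reg:DI 3) (reg:DI 130)) (const_int -16))

   matching the (reg) or (+ reg reg) address forms the lvx/stvx
   patterns expect.  */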

/* Check whether an rtx is an alignment mask, and if so, return
   a fully-expanded rtx for the masking operation.  */
static rtx
alignment_mask (rtx_insn *insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != AND
      || !REG_P (XEXP (SET_SRC (body), 0)))
    return 0;

  rtx mask = XEXP (SET_SRC (body), 1);

  if (GET_CODE (mask) == CONST_INT)
    {
      if (INTVAL (mask) == -16)
        return alignment_with_canonical_addr (SET_SRC (body));
      else
        return 0;
    }

  if (!REG_P (mask))
    return 0;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx real_mask = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (use), mask))
        continue;

      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
        return 0;

      rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
      rtx const_body = PATTERN (const_insn);
      if (GET_CODE (const_body) != SET)
        return 0;

      real_mask = SET_SRC (const_body);

      if (GET_CODE (real_mask) != CONST_INT
          || INTVAL (real_mask) != -16)
        return 0;
    }

  if (real_mask == 0)
    return 0;

  return alignment_with_canonical_addr (SET_SRC (body));
}
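
/* Two shapes are recognized here (register numbers illustrative):
   either the mask is immediate,

     (set (reg:DI 112) (and:DI (reg:DI 3) (const_int -16)))

   or it is a register whose single reaching definition is the
   constant -16,

     (set (reg:DI 113) (const_int -16))
     (set (reg:DI 112) (and:DI (reg:DI 3) (reg:DI 113)))

   Both cases denote a 16-byte alignment of the address.  */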

/* Given INSN that's a load or store based at BASE_REG, look for a
   feeding computation that aligns its address on a 16-byte boundary.  */
static rtx
find_alignment_op (rtx_insn *insn, rtx base_reg)
{
  df_ref base_use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  rtx and_operation = 0;

  FOR_EACH_INSN_INFO_USE (base_use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
        continue;

      struct df_link *base_def_link = DF_REF_CHAIN (base_use);
      if (!base_def_link || base_def_link->next)
        break;

      /* With stack-protector code enabled, and possibly in other
         circumstances, there may not be an associated insn for
         the def.  */
      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
        break;

      rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
      and_operation = alignment_mask (and_insn);
      if (and_operation != 0)
        break;
    }

  return and_operation;
}

struct del_info { bool replace; rtx_insn *replace_insn; };

/* If INSN is the load for an lvx pattern, put it in canonical form.  */
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
              && GET_CODE (SET_SRC (body)) == VEC_SELECT
              && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);
          if (!link || link->next)
            break;

          rtx_insn *swap_insn = DF_REF_INSN (link->ref);
          if (!insn_is_swap_p (swap_insn)
              || insn_is_load_p (swap_insn)
              || insn_is_store_p (swap_insn))
            break;

          /* Expected lvx pattern found.  Change the swap to
             a copy, and propagate the AND operation into the
             load.  */
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

          XEXP (mem, 0) = and_operation;
          SET_SRC (body) = mem;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);

          if (dump_file)
            fprintf (dump_file, "lvx opportunity found at %d\n",
                     INSN_UID (insn));
        }
    }
}

/* If INSN is the store for an stvx pattern, put it in canonical form.  */
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
              && GET_CODE (SET_DEST (body)) == MEM
              && GET_CODE (SET_SRC (body)) == VEC_SELECT);
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      rtx src_reg = XEXP (SET_SRC (body), 0);
      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
        {
          if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
            continue;

          struct df_link *link = DF_REF_CHAIN (src_use);
          if (!link || link->next)
            break;

          rtx_insn *swap_insn = DF_REF_INSN (link->ref);
          if (!insn_is_swap_p (swap_insn)
              || insn_is_load_p (swap_insn)
              || insn_is_store_p (swap_insn))
            break;

          /* Expected stvx pattern found.  Change the swap to
             a copy, and propagate the AND operation into the
             store.  */
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

          XEXP (mem, 0) = and_operation;
          SET_SRC (body) = src_reg;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);

          if (dump_file)
            fprintf (dump_file, "stvx opportunity found at %d\n",
                     INSN_UID (insn));
        }
    }
}

/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
      {
        if (!NONDEBUG_INSN_P (insn))
          continue;

        if (insn_is_load_p (insn) && insn_is_swap_p (insn))
          recombine_lvx_pattern (insn, to_delete);
        else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
          recombine_stvx_pattern (insn, to_delete);
      }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
        rtx swap_body = PATTERN (to_delete[i].replace_insn);
        rtx src_reg = XEXP (SET_SRC (swap_body), 0);
        rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
        rtx_insn *new_insn = emit_insn_before (copy,
                                               to_delete[i].replace_insn);
        set_block_for_insn (new_insn,
                            BLOCK_FOR_INSN (to_delete[i].replace_insn));
        df_insn_rescan (new_insn);
        df_insn_delete (to_delete[i].replace_insn);
        remove_insn (to_delete[i].replace_insn);
        to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}

/* Main entry point for this pass.  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
        unsigned int uid = INSN_UID (insn);
        if (NONDEBUG_INSN_P (insn))
          {
            insn_entry[uid].insn = insn;

            if (GET_CODE (insn) == CALL_INSN)
              insn_entry[uid].is_call = 1;

            /* Walk the uses and defs to see if we mention vector regs.
               Record any constraints on optimization of such mentions.  */
            struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
            df_ref mention;
            FOR_EACH_INSN_INFO_USE (mention, insn_info)
              {
                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

                /* If a use gets its value from a call insn, it will be
                   a hard register and will look like (reg:V4SI 3 3).
                   The df analysis creates two mentions for GPR3 and GPR4,
                   both DImode.  We must recognize this and treat it as a
                   vector mention to ensure the call is unioned with this
                   use.  */
                if (mode == DImode && DF_REF_INSN_INFO (mention))
                  {
                    rtx feeder = DF_REF_INSN (mention);
                    /* FIXME: It is pretty hard to get from the df mention
                       to the mode of the use in the insn.  We arbitrarily
                       pick a vector mode here, even though the use might
                       be a real DImode.  We can be too conservative
                       (create a web larger than necessary) because of
                       this, so consider eventually fixing this.  */
                    if (GET_CODE (feeder) == CALL_INSN)
                      mode = V4SImode;
                  }

                if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
                  {
                    insn_entry[uid].is_relevant = 1;
                    if (mode == TImode || mode == V1TImode
                        || FLOAT128_VECTOR_P (mode))
                      insn_entry[uid].is_128_int = 1;
                    if (DF_REF_INSN_INFO (mention))
                      insn_entry[uid].contains_subreg
                        = !rtx_equal_p (DF_REF_REG (mention),
                                        DF_REF_REAL_REG (mention));
                    union_defs (insn_entry, insn, mention);
                  }
              }
            FOR_EACH_INSN_INFO_DEF (mention, insn_info)
              {
                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

                /* If we're loading up a hard vector register for a call,
                   it looks like (set (reg:V4SI 9 9) (...)).  The df
                   analysis creates two mentions for GPR9 and GPR10, both
                   DImode.  So relying on the mode from the mentions
                   isn't sufficient to ensure we union the call into the
                   web with the parameter setup code.  */
                if (mode == DImode && GET_CODE (insn) == SET
                    && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn))))
                  mode = GET_MODE (SET_DEST (insn));

                if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
                  {
                    insn_entry[uid].is_relevant = 1;
                    if (mode == TImode || mode == V1TImode
                        || FLOAT128_VECTOR_P (mode))
                      insn_entry[uid].is_128_int = 1;
                    if (DF_REF_INSN_INFO (mention))
                      insn_entry[uid].contains_subreg
                        = !rtx_equal_p (DF_REF_REG (mention),
                                        DF_REF_REAL_REG (mention));
                    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
                    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
                      insn_entry[uid].is_live_out = 1;
                    union_uses (insn_entry, insn, mention);
                  }
              }

            if (insn_entry[uid].is_relevant)
              {
                /* Determine if this is a load or store.  */
                insn_entry[uid].is_load = insn_is_load_p (insn);
                insn_entry[uid].is_store = insn_is_store_p (insn);

                /* Determine if this is a doubleword swap.  If not,
                   determine whether it can legally be swapped.  */
                if (insn_is_swap_p (insn))
                  insn_entry[uid].is_swap = 1;
                else
                  {
                    unsigned int special = SH_NONE;
                    insn_entry[uid].is_swappable
                      = insn_is_swappable_p (insn_entry, insn, &special);
                    if (special != SH_NONE
                        && insn_entry[uid].contains_subreg)
                      insn_entry[uid].is_swappable = 0;
                    else if (special != SH_NONE)
                      insn_entry[uid].special_handling = special;
                    else if (insn_entry[uid].contains_subreg)
                      insn_entry[uid].special_handling = SH_SUBREG;
                  }
              }
          }
      }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }
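
  /* At this point each dump line shows an insn UID, the UID of its
     predecessor in the union-find web, and the recorded flags; a
     line such as (UIDs invented for illustration)

         42     37 load swap

     marks insn 42 as a swapping load grouped with insn 37.  */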

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
        continue;

      swap_web_entry *root
        = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
          || (insn_entry[i].contains_subreg
              && insn_entry[i].special_handling != SH_SUBREG)
          || insn_entry[i].is_128_int || insn_entry[i].is_call
          || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
        root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
         optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
               && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
        root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
         by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref def;

          FOR_EACH_INSN_INFO_DEF (def, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (def);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref use;

          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (use);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
        && insn_entry[i].is_swap)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  free (insn_entry);
  return 0;
}
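
/* End-to-end sketch of the optimization (illustrative pseudo-assembly):
   for a simple vector copy, the little-endian expansion

     lxvd2x   0,0,8     # load, doubleword-swapped
     xxpermdi 0,0,0,2   # swap back to array order
     xxpermdi 0,0,0,2   # swap again before storing
     stxvd2x  0,0,9     # store, doubleword-swapped

   forms an optimizable web in which every value is only loaded,
   swapped, swapped again, and stored; both xxpermdi insns are
   replaced by copies and later cleaned up, leaving just

     lxvd2x   0,0,8
     stxvd2x  0,0,9  */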

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
              && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

  opt_pass *clone ()
    {
      return new pass_analyze_swaps (m_ctxt);
    }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
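
/* How the pass enters the pipeline is not shown in this excerpt; a
   typical registration, sketched here under the assumption that the
   standard register_pass machinery is used (the reference pass name
   and position are illustrative):

     opt_pass *pass = make_pass_analyze_swaps (g);
     struct register_pass_info analyze_swaps_info
       = { pass, "cse1", 1, PASS_POS_INSERT_BEFORE };
     register_pass (&analyze_swaps_info);  */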

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
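
  /* A reading of the mask arithmetic above (FPSCR layout per the ISA
     documentation, not restated elsewhere in this file): mffs places
     the FPSCR in the low 32 bits of the double image, and the three
     least-significant bits are the non-IEEE mode bit plus the two
     rounding-mode bits, so ANDing with 0x...00000007 clears all
     exception status and enable bits while preserving rounding
     behavior, which is what feholdexcept requires.  */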

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear,
                            call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node,
                                fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clean_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
                                | (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
                          optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
              && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"