/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "output.h"
#include "dbxout.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"		/* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"		/* for N_SLINE */
#endif

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in save_size */
  int vrsave_size;		/* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;	/* size of altivec alignment padding if
				   not in save_size */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, used to obtain
   the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
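
/* Illustrative sketch (added; not part of the original file): because the
   pointer is null when the front end does not link in rs6000-c.c, uses of
   it are guarded, along the lines of:

     if (rs6000_target_modify_macros_ptr)
       rs6000_target_modify_macros_ptr (define_p, flags, bu_mask);

   where define_p, flags, and bu_mask are hypothetical argument names.  */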

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)

/* Register classes we care about in secondary reload or when checking for
   a legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
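
/* Illustrative example (added; not from the original source): a mode that
   is valid in a register and supports reg+reg, reg+offset, and
   PRE_INC/PRE_DEC addressing would carry an addr_mask of
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
    | RELOAD_REG_PRE_INCDEC), i.e. 0x1d.  */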

/* Masks of valid addressing modes, and reload insn codes, for each mode,
   indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
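
/* Usage sketch (added for illustration; assumed, not from this file):
   callers can query these helpers before emitting an auto-increment
   address, e.g.

     if (mode_supports_pre_incdec_p (DFmode))
       ... emit a PRE_INC/PRE_DEC address form ...  */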

/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	    /* cost of SImode multiplication.  */
  const int mulsi_const;    /* cost of SImode multiplication by constant.  */
  const int mulsi_const9;   /* cost of SImode mult by short constant.  */
  const int muldi;	    /* cost of DImode multiplication.  */
  const int divsi;	    /* cost of SImode division.  */
  const int divdi;	    /* cost of DImode division.  */
  const int fp;		    /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	    /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	    /* cost of SFmode division (fdivs).  */
  const int ddiv;	    /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  32,			    /* l1 cache */
  512,			    /* l2 cache */
  6,			    /* streams */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};

/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_hasher<toc_hash_struct *>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  /* Condition registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "ca",
  /* AltiVec registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
  "%rh8", "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
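
/* Worked example (added for illustration): ALTIVEC_REG_BIT
   (FIRST_ALTIVEC_REGNO) is 0x80000000, and each subsequent AltiVec
   register shifts the bit right by one, so FIRST_ALTIVEC_REGNO + 3
   maps to 0x10000000.  */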

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS rs6000_md_asm_clobbers

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif
1585 /* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
1586 The PowerPC architecture requires only weak consistency among
1587 processors--that is, memory accesses between processors need not be
1588 sequentially consistent and memory accesses among processors can occur
1589 in any order. The ability to order memory accesses weakly provides
1590 opportunities for more efficient use of the system bus. Unless a
1591 dependency exists, the 604e allows read operations to precede store
1592 operations. */
1593 #undef TARGET_RELAXED_ORDERING
1594 #define TARGET_RELAXED_ORDERING true
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
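/* For illustration (not in the original source): with a 32-bit anchor
   range, an access at anchor+0x12345678 can be materialized as

	addis	tmp,anchor,0x1234	# high 16 bits of the offset
	addi	dest,tmp,0x5678		# low 16 bits (already < 0x8000 here)

   so any offset in [-0x80000000, 0x7fffffff] is reachable in two
   instructions, and TMP can be reused for nearby accesses to the same
   64k page.  When the low half is >= 0x8000 it is sign-extended, and the
   high half must be incremented by one to compensate.  */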
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P rs6000_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
/* Processor table.  */
struct rs6000_ptt
{
  const char *const name;		/* Canonical processor name.  */
  const enum processor_type processor;	/* Processor type enum value.  */
  const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};

static struct rs6000_ptt const processor_target_table[] =
{
#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
#include "rs6000-cpus.def"
#undef RS6000_CPU
};

/* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is invalid.  */

static int
rs6000_cpu_name_lookup (const char *name)
{
  size_t i;

  if (name != NULL)
    {
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (! strcmp (name, processor_target_table[i].name))
	  return (int)i;
    }

  return -1;
}
/* Return the number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.
   This is ordinarily the length in words of a value of mode MODE
   but can be less for certain modes in special long registers.

   For the SPE, GPRs are 64 bits but only 32 bits are visible in
   scalar instructions.  The upper 32 bits are only available to the
   SIMD instructions.

   POWER and PowerPC GPRs hold 32 bits worth;
   PowerPC64 GPRs and FPRs hold 64 bits worth.  */

static int
rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
{
  unsigned HOST_WIDE_INT reg_size;

  /* TF/TD modes are special in that they always take 2 registers.  */
  if (FP_REGNO_P (regno))
    reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
		? UNITS_PER_VSX_WORD
		: UNITS_PER_FP_WORD);

  else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
    reg_size = UNITS_PER_SPE_WORD;

  else if (ALTIVEC_REGNO_P (regno))
    reg_size = UNITS_PER_ALTIVEC_WORD;

  /* The value returned for SCmode in the E500 double case is 2 for
     ABI compatibility; storing an SCmode value in a single register
     would require function_arg and rs6000_spe_function_arg to handle
     SCmode so as to pass the value correctly in a pair of
     registers.  */
  else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
	   && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
    reg_size = UNITS_PER_FP_WORD;

  else
    reg_size = UNITS_PER_WORD;

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
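/* Worked example (illustrative, not in the original source): with
   UNITS_PER_FP_WORD == 8, a TFmode value of 16 bytes in an FPR needs
   (16 + 8 - 1) / 8 == 2 consecutive registers, while a 16-byte V4SImode
   value in an Altivec register (UNITS_PER_ALTIVEC_WORD == 16) needs
   (16 + 16 - 1) / 16 == 1 register.  */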
/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */
static int
rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
{
  int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;

  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
     register combinations, and use PTImode where we need to deal with quad
     word memory operations.  Don't allow quad words in the argument or frame
     pointer registers, just registers 0..31.  */
  if (mode == PTImode)
    return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && ((regno & 1) == 0));

  /* VSX registers that overlap the FPR registers are larger than for non-VSX
     implementations.  Don't allow an item to be split between a FP register
     and an Altivec register.  Allow TImode in all VSX registers if the user
     asked for it.  */
  if (TARGET_VSX && VSX_REGNO_P (regno)
      && (VECTOR_MEM_VSX_P (mode)
	  || reg_addr[mode].scalar_in_vmx_p
	  || (TARGET_VSX_TIMODE && mode == TImode)
	  || (TARGET_VADDUQM && mode == V1TImode)))
    {
      if (FP_REGNO_P (regno))
	return FP_REGNO_P (last_regno);

      if (ALTIVEC_REGNO_P (regno))
	{
	  if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
	    return 0;

	  return ALTIVEC_REGNO_P (last_regno);
	}
    }

  /* The GPRs can hold any mode, but values bigger than one register
     cannot go past R31.  */
  if (INT_REGNO_P (regno))
    return INT_REGNO_P (last_regno);

  /* The float registers (except for VSX vector modes) can only hold floating
     modes and DImode.  */
  if (FP_REGNO_P (regno))
    {
      if (SCALAR_FLOAT_MODE_P (mode)
	  && (mode != TDmode || (regno % 2) == 0)
	  && FP_REGNO_P (last_regno))
	return 1;

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
	return 1;

      if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
	  && PAIRED_VECTOR_MODE (mode))
	return 1;

      return 0;
    }

  /* The CR register can only hold CC modes.  */
  if (CR_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;

  if (CA_REGNO_P (regno))
    return mode == Pmode || mode == SImode;

  /* AltiVec only in AltiVec registers.  */
  if (ALTIVEC_REGNO_P (regno))
    return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
	    || mode == V1TImode);

  /* ...but GPRs can hold SIMD data on the SPE in one register.  */
  if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
    return 1;

  /* We cannot put non-VSX TImode or PTImode anywhere except general
     registers, and they must be able to fit within the register set.  */

  return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
}
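/* Illustrative consequence (not in the original source): PTImode is
   accepted only for even/odd GPR pairs lying wholly inside r0..r31, so

     rs6000_hard_regno_mode_ok (4, PTImode)   -> nonzero (r4/r5 pair)
     rs6000_hard_regno_mode_ok (5, PTImode)   -> 0 (odd starting regno)
     rs6000_hard_regno_mode_ok (31, PTImode)  -> 0 (pair would pass R31)  */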
/* Print interesting facts about registers.  */
static void
rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
{
  int r, m;

  for (r = first_regno; r <= last_regno; ++r)
    {
      const char *comma = "";
      int len;

      if (first_regno == last_regno)
	fprintf (stderr, "%s:\t", reg_name);
      else
	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);

      len = 8;
      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
	  {
	    if (len > 70)
	      {
		fprintf (stderr, ",\n\t");
		len = 8;
		comma = "";
	      }

	    if (rs6000_hard_regno_nregs[m][r] > 1)
	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
			      rs6000_hard_regno_nregs[m][r]);
	    else
	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));

	    comma = ", ";
	  }

      if (call_used_regs[r])
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "call-used");
	  comma = ", ";
	}

      if (fixed_regs[r])
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "fixed");
	  comma = ", ";
	}

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      len += fprintf (stderr, "%sreg-class = %s", comma,
		      reg_class_names[(int)rs6000_regno_regclass[r]]);
      comma = ", ";

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      fprintf (stderr, "%sregno = %d\n", comma, r);
    }
}
static const char *
rs6000_debug_vector_unit (enum rs6000_vector v)
{
  const char *ret;

  switch (v)
    {
    case VECTOR_NONE:	   ret = "none";      break;
    case VECTOR_ALTIVEC:   ret = "altivec";   break;
    case VECTOR_VSX:	   ret = "vsx";       break;
    case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
    case VECTOR_PAIRED:	   ret = "paired";    break;
    case VECTOR_SPE:	   ret = "spe";       break;
    case VECTOR_OTHER:	   ret = "other";     break;
    default:		   ret = "unknown";   break;
    }

  return ret;
}
/* Inner function printing just the address mask for a particular reload
   register class.  */
DEBUG_FUNCTION char *
rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
{
  static char ret[8];
  char *p = ret;

  if ((mask & RELOAD_REG_VALID) != 0)
    *p++ = 'v';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_MULTIPLE) != 0)
    *p++ = 'm';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_INDEXED) != 0)
    *p++ = 'i';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_OFFSET) != 0)
    *p++ = 'o';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_AND_M16) != 0)
    *p++ = '&';
  else if (keep_spaces)
    *p++ = ' ';

  *p = '\0';

  return ret;
}
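/* Example output (illustrative, not in the original source): a mask with
   RELOAD_REG_VALID, RELOAD_REG_INDEXED and RELOAD_REG_OFFSET set prints
   as "vio" with keep_spaces == false, or as "v io   " with one column
   reserved per flag when keep_spaces == true.  */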
/* Print the address masks in a human readable fashion.  */
DEBUG_FUNCTION void
rs6000_debug_print_mode (ssize_t m)
{
  ssize_t rc;

  fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
  for (rc = 0; rc < N_RELOAD_REG; rc++)
    fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));

  if (rs6000_vector_unit[m] != VECTOR_NONE
      || rs6000_vector_mem[m] != VECTOR_NONE
      || (reg_addr[m].reload_store != CODE_FOR_nothing)
      || (reg_addr[m].reload_load != CODE_FOR_nothing)
      || reg_addr[m].scalar_in_vmx_p)
    {
      fprintf (stderr,
	       "  Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
	       rs6000_debug_vector_unit (rs6000_vector_unit[m]),
	       rs6000_debug_vector_unit (rs6000_vector_mem[m]),
	       (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
	       (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
	       (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
    }

  fputs ("\n", stderr);
}
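/* Sample shape of the -mdebug=reg output from this function (a sketch,
   not captured from a real run):

     Mode: V2DF  <class>: v io  ...  Vector-arith=vsx Vector-mem=vsx Reload=sl Upper=y

   one line per machine mode, with one addr-mask column per reload
   register class, followed by the vector/reload summary when any of
   those fields is set.  */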
#define DEBUG_FMT_ID "%-32s= "
#define DEBUG_FMT_D   DEBUG_FMT_ID "%d\n"
#define DEBUG_FMT_WX  DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
#define DEBUG_FMT_S   DEBUG_FMT_ID "%s\n"
/* Print various interesting information with -mdebug=reg.  */
static void
rs6000_debug_reg_global (void)
{
  static const char *const tf[2] = { "false", "true" };
  const char *nl = (const char *)0;
  int m;
  size_t m1, m2, v;
  char costly_num[20];
  char nop_num[20];
  char flags_buffer[40];
  const char *costly_str;
  const char *nop_str;
  const char *trace_str;
  const char *abi_str;
  const char *cmodel_str;
  struct cl_target_option cl_opts;

  /* Modes we want tieable information on.  */
  static const machine_mode print_tieable_modes[] = {
    QImode,
    HImode,
    SImode,
    DImode,
    TImode,
    PTImode,
    SFmode,
    DFmode,
    TFmode,
    SDmode,
    DDmode,
    TDmode,
    V8QImode,
    V4HImode,
    V2SImode,
    V16QImode,
    V8HImode,
    V4SImode,
    V2DImode,
    V1TImode,
    V32QImode,
    V16HImode,
    V8SImode,
    V4DImode,
    V2TImode,
    V2SFmode,
    V4SFmode,
    V2DFmode,
    V8SFmode,
    V4DFmode,
    CCmode,
    CCUNSmode,
    CCEQmode,
  };

  /* Virtual regs we are interested in.  */
  static const struct {
    int regno;			/* register number.  */
    const char *name;		/* register name.  */
  } virtual_regs[] = {
    { STACK_POINTER_REGNUM,			"stack pointer:" },
    { TOC_REGNUM,				"toc:          " },
    { STATIC_CHAIN_REGNUM,			"static chain: " },
    { RS6000_PIC_OFFSET_TABLE_REGNUM,		"pic offset:   " },
    { HARD_FRAME_POINTER_REGNUM,		"hard frame:   " },
    { ARG_POINTER_REGNUM,			"arg pointer:  " },
    { FRAME_POINTER_REGNUM,			"frame pointer:" },
    { FIRST_PSEUDO_REGISTER,			"first pseudo: " },
    { FIRST_VIRTUAL_REGISTER,			"first virtual:" },
    { VIRTUAL_INCOMING_ARGS_REGNUM,		"incoming_args:" },
    { VIRTUAL_STACK_VARS_REGNUM,		"stack_vars:   " },
    { VIRTUAL_STACK_DYNAMIC_REGNUM,		"stack_dynamic:" },
    { VIRTUAL_OUTGOING_ARGS_REGNUM,		"outgoing_args:" },
    { VIRTUAL_CFA_REGNUM,			"cfa (frame):  " },
    { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM,	"stack boundary:" },
    { LAST_VIRTUAL_REGISTER,			"last virtual: " },
  };
  fputs ("\nHard register information:\n", stderr);
  rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
  rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
  rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
			  LAST_ALTIVEC_REGNO,
			  "vs");
  rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
  rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
  rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
  rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
  rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
  rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
  rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
  rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");

  fputs ("\nVirtual/stack/frame registers:\n", stderr);
  for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
    fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name,
	     virtual_regs[v].regno);

  fprintf (stderr,
	   "\n"
	   "d  reg_class = %s\n"
	   "f  reg_class = %s\n"
	   "v  reg_class = %s\n"
	   "wa reg_class = %s\n"
	   "wd reg_class = %s\n"
	   "wf reg_class = %s\n"
	   "wg reg_class = %s\n"
	   "wh reg_class = %s\n"
	   "wi reg_class = %s\n"
	   "wj reg_class = %s\n"
	   "wk reg_class = %s\n"
	   "wl reg_class = %s\n"
	   "wm reg_class = %s\n"
	   "wr reg_class = %s\n"
	   "ws reg_class = %s\n"
	   "wt reg_class = %s\n"
	   "wu reg_class = %s\n"
	   "wv reg_class = %s\n"
	   "ww reg_class = %s\n"
	   "wx reg_class = %s\n"
	   "wy reg_class = %s\n"
	   "wz reg_class = %s\n"
	   "\n",
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);

  nl = "\n";
  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    rs6000_debug_print_mode (m);

  fputs ("\n", stderr);
  for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
    {
      machine_mode mode1 = print_tieable_modes[m1];
      bool first_time = true;

      nl = (const char *)0;
      for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
	{
	  machine_mode mode2 = print_tieable_modes[m2];
	  if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
	    {
	      if (first_time)
		{
		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
		  nl = "\n";
		  first_time = false;
		}

	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));
	    }
	}

      if (!first_time)
	fputs ("\n", stderr);
    }

  if (nl)
    fputs (nl, stderr);

  if (rs6000_recip_control)
    {
      fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_recip_bits[m])
	  {
	    fprintf (stderr,
		     "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
		     GET_MODE_NAME (m),
		     (RS6000_RECIP_AUTO_RE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
		     (RS6000_RECIP_AUTO_RSQRTE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
	  }

      fputs ("\n", stderr);
    }
  if (rs6000_cpu_index >= 0)
    {
      const char *name = processor_target_table[rs6000_cpu_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_cpu_index].target_enable;

      sprintf (flags_buffer, "-mcpu=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");

  if (rs6000_tune_index >= 0)
    {
      const char *name = processor_target_table[rs6000_tune_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_tune_index].target_enable;

      sprintf (flags_buffer, "-mtune=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");

  cl_target_option_save (&cl_opts, &global_options);
  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
			    rs6000_isa_flags);

  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
			    rs6000_isa_flags_explicit);

  rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
				rs6000_builtin_mask);

  rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
  switch (rs6000_sched_costly_dep)
    {
    case max_dep_latency:
      costly_str = "max_dep_latency";
      break;

    case no_dep_costly:
      costly_str = "no_dep_costly";
      break;

    case all_deps_costly:
      costly_str = "all_deps_costly";
      break;

    case true_store_to_load_dep_costly:
      costly_str = "true_store_to_load_dep_costly";
      break;

    case store_to_load_dep_costly:
      costly_str = "store_to_load_dep_costly";
      break;

    default:
      costly_str = costly_num;
      sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);

  switch (rs6000_sched_insert_nops)
    {
    case sched_finish_regroup_exact:
      nop_str = "sched_finish_regroup_exact";
      break;

    case sched_finish_pad_groups:
      nop_str = "sched_finish_pad_groups";
      break;

    case sched_finish_none:
      nop_str = "sched_finish_none";
      break;

    default:
      nop_str = nop_num;
      sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);

  switch (rs6000_sdata)
    {
    default:
    case SDATA_NONE:
      break;

    case SDATA_DATA:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
      break;

    case SDATA_SYSV:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
      break;

    case SDATA_EABI:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
      break;
    }

  switch (rs6000_traceback)
    {
    case traceback_default:	trace_str = "default";	break;
    case traceback_none:	trace_str = "none";	break;
    case traceback_part:	trace_str = "part";	break;
    case traceback_full:	trace_str = "full";	break;
    default:			trace_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);

  switch (rs6000_current_cmodel)
    {
    case CMODEL_SMALL:	cmodel_str = "small";	break;
    case CMODEL_MEDIUM:	cmodel_str = "medium";	break;
    case CMODEL_LARGE:	cmodel_str = "large";	break;
    default:		cmodel_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);

  switch (rs6000_current_abi)
    {
    case ABI_NONE:	abi_str = "none";	break;
    case ABI_AIX:	abi_str = "aix";	break;
    case ABI_ELFv2:	abi_str = "ELFv2";	break;
    case ABI_V4:	abi_str = "V4";		break;
    case ABI_DARWIN:	abi_str = "darwin";	break;
    default:		abi_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
  if (rs6000_altivec_abi)
    fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");

  if (rs6000_spe_abi)
    fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");

  if (rs6000_darwin64_abi)
    fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");

  if (rs6000_float_gprs)
    fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");

  fprintf (stderr, DEBUG_FMT_S, "fprs",
	   (TARGET_FPRS ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "single_float",
	   (TARGET_SINGLE_FLOAT ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "double_float",
	   (TARGET_DOUBLE_FLOAT ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "soft_float",
	   (TARGET_SOFT_FLOAT ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "e500_single",
	   (TARGET_E500_SINGLE ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "e500_double",
	   (TARGET_E500_DOUBLE ? "true" : "false"));

  if (TARGET_LINK_STACK)
    fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");

  if (targetm.lra_p ())
    fprintf (stderr, DEBUG_FMT_S, "lra", "true");

  if (TARGET_P8_FUSION)
    fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
	     (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");

  fprintf (stderr, DEBUG_FMT_S, "plt-format",
	   TARGET_SECURE_PLT ? "secure" : "bss");
  fprintf (stderr, DEBUG_FMT_S, "struct-return",
	   aix_struct_return ? "aix" : "sysv");
  fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
  fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
  fprintf (stderr, DEBUG_FMT_S, "align_branch",
	   tf[!!rs6000_align_branch_targets]);
  fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
  fprintf (stderr, DEBUG_FMT_D, "long_double_size",
	   rs6000_long_double_type_size);
  fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
	   (int)rs6000_sched_restricted_insns_priority);
  fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
	   (int)END_BUILTINS);
  fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
	   (int)RS6000_BUILTIN_COUNT);

  if (TARGET_VSX)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
	     (int)VECTOR_ELEMENT_SCALAR_64BIT);
}
/* Update the addr mask bits in reg_addr to help secondary reload and the
   legitimate-address support figure out the appropriate addressing to
   use.  */

static void
rs6000_setup_reg_addr_masks (void)
{
  ssize_t rc, reg, m, nregs;
  addr_mask_type any_addr_mask, addr_mask;

  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    {
      machine_mode m2 = (machine_mode)m;

      /* SDmode is special in that we want to access it only via REG+REG
	 addressing on power7 and above, since we want to use the LFIWZX and
	 STFIWZX instructions to load it.  */
      bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);

      any_addr_mask = 0;
      for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
	{
	  addr_mask = 0;
	  reg = reload_reg_map[rc].reg;

	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
	    {
	      nregs = rs6000_hard_regno_nregs[m][reg];
	      addr_mask |= RELOAD_REG_VALID;

	      /* Indicate if the mode takes more than 1 physical register.  If
		 it takes a single register, indicate it can do REG+REG
		 addressing.  */
	      if (nregs > 1 || m == BLKmode)
		addr_mask |= RELOAD_REG_MULTIPLE;
	      else
		addr_mask |= RELOAD_REG_INDEXED;

	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
		 addressing.  Restrict addressing on SPE for 64-bit types
		 because of the SUBREG hackery used to address 64-bit floats in
		 '32-bit' GPRs.  */

	      if (TARGET_UPDATE
		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
		  && GET_MODE_SIZE (m2) <= 8
		  && !VECTOR_MODE_P (m2)
		  && !COMPLEX_MODE_P (m2)
		  && !indexed_only_p
		  && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
		{
		  addr_mask |= RELOAD_REG_PRE_INCDEC;

		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
		     we don't allow PRE_MODIFY for some multi-register
		     operations.  */
		  switch (m)
		    {
		    default:
		      addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case DImode:
		      if (TARGET_POWERPC64)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case DFmode:
		    case DDmode:
		      if (TARGET_DF_INSN)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;
		    }
		}
	    }

	  /* GPR and FPR registers can do REG+OFFSET addressing, except
	     possibly for SDmode.  */
	  if ((addr_mask != 0) && !indexed_only_p
	      && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
	    addr_mask |= RELOAD_REG_OFFSET;

	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
	     addressing on 128-bit types.  */
	  if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
	      && (addr_mask & RELOAD_REG_VALID) != 0)
	    addr_mask |= RELOAD_REG_AND_M16;

	  reg_addr[m].addr_mask[rc] = addr_mask;
	  any_addr_mask |= addr_mask;
	}

      reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
    }
}
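/* Worked example (illustrative, not in the original source): for DFmode
   on a 64-bit target with -mupdate in effect, the FPR reload class
   typically ends up with RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET plus the PRE_INCDEC/PRE_MODIFY bits, which
   rs6000_debug_addr_mask renders as "v io++", while a 16-byte vector
   mode in the VMX class gains RELOAD_REG_AND_M16 ('&') instead of
   RELOAD_REG_OFFSET, matching Altivec's (REG & -16) addressing.  */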
/* Initialize the various global tables that are based on register size.  */
static void
rs6000_init_hard_regno_mode_ok (bool global_init_p)
{
  ssize_t r, m, c;
  int align64;
  int align32;

  /* Precalculate REGNO_REG_CLASS.  */
  rs6000_regno_regclass[0] = GENERAL_REGS;
  for (r = 1; r < 32; ++r)
    rs6000_regno_regclass[r] = BASE_REGS;

  for (r = 32; r < 64; ++r)
    rs6000_regno_regclass[r] = FLOAT_REGS;

  for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
    rs6000_regno_regclass[r] = NO_REGS;

  for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
    rs6000_regno_regclass[r] = ALTIVEC_REGS;

  rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
  for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
    rs6000_regno_regclass[r] = CR_REGS;

  rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
  rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
  rs6000_regno_regclass[CA_REGNO] = NO_REGS;
  rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
  rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
  rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
  rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
  rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
  rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
  rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;

  /* Precalculate register class to simpler reload register class.  We don't
     need all of the register classes that are combinations of different
     classes, just the simple ones that have constraint letters.  */
  for (c = 0; c < N_REG_CLASSES; c++)
    reg_class_to_reg_type[c] = NO_REG_TYPE;

  reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
  reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
  reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
  reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
  reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;

  if (TARGET_VSX)
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
    }
  else
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
    }
  /* Precalculate the valid memory formats as well as the vector information;
     this must be set up before the rs6000_hard_regno_nregs_internal calls
     below.  */
  gcc_assert ((int)VECTOR_NONE == 0);
  memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
  memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));

  gcc_assert ((int)CODE_FOR_nothing == 0);
  memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));

  gcc_assert ((int)NO_REGS == 0);
  memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));

  /* The VSX hardware allows native alignment for vectors, but we control
     whether the compiler believes it can use native alignment or still uses
     128-bit alignment.  */
  if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
    {
      align64 = 64;
      align32 = 32;
    }
  else
    {
      align64 = 128;
      align32 = 128;
    }
  /* V2DF mode, VSX only.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
      rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
      rs6000_vector_align[V2DFmode] = align64;
    }

  /* V4SF mode, either VSX or Altivec.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
      rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
      rs6000_vector_align[V4SFmode] = align32;
    }
  else if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SFmode] = align32;
    }

  /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
     and stores.  */
  if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SImode] = align32;
      rs6000_vector_align[V8HImode] = align32;
      rs6000_vector_align[V16QImode] = align32;

      if (TARGET_VSX)
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_VSX;
	  rs6000_vector_mem[V8HImode] = VECTOR_VSX;
	  rs6000_vector_mem[V16QImode] = VECTOR_VSX;
	}
      else
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
	}
    }

  /* V2DImode, full mode depends on ISA 2.07 vector mode.  Allow under VSX to
     do insert/splat/extract.  Altivec doesn't have 64-bit integer support.  */
  if (TARGET_VSX)
    {
      rs6000_vector_mem[V2DImode] = VECTOR_VSX;
      rs6000_vector_unit[V2DImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V2DImode] = align64;

      rs6000_vector_mem[V1TImode] = VECTOR_VSX;
      rs6000_vector_unit[V1TImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V1TImode] = 128;
    }

  /* DFmode, see if we want to use the VSX unit.  Memory is handled
     differently, so don't set rs6000_vector_mem.  */
  if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
    {
      rs6000_vector_unit[DFmode] = VECTOR_VSX;
      rs6000_vector_align[DFmode] = 64;
    }

  /* SFmode, see if we want to use the VSX unit.  */
  if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
    {
      rs6000_vector_unit[SFmode] = VECTOR_VSX;
      rs6000_vector_align[SFmode] = 32;
    }

  /* Allow TImode in VSX register and set the VSX memory macros.  */
  if (TARGET_VSX && TARGET_VSX_TIMODE)
    {
      rs6000_vector_mem[TImode] = VECTOR_VSX;
      rs6000_vector_align[TImode] = align64;
    }
  /* TODO add SPE and paired floating point vector support.  */

  /* Register class constraints for the constraints that depend on compile
     switches.  When the VSX code was added, different constraints were added
     based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
     of the VSX registers are used.  The register classes for scalar floating
     point types is set, based on whether we allow that type into the upper
     (Altivec) registers.  GCC has register classes to target the Altivec
     registers for load/store operations, to select using a VSX memory
     operation instead of the traditional floating point operation.  The
     constraints are:

	d  - Register class to use with traditional DFmode instructions.
	f  - Register class to use with traditional SFmode instructions.
	v  - Altivec register.
	wa - Any VSX register.
	wc - Reserved to represent individual CR bits (used in LLVM).
	wd - Preferred register class for V2DFmode.
	wf - Preferred register class for V4SFmode.
	wg - Float register for power6x move insns.
	wh - FP register for direct move instructions.
	wi - FP or VSX register to hold 64-bit integers for VSX insns.
	wj - FP or VSX register to hold 64-bit integers for direct moves.
	wk - FP or VSX register to hold 64-bit doubles for direct moves.
	wl - Float register if we can do 32-bit signed int loads.
	wm - VSX register for ISA 2.07 direct move operations.
	wn - always NO_REGS.
	wr - GPR if 64-bit mode is permitted.
	ws - Register class to do ISA 2.06 DF operations.
	wt - VSX register for TImode in VSX registers.
	wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
	wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
	ww - Register class to do SF conversions in with VSX operations.
	wx - Float register if we can do 32-bit int stores.
	wy - Register class to do ISA 2.07 SF operations.
	wz - Float register if we can do 32-bit unsigned int loads.  */
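  /* Usage sketch (illustrative, not in the original source): a pattern in
     rs6000.md can accept "whatever register class the current ISA allows
     for DFmode" by writing something like

	(match_operand:DF 1 "gpc_reg_operand" "ws")

     and the constraint silently degrades from VSX_REGS to FLOAT_REGS when
     -mvsx or -mupper-regs-df is not in effect, because the table below is
     filled in at option-override time.  */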
  if (TARGET_HARD_FLOAT && TARGET_FPRS)
    rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;	/* SFmode  */

  if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
    rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;	/* DFmode  */

  if (TARGET_VSX)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;	/* V2DFmode  */
      rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;	/* V4SFmode  */
      rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;	/* DImode  */

      if (TARGET_VSX_TIMODE)
	rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;	/* TImode  */

      if (TARGET_UPPER_REGS_DF)					/* DFmode  */
	{
	  rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
	  rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
	}
      else
	rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
    }

  /* Add conditional constraints based on various options, to allow us to
     collapse multiple insn patterns.  */
  if (TARGET_ALTIVEC)
    rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;

  if (TARGET_MFPGPR)						/* DFmode  */
    rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;

  if (TARGET_LFIWAX)
    rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;	/* DImode  */

  if (TARGET_DIRECT_MOVE)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wj]			/* DImode  */
	= rs6000_constraints[RS6000_CONSTRAINT_wi];
      rs6000_constraints[RS6000_CONSTRAINT_wk]			/* DFmode  */
	= rs6000_constraints[RS6000_CONSTRAINT_ws];
      rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
    }

  if (TARGET_POWERPC64)
    rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;

  if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)			/* SFmode  */
    {
      rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
    }
  else if (TARGET_P8_VECTOR)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
    }
  else if (TARGET_VSX)
    rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;

  if (TARGET_STFIWX)
    rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;	/* DImode  */

  if (TARGET_LFIWZX)
    rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;	/* DImode  */
  /* Set up the reload helper and direct move functions.  */
  if (TARGET_VSX || TARGET_ALTIVEC)
    {
      if (TARGET_64BIT)
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
	  reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
	  reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
	  reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
	  reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
	  reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
	  reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
	  reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
	  reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
	  reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
	  reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
	  reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
	  reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
	  reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
	  reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
	  reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
	  reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
	  reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
	  reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
	  reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
	  reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
	  reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;

	  if (TARGET_VSX_TIMODE)
	    {
	      reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
	      reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
	    }

	  if (TARGET_DIRECT_MOVE)
	    {
	      reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
	      reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
	      reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
	      reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
	      reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
	      reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
	      reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
	      reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
	      reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;

	      reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
	      reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
	      reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
	      reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
	      reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
	      reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
	      reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
	      reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
	      reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
	    }
	}
      else
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
	  reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
	  reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
	  reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
	  reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
	  reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
	  reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
	  reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
	  reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
	  reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
	  reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
	  reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
	  reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
	  reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
	  reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
	  reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
	  reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
	  reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
	  reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
	  reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
	  reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
	  reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;

	  if (TARGET_VSX_TIMODE)
	    {
	      reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
	      reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
	    }

	  if (TARGET_DIRECT_MOVE)
	    {
	      reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
	      reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
	      reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
	    }
	}

      if (TARGET_UPPER_REGS_DF)
	reg_addr[DFmode].scalar_in_vmx_p = true;

      if (TARGET_UPPER_REGS_SF)
	reg_addr[SFmode].scalar_in_vmx_p = true;
    }
  /* Precalculate HARD_REGNO_NREGS.  */
  for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      rs6000_hard_regno_nregs[m][r]
	= rs6000_hard_regno_nregs_internal (r, (machine_mode)m);

  /* Precalculate HARD_REGNO_MODE_OK.  */
  for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
	rs6000_hard_regno_mode_ok_p[m][r] = true;

  /* Precalculate CLASS_MAX_NREGS sizes.  */
  for (c = 0; c < LIM_REG_CLASSES; ++c)
    {
      int reg_size;

      if (TARGET_VSX && VSX_REG_CLASS_P (c))
	reg_size = UNITS_PER_VSX_WORD;

      else if (c == ALTIVEC_REGS)
	reg_size = UNITS_PER_ALTIVEC_WORD;

      else if (c == FLOAT_REGS)
	reg_size = UNITS_PER_FP_WORD;

      else
	reg_size = UNITS_PER_WORD;

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	{
	  machine_mode m2 = (machine_mode)m;
	  int reg_size2 = reg_size;

	  /* TFmode/TDmode always take 2 registers, even in VSX.  */
	  if (TARGET_VSX && VSX_REG_CLASS_P (c)
	      && (m == TDmode || m == TFmode))
	    reg_size2 = UNITS_PER_FP_WORD;

	  rs6000_class_max_nregs[m][c]
	    = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
	}
    }

  if (TARGET_E500_DOUBLE)
    rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
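  /* Worked example (illustrative, not in the original source): for
     FLOAT_REGS with UNITS_PER_FP_WORD == 8, TFmode (16 bytes) yields
     (16 + 8 - 1) / 8 == 2, and the TFmode/TDmode special case above keeps
     that answer at 2 even when VSX registers are 16 bytes wide.  */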
  /* Calculate which modes to automatically generate code to use the
     reciprocal divide and square root instructions.  In the future, possibly
     automatically generate the instructions even if the user did not specify
     -mrecip.  The older machines' double precision reciprocal square root
     estimate is not accurate enough.  */
  memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
  if (TARGET_FRES)
    rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (TARGET_FRE)
    rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;

  if (TARGET_FRSQRTES)
    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (TARGET_FRSQRTE)
    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;

  if (rs6000_recip_control)
    {
      if (!flag_finite_math_only)
	warning (0, "-mrecip requires -ffinite-math or -ffast-math");
      if (flag_trapping_math)
	warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
      if (!flag_reciprocal_math)
	warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
      if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
	{
	  if (RS6000_RECIP_HAVE_RE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_DIV) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_DIV) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
	}
    }
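  /* Example flow (illustrative, not in the original source): compiling
     with -mrecip -ffast-math on a machine that has fres first sets
     RS6000_RECIP_MASK_HAVE_RE and then RS6000_RECIP_MASK_AUTO_RE for
     SFmode, so a later a / b can be expanded as a * fres(b) refined by
     Newton-Raphson steps instead of a full-latency divide.  */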
  /* Update the addr mask bits in reg_addr to help secondary reload and the
     legitimate-address support figure out the appropriate addressing to
     use.  */
  rs6000_setup_reg_addr_masks ();

  if (global_init_p || TARGET_DEBUG_TARGET)
    {
      if (TARGET_DEBUG_REG)
	rs6000_debug_reg_global ();

      if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
	fprintf (stderr,
		 "SImode variable mult cost = %d\n"
		 "SImode constant mult cost = %d\n"
		 "SImode short constant mult cost = %d\n"
		 "DImode multiplication cost = %d\n"
		 "SImode division cost = %d\n"
		 "DImode division cost = %d\n"
		 "Simple fp operation cost = %d\n"
		 "DFmode multiplication cost = %d\n"
		 "SFmode division cost = %d\n"
		 "DFmode division cost = %d\n"
		 "cache line size = %d\n"
		 "l1 cache size = %d\n"
		 "l2 cache size = %d\n"
		 "simultaneous prefetches = %d\n"
		 "\n",
		 rs6000_cost->mulsi,
		 rs6000_cost->mulsi_const,
		 rs6000_cost->mulsi_const9,
		 rs6000_cost->muldi,
		 rs6000_cost->divsi,
		 rs6000_cost->divdi,
		 rs6000_cost->fp,
		 rs6000_cost->dmul,
		 rs6000_cost->sdiv,
		 rs6000_cost->ddiv,
		 rs6000_cost->cache_line_size,
		 rs6000_cost->l1_cache_size,
		 rs6000_cost->l2_cache_size,
		 rs6000_cost->simultaneous_prefetches);
    }
}
#if TARGET_MACHO
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  */

static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  if (DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    darwin_one_byte_bool = 1;

  if (TARGET_64BIT && ! TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "-m64 requires PowerPC64 architecture, enabling");
    }
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;
      rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     Altivec.  */
  if (!flag_mkernel && !flag_apple_kext
      && TARGET_64BIT
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) has explicitly overridden
     it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
     G4 unless targeting the kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && ! global_options_set.x_rs6000_cpu_index)
    {
      rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
    }
}
#endif
/* If not otherwise specified by a target, make 'long double' equivalent to
   'double'.  */

#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
#endif

/* Return the builtin mask of the various options used that could affect which
   builtins were used.  In the past we used target_flags, but we've run out of
   bits, and some options like SPE and PAIRED are no longer in
   target_flags.  */

HOST_WIDE_INT
rs6000_builtin_mask_calculate (void)
{
  return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
	  | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
	  | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
	  | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
	  | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
	  | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
	  | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
	  | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
	  | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
	  | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
	  | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
	  | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
	  | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
	  | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
	  | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
	  | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
}
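/* Usage sketch (illustrative, not in the original source): the resulting
   mask is stored in rs6000_builtin_mask and consulted when a builtin is
   expanded, along the lines of

     if ((rs6000_builtin_mask & RS6000_BTM_ALTIVEC) == 0)
       ...reject __builtin_altivec_* for this function...

   so per-function target attributes can enable or disable whole builtin
   families simply by recomputing the mask.  */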
/* Implement TARGET_MD_ASM_CLOBBERS.  All asm statements are considered
   to clobber the XER[CA] bit because clobbering that bit without telling
   the compiler worked just fine with versions of GCC before GCC 5, and
   breaking a lot of older code in ways that are hard to track down is
   not such a great idea.  */

static tree
rs6000_md_asm_clobbers (tree, tree, tree clobbers)
{
  tree s = build_string (strlen (reg_names[CA_REGNO]), reg_names[CA_REGNO]);
  return tree_cons (NULL_TREE, s, clobbers);
}
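/* Effect in practice (illustrative, not in the original source): an
   inline asm such as

     asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   modifies the carry bit without declaring it; implicitly prepending
   "ca" to every asm's clobber list keeps the compiler from caching a
   live carry value across the statement.  */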
/* Override command line options.  Mostly we process the processor type and
   sometimes adjust other TARGET_ options.  */

static bool
rs6000_option_override_internal (bool global_init_p)
{
  bool ret = true;
  bool have_cpu = false;

  /* The default cpu requested at configure time, if any.  */
  const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;

  HOST_WIDE_INT set_masks;
  int cpu_index;
  int tune_index;
  struct cl_target_option *main_target_opt
    = ((global_init_p || target_option_default_node == NULL)
       ? NULL : TREE_TARGET_OPTION (target_option_default_node));

  /* Remember the explicit arguments.  */
  if (global_init_p)
    rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;

  /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
     library functions, so warn about it.  The flag may be useful for
     performance studies from time to time though, so don't disable it
     entirely.  */
  if (global_options_set.x_rs6000_alignment_flags
      && rs6000_alignment_flags == MASK_ALIGN_POWER
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    warning (0, "-malign-power is not supported for 64-bit Darwin;"
	     " it is incompatible with the installed C and C++ libraries");
  /* Numerous experiments show that IRA-based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization,
     so it is on only for peak performance.  */
  if (optimize >= 3 && global_init_p
      && !global_options_set.x_flag_ira_loop_pressure)
    flag_ira_loop_pressure = 1;
3225 /* Set the pointer size. */
3226 if (TARGET_64BIT)
3228 rs6000_pmode = (int)DImode;
3229 rs6000_pointer_size = 64;
3231 else
3233 rs6000_pmode = (int)SImode;
3234 rs6000_pointer_size = 32;
3237 /* Some OSs don't support saving the high part of 64-bit registers on context
3238 switch. Other OSs don't support saving Altivec registers. On those OSs,
3239 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3240 if the user wants either, the user must explicitly specify them and we
3241 won't interfere with the user's specification. */
3243 set_masks = POWERPC_MASKS;
3244 #ifdef OS_MISSING_POWERPC64
3245 if (OS_MISSING_POWERPC64)
3246 set_masks &= ~OPTION_MASK_POWERPC64;
3247 #endif
3248 #ifdef OS_MISSING_ALTIVEC
3249 if (OS_MISSING_ALTIVEC)
3250 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3251 #endif
3253 /* Don't override by the processor default if given explicitly. */
3254 set_masks &= ~rs6000_isa_flags_explicit;
3256 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3257 the cpu in a target attribute or pragma, but did not specify a tuning
3258 option, use the cpu for the tuning option rather than the option specified
3259 with -mtune on the command line. Process a '--with-cpu' configuration
3260 request as an implicit --cpu. */
3261 if (rs6000_cpu_index >= 0)
3263 cpu_index = rs6000_cpu_index;
3264 have_cpu = true;
3266 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3268 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3269 have_cpu = true;
3271 else if (implicit_cpu)
3273 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3274 have_cpu = true;
3276 else
3278 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3279 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3280 have_cpu = false;
3283 gcc_assert (cpu_index >= 0);
3285 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3286 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3287 with those from the cpu, except for options that were explicitly set. If
3288 we don't have a cpu, do not override the target bits set in
3289 TARGET_DEFAULT. */
3290 if (have_cpu)
3292 rs6000_isa_flags &= ~set_masks;
3293 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3294 & set_masks);
3296 else
3297 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3298 & ~rs6000_isa_flags_explicit);
3300 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3301 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3302 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3303 to using rs6000_isa_flags, we need to do the initialization here. */
3304 if (!have_cpu)
3305 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
3307 if (rs6000_tune_index >= 0)
3308 tune_index = rs6000_tune_index;
3309 else if (have_cpu)
3310 rs6000_tune_index = tune_index = cpu_index;
3311 else
3313 size_t i;
3314 enum processor_type tune_proc
3315 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3317 tune_index = -1;
3318 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3319 if (processor_target_table[i].processor == tune_proc)
3321 rs6000_tune_index = tune_index = i;
3322 break;
3326 gcc_assert (tune_index >= 0);
3327 rs6000_cpu = processor_target_table[tune_index].processor;
3329 /* Pick defaults for SPE-related control flags. Do this early to make sure
3330 that the TARGET_ macros are representative ASAP. */
3332 int spe_capable_cpu =
3333 (rs6000_cpu == PROCESSOR_PPC8540
3334 || rs6000_cpu == PROCESSOR_PPC8548);
3336 if (!global_options_set.x_rs6000_spe_abi)
3337 rs6000_spe_abi = spe_capable_cpu;
3339 if (!global_options_set.x_rs6000_spe)
3340 rs6000_spe = spe_capable_cpu;
3342 if (!global_options_set.x_rs6000_float_gprs)
3343 rs6000_float_gprs =
3344 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3345 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3346 : 0);
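/* Note (based on the documented -mfloat-gprs levels, not on code in this
   file): 1 corresponds to -mfloat-gprs=single, the e500v1/8540 mode with
   single-precision FP in GPRs, and 2 to -mfloat-gprs=double for the
   e500v2/8548. */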
3349 if (global_options_set.x_rs6000_spe_abi
3350 && rs6000_spe_abi
3351 && !TARGET_SPE_ABI)
3352 error ("not configured for SPE ABI");
3354 if (global_options_set.x_rs6000_spe
3355 && rs6000_spe
3356 && !TARGET_SPE)
3357 error ("not configured for SPE instruction set");
3359 if (main_target_opt != NULL
3360 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3361 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3362 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3363 error ("target attribute or pragma changes SPE ABI");
3365 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3366 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3367 || rs6000_cpu == PROCESSOR_PPCE5500)
3369 if (TARGET_ALTIVEC)
3370 error ("AltiVec not supported in this target");
3371 if (TARGET_SPE)
3372 error ("SPE not supported in this target");
3374 if (rs6000_cpu == PROCESSOR_PPCE6500)
3376 if (TARGET_SPE)
3377 error ("SPE not supported in this target");
3380 /* Disable Cell microcode if we are optimizing for the Cell
3381 and not optimizing for size. */
3382 if (rs6000_gen_cell_microcode == -1)
3383 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3384 && !optimize_size);
3386 /* If we are optimizing big endian systems for space and it's OK to
3387 use instructions that would be microcoded on the Cell, use the
3388 load/store multiple and string instructions. */
3389 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3390 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3391 | OPTION_MASK_STRING);
3393 /* Don't allow -mmultiple or -mstring on little endian systems
3394 unless the cpu is a 750, because the hardware doesn't support the
3395 instructions used in little endian mode, and they cause an alignment
3396 trap. The 750 does not cause an alignment trap (except when the
3397 target is unaligned). */
3399 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3401 if (TARGET_MULTIPLE)
3403 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3404 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3405 warning (0, "-mmultiple is not supported on little endian systems");
3408 if (TARGET_STRING)
3410 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3411 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3412 warning (0, "-mstring is not supported on little endian systems");
3416 /* If little-endian, default to -mstrict-align on older processors.
3417 Testing for htm matches power8 and later. */
3418 if (!BYTES_BIG_ENDIAN
3419 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3420 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3422 /* -maltivec={le,be} implies -maltivec. */
3423 if (rs6000_altivec_element_order != 0)
3424 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3426 /* Disallow -maltivec=le in big endian mode for now. This is not
3427 known to be useful for anyone. */
3428 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3430 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3431 rs6000_altivec_element_order = 0;
3434 /* Add some warnings for VSX. */
3435 if (TARGET_VSX)
3437 const char *msg = NULL;
3438 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3439 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3441 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3442 msg = N_("-mvsx requires hardware floating point");
3443 else
3445 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3446 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3449 else if (TARGET_PAIRED_FLOAT)
3450 msg = N_("-mvsx and -mpaired are incompatible");
3451 else if (TARGET_AVOID_XFORM > 0)
3452 msg = N_("-mvsx needs indexed addressing");
3453 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3454 & OPTION_MASK_ALTIVEC))
3456 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3457 msg = N_("-mvsx and -mno-altivec are incompatible");
3458 else
3459 msg = N_("-mno-altivec disables vsx");
3462 if (msg)
3464 warning (0, msg);
3465 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3466 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3470 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3471 the -mcpu setting to enable options that conflict. */
3472 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3473 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3474 | OPTION_MASK_ALTIVEC
3475 | OPTION_MASK_VSX)) != 0)
3476 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3477 | OPTION_MASK_DIRECT_MOVE)
3478 & ~rs6000_isa_flags_explicit);
3480 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3481 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3483 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3484 unless the user explicitly used -mno-<option> to disable the code. */
3485 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3486 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3487 else if (TARGET_VSX)
3488 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3489 else if (TARGET_POPCNTD)
3490 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3491 else if (TARGET_DFP)
3492 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3493 else if (TARGET_CMPB)
3494 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3495 else if (TARGET_FPRND)
3496 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3497 else if (TARGET_POPCNTB)
3498 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3499 else if (TARGET_ALTIVEC)
3500 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
3502 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3504 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3505 error ("-mcrypto requires -maltivec");
3506 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3509 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3511 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3512 error ("-mdirect-move requires -mvsx");
3513 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3516 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3518 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3519 error ("-mpower8-vector requires -maltivec");
3520 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3523 if (TARGET_P8_VECTOR && !TARGET_VSX)
3525 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3526 error ("-mpower8-vector requires -mvsx");
3527 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3530 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3532 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3533 error ("-mvsx-timode requires -mvsx");
3534 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3537 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3539 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3540 error ("-mhard-dfp requires -mhard-float");
3541 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3544 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3545 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3546 the individual option. */
3547 if (TARGET_UPPER_REGS > 0)
3549 if (TARGET_VSX
3550 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3552 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3553 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3555 if (TARGET_P8_VECTOR
3556 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3558 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3559 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3562 else if (TARGET_UPPER_REGS == 0)
3564 if (TARGET_VSX
3565 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3567 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3568 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3570 if (TARGET_P8_VECTOR
3571 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3573 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3574 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3578 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3580 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3581 error ("-mupper-regs-df requires -mvsx");
3582 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3585 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3587 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3588 error ("-mupper-regs-sf requires -mpower8-vector");
3589 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3592 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3593 silently turn off quad memory mode. */
3594 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3596 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3597 warning (0, N_("-mquad-memory requires 64-bit mode"));
3599 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3600 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3602 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3603 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3606 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3607 the words are reversed, but atomic operations can still be done by
3608 swapping the words. */
3609 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3611 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3612 warning (0, N_("-mquad-memory is not available in little endian mode"));
3614 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3617 /* Assume if the user asked for normal quad memory instructions, they want
3618 the atomic versions as well, unless they explicitly told us not to use quad
3619 word atomic instructions. */
3620 if (TARGET_QUAD_MEMORY
3621 && !TARGET_QUAD_MEMORY_ATOMIC
3622 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3623 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3625 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3626 generating power8 instructions. */
3627 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3628 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3629 & OPTION_MASK_P8_FUSION);
3631 /* Power8 does not fuse sign-extended loads with the addis. If we are
3632 optimizing at high levels for speed, convert a sign-extended load into a
3633 zero-extending load and an explicit sign extension. */
3634 if (TARGET_P8_FUSION
3635 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3636 && optimize_function_for_speed_p (cfun)
3637 && optimize >= 3)
3638 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
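/* Sketch of the intended transformation (illustrative assembly, not code
   from this file): with P8_FUSION_SIGN a sign-extending load such as

     lha 9,0(3)        # addis+lha does not fuse on power8

   is emitted instead as

     lhz 9,0(3)        # addis+lhz can fuse
     extsh 9,9         # explicit sign extension

   trading one extra instruction for the fusion opportunity. */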
3640 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3641 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3643 /* E500mc does "better" if we inline more aggressively. Respect the
3644 user's opinion, though. */
3645 if (rs6000_block_move_inline_limit == 0
3646 && (rs6000_cpu == PROCESSOR_PPCE500MC
3647 || rs6000_cpu == PROCESSOR_PPCE500MC64
3648 || rs6000_cpu == PROCESSOR_PPCE5500
3649 || rs6000_cpu == PROCESSOR_PPCE6500))
3650 rs6000_block_move_inline_limit = 128;
3652 /* store_one_arg depends on expand_block_move to handle at least the
3653 size of reg_parm_stack_space. */
3654 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3655 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3657 if (global_init_p)
3659 /* If the appropriate debug option is enabled, replace the target hooks
3660 with debug versions that call the real version and then print
3661 debugging information. */
3662 if (TARGET_DEBUG_COST)
3664 targetm.rtx_costs = rs6000_debug_rtx_costs;
3665 targetm.address_cost = rs6000_debug_address_cost;
3666 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3669 if (TARGET_DEBUG_ADDR)
3671 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3672 targetm.legitimize_address = rs6000_debug_legitimize_address;
3673 rs6000_secondary_reload_class_ptr
3674 = rs6000_debug_secondary_reload_class;
3675 rs6000_secondary_memory_needed_ptr
3676 = rs6000_debug_secondary_memory_needed;
3677 rs6000_cannot_change_mode_class_ptr
3678 = rs6000_debug_cannot_change_mode_class;
3679 rs6000_preferred_reload_class_ptr
3680 = rs6000_debug_preferred_reload_class;
3681 rs6000_legitimize_reload_address_ptr
3682 = rs6000_debug_legitimize_reload_address;
3683 rs6000_mode_dependent_address_ptr
3684 = rs6000_debug_mode_dependent_address;
3687 if (rs6000_veclibabi_name)
3689 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3690 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3691 else
3693 error ("unknown vectorization library ABI type (%s) for "
3694 "-mveclibabi= switch", rs6000_veclibabi_name);
3695 ret = false;
3700 if (!global_options_set.x_rs6000_long_double_type_size)
3702 if (main_target_opt != NULL
3703 && (main_target_opt->x_rs6000_long_double_type_size
3704 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3705 error ("target attribute or pragma changes long double size");
3706 else
3707 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3710 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3711 if (!global_options_set.x_rs6000_ieeequad)
3712 rs6000_ieeequad = 1;
3713 #endif
3715 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3716 target attribute or pragma, which automatically enables both options,
3717 unless the altivec ABI was set. This is set by default for 64-bit, but
3718 not for 32-bit. */
3719 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3720 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3721 & ~rs6000_isa_flags_explicit);
3723 /* Enable Altivec ABI for AIX -maltivec. */
3724 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3726 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3727 error ("target attribute or pragma changes AltiVec ABI");
3728 else
3729 rs6000_altivec_abi = 1;
3732 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3733 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3734 be explicitly overridden in either case. */
3735 if (TARGET_ELF)
3737 if (!global_options_set.x_rs6000_altivec_abi
3738 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3740 if (main_target_opt != NULL &&
3741 !main_target_opt->x_rs6000_altivec_abi)
3742 error ("target attribute or pragma changes AltiVec ABI");
3743 else
3744 rs6000_altivec_abi = 1;
3748 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3749 So far, the only darwin64 targets are also MACH-O. */
3750 if (TARGET_MACHO
3751 && DEFAULT_ABI == ABI_DARWIN
3752 && TARGET_64BIT)
3754 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3755 error ("target attribute or pragma changes darwin64 ABI");
3756 else
3758 rs6000_darwin64_abi = 1;
3759 /* Default to natural alignment, for better performance. */
3760 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3764 /* Place FP constants in the constant pool instead of the TOC
3765 if section anchors are enabled. */
3766 if (flag_section_anchors
3767 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3768 TARGET_NO_FP_IN_TOC = 1;
3770 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3771 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3773 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3774 SUBTARGET_OVERRIDE_OPTIONS;
3775 #endif
3776 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3777 SUBSUBTARGET_OVERRIDE_OPTIONS;
3778 #endif
3779 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3780 SUB3TARGET_OVERRIDE_OPTIONS;
3781 #endif
3783 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3784 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3786 /* For the E500 family of cores, reset the single/double FP flags to let us
3787 check that they remain constant across attributes or pragmas. Also,
3788 clear a possible request for string instructions, which are not supported
3789 and which we might have silently enabled above for -Os.
3791 For other families, clear ISEL in case it was set implicitly.
3794 switch (rs6000_cpu)
3796 case PROCESSOR_PPC8540:
3797 case PROCESSOR_PPC8548:
3798 case PROCESSOR_PPCE500MC:
3799 case PROCESSOR_PPCE500MC64:
3800 case PROCESSOR_PPCE5500:
3801 case PROCESSOR_PPCE6500:
3803 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3804 rs6000_double_float = TARGET_E500_DOUBLE;
3806 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3808 break;
3810 default:
3812 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3813 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3815 break;
3818 if (main_target_opt)
3820 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3821 error ("target attribute or pragma changes single precision floating "
3822 "point");
3823 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3824 error ("target attribute or pragma changes double precision floating "
3825 "point");
3828 /* Detect invalid option combinations with E500. */
3829 CHECK_E500_OPTIONS;
3831 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3832 && rs6000_cpu != PROCESSOR_POWER5
3833 && rs6000_cpu != PROCESSOR_POWER6
3834 && rs6000_cpu != PROCESSOR_POWER7
3835 && rs6000_cpu != PROCESSOR_POWER8
3836 && rs6000_cpu != PROCESSOR_PPCA2
3837 && rs6000_cpu != PROCESSOR_CELL
3838 && rs6000_cpu != PROCESSOR_PPC476);
3839 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3840 || rs6000_cpu == PROCESSOR_POWER5
3841 || rs6000_cpu == PROCESSOR_POWER7
3842 || rs6000_cpu == PROCESSOR_POWER8);
3843 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3844 || rs6000_cpu == PROCESSOR_POWER5
3845 || rs6000_cpu == PROCESSOR_POWER6
3846 || rs6000_cpu == PROCESSOR_POWER7
3847 || rs6000_cpu == PROCESSOR_POWER8
3848 || rs6000_cpu == PROCESSOR_PPCE500MC
3849 || rs6000_cpu == PROCESSOR_PPCE500MC64
3850 || rs6000_cpu == PROCESSOR_PPCE5500
3851 || rs6000_cpu == PROCESSOR_PPCE6500);
3853 /* Allow debug switches to override the above settings. These are set to -1
3854 in rs6000.opt to indicate the user hasn't directly set the switch. */
3855 if (TARGET_ALWAYS_HINT >= 0)
3856 rs6000_always_hint = TARGET_ALWAYS_HINT;
3858 if (TARGET_SCHED_GROUPS >= 0)
3859 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3861 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3862 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3864 rs6000_sched_restricted_insns_priority
3865 = (rs6000_sched_groups ? 1 : 0);
3867 /* Handle -msched-costly-dep option. */
3868 rs6000_sched_costly_dep
3869 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3871 if (rs6000_sched_costly_dep_str)
3873 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3874 rs6000_sched_costly_dep = no_dep_costly;
3875 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3876 rs6000_sched_costly_dep = all_deps_costly;
3877 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3878 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3879 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3880 rs6000_sched_costly_dep = store_to_load_dep_costly;
3881 else
3882 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3883 atoi (rs6000_sched_costly_dep_str));
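/* Illustration (based on the documented -msched-costly-dep behavior, not
   on code in this file): a plain number such as -msched-costly-dep=20
   falls through to the atoi path above, making every dependence whose
   latency is at least 20 count as costly. */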
3886 /* Handle -minsert-sched-nops option. */
3887 rs6000_sched_insert_nops
3888 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3890 if (rs6000_sched_insert_nops_str)
3892 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3893 rs6000_sched_insert_nops = sched_finish_none;
3894 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3895 rs6000_sched_insert_nops = sched_finish_pad_groups;
3896 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3897 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3898 else
3899 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3900 atoi (rs6000_sched_insert_nops_str));
3903 if (global_init_p)
3905 #ifdef TARGET_REGNAMES
3906 /* If the user desires alternate register names, copy in the
3907 alternate names now. */
3908 if (TARGET_REGNAMES)
3909 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3910 #endif
3912 /* Set aix_struct_return last, after the ABI is determined.
3913 If -maix-struct-return or -msvr4-struct-return was explicitly
3914 used, don't override with the ABI default. */
3915 if (!global_options_set.x_aix_struct_return)
3916 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3918 #if 0
3919 /* IBM XL compiler defaults to unsigned bitfields. */
3920 if (TARGET_XL_COMPAT)
3921 flag_signed_bitfields = 0;
3922 #endif
3924 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3925 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
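/* Background (standard double-double facts, not from this file): the IBM
   extended format represents a 128-bit long double as a pair of doubles
   whose sum is the value, giving about 106 bits of significand but only
   the exponent range of a 64-bit double. */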
3927 if (TARGET_TOC)
3928 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3930 /* We can only guarantee the availability of DI pseudo-ops when
3931 assembling for 64-bit targets. */
3932 if (!TARGET_64BIT)
3934 targetm.asm_out.aligned_op.di = NULL;
3935 targetm.asm_out.unaligned_op.di = NULL;
3939 /* Set branch target alignment, if not optimizing for size. */
3940 if (!optimize_size)
3942 /* Cell wants to be aligned to 8 bytes for dual issue. Titan wants to be
3943 aligned to 8 bytes to avoid misprediction by the branch predictor. */
3944 if (rs6000_cpu == PROCESSOR_TITAN
3945 || rs6000_cpu == PROCESSOR_CELL)
3947 if (align_functions <= 0)
3948 align_functions = 8;
3949 if (align_jumps <= 0)
3950 align_jumps = 8;
3951 if (align_loops <= 0)
3952 align_loops = 8;
3954 if (rs6000_align_branch_targets)
3956 if (align_functions <= 0)
3957 align_functions = 16;
3958 if (align_jumps <= 0)
3959 align_jumps = 16;
3960 if (align_loops <= 0)
3962 can_override_loop_align = 1;
3963 align_loops = 16;
3966 if (align_jumps_max_skip <= 0)
3967 align_jumps_max_skip = 15;
3968 if (align_loops_max_skip <= 0)
3969 align_loops_max_skip = 15;
3972 /* Arrange to save and restore machine status around nested functions. */
3973 init_machine_status = rs6000_init_machine_status;
3975 /* We should always be splitting complex arguments, but we can't break
3976 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3977 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
3978 targetm.calls.split_complex_arg = NULL;
3981 /* Initialize rs6000_cost with the appropriate target costs. */
3982 if (optimize_size)
3983 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3984 else
3985 switch (rs6000_cpu)
3987 case PROCESSOR_RS64A:
3988 rs6000_cost = &rs64a_cost;
3989 break;
3991 case PROCESSOR_MPCCORE:
3992 rs6000_cost = &mpccore_cost;
3993 break;
3995 case PROCESSOR_PPC403:
3996 rs6000_cost = &ppc403_cost;
3997 break;
3999 case PROCESSOR_PPC405:
4000 rs6000_cost = &ppc405_cost;
4001 break;
4003 case PROCESSOR_PPC440:
4004 rs6000_cost = &ppc440_cost;
4005 break;
4007 case PROCESSOR_PPC476:
4008 rs6000_cost = &ppc476_cost;
4009 break;
4011 case PROCESSOR_PPC601:
4012 rs6000_cost = &ppc601_cost;
4013 break;
4015 case PROCESSOR_PPC603:
4016 rs6000_cost = &ppc603_cost;
4017 break;
4019 case PROCESSOR_PPC604:
4020 rs6000_cost = &ppc604_cost;
4021 break;
4023 case PROCESSOR_PPC604e:
4024 rs6000_cost = &ppc604e_cost;
4025 break;
4027 case PROCESSOR_PPC620:
4028 rs6000_cost = &ppc620_cost;
4029 break;
4031 case PROCESSOR_PPC630:
4032 rs6000_cost = &ppc630_cost;
4033 break;
4035 case PROCESSOR_CELL:
4036 rs6000_cost = &ppccell_cost;
4037 break;
4039 case PROCESSOR_PPC750:
4040 case PROCESSOR_PPC7400:
4041 rs6000_cost = &ppc750_cost;
4042 break;
4044 case PROCESSOR_PPC7450:
4045 rs6000_cost = &ppc7450_cost;
4046 break;
4048 case PROCESSOR_PPC8540:
4049 case PROCESSOR_PPC8548:
4050 rs6000_cost = &ppc8540_cost;
4051 break;
4053 case PROCESSOR_PPCE300C2:
4054 case PROCESSOR_PPCE300C3:
4055 rs6000_cost = &ppce300c2c3_cost;
4056 break;
4058 case PROCESSOR_PPCE500MC:
4059 rs6000_cost = &ppce500mc_cost;
4060 break;
4062 case PROCESSOR_PPCE500MC64:
4063 rs6000_cost = &ppce500mc64_cost;
4064 break;
4066 case PROCESSOR_PPCE5500:
4067 rs6000_cost = &ppce5500_cost;
4068 break;
4070 case PROCESSOR_PPCE6500:
4071 rs6000_cost = &ppce6500_cost;
4072 break;
4074 case PROCESSOR_TITAN:
4075 rs6000_cost = &titan_cost;
4076 break;
4078 case PROCESSOR_POWER4:
4079 case PROCESSOR_POWER5:
4080 rs6000_cost = &power4_cost;
4081 break;
4083 case PROCESSOR_POWER6:
4084 rs6000_cost = &power6_cost;
4085 break;
4087 case PROCESSOR_POWER7:
4088 rs6000_cost = &power7_cost;
4089 break;
4091 case PROCESSOR_POWER8:
4092 rs6000_cost = &power8_cost;
4093 break;
4095 case PROCESSOR_PPCA2:
4096 rs6000_cost = &ppca2_cost;
4097 break;
4099 default:
4100 gcc_unreachable ();
4103 if (global_init_p)
4105 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4106 rs6000_cost->simultaneous_prefetches,
4107 global_options.x_param_values,
4108 global_options_set.x_param_values);
4109 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4110 global_options.x_param_values,
4111 global_options_set.x_param_values);
4112 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4113 rs6000_cost->cache_line_size,
4114 global_options.x_param_values,
4115 global_options_set.x_param_values);
4116 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4117 global_options.x_param_values,
4118 global_options_set.x_param_values);
4120 /* Increase loop peeling limits based on performance analysis. */
4121 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4122 global_options.x_param_values,
4123 global_options_set.x_param_values);
4124 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4125 global_options.x_param_values,
4126 global_options_set.x_param_values);
4128 /* If using typedef char *va_list, signal that
4129 __builtin_va_start (&ap, 0) can be optimized to
4130 ap = __builtin_next_arg (0). */
4131 if (DEFAULT_ABI != ABI_V4)
4132 targetm.expand_builtin_va_start = NULL;
4135 /* Set up single/double float flags.
4136 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4137 then set both flags. */
4138 if (TARGET_HARD_FLOAT && TARGET_FPRS
4139 && rs6000_single_float == 0 && rs6000_double_float == 0)
4140 rs6000_single_float = rs6000_double_float = 1;
4142 /* If not explicitly specified via option, decide whether to generate indexed
4143 load/store instructions. */
4144 if (TARGET_AVOID_XFORM == -1)
4145 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4146 DERAT mispredict penalty. However, the LVE and STVE AltiVec instructions
4147 need indexed accesses and the type used is the scalar type of the element
4148 being loaded or stored. */
4149 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4150 && !TARGET_ALTIVEC);
4152 /* Set the -mrecip options. */
4153 if (rs6000_recip_name)
4155 char *p = ASTRDUP (rs6000_recip_name);
4156 char *q;
4157 unsigned int mask, i;
4158 bool invert;
4160 while ((q = strtok (p, ",")) != NULL)
4162 p = NULL;
4163 if (*q == '!')
4165 invert = true;
4166 q++;
4168 else
4169 invert = false;
4171 if (!strcmp (q, "default"))
4172 mask = ((TARGET_RECIP_PRECISION)
4173 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4174 else
4176 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4177 if (!strcmp (q, recip_options[i].string))
4179 mask = recip_options[i].mask;
4180 break;
4183 if (i == ARRAY_SIZE (recip_options))
4185 error ("unknown option for -mrecip=%s", q);
4186 invert = false;
4187 mask = 0;
4188 ret = false;
4192 if (invert)
4193 rs6000_recip_control &= ~mask;
4194 else
4195 rs6000_recip_control |= mask;
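/* Usage illustration (option names per the -mrecip documentation, not
   from this file): -mrecip=rsqrtd,!divf runs the loop above twice,
   setting the mask for double-precision reciprocal square root
   estimates and, via the '!' prefix, clearing the mask for
   single-precision divide estimates. */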
4199 /* Set the builtin mask of the various options in use that could affect which
4200 builtins are available. In the past we used target_flags, but we've run out
4201 of bits, and some options like SPE and PAIRED are no longer in
4202 target_flags. */
4203 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4204 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4206 fprintf (stderr,
4207 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
4208 rs6000_builtin_mask);
4209 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
4212 /* Initialize all of the registers. */
4213 rs6000_init_hard_regno_mode_ok (global_init_p);
4215 /* Save the initial options in case the user uses function-specific options. */
4216 if (global_init_p)
4217 target_option_default_node = target_option_current_node
4218 = build_target_option_node (&global_options);
4220 /* If not explicitly specified via option, decide whether to generate the
4221 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4222 if (TARGET_LINK_STACK == -1)
4223 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4225 return ret;
4228 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4229 define the target cpu type. */
4231 static void
4232 rs6000_option_override (void)
4234 (void) rs6000_option_override_internal (true);
4236 /* Register machine-specific passes. This needs to be done at start-up.
4237 It's convenient to do it here (like i386 does). */
4238 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4240 struct register_pass_info analyze_swaps_info
4241 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4243 register_pass (&analyze_swaps_info);
4247 /* Implement targetm.vectorize.builtin_mask_for_load. */
4248 static tree
4249 rs6000_builtin_mask_for_load (void)
4251 if (TARGET_ALTIVEC || TARGET_VSX)
4252 return altivec_builtin_mask_for_load;
4253 else
4254 return 0;
4257 /* Implement LOOP_ALIGN. */
4259 rs6000_loop_align (rtx label)
4261 basic_block bb;
4262 int ninsns;
4264 /* Don't override loop alignment if -falign-loops was specified. */
4265 if (!can_override_loop_align)
4266 return align_loops_log;
4268 bb = BLOCK_FOR_INSN (label);
4269 ninsns = num_loop_insns(bb->loop_father);
4271 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4272 if (ninsns > 4 && ninsns <= 8
4273 && (rs6000_cpu == PROCESSOR_POWER4
4274 || rs6000_cpu == PROCESSOR_POWER5
4275 || rs6000_cpu == PROCESSOR_POWER6
4276 || rs6000_cpu == PROCESSOR_POWER7
4277 || rs6000_cpu == PROCESSOR_POWER8))
4278 return 5;
4279 else
4280 return align_loops_log;
4283 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4284 static int
4285 rs6000_loop_align_max_skip (rtx_insn *label)
4287 return (1 << rs6000_loop_align (label)) - 1;
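/* Worked example (arithmetic only): when rs6000_loop_align returns 5,
   the loop is aligned to 1 << 5 == 32 bytes and at most
   (1 << 5) - 1 == 31 padding bytes may be skipped. */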
4290 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4291 after applying N iterations. This routine does not determine
4292 how many iterations are required to reach the desired alignment. */
4294 static bool
4295 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4297 if (is_packed)
4298 return false;
4300 if (TARGET_32BIT)
4302 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4303 return true;
4305 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4306 return true;
4308 return false;
4310 else
4312 if (TARGET_MACHO)
4313 return false;
4315 /* Assume that all other types are naturally aligned. CHECKME! */
4316 return true;
4320 /* Return true if the vector misalignment factor is supported by the
4321 target. */
4322 static bool
4323 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4324 const_tree type,
4325 int misalignment,
4326 bool is_packed)
4328 if (TARGET_VSX)
4330 /* Return false if the movmisalign pattern is not supported for this mode. */
4331 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4332 return false;
4334 if (misalignment == -1)
4336 /* Misalignment factor is unknown at compile time but we know
4337 it's word aligned. */
4338 if (rs6000_vector_alignment_reachable (type, is_packed))
4340 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4342 if (element_size == 64 || element_size == 32)
4343 return true;
4346 return false;
4349 /* VSX supports word-aligned vectors. */
4350 if (misalignment % 4 == 0)
4351 return true;
4353 return false;
4356 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4357 static int
4358 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4359 tree vectype, int misalign)
4361 unsigned elements;
4362 tree elem_type;
4364 switch (type_of_cost)
4366 case scalar_stmt:
4367 case scalar_load:
4368 case scalar_store:
4369 case vector_stmt:
4370 case vector_load:
4371 case vector_store:
4372 case vec_to_scalar:
4373 case scalar_to_vec:
4374 case cond_branch_not_taken:
4375 return 1;
4377 case vec_perm:
4378 if (TARGET_VSX)
4379 return 3;
4380 else
4381 return 1;
4383 case vec_promote_demote:
4384 if (TARGET_VSX)
4385 return 4;
4386 else
4387 return 1;
4389 case cond_branch_taken:
4390 return 3;
4392 case unaligned_load:
4393 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4395 elements = TYPE_VECTOR_SUBPARTS (vectype);
4396 if (elements == 2)
4397 /* Double word aligned. */
4398 return 2;
4400 if (elements == 4)
4402 switch (misalign)
4404 case 8:
4405 /* Double word aligned. */
4406 return 2;
4408 case -1:
4409 /* Unknown misalignment. */
4410 case 4:
4411 case 12:
4412 /* Word aligned. */
4413 return 22;
4415 default:
4416 gcc_unreachable ();
4421 if (TARGET_ALTIVEC)
4422 /* Misaligned loads are not supported. */
4423 gcc_unreachable ();
4425 return 2;
4427 case unaligned_store:
4428 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4430 elements = TYPE_VECTOR_SUBPARTS (vectype);
4431 if (elements == 2)
4432 /* Double word aligned. */
4433 return 2;
4435 if (elements == 4)
4437 switch (misalign)
4439 case 8:
4440 /* Double word aligned. */
4441 return 2;
4443 case -1:
4444 /* Unknown misalignment. */
4445 case 4:
4446 case 12:
4447 /* Word aligned. */
4448 return 23;
4450 default:
4451 gcc_unreachable ();
4456 if (TARGET_ALTIVEC)
4457 /* Misaligned stores are not supported. */
4458 gcc_unreachable ();
4460 return 2;
4462 case vec_construct:
4463 elements = TYPE_VECTOR_SUBPARTS (vectype);
4464 elem_type = TREE_TYPE (vectype);
4465 /* 32-bit vectors loaded into registers are stored as double
4466 precision, so we need n/2 converts in addition to the usual
4467 n/2 merges to construct a vector of short floats from them. */
4468 if (SCALAR_FLOAT_TYPE_P (elem_type)
4469 && TYPE_PRECISION (elem_type) == 32)
4470 return elements + 1;
4471 else
4472 return elements / 2 + 1;
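/* Worked example (follows directly from the formula above): building a
   V4SF from four 32-bit floats costs elements + 1 == 5, while a V2DF
   built from two doubles costs elements / 2 + 1 == 2. */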
4474 default:
4475 gcc_unreachable ();
4479 /* Implement targetm.vectorize.preferred_simd_mode. */
4481 static machine_mode
4482 rs6000_preferred_simd_mode (machine_mode mode)
4484 if (TARGET_VSX)
4485 switch (mode)
4487 case DFmode:
4488 return V2DFmode;
4489 default:;
4491 if (TARGET_ALTIVEC || TARGET_VSX)
4492 switch (mode)
4494 case SFmode:
4495 return V4SFmode;
4496 case TImode:
4497 return V1TImode;
4498 case DImode:
4499 return V2DImode;
4500 case SImode:
4501 return V4SImode;
4502 case HImode:
4503 return V8HImode;
4504 case QImode:
4505 return V16QImode;
4506 default:;
4508 if (TARGET_SPE)
4509 switch (mode)
4511 case SFmode:
4512 return V2SFmode;
4513 case SImode:
4514 return V2SImode;
4515 default:;
4517 if (TARGET_PAIRED_FLOAT
4518 && mode == SFmode)
4519 return V2SFmode;
4520 return word_mode;
4523 typedef struct _rs6000_cost_data
4525 struct loop *loop_info;
4526 unsigned cost[3];
4527 } rs6000_cost_data;
4529 /* Test for likely overcommitment of vector hardware resources. If a
4530 loop iteration is relatively large, and too large a percentage of
4531 instructions in the loop are vectorized, the cost model may not
4532 adequately reflect delays from unavailable vector resources.
4533 Penalize the loop body cost for this case. */
4535 static void
4536 rs6000_density_test (rs6000_cost_data *data)
4538 const int DENSITY_PCT_THRESHOLD = 85;
4539 const int DENSITY_SIZE_THRESHOLD = 70;
4540 const int DENSITY_PENALTY = 10;
4541 struct loop *loop = data->loop_info;
4542 basic_block *bbs = get_loop_body (loop);
4543 int nbbs = loop->num_nodes;
4544 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4545 int i, density_pct;
4547 for (i = 0; i < nbbs; i++)
4549 basic_block bb = bbs[i];
4550 gimple_stmt_iterator gsi;
4552 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4554 gimple stmt = gsi_stmt (gsi);
4555 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4557 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4558 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4559 not_vec_cost++;
4563 free (bbs);
4564 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4566 if (density_pct > DENSITY_PCT_THRESHOLD
4567 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4569 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4570 if (dump_enabled_p ())
4571 dump_printf_loc (MSG_NOTE, vect_location,
4572 "density %d%%, cost %d exceeds threshold, penalizing "
4573 "loop body cost by %d%%", density_pct,
4574 vec_cost + not_vec_cost, DENSITY_PENALTY);
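/* Worked example (numbers chosen for illustration): with vec_cost == 90
   and not_vec_cost == 10, density_pct is 90 and the loop size is 100,
   so both thresholds (85 and 70) are exceeded and the body cost is
   penalized to 90 * 110 / 100 == 99. */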
4578 /* Implement targetm.vectorize.init_cost. */
4580 static void *
4581 rs6000_init_cost (struct loop *loop_info)
4583 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4584 data->loop_info = loop_info;
4585 data->cost[vect_prologue] = 0;
4586 data->cost[vect_body] = 0;
4587 data->cost[vect_epilogue] = 0;
4588 return data;
4591 /* Implement targetm.vectorize.add_stmt_cost. */
4593 static unsigned
4594 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4595 struct _stmt_vec_info *stmt_info, int misalign,
4596 enum vect_cost_model_location where)
4598 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4599 unsigned retval = 0;
4601 if (flag_vect_cost_model)
4603 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4604 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4605 misalign);
4606 /* Statements in an inner loop relative to the loop being
4607 vectorized are weighted more heavily. The value here is
4608 arbitrary and could potentially be improved with analysis. */
4609 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4610 count *= 50; /* FIXME. */
4612 retval = (unsigned) (count * stmt_cost);
4613 cost_data->cost[where] += retval;
4616 return retval;
4619 /* Implement targetm.vectorize.finish_cost. */
4621 static void
4622 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4623 unsigned *body_cost, unsigned *epilogue_cost)
4625 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4627 if (cost_data->loop_info)
4628 rs6000_density_test (cost_data);
4630 *prologue_cost = cost_data->cost[vect_prologue];
4631 *body_cost = cost_data->cost[vect_body];
4632 *epilogue_cost = cost_data->cost[vect_epilogue];
4635 /* Implement targetm.vectorize.destroy_cost_data. */
4637 static void
4638 rs6000_destroy_cost_data (void *data)
4640 free (data);
4643 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4644 library with vectorized intrinsics. */
4646 static tree
4647 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4649 char name[32];
4650 const char *suffix = NULL;
4651 tree fntype, new_fndecl, bdecl = NULL_TREE;
4652 int n_args = 1;
4653 const char *bname;
4654 machine_mode el_mode, in_mode;
4655 int n, in_n;
4657 /* Libmass is suitable only for unsafe math, as it does not correctly support
4658 parts of IEEE with the required precision such as denormals. Only support
4659 it if we have VSX to use the simd d2 or f4 functions.
4660 XXX: Add variable length support. */
4661 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4662 return NULL_TREE;
4664 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4665 n = TYPE_VECTOR_SUBPARTS (type_out);
4666 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4667 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4668 if (el_mode != in_mode
4669 || n != in_n)
4670 return NULL_TREE;
4672 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4674 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4675 switch (fn)
4677 case BUILT_IN_ATAN2:
4678 case BUILT_IN_HYPOT:
4679 case BUILT_IN_POW:
4680 n_args = 2;
4681 /* fall through */
4683 case BUILT_IN_ACOS:
4684 case BUILT_IN_ACOSH:
4685 case BUILT_IN_ASIN:
4686 case BUILT_IN_ASINH:
4687 case BUILT_IN_ATAN:
4688 case BUILT_IN_ATANH:
4689 case BUILT_IN_CBRT:
4690 case BUILT_IN_COS:
4691 case BUILT_IN_COSH:
4692 case BUILT_IN_ERF:
4693 case BUILT_IN_ERFC:
4694 case BUILT_IN_EXP2:
4695 case BUILT_IN_EXP:
4696 case BUILT_IN_EXPM1:
4697 case BUILT_IN_LGAMMA:
4698 case BUILT_IN_LOG10:
4699 case BUILT_IN_LOG1P:
4700 case BUILT_IN_LOG2:
4701 case BUILT_IN_LOG:
4702 case BUILT_IN_SIN:
4703 case BUILT_IN_SINH:
4704 case BUILT_IN_SQRT:
4705 case BUILT_IN_TAN:
4706 case BUILT_IN_TANH:
4707 bdecl = builtin_decl_implicit (fn);
4708 suffix = "d2"; /* pow -> powd2 */
4709 if (el_mode != DFmode
4710 || n != 2
4711 || !bdecl)
4712 return NULL_TREE;
4713 break;
4715 case BUILT_IN_ATAN2F:
4716 case BUILT_IN_HYPOTF:
4717 case BUILT_IN_POWF:
4718 n_args = 2;
4719 /* fall through */
4721 case BUILT_IN_ACOSF:
4722 case BUILT_IN_ACOSHF:
4723 case BUILT_IN_ASINF:
4724 case BUILT_IN_ASINHF:
4725 case BUILT_IN_ATANF:
4726 case BUILT_IN_ATANHF:
4727 case BUILT_IN_CBRTF:
4728 case BUILT_IN_COSF:
4729 case BUILT_IN_COSHF:
4730 case BUILT_IN_ERFF:
4731 case BUILT_IN_ERFCF:
4732 case BUILT_IN_EXP2F:
4733 case BUILT_IN_EXPF:
4734 case BUILT_IN_EXPM1F:
4735 case BUILT_IN_LGAMMAF:
4736 case BUILT_IN_LOG10F:
4737 case BUILT_IN_LOG1PF:
4738 case BUILT_IN_LOG2F:
4739 case BUILT_IN_LOGF:
4740 case BUILT_IN_SINF:
4741 case BUILT_IN_SINHF:
4742 case BUILT_IN_SQRTF:
4743 case BUILT_IN_TANF:
4744 case BUILT_IN_TANHF:
4745 bdecl = builtin_decl_implicit (fn);
4746 suffix = "4"; /* powf -> powf4 */
4747 if (el_mode != SFmode
4748 || n != 4
4749 || !bdecl)
4750 return NULL_TREE;
4751 break;
4753 default:
4754 return NULL_TREE;
4757 else
4758 return NULL_TREE;
4760 gcc_assert (suffix != NULL);
4761 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4762 if (!bname)
4763 return NULL_TREE;
4765 strcpy (name, bname + sizeof ("__builtin_") - 1);
4766 strcat (name, suffix);
4768 if (n_args == 1)
4769 fntype = build_function_type_list (type_out, type_in, NULL);
4770 else if (n_args == 2)
4771 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4772 else
4773 gcc_unreachable ();
4775 /* Build a function declaration for the vectorized function. */
4776 new_fndecl = build_decl (BUILTINS_LOCATION,
4777 FUNCTION_DECL, get_identifier (name), fntype);
4778 TREE_PUBLIC (new_fndecl) = 1;
4779 DECL_EXTERNAL (new_fndecl) = 1;
4780 DECL_IS_NOVOPS (new_fndecl) = 1;
4781 TREE_READONLY (new_fndecl) = 1;
4783 return new_fndecl;
4786 /* Returns a function decl for a vectorized version of the builtin function
4787 FNDECL, with result vector type TYPE_OUT and argument type TYPE_IN, or NULL_TREE
4788 if it is not available. */
4790 static tree
4791 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4792 tree type_in)
4794 machine_mode in_mode, out_mode;
4795 int in_n, out_n;
4797 if (TARGET_DEBUG_BUILTIN)
4798 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4799 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4800 GET_MODE_NAME (TYPE_MODE (type_out)),
4801 GET_MODE_NAME (TYPE_MODE (type_in)));
4803 if (TREE_CODE (type_out) != VECTOR_TYPE
4804 || TREE_CODE (type_in) != VECTOR_TYPE
4805 || !TARGET_VECTORIZE_BUILTINS)
4806 return NULL_TREE;
4808 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4809 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4810 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4811 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4813 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4815 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4816 switch (fn)
4818 case BUILT_IN_CLZIMAX:
4819 case BUILT_IN_CLZLL:
4820 case BUILT_IN_CLZL:
4821 case BUILT_IN_CLZ:
4822 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4824 if (out_mode == QImode && out_n == 16)
4825 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4826 else if (out_mode == HImode && out_n == 8)
4827 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4828 else if (out_mode == SImode && out_n == 4)
4829 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4830 else if (out_mode == DImode && out_n == 2)
4831 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4833 break;
4834 case BUILT_IN_COPYSIGN:
4835 if (VECTOR_UNIT_VSX_P (V2DFmode)
4836 && out_mode == DFmode && out_n == 2
4837 && in_mode == DFmode && in_n == 2)
4838 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4839 break;
4840 case BUILT_IN_COPYSIGNF:
4841 if (out_mode != SFmode || out_n != 4
4842 || in_mode != SFmode || in_n != 4)
4843 break;
4844 if (VECTOR_UNIT_VSX_P (V4SFmode))
4845 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4846 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4847 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4848 break;
4849 case BUILT_IN_POPCOUNTIMAX:
4850 case BUILT_IN_POPCOUNTLL:
4851 case BUILT_IN_POPCOUNTL:
4852 case BUILT_IN_POPCOUNT:
4853 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4855 if (out_mode == QImode && out_n == 16)
4856 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4857 else if (out_mode == HImode && out_n == 8)
4858 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4859 else if (out_mode == SImode && out_n == 4)
4860 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4861 else if (out_mode == DImode && out_n == 2)
4862 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4864 break;
4865 case BUILT_IN_SQRT:
4866 if (VECTOR_UNIT_VSX_P (V2DFmode)
4867 && out_mode == DFmode && out_n == 2
4868 && in_mode == DFmode && in_n == 2)
4869 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4870 break;
4871 case BUILT_IN_SQRTF:
4872 if (VECTOR_UNIT_VSX_P (V4SFmode)
4873 && out_mode == SFmode && out_n == 4
4874 && in_mode == SFmode && in_n == 4)
4875 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4876 break;
4877 case BUILT_IN_CEIL:
4878 if (VECTOR_UNIT_VSX_P (V2DFmode)
4879 && out_mode == DFmode && out_n == 2
4880 && in_mode == DFmode && in_n == 2)
4881 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4882 break;
4883 case BUILT_IN_CEILF:
4884 if (out_mode != SFmode || out_n != 4
4885 || in_mode != SFmode || in_n != 4)
4886 break;
4887 if (VECTOR_UNIT_VSX_P (V4SFmode))
4888 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4889 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4890 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4891 break;
4892 case BUILT_IN_FLOOR:
4893 if (VECTOR_UNIT_VSX_P (V2DFmode)
4894 && out_mode == DFmode && out_n == 2
4895 && in_mode == DFmode && in_n == 2)
4896 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4897 break;
4898 case BUILT_IN_FLOORF:
4899 if (out_mode != SFmode || out_n != 4
4900 || in_mode != SFmode || in_n != 4)
4901 break;
4902 if (VECTOR_UNIT_VSX_P (V4SFmode))
4903 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4904 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4905 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4906 break;
4907 case BUILT_IN_FMA:
4908 if (VECTOR_UNIT_VSX_P (V2DFmode)
4909 && out_mode == DFmode && out_n == 2
4910 && in_mode == DFmode && in_n == 2)
4911 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4912 break;
4913 case BUILT_IN_FMAF:
4914 if (VECTOR_UNIT_VSX_P (V4SFmode)
4915 && out_mode == SFmode && out_n == 4
4916 && in_mode == SFmode && in_n == 4)
4917 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4918 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4919 && out_mode == SFmode && out_n == 4
4920 && in_mode == SFmode && in_n == 4)
4921 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4922 break;
4923 case BUILT_IN_TRUNC:
4924 if (VECTOR_UNIT_VSX_P (V2DFmode)
4925 && out_mode == DFmode && out_n == 2
4926 && in_mode == DFmode && in_n == 2)
4927 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4928 break;
4929 case BUILT_IN_TRUNCF:
4930 if (out_mode != SFmode || out_n != 4
4931 || in_mode != SFmode || in_n != 4)
4932 break;
4933 if (VECTOR_UNIT_VSX_P (V4SFmode))
4934 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4935 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4936 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4937 break;
4938 case BUILT_IN_NEARBYINT:
4939 if (VECTOR_UNIT_VSX_P (V2DFmode)
4940 && flag_unsafe_math_optimizations
4941 && out_mode == DFmode && out_n == 2
4942 && in_mode == DFmode && in_n == 2)
4943 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4944 break;
4945 case BUILT_IN_NEARBYINTF:
4946 if (VECTOR_UNIT_VSX_P (V4SFmode)
4947 && flag_unsafe_math_optimizations
4948 && out_mode == SFmode && out_n == 4
4949 && in_mode == SFmode && in_n == 4)
4950 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4951 break;
4952 case BUILT_IN_RINT:
4953 if (VECTOR_UNIT_VSX_P (V2DFmode)
4954 && !flag_trapping_math
4955 && out_mode == DFmode && out_n == 2
4956 && in_mode == DFmode && in_n == 2)
4957 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4958 break;
4959 case BUILT_IN_RINTF:
4960 if (VECTOR_UNIT_VSX_P (V4SFmode)
4961 && !flag_trapping_math
4962 && out_mode == SFmode && out_n == 4
4963 && in_mode == SFmode && in_n == 4)
4964 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4965 break;
4966 default:
4967 break;
4971 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4973 enum rs6000_builtins fn
4974 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4975 switch (fn)
4977 case RS6000_BUILTIN_RSQRTF:
4978 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4979 && out_mode == SFmode && out_n == 4
4980 && in_mode == SFmode && in_n == 4)
4981 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4982 break;
4983 case RS6000_BUILTIN_RSQRT:
4984 if (VECTOR_UNIT_VSX_P (V2DFmode)
4985 && out_mode == DFmode && out_n == 2
4986 && in_mode == DFmode && in_n == 2)
4987 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
4988 break;
4989 case RS6000_BUILTIN_RECIPF:
4990 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4991 && out_mode == SFmode && out_n == 4
4992 && in_mode == SFmode && in_n == 4)
4993 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
4994 break;
4995 case RS6000_BUILTIN_RECIP:
4996 if (VECTOR_UNIT_VSX_P (V2DFmode)
4997 && out_mode == DFmode && out_n == 2
4998 && in_mode == DFmode && in_n == 2)
4999 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5000 break;
5001 default:
5002 break;
5006 /* Generate calls to libmass if appropriate. */
5007 if (rs6000_veclib_handler)
5008 return rs6000_veclib_handler (fndecl, type_out, type_in);
5010 return NULL_TREE;
5013 /* Default CPU string for rs6000*_file_start functions. */
5014 static const char *rs6000_default_cpu;
5016 /* Do anything needed at the start of the asm file. */
5018 static void
5019 rs6000_file_start (void)
5021 char buffer[80];
5022 const char *start = buffer;
5023 FILE *file = asm_out_file;
5025 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5027 default_file_start ();
5029 if (flag_verbose_asm)
5031 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5033 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5035 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5036 start = "";
5039 if (global_options_set.x_rs6000_cpu_index)
5041 fprintf (file, "%s -mcpu=%s", start,
5042 processor_target_table[rs6000_cpu_index].name);
5043 start = "";
5046 if (global_options_set.x_rs6000_tune_index)
5048 fprintf (file, "%s -mtune=%s", start,
5049 processor_target_table[rs6000_tune_index].name);
5050 start = "";
5053 if (PPC405_ERRATUM77)
5055 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5056 start = "";
5059 #ifdef USING_ELFOS_H
5060 switch (rs6000_sdata)
5062 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5063 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5064 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5065 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5068 if (rs6000_sdata && g_switch_value)
5070 fprintf (file, "%s -G %d", start,
5071 g_switch_value);
5072 start = "";
5074 #endif
5076 if (*start == '\0')
5077 putc ('\n', file);
5080 #ifdef USING_ELFOS_H
5081 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5082 || !global_options_set.x_rs6000_cpu_index)
5084 fputs ("\t.machine ", asm_out_file);
5085 if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5086 fputs ("power8\n", asm_out_file);
5087 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5088 fputs ("power7\n", asm_out_file);
5089 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5090 fputs ("power6\n", asm_out_file);
5091 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5092 fputs ("power5\n", asm_out_file);
5093 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5094 fputs ("power4\n", asm_out_file);
5095 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5096 fputs ("ppc64\n", asm_out_file);
5097 else
5098 fputs ("ppc\n", asm_out_file);
5100 #endif
5102 if (DEFAULT_ABI == ABI_ELFv2)
5103 fprintf (file, "\t.abiversion 2\n");
5105 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5106 || (TARGET_ELF && flag_pic == 2))
5108 switch_to_section (toc_section);
5109 switch_to_section (text_section);
5114 /* Return nonzero if this function is known to have a null epilogue. */
5117 direct_return (void)
5119 if (reload_completed)
5121 rs6000_stack_t *info = rs6000_stack_info ();
5123 if (info->first_gp_reg_save == 32
5124 && info->first_fp_reg_save == 64
5125 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5126 && ! info->lr_save_p
5127 && ! info->cr_save_p
5128 && info->vrsave_mask == 0
5129 && ! info->push_p)
5130 return 1;
5133 return 0;
5136 /* Return the number of instructions it takes to form a constant in an
5137 integer register. */
5140 num_insns_constant_wide (HOST_WIDE_INT value)
5142 /* signed constant loadable with addi */
5143 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5144 return 1;
5146 /* constant loadable with addis */
5147 else if ((value & 0xffff) == 0
5148 && (value >> 31 == -1 || value >> 31 == 0))
5149 return 1;
5151 else if (TARGET_POWERPC64)
5153 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5154 HOST_WIDE_INT high = value >> 31;
5156 if (high == 0 || high == -1)
5157 return 2;
5159 high >>= 1;
5161 if (low == 0)
5162 return num_insns_constant_wide (high) + 1;
5163 else if (high == 0)
5164 return num_insns_constant_wide (low) + 1;
5165 else
5166 return (num_insns_constant_wide (high)
5167 + num_insns_constant_wide (low) + 1);
5170 else
5171 return 2;
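/* Worked example (values chosen for illustration): on a 64-bit target
   the constant 0x100000000 (1 << 32) has low == 0, so the result is
   num_insns_constant_wide (1) + 1 == 2, e.g. "li 9,1" followed by
   "sldi 9,9,32". A 16-bit signed value such as -42 costs just 1. */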
5175 num_insns_constant (rtx op, machine_mode mode)
5177 HOST_WIDE_INT low, high;
5179 switch (GET_CODE (op))
5181 case CONST_INT:
5182 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5183 && mask64_operand (op, mode))
5184 return 2;
5185 else
5186 return num_insns_constant_wide (INTVAL (op));
5188 case CONST_WIDE_INT:
5190 int i;
5191 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5192 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5193 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5194 return ins;
5197 case CONST_DOUBLE:
5198 if (mode == SFmode || mode == SDmode)
5200 long l;
5201 REAL_VALUE_TYPE rv;
5203 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5204 if (DECIMAL_FLOAT_MODE_P (mode))
5205 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5206 else
5207 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5208 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5211 long l[2];
5212 REAL_VALUE_TYPE rv;
5214 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5215 if (DECIMAL_FLOAT_MODE_P (mode))
5216 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5217 else
5218 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5219 high = l[WORDS_BIG_ENDIAN == 0];
5220 low = l[WORDS_BIG_ENDIAN != 0];
5222 if (TARGET_32BIT)
5223 return (num_insns_constant_wide (low)
5224 + num_insns_constant_wide (high));
5225 else
5227 if ((high == 0 && low >= 0)
5228 || (high == -1 && low < 0))
5229 return num_insns_constant_wide (low);
5231 else if (mask64_operand (op, mode))
5232 return 2;
5234 else if (low == 0)
5235 return num_insns_constant_wide (high) + 1;
5237 else
5238 return (num_insns_constant_wide (high)
5239 + num_insns_constant_wide (low) + 1);
5242 default:
5243 gcc_unreachable ();
5247 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5248 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5249 corresponding element of the vector, but for V4SFmode and V2SFmode,
5250 the corresponding "float" is interpreted as an SImode integer. */
5252 HOST_WIDE_INT
5253 const_vector_elt_as_int (rtx op, unsigned int elt)
5255 rtx tmp;
5257 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5258 gcc_assert (GET_MODE (op) != V2DImode
5259 && GET_MODE (op) != V2DFmode);
5261 tmp = CONST_VECTOR_ELT (op, elt);
5262 if (GET_MODE (op) == V4SFmode
5263 || GET_MODE (op) == V2SFmode)
5264 tmp = gen_lowpart (SImode, tmp);
5265 return INTVAL (tmp);
5268 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5269 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5270 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5271 all items are set to the same value and contain COPIES replicas of the
5272 vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's
5273 operand and the others are set to the value of the operand's msb. */
5275 static bool
5276 vspltis_constant (rtx op, unsigned step, unsigned copies)
5278 machine_mode mode = GET_MODE (op);
5279 machine_mode inner = GET_MODE_INNER (mode);
5281 unsigned i;
5282 unsigned nunits;
5283 unsigned bitsize;
5284 unsigned mask;
5286 HOST_WIDE_INT val;
5287 HOST_WIDE_INT splat_val;
5288 HOST_WIDE_INT msb_val;
5290 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5291 return false;
5293 nunits = GET_MODE_NUNITS (mode);
5294 bitsize = GET_MODE_BITSIZE (inner);
5295 mask = GET_MODE_MASK (inner);
5297 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5298 splat_val = val;
5299 msb_val = val >= 0 ? 0 : -1;
5301 /* Construct the value to be splatted, if possible. If not, return 0. */
5302 for (i = 2; i <= copies; i *= 2)
5304 HOST_WIDE_INT small_val;
5305 bitsize /= 2;
5306 small_val = splat_val >> bitsize;
5307 mask >>= bitsize;
5308 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5309 return false;
5310 splat_val = small_val;
5313 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5314 if (EASY_VECTOR_15 (splat_val))
5317 /* Also check if we can splat, and then add the result to itself. Do so if
5318 the value is positive, or if the splat instruction is using OP's mode;
5319 for splat_val < 0, the splat and the add should use the same mode. */
5320 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5321 && (splat_val >= 0 || (step == 1 && copies == 1)))
5324 /* Also check if we are loading up the most significant bit, which can be
5325 done by loading up -1 and shifting the value left by -1. */
5326 else if (EASY_VECTOR_MSB (splat_val, inner))
5329 else
5330 return false;
5332 /* Check if VAL is present in every STEP-th element, and the
5333 other elements are filled with its most significant bit. */
5334 for (i = 1; i < nunits; ++i)
5336 HOST_WIDE_INT desired_val;
5337 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5338 if ((i & (step - 1)) == 0)
5339 desired_val = val;
5340 else
5341 desired_val = msb_val;
5343 if (desired_val != const_vector_elt_as_int (op, elt))
5344 return false;
5347 return true;
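/* Example (illustrative): the V8HImode constant { 5, 5, 5, 5, 5, 5, 5, 5 }
   matches with STEP == 1, COPIES == 1 and is emitted as "vspltish %0,5".
   The V4SImode constant with every element 0x00050005 is the same bit
   pattern; it fails the COPIES == 1 check but matches with COPIES == 2,
   since each 32-bit element holds two replicas of the 16-bit splat
   value.  */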
5351 /* Return true if OP is of the given MODE and can be synthesized
5352 with a vspltisb, vspltish or vspltisw. */
5354 bool
5355 easy_altivec_constant (rtx op, machine_mode mode)
5357 unsigned step, copies;
5359 if (mode == VOIDmode)
5360 mode = GET_MODE (op);
5361 else if (mode != GET_MODE (op))
5362 return false;
5364 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5365 constants. */
5366 if (mode == V2DFmode)
5367 return zero_constant (op, mode);
5369 else if (mode == V2DImode)
5371 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5372 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5373 return false;
5375 if (zero_constant (op, mode))
5376 return true;
5378 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5379 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5380 return true;
5382 return false;
5385 /* V1TImode is a special container for TImode. Ignore for now. */
5386 else if (mode == V1TImode)
5387 return false;
5389 /* Start with a vspltisw. */
5390 step = GET_MODE_NUNITS (mode) / 4;
5391 copies = 1;
5393 if (vspltis_constant (op, step, copies))
5394 return true;
5396 /* Then try with a vspltish. */
5397 if (step == 1)
5398 copies <<= 1;
5399 else
5400 step >>= 1;
5402 if (vspltis_constant (op, step, copies))
5403 return true;
5405 /* And finally a vspltisb. */
5406 if (step == 1)
5407 copies <<= 1;
5408 else
5409 step >>= 1;
5411 if (vspltis_constant (op, step, copies))
5412 return true;
5414 return false;
5417 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5418 result is OP. Abort if it is not possible. */
5421 gen_easy_altivec_constant (rtx op)
5423 machine_mode mode = GET_MODE (op);
5424 int nunits = GET_MODE_NUNITS (mode);
5425 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5426 unsigned step = nunits / 4;
5427 unsigned copies = 1;
5429 /* Start with a vspltisw. */
5430 if (vspltis_constant (op, step, copies))
5431 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5433 /* Then try with a vspltish. */
5434 if (step == 1)
5435 copies <<= 1;
5436 else
5437 step >>= 1;
5439 if (vspltis_constant (op, step, copies))
5440 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5442 /* And finally a vspltisb. */
5443 if (step == 1)
5444 copies <<= 1;
5445 else
5446 step >>= 1;
5448 if (vspltis_constant (op, step, copies))
5449 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5451 gcc_unreachable ();
5454 const char *
5455 output_vec_const_move (rtx *operands)
5457 int cst, cst2;
5458 machine_mode mode;
5459 rtx dest, vec;
5461 dest = operands[0];
5462 vec = operands[1];
5463 mode = GET_MODE (dest);
5465 if (TARGET_VSX)
5467 if (zero_constant (vec, mode))
5468 return "xxlxor %x0,%x0,%x0";
5470 if ((mode == V2DImode || mode == V1TImode)
5471 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5472 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5473 return "vspltisw %0,-1";
5476 if (TARGET_ALTIVEC)
5478 rtx splat_vec;
5479 if (zero_constant (vec, mode))
5480 return "vxor %0,%0,%0";
5482 splat_vec = gen_easy_altivec_constant (vec);
5483 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5484 operands[1] = XEXP (splat_vec, 0);
5485 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5486 return "#";
5488 switch (GET_MODE (splat_vec))
5490 case V4SImode:
5491 return "vspltisw %0,%1";
5493 case V8HImode:
5494 return "vspltish %0,%1";
5496 case V16QImode:
5497 return "vspltisb %0,%1";
5499 default:
5500 gcc_unreachable ();
5504 gcc_assert (TARGET_SPE);
5506 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5507 pattern of V1DI, V4HI, and V2SF.
5509 FIXME: We should probably return # and add post reload
5510 splitters for these, but this way is so easy ;-). */
5511 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5512 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5513 operands[1] = CONST_VECTOR_ELT (vec, 0);
5514 operands[2] = CONST_VECTOR_ELT (vec, 1);
5515 if (cst == cst2)
5516 return "li %0,%1\n\tevmergelo %0,%0,%0";
5517 else if (WORDS_BIG_ENDIAN)
5518 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5519 else
5520 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
5523 /* Initialize TARGET of vector PAIRED to VALS. */
5525 void
5526 paired_expand_vector_init (rtx target, rtx vals)
5528 machine_mode mode = GET_MODE (target);
5529 int n_elts = GET_MODE_NUNITS (mode);
5530 int n_var = 0;
5531 rtx x, new_rtx, tmp, constant_op, op1, op2;
5532 int i;
5534 for (i = 0; i < n_elts; ++i)
5536 x = XVECEXP (vals, 0, i);
5537 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5538 ++n_var;
5540 if (n_var == 0)
5542 /* Load from constant pool. */
5543 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5544 return;
5547 if (n_var == 2)
5549 /* The vector is initialized only with non-constants. */
5550 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5551 XVECEXP (vals, 0, 1));
5553 emit_move_insn (target, new_rtx);
5554 return;
5557 /* One field is non-constant and the other one is a constant. Load the
5558 constant from the constant pool and use ps_merge instruction to
5559 construct the whole vector. */
5560 op1 = XVECEXP (vals, 0, 0);
5561 op2 = XVECEXP (vals, 0, 1);
5563 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5565 tmp = gen_reg_rtx (GET_MODE (constant_op));
5566 emit_move_insn (tmp, constant_op);
5568 if (CONSTANT_P (op1))
5569 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5570 else
5571 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5573 emit_move_insn (target, new_rtx);
5576 void
5577 paired_expand_vector_move (rtx operands[])
5579 rtx op0 = operands[0], op1 = operands[1];
5581 emit_move_insn (op0, op1);
5584 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
5585 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5586 operands for the relation operation COND. This is a recursive
5587 function. */
5589 static void
5590 paired_emit_vector_compare (enum rtx_code rcode,
5591 rtx dest, rtx op0, rtx op1,
5592 rtx cc_op0, rtx cc_op1)
5594 rtx tmp = gen_reg_rtx (V2SFmode);
5595 rtx tmp1, max, min;
5597 gcc_assert (TARGET_PAIRED_FLOAT);
5598 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5600 switch (rcode)
5602 case LT:
5603 case LTU:
5604 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5605 return;
5606 case GE:
5607 case GEU:
5608 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5609 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5610 return;
5611 case LE:
5612 case LEU:
5613 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5614 return;
5615 case GT:
5616 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5617 return;
5618 case EQ:
5619 tmp1 = gen_reg_rtx (V2SFmode);
5620 max = gen_reg_rtx (V2SFmode);
5621 min = gen_reg_rtx (V2SFmode);
5624 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5625 emit_insn (gen_selv2sf4
5626 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5627 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5628 emit_insn (gen_selv2sf4
5629 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5630 emit_insn (gen_subv2sf3 (tmp1, min, max));
5631 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5632 return;
5633 case NE:
5634 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5635 return;
5636 case UNLE:
5637 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5638 return;
5639 case UNLT:
5640 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5641 return;
5642 case UNGE:
5643 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5644 return;
5645 case UNGT:
5646 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5647 return;
5648 default:
5649 gcc_unreachable ();
5652 return;
5655 /* Emit vector conditional expression.
5656 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5657 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5660 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5661 rtx cond, rtx cc_op0, rtx cc_op1)
5663 enum rtx_code rcode = GET_CODE (cond);
5665 if (!TARGET_PAIRED_FLOAT)
5666 return 0;
5668 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5670 return 1;
5673 /* Initialize vector TARGET to VALS. */
5675 void
5676 rs6000_expand_vector_init (rtx target, rtx vals)
5678 machine_mode mode = GET_MODE (target);
5679 machine_mode inner_mode = GET_MODE_INNER (mode);
5680 int n_elts = GET_MODE_NUNITS (mode);
5681 int n_var = 0, one_var = -1;
5682 bool all_same = true, all_const_zero = true;
5683 rtx x, mem;
5684 int i;
5686 for (i = 0; i < n_elts; ++i)
5688 x = XVECEXP (vals, 0, i);
5689 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5690 ++n_var, one_var = i;
5691 else if (x != CONST0_RTX (inner_mode))
5692 all_const_zero = false;
5694 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5695 all_same = false;
5698 if (n_var == 0)
5700 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5701 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5702 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5704 /* Zero register. */
5705 emit_insn (gen_rtx_SET (VOIDmode, target,
5706 gen_rtx_XOR (mode, target, target)));
5707 return;
5709 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5711 /* Splat immediate. */
5712 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5713 return;
5715 else
5717 /* Load from constant pool. */
5718 emit_move_insn (target, const_vec);
5719 return;
5723 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5724 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5726 rtx op0 = XVECEXP (vals, 0, 0);
5727 rtx op1 = XVECEXP (vals, 0, 1);
5728 if (all_same)
5730 if (!MEM_P (op0) && !REG_P (op0))
5731 op0 = force_reg (inner_mode, op0);
5732 if (mode == V2DFmode)
5733 emit_insn (gen_vsx_splat_v2df (target, op0));
5734 else
5735 emit_insn (gen_vsx_splat_v2di (target, op0));
5737 else
5739 op0 = force_reg (inner_mode, op0);
5740 op1 = force_reg (inner_mode, op1);
5741 if (mode == V2DFmode)
5742 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5743 else
5744 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5746 return;
5749 /* With single precision floating point on VSX, we know that internally
5750 single precision is actually represented as a double. Either make two
5751 V2DF vectors and convert those vectors to single precision, or do one
5752 conversion and splat the result to the other elements. */
5753 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5755 if (all_same)
5757 rtx freg = gen_reg_rtx (V4SFmode);
5758 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5759 rtx cvt = ((TARGET_XSCVDPSPN)
5760 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5761 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5763 emit_insn (cvt);
5764 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5766 else
5768 rtx dbl_even = gen_reg_rtx (V2DFmode);
5769 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5770 rtx flt_even = gen_reg_rtx (V4SFmode);
5771 rtx flt_odd = gen_reg_rtx (V4SFmode);
5772 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5773 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5774 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5775 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5777 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5778 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5779 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5780 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5781 rs6000_expand_extract_even (target, flt_even, flt_odd);
5783 return;
5786 /* Store value to stack temp. Load vector element. Splat. However, splat
5787 of 64-bit items is not supported on Altivec. */
5788 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5790 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5791 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5792 XVECEXP (vals, 0, 0));
5793 x = gen_rtx_UNSPEC (VOIDmode,
5794 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5795 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5796 gen_rtvec (2,
5797 gen_rtx_SET (VOIDmode,
5798 target, mem),
5799 x)));
5800 x = gen_rtx_VEC_SELECT (inner_mode, target,
5801 gen_rtx_PARALLEL (VOIDmode,
5802 gen_rtvec (1, const0_rtx)));
5803 emit_insn (gen_rtx_SET (VOIDmode, target,
5804 gen_rtx_VEC_DUPLICATE (mode, x)));
5805 return;
5808 /* One field is non-constant. Load constant then overwrite
5809 varying field. */
5810 if (n_var == 1)
5812 rtx copy = copy_rtx (vals);
5814 /* Load constant part of vector, substitute neighboring value for
5815 varying element. */
5816 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5817 rs6000_expand_vector_init (target, copy);
5819 /* Insert variable. */
5820 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5821 return;
5824 /* Construct the vector in memory one field at a time
5825 and load the whole vector. */
5826 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5827 for (i = 0; i < n_elts; i++)
5828 emit_move_insn (adjust_address_nv (mem, inner_mode,
5829 i * GET_MODE_SIZE (inner_mode)),
5830 XVECEXP (vals, 0, i));
5831 emit_move_insn (target, mem);
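/* Dispatch examples for the expander above (illustrative): on VSX,
   V2DFmode { x, x } takes the splat path; V4SImode { 1, 2, 3, 4 } is
   all-constant and is loaded from the constant pool; V4SImode
   { x, 1, 2, 3 } has one variable element, so the constant vector
   { 1, 1, 2, 3 } is materialized first and element 0 is then
   overwritten via rs6000_expand_vector_set.  */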
5834 /* Set field ELT of TARGET to VAL. */
5836 void
5837 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5839 machine_mode mode = GET_MODE (target);
5840 machine_mode inner_mode = GET_MODE_INNER (mode);
5841 rtx reg = gen_reg_rtx (mode);
5842 rtx mask, mem, x;
5843 int width = GET_MODE_SIZE (inner_mode);
5844 int i;
5846 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5848 rtx (*set_func) (rtx, rtx, rtx, rtx)
5849 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5850 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5851 return;
5854 /* Simplify setting single element vectors like V1TImode. */
5855 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5857 emit_move_insn (target, gen_lowpart (mode, val));
5858 return;
5861 /* Load single variable value. */
5862 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5863 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5864 x = gen_rtx_UNSPEC (VOIDmode,
5865 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5866 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5867 gen_rtvec (2,
5868 gen_rtx_SET (VOIDmode,
5869 reg, mem),
5870 x)));
5872 /* Linear sequence. */
5873 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5874 for (i = 0; i < 16; ++i)
5875 XVECEXP (mask, 0, i) = GEN_INT (i);
5877 /* Set permute mask to insert element into target. */
5878 for (i = 0; i < width; ++i)
5879 XVECEXP (mask, 0, elt*width + i)
5880 = GEN_INT (i + 0x10);
5881 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5883 if (BYTES_BIG_ENDIAN)
5884 x = gen_rtx_UNSPEC (mode,
5885 gen_rtvec (3, target, reg,
5886 force_reg (V16QImode, x)),
5887 UNSPEC_VPERM);
5888 else
5890 /* Invert selector. We prefer to generate VNAND on P8 so
5891 that future fusion opportunities can kick in, but must
5892 generate VNOR elsewhere. */
5893 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5894 rtx iorx = (TARGET_P8_VECTOR
5895 ? gen_rtx_IOR (V16QImode, notx, notx)
5896 : gen_rtx_AND (V16QImode, notx, notx));
5897 rtx tmp = gen_reg_rtx (V16QImode);
5898 emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
5900 /* Permute with operands reversed and adjusted selector. */
5901 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5902 UNSPEC_VPERM);
5905 emit_insn (gen_rtx_SET (VOIDmode, target, x));
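/* Permute-mask example (illustrative): setting element 1 of a V4SImode
   vector gives WIDTH == 4, so the selector built above is
     { 0,1,2,3, 0x10,0x11,0x12,0x13, 8,9,10,11, 12,13,14,15 }
   i.e. result bytes 4..7 are taken from the first four bytes of REG (the
   freshly loaded value) and all other bytes pass through from TARGET.  */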
5908 /* Extract field ELT from VEC into TARGET. */
5910 void
5911 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5913 machine_mode mode = GET_MODE (vec);
5914 machine_mode inner_mode = GET_MODE_INNER (mode);
5915 rtx mem;
5917 if (VECTOR_MEM_VSX_P (mode))
5919 switch (mode)
5921 default:
5922 break;
5923 case V1TImode:
5924 gcc_assert (elt == 0 && inner_mode == TImode);
5925 emit_move_insn (target, gen_lowpart (TImode, vec));
5926 break;
5927 case V2DFmode:
5928 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
5929 return;
5930 case V2DImode:
5931 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
5932 return;
5933 case V4SFmode:
5934 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
5935 return;
5939 /* Allocate mode-sized buffer. */
5940 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5942 emit_move_insn (mem, vec);
5944 /* Add offset to field within buffer matching vector element. */
5945 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
5947 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
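/* Memory-path example (illustrative): extracting element 2 of a V4SImode
   vector with no matching VSX pattern stores the whole vector to a
   16-byte stack temp and reloads a single SImode word from byte offset
   2 * 4 == 8.  */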
5950 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
5951 implement ANDing by the mask IN. */
5952 void
5953 build_mask64_2_operands (rtx in, rtx *out)
5955 unsigned HOST_WIDE_INT c, lsb, m1, m2;
5956 int shift;
5958 gcc_assert (GET_CODE (in) == CONST_INT);
5960 c = INTVAL (in);
5961 if (c & 1)
5963 /* Assume c is initially something like 0x00fff000000fffff. The idea
5964 is to rotate the word so that the middle ^^^^^^ group of zeros
5965 is at the MS end and can be cleared with an rldicl mask. We then
5966 rotate back and clear off the MS ^^ group of zeros with a
5967 second rldicl. */
5968 c = ~c; /* c == 0xff000ffffff00000 */
5969 lsb = c & -c; /* lsb == 0x0000000000100000 */
5970 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
5971 c = ~c; /* c == 0x00fff000000fffff */
5972 c &= -lsb; /* c == 0x00fff00000000000 */
5973 lsb = c & -c; /* lsb == 0x0000100000000000 */
5974 c = ~c; /* c == 0xff000fffffffffff */
5975 c &= -lsb; /* c == 0xff00000000000000 */
5976 shift = 0;
5977 while ((lsb >>= 1) != 0)
5978 shift++; /* shift == 44 on exit from loop */
5979 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
5980 m1 = ~m1; /* m1 == 0x000000ffffffffff */
5981 m2 = ~c; /* m2 == 0x00ffffffffffffff */
5983 else
5985 /* Assume c is initially something like 0xff000f0000000000. The idea
5986 is to rotate the word so that the ^^^ middle group of zeros
5987 is at the LS end and can be cleared with an rldicr mask. We then
5988 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
5989 a second rldicr. */
5990 lsb = c & -c; /* lsb == 0x0000010000000000 */
5991 m2 = -lsb; /* m2 == 0xffffff0000000000 */
5992 c = ~c; /* c == 0x00fff0ffffffffff */
5993 c &= -lsb; /* c == 0x00fff00000000000 */
5994 lsb = c & -c; /* lsb == 0x0000100000000000 */
5995 c = ~c; /* c == 0xff000fffffffffff */
5996 c &= -lsb; /* c == 0xff00000000000000 */
5997 shift = 0;
5998 while ((lsb >>= 1) != 0)
5999 shift++; /* shift == 44 on exit from loop */
6000 m1 = ~c; /* m1 == 0x00ffffffffffffff */
6001 m1 >>= shift; /* m1 == 0x0000000000000fff */
6002 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
6005 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
6006 masks will be all 1's. We are guaranteed more than one transition. */
6007 out[0] = GEN_INT (64 - shift);
6008 out[1] = GEN_INT (m1);
6009 out[2] = GEN_INT (shift);
6010 out[3] = GEN_INT (m2);
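/* A minimal standalone sketch (hypothetical, not compiler code) of how the
   four outputs implement "x & mask" as two rotate-and-mask steps; rotl64
   is an illustrative helper.  */

static unsigned long long
rotl64 (unsigned long long x, unsigned n)
{
  return n == 0 ? x : (x << n) | (x >> (64 - n));
}

/* For the mask 0x00fff000000fffff traced in the comments above, SHIFT is
   44, so out[] holds { 20, 0x000000ffffffffff, 44, 0x00ffffffffffffff }
   and for any x:
     rotl64 (rotl64 (x, 20) & m1, 44) & m2 == (x & 0x00fff000000fffff)
   which corresponds to "rldicl t,x,20,24 ; rldicl r,t,44,8".  */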
6013 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6015 bool
6016 invalid_e500_subreg (rtx op, machine_mode mode)
6018 if (TARGET_E500_DOUBLE)
6020 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6021 subreg:TI and reg:TF. Decimal float modes are like integer
6022 modes (only low part of each register used) for this
6023 purpose. */
6024 if (GET_CODE (op) == SUBREG
6025 && (mode == SImode || mode == DImode || mode == TImode
6026 || mode == DDmode || mode == TDmode || mode == PTImode)
6027 && REG_P (SUBREG_REG (op))
6028 && (GET_MODE (SUBREG_REG (op)) == DFmode
6029 || GET_MODE (SUBREG_REG (op)) == TFmode))
6030 return true;
6032 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6033 reg:TI. */
6034 if (GET_CODE (op) == SUBREG
6035 && (mode == DFmode || mode == TFmode)
6036 && REG_P (SUBREG_REG (op))
6037 && (GET_MODE (SUBREG_REG (op)) == DImode
6038 || GET_MODE (SUBREG_REG (op)) == TImode
6039 || GET_MODE (SUBREG_REG (op)) == PTImode
6040 || GET_MODE (SUBREG_REG (op)) == DDmode
6041 || GET_MODE (SUBREG_REG (op)) == TDmode))
6042 return true;
6045 if (TARGET_SPE
6046 && GET_CODE (op) == SUBREG
6047 && mode == SImode
6048 && REG_P (SUBREG_REG (op))
6049 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6050 return true;
6052 return false;
6055 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
6056 selects whether the alignment is ABI-mandated, optional, or
6057 both ABI-mandated and optional alignment. */
6059 unsigned int
6060 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6062 if (how != align_opt)
6064 if (TREE_CODE (type) == VECTOR_TYPE)
6066 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6067 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6069 if (align < 64)
6070 align = 64;
6072 else if (align < 128)
6073 align = 128;
6075 else if (TARGET_E500_DOUBLE
6076 && TREE_CODE (type) == REAL_TYPE
6077 && TYPE_MODE (type) == DFmode)
6079 if (align < 64)
6080 align = 64;
6084 if (how != align_abi)
6086 if (TREE_CODE (type) == ARRAY_TYPE
6087 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6089 if (align < BITS_PER_WORD)
6090 align = BITS_PER_WORD;
6094 return align;
6097 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6099 bool
6100 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6102 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6104 if (computed != 128)
6106 static bool warned;
6107 if (!warned && warn_psabi)
6109 warned = true;
6110 inform (input_location,
6111 "the layout of aggregates containing vectors with"
6112 " %d-byte alignment has changed in GCC 5",
6113 computed / BITS_PER_UNIT);
6116 /* In current GCC there is no special case. */
6117 return false;
6120 return false;
6123 /* AIX increases natural record alignment to doubleword if the first
6124 field is an FP double while the FP fields remain word aligned. */
6126 unsigned int
6127 rs6000_special_round_type_align (tree type, unsigned int computed,
6128 unsigned int specified)
6130 unsigned int align = MAX (computed, specified);
6131 tree field = TYPE_FIELDS (type);
6133 /* Skip all non-field decls. */
6134 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6135 field = DECL_CHAIN (field);
6137 if (field != NULL && field != type)
6139 type = TREE_TYPE (field);
6140 while (TREE_CODE (type) == ARRAY_TYPE)
6141 type = TREE_TYPE (type);
6143 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6144 align = MAX (align, 64);
6147 return align;
6150 /* Darwin increases record alignment to the natural alignment of
6151 the first field. */
6153 unsigned int
6154 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6155 unsigned int specified)
6157 unsigned int align = MAX (computed, specified);
6159 if (TYPE_PACKED (type))
6160 return align;
6162 /* Find the first field, looking down into aggregates. */
6163 do {
6164 tree field = TYPE_FIELDS (type);
6165 /* Skip all non-field decls. */
6166 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6167 field = DECL_CHAIN (field);
6168 if (! field)
6169 break;
6170 /* A packed field does not contribute any extra alignment. */
6171 if (DECL_PACKED (field))
6172 return align;
6173 type = TREE_TYPE (field);
6174 while (TREE_CODE (type) == ARRAY_TYPE)
6175 type = TREE_TYPE (type);
6176 } while (AGGREGATE_TYPE_P (type));
6178 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6179 align = MAX (align, TYPE_ALIGN (type));
6181 return align;
6184 /* Return 1 for an operand in small memory on V.4/eabi. */
6187 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6188 machine_mode mode ATTRIBUTE_UNUSED)
6190 #if TARGET_ELF
6191 rtx sym_ref;
6193 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6194 return 0;
6196 if (DEFAULT_ABI != ABI_V4)
6197 return 0;
6199 /* Vector and float memory instructions have a limited offset on the
6200 SPE, so using a vector or float variable directly as an operand is
6201 not useful. */
6202 if (TARGET_SPE
6203 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6204 return 0;
6206 if (GET_CODE (op) == SYMBOL_REF)
6207 sym_ref = op;
6209 else if (GET_CODE (op) != CONST
6210 || GET_CODE (XEXP (op, 0)) != PLUS
6211 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6212 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6213 return 0;
6215 else
6217 rtx sum = XEXP (op, 0);
6218 HOST_WIDE_INT summand;
6220 /* We have to be careful here, because it is the referenced address
6221 that must be 32k from _SDA_BASE_, not just the symbol. */
6222 summand = INTVAL (XEXP (sum, 1));
6223 if (summand < 0 || summand > g_switch_value)
6224 return 0;
6226 sym_ref = XEXP (sum, 0);
6229 return SYMBOL_REF_SMALL_P (sym_ref);
6230 #else
6231 return 0;
6232 #endif
6235 /* Return true if either operand is a general purpose register. */
6237 bool
6238 gpr_or_gpr_p (rtx op0, rtx op1)
6240 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6241 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6244 /* Return true if this is a move direct operation between GPR registers and
6245 floating point/VSX registers. */
6247 bool
6248 direct_move_p (rtx op0, rtx op1)
6250 int regno0, regno1;
6252 if (!REG_P (op0) || !REG_P (op1))
6253 return false;
6255 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6256 return false;
6258 regno0 = REGNO (op0);
6259 regno1 = REGNO (op1);
6260 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6261 return false;
6263 if (INT_REGNO_P (regno0))
6264 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6266 else if (INT_REGNO_P (regno1))
6268 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6269 return true;
6271 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6272 return true;
6275 return false;
6278 /* Return true if this is a load or store quad operation. This function does
6279 not handle the atomic quad memory instructions. */
6281 bool
6282 quad_load_store_p (rtx op0, rtx op1)
6284 bool ret;
6286 if (!TARGET_QUAD_MEMORY)
6287 ret = false;
6289 else if (REG_P (op0) && MEM_P (op1))
6290 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6291 && quad_memory_operand (op1, GET_MODE (op1))
6292 && !reg_overlap_mentioned_p (op0, op1));
6294 else if (MEM_P (op0) && REG_P (op1))
6295 ret = (quad_memory_operand (op0, GET_MODE (op0))
6296 && quad_int_reg_operand (op1, GET_MODE (op1)));
6298 else
6299 ret = false;
6301 if (TARGET_DEBUG_ADDR)
6303 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6304 ret ? "true" : "false");
6305 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
6308 return ret;
6311 /* Given an address, return a constant offset term if one exists. */
6313 static rtx
6314 address_offset (rtx op)
6316 if (GET_CODE (op) == PRE_INC
6317 || GET_CODE (op) == PRE_DEC)
6318 op = XEXP (op, 0);
6319 else if (GET_CODE (op) == PRE_MODIFY
6320 || GET_CODE (op) == LO_SUM)
6321 op = XEXP (op, 1);
6323 if (GET_CODE (op) == CONST)
6324 op = XEXP (op, 0);
6326 if (GET_CODE (op) == PLUS)
6327 op = XEXP (op, 1);
6329 if (CONST_INT_P (op))
6330 return op;
6332 return NULL_RTX;
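/* Examples (illustrative): for (plus (reg) (const_int 16)) this returns
   the const_int 16; for (lo_sum (reg) (const (plus (symbol_ref)
   (const_int 8)))) it returns the const_int 8; for a bare (reg) it
   returns NULL_RTX.  */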
6335 /* Return true if the MEM operand is a memory operand suitable for use
6336 with a (full width, possibly multiple) gpr load/store. On
6337 powerpc64 this means the offset must be divisible by 4.
6338 Implements 'Y' constraint.
6340 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6341 a constraint function we know the operand has satisfied a suitable
6342 memory predicate. Also accept some odd rtl generated by reload
6343 (see rs6000_legitimize_reload_address for various forms). It is
6344 important that reload rtl be accepted by appropriate constraints
6345 but not by the operand predicate.
6347 Offsetting a lo_sum should not be allowed, except where we know by
6348 alignment that a 32k boundary is not crossed, but see the ???
6349 comment in rs6000_legitimize_reload_address. Note that by
6350 "offsetting" here we mean a further offset to access parts of the
6351 MEM. It's fine to have a lo_sum where the inner address is offset
6352 from a sym, since the same sym+offset will appear in the high part
6353 of the address calculation. */
6355 bool
6356 mem_operand_gpr (rtx op, machine_mode mode)
6358 unsigned HOST_WIDE_INT offset;
6359 int extra;
6360 rtx addr = XEXP (op, 0);
6362 op = address_offset (addr);
6363 if (op == NULL_RTX)
6364 return true;
6366 offset = INTVAL (op);
6367 if (TARGET_POWERPC64 && (offset & 3) != 0)
6368 return false;
6370 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6371 if (extra < 0)
6372 extra = 0;
6374 if (GET_CODE (addr) == LO_SUM)
6375 /* For lo_sum addresses, we must allow any offset except one that
6376 causes a wrap, so test only the low 16 bits. */
6377 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6379 return offset + 0x8000 < 0x10000u - extra;
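/* Example (illustrative): a TImode access on powerpc64 has EXTRA == 8, so
   a (plus (reg) (const_int N)) address is accepted only when N is a
   multiple of 4 with -0x8000 <= N <= 0x7ff4; at N == 0x7ff8 the second
   doubleword would need displacement 0x8000, which no longer fits in the
   16-bit field.  */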
6382 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6384 static bool
6385 reg_offset_addressing_ok_p (machine_mode mode)
6387 switch (mode)
6389 case V16QImode:
6390 case V8HImode:
6391 case V4SFmode:
6392 case V4SImode:
6393 case V2DFmode:
6394 case V2DImode:
6395 case V1TImode:
6396 case TImode:
6397 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6398 TImode is not a vector mode, if we want to use the VSX registers to
6399 move it around, we need to restrict ourselves to reg+reg
6400 addressing. */
6401 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6402 return false;
6403 break;
6405 case V4HImode:
6406 case V2SImode:
6407 case V1DImode:
6408 case V2SFmode:
6409 /* Paired vector modes. Only reg+reg addressing is valid. */
6410 if (TARGET_PAIRED_FLOAT)
6411 return false;
6412 break;
6414 case SDmode:
6415 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6416 addressing for the LFIWZX and STFIWX instructions. */
6417 if (TARGET_NO_SDMODE_STACK)
6418 return false;
6419 break;
6421 default:
6422 break;
6425 return true;
6428 static bool
6429 virtual_stack_registers_memory_p (rtx op)
6431 int regnum;
6433 if (GET_CODE (op) == REG)
6434 regnum = REGNO (op);
6436 else if (GET_CODE (op) == PLUS
6437 && GET_CODE (XEXP (op, 0)) == REG
6438 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6439 regnum = REGNO (XEXP (op, 0));
6441 else
6442 return false;
6444 return (regnum >= FIRST_VIRTUAL_REGISTER
6445 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6448 /* Return true if a MODE sized memory accesses to OP plus OFFSET
6449 is known to not straddle a 32k boundary. */
6451 static bool
6452 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6453 machine_mode mode)
6455 tree decl, type;
6456 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6458 if (GET_CODE (op) != SYMBOL_REF)
6459 return false;
6461 dsize = GET_MODE_SIZE (mode);
6462 decl = SYMBOL_REF_DECL (op);
6463 if (!decl)
6465 if (dsize == 0)
6466 return false;
6468 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6469 replacing memory addresses with an anchor plus offset. We
6470 could find the decl by rummaging around in the block->objects
6471 VEC for the given offset but that seems like too much work. */
6472 dalign = BITS_PER_UNIT;
6473 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6474 && SYMBOL_REF_ANCHOR_P (op)
6475 && SYMBOL_REF_BLOCK (op) != NULL)
6477 struct object_block *block = SYMBOL_REF_BLOCK (op);
6479 dalign = block->alignment;
6480 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6482 else if (CONSTANT_POOL_ADDRESS_P (op))
6484 /* It would be nice to have get_pool_align()... */
6485 machine_mode cmode = get_pool_mode (op);
6487 dalign = GET_MODE_ALIGNMENT (cmode);
6490 else if (DECL_P (decl))
6492 dalign = DECL_ALIGN (decl);
6494 if (dsize == 0)
6496 /* Allow BLKmode when the entire object is known to not
6497 cross a 32k boundary. */
6498 if (!DECL_SIZE_UNIT (decl))
6499 return false;
6501 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6502 return false;
6504 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6505 if (dsize > 32768)
6506 return false;
6508 return dalign / BITS_PER_UNIT >= dsize;
6511 else
6513 type = TREE_TYPE (decl);
6515 dalign = TYPE_ALIGN (type);
6516 if (CONSTANT_CLASS_P (decl))
6517 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6518 else
6519 dalign = DATA_ALIGNMENT (decl, dalign);
6521 if (dsize == 0)
6523 /* BLKmode, check the entire object. */
6524 if (TREE_CODE (decl) == STRING_CST)
6525 dsize = TREE_STRING_LENGTH (decl);
6526 else if (TYPE_SIZE_UNIT (type)
6527 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
6528 dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6529 else
6530 return false;
6531 if (dsize > 32768)
6532 return false;
6534 return dalign / BITS_PER_UNIT >= dsize;
6538 /* Find how many bits of the alignment we know for this access. */
6539 mask = dalign / BITS_PER_UNIT - 1;
6540 lsb = offset & -offset;
6541 mask &= lsb - 1;
6542 dalign = mask + 1;
6544 return dalign >= dsize;
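/* Example (illustrative) of the final computation: a decl aligned to 16
   bytes accessed at OFFSET == 40 gives LSB == 8 and MASK == 15 & 7 == 7,
   so the guaranteed alignment of the access is only 8 bytes: an 8-byte
   access cannot straddle a 32k boundary there, but a 16-byte access
   might.  */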
6547 static bool
6548 constant_pool_expr_p (rtx op)
6550 rtx base, offset;
6552 split_const (op, &base, &offset);
6553 return (GET_CODE (base) == SYMBOL_REF
6554 && CONSTANT_POOL_ADDRESS_P (base)
6555 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6558 static const_rtx tocrel_base, tocrel_offset;
6560 /* Return true if OP is a toc pointer relative address (the output
6561 of create_TOC_reference). If STRICT, do not match high part or
6562 non-split -mcmodel=large/medium toc pointer relative addresses. */
6564 bool
6565 toc_relative_expr_p (const_rtx op, bool strict)
6567 if (!TARGET_TOC)
6568 return false;
6570 if (TARGET_CMODEL != CMODEL_SMALL)
6572 /* Only match the low part. */
6573 if (GET_CODE (op) == LO_SUM
6574 && REG_P (XEXP (op, 0))
6575 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6576 op = XEXP (op, 1);
6577 else if (strict)
6578 return false;
6581 tocrel_base = op;
6582 tocrel_offset = const0_rtx;
6583 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6585 tocrel_base = XEXP (op, 0);
6586 tocrel_offset = XEXP (op, 1);
6589 return (GET_CODE (tocrel_base) == UNSPEC
6590 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6593 /* Return true if X is a constant pool address, and also for cmodel=medium
6594 if X is a toc-relative address known to be offsettable within MODE. */
6596 bool
6597 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
6598 bool strict)
6600 return (toc_relative_expr_p (x, strict)
6601 && (TARGET_CMODEL != CMODEL_MEDIUM
6602 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6603 || mode == QImode
6604 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6605 INTVAL (tocrel_offset), mode)));
6608 static bool
6609 legitimate_small_data_p (machine_mode mode, rtx x)
6611 return (DEFAULT_ABI == ABI_V4
6612 && !flag_pic && !TARGET_TOC
6613 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6614 && small_data_operand (x, mode));
6617 /* SPE offset addressing is limited to 5-bits worth of double words. */
6618 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
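/* The test above accepts exactly the offsets 0, 8, 16, ..., 248: any bit
   outside 0xf8 must be clear, i.e. a 5-bit field implicitly scaled by
   the 8-byte doubleword size (an illustrative reading of the evldd-class
   encodings).  */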
6620 bool
6621 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
6622 bool strict, bool worst_case)
6624 unsigned HOST_WIDE_INT offset;
6625 unsigned int extra;
6627 if (GET_CODE (x) != PLUS)
6628 return false;
6629 if (!REG_P (XEXP (x, 0)))
6630 return false;
6631 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6632 return false;
6633 if (!reg_offset_addressing_ok_p (mode))
6634 return virtual_stack_registers_memory_p (x);
6635 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6636 return true;
6637 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6638 return false;
6640 offset = INTVAL (XEXP (x, 1));
6641 extra = 0;
6642 switch (mode)
6644 case V4HImode:
6645 case V2SImode:
6646 case V1DImode:
6647 case V2SFmode:
6648 /* SPE vector modes. */
6649 return SPE_CONST_OFFSET_OK (offset);
6651 case DFmode:
6652 case DDmode:
6653 case DImode:
6654 /* On e500v2, we may have:
6656 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
6658 This gets addressed with evldd instructions. */
6659 if (TARGET_E500_DOUBLE)
6660 return SPE_CONST_OFFSET_OK (offset);
6662 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6663 addressing. */
6664 if (VECTOR_MEM_VSX_P (mode))
6665 return false;
6667 if (!worst_case)
6668 break;
6669 if (!TARGET_POWERPC64)
6670 extra = 4;
6671 else if (offset & 3)
6672 return false;
6673 break;
6675 case TFmode:
6676 if (TARGET_E500_DOUBLE)
6677 return (SPE_CONST_OFFSET_OK (offset)
6678 && SPE_CONST_OFFSET_OK (offset + 8));
6679 /* fall through */
6681 case TDmode:
6682 case TImode:
6683 case PTImode:
6684 extra = 8;
6685 if (!worst_case)
6686 break;
6687 if (!TARGET_POWERPC64)
6688 extra = 12;
6689 else if (offset & 3)
6690 return false;
6691 break;
6693 default:
6694 break;
6697 offset += 0x8000;
6698 return offset < 0x10000 - extra;
6701 bool
6702 legitimate_indexed_address_p (rtx x, int strict)
6704 rtx op0, op1;
6706 if (GET_CODE (x) != PLUS)
6707 return false;
6709 op0 = XEXP (x, 0);
6710 op1 = XEXP (x, 1);
6712 /* Recognize the rtl generated by reload which we know will later be
6713 replaced with proper base and index regs. */
6714 if (!strict
6715 && reload_in_progress
6716 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6717 && REG_P (op1))
6718 return true;
6720 return (REG_P (op0) && REG_P (op1)
6721 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6722 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6723 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6724 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6727 bool
6728 avoiding_indexed_address_p (machine_mode mode)
6730 /* Avoid indexed addressing for modes that have non-indexed
6731 load/store instruction forms. */
6732 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6735 bool
6736 legitimate_indirect_address_p (rtx x, int strict)
6738 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6741 bool
6742 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
6744 if (!TARGET_MACHO || !flag_pic
6745 || mode != SImode || GET_CODE (x) != MEM)
6746 return false;
6747 x = XEXP (x, 0);
6749 if (GET_CODE (x) != LO_SUM)
6750 return false;
6751 if (GET_CODE (XEXP (x, 0)) != REG)
6752 return false;
6753 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6754 return false;
6755 x = XEXP (x, 1);
6757 return CONSTANT_P (x);
6760 static bool
6761 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
6763 if (GET_CODE (x) != LO_SUM)
6764 return false;
6765 if (GET_CODE (XEXP (x, 0)) != REG)
6766 return false;
6767 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6768 return false;
6769 /* Restrict addressing for DI because of our SUBREG hackery. */
6770 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6771 return false;
6772 x = XEXP (x, 1);
6774 if (TARGET_ELF || TARGET_MACHO)
6776 bool large_toc_ok;
6778 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6779 return false;
6780 /* LRA does not use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
6781 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
6782 recognizes some LO_SUM addresses as valid although this
6783 function says the opposite. In most cases LRA can generate
6784 correct code for address reloads through its own transformations;
6785 only some LO_SUM cases it cannot manage. So we need to add code
6786 here, analogous to that in rs6000_legitimize_reload_address for
6787 LO_SUM, saying that some addresses are still valid. */
6788 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6789 && small_toc_ref (x, VOIDmode));
6790 if (TARGET_TOC && ! large_toc_ok)
6791 return false;
6792 if (GET_MODE_NUNITS (mode) != 1)
6793 return false;
6794 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6795 && !(/* ??? Assume floating point reg based on mode? */
6796 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6797 && (mode == DFmode || mode == DDmode)))
6798 return false;
6800 return CONSTANT_P (x) || large_toc_ok;
6803 return false;
6807 /* Try machine-dependent ways of modifying an illegitimate address
6808 to be legitimate. If we find one, return the new, valid address.
6809 This is used from only one place: `memory_address' in explow.c.
6811 OLDX is the address as it was before break_out_memory_refs was
6812 called. In some cases it is useful to look at this to decide what
6813 needs to be done.
6815 It is always safe for this function to do nothing. It exists to
6816 recognize opportunities to optimize the output.
6818 On RS/6000, first check for the sum of a register with a constant
6819 integer that is out of range. If so, generate code to add the
6820 constant with the low-order 16 bits masked to the register and force
6821 this result into another register (this can be done with `cau').
6822 Then generate an address of REG+(CONST&0xffff), allowing for the
6823 possibility of bit 16 being a one.
6825 Then check for the sum of a register and something not constant, try to
6826 load the other things into a register and return the sum. */
6828 static rtx
6829 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6830 machine_mode mode)
6832 unsigned int extra;
6834 if (!reg_offset_addressing_ok_p (mode))
6836 if (virtual_stack_registers_memory_p (x))
6837 return x;
6839 /* In theory we should not be seeing addresses of the form reg+0,
6840 but just in case it is generated, optimize it away. */
6841 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6842 return force_reg (Pmode, XEXP (x, 0));
6844 /* For TImode with load/store quad, restrict addresses to just a single
6845 pointer, so it works with both GPRs and VSX registers. */
6846 /* Make sure both operands are registers. */
6847 else if (GET_CODE (x) == PLUS
6848 && (mode != TImode || !TARGET_QUAD_MEMORY))
6849 return gen_rtx_PLUS (Pmode,
6850 force_reg (Pmode, XEXP (x, 0)),
6851 force_reg (Pmode, XEXP (x, 1)));
6852 else
6853 return force_reg (Pmode, x);
6855 if (GET_CODE (x) == SYMBOL_REF)
6857 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6858 if (model != 0)
6859 return rs6000_legitimize_tls_address (x, model);
6862 extra = 0;
6863 switch (mode)
6865 case TFmode:
6866 case TDmode:
6867 case TImode:
6868 case PTImode:
6869 /* As in legitimate_offset_address_p we do not assume
6870 worst-case. The mode here is just a hint as to the registers
6871 used. A TImode is usually in gprs, but may actually be in
6872 fprs. Leave worst-case scenario for reload to handle via
6873 insn constraints. PTImode is only GPRs. */
6874 extra = 8;
6875 break;
6876 default:
6877 break;
6880 if (GET_CODE (x) == PLUS
6881 && GET_CODE (XEXP (x, 0)) == REG
6882 && GET_CODE (XEXP (x, 1)) == CONST_INT
6883 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6884 >= 0x10000 - extra)
6885 && !(SPE_VECTOR_MODE (mode)
6886 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6888 HOST_WIDE_INT high_int, low_int;
6889 rtx sum;
6890 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6891 if (low_int >= 0x8000 - extra)
6892 low_int = 0;
6893 high_int = INTVAL (XEXP (x, 1)) - low_int;
6894 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6895 GEN_INT (high_int)), 0);
6896 return plus_constant (Pmode, sum, low_int);
6898 else if (GET_CODE (x) == PLUS
6899 && GET_CODE (XEXP (x, 0)) == REG
6900 && GET_CODE (XEXP (x, 1)) != CONST_INT
6901 && GET_MODE_NUNITS (mode) == 1
6902 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6903 || (/* ??? Assume floating point reg based on mode? */
6904 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6905 && (mode == DFmode || mode == DDmode)))
6906 && !avoiding_indexed_address_p (mode))
6908 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6909 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6911 else if (SPE_VECTOR_MODE (mode)
6912 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6914 if (mode == DImode)
6915 return x;
6916 /* We accept [reg + reg] and [reg + OFFSET]. */
6918 if (GET_CODE (x) == PLUS)
6920 rtx op1 = XEXP (x, 0);
6921 rtx op2 = XEXP (x, 1);
6922 rtx y;
6924 op1 = force_reg (Pmode, op1);
6926 if (GET_CODE (op2) != REG
6927 && (GET_CODE (op2) != CONST_INT
6928 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
6929 || (GET_MODE_SIZE (mode) > 8
6930 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
6931 op2 = force_reg (Pmode, op2);
6933 /* We can't always do [reg + reg] for these, because [reg +
6934 reg + offset] is not a legitimate addressing mode. */
6935 y = gen_rtx_PLUS (Pmode, op1, op2);
6937 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
6938 return force_reg (Pmode, y);
6939 else
6940 return y;
6943 return force_reg (Pmode, x);
6945 else if ((TARGET_ELF
6946 #if TARGET_MACHO
6947 || !MACHO_DYNAMIC_NO_PIC_P
6948 #endif
6950 && TARGET_32BIT
6951 && TARGET_NO_TOC
6952 && ! flag_pic
6953 && GET_CODE (x) != CONST_INT
6954 && GET_CODE (x) != CONST_WIDE_INT
6955 && GET_CODE (x) != CONST_DOUBLE
6956 && CONSTANT_P (x)
6957 && GET_MODE_NUNITS (mode) == 1
6958 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6959 || (/* ??? Assume floating point reg based on mode? */
6960 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6961 && (mode == DFmode || mode == DDmode))))
6963 rtx reg = gen_reg_rtx (Pmode);
6964 if (TARGET_ELF)
6965 emit_insn (gen_elf_high (reg, x));
6966 else
6967 emit_insn (gen_macho_high (reg, x));
6968 return gen_rtx_LO_SUM (Pmode, reg, x);
6970 else if (TARGET_TOC
6971 && GET_CODE (x) == SYMBOL_REF
6972 && constant_pool_expr_p (x)
6973 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
6974 return create_TOC_reference (x, NULL_RTX);
6975 else
6976 return x;
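/* Worked example (illustrative) of the out-of-range offset split in the
   first PLUS case above: legitimizing (plus r3 (const_int 0x12345)) for
   SImode yields low_int == 0x2345 and high_int == 0x10000, forces
   r3 + 0x10000 into a new register (a single addis) and returns
   (plus tmp 0x2345), a valid D-form address.  */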
6979 /* Debug version of rs6000_legitimize_address. */
6980 static rtx
6981 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
6983 rtx ret;
6984 rtx_insn *insns;
6986 start_sequence ();
6987 ret = rs6000_legitimize_address (x, oldx, mode);
6988 insns = get_insns ();
6989 end_sequence ();
6991 if (ret != x)
6993 fprintf (stderr,
6994 "\nrs6000_legitimize_address: mode %s, old code %s, "
6995 "new code %s, modified\n",
6996 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
6997 GET_RTX_NAME (GET_CODE (ret)));
6999 fprintf (stderr, "Original address:\n");
7000 debug_rtx (x);
7002 fprintf (stderr, "oldx:\n");
7003 debug_rtx (oldx);
7005 fprintf (stderr, "New address:\n");
7006 debug_rtx (ret);
7008 if (insns)
7010 fprintf (stderr, "Insns added:\n");
7011 debug_rtx_list (insns, 20);
7014 else
7016 fprintf (stderr,
7017 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7018 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7020 debug_rtx (x);
7023 if (insns)
7024 emit_insn (insns);
7026 return ret;
7029 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7030 We need to emit DTP-relative relocations. */
7032 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7033 static void
7034 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7036 switch (size)
7038 case 4:
7039 fputs ("\t.long\t", file);
7040 break;
7041 case 8:
7042 fputs (DOUBLE_INT_ASM_OP, file);
7043 break;
7044 default:
7045 gcc_unreachable ();
7047 output_addr_const (file, x);
7048 fputs ("@dtprel+0x8000", file);
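/* Example output (illustrative): for a 4-byte entry referring to symbol
   "x" this emits
     .long x@dtprel+0x8000
   where the 0x8000 bias matches the biased dtprel convention used by the
   PowerPC TLS relocations.  */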
7051 /* Return true if X is a symbol that refers to real (rather than emulated)
7052 TLS. */
7054 static bool
7055 rs6000_real_tls_symbol_ref_p (rtx x)
7057 return (GET_CODE (x) == SYMBOL_REF
7058 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7061 /* In the name of slightly smaller debug output, and to cater to
7062 general assembler lossage, recognize various UNSPEC sequences
7063 and turn them back into a direct symbol reference. */
7065 static rtx
7066 rs6000_delegitimize_address (rtx orig_x)
7068 rtx x, y, offset;
7070 orig_x = delegitimize_mem_from_attrs (orig_x);
7071 x = orig_x;
7072 if (MEM_P (x))
7073 x = XEXP (x, 0);
7075 y = x;
7076 if (TARGET_CMODEL != CMODEL_SMALL
7077 && GET_CODE (y) == LO_SUM)
7078 y = XEXP (y, 1);
7080 offset = NULL_RTX;
7081 if (GET_CODE (y) == PLUS
7082 && GET_MODE (y) == Pmode
7083 && CONST_INT_P (XEXP (y, 1)))
7085 offset = XEXP (y, 1);
7086 y = XEXP (y, 0);
7089 if (GET_CODE (y) == UNSPEC
7090 && XINT (y, 1) == UNSPEC_TOCREL)
7092 y = XVECEXP (y, 0, 0);
7094 #ifdef HAVE_AS_TLS
7095 /* Do not associate thread-local symbols with the original
7096 constant pool symbol. */
7097 if (TARGET_XCOFF
7098 && GET_CODE (y) == SYMBOL_REF
7099 && CONSTANT_POOL_ADDRESS_P (y)
7100 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7101 return orig_x;
7102 #endif
7104 if (offset != NULL_RTX)
7105 y = gen_rtx_PLUS (Pmode, y, offset);
7106 if (!MEM_P (orig_x))
7107 return y;
7108 else
7109 return replace_equiv_address_nv (orig_x, y);
7112 if (TARGET_MACHO
7113 && GET_CODE (orig_x) == LO_SUM
7114 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7116 y = XEXP (XEXP (orig_x, 1), 0);
7117 if (GET_CODE (y) == UNSPEC
7118 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7119 return XVECEXP (y, 0, 0);
7122 return orig_x;
7125 /* Return true if X shouldn't be emitted into the debug info.
7126 The linker doesn't like .toc section references from
7127 .debug_* sections, so reject .toc section symbols. */
7129 static bool
7130 rs6000_const_not_ok_for_debug_p (rtx x)
7132 if (GET_CODE (x) == SYMBOL_REF
7133 && CONSTANT_POOL_ADDRESS_P (x))
7135 rtx c = get_pool_constant (x);
7136 machine_mode cmode = get_pool_mode (x);
7137 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7138 return true;
7141 return false;
7144 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7146 static GTY(()) rtx rs6000_tls_symbol;
7147 static rtx
7148 rs6000_tls_get_addr (void)
7150 if (!rs6000_tls_symbol)
7151 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7153 return rs6000_tls_symbol;
7156 /* Construct the SYMBOL_REF for TLS GOT references. */
7158 static GTY(()) rtx rs6000_got_symbol;
7159 static rtx
7160 rs6000_got_sym (void)
7162 if (!rs6000_got_symbol)
7164 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7165 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7166 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7169 return rs6000_got_symbol;
7172 /* AIX Thread-Local Address support. */
7174 static rtx
7175 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7177 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7178 const char *name;
7179 char *tlsname;
7181 name = XSTR (addr, 0);
7182 /* Append TLS CSECT qualifier, unless the symbol already is qualified
7183 or the symbol will be in TLS private data section. */
7184 if (name[strlen (name) - 1] != ']'
7185 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7186 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7188 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7189 strcpy (tlsname, name);
7190 strcat (tlsname,
7191 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7192 tlsaddr = copy_rtx (addr);
7193 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7195 else
7196 tlsaddr = addr;
7198 /* Place addr into TOC constant pool. */
7199 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7201 /* Output the TOC entry and create the MEM referencing the value. */
7202 if (constant_pool_expr_p (XEXP (sym, 0))
7203 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7205 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7206 mem = gen_const_mem (Pmode, tocref);
7207 set_mem_alias_set (mem, get_TOC_alias_set ());
7209 else
7210 return sym;
7212 /* Use global-dynamic for local-dynamic. */
7213 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7214 || model == TLS_MODEL_LOCAL_DYNAMIC)
7216 /* Create new TOC reference for @m symbol. */
7217 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7218 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7219 strcpy (tlsname, "*LCM");
7220 strcat (tlsname, name + 3);
7221 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7222 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7223 tocref = create_TOC_reference (modaddr, NULL_RTX);
7224 rtx modmem = gen_const_mem (Pmode, tocref);
7225 set_mem_alias_set (modmem, get_TOC_alias_set ());
7227 rtx modreg = gen_reg_rtx (Pmode);
7228 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
7230 tmpreg = gen_reg_rtx (Pmode);
7231 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7233 dest = gen_reg_rtx (Pmode);
7234 if (TARGET_32BIT)
7235 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7236 else
7237 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7238 return dest;
7240 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
7241 else if (TARGET_32BIT)
7243 tlsreg = gen_reg_rtx (SImode);
7244 emit_insn (gen_tls_get_tpointer (tlsreg));
7246 else
7247 tlsreg = gen_rtx_REG (DImode, 13);
7249 /* Load the TOC value into temporary register. */
7250 tmpreg = gen_reg_rtx (Pmode);
7251 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7252 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7253 gen_rtx_MINUS (Pmode, addr, tlsreg));
7255 /* Add TOC symbol value to TLS pointer. */
7256 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7258 return dest;
7261 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7262 this (thread-local) address. */
7264 static rtx
7265 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7267 rtx dest, insn;
7269 if (TARGET_XCOFF)
7270 return rs6000_legitimize_tls_address_aix (addr, model);
7272 dest = gen_reg_rtx (Pmode);
7273 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7275 rtx tlsreg;
7277 if (TARGET_64BIT)
7279 tlsreg = gen_rtx_REG (Pmode, 13);
7280 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7282 else
7284 tlsreg = gen_rtx_REG (Pmode, 2);
7285 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7287 emit_insn (insn);
7289 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7291 rtx tlsreg, tmp;
7293 tmp = gen_reg_rtx (Pmode);
7294 if (TARGET_64BIT)
7296 tlsreg = gen_rtx_REG (Pmode, 13);
7297 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7299 else
7301 tlsreg = gen_rtx_REG (Pmode, 2);
7302 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7304 emit_insn (insn);
7305 if (TARGET_64BIT)
7306 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7307 else
7308 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7309 emit_insn (insn);
7311 else
7313 rtx r3, got, tga, tmp1, tmp2, call_insn;
7315 /* We currently use relocations like @got@tlsgd for tls, which
7316 means the linker will handle allocation of tls entries, placing
7317 them in the .got section. So use a pointer to the .got section,
7318 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7319 or to secondary GOT sections used by 32-bit -fPIC. */
7320 if (TARGET_64BIT)
7321 got = gen_rtx_REG (Pmode, 2);
7322 else
7324 if (flag_pic == 1)
7325 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7326 else
7328 rtx gsym = rs6000_got_sym ();
7329 got = gen_reg_rtx (Pmode);
7330 if (flag_pic == 0)
7331 rs6000_emit_move (got, gsym, Pmode);
7332 else
7334 rtx mem, lab, last;
7336 tmp1 = gen_reg_rtx (Pmode);
7337 tmp2 = gen_reg_rtx (Pmode);
7338 mem = gen_const_mem (Pmode, tmp1);
7339 lab = gen_label_rtx ();
7340 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7341 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7342 if (TARGET_LINK_STACK)
7343 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7344 emit_move_insn (tmp2, mem);
7345 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7346 set_unique_reg_note (last, REG_EQUAL, gsym);
7351 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7353 tga = rs6000_tls_get_addr ();
7354 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7355 1, const0_rtx, Pmode);
7357 r3 = gen_rtx_REG (Pmode, 3);
7358 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7360 if (TARGET_64BIT)
7361 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7362 else
7363 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7365 else if (DEFAULT_ABI == ABI_V4)
7366 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7367 else
7368 gcc_unreachable ();
7369 call_insn = last_call_insn ();
7370 PATTERN (call_insn) = insn;
7371 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7372 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7373 pic_offset_table_rtx);
7375 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7377 tga = rs6000_tls_get_addr ();
7378 tmp1 = gen_reg_rtx (Pmode);
7379 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7380 1, const0_rtx, Pmode);
7382 r3 = gen_rtx_REG (Pmode, 3);
7383 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7385 if (TARGET_64BIT)
7386 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7387 else
7388 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7390 else if (DEFAULT_ABI == ABI_V4)
7391 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7392 else
7393 gcc_unreachable ();
7394 call_insn = last_call_insn ();
7395 PATTERN (call_insn) = insn;
7396 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7397 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7398 pic_offset_table_rtx);
7400 if (rs6000_tls_size == 16)
7402 if (TARGET_64BIT)
7403 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7404 else
7405 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7407 else if (rs6000_tls_size == 32)
7409 tmp2 = gen_reg_rtx (Pmode);
7410 if (TARGET_64BIT)
7411 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7412 else
7413 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7414 emit_insn (insn);
7415 if (TARGET_64BIT)
7416 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7417 else
7418 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7420 else
7422 tmp2 = gen_reg_rtx (Pmode);
7423 if (TARGET_64BIT)
7424 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7425 else
7426 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7427 emit_insn (insn);
7428 insn = gen_rtx_SET (Pmode, dest,
7429 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7431 emit_insn (insn);
7433 else
7435 	  /* Initial-exec, or local-exec with a 64-bit offset. */
7436 tmp2 = gen_reg_rtx (Pmode);
7437 if (TARGET_64BIT)
7438 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7439 else
7440 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7441 emit_insn (insn);
7442 if (TARGET_64BIT)
7443 insn = gen_tls_tls_64 (dest, tmp2, addr);
7444 else
7445 insn = gen_tls_tls_32 (dest, tmp2, addr);
7446 emit_insn (insn);
7450 return dest;
7453 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7455 static bool
7456 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7458 if (GET_CODE (x) == HIGH
7459 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7460 return true;
7462 /* A TLS symbol in the TOC cannot contain a sum. */
7463 if (GET_CODE (x) == CONST
7464 && GET_CODE (XEXP (x, 0)) == PLUS
7465 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7466 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7467 return true;
7469 /* Do not place an ELF TLS symbol in the constant pool. */
7470 return TARGET_ELF && tls_referenced_p (x);
7473 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7474 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7475 can be addressed relative to the toc pointer. */
7477 static bool
7478 use_toc_relative_ref (rtx sym)
7480 return ((constant_pool_expr_p (sym)
7481 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7482 get_pool_mode (sym)))
7483 || (TARGET_CMODEL == CMODEL_MEDIUM
7484 && SYMBOL_REF_LOCAL_P (sym)));
7487 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7488 replace the input X, or the original X if no replacement is called for.
7489 The output parameter *WIN is 1 if the calling macro should goto WIN,
7490 0 if it should not.
7492    For RS/6000, we wish to handle large displacements off a base
7493    register by splitting the addend across an addi/addis and the mem insn.
7494    This cuts the number of extra insns needed from 3 to 1.
7496 On Darwin, we use this to generate code for floating point constants.
7497 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7498 The Darwin code is inside #if TARGET_MACHO because only then are the
7499 machopic_* functions defined. */
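/* For illustration (registers are hypothetical): a DImode load at
   r3+0x12008 cannot use a 16-bit displacement, so the addend is split
   as high = 0x10000 and low = 0x2008, giving

	addis r9,r3,0x1
	ld    r4,0x2008(r9)

   instead of materializing the full offset separately.  */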
7500 static rtx
7501 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7502 int opnum, int type,
7503 int ind_levels ATTRIBUTE_UNUSED, int *win)
7505 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7507 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7508 DFmode/DImode MEM. */
7509 if (reg_offset_p
7510 && opnum == 1
7511 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7512 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7513 reg_offset_p = false;
7515 /* We must recognize output that we have already generated ourselves. */
7516 if (GET_CODE (x) == PLUS
7517 && GET_CODE (XEXP (x, 0)) == PLUS
7518 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7519 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7520 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7522 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7523 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7524 opnum, (enum reload_type) type);
7525 *win = 1;
7526 return x;
7529 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7530 if (GET_CODE (x) == LO_SUM
7531 && GET_CODE (XEXP (x, 0)) == HIGH)
7533 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7534 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7535 opnum, (enum reload_type) type);
7536 *win = 1;
7537 return x;
7540 #if TARGET_MACHO
7541 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7542 && GET_CODE (x) == LO_SUM
7543 && GET_CODE (XEXP (x, 0)) == PLUS
7544 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7545 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7546 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7547 && machopic_operand_p (XEXP (x, 1)))
7549 /* Result of previous invocation of this function on Darwin
7550 floating point constant. */
7551 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7552 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7553 opnum, (enum reload_type) type);
7554 *win = 1;
7555 return x;
7557 #endif
7559 if (TARGET_CMODEL != CMODEL_SMALL
7560 && reg_offset_p
7561 && small_toc_ref (x, VOIDmode))
7563 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7564 x = gen_rtx_LO_SUM (Pmode, hi, x);
7565 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7566 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7567 opnum, (enum reload_type) type);
7568 *win = 1;
7569 return x;
7572 if (GET_CODE (x) == PLUS
7573 && GET_CODE (XEXP (x, 0)) == REG
7574 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7575 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7576 && GET_CODE (XEXP (x, 1)) == CONST_INT
7577 && reg_offset_p
7578 && !SPE_VECTOR_MODE (mode)
7579 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7580 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7582 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7583 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7584 HOST_WIDE_INT high
7585 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
7587 /* Check for 32-bit overflow. */
7588 if (high + low != val)
7590 *win = 0;
7591 return x;
7594 /* Reload the high part into a base reg; leave the low part
7595 in the mem directly. */
7597 x = gen_rtx_PLUS (GET_MODE (x),
7598 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7599 GEN_INT (high)),
7600 GEN_INT (low));
7602 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7603 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7604 opnum, (enum reload_type) type);
7605 *win = 1;
7606 return x;
7609 if (GET_CODE (x) == SYMBOL_REF
7610 && reg_offset_p
7611 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7612 && !SPE_VECTOR_MODE (mode)
7613 #if TARGET_MACHO
7614 && DEFAULT_ABI == ABI_DARWIN
7615 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7616 && machopic_symbol_defined_p (x)
7617 #else
7618 && DEFAULT_ABI == ABI_V4
7619 && !flag_pic
7620 #endif
7621 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7622 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7623 without fprs.
7624 ??? Assume floating point reg based on mode? This assumption is
7625 	 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7626 where reload ends up doing a DFmode load of a constant from
7627 mem using two gprs. Unfortunately, at this point reload
7628 hasn't yet selected regs so poking around in reload data
7629 won't help and even if we could figure out the regs reliably,
7630 we'd still want to allow this transformation when the mem is
7631 naturally aligned. Since we say the address is good here, we
7632 can't disable offsets from LO_SUMs in mem_operand_gpr.
7633 FIXME: Allow offset from lo_sum for other modes too, when
7634 mem is sufficiently aligned.
7636 Also disallow this if the type can go in VMX/Altivec registers, since
7637 those registers do not have d-form (reg+offset) address modes. */
7638 && !reg_addr[mode].scalar_in_vmx_p
7639 && mode != TFmode
7640 && mode != TDmode
7641 && (mode != TImode || !TARGET_VSX_TIMODE)
7642 && mode != PTImode
7643 && (mode != DImode || TARGET_POWERPC64)
7644 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7645 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7647 #if TARGET_MACHO
7648 if (flag_pic)
7650 rtx offset = machopic_gen_offset (x);
7651 x = gen_rtx_LO_SUM (GET_MODE (x),
7652 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7653 gen_rtx_HIGH (Pmode, offset)), offset);
7655 else
7656 #endif
7657 x = gen_rtx_LO_SUM (GET_MODE (x),
7658 gen_rtx_HIGH (Pmode, x), x);
7660 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7661 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7662 opnum, (enum reload_type) type);
7663 *win = 1;
7664 return x;
7667 /* Reload an offset address wrapped by an AND that represents the
7668 masking of the lower bits. Strip the outer AND and let reload
7669 convert the offset address into an indirect address. For VSX,
7670 force reload to create the address with an AND in a separate
7671 register, because we can't guarantee an altivec register will
7672 be used. */
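  /* For illustration, the stripped form looks like
	(and (plus (reg rB) (const_int N)) (const_int -16))
     mirroring the way lvx/stvx ignore the low four bits of the
     effective address.  */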
7673 if (VECTOR_MEM_ALTIVEC_P (mode)
7674 && GET_CODE (x) == AND
7675 && GET_CODE (XEXP (x, 0)) == PLUS
7676 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7677 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7678 && GET_CODE (XEXP (x, 1)) == CONST_INT
7679 && INTVAL (XEXP (x, 1)) == -16)
7681 x = XEXP (x, 0);
7682 *win = 1;
7683 return x;
7686 if (TARGET_TOC
7687 && reg_offset_p
7688 && GET_CODE (x) == SYMBOL_REF
7689 && use_toc_relative_ref (x))
7691 x = create_TOC_reference (x, NULL_RTX);
7692 if (TARGET_CMODEL != CMODEL_SMALL)
7693 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7694 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7695 opnum, (enum reload_type) type);
7696 *win = 1;
7697 return x;
7699 *win = 0;
7700 return x;
7703 /* Debug version of rs6000_legitimize_reload_address. */
7704 static rtx
7705 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7706 int opnum, int type,
7707 int ind_levels, int *win)
7709 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7710 ind_levels, win);
7711 fprintf (stderr,
7712 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7713 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7714 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7715 debug_rtx (x);
7717 if (x == ret)
7718 fprintf (stderr, "Same address returned\n");
7719 else if (!ret)
7720 fprintf (stderr, "NULL returned\n");
7721 else
7723 fprintf (stderr, "New address:\n");
7724 debug_rtx (ret);
7727 return ret;
7730 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7731 that is a valid memory address for an instruction.
7732 The MODE argument is the machine mode for the MEM expression
7733 that wants to use this address.
7735    On the RS/6000, there are four valid address forms: a SYMBOL_REF
7736    that refers to a constant pool entry of an address (or the sum of it
7737    plus a constant), a short (16-bit signed) constant plus a register,
7738    the sum of two registers, or a register indirect, possibly with an
7739    auto-increment.  For DFmode, DDmode and DImode with a constant plus
7740    register, we must ensure that both words are addressable, or that
7741    PowerPC64 is used with a word-aligned offset.
7743 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7744 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7745 because adjacent memory cells are accessed by adding word-sized offsets
7746 during assembly output. */
7747 static bool
7748 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7750 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7752 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7753 if (VECTOR_MEM_ALTIVEC_P (mode)
7754 && GET_CODE (x) == AND
7755 && GET_CODE (XEXP (x, 1)) == CONST_INT
7756 && INTVAL (XEXP (x, 1)) == -16)
7757 x = XEXP (x, 0);
7759 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7760 return 0;
7761 if (legitimate_indirect_address_p (x, reg_ok_strict))
7762 return 1;
7763 if (TARGET_UPDATE
7764 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7765 && mode_supports_pre_incdec_p (mode)
7766 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7767 return 1;
7768 if (virtual_stack_registers_memory_p (x))
7769 return 1;
7770 if (reg_offset_p && legitimate_small_data_p (mode, x))
7771 return 1;
7772 if (reg_offset_p
7773 && legitimate_constant_pool_address_p (x, mode,
7774 reg_ok_strict || lra_in_progress))
7775 return 1;
7776 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7777 allow register indirect addresses. This will allow the values to go in
7778 either GPRs or VSX registers without reloading. The vector types would
7779 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7780 somewhat split, in that some uses are GPR based, and some VSX based. */
7781 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7782 return 0;
7783 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
7784 if (! reg_ok_strict
7785 && reg_offset_p
7786 && GET_CODE (x) == PLUS
7787 && GET_CODE (XEXP (x, 0)) == REG
7788 && (XEXP (x, 0) == virtual_stack_vars_rtx
7789 || XEXP (x, 0) == arg_pointer_rtx)
7790 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7791 return 1;
7792 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7793 return 1;
7794 if (mode != TFmode
7795 && mode != TDmode
7796 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7797 || TARGET_POWERPC64
7798 || (mode != DFmode && mode != DDmode)
7799 || (TARGET_E500_DOUBLE && mode != DDmode))
7800 && (TARGET_POWERPC64 || mode != DImode)
7801 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7802 && mode != PTImode
7803 && !avoiding_indexed_address_p (mode)
7804 && legitimate_indexed_address_p (x, reg_ok_strict))
7805 return 1;
7806 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7807 && mode_supports_pre_modify_p (mode)
7808 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7809 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7810 reg_ok_strict, false)
7811 || (!avoiding_indexed_address_p (mode)
7812 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7813 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7814 return 1;
7815 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7816 return 1;
7817 return 0;
7820 /* Debug version of rs6000_legitimate_address_p. */
7821 static bool
7822 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7823 bool reg_ok_strict)
7825 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7826 fprintf (stderr,
7827 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7828 "strict = %d, reload = %s, code = %s\n",
7829 ret ? "true" : "false",
7830 GET_MODE_NAME (mode),
7831 reg_ok_strict,
7832 (reload_completed
7833 ? "after"
7834 : (reload_in_progress ? "progress" : "before")),
7835 GET_RTX_NAME (GET_CODE (x)));
7836 debug_rtx (x);
7838 return ret;
7841 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7843 static bool
7844 rs6000_mode_dependent_address_p (const_rtx addr,
7845 addr_space_t as ATTRIBUTE_UNUSED)
7847 return rs6000_mode_dependent_address_ptr (addr);
7850 /* Go to LABEL if ADDR (a legitimate address expression)
7851 has an effect that depends on the machine mode it is used for.
7853    On the RS/6000 this is true of all integral offsets (since AltiVec
7854    and VSX modes don't allow them) and of pre-increment or pre-decrement.
7856 ??? Except that due to conceptual problems in offsettable_address_p
7857 we can't really report the problems of integral offsets. So leave
7858 this assuming that the adjustable offset must be valid for the
7859 sub-words of a TFmode operand, which is what we had before. */
7861 static bool
7862 rs6000_mode_dependent_address (const_rtx addr)
7864 switch (GET_CODE (addr))
7866 case PLUS:
7867 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7868 is considered a legitimate address before reload, so there
7869 are no offset restrictions in that case. Note that this
7870 condition is safe in strict mode because any address involving
7871 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7872 been rejected as illegitimate. */
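	 /* E.g. with TARGET_POWERPC64, an offset of 0x7ff0 is safe: the
	    last doubleword of a 16-byte value sits at 0x7ff8, still a
	    valid 16-bit displacement.  An offset of 0x7ff8 is flagged,
	    since 0x7ff8 + 8 would overflow the displacement field.  */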
7873 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7874 && XEXP (addr, 0) != arg_pointer_rtx
7875 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7877 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7878 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
7880 break;
7882 case LO_SUM:
7883 /* Anything in the constant pool is sufficiently aligned that
7884 all bytes have the same high part address. */
7885 return !legitimate_constant_pool_address_p (addr, QImode, false);
7887 /* Auto-increment cases are now treated generically in recog.c. */
7888 case PRE_MODIFY:
7889 return TARGET_UPDATE;
7891 /* AND is only allowed in Altivec loads. */
7892 case AND:
7893 return true;
7895 default:
7896 break;
7899 return false;
7902 /* Debug version of rs6000_mode_dependent_address. */
7903 static bool
7904 rs6000_debug_mode_dependent_address (const_rtx addr)
7906 bool ret = rs6000_mode_dependent_address (addr);
7908 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7909 ret ? "true" : "false");
7910 debug_rtx (addr);
7912 return ret;
7915 /* Implement FIND_BASE_TERM. */
7918 rs6000_find_base_term (rtx op)
7920 rtx base;
7922 base = op;
7923 if (GET_CODE (base) == CONST)
7924 base = XEXP (base, 0);
7925 if (GET_CODE (base) == PLUS)
7926 base = XEXP (base, 0);
7927 if (GET_CODE (base) == UNSPEC)
7928 switch (XINT (base, 1))
7930 case UNSPEC_TOCREL:
7931 case UNSPEC_MACHOPIC_OFFSET:
7932 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7933 for aliasing purposes. */
7934 return XVECEXP (base, 0, 0);
7937 return op;
7940 /* More elaborate version of recog's offsettable_memref_p predicate
7941 that works around the ??? note of rs6000_mode_dependent_address.
7942 In particular it accepts
7944 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7946    in 32-bit mode, which the recog predicate rejects. */
7948 static bool
7949 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
7951 bool worst_case;
7953 if (!MEM_P (op))
7954 return false;
7956 /* First mimic offsettable_memref_p. */
7957 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7958 return true;
7960 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7961 the latter predicate knows nothing about the mode of the memory
7962 reference and, therefore, assumes that it is the largest supported
7963 mode (TFmode). As a consequence, legitimate offsettable memory
7964 references are rejected. rs6000_legitimate_offset_address_p contains
7965 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7966 at least with a little bit of help here given that we know the
7967 actual registers used. */
7968 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7969 || GET_MODE_SIZE (reg_mode) == 4);
7970 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7971 true, worst_case);
7974 /* Change register usage conditional on target flags. */
7975 static void
7976 rs6000_conditional_register_usage (void)
7978 int i;
7980 if (TARGET_DEBUG_TARGET)
7981 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7983 /* Set MQ register fixed (already call_used) so that it will not be
7984 allocated. */
7985 fixed_regs[64] = 1;
7987 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
7988 if (TARGET_64BIT)
7989 fixed_regs[13] = call_used_regs[13]
7990 = call_really_used_regs[13] = 1;
7992 /* Conditionally disable FPRs. */
7993 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
7994 for (i = 32; i < 64; i++)
7995 fixed_regs[i] = call_used_regs[i]
7996 = call_really_used_regs[i] = 1;
7998 /* The TOC register is not killed across calls in a way that is
7999 visible to the compiler. */
8000 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8001 call_really_used_regs[2] = 0;
8003 if (DEFAULT_ABI == ABI_V4
8004 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8005 && flag_pic == 2)
8006 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8008 if (DEFAULT_ABI == ABI_V4
8009 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8010 && flag_pic == 1)
8011 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8012 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8013 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8015 if (DEFAULT_ABI == ABI_DARWIN
8016 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
8017 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8018 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8019 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8021 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8022 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8023 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8025 if (TARGET_SPE)
8027 global_regs[SPEFSCR_REGNO] = 1;
8028 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8029 registers in prologues and epilogues. We no longer use r14
8030 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8031 pool for link-compatibility with older versions of GCC. Once
8032 "old" code has died out, we can return r14 to the allocation
8033 pool. */
8034 fixed_regs[14]
8035 = call_used_regs[14]
8036 = call_really_used_regs[14] = 1;
8039 if (!TARGET_ALTIVEC && !TARGET_VSX)
8041 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8042 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8043 call_really_used_regs[VRSAVE_REGNO] = 1;
8046 if (TARGET_ALTIVEC || TARGET_VSX)
8047 global_regs[VSCR_REGNO] = 1;
8049 if (TARGET_ALTIVEC_ABI)
8051 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8052 call_used_regs[i] = call_really_used_regs[i] = 1;
8054 /* AIX reserves VR20:31 in non-extended ABI mode. */
8055 if (TARGET_XCOFF)
8056 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8057 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8062 /* Output insns to set DEST equal to the constant SOURCE as a series of
8063    lis, ori and sldi instructions and return TRUE. */
8065 bool
8066 rs6000_emit_set_const (rtx dest, rtx source)
8068 machine_mode mode = GET_MODE (dest);
8069 rtx temp, set;
8070 rtx_insn *insn;
8071 HOST_WIDE_INT c;
8073 gcc_checking_assert (CONST_INT_P (source));
8074 c = INTVAL (source);
8075 switch (mode)
8077 case QImode:
8078 case HImode:
8079 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
8080 return true;
8082 case SImode:
8083 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8085 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
8086 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8087 emit_insn (gen_rtx_SET (VOIDmode, dest,
8088 gen_rtx_IOR (SImode, copy_rtx (temp),
8089 GEN_INT (c & 0xffff))));
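      /* For illustration, c = 0x12345678 yields (registers hypothetical):
		lis rT,0x1234
		ori rD,rT,0x5678  */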
8090 break;
8092 case DImode:
8093 if (!TARGET_POWERPC64)
8095 rtx hi, lo;
8097 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8098 DImode);
8099 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8100 DImode);
8101 emit_move_insn (hi, GEN_INT (c >> 32));
8102 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8103 emit_move_insn (lo, GEN_INT (c));
8105 else
8106 rs6000_emit_set_long_const (dest, c);
8107 break;
8109 default:
8110 gcc_unreachable ();
8113 insn = get_last_insn ();
8114 set = single_set (insn);
8115 if (! CONSTANT_P (SET_SRC (set)))
8116 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8118 return true;
8121 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8122 Output insns to set DEST equal to the constant C as a series of
8123    lis, ori and sldi instructions. */
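/* For illustration, the general four-halfword case, e.g.
   c = 0x123456789abcdef0, expands to (registers hypothetical):

	lis   rT,0x1234
	ori   rT,rT,0x5678
	sldi  rT,rT,32
	oris  rT,rT,0x9abc
	ori   rD,rT,0xdef0

   The special cases below emit fewer insns when the upper halfwords
   are zero or a sign-extension.  */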
8125 static void
8126 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8128 rtx temp;
8129 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8131 ud1 = c & 0xffff;
8132 c = c >> 16;
8133 ud2 = c & 0xffff;
8134 c = c >> 16;
8135 ud3 = c & 0xffff;
8136 c = c >> 16;
8137 ud4 = c & 0xffff;
8139 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8140 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8141 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8143 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8144 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8146 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8148 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8149 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8150 if (ud1 != 0)
8151 emit_move_insn (dest,
8152 gen_rtx_IOR (DImode, copy_rtx (temp),
8153 GEN_INT (ud1)));
8155 else if (ud3 == 0 && ud4 == 0)
8157 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8159 gcc_assert (ud2 & 0x8000);
8160 emit_move_insn (copy_rtx (temp),
8161 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8162 if (ud1 != 0)
8163 emit_move_insn (copy_rtx (temp),
8164 gen_rtx_IOR (DImode, copy_rtx (temp),
8165 GEN_INT (ud1)));
8166 emit_move_insn (dest,
8167 gen_rtx_ZERO_EXTEND (DImode,
8168 gen_lowpart (SImode,
8169 copy_rtx (temp))));
8171 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8172 || (ud4 == 0 && ! (ud3 & 0x8000)))
8174 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8176 emit_move_insn (copy_rtx (temp),
8177 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8178 if (ud2 != 0)
8179 emit_move_insn (copy_rtx (temp),
8180 gen_rtx_IOR (DImode, copy_rtx (temp),
8181 GEN_INT (ud2)));
8182 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8183 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8184 GEN_INT (16)));
8185 if (ud1 != 0)
8186 emit_move_insn (dest,
8187 gen_rtx_IOR (DImode, copy_rtx (temp),
8188 GEN_INT (ud1)));
8190 else
8192 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8194 emit_move_insn (copy_rtx (temp),
8195 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8196 if (ud3 != 0)
8197 emit_move_insn (copy_rtx (temp),
8198 gen_rtx_IOR (DImode, copy_rtx (temp),
8199 GEN_INT (ud3)));
8201 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8202 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8203 GEN_INT (32)));
8204 if (ud2 != 0)
8205 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8206 gen_rtx_IOR (DImode, copy_rtx (temp),
8207 GEN_INT (ud2 << 16)));
8208 if (ud1 != 0)
8209 emit_move_insn (dest,
8210 gen_rtx_IOR (DImode, copy_rtx (temp),
8211 GEN_INT (ud1)));
8215 /* Helper for rs6000_emit_move below.  Get rid of [r+r] memory refs
8216 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
8218 static void
8219 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8221 if (reload_in_progress)
8222 return;
8224 if (GET_CODE (operands[0]) == MEM
8225 && GET_CODE (XEXP (operands[0], 0)) != REG
8226 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8227 GET_MODE (operands[0]), false))
8228 operands[0]
8229 = replace_equiv_address (operands[0],
8230 copy_addr_to_reg (XEXP (operands[0], 0)));
8232 if (GET_CODE (operands[1]) == MEM
8233 && GET_CODE (XEXP (operands[1], 0)) != REG
8234 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8235 GET_MODE (operands[1]), false))
8236 operands[1]
8237 = replace_equiv_address (operands[1],
8238 copy_addr_to_reg (XEXP (operands[1], 0)));
8241 /* Generate a vector of constants to permute MODE for a little-endian
8242 storage operation by swapping the two halves of a vector. */
8243 static rtvec
8244 rs6000_const_vec (machine_mode mode)
8246 int i, subparts;
8247 rtvec v;
8249 switch (mode)
8251 case V1TImode:
8252 subparts = 1;
8253 break;
8254 case V2DFmode:
8255 case V2DImode:
8256 subparts = 2;
8257 break;
8258 case V4SFmode:
8259 case V4SImode:
8260 subparts = 4;
8261 break;
8262 case V8HImode:
8263 subparts = 8;
8264 break;
8265 case V16QImode:
8266 subparts = 16;
8267 break;
8268 default:
8269 gcc_unreachable();
8272 v = rtvec_alloc (subparts);
8274 for (i = 0; i < subparts / 2; ++i)
8275 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8276 for (i = subparts / 2; i < subparts; ++i)
8277 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8279 return v;
8282 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8283 for a VSX load or store operation. */
8285 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8287 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8288 return gen_rtx_VEC_SELECT (mode, source, par);
8291 /* Emit a little-endian load from vector memory location SOURCE to VSX
8292 register DEST in mode MODE. The load is done with two permuting
8293    insns that represent an lxvd2x and xxpermdi. */
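/* For illustration, a V4SImode load on little-endian expands
   (schematically) to:

	lxvd2x   vsT,0,rADDR
	xxpermdi vsD,vsT,vsT,2

   where the xxpermdi swaps the two doublewords that lxvd2x loaded
   in reversed order (operands here are illustrative).  */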
8294 void
8295 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8297 rtx tmp, permute_mem, permute_reg;
8299   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8300 V1TImode). */
8301 if (mode == TImode || mode == V1TImode)
8303 mode = V2DImode;
8304 dest = gen_lowpart (V2DImode, dest);
8305 source = adjust_address (source, V2DImode, 0);
8308 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8309 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8310 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8311 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8312 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
8315 /* Emit a little-endian store to vector memory location DEST from VSX
8316 register SOURCE in mode MODE. The store is done with two permuting
8317    insns that represent an xxpermdi and an stxvd2x. */
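/* For illustration, the mirror-image store sequence is (schematically):

	xxpermdi vsT,vsS,vsS,2
	stxvd2x  vsT,0,rADDR  */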
8318 void
8319 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8321 rtx tmp, permute_src, permute_tmp;
8323   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8324 V1TImode). */
8325 if (mode == TImode || mode == V1TImode)
8327 mode = V2DImode;
8328 dest = adjust_address (dest, V2DImode, 0);
8329 source = gen_lowpart (V2DImode, source);
8332 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8333 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8334 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8335 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8336 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8339 /* Emit a sequence representing a little-endian VSX load or store,
8340 moving data from SOURCE to DEST in mode MODE. This is done
8341 separately from rs6000_emit_move to ensure it is called only
8342 during expand. LE VSX loads and stores introduced later are
8343 handled with a split. The expand-time RTL generation allows
8344 us to optimize away redundant pairs of register-permutes. */
8345 void
8346 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8348 gcc_assert (!BYTES_BIG_ENDIAN
8349 && VECTOR_MEM_VSX_P (mode)
8350 && !gpr_or_gpr_p (dest, source)
8351 && (MEM_P (source) ^ MEM_P (dest)));
8353 if (MEM_P (source))
8355 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8356 rs6000_emit_le_vsx_load (dest, source, mode);
8358 else
8360 if (!REG_P (source))
8361 source = force_reg (mode, source);
8362 rs6000_emit_le_vsx_store (dest, source, mode);
8366 /* Emit a move from SOURCE to DEST in mode MODE. */
8367 void
8368 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8370 rtx operands[2];
8371 operands[0] = dest;
8372 operands[1] = source;
8374 if (TARGET_DEBUG_ADDR)
8376 fprintf (stderr,
8377 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8378 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8379 GET_MODE_NAME (mode),
8380 reload_in_progress,
8381 reload_completed,
8382 can_create_pseudo_p ());
8383 debug_rtx (dest);
8384 fprintf (stderr, "source:\n");
8385 debug_rtx (source);
8388   /* Sanity checks.  Check that we get CONST_WIDE_INT only when we should. */
8389 if (CONST_WIDE_INT_P (operands[1])
8390 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8392 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8393 gcc_unreachable ();
8396 /* Check if GCC is setting up a block move that will end up using FP
8397 registers as temporaries. We must make sure this is acceptable. */
8398 if (GET_CODE (operands[0]) == MEM
8399 && GET_CODE (operands[1]) == MEM
8400 && mode == DImode
8401 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8402 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8403 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8404 ? 32 : MEM_ALIGN (operands[0])))
8405 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8406 ? 32
8407 : MEM_ALIGN (operands[1]))))
8408 && ! MEM_VOLATILE_P (operands [0])
8409 && ! MEM_VOLATILE_P (operands [1]))
8411 emit_move_insn (adjust_address (operands[0], SImode, 0),
8412 adjust_address (operands[1], SImode, 0));
8413 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8414 adjust_address (copy_rtx (operands[1]), SImode, 4));
8415 return;
8418 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8419 && !gpc_reg_operand (operands[1], mode))
8420 operands[1] = force_reg (mode, operands[1]);
8422 /* Recognize the case where operand[1] is a reference to thread-local
8423 data and load its address to a register. */
8424 if (tls_referenced_p (operands[1]))
8426 enum tls_model model;
8427 rtx tmp = operands[1];
8428 rtx addend = NULL;
8430 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8432 addend = XEXP (XEXP (tmp, 0), 1);
8433 tmp = XEXP (XEXP (tmp, 0), 0);
8436 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8437 model = SYMBOL_REF_TLS_MODEL (tmp);
8438 gcc_assert (model != 0);
8440 tmp = rs6000_legitimize_tls_address (tmp, model);
8441 if (addend)
8443 tmp = gen_rtx_PLUS (mode, tmp, addend);
8444 tmp = force_operand (tmp, operands[0]);
8446 operands[1] = tmp;
8449 /* Handle the case where reload calls us with an invalid address. */
8450 if (reload_in_progress && mode == Pmode
8451 && (! general_operand (operands[1], mode)
8452 || ! nonimmediate_operand (operands[0], mode)))
8453 goto emit_set;
8455 /* 128-bit constant floating-point values on Darwin should really be loaded
8456 as two parts. However, this premature splitting is a problem when DFmode
8457 values can go into Altivec registers. */
8458 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8459 && !reg_addr[DFmode].scalar_in_vmx_p
8460 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8462 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8463 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8464 DFmode);
8465 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8466 GET_MODE_SIZE (DFmode)),
8467 simplify_gen_subreg (DFmode, operands[1], mode,
8468 GET_MODE_SIZE (DFmode)),
8469 DFmode);
8470 return;
8473 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8474 cfun->machine->sdmode_stack_slot =
8475 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8478   /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8479      p1:SD) when p1 is not in the floating-point class and p0 is spilled,
8480      since we have no analogous movsd_store for this case. */
8481 if (lra_in_progress && mode == DDmode
8482 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8483 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8484 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8485 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8487 enum reg_class cl;
8488 int regno = REGNO (SUBREG_REG (operands[1]));
8490 if (regno >= FIRST_PSEUDO_REGISTER)
8492 cl = reg_preferred_class (regno);
8493 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8495 if (regno >= 0 && ! FP_REGNO_P (regno))
8497 mode = SDmode;
8498 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8499 operands[1] = SUBREG_REG (operands[1]);
8502 if (lra_in_progress
8503 && mode == SDmode
8504 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8505 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8506 && (REG_P (operands[1])
8507 || (GET_CODE (operands[1]) == SUBREG
8508 && REG_P (SUBREG_REG (operands[1])))))
8510 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8511 ? SUBREG_REG (operands[1]) : operands[1]);
8512 enum reg_class cl;
8514 if (regno >= FIRST_PSEUDO_REGISTER)
8516 cl = reg_preferred_class (regno);
8517 gcc_assert (cl != NO_REGS);
8518 regno = ira_class_hard_regs[cl][0];
8520 if (FP_REGNO_P (regno))
8522 if (GET_MODE (operands[0]) != DDmode)
8523 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8524 emit_insn (gen_movsd_store (operands[0], operands[1]));
8526 else if (INT_REGNO_P (regno))
8527 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8528 else
8529 gcc_unreachable();
8530 return;
8532   /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8533      p1:DD)) when p0 is not in the floating-point class and p1 is spilled,
8534      since we have no analogous movsd_load for this case. */
8535 if (lra_in_progress && mode == DDmode
8536 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8537 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8538 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8539 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8541 enum reg_class cl;
8542 int regno = REGNO (SUBREG_REG (operands[0]));
8544 if (regno >= FIRST_PSEUDO_REGISTER)
8546 cl = reg_preferred_class (regno);
8547 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8549 if (regno >= 0 && ! FP_REGNO_P (regno))
8551 mode = SDmode;
8552 operands[0] = SUBREG_REG (operands[0]);
8553 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8556 if (lra_in_progress
8557 && mode == SDmode
8558 && (REG_P (operands[0])
8559 || (GET_CODE (operands[0]) == SUBREG
8560 && REG_P (SUBREG_REG (operands[0]))))
8561 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8562 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8564 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8565 ? SUBREG_REG (operands[0]) : operands[0]);
8566 enum reg_class cl;
8568 if (regno >= FIRST_PSEUDO_REGISTER)
8570 cl = reg_preferred_class (regno);
8571 gcc_assert (cl != NO_REGS);
8572 regno = ira_class_hard_regs[cl][0];
8574 if (FP_REGNO_P (regno))
8576 if (GET_MODE (operands[1]) != DDmode)
8577 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8578 emit_insn (gen_movsd_load (operands[0], operands[1]));
8580 else if (INT_REGNO_P (regno))
8581 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8582 else
8583 gcc_unreachable();
8584 return;
8587 if (reload_in_progress
8588 && mode == SDmode
8589 && cfun->machine->sdmode_stack_slot != NULL_RTX
8590 && MEM_P (operands[0])
8591 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8592 && REG_P (operands[1]))
8594 if (FP_REGNO_P (REGNO (operands[1])))
8596 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8597 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8598 emit_insn (gen_movsd_store (mem, operands[1]));
8600 else if (INT_REGNO_P (REGNO (operands[1])))
8602 rtx mem = operands[0];
8603 if (BYTES_BIG_ENDIAN)
8604 mem = adjust_address_nv (mem, mode, 4);
8605 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8606 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8608 else
8609 gcc_unreachable();
8610 return;
8612 if (reload_in_progress
8613 && mode == SDmode
8614 && REG_P (operands[0])
8615 && MEM_P (operands[1])
8616 && cfun->machine->sdmode_stack_slot != NULL_RTX
8617 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8619 if (FP_REGNO_P (REGNO (operands[0])))
8621 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8622 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8623 emit_insn (gen_movsd_load (operands[0], mem));
8625 else if (INT_REGNO_P (REGNO (operands[0])))
8627 rtx mem = operands[1];
8628 if (BYTES_BIG_ENDIAN)
8629 mem = adjust_address_nv (mem, mode, 4);
8630 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8631 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8633 else
8634 gcc_unreachable();
8635 return;
8638 /* FIXME: In the long term, this switch statement should go away
8639 and be replaced by a sequence of tests based on things like
8640 mode == Pmode. */
8641 switch (mode)
8643 case HImode:
8644 case QImode:
8645 if (CONSTANT_P (operands[1])
8646 && GET_CODE (operands[1]) != CONST_INT)
8647 operands[1] = force_const_mem (mode, operands[1]);
8648 break;
8650 case TFmode:
8651 case TDmode:
8652 rs6000_eliminate_indexed_memrefs (operands);
8653 /* fall through */
8655 case DFmode:
8656 case DDmode:
8657 case SFmode:
8658 case SDmode:
8659 if (CONSTANT_P (operands[1])
8660 && ! easy_fp_constant (operands[1], mode))
8661 operands[1] = force_const_mem (mode, operands[1]);
8662 break;
8664 case V16QImode:
8665 case V8HImode:
8666 case V4SFmode:
8667 case V4SImode:
8668 case V4HImode:
8669 case V2SFmode:
8670 case V2SImode:
8671 case V1DImode:
8672 case V2DFmode:
8673 case V2DImode:
8674 case V1TImode:
8675 if (CONSTANT_P (operands[1])
8676 && !easy_vector_constant (operands[1], mode))
8677 operands[1] = force_const_mem (mode, operands[1]);
8678 break;
8680 case SImode:
8681 case DImode:
8682       /* Use the default pattern for the address of ELF small data.  */
8683 if (TARGET_ELF
8684 && mode == Pmode
8685 && DEFAULT_ABI == ABI_V4
8686 && (GET_CODE (operands[1]) == SYMBOL_REF
8687 || GET_CODE (operands[1]) == CONST)
8688 && small_data_operand (operands[1], mode))
8690 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8691 return;
8694 if (DEFAULT_ABI == ABI_V4
8695 && mode == Pmode && mode == SImode
8696 && flag_pic == 1 && got_operand (operands[1], mode))
8698 emit_insn (gen_movsi_got (operands[0], operands[1]));
8699 return;
8702 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8703 && TARGET_NO_TOC
8704 && ! flag_pic
8705 && mode == Pmode
8706 && CONSTANT_P (operands[1])
8707 && GET_CODE (operands[1]) != HIGH
8708 && GET_CODE (operands[1]) != CONST_INT)
8710 rtx target = (!can_create_pseudo_p ()
8711 ? operands[0]
8712 : gen_reg_rtx (mode));
8714 /* If this is a function address on -mcall-aixdesc,
8715 convert it to the address of the descriptor. */
8716 if (DEFAULT_ABI == ABI_AIX
8717 && GET_CODE (operands[1]) == SYMBOL_REF
8718 && XSTR (operands[1], 0)[0] == '.')
8720 const char *name = XSTR (operands[1], 0);
8721 rtx new_ref;
8722 while (*name == '.')
8723 name++;
8724 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8725 CONSTANT_POOL_ADDRESS_P (new_ref)
8726 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8727 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8728 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8729 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8730 operands[1] = new_ref;
8733 if (DEFAULT_ABI == ABI_DARWIN)
8735 #if TARGET_MACHO
8736 if (MACHO_DYNAMIC_NO_PIC_P)
8738 /* Take care of any required data indirection. */
8739 operands[1] = rs6000_machopic_legitimize_pic_address (
8740 operands[1], mode, operands[0]);
8741 if (operands[0] != operands[1])
8742 emit_insn (gen_rtx_SET (VOIDmode,
8743 operands[0], operands[1]));
8744 return;
8746 #endif
8747 emit_insn (gen_macho_high (target, operands[1]));
8748 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8749 return;
8752 emit_insn (gen_elf_high (target, operands[1]));
8753 emit_insn (gen_elf_low (operands[0], target, operands[1]));
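	  /* For illustration, elf_high/elf_low materialize the address as
		lis  rT,sym@ha
		addi rD,rT,sym@l
	     (registers hypothetical).  */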
8754 return;
8757 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8758 and we have put it in the TOC, we just need to make a TOC-relative
8759 reference to it. */
8760 if (TARGET_TOC
8761 && GET_CODE (operands[1]) == SYMBOL_REF
8762 && use_toc_relative_ref (operands[1]))
8763 operands[1] = create_TOC_reference (operands[1], operands[0]);
8764 else if (mode == Pmode
8765 && CONSTANT_P (operands[1])
8766 && GET_CODE (operands[1]) != HIGH
8767 && ((GET_CODE (operands[1]) != CONST_INT
8768 && ! easy_fp_constant (operands[1], mode))
8769 || (GET_CODE (operands[1]) == CONST_INT
8770 && (num_insns_constant (operands[1], mode)
8771 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8772 || (GET_CODE (operands[0]) == REG
8773 && FP_REGNO_P (REGNO (operands[0]))))
8774 && !toc_relative_expr_p (operands[1], false)
8775 && (TARGET_CMODEL == CMODEL_SMALL
8776 || can_create_pseudo_p ()
8777 || (REG_P (operands[0])
8778 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8781 #if TARGET_MACHO
8782 /* Darwin uses a special PIC legitimizer. */
8783 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8785 operands[1] =
8786 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8787 operands[0]);
8788 if (operands[0] != operands[1])
8789 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8790 return;
8792 #endif
8794 /* If we are to limit the number of things we put in the TOC and
8795 this is a symbol plus a constant we can add in one insn,
8796 just put the symbol in the TOC and add the constant. Don't do
8797 this if reload is in progress. */
8798 if (GET_CODE (operands[1]) == CONST
8799 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8800 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8801 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8802 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8803 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8804 && ! side_effects_p (operands[0]))
8806 rtx sym =
8807 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8808 rtx other = XEXP (XEXP (operands[1], 0), 1);
8810 sym = force_reg (mode, sym);
8811 emit_insn (gen_add3_insn (operands[0], sym, other));
8812 return;
8815 operands[1] = force_const_mem (mode, operands[1]);
8817 if (TARGET_TOC
8818 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8819 && constant_pool_expr_p (XEXP (operands[1], 0))
8820 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8821 get_pool_constant (XEXP (operands[1], 0)),
8822 get_pool_mode (XEXP (operands[1], 0))))
8824 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8825 operands[0]);
8826 operands[1] = gen_const_mem (mode, tocref);
8827 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8830 break;
8832 case TImode:
8833 if (!VECTOR_MEM_VSX_P (TImode))
8834 rs6000_eliminate_indexed_memrefs (operands);
8835 break;
8837 case PTImode:
8838 rs6000_eliminate_indexed_memrefs (operands);
8839 break;
8841 default:
8842 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8845 /* Above, we may have called force_const_mem which may have returned
8846 an invalid address. If we can, fix this up; otherwise, reload will
8847 have to deal with it. */
8848 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8849 operands[1] = validize_mem (operands[1]);
8851 emit_set:
8852 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8855 /* Return true if a structure, union or array containing FIELD should be
8856 accessed using `BLKMODE'.
8858 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8859 entire thing in a DI and use subregs to access the internals.
8860 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8861 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8862 best thing to do is set structs to BLKmode and avoid Severe Tire
8863 Damage.
8865    On e500 v2, DF and DI modes suffer from the same anomaly.  DF can
8866    fit into one register, whereas DI still needs two. */
8868 static bool
8869 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
8871 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8872 || (TARGET_E500_DOUBLE && mode == DFmode));
8875 /* Nonzero if we can use a floating-point register to pass this arg. */
8876 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8877 (SCALAR_FLOAT_MODE_P (MODE) \
8878 && (CUM)->fregno <= FP_ARG_MAX_REG \
8879 && TARGET_HARD_FLOAT && TARGET_FPRS)
8881 /* Nonzero if we can use an AltiVec register to pass this arg. */
8882 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8883 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8884 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8885 && TARGET_ALTIVEC_ABI \
8886 && (NAMED))
8888 /* Walk down the type tree of TYPE counting consecutive base elements.
8889 If *MODEP is VOIDmode, then set it to the first valid floating point
8890 or vector type. If a non-floating point or vector type is found, or
8891 if a floating point or vector type that doesn't match a non-VOIDmode
8892 *MODEP is found, then return -1, otherwise return the count in the
8893 sub-tree. */
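/* For illustration (hypothetical types): for
	double a[3];
   the walk returns 3 with *MODEP set to DFmode, whereas
	struct { double d; int i; };
   returns -1, because 'int' is neither floating point nor vector.  */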
8895 static int
8896 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
8898 machine_mode mode;
8899 HOST_WIDE_INT size;
8901 switch (TREE_CODE (type))
8903 case REAL_TYPE:
8904 mode = TYPE_MODE (type);
8905 if (!SCALAR_FLOAT_MODE_P (mode))
8906 return -1;
8908 if (*modep == VOIDmode)
8909 *modep = mode;
8911 if (*modep == mode)
8912 return 1;
8914 break;
8916 case COMPLEX_TYPE:
8917 mode = TYPE_MODE (TREE_TYPE (type));
8918 if (!SCALAR_FLOAT_MODE_P (mode))
8919 return -1;
8921 if (*modep == VOIDmode)
8922 *modep = mode;
8924 if (*modep == mode)
8925 return 2;
8927 break;
8929 case VECTOR_TYPE:
8930 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
8931 return -1;
8933 /* Use V4SImode as representative of all 128-bit vector types. */
8934 size = int_size_in_bytes (type);
8935 switch (size)
8937 case 16:
8938 mode = V4SImode;
8939 break;
8940 default:
8941 return -1;
8944 if (*modep == VOIDmode)
8945 *modep = mode;
8947 /* Vector modes are considered to be opaque: two vectors are
8948 equivalent for the purposes of being homogeneous aggregates
8949 if they are the same size. */
8950 if (*modep == mode)
8951 return 1;
8953 break;
8955 case ARRAY_TYPE:
8957 int count;
8958 tree index = TYPE_DOMAIN (type);
8960 /* Can't handle incomplete types nor sizes that are not
8961 fixed. */
8962 if (!COMPLETE_TYPE_P (type)
8963 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8964 return -1;
8966 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
8967 if (count == -1
8968 || !index
8969 || !TYPE_MAX_VALUE (index)
8970 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
8971 || !TYPE_MIN_VALUE (index)
8972 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8973 || count < 0)
8974 return -1;
8976 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8977 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8979 /* There must be no padding. */
8980 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8981 return -1;
8983 return count;
8986 case RECORD_TYPE:
8988 int count = 0;
8989 int sub_count;
8990 tree field;
8992 /* Can't handle incomplete types nor sizes that are not
8993 fixed. */
8994 if (!COMPLETE_TYPE_P (type)
8995 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8996 return -1;
8998 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9000 if (TREE_CODE (field) != FIELD_DECL)
9001 continue;
9003 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9004 if (sub_count < 0)
9005 return -1;
9006 count += sub_count;
9009 /* There must be no padding. */
9010 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9011 return -1;
9013 return count;
9016 case UNION_TYPE:
9017 case QUAL_UNION_TYPE:
9019 /* These aren't very interesting except in a degenerate case. */
9020 int count = 0;
9021 int sub_count;
9022 tree field;
9024 /* Can't handle incomplete types nor sizes that are not
9025 fixed. */
9026 if (!COMPLETE_TYPE_P (type)
9027 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9028 return -1;
9030 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9032 if (TREE_CODE (field) != FIELD_DECL)
9033 continue;
9035 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9036 if (sub_count < 0)
9037 return -1;
9038 count = count > sub_count ? count : sub_count;
9041 /* There must be no padding. */
9042 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9043 return -1;
9045 return count;
9048 default:
9049 break;
9052 return -1;
9055 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9056 float or vector aggregate that shall be passed in FP/vector registers
9057 according to the ELFv2 ABI, return the homogeneous element mode in
9058 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9060 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
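/* E.g. (hypothetical type) under the ELFv2 ABI,
	struct { double x, y; };
   is homogeneous: *ELT_MODE is set to DFmode, *N_ELTS to 2, and the
   argument is passed in two consecutive FP registers.  */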
9062 static bool
9063 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9064 machine_mode *elt_mode,
9065 int *n_elts)
9067 /* Note that we do not accept complex types at the top level as
9068 homogeneous aggregates; these types are handled via the
9069 targetm.calls.split_complex_arg mechanism. Complex types
9070 can be elements of homogeneous aggregates, however. */
9071 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9073 machine_mode field_mode = VOIDmode;
9074 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9076 if (field_count > 0)
9078 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)?
9079 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9081 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9082 up to AGGR_ARG_NUM_REG registers. */
9083 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9085 if (elt_mode)
9086 *elt_mode = field_mode;
9087 if (n_elts)
9088 *n_elts = field_count;
9089 return true;
9094 if (elt_mode)
9095 *elt_mode = mode;
9096 if (n_elts)
9097 *n_elts = 1;
9098 return false;
9101 /* Return a nonzero value to say to return the function value in
9102 memory, just as large structures are always returned. TYPE will be
9103 the data type of the value, and FNTYPE will be the type of the
9104 function doing the returning, or @code{NULL} for libcalls.
9106 The AIX ABI for the RS/6000 specifies that all structures are
9107 returned in memory. The Darwin ABI does the same.
9109 For the Darwin 64 Bit ABI, a function result can be returned in
9110 registers or in memory, depending on the size of the return data
9111 type. If it is returned in registers, the value occupies the same
9112 registers as it would if it were the first and only function
9113 argument. Otherwise, the function places its result in memory at
9114 the location pointed to by GPR3.
9116 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9117 but a draft put them in memory, and GCC used to implement the draft
9118 instead of the final standard. Therefore, aix_struct_return
9119 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9120 compatibility can change DRAFT_V4_STRUCT_RET to override the
9121 default, and -m switches get the final word. See
9122 rs6000_option_override_internal for more details.
9124 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9125 long double support is enabled. These values are returned in memory.
9127 int_size_in_bytes returns -1 for variable size objects, which go in
9128 memory always. The cast to unsigned makes -1 > 8. */
9130 static bool
9131 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9133 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9134 if (TARGET_MACHO
9135 && rs6000_darwin64_abi
9136 && TREE_CODE (type) == RECORD_TYPE
9137 && int_size_in_bytes (type) > 0)
9139 CUMULATIVE_ARGS valcum;
9140 rtx valret;
9142 valcum.words = 0;
9143 valcum.fregno = FP_ARG_MIN_REG;
9144 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9145 /* Do a trial code generation as if this were going to be passed
9146 as an argument; if any part goes in memory, we return NULL. */
9147 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9148 if (valret)
9149 return false;
9150 /* Otherwise fall through to more conventional ABI rules. */
9153 #if HAVE_UPC_PTS_STRUCT_REP
9154 if (POINTER_TYPE_P (type) && upc_shared_type_p (TREE_TYPE (type)))
9155 return true;
9156 #endif
9158 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
9159 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9160 NULL, NULL))
9161 return false;
9163 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
9164 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9165 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9166 return false;
9168 if (AGGREGATE_TYPE_P (type)
9169 && (aix_struct_return
9170 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9171 return true;
9173 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9174 modes only exist for GCC vector types if -maltivec. */
9175 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9176 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9177 return false;
9179 /* Return synthetic vectors in memory. */
9180 if (TREE_CODE (type) == VECTOR_TYPE
9181 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9183 static bool warned_for_return_big_vectors = false;
9184 if (!warned_for_return_big_vectors)
9186 warning (0, "GCC vector returned by reference: "
9187 "non-standard ABI extension with no compatibility guarantee");
9188 warned_for_return_big_vectors = true;
9190 return true;
9193 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9194 return true;
9196 return false;
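/* Illustrative examples of the rules above under the 64-bit ELFv2 ABI
   (hypothetical type names, not part of the build): */
#if 0
struct two_d { double a, b; };  /* Homogeneous float aggregate: returned
				   in FPRs, not memory.  */
struct small { char c[12]; };   /* 12 bytes <= 16: returned in GPRs.  */
struct big { char c[24]; };     /* 24 bytes > 16 and not homogeneous:
				   returned in memory.  */
#endif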
9199 /* Specify whether values returned in registers should be at the most
9200 significant end of a register. We want aggregates returned by
9201 value to match the way aggregates are passed to functions. */
9203 static bool
9204 rs6000_return_in_msb (const_tree valtype)
9206 return (DEFAULT_ABI == ABI_ELFv2
9207 && BYTES_BIG_ENDIAN
9208 && AGGREGATE_TYPE_P (valtype)
9209 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9212 #ifdef HAVE_AS_GNU_ATTRIBUTE
9213 /* Return TRUE if a call to function FNDECL may be one that
9214 potentially affects the function calling ABI of the object file. */
9216 static bool
9217 call_ABI_of_interest (tree fndecl)
9219 if (symtab->state == EXPANSION)
9221 struct cgraph_node *c_node;
9223 /* Libcalls are always interesting. */
9224 if (fndecl == NULL_TREE)
9225 return true;
9227 /* Any call to an external function is interesting. */
9228 if (DECL_EXTERNAL (fndecl))
9229 return true;
9231 /* Interesting functions that we are emitting in this object file. */
9232 c_node = cgraph_node::get (fndecl);
9233 c_node = c_node->ultimate_alias_target ();
9234 return !c_node->only_called_directly_p ();
9236 return false;
9238 #endif
9240 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9241 for a call to a function whose data type is FNTYPE.
9242 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
9244 For incoming args we set the number of arguments in the prototype large
9245 so we never return a PARALLEL. */
9247 void
9248 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9249 rtx libname ATTRIBUTE_UNUSED, int incoming,
9250 int libcall, int n_named_args,
9251 tree fndecl ATTRIBUTE_UNUSED,
9252 machine_mode return_mode ATTRIBUTE_UNUSED)
9254 static CUMULATIVE_ARGS zero_cumulative;
9256 *cum = zero_cumulative;
9257 cum->words = 0;
9258 cum->fregno = FP_ARG_MIN_REG;
9259 cum->vregno = ALTIVEC_ARG_MIN_REG;
9260 cum->prototype = (fntype && prototype_p (fntype));
9261 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9262 ? CALL_LIBCALL : CALL_NORMAL);
9263 cum->sysv_gregno = GP_ARG_MIN_REG;
9264 cum->stdarg = stdarg_p (fntype);
9266 cum->nargs_prototype = 0;
9267 if (incoming || cum->prototype)
9268 cum->nargs_prototype = n_named_args;
9270 /* Check for a longcall attribute. */
9271 if ((!fntype && rs6000_default_long_calls)
9272 || (fntype
9273 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9274 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9275 cum->call_cookie |= CALL_LONG;
9277 if (TARGET_DEBUG_ARG)
9279 fprintf (stderr, "\ninit_cumulative_args:");
9280 if (fntype)
9282 tree ret_type = TREE_TYPE (fntype);
9283 fprintf (stderr, " ret code = %s,",
9284 get_tree_code_name (TREE_CODE (ret_type)));
9287 if (cum->call_cookie & CALL_LONG)
9288 fprintf (stderr, " longcall,");
9290 fprintf (stderr, " proto = %d, nargs = %d\n",
9291 cum->prototype, cum->nargs_prototype);
9294 #ifdef HAVE_AS_GNU_ATTRIBUTE
9295 if (DEFAULT_ABI == ABI_V4)
9297 cum->escapes = call_ABI_of_interest (fndecl);
9298 if (cum->escapes)
9300 tree return_type;
9302 if (fntype)
9304 return_type = TREE_TYPE (fntype);
9305 return_mode = TYPE_MODE (return_type);
9307 else
9308 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9310 if (return_type != NULL)
9312 if (TREE_CODE (return_type) == RECORD_TYPE
9313 && TYPE_TRANSPARENT_AGGR (return_type))
9315 return_type = TREE_TYPE (first_field (return_type));
9316 return_mode = TYPE_MODE (return_type);
9318 if (AGGREGATE_TYPE_P (return_type)
9319 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9320 <= 8))
9321 rs6000_returns_struct = true;
9323 if (SCALAR_FLOAT_MODE_P (return_mode))
9324 rs6000_passes_float = true;
9325 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9326 || SPE_VECTOR_MODE (return_mode))
9327 rs6000_passes_vector = true;
9330 #endif
9332 if (fntype
9333 && !TARGET_ALTIVEC
9334 && TARGET_ALTIVEC_ABI
9335 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9337 error ("cannot return value in vector register because"
9338 " altivec instructions are disabled, use -maltivec"
9339 " to enable them");
9343 /* The mode the ABI uses for a word. This is not the same as word_mode
9344 for -m32 -mpowerpc64. This is used to implement various target hooks. */
9346 static machine_mode
9347 rs6000_abi_word_mode (void)
9349 return TARGET_32BIT ? SImode : DImode;
9352 /* On rs6000, function arguments are promoted, as are function return
9353 values. */
9355 static machine_mode
9356 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9357 machine_mode mode,
9358 int *punsignedp ATTRIBUTE_UNUSED,
9359 const_tree, int)
9361 PROMOTE_MODE (mode, *punsignedp, type);
9363 return mode;
9366 /* Return true if TYPE must be passed on the stack and not in registers. */
9368 static bool
9369 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9371 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9372 return must_pass_in_stack_var_size (mode, type);
9373 else
9374 return must_pass_in_stack_var_size_or_pad (mode, type);
9377 /* If defined, a C expression which determines whether, and in which
9378 direction, to pad out an argument with extra space. The value
9379 should be of type `enum direction': either `upward' to pad above
9380 the argument, `downward' to pad below, or `none' to inhibit
9381 padding.
9383 For the AIX ABI structs are always stored left shifted in their
9384 argument slot. */
9386 enum direction
9387 function_arg_padding (machine_mode mode, const_tree type)
9389 #ifndef AGGREGATE_PADDING_FIXED
9390 #define AGGREGATE_PADDING_FIXED 0
9391 #endif
9392 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9393 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9394 #endif
9396 if (!AGGREGATE_PADDING_FIXED)
9398 /* GCC used to pass structures of the same size as integer types as
9399 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9400 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9401 passed padded downward, except that -mstrict-align further
9402 muddied the water in that multi-component structures of 2 and 4
9403 bytes in size were passed padded upward.
9405 The following arranges for best compatibility with previous
9406 versions of gcc, but removes the -mstrict-align dependency. */
9407 if (BYTES_BIG_ENDIAN)
9409 HOST_WIDE_INT size = 0;
9411 if (mode == BLKmode)
9413 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9414 size = int_size_in_bytes (type);
9416 else
9417 size = GET_MODE_SIZE (mode);
9419 if (size == 1 || size == 2 || size == 4)
9420 return downward;
9422 return upward;
9425 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9427 if (type != 0 && AGGREGATE_TYPE_P (type))
9428 return upward;
9431 /* Fall back to the default. */
9432 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
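/* For example, with !AGGREGATE_PADDING_FIXED on a big-endian target
   (hypothetical types, not part of the build): */
#if 0
struct s2 { char c[2]; };  /* size 2: padded downward, like a short  */
struct s3 { char c[3]; };  /* size 3: falls through, padded upward  */
#endif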
9435 /* If defined, a C expression that gives the alignment boundary, in bits,
9436 of an argument with the specified mode and type. If it is not defined,
9437 PARM_BOUNDARY is used for all arguments.
9439 V.4 wants long longs and doubles to be double word aligned. Just
9440 testing the mode size is a boneheaded way to do this as it means
9441 that other types such as complex int are also double word aligned.
9442 However, we're stuck with this because changing the ABI might break
9443 existing library interfaces.
9445 Doubleword align SPE vectors.
9446 Quadword align Altivec/VSX vectors.
9447 Quadword align large synthetic vector types. */
9449 static unsigned int
9450 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9452 machine_mode elt_mode;
9453 int n_elts;
9455 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9457 if (DEFAULT_ABI == ABI_V4
9458 && (GET_MODE_SIZE (mode) == 8
9459 || (TARGET_HARD_FLOAT
9460 && TARGET_FPRS
9461 && (mode == TFmode || mode == TDmode))))
9462 return 64;
9463 else if (SPE_VECTOR_MODE (mode)
9464 || (type && TREE_CODE (type) == VECTOR_TYPE
9465 && int_size_in_bytes (type) >= 8
9466 && int_size_in_bytes (type) < 16))
9467 return 64;
9468 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9469 || (type && TREE_CODE (type) == VECTOR_TYPE
9470 && int_size_in_bytes (type) >= 16))
9471 return 128;
9473 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9474 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9475 -mcompat-align-parm is used. */
9476 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9477 || DEFAULT_ABI == ABI_ELFv2)
9478 && type && TYPE_ALIGN (type) > 64)
9480 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9481 or homogeneous float/vector aggregates here. We already handled
9482 vector aggregates above, but still need to check for float here. */
9483 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9484 && !SCALAR_FLOAT_MODE_P (elt_mode));
9486 /* We used to check for BLKmode instead of the above aggregate type
9487 check. Warn when this results in any difference to the ABI. */
9488 if (aggregate_p != (mode == BLKmode))
9490 static bool warned;
9491 if (!warned && warn_psabi)
9493 warned = true;
9494 inform (input_location,
9495 "the ABI of passing aggregates with %d-byte alignment"
9496 " has changed in GCC 5",
9497 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9501 if (aggregate_p)
9502 return 128;
9505 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9506 implement the "aggregate type" check as a BLKmode check here; this
9507 means certain aggregate types are in fact not aligned. */
9508 if (TARGET_MACHO && rs6000_darwin64_abi
9509 && mode == BLKmode
9510 && type && TYPE_ALIGN (type) > 64)
9511 return 128;
9513 return PARM_BOUNDARY;
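/* Some concrete boundaries implied by the code above (a sketch with
   hypothetical declarations; assumes 64-bit ELFv2 unless noted): */
#if 0
double d;         /* V.4: GET_MODE_SIZE == 8, so 64-bit boundary  */
vector int v;     /* AltiVec vector: 128-bit boundary  */
struct aligned16 { int i; } __attribute__ ((aligned (16)));
		  /* ELFv2 aggregate with alignment > 64 bits:
		     128-bit boundary  */
#endif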
9516 /* The offset in words to the start of the parameter save area. */
9518 static unsigned int
9519 rs6000_parm_offset (void)
9521 return (DEFAULT_ABI == ABI_V4 ? 2
9522 : DEFAULT_ABI == ABI_ELFv2 ? 4
9523 : 6);
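/* In bytes this is the size of the stack-frame header that precedes the
   parameter save area: 2 words = 8 bytes for 32-bit V.4, 4 words = 32
   bytes for 64-bit ELFv2, and 6 words = 24 or 48 bytes for AIX,
   depending on -m32/-m64.  */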
9526 /* For a function parm of MODE and TYPE, return the starting word in
9527 the parameter area. NWORDS of the parameter area are already used. */
9529 static unsigned int
9530 rs6000_parm_start (machine_mode mode, const_tree type,
9531 unsigned int nwords)
9533 unsigned int align;
9535 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9536 return nwords + (-(rs6000_parm_offset () + nwords) & align);
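/* A hand-worked example of the computation above, assuming the 64-bit
   AIX ABI (rs6000_parm_offset () == 6, PARM_BOUNDARY == 64): for a
   16-byte-aligned argument (boundary 128), align == 128/64 - 1 == 1;
   with NWORDS == 3 words already used, the result is
   3 + (-(6 + 3) & 1) == 4, so the argument starts at word 4, i.e. at
   byte offset (6 + 4) * 8 == 80 from the frame base, which is indeed
   16-byte aligned.  */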
9539 /* Compute the size (in words) of a function argument. */
9541 static unsigned long
9542 rs6000_arg_size (machine_mode mode, const_tree type)
9544 unsigned long size;
9546 if (mode != BLKmode)
9547 size = GET_MODE_SIZE (mode);
9548 else
9549 size = int_size_in_bytes (type);
9551 if (TARGET_32BIT)
9552 return (size + 3) >> 2;
9553 else
9554 return (size + 7) >> 3;
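/* E.g. a 10-byte BLKmode struct occupies (10 + 3) >> 2 == 3 words under
   a 32-bit ABI and (10 + 7) >> 3 == 2 doublewords under a 64-bit ABI.  */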
9557 /* Use this to flush pending int fields. */
9559 static void
9560 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9561 HOST_WIDE_INT bitpos, int final)
9563 unsigned int startbit, endbit;
9564 int intregs, intoffset;
9565 machine_mode mode;
9567 /* Handle the situations where a float is taking up the first half
9568 of the GPR, and the other half is empty (typically due to
9569 alignment restrictions). We can detect this by an 8-byte-aligned
9570 int field, or by seeing that this is the final flush for this
9571 argument. Count the word and continue on. */
9572 if (cum->floats_in_gpr == 1
9573 && (cum->intoffset % 64 == 0
9574 || (cum->intoffset == -1 && final)))
9576 cum->words++;
9577 cum->floats_in_gpr = 0;
9580 if (cum->intoffset == -1)
9581 return;
9583 intoffset = cum->intoffset;
9584 cum->intoffset = -1;
9585 cum->floats_in_gpr = 0;
9587 if (intoffset % BITS_PER_WORD != 0)
9589 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9590 MODE_INT, 0);
9591 if (mode == BLKmode)
9593 /* We couldn't find an appropriate mode, which happens,
9594 e.g., in packed structs when there are 3 bytes to load.
9595 Back intoffset back to the beginning of the word in this
9596 case. */
9597 intoffset = intoffset & -BITS_PER_WORD;
9601 startbit = intoffset & -BITS_PER_WORD;
9602 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9603 intregs = (endbit - startbit) / BITS_PER_WORD;
9604 cum->words += intregs;
9605 /* words should be unsigned. */
9606 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9608 int pad = (endbit/BITS_PER_WORD) - cum->words;
9609 cum->words += pad;
9613 /* The darwin64 ABI calls for us to recurse down through structs,
9614 looking for elements passed in registers. Unfortunately, we have
9615 to track int register count here also because of misalignments
9616 in powerpc alignment mode. */
9618 static void
9619 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9620 const_tree type,
9621 HOST_WIDE_INT startbitpos)
9623 tree f;
9625 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9626 if (TREE_CODE (f) == FIELD_DECL)
9628 HOST_WIDE_INT bitpos = startbitpos;
9629 tree ftype = TREE_TYPE (f);
9630 machine_mode mode;
9631 if (ftype == error_mark_node)
9632 continue;
9633 mode = TYPE_MODE (ftype);
9635 if (DECL_SIZE (f) != 0
9636 && tree_fits_uhwi_p (bit_position (f)))
9637 bitpos += int_bit_position (f);
9639 /* ??? FIXME: else assume zero offset. */
9641 if (TREE_CODE (ftype) == RECORD_TYPE)
9642 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9643 else if (USE_FP_FOR_ARG_P (cum, mode))
9645 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9646 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9647 cum->fregno += n_fpregs;
9648 /* Single-precision floats present a special problem for
9649 us, because they are smaller than an 8-byte GPR, and so
9650 the structure-packing rules combined with the standard
9651 varargs behavior mean that we want to pack float/float
9652 and float/int combinations into a single register's
9653 space. This is complicated by the arg advance flushing,
9654 which works on arbitrarily large groups of int-type
9655 fields. */
9656 if (mode == SFmode)
9658 if (cum->floats_in_gpr == 1)
9660 /* Two floats in a word; count the word and reset
9661 the float count. */
9662 cum->words++;
9663 cum->floats_in_gpr = 0;
9665 else if (bitpos % 64 == 0)
9667 /* A float at the beginning of an 8-byte word;
9668 count it and put off adjusting cum->words until
9669 we see if an arg advance flush is going to do it
9670 for us. */
9671 cum->floats_in_gpr++;
9673 else
9675 /* The float is at the end of a word, preceded
9676 by integer fields, so the arg advance flush
9677 just above has already set cum->words and
9678 everything is taken care of. */
9681 else
9682 cum->words += n_fpregs;
9684 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9686 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9687 cum->vregno++;
9688 cum->words += 2;
9690 else if (cum->intoffset == -1)
9691 cum->intoffset = bitpos;
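/* An illustration of the float-packing rules above under the Darwin64
   ABI (hypothetical types, not part of the build): */
#if 0
struct ff { float a, b; };     /* Both floats also take an FPR each; the
				  first sets floats_in_gpr = 1 and the
				  second bumps cum->words, so the pair
				  consumes a single GPR-sized slot in the
				  parameter area.  */
struct fi { int i; float f; }; /* The int starts an integer run; the
				  flush performed before the float
				  accounts for the word they share.  */
#endif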
9695 /* Check for an item that needs to be considered specially under the darwin 64
9696 bit ABI. These are record types where the mode is BLK or the structure is
9697 8 bytes in size. */
9698 static int
9699 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9701 return rs6000_darwin64_abi
9702 && ((mode == BLKmode
9703 && TREE_CODE (type) == RECORD_TYPE
9704 && int_size_in_bytes (type) > 0)
9705 || (type && TREE_CODE (type) == RECORD_TYPE
9706 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9709 /* Update the data in CUM to advance over an argument
9710 of mode MODE and data type TYPE.
9711 (TYPE is null for libcalls where that information may not be available.)
9713 Note that for args passed by reference, function_arg will be called
9714 with MODE and TYPE set to that of the pointer to the arg, not the arg
9715 itself. */
9717 static void
9718 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9719 const_tree type, bool named, int depth)
9721 machine_mode elt_mode;
9722 int n_elts;
9724 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9726 /* Only tick off an argument if we're not recursing. */
9727 if (depth == 0)
9728 cum->nargs_prototype--;
9730 #ifdef HAVE_AS_GNU_ATTRIBUTE
9731 if (DEFAULT_ABI == ABI_V4
9732 && cum->escapes)
9734 if (SCALAR_FLOAT_MODE_P (mode))
9735 rs6000_passes_float = true;
9736 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9737 rs6000_passes_vector = true;
9738 else if (SPE_VECTOR_MODE (mode)
9739 && !cum->stdarg
9740 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9741 rs6000_passes_vector = true;
9743 #endif
9745 if (TARGET_ALTIVEC_ABI
9746 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9747 || (type && TREE_CODE (type) == VECTOR_TYPE
9748 && int_size_in_bytes (type) == 16)))
9750 bool stack = false;
9752 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9754 cum->vregno += n_elts;
9756 if (!TARGET_ALTIVEC)
9757 error ("cannot pass argument in vector register because"
9758 " altivec instructions are disabled, use -maltivec"
9759 " to enable them");
9761 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9762 even if it is going to be passed in a vector register.
9763 Darwin does the same for variable-argument functions. */
9764 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9765 && TARGET_64BIT)
9766 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9767 stack = true;
9769 else
9770 stack = true;
9772 if (stack)
9774 int align;
9776 /* Vector parameters must be 16-byte aligned. In 32-bit
9777 mode this means we need to take into account the offset
9778 to the parameter save area. In 64-bit mode, they just
9779 have to start on an even word, since the parameter save
9780 area is 16-byte aligned. */
9781 if (TARGET_32BIT)
9782 align = -(rs6000_parm_offset () + cum->words) & 3;
9783 else
9784 align = cum->words & 1;
9785 cum->words += align + rs6000_arg_size (mode, type);
9787 if (TARGET_DEBUG_ARG)
9789 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9790 cum->words, align);
9791 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9792 cum->nargs_prototype, cum->prototype,
9793 GET_MODE_NAME (mode));
9797 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9798 && !cum->stdarg
9799 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9800 cum->sysv_gregno++;
9802 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9804 int size = int_size_in_bytes (type);
9805 /* Variable sized types have size == -1 and are
9806 treated as if consisting entirely of ints.
9807 Pad to 16 byte boundary if needed. */
9808 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9809 && (cum->words % 2) != 0)
9810 cum->words++;
9811 /* For varargs, we can just go up by the size of the struct. */
9812 if (!named)
9813 cum->words += (size + 7) / 8;
9814 else
9816 /* It is tempting to say int register count just goes up by
9817 sizeof(type)/8, but this is wrong in a case such as
9818 { int; double; int; } [powerpc alignment]. We have to
9819 grovel through the fields for these too. */
9820 cum->intoffset = 0;
9821 cum->floats_in_gpr = 0;
9822 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9823 rs6000_darwin64_record_arg_advance_flush (cum,
9824 size * BITS_PER_UNIT, 1);
9826 if (TARGET_DEBUG_ARG)
9828 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9829 cum->words, TYPE_ALIGN (type), size);
9830 fprintf (stderr,
9831 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9832 cum->nargs_prototype, cum->prototype,
9833 GET_MODE_NAME (mode));
9836 else if (DEFAULT_ABI == ABI_V4)
9838 if (TARGET_HARD_FLOAT && TARGET_FPRS
9839 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9840 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9841 || (mode == TFmode && !TARGET_IEEEQUAD)
9842 || mode == SDmode || mode == DDmode || mode == TDmode))
9844 /* _Decimal128 must use an even/odd register pair. This assumes
9845 that the register number is odd when fregno is odd. */
9846 if (mode == TDmode && (cum->fregno % 2) == 1)
9847 cum->fregno++;
9849 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9850 <= FP_ARG_V4_MAX_REG)
9851 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9852 else
9854 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9855 if (mode == DFmode || mode == TFmode
9856 || mode == DDmode || mode == TDmode)
9857 cum->words += cum->words & 1;
9858 cum->words += rs6000_arg_size (mode, type);
9861 else
9863 int n_words = rs6000_arg_size (mode, type);
9864 int gregno = cum->sysv_gregno;
9866 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9867 (r7,r8) or (r9,r10), as is any other 2-word item such
9868 as complex int, due to a historical mistake. */
9869 if (n_words == 2)
9870 gregno += (1 - gregno) & 1;
9872 /* Multi-reg args are not split between registers and stack. */
9873 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9875 /* Long long and SPE vectors are aligned on the stack.
9876 So are other 2 word items such as complex int due to
9877 a historical mistake. */
9878 if (n_words == 2)
9879 cum->words += cum->words & 1;
9880 cum->words += n_words;
9883 /* Note: we continue to accumulate gregno even after we've started
9884 spilling to the stack; this is how expand_builtin_saveregs learns
9885 that spilling has started. */
9886 cum->sysv_gregno = gregno + n_words;
9889 if (TARGET_DEBUG_ARG)
9891 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9892 cum->words, cum->fregno);
9893 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9894 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9895 fprintf (stderr, "mode = %4s, named = %d\n",
9896 GET_MODE_NAME (mode), named);
9899 else
9901 int n_words = rs6000_arg_size (mode, type);
9902 int start_words = cum->words;
9903 int align_words = rs6000_parm_start (mode, type, start_words);
9905 cum->words = align_words + n_words;
9907 if (SCALAR_FLOAT_MODE_P (elt_mode)
9908 && TARGET_HARD_FLOAT && TARGET_FPRS)
9910 /* _Decimal128 must be passed in an even/odd float register pair.
9911 This assumes that the register number is odd when fregno is
9912 odd. */
9913 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9914 cum->fregno++;
9915 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9918 if (TARGET_DEBUG_ARG)
9920 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9921 cum->words, cum->fregno);
9922 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9923 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9924 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9925 named, align_words - start_words, depth);
9930 static void
9931 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
9932 const_tree type, bool named)
9934 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9935 0);
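/* A hand-worked trace of advancing over f (double, int, double) under
   the 64-bit ELFv2 ABI (a sketch): the first double takes f1 and word 0,
   the int takes word 1, and the second double takes f2 and word 2.
   Note that FP arguments consume their parameter-save-area words as
   well as an FPR, so later integer arguments do not reuse those words.  */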
9938 static rtx
9939 spe_build_register_parallel (machine_mode mode, int gregno)
9941 rtx r1, r3, r5, r7;
9943 switch (mode)
9945 case DFmode:
9946 r1 = gen_rtx_REG (DImode, gregno);
9947 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9948 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9950 case DCmode:
9951 case TFmode:
9952 r1 = gen_rtx_REG (DImode, gregno);
9953 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9954 r3 = gen_rtx_REG (DImode, gregno + 2);
9955 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9956 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9958 case TCmode:
9959 r1 = gen_rtx_REG (DImode, gregno);
9960 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9961 r3 = gen_rtx_REG (DImode, gregno + 2);
9962 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9963 r5 = gen_rtx_REG (DImode, gregno + 4);
9964 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9965 r7 = gen_rtx_REG (DImode, gregno + 6);
9966 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9967 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9969 default:
9970 gcc_unreachable ();
9974 /* Determine where to put a SIMD argument on the SPE. */
9975 static rtx
9976 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
9977 const_tree type)
9979 int gregno = cum->sysv_gregno;
9981 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9982 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9983 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9984 || mode == DCmode || mode == TCmode))
9986 int n_words = rs6000_arg_size (mode, type);
9988 /* Doubles go in an odd/even register pair (r5/r6, etc). */
9989 if (mode == DFmode)
9990 gregno += (1 - gregno) & 1;
9992 /* Multi-reg args are not split between registers and stack. */
9993 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9994 return NULL_RTX;
9996 return spe_build_register_parallel (mode, gregno);
9998 if (cum->stdarg)
10000 int n_words = rs6000_arg_size (mode, type);
10002 /* SPE vectors are put in odd registers. */
10003 if (n_words == 2 && (gregno & 1) == 0)
10004 gregno += 1;
10006 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10008 rtx r1, r2;
10009 machine_mode m = SImode;
10011 r1 = gen_rtx_REG (m, gregno);
10012 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10013 r2 = gen_rtx_REG (m, gregno + 1);
10014 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10015 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10017 else
10018 return NULL_RTX;
10020 else
10022 if (gregno <= GP_ARG_MAX_REG)
10023 return gen_rtx_REG (mode, gregno);
10024 else
10025 return NULL_RTX;
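/* Shape of the PARALLELs built above (a sketch): for a DFmode argument
   in r5 on E500, spe_build_register_parallel yields

     (parallel:DF [(expr_list (reg:DI 5) (const_int 0))])

   and for TFmode starting at r5 it yields two DImode pieces at byte
   offsets 0 and 8:

     (parallel:TF [(expr_list (reg:DI 5) (const_int 0))
		   (expr_list (reg:DI 7) (const_int 8))])  */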
10029 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10030 structure between cum->intoffset and bitpos to integer registers. */
10032 static void
10033 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10034 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10036 machine_mode mode;
10037 unsigned int regno;
10038 unsigned int startbit, endbit;
10039 int this_regno, intregs, intoffset;
10040 rtx reg;
10042 if (cum->intoffset == -1)
10043 return;
10045 intoffset = cum->intoffset;
10046 cum->intoffset = -1;
10048 /* If this is the trailing part of a word, try to only load that
10049 much into the register. Otherwise load the whole register. Note
10050 that in the latter case we may pick up unwanted bits. It's not a
10051 problem at the moment, but we may wish to revisit it. */
10053 if (intoffset % BITS_PER_WORD != 0)
10055 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10056 MODE_INT, 0);
10057 if (mode == BLKmode)
10059 /* We couldn't find an appropriate mode, which happens,
10060 e.g., in packed structs when there are 3 bytes to load.
10061 Back intoffset back to the beginning of the word in this
10062 case. */
10063 intoffset = intoffset & -BITS_PER_WORD;
10064 mode = word_mode;
10067 else
10068 mode = word_mode;
10070 startbit = intoffset & -BITS_PER_WORD;
10071 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
10072 intregs = (endbit - startbit) / BITS_PER_WORD;
10073 this_regno = cum->words + intoffset / BITS_PER_WORD;
10075 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10076 cum->use_stack = 1;
10078 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10079 if (intregs <= 0)
10080 return;
10082 intoffset /= BITS_PER_UNIT;
10083 do
10084 {
10085 regno = GP_ARG_MIN_REG + this_regno;
10086 reg = gen_rtx_REG (mode, regno);
10087 rvec[(*k)++] =
10088 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10090 this_regno += 1;
10091 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10092 mode = word_mode;
10093 intregs -= 1;
10094 }
10095 while (intregs > 0);
10098 /* Recursive workhorse for the following. */
10100 static void
10101 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10102 HOST_WIDE_INT startbitpos, rtx rvec[],
10103 int *k)
10105 tree f;
10107 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10108 if (TREE_CODE (f) == FIELD_DECL)
10110 HOST_WIDE_INT bitpos = startbitpos;
10111 tree ftype = TREE_TYPE (f);
10112 machine_mode mode;
10113 if (ftype == error_mark_node)
10114 continue;
10115 mode = TYPE_MODE (ftype);
10117 if (DECL_SIZE (f) != 0
10118 && tree_fits_uhwi_p (bit_position (f)))
10119 bitpos += int_bit_position (f);
10121 /* ??? FIXME: else assume zero offset. */
10123 if (TREE_CODE (ftype) == RECORD_TYPE)
10124 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10125 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10127 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10128 #if 0
10129 switch (mode)
10131 case SCmode: mode = SFmode; break;
10132 case DCmode: mode = DFmode; break;
10133 case TCmode: mode = TFmode; break;
10134 default: break;
10136 #endif
10137 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10138 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10140 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10141 && (mode == TFmode || mode == TDmode));
10142 /* Long double or _Decimal128 split over regs and memory. */
10143 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10144 cum->use_stack = 1;
10146 rvec[(*k)++]
10147 = gen_rtx_EXPR_LIST (VOIDmode,
10148 gen_rtx_REG (mode, cum->fregno++),
10149 GEN_INT (bitpos / BITS_PER_UNIT));
10150 if (mode == TFmode || mode == TDmode)
10151 cum->fregno++;
10153 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10155 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10156 rvec[(*k)++]
10157 = gen_rtx_EXPR_LIST (VOIDmode,
10158 gen_rtx_REG (mode, cum->vregno++),
10159 GEN_INT (bitpos / BITS_PER_UNIT));
10161 else if (cum->intoffset == -1)
10162 cum->intoffset = bitpos;
10166 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10167 the register(s) to be used for each field and subfield of a struct
10168 being passed by value, along with the offset of where the
10169 register's value may be found in the block. FP fields go in FP
10170 register, vector fields go in vector registers, and everything
10171 else goes in int registers, packed as in memory.
10173 This code is also used for function return values. RETVAL indicates
10174 whether this is the case.
10176 Much of this is taken from the SPARC V9 port, which has a similar
10177 calling convention. */
10179 static rtx
10180 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10181 bool named, bool retval)
10183 rtx rvec[FIRST_PSEUDO_REGISTER];
10184 int k = 1, kbase = 1;
10185 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10186 /* This is a copy; modifications are not visible to our caller. */
10187 CUMULATIVE_ARGS copy_cum = *orig_cum;
10188 CUMULATIVE_ARGS *cum = &copy_cum;
10190 /* Pad to 16 byte boundary if needed. */
10191 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10192 && (cum->words % 2) != 0)
10193 cum->words++;
10195 cum->intoffset = 0;
10196 cum->use_stack = 0;
10197 cum->named = named;
10199 /* Put entries into rvec[] for individual FP and vector fields, and
10200 for the chunks of memory that go in int regs. Note we start at
10201 element 1; 0 is reserved for an indication of using memory, and
10202 may or may not be filled in below. */
10203 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10204 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10206 /* If any part of the struct went on the stack put all of it there.
10207 This hack is because the generic code for
10208 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10209 parts of the struct are not at the beginning. */
10210 if (cum->use_stack)
10212 if (retval)
10213 return NULL_RTX; /* doesn't go in registers at all */
10214 kbase = 0;
10215 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10217 if (k > 1 || cum->use_stack)
10218 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10219 else
10220 return NULL_RTX;
10223 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10225 static rtx
10226 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10227 int align_words)
10229 int n_units;
10230 int i, k;
10231 rtx rvec[GP_ARG_NUM_REG + 1];
10233 if (align_words >= GP_ARG_NUM_REG)
10234 return NULL_RTX;
10236 n_units = rs6000_arg_size (mode, type);
10238 /* Optimize the simple case where the arg fits in one gpr, except in
10239 the case of BLKmode due to assign_parms assuming that registers are
10240 BITS_PER_WORD wide. */
10241 if (n_units == 0
10242 || (n_units == 1 && mode != BLKmode))
10243 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10245 k = 0;
10246 if (align_words + n_units > GP_ARG_NUM_REG)
10247 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10248 using a magic NULL_RTX component.
10249 This is not strictly correct. Only some of the arg belongs in
10250 memory, not all of it. However, the normal scheme using
10251 function_arg_partial_nregs can result in unusual subregs, eg.
10252 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10253 store the whole arg to memory is often more efficient than code
10254 to store pieces, and we know that space is available in the right
10255 place for the whole arg. */
10256 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10258 i = 0;
10259 do
10260 {
10261 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10262 rtx off = GEN_INT (i++ * 4);
10263 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10264 }
10265 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10267 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
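/* Example of the split case above (a sketch): with -m32 -mpowerpc64, a
   DFmode argument at align_words == 7 needs two words, but only one GPR
   (r10) is left, so the result is

     (parallel:DF [(expr_list (nil) (const_int 0))
		   (expr_list (reg:SI 10) (const_int 0))])

   where the leading NULL element tells the caller the value also lives
   in memory while its first word is passed in r10.  */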
10270 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10271 but must also be copied into the parameter save area starting at
10272 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10273 to the GPRs and/or memory. Return the number of elements used. */
10275 static int
10276 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10277 int align_words, rtx *rvec)
10279 int k = 0;
10281 if (align_words < GP_ARG_NUM_REG)
10283 int n_words = rs6000_arg_size (mode, type);
10285 if (align_words + n_words > GP_ARG_NUM_REG
10286 || mode == BLKmode
10287 || (TARGET_32BIT && TARGET_POWERPC64))
10289 /* If this is partially on the stack, then we only
10290 include the portion actually in registers here. */
10291 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10292 int i = 0;
10294 if (align_words + n_words > GP_ARG_NUM_REG)
10296 /* Not all of the arg fits in gprs. Say that it goes in memory
10297 too, using a magic NULL_RTX component. Also see comment in
10298 rs6000_mixed_function_arg for why the normal
10299 function_arg_partial_nregs scheme doesn't work in this case. */
10300 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10303 do
10304 {
10305 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10306 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10307 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10308 }
10309 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10311 else
10313 /* The whole arg fits in gprs. */
10314 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10315 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10318 else
10320 /* It's entirely in memory. */
10321 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10324 return k;
10327 /* RVEC is a vector of K components of an argument of mode MODE.
10328 Construct the final function_arg return value from it. */
10330 static rtx
10331 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10333 gcc_assert (k >= 1);
10335 /* Avoid returning a PARALLEL in the trivial cases. */
10336 if (k == 1)
10338 if (XEXP (rvec[0], 0) == NULL_RTX)
10339 return NULL_RTX;
10341 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10342 return XEXP (rvec[0], 0);
10345 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10348 /* Determine where to put an argument to a function.
10349 Value is zero to push the argument on the stack,
10350 or a hard register in which to store the argument.
10352 MODE is the argument's machine mode.
10353 TYPE is the data type of the argument (as a tree).
10354 This is null for libcalls where that information may
10355 not be available.
10356 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10357 the preceding args and about the function being called. It is
10358 not modified in this routine.
10359 NAMED is nonzero if this argument is a named parameter
10360 (otherwise it is an extra parameter matching an ellipsis).
10362 On RS/6000 the first eight words of non-FP arguments are normally in registers
10363 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10364 Under V.4, the first 8 FP args are in registers.
10366 If this is floating-point and no prototype is specified, we use
10367 both an FP and integer register (or possibly FP reg and stack). Library
10368 functions (when CALL_LIBCALL is set) always have the proper types for args,
10369 so we can pass the FP value just in one register. emit_library_function
10370 doesn't support PARALLEL anyway.
10372 Note that for args passed by reference, function_arg will be called
10373 with MODE and TYPE set to that of the pointer to the arg, not the arg
10374 itself. */
10376 static rtx
10377 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10378 const_tree type, bool named)
10380 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10381 enum rs6000_abi abi = DEFAULT_ABI;
10382 machine_mode elt_mode;
10383 int n_elts;
10385 /* Return a marker to indicate whether CR1 needs to set or clear the
10386 bit that V.4 uses to say fp args were passed in registers.
10387 Assume that we don't need the marker for software floating point,
10388 or compiler generated library calls. */
10389 if (mode == VOIDmode)
10391 if (abi == ABI_V4
10392 && (cum->call_cookie & CALL_LIBCALL) == 0
10393 && (cum->stdarg
10394 || (cum->nargs_prototype < 0
10395 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10397 /* For the SPE, we need to crxor CR6 always. */
10398 if (TARGET_SPE_ABI)
10399 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10400 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10401 return GEN_INT (cum->call_cookie
10402 | ((cum->fregno == FP_ARG_MIN_REG)
10403 ? CALL_V4_SET_FP_ARGS
10404 : CALL_V4_CLEAR_FP_ARGS));
10407 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10410 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10412 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10414 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10415 if (rslt != NULL_RTX)
10416 return rslt;
10417 /* Else fall through to usual handling. */
10420 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10422 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10423 rtx r, off;
10424 int i, k = 0;
10426 /* Do we also need to pass this argument in the parameter
10427 save area? */
10428 if (TARGET_64BIT && ! cum->prototype)
10430 int align_words = (cum->words + 1) & ~1;
10431 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10434 /* Describe where this argument goes in the vector registers. */
10435 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10437 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10438 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10439 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10442 return rs6000_finish_function_arg (mode, rvec, k);
10444 else if (TARGET_ALTIVEC_ABI
10445 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10446 || (type && TREE_CODE (type) == VECTOR_TYPE
10447 && int_size_in_bytes (type) == 16)))
10449 if (named || abi == ABI_V4)
10450 return NULL_RTX;
10451 else
10453 /* Vector parameters to varargs functions under AIX or Darwin
10454 get passed in memory and possibly also in GPRs. */
10455 int align, align_words, n_words;
10456 machine_mode part_mode;
10458 /* Vector parameters must be 16-byte aligned. In 32-bit
10459 mode this means we need to take into account the offset
10460 to the parameter save area. In 64-bit mode, they just
10461 have to start on an even word, since the parameter save
10462 area is 16-byte aligned. */
10463 if (TARGET_32BIT)
10464 align = -(rs6000_parm_offset () + cum->words) & 3;
10465 else
10466 align = cum->words & 1;
10467 align_words = cum->words + align;
10469 /* Out of registers? Memory, then. */
10470 if (align_words >= GP_ARG_NUM_REG)
10471 return NULL_RTX;
10473 if (TARGET_32BIT && TARGET_POWERPC64)
10474 return rs6000_mixed_function_arg (mode, type, align_words);
10476 /* The vector value goes in GPRs. Only the part of the
10477 value in GPRs is reported here. */
10478 part_mode = mode;
10479 n_words = rs6000_arg_size (mode, type);
10480 if (align_words + n_words > GP_ARG_NUM_REG)
10481 /* Fortunately, there are only two possibilities, the value
10482 is either wholly in GPRs or half in GPRs and half not. */
10483 part_mode = DImode;
10485 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10488 else if (TARGET_SPE_ABI && TARGET_SPE
10489 && (SPE_VECTOR_MODE (mode)
10490 || (TARGET_E500_DOUBLE && (mode == DFmode
10491 || mode == DCmode
10492 || mode == TFmode
10493 || mode == TCmode))))
10494 return rs6000_spe_function_arg (cum, mode, type);
10496 else if (abi == ABI_V4)
10498 if (TARGET_HARD_FLOAT && TARGET_FPRS
10499 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10500 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10501 || (mode == TFmode && !TARGET_IEEEQUAD)
10502 || mode == SDmode || mode == DDmode || mode == TDmode))
10504 /* _Decimal128 must use an even/odd register pair. This assumes
10505 that the register number is odd when fregno is odd. */
10506 if (mode == TDmode && (cum->fregno % 2) == 1)
10507 cum->fregno++;
10509 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10510 <= FP_ARG_V4_MAX_REG)
10511 return gen_rtx_REG (mode, cum->fregno);
10512 else
10513 return NULL_RTX;
10515 else
10517 int n_words = rs6000_arg_size (mode, type);
10518 int gregno = cum->sysv_gregno;
10520 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10521 (r7,r8) or (r9,r10), as is any other 2-word item such
10522 as complex int, due to a historical mistake. */
10523 if (n_words == 2)
10524 gregno += (1 - gregno) & 1;
10526 /* Multi-reg args are not split between registers and stack. */
10527 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10528 return NULL_RTX;
10530 if (TARGET_32BIT && TARGET_POWERPC64)
10531 return rs6000_mixed_function_arg (mode, type,
10532 gregno - GP_ARG_MIN_REG);
10533 return gen_rtx_REG (mode, gregno);
10536 else
10538 int align_words = rs6000_parm_start (mode, type, cum->words);
10540 /* _Decimal128 must be passed in an even/odd float register pair.
10541 This assumes that the register number is odd when fregno is odd. */
10542 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10543 cum->fregno++;
10545 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10547 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10548 rtx r, off;
10549 int i, k = 0;
10550 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10551 int fpr_words;
10553 /* Do we also need to pass this argument in the parameter
10554 save area? */
10555 if (type && (cum->nargs_prototype <= 0
10556 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10557 && TARGET_XL_COMPAT
10558 && align_words >= GP_ARG_NUM_REG)))
10559 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10561 /* Describe where this argument goes in the fprs. */
10562 for (i = 0; i < n_elts
10563 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10565 /* Check if the argument is split over registers and memory.
10566 This can only ever happen for long double or _Decimal128;
10567 complex types are handled via split_complex_arg. */
10568 machine_mode fmode = elt_mode;
10569 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10571 gcc_assert (fmode == TFmode || fmode == TDmode);
10572 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10575 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10576 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10577 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10580 /* If there were not enough FPRs to hold the argument, the rest
10581 usually goes into memory. However, if the current position
10582 is still within the register parameter area, a portion may
10583 actually have to go into GPRs.
10585 Note that it may happen that the portion of the argument
10586 passed in the first "half" of the first GPR was already
10587 passed in the last FPR as well.
10589 For unnamed arguments, we already set up GPRs to cover the
10590 whole argument in rs6000_psave_function_arg, so there is
10591 nothing further to do at this point. */
10592 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10593 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10594 && cum->nargs_prototype > 0)
10596 static bool warned;
10598 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10599 int n_words = rs6000_arg_size (mode, type);
10601 align_words += fpr_words;
10602 n_words -= fpr_words;
10604 do
10605 {
10606 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10607 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10608 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10609 }
10610 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10612 if (!warned && warn_psabi)
10614 warned = true;
10615 inform (input_location,
10616 "the ABI of passing homogeneous float aggregates"
10617 " has changed in GCC 5");
10621 return rs6000_finish_function_arg (mode, rvec, k);
10623 else if (align_words < GP_ARG_NUM_REG)
10625 if (TARGET_32BIT && TARGET_POWERPC64)
10626 return rs6000_mixed_function_arg (mode, type, align_words);
10628 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10630 else
10631 return NULL_RTX;
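/* Example result for a homogeneous aggregate (a sketch): passing
   struct { double a, b; } as the first named argument of a prototyped
   function under 64-bit ELFv2 produces

     (parallel:BLK [(expr_list (reg:DF 33) (const_int 0))
		    (expr_list (reg:DF 34) (const_int 8))])

   i.e. the two doubles go in f1 and f2 (hard regs 33 and 34), at byte
   offsets 0 and 8 within the argument.  */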
10635 /* For an arg passed partly in registers and partly in memory, this is
10636 the number of bytes passed in registers. For args passed entirely in
10637 registers or entirely in memory, zero. When an arg is described by a
10638 PARALLEL, perhaps using more than one register type, this function
10639 returns the number of bytes used by the first element of the PARALLEL. */
10641 static int
10642 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10643 tree type, bool named)
10645 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10646 bool passed_in_gprs = true;
10647 int ret = 0;
10648 int align_words;
10649 machine_mode elt_mode;
10650 int n_elts;
10652 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10654 if (DEFAULT_ABI == ABI_V4)
10655 return 0;
10657 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10659 /* If we are passing this arg in the fixed parameter save area
10660 (gprs or memory) as well as VRs, we do not use the partial
10661 bytes mechanism; instead, rs6000_function_arg will return a
10662 PARALLEL including a memory element as necessary. */
10663 if (TARGET_64BIT && ! cum->prototype)
10664 return 0;
10666 /* Otherwise, we pass in VRs only. Check for partial copies. */
10667 passed_in_gprs = false;
10668 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10669 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10672 /* In this complicated case we just disable the partial_nregs code. */
10673 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10674 return 0;
10676 align_words = rs6000_parm_start (mode, type, cum->words);
10678 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10680 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10682 /* If we are passing this arg in the fixed parameter save area
10683 (gprs or memory) as well as FPRs, we do not use the partial
10684 bytes mechanism; instead, rs6000_function_arg will return a
10685 PARALLEL including a memory element as necessary. */
10686 if (type
10687 && (cum->nargs_prototype <= 0
10688 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10689 && TARGET_XL_COMPAT
10690 && align_words >= GP_ARG_NUM_REG)))
10691 return 0;
10693 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10694 passed_in_gprs = false;
10695 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10697 /* Compute number of bytes / words passed in FPRs. If there
10698 is still space available in the register parameter area
10699 *after* that amount, a part of the argument will be passed
10700 in GPRs. In that case, the total amount passed in any
10701 registers is equal to the amount that would have been passed
10702 in GPRs if everything were passed there, so we fall back to
10703 the GPR code below to compute the appropriate value. */
10704 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10705 * MIN (8, GET_MODE_SIZE (elt_mode)));
10706 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10708 if (align_words + fpr_words < GP_ARG_NUM_REG)
10709 passed_in_gprs = true;
10710 else
10711 ret = fpr;
10715 if (passed_in_gprs
10716 && align_words < GP_ARG_NUM_REG
10717 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10718 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10720 if (ret != 0 && TARGET_DEBUG_ARG)
10721 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10723 return ret;
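/* A worked example of the GPR case above (64-bit, a sketch): a 24-byte
   BLKmode struct starting at align_words == 6 needs 3 words, but only
   GP_ARG_NUM_REG - 6 == 2 registers remain, so this returns
   (8 - 6) * 8 == 16 bytes passed in registers; the last word goes on
   the stack.  */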
10726 /* A C expression that indicates when an argument must be passed by
10727 reference. If nonzero for an argument, a copy of that argument is
10728 made in memory and a pointer to the argument is passed instead of
10729 the argument itself. The pointer is passed in whatever way is
10730 appropriate for passing a pointer to that type.
10732 Under V.4, aggregates and long double are passed by reference.
10734 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10735 reference unless the AltiVec vector extension ABI is in force.
10737 As an extension to all ABIs, variable sized types are passed by
10738 reference. */
10740 static bool
10741 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10742 machine_mode mode, const_tree type,
10743 bool named ATTRIBUTE_UNUSED)
10745 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10747 if (TARGET_DEBUG_ARG)
10748 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10749 return 1;
10752 if (!type)
10753 return 0;
10755 #if HAVE_UPC_PTS_STRUCT_REP
10756 if (DEFAULT_ABI == ABI_V4 && POINTER_TYPE_P (type)
10757 && upc_shared_type_p (TREE_TYPE (type)))
10759 if (TARGET_DEBUG_ARG)
10760 fprintf (stderr,
10761 "function_arg_pass_by_reference: V4 UPC ptr to shared\n");
10762 return 1;
10764 #endif
10766 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10768 if (TARGET_DEBUG_ARG)
10769 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10770 return 1;
10773 if (int_size_in_bytes (type) < 0)
10775 if (TARGET_DEBUG_ARG)
10776 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10777 return 1;
10780 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10781 modes only exist for GCC vector types if -maltivec. */
10782 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10784 if (TARGET_DEBUG_ARG)
10785 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10786 return 1;
10789 /* Pass synthetic vectors in memory. */
10790 if (TREE_CODE (type) == VECTOR_TYPE
10791 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10793 static bool warned_for_pass_big_vectors = false;
10794 if (TARGET_DEBUG_ARG)
10795 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10796 if (!warned_for_pass_big_vectors)
10798 warning (0, "GCC vector passed by reference: "
10799 "non-standard ABI extension with no compatibility guarantee");
10800 warned_for_pass_big_vectors = true;
10802 return 1;
10805 return 0;
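/* Types passed by invisible reference per the rules above (hypothetical
   declarations, not part of the build): */
#if 0
/* Under the V.4 ABI: any aggregate, and IEEE 128-bit long double.  */
struct agg { int a, b; };	/* by reference under V.4  */
/* Under any ABI: variable-size types and oversized GCC vectors.  */
typedef int bigvec __attribute__ ((vector_size (32)));
				/* by reference (with a one-time warning)
				   when wider than the hardware vectors  */
#endif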
10808 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10809 already processed. Return true if the parameter must be passed
10810 (fully or partially) on the stack. */
10812 static bool
10813 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10815 machine_mode mode;
10816 int unsignedp;
10817 rtx entry_parm;
10819 /* Catch errors. */
10820 if (type == NULL || type == error_mark_node)
10821 return true;
10823 /* Handle types with no storage requirement. */
10824 if (TYPE_MODE (type) == VOIDmode)
10825 return false;
10827 /* Handle complex types. */
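/* Each half of a complex argument is passed as a separate scalar
   (see split_complex_arg), so check the element type once per half;
   each rs6000_parm_needs_stack call also advances args_so_far.  */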
10828 if (TREE_CODE (type) == COMPLEX_TYPE)
10829 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10830 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10832 /* Handle transparent aggregates. */
10833 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10834 && TYPE_TRANSPARENT_AGGR (type))
10835 type = TREE_TYPE (first_field (type));
10837 /* See if this arg was passed by invisible reference. */
10838 if (pass_by_reference (get_cumulative_args (args_so_far),
10839 TYPE_MODE (type), type, true))
10840 type = build_pointer_type (type);
10842 /* Find mode as it is passed by the ABI. */
10843 unsignedp = TYPE_UNSIGNED (type);
10844 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10846 /* If we must pass in stack, we need a stack. */
10847 if (rs6000_must_pass_in_stack (mode, type))
10848 return true;
10850 /* If there is no incoming register, we need a stack. */
10851 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10852 if (entry_parm == NULL)
10853 return true;
10855 /* Likewise if we need to pass both in registers and on the stack. */
10856 if (GET_CODE (entry_parm) == PARALLEL
10857 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10858 return true;
10860 /* Also true if we're partially in registers and partially not. */
10861 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10862 return true;
10864 /* Update info on where next arg arrives in registers. */
10865 rs6000_function_arg_advance (args_so_far, mode, type, true);
10866 return false;
10869 /* Return true if FUN has no prototype, has a variable argument
10870 list, or passes any parameter in memory. */
10872 static bool
10873 rs6000_function_parms_need_stack (tree fun, bool incoming)
10875 tree fntype, result;
10876 CUMULATIVE_ARGS args_so_far_v;
10877 cumulative_args_t args_so_far;
10879 if (!fun)
10880 /* Must be a libcall; libcalls only use reg parms. */
10881 return false;
10883 fntype = fun;
10884 if (!TYPE_P (fun))
10885 fntype = TREE_TYPE (fun);
10887 /* Varargs functions need the parameter save area. */
10888 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10889 return true;
10891 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10892 args_so_far = pack_cumulative_args (&args_so_far_v);
10894 /* When incoming, we will have been passed the function decl.
10895 It is necessary to use the decl to handle K&R style functions,
10896 where TYPE_ARG_TYPES may not be available. */
10897 if (incoming)
10899 gcc_assert (DECL_P (fun));
10900 result = DECL_RESULT (fun);
10902 else
10903 result = TREE_TYPE (fntype);
10905 if (result && aggregate_value_p (result, fntype))
10907 if (!TYPE_P (result))
10908 result = TREE_TYPE (result);
10909 result = build_pointer_type (result);
10910 rs6000_parm_needs_stack (args_so_far, result);
10913 if (incoming)
10915 tree parm;
10917 for (parm = DECL_ARGUMENTS (fun);
10918 parm && parm != void_list_node;
10919 parm = TREE_CHAIN (parm))
10920 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10921 return true;
10923 else
10925 function_args_iterator args_iter;
10926 tree arg_type;
10928 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10929 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10930 return true;
10933 return false;
10936 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
10937 usually a constant depending on the ABI. However, in the ELFv2 ABI
10938 the register parameter area is optional when calling a function that
10939 has a prototype in scope, has no variable argument list, and passes
10940 all parameters in registers. */
10942 static int
10943 rs6000_reg_parm_stack_space (tree fun, bool incoming)
10945 int reg_parm_stack_space;
10947 switch (DEFAULT_ABI)
10949 default:
10950 reg_parm_stack_space = 0;
10951 break;
10953 case ABI_AIX:
10954 case ABI_DARWIN:
10955 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10956 break;
10958 case ABI_ELFv2:
10959 /* ??? Recomputing this every time is a bit expensive. Is there
10960 a place to cache this information? */
10961 if (rs6000_function_parms_need_stack (fun, incoming))
10962 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10963 else
10964 reg_parm_stack_space = 0;
10965 break;
10968 return reg_parm_stack_space;
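/* A sketch of one way to answer the ??? above: memoize the incoming
   case in the per-function machine_function.  The two fields used here
   are hypothetical -- this file does not define them:

     if (incoming && cfun->machine->reg_parm_stack_space_cached_p)
       return cfun->machine->cached_reg_parm_stack_space;
     ... compute reg_parm_stack_space as above ...
     if (incoming)
       {
         cfun->machine->cached_reg_parm_stack_space = reg_parm_stack_space;
         cfun->machine->reg_parm_stack_space_cached_p = true;
       }

   Outgoing calls see a different FUN at each call site, so they would
   need a per-call or per-decl cache instead.  */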
10971 static void
10972 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
10974 int i;
10975 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
10977 if (nregs == 0)
10978 return;
10980 for (i = 0; i < nregs; i++)
10982 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
10983 if (reload_completed)
10985 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
10986 tem = NULL_RTX;
10987 else
10988 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
10989 i * GET_MODE_SIZE (reg_mode));
10991 else
10992 tem = replace_equiv_address (tem, XEXP (tem, 0));
10994 gcc_assert (tem);
10996 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11000 /* Perform any actions needed for a function that is receiving a
11001 variable number of arguments.
11003 CUM is as above.
11005 MODE and TYPE are the mode and type of the current parameter.
11007 PRETEND_SIZE is a variable that should be set to the amount of stack
11008 that must be pushed by the prolog to pretend that our caller pushed it.
11011 Normally, this macro will push all remaining incoming registers on the
11012 stack and set PRETEND_SIZE to the length of the registers pushed. */
11014 static void
11015 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11016 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11017 int no_rtl)
11019 CUMULATIVE_ARGS next_cum;
11020 int reg_size = TARGET_32BIT ? 4 : 8;
11021 rtx save_area = NULL_RTX, mem;
11022 int first_reg_offset;
11023 alias_set_type set;
11025 /* Skip the last named argument. */
11026 next_cum = *get_cumulative_args (cum);
11027 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11029 if (DEFAULT_ABI == ABI_V4)
11031 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11033 if (! no_rtl)
11035 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11036 HOST_WIDE_INT offset = 0;
11038 /* Try to optimize the size of the varargs save area.
11039 The ABI requires that ap.reg_save_area is doubleword
11040 aligned, but we don't need to allocate space for all
11041 the bytes, only for those to which we will actually save
11042 anything. */
11043 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11044 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11045 if (TARGET_HARD_FLOAT && TARGET_FPRS
11046 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11047 && cfun->va_list_fpr_size)
11049 if (gpr_reg_num)
11050 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11051 * UNITS_PER_FP_WORD;
11052 if (cfun->va_list_fpr_size
11053 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11054 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11055 else
11056 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11057 * UNITS_PER_FP_WORD;
11059 if (gpr_reg_num)
11061 offset = -((first_reg_offset * reg_size) & ~7);
11062 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11064 gpr_reg_num = cfun->va_list_gpr_size;
11065 if (reg_size == 4 && (first_reg_offset & 1))
11066 gpr_reg_num++;
11068 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11070 else if (fpr_size)
11071 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11072 * UNITS_PER_FP_WORD
11073 - (int) (GP_ARG_NUM_REG * reg_size);
11075 if (gpr_size + fpr_size)
11077 rtx reg_save_area
11078 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11079 gcc_assert (GET_CODE (reg_save_area) == MEM);
11080 reg_save_area = XEXP (reg_save_area, 0);
11081 if (GET_CODE (reg_save_area) == PLUS)
11083 gcc_assert (XEXP (reg_save_area, 0)
11084 == virtual_stack_vars_rtx);
11085 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11086 offset += INTVAL (XEXP (reg_save_area, 1));
11088 else
11089 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11092 cfun->machine->varargs_save_offset = offset;
11093 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
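/* Worked example of the sizing above (32-bit V4, reg_size == 4): for
   "void f (int a, int b, int c, ...)" the named args occupy r3..r5, so
   first_reg_offset == 3 and, assuming va_arg may consume all of
   r6..r10, gpr_reg_num == 8 - 3 == 5.  Then offset == -((3 * 4) & ~7)
   == -8 and gpr_size == (5 * 4 + 7) & ~7 == 24: a doubleword-aligned
   24-byte block rather than space for the whole 8-register image.  */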
11096 else
11098 first_reg_offset = next_cum.words;
11099 save_area = virtual_incoming_args_rtx;
11101 if (targetm.calls.must_pass_in_stack (mode, type))
11102 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11105 set = get_varargs_alias_set ();
11106 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11107 && cfun->va_list_gpr_size)
11109 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11111 if (va_list_gpr_counter_field)
11112 /* V4 va_list_gpr_size counts number of registers needed. */
11113 n_gpr = cfun->va_list_gpr_size;
11114 else
11115 /* char * va_list instead counts number of bytes needed. */
11116 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11118 if (nregs > n_gpr)
11119 nregs = n_gpr;
11121 mem = gen_rtx_MEM (BLKmode,
11122 plus_constant (Pmode, save_area,
11123 first_reg_offset * reg_size));
11124 MEM_NOTRAP_P (mem) = 1;
11125 set_mem_alias_set (mem, set);
11126 set_mem_align (mem, BITS_PER_WORD);
11128 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11129 nregs);
11132 /* Save FP registers if needed. */
11133 if (DEFAULT_ABI == ABI_V4
11134 && TARGET_HARD_FLOAT && TARGET_FPRS
11135 && ! no_rtl
11136 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11137 && cfun->va_list_fpr_size)
11139 int fregno = next_cum.fregno, nregs;
11140 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11141 rtx lab = gen_label_rtx ();
11142 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11143 * UNITS_PER_FP_WORD);
11145 emit_jump_insn
11146 (gen_rtx_SET (VOIDmode,
11147 pc_rtx,
11148 gen_rtx_IF_THEN_ELSE (VOIDmode,
11149 gen_rtx_NE (VOIDmode, cr1,
11150 const0_rtx),
11151 gen_rtx_LABEL_REF (VOIDmode, lab),
11152 pc_rtx)));
11154 for (nregs = 0;
11155 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11156 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11158 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11159 ? DFmode : SFmode,
11160 plus_constant (Pmode, save_area, off));
11161 MEM_NOTRAP_P (mem) = 1;
11162 set_mem_alias_set (mem, set);
11163 set_mem_align (mem, GET_MODE_ALIGNMENT (
11164 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11165 ? DFmode : SFmode));
11166 emit_move_insn (mem, gen_rtx_REG (
11167 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11168 ? DFmode : SFmode, fregno));
11171 emit_label (lab);
11175 /* Create the va_list data type. */
11177 static tree
11178 rs6000_build_builtin_va_list (void)
11180 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11182 /* For AIX, prefer 'char *' because that's what the system
11183 header files like. */
11184 if (DEFAULT_ABI != ABI_V4)
11185 return build_pointer_type (char_type_node);
11187 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11188 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11189 get_identifier ("__va_list_tag"), record);
11191 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11192 unsigned_char_type_node);
11193 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11194 unsigned_char_type_node);
11195 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11196 every user file. */
11197 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11198 get_identifier ("reserved"), short_unsigned_type_node);
11199 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11200 get_identifier ("overflow_arg_area"),
11201 ptr_type_node);
11202 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11203 get_identifier ("reg_save_area"),
11204 ptr_type_node);
11206 va_list_gpr_counter_field = f_gpr;
11207 va_list_fpr_counter_field = f_fpr;
11209 DECL_FIELD_CONTEXT (f_gpr) = record;
11210 DECL_FIELD_CONTEXT (f_fpr) = record;
11211 DECL_FIELD_CONTEXT (f_res) = record;
11212 DECL_FIELD_CONTEXT (f_ovf) = record;
11213 DECL_FIELD_CONTEXT (f_sav) = record;
11215 TYPE_STUB_DECL (record) = type_decl;
11216 TYPE_NAME (record) = type_decl;
11217 TYPE_FIELDS (record) = f_gpr;
11218 DECL_CHAIN (f_gpr) = f_fpr;
11219 DECL_CHAIN (f_fpr) = f_res;
11220 DECL_CHAIN (f_res) = f_ovf;
11221 DECL_CHAIN (f_ovf) = f_sav;
11223 layout_type (record);
11225 /* The correct type is an array type of one element. */
11226 return build_array_type (record, build_index_type (size_zero_node));
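/* For reference, the record built above corresponds to this C-level
   picture of the V4 va_list (shown only as an illustration of the
   layout):

     typedef struct __va_list_tag {
       unsigned char gpr;               -- next GP arg register to use
       unsigned char fpr;               -- next FP arg register to use
       unsigned short reserved;         -- the named padding
       void *overflow_arg_area;         -- args that were passed on the stack
       void *reg_save_area;             -- block spilled by va_start
     } __builtin_va_list[1];  */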
11229 /* Implement va_start. */
11231 static void
11232 rs6000_va_start (tree valist, rtx nextarg)
11234 HOST_WIDE_INT words, n_gpr, n_fpr;
11235 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11236 tree gpr, fpr, ovf, sav, t;
11238 /* Only SVR4 needs something special. */
11239 if (DEFAULT_ABI != ABI_V4)
11241 std_expand_builtin_va_start (valist, nextarg);
11242 return;
11245 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11246 f_fpr = DECL_CHAIN (f_gpr);
11247 f_res = DECL_CHAIN (f_fpr);
11248 f_ovf = DECL_CHAIN (f_res);
11249 f_sav = DECL_CHAIN (f_ovf);
11251 valist = build_simple_mem_ref (valist);
11252 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11253 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11254 f_fpr, NULL_TREE);
11255 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11256 f_ovf, NULL_TREE);
11257 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11258 f_sav, NULL_TREE);
11260 /* Count number of gp and fp argument registers used. */
11261 words = crtl->args.info.words;
11262 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11263 GP_ARG_NUM_REG);
11264 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11265 FP_ARG_NUM_REG);
11267 if (TARGET_DEBUG_ARG)
11268 fprintf (stderr, "va_start: words = "HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11269 HOST_WIDE_INT_PRINT_DEC", n_fpr = "HOST_WIDE_INT_PRINT_DEC"\n",
11270 words, n_gpr, n_fpr);
11272 if (cfun->va_list_gpr_size)
11274 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11275 build_int_cst (NULL_TREE, n_gpr));
11276 TREE_SIDE_EFFECTS (t) = 1;
11277 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11280 if (cfun->va_list_fpr_size)
11282 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11283 build_int_cst (NULL_TREE, n_fpr));
11284 TREE_SIDE_EFFECTS (t) = 1;
11285 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11287 #ifdef HAVE_AS_GNU_ATTRIBUTE
11288 if (call_ABI_of_interest (cfun->decl))
11289 rs6000_passes_float = true;
11290 #endif
11293 /* Find the overflow area. */
11294 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11295 if (words != 0)
11296 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11297 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11298 TREE_SIDE_EFFECTS (t) = 1;
11299 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11301 /* If there were no va_arg invocations, don't set up the register
11302 save area. */
11303 if (!cfun->va_list_gpr_size
11304 && !cfun->va_list_fpr_size
11305 && n_gpr < GP_ARG_NUM_REG
11306 && n_fpr < FP_ARG_V4_MAX_REG)
11307 return;
11309 /* Find the register save area. */
11310 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11311 if (cfun->machine->varargs_save_offset)
11312 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11313 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11314 TREE_SIDE_EFFECTS (t) = 1;
11315 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
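/* Illustration of the result: for "void f (int a, ...)" under V4 the
   named arg consumes r3, so after va_start the tag holds gpr == 1 and
   fpr == 0, overflow_arg_area points at the incoming stack arg area,
   and reg_save_area points at the block spilled by
   setup_incoming_varargs.  */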
11318 /* Implement va_arg. */
11320 static tree
11321 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11322 gimple_seq *post_p)
11324 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11325 tree gpr, fpr, ovf, sav, reg, t, u;
11326 int size, rsize, n_reg, sav_ofs, sav_scale;
11327 tree lab_false, lab_over, addr;
11328 int align;
11329 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11330 int regalign = 0;
11331 gimple stmt;
11333 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11335 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11336 return build_va_arg_indirect_ref (t);
11339 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11340 earlier version of gcc, with the property that it always applied alignment
11341 adjustments to the va-args (even for zero-sized types). The cheapest way
11342 to deal with this is to replicate the effect of the part of
11343 std_gimplify_va_arg_expr that carries out the align adjust, for the
11344 relevant case.
11345 We don't need to check for pass-by-reference because of the test above.
11346 We can return a simplified answer, since we know there's no offset to add. */
11348 if (((TARGET_MACHO
11349 && rs6000_darwin64_abi)
11350 || DEFAULT_ABI == ABI_ELFv2
11351 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11352 && integer_zerop (TYPE_SIZE (type)))
11354 unsigned HOST_WIDE_INT align, boundary;
11355 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11356 align = PARM_BOUNDARY / BITS_PER_UNIT;
11357 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11358 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11359 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11360 boundary /= BITS_PER_UNIT;
11361 if (boundary > align)
11363 tree t;
11364 /* This updates arg ptr by the amount that would be necessary
11365 to align the zero-sized (but not zero-alignment) item. */
11366 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11367 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11368 gimplify_and_add (t, pre_p);
11370 t = fold_convert (sizetype, valist_tmp);
11371 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11372 fold_convert (TREE_TYPE (valist),
11373 fold_build2 (BIT_AND_EXPR, sizetype, t,
11374 size_int (-boundary))));
11375 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11376 gimplify_and_add (t, pre_p);
11378 /* Since it is zero-sized there's no increment for the item itself. */
11379 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11380 return build_va_arg_indirect_ref (valist_tmp);
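/* The two statements above are the usual round-up-to-boundary idiom,
   (p + boundary - 1) & -boundary; e.g. with boundary == 16 and
   p == 0x24: 0x24 + 15 == 0x33, and 0x33 & -16 == 0x30, the next
   16-byte aligned address at or above p.  */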
11383 if (DEFAULT_ABI != ABI_V4)
11385 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11387 tree elem_type = TREE_TYPE (type);
11388 machine_mode elem_mode = TYPE_MODE (elem_type);
11389 int elem_size = GET_MODE_SIZE (elem_mode);
11391 if (elem_size < UNITS_PER_WORD)
11393 tree real_part, imag_part;
11394 gimple_seq post = NULL;
11396 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11397 &post);
11398 /* Copy the value into a temporary, lest the formal temporary
11399 be reused out from under us. */
11400 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11401 gimple_seq_add_seq (pre_p, post);
11403 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11404 post_p);
11406 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11410 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11413 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11414 f_fpr = DECL_CHAIN (f_gpr);
11415 f_res = DECL_CHAIN (f_fpr);
11416 f_ovf = DECL_CHAIN (f_res);
11417 f_sav = DECL_CHAIN (f_ovf);
11419 valist = build_va_arg_indirect_ref (valist);
11420 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11421 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11422 f_fpr, NULL_TREE);
11423 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11424 f_ovf, NULL_TREE);
11425 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11426 f_sav, NULL_TREE);
11428 size = int_size_in_bytes (type);
11429 rsize = (size + 3) / 4;
11430 align = 1;
11432 if (TARGET_HARD_FLOAT && TARGET_FPRS
11433 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11434 || (TARGET_DOUBLE_FLOAT
11435 && (TYPE_MODE (type) == DFmode
11436 || TYPE_MODE (type) == TFmode
11437 || TYPE_MODE (type) == SDmode
11438 || TYPE_MODE (type) == DDmode
11439 || TYPE_MODE (type) == TDmode))))
11441 /* FP args go in FP registers, if present. */
11442 reg = fpr;
11443 n_reg = (size + 7) / 8;
11444 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11445 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11446 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11447 align = 8;
11449 else
11451 /* Otherwise into GP registers. */
11452 reg = gpr;
11453 n_reg = rsize;
11454 sav_ofs = 0;
11455 sav_scale = 4;
11456 if (n_reg == 2)
11457 align = 8;
11460 /* Pull the value out of the saved registers.... */
11462 lab_over = NULL;
11463 addr = create_tmp_var (ptr_type_node, "addr");
11465 /* AltiVec vectors never go in registers when -mabi=altivec. */
11466 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11467 align = 16;
11468 else
11470 lab_false = create_artificial_label (input_location);
11471 lab_over = create_artificial_label (input_location);
11473 /* Long long and SPE vectors are aligned in the registers.
11474 As is any other 2-GPR item, such as complex int, due to a
11475 historical mistake. */
11476 u = reg;
11477 if (n_reg == 2 && reg == gpr)
11479 regalign = 1;
11480 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11481 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11482 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11483 unshare_expr (reg), u);
11485 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11486 reg number is 0 for f1, so we want to make it odd. */
11487 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11489 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11490 build_int_cst (TREE_TYPE (reg), 1));
11491 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11494 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11495 t = build2 (GE_EXPR, boolean_type_node, u, t);
11496 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11497 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11498 gimplify_and_add (t, pre_p);
11500 t = sav;
11501 if (sav_ofs)
11502 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11504 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11505 build_int_cst (TREE_TYPE (reg), n_reg));
11506 u = fold_convert (sizetype, u);
11507 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11508 t = fold_build_pointer_plus (t, u);
11510 /* _Decimal32 varargs are located in the second word of the 64-bit
11511 FP register for 32-bit binaries. */
11512 if (TARGET_32BIT
11513 && TARGET_HARD_FLOAT && TARGET_FPRS
11514 && TYPE_MODE (type) == SDmode)
11515 t = fold_build_pointer_plus_hwi (t, size);
11517 gimplify_assign (addr, t, pre_p);
11519 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11521 stmt = gimple_build_label (lab_false);
11522 gimple_seq_add_stmt (pre_p, stmt);
11524 if ((n_reg == 2 && !regalign) || n_reg > 2)
11526 /* Ensure that we don't find any more args in regs.
11527 Alignment has been taken care of for the special cases. */
11528 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11532 /* ... otherwise out of the overflow area. */
11534 /* Care for on-stack alignment if needed. */
11535 t = ovf;
11536 if (align != 1)
11538 t = fold_build_pointer_plus_hwi (t, align - 1);
11539 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11540 build_int_cst (TREE_TYPE (t), -align));
11542 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11544 gimplify_assign (unshare_expr (addr), t, pre_p);
11546 t = fold_build_pointer_plus_hwi (t, size);
11547 gimplify_assign (unshare_expr (ovf), t, pre_p);
11549 if (lab_over)
11551 stmt = gimple_build_label (lab_over);
11552 gimple_seq_add_stmt (pre_p, stmt);
11555 if (STRICT_ALIGNMENT
11556 && (TYPE_ALIGN (type)
11557 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11559 /* The value (of type complex double, for example) may not be
11560 aligned in memory in the saved registers, so copy via a
11561 temporary. (This is the same code as used for SPARC.) */
11562 tree tmp = create_tmp_var (type, "va_arg_tmp");
11563 tree dest_addr = build_fold_addr_expr (tmp);
11565 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11566 3, dest_addr, addr, size_int (rsize * 4));
11568 gimplify_and_add (copy, pre_p);
11569 addr = dest_addr;
11572 addr = fold_convert (ptrtype, addr);
11573 return build_va_arg_indirect_ref (addr);
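/* Rough C-level sketch of the gimple built above for a 4-byte integer
   (GP case, n_reg == 1, align == 1); an illustration only, not code
   that is compiled here:

     if (ap->gpr >= 8)
       goto lab_false;
     addr = ap->reg_save_area + ap->gpr++ * 4;
     goto lab_over;
   lab_false:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area = addr + 4;
   lab_over:
     return *(int *) addr;  */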
11576 /* Builtins. */
11578 static void
11579 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11581 tree t;
11582 unsigned classify = rs6000_builtin_info[(int)code].attr;
11583 const char *attr_string = "";
11585 gcc_assert (name != NULL);
11586 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
11588 if (rs6000_builtin_decls[(int)code])
11589 fatal_error (input_location,
11590 "internal error: builtin function %s already processed", name);
11592 rs6000_builtin_decls[(int)code] = t =
11593 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11595 /* Set any special attributes. */
11596 if ((classify & RS6000_BTC_CONST) != 0)
11598 /* const function, function only depends on the inputs. */
11599 TREE_READONLY (t) = 1;
11600 TREE_NOTHROW (t) = 1;
11601 attr_string = ", const";
11603 else if ((classify & RS6000_BTC_PURE) != 0)
11605 /* pure function, function can read global memory, but does not set any
11606 external state. */
11607 DECL_PURE_P (t) = 1;
11608 TREE_NOTHROW (t) = 1;
11609 attr_string = ", pure";
11611 else if ((classify & RS6000_BTC_FP) != 0)
11613 /* Function is a math function. If rounding mode is on, then treat the
11614 function as not reading global memory, but it can have arbitrary side
11615 effects. If it is off, then assume the function is a const function.
11616 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11617 builtin-attribute.def that is used for the math functions. */
11618 TREE_NOTHROW (t) = 1;
11619 if (flag_rounding_math)
11621 DECL_PURE_P (t) = 1;
11622 DECL_IS_NOVOPS (t) = 1;
11623 attr_string = ", fp, pure";
11625 else
11627 TREE_READONLY (t) = 1;
11628 attr_string = ", fp, const";
11631 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11632 gcc_unreachable ();
11634 if (TARGET_DEBUG_BUILTIN)
11635 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11636 (int)code, name, attr_string);
11639 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11641 #undef RS6000_BUILTIN_1
11642 #undef RS6000_BUILTIN_2
11643 #undef RS6000_BUILTIN_3
11644 #undef RS6000_BUILTIN_A
11645 #undef RS6000_BUILTIN_D
11646 #undef RS6000_BUILTIN_E
11647 #undef RS6000_BUILTIN_H
11648 #undef RS6000_BUILTIN_P
11649 #undef RS6000_BUILTIN_Q
11650 #undef RS6000_BUILTIN_S
11651 #undef RS6000_BUILTIN_X
11653 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11654 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11655 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11656 { MASK, ICODE, NAME, ENUM },
11658 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11659 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11660 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11661 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11662 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11663 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11664 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11665 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11667 static const struct builtin_description bdesc_3arg[] =
11669 #include "rs6000-builtin.def"
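/* How the table gets its rows: with the definitions above, a line of
   rs6000-builtin.def that expands RS6000_BUILTIN_3, for example
   (illustrative)

     RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
                       RS6000_BTM_ALTIVEC, RS6000_BTC_FP, CODE_FOR_fmav4sf4)

   becomes the initializer

     { RS6000_BTM_ALTIVEC, CODE_FOR_fmav4sf4,
       "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP },

   while every other RS6000_BUILTIN_* line expands to nothing, so each
   bdesc_* table below selects exactly one class of builtins.  */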
11672 /* DST operations: void foo (void *, const int, const char). */
11674 #undef RS6000_BUILTIN_1
11675 #undef RS6000_BUILTIN_2
11676 #undef RS6000_BUILTIN_3
11677 #undef RS6000_BUILTIN_A
11678 #undef RS6000_BUILTIN_D
11679 #undef RS6000_BUILTIN_E
11680 #undef RS6000_BUILTIN_H
11681 #undef RS6000_BUILTIN_P
11682 #undef RS6000_BUILTIN_Q
11683 #undef RS6000_BUILTIN_S
11684 #undef RS6000_BUILTIN_X
11686 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11687 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11688 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11689 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11690 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11691 { MASK, ICODE, NAME, ENUM },
11693 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11694 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11695 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11696 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11697 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11698 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11700 static const struct builtin_description bdesc_dst[] =
11702 #include "rs6000-builtin.def"
11705 /* Simple binary operations: VECc = foo (VECa, VECb). */
11707 #undef RS6000_BUILTIN_1
11708 #undef RS6000_BUILTIN_2
11709 #undef RS6000_BUILTIN_3
11710 #undef RS6000_BUILTIN_A
11711 #undef RS6000_BUILTIN_D
11712 #undef RS6000_BUILTIN_E
11713 #undef RS6000_BUILTIN_H
11714 #undef RS6000_BUILTIN_P
11715 #undef RS6000_BUILTIN_Q
11716 #undef RS6000_BUILTIN_S
11717 #undef RS6000_BUILTIN_X
11719 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11720 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11721 { MASK, ICODE, NAME, ENUM },
11723 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11724 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11725 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11726 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11727 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11728 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11729 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11730 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11731 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11733 static const struct builtin_description bdesc_2arg[] =
11735 #include "rs6000-builtin.def"
11738 #undef RS6000_BUILTIN_1
11739 #undef RS6000_BUILTIN_2
11740 #undef RS6000_BUILTIN_3
11741 #undef RS6000_BUILTIN_A
11742 #undef RS6000_BUILTIN_D
11743 #undef RS6000_BUILTIN_E
11744 #undef RS6000_BUILTIN_H
11745 #undef RS6000_BUILTIN_P
11746 #undef RS6000_BUILTIN_Q
11747 #undef RS6000_BUILTIN_S
11748 #undef RS6000_BUILTIN_X
11750 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11751 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11752 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11753 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11754 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11755 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11756 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11757 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11758 { MASK, ICODE, NAME, ENUM },
11760 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11761 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11762 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11764 /* AltiVec predicates. */
11766 static const struct builtin_description bdesc_altivec_preds[] =
11768 #include "rs6000-builtin.def"
11771 /* SPE predicates. */
11772 #undef RS6000_BUILTIN_1
11773 #undef RS6000_BUILTIN_2
11774 #undef RS6000_BUILTIN_3
11775 #undef RS6000_BUILTIN_A
11776 #undef RS6000_BUILTIN_D
11777 #undef RS6000_BUILTIN_E
11778 #undef RS6000_BUILTIN_H
11779 #undef RS6000_BUILTIN_P
11780 #undef RS6000_BUILTIN_Q
11781 #undef RS6000_BUILTIN_S
11782 #undef RS6000_BUILTIN_X
11784 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11785 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11786 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11787 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11788 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11789 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11790 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11791 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11792 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11793 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11794 { MASK, ICODE, NAME, ENUM },
11796 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11798 static const struct builtin_description bdesc_spe_predicates[] =
11800 #include "rs6000-builtin.def"
11803 /* SPE evsel predicates. */
11804 #undef RS6000_BUILTIN_1
11805 #undef RS6000_BUILTIN_2
11806 #undef RS6000_BUILTIN_3
11807 #undef RS6000_BUILTIN_A
11808 #undef RS6000_BUILTIN_D
11809 #undef RS6000_BUILTIN_E
11810 #undef RS6000_BUILTIN_H
11811 #undef RS6000_BUILTIN_P
11812 #undef RS6000_BUILTIN_Q
11813 #undef RS6000_BUILTIN_S
11814 #undef RS6000_BUILTIN_X
11816 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11817 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11818 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11819 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11820 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11821 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11822 { MASK, ICODE, NAME, ENUM },
11824 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11825 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11826 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11827 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11828 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11830 static const struct builtin_description bdesc_spe_evsel[] =
11832 #include "rs6000-builtin.def"
11835 /* PAIRED predicates. */
11836 #undef RS6000_BUILTIN_1
11837 #undef RS6000_BUILTIN_2
11838 #undef RS6000_BUILTIN_3
11839 #undef RS6000_BUILTIN_A
11840 #undef RS6000_BUILTIN_D
11841 #undef RS6000_BUILTIN_E
11842 #undef RS6000_BUILTIN_H
11843 #undef RS6000_BUILTIN_P
11844 #undef RS6000_BUILTIN_Q
11845 #undef RS6000_BUILTIN_S
11846 #undef RS6000_BUILTIN_X
11848 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11849 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11850 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11851 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11852 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11853 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11854 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11855 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11856 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11857 { MASK, ICODE, NAME, ENUM },
11859 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11860 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11862 static const struct builtin_description bdesc_paired_preds[] =
11864 #include "rs6000-builtin.def"
11867 /* ABS* operations. */
11869 #undef RS6000_BUILTIN_1
11870 #undef RS6000_BUILTIN_2
11871 #undef RS6000_BUILTIN_3
11872 #undef RS6000_BUILTIN_A
11873 #undef RS6000_BUILTIN_D
11874 #undef RS6000_BUILTIN_E
11875 #undef RS6000_BUILTIN_H
11876 #undef RS6000_BUILTIN_P
11877 #undef RS6000_BUILTIN_Q
11878 #undef RS6000_BUILTIN_S
11879 #undef RS6000_BUILTIN_X
11881 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11882 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11883 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11884 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11885 { MASK, ICODE, NAME, ENUM },
11887 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11888 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11889 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11890 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11891 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11892 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11893 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11895 static const struct builtin_description bdesc_abs[] =
11897 #include "rs6000-builtin.def"
11900 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11901 foo (VECa). */
11903 #undef RS6000_BUILTIN_1
11904 #undef RS6000_BUILTIN_2
11905 #undef RS6000_BUILTIN_3
11906 #undef RS6000_BUILTIN_A
11907 #undef RS6000_BUILTIN_D
11908 #undef RS6000_BUILTIN_E
11909 #undef RS6000_BUILTIN_H
11910 #undef RS6000_BUILTIN_P
11911 #undef RS6000_BUILTIN_Q
11912 #undef RS6000_BUILTIN_S
11913 #undef RS6000_BUILTIN_X
11915 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11916 { MASK, ICODE, NAME, ENUM },
11918 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11919 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11920 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11921 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11922 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11923 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11924 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11925 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11926 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11927 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11929 static const struct builtin_description bdesc_1arg[] =
11931 #include "rs6000-builtin.def"
11934 /* HTM builtins. */
11935 #undef RS6000_BUILTIN_1
11936 #undef RS6000_BUILTIN_2
11937 #undef RS6000_BUILTIN_3
11938 #undef RS6000_BUILTIN_A
11939 #undef RS6000_BUILTIN_D
11940 #undef RS6000_BUILTIN_E
11941 #undef RS6000_BUILTIN_H
11942 #undef RS6000_BUILTIN_P
11943 #undef RS6000_BUILTIN_Q
11944 #undef RS6000_BUILTIN_S
11945 #undef RS6000_BUILTIN_X
11947 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11948 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11949 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11950 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11951 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11952 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11953 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
11954 { MASK, ICODE, NAME, ENUM },
11956 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11957 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11958 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11959 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11961 static const struct builtin_description bdesc_htm[] =
11963 #include "rs6000-builtin.def"
11966 #undef RS6000_BUILTIN_1
11967 #undef RS6000_BUILTIN_2
11968 #undef RS6000_BUILTIN_3
11969 #undef RS6000_BUILTIN_A
11970 #undef RS6000_BUILTIN_D
11971 #undef RS6000_BUILTIN_E
11972 #undef RS6000_BUILTIN_H
11973 #undef RS6000_BUILTIN_P
11974 #undef RS6000_BUILTIN_Q
11975 #undef RS6000_BUILTIN_S
11976 #undef RS6000_BUILTIN_X
11977 /* Return true if a builtin function is overloaded. */
11978 bool
11979 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
11981 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
11984 /* Expand an expression EXP that calls a builtin without arguments. */
11985 static rtx
11986 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
11988 rtx pat;
11989 machine_mode tmode = insn_data[icode].operand[0].mode;
11991 if (icode == CODE_FOR_nothing)
11992 /* Builtin not supported on this processor. */
11993 return 0;
11995 if (target == 0
11996 || GET_MODE (target) != tmode
11997 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11998 target = gen_reg_rtx (tmode);
12000 pat = GEN_FCN (icode) (target);
12001 if (! pat)
12002 return 0;
12003 emit_insn (pat);
12005 return target;
12009 static rtx
12010 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12012 rtx pat;
12013 tree arg0 = CALL_EXPR_ARG (exp, 0);
12014 tree arg1 = CALL_EXPR_ARG (exp, 1);
12015 rtx op0 = expand_normal (arg0);
12016 rtx op1 = expand_normal (arg1);
12017 machine_mode mode0 = insn_data[icode].operand[0].mode;
12018 machine_mode mode1 = insn_data[icode].operand[1].mode;
12020 if (icode == CODE_FOR_nothing)
12021 /* Builtin not supported on this processor. */
12022 return 0;
12024 /* If we got invalid arguments bail out before generating bad rtl. */
12025 if (arg0 == error_mark_node || arg1 == error_mark_node)
12026 return const0_rtx;
12028 if (GET_CODE (op0) != CONST_INT
12029 || INTVAL (op0) > 255
12030 || INTVAL (op0) < 0)
12032 error ("argument 1 must be an 8-bit field value");
12033 return const0_rtx;
12036 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12037 op0 = copy_to_mode_reg (mode0, op0);
12039 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12040 op1 = copy_to_mode_reg (mode1, op1);
12042 pat = GEN_FCN (icode) (op0, op1);
12043 if (! pat)
12044 return const0_rtx;
12045 emit_insn (pat);
12047 return NULL_RTX;
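/* Source-level usage (illustration): __builtin_mtfsf (0xff, d) moves
   the FPSCR image held in double D into all eight FPSCR fields; per
   the check above, the first argument must be a literal in 0..255.  */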
12051 static rtx
12052 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12054 rtx pat;
12055 tree arg0 = CALL_EXPR_ARG (exp, 0);
12056 rtx op0 = expand_normal (arg0);
12057 machine_mode tmode = insn_data[icode].operand[0].mode;
12058 machine_mode mode0 = insn_data[icode].operand[1].mode;
12060 if (icode == CODE_FOR_nothing)
12061 /* Builtin not supported on this processor. */
12062 return 0;
12064 /* If we got invalid arguments bail out before generating bad rtl. */
12065 if (arg0 == error_mark_node)
12066 return const0_rtx;
12068 if (icode == CODE_FOR_altivec_vspltisb
12069 || icode == CODE_FOR_altivec_vspltish
12070 || icode == CODE_FOR_altivec_vspltisw
12071 || icode == CODE_FOR_spe_evsplatfi
12072 || icode == CODE_FOR_spe_evsplati)
12074 /* Only allow 5-bit *signed* literals. */
12075 if (GET_CODE (op0) != CONST_INT
12076 || INTVAL (op0) > 15
12077 || INTVAL (op0) < -16)
12079 error ("argument 1 must be a 5-bit signed literal");
12080 return const0_rtx;
12084 if (target == 0
12085 || GET_MODE (target) != tmode
12086 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12087 target = gen_reg_rtx (tmode);
12089 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12090 op0 = copy_to_mode_reg (mode0, op0);
12092 pat = GEN_FCN (icode) (target, op0);
12093 if (! pat)
12094 return 0;
12095 emit_insn (pat);
12097 return target;
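/* E.g. __builtin_altivec_vspltisb (5) passes the check above, while
   __builtin_altivec_vspltisb (16) is rejected: the vspltisb immediate
   is a 5-bit signed field, so only -16..15 can be encoded.  */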
12100 static rtx
12101 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12103 rtx pat, scratch1, scratch2;
12104 tree arg0 = CALL_EXPR_ARG (exp, 0);
12105 rtx op0 = expand_normal (arg0);
12106 machine_mode tmode = insn_data[icode].operand[0].mode;
12107 machine_mode mode0 = insn_data[icode].operand[1].mode;
12109 /* If we have invalid arguments, bail out before generating bad rtl. */
12110 if (arg0 == error_mark_node)
12111 return const0_rtx;
12113 if (target == 0
12114 || GET_MODE (target) != tmode
12115 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12116 target = gen_reg_rtx (tmode);
12118 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12119 op0 = copy_to_mode_reg (mode0, op0);
12121 scratch1 = gen_reg_rtx (mode0);
12122 scratch2 = gen_reg_rtx (mode0);
12124 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12125 if (! pat)
12126 return 0;
12127 emit_insn (pat);
12129 return target;
12132 static rtx
12133 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12135 rtx pat;
12136 tree arg0 = CALL_EXPR_ARG (exp, 0);
12137 tree arg1 = CALL_EXPR_ARG (exp, 1);
12138 rtx op0 = expand_normal (arg0);
12139 rtx op1 = expand_normal (arg1);
12140 machine_mode tmode = insn_data[icode].operand[0].mode;
12141 machine_mode mode0 = insn_data[icode].operand[1].mode;
12142 machine_mode mode1 = insn_data[icode].operand[2].mode;
12144 if (icode == CODE_FOR_nothing)
12145 /* Builtin not supported on this processor. */
12146 return 0;
12148 /* If we got invalid arguments bail out before generating bad rtl. */
12149 if (arg0 == error_mark_node || arg1 == error_mark_node)
12150 return const0_rtx;
12152 if (icode == CODE_FOR_altivec_vcfux
12153 || icode == CODE_FOR_altivec_vcfsx
12154 || icode == CODE_FOR_altivec_vctsxs
12155 || icode == CODE_FOR_altivec_vctuxs
12156 || icode == CODE_FOR_altivec_vspltb
12157 || icode == CODE_FOR_altivec_vsplth
12158 || icode == CODE_FOR_altivec_vspltw
12159 || icode == CODE_FOR_spe_evaddiw
12160 || icode == CODE_FOR_spe_evldd
12161 || icode == CODE_FOR_spe_evldh
12162 || icode == CODE_FOR_spe_evldw
12163 || icode == CODE_FOR_spe_evlhhesplat
12164 || icode == CODE_FOR_spe_evlhhossplat
12165 || icode == CODE_FOR_spe_evlhhousplat
12166 || icode == CODE_FOR_spe_evlwhe
12167 || icode == CODE_FOR_spe_evlwhos
12168 || icode == CODE_FOR_spe_evlwhou
12169 || icode == CODE_FOR_spe_evlwhsplat
12170 || icode == CODE_FOR_spe_evlwwsplat
12171 || icode == CODE_FOR_spe_evrlwi
12172 || icode == CODE_FOR_spe_evslwi
12173 || icode == CODE_FOR_spe_evsrwis
12174 || icode == CODE_FOR_spe_evsubifw
12175 || icode == CODE_FOR_spe_evsrwiu)
12177 /* Only allow 5-bit unsigned literals. */
12178 STRIP_NOPS (arg1);
12179 if (TREE_CODE (arg1) != INTEGER_CST
12180 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12182 error ("argument 2 must be a 5-bit unsigned literal");
12183 return const0_rtx;
12187 if (target == 0
12188 || GET_MODE (target) != tmode
12189 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12190 target = gen_reg_rtx (tmode);
12192 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12193 op0 = copy_to_mode_reg (mode0, op0);
12194 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12195 op1 = copy_to_mode_reg (mode1, op1);
12197 pat = GEN_FCN (icode) (target, op0, op1);
12198 if (! pat)
12199 return 0;
12200 emit_insn (pat);
12202 return target;
12205 static rtx
12206 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12208 rtx pat, scratch;
12209 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12210 tree arg0 = CALL_EXPR_ARG (exp, 1);
12211 tree arg1 = CALL_EXPR_ARG (exp, 2);
12212 rtx op0 = expand_normal (arg0);
12213 rtx op1 = expand_normal (arg1);
12214 machine_mode tmode = SImode;
12215 machine_mode mode0 = insn_data[icode].operand[1].mode;
12216 machine_mode mode1 = insn_data[icode].operand[2].mode;
12217 int cr6_form_int;
12219 if (TREE_CODE (cr6_form) != INTEGER_CST)
12221 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12222 return const0_rtx;
12224 else
12225 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12227 gcc_assert (mode0 == mode1);
12229 /* If we have invalid arguments, bail out before generating bad rtl. */
12230 if (arg0 == error_mark_node || arg1 == error_mark_node)
12231 return const0_rtx;
12233 if (target == 0
12234 || GET_MODE (target) != tmode
12235 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12236 target = gen_reg_rtx (tmode);
12238 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12239 op0 = copy_to_mode_reg (mode0, op0);
12240 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12241 op1 = copy_to_mode_reg (mode1, op1);
12243 scratch = gen_reg_rtx (mode0);
12245 pat = GEN_FCN (icode) (scratch, op0, op1);
12246 if (! pat)
12247 return 0;
12248 emit_insn (pat);
12250 /* The vec_any* and vec_all* predicates use the same opcodes for two
12251 different operations, but the bits in CR6 will be different
12252 depending on what information we want. So we have to play tricks
12253 with CR6 to get the right bits out.
12255 If you think this is disgusting, look at the specs for the
12256 AltiVec predicates. */
12258 switch (cr6_form_int)
12260 case 0:
12261 emit_insn (gen_cr6_test_for_zero (target));
12262 break;
12263 case 1:
12264 emit_insn (gen_cr6_test_for_zero_reverse (target));
12265 break;
12266 case 2:
12267 emit_insn (gen_cr6_test_for_lt (target));
12268 break;
12269 case 3:
12270 emit_insn (gen_cr6_test_for_lt_reverse (target));
12271 break;
12272 default:
12273 error ("argument 1 of __builtin_altivec_predicate is out of range");
12274 break;
12277 return target;
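/* The CR6_FORM values correspond to the __CR6_* constants that
   altivec.h passes as argument 1 of the predicate builtins:
   0 == __CR6_EQ, 1 == __CR6_EQ_REV, 2 == __CR6_LT, 3 == __CR6_LT_REV;
   this is how vec_all_* and vec_any_* select which CR6 bit to test.  */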
12280 static rtx
12281 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12283 rtx pat, addr;
12284 tree arg0 = CALL_EXPR_ARG (exp, 0);
12285 tree arg1 = CALL_EXPR_ARG (exp, 1);
12286 machine_mode tmode = insn_data[icode].operand[0].mode;
12287 machine_mode mode0 = Pmode;
12288 machine_mode mode1 = Pmode;
12289 rtx op0 = expand_normal (arg0);
12290 rtx op1 = expand_normal (arg1);
12292 if (icode == CODE_FOR_nothing)
12293 /* Builtin not supported on this processor. */
12294 return 0;
12296 /* If we got invalid arguments bail out before generating bad rtl. */
12297 if (arg0 == error_mark_node || arg1 == error_mark_node)
12298 return const0_rtx;
12300 if (target == 0
12301 || GET_MODE (target) != tmode
12302 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12303 target = gen_reg_rtx (tmode);
12305 op1 = copy_to_mode_reg (mode1, op1);
12307 if (op0 == const0_rtx)
12309 addr = gen_rtx_MEM (tmode, op1);
12311 else
12313 op0 = copy_to_mode_reg (mode0, op0);
12314 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12317 pat = GEN_FCN (icode) (target, addr);
12319 if (! pat)
12320 return 0;
12321 emit_insn (pat);
12323 return target;
12326 /* Return a constant vector for use as a little-endian permute control vector
12327 to reverse the order of elements of the given vector mode. */
12328 static rtx
12329 swap_selector_for_mode (machine_mode mode)
12331 /* These are little endian vectors, so their elements are reversed
12332 from what you would normally expect for a permute control vector. */
12333 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12334 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12335 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12336 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12337 unsigned int *swaparray, i;
12338 rtx perm[16];
12340 switch (mode)
12342 case V2DFmode:
12343 case V2DImode:
12344 swaparray = swap2;
12345 break;
12346 case V4SFmode:
12347 case V4SImode:
12348 swaparray = swap4;
12349 break;
12350 case V8HImode:
12351 swaparray = swap8;
12352 break;
12353 case V16QImode:
12354 swaparray = swap16;
12355 break;
12356 default:
12357 gcc_unreachable ();
12360 for (i = 0; i < 16; ++i)
12361 perm[i] = GEN_INT (swaparray[i]);
12363 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
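/* Example: applied to a V4SImode vector, the selector built here makes
   vperm emit the elements in the order 3,2,1,0.  The byte patterns
   above look byte-reversed because the selector itself is stored as a
   little-endian vector, per the comment at the top of the function.  */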
12366 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12367 with -maltivec=be specified. Issue the load followed by an element-reversing
12368 permute. */
12369 void
12370 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12372 rtx tmp = gen_reg_rtx (mode);
12373 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12374 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12375 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12376 rtx sel = swap_selector_for_mode (mode);
12377 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12379 gcc_assert (REG_P (op0));
12380 emit_insn (par);
12381 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
12384 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12385 with -maltivec=be specified. Issue the store preceded by an element-reversing
12386 permute. */
12387 void
12388 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12390 rtx tmp = gen_reg_rtx (mode);
12391 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12392 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12393 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12394 rtx sel = swap_selector_for_mode (mode);
12395 rtx vperm;
12397 gcc_assert (REG_P (op1));
12398 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12399 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12400 emit_insn (par);
12403 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12404 specified. Issue the store preceded by an element-reversing permute. */
12405 void
12406 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12408 machine_mode inner_mode = GET_MODE_INNER (mode);
12409 rtx tmp = gen_reg_rtx (mode);
12410 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12411 rtx sel = swap_selector_for_mode (mode);
12412 rtx vperm;
12414 gcc_assert (REG_P (op1));
12415 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12416 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12417 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
12420 static rtx
12421 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12423 rtx pat, addr;
12424 tree arg0 = CALL_EXPR_ARG (exp, 0);
12425 tree arg1 = CALL_EXPR_ARG (exp, 1);
12426 machine_mode tmode = insn_data[icode].operand[0].mode;
12427 machine_mode mode0 = Pmode;
12428 machine_mode mode1 = Pmode;
12429 rtx op0 = expand_normal (arg0);
12430 rtx op1 = expand_normal (arg1);
12432 if (icode == CODE_FOR_nothing)
12433 /* Builtin not supported on this processor. */
12434 return 0;
12436 /* If we got invalid arguments bail out before generating bad rtl. */
12437 if (arg0 == error_mark_node || arg1 == error_mark_node)
12438 return const0_rtx;
12440 if (target == 0
12441 || GET_MODE (target) != tmode
12442 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12443 target = gen_reg_rtx (tmode);
12445 op1 = copy_to_mode_reg (mode1, op1);
12447 if (op0 == const0_rtx)
12449 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12451 else
12453 op0 = copy_to_mode_reg (mode0, op0);
12454 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12457 pat = GEN_FCN (icode) (target, addr);
12459 if (! pat)
12460 return 0;
12461 emit_insn (pat);
12463 return target;
12466 static rtx
12467 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12469 tree arg0 = CALL_EXPR_ARG (exp, 0);
12470 tree arg1 = CALL_EXPR_ARG (exp, 1);
12471 tree arg2 = CALL_EXPR_ARG (exp, 2);
12472 rtx op0 = expand_normal (arg0);
12473 rtx op1 = expand_normal (arg1);
12474 rtx op2 = expand_normal (arg2);
12475 rtx pat;
12476 machine_mode mode0 = insn_data[icode].operand[0].mode;
12477 machine_mode mode1 = insn_data[icode].operand[1].mode;
12478 machine_mode mode2 = insn_data[icode].operand[2].mode;
12480 /* Invalid arguments. Bail before doing anything stoopid! */
12481 if (arg0 == error_mark_node
12482 || arg1 == error_mark_node
12483 || arg2 == error_mark_node)
12484 return const0_rtx;
12486 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12487 op0 = copy_to_mode_reg (mode2, op0);
12488 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12489 op1 = copy_to_mode_reg (mode0, op1);
12490 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12491 op2 = copy_to_mode_reg (mode1, op2);
12493 pat = GEN_FCN (icode) (op1, op2, op0);
12494 if (pat)
12495 emit_insn (pat);
12496 return NULL_RTX;
12499 static rtx
12500 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12502 tree arg0 = CALL_EXPR_ARG (exp, 0);
12503 tree arg1 = CALL_EXPR_ARG (exp, 1);
12504 tree arg2 = CALL_EXPR_ARG (exp, 2);
12505 rtx op0 = expand_normal (arg0);
12506 rtx op1 = expand_normal (arg1);
12507 rtx op2 = expand_normal (arg2);
12508 rtx pat, addr;
12509 machine_mode tmode = insn_data[icode].operand[0].mode;
12510 machine_mode mode1 = Pmode;
12511 machine_mode mode2 = Pmode;
12513 /* Invalid arguments. Bail before doing anything stoopid! */
12514 if (arg0 == error_mark_node
12515 || arg1 == error_mark_node
12516 || arg2 == error_mark_node)
12517 return const0_rtx;
12519 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12520 op0 = copy_to_mode_reg (tmode, op0);
12522 op2 = copy_to_mode_reg (mode2, op2);
12524 if (op1 == const0_rtx)
12526 addr = gen_rtx_MEM (tmode, op2);
12528 else
12530 op1 = copy_to_mode_reg (mode1, op1);
12531 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12534 pat = GEN_FCN (icode) (addr, op0);
12535 if (pat)
12536 emit_insn (pat);
12537 return NULL_RTX;
12540 static rtx
12541 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12543 tree arg0 = CALL_EXPR_ARG (exp, 0);
12544 tree arg1 = CALL_EXPR_ARG (exp, 1);
12545 tree arg2 = CALL_EXPR_ARG (exp, 2);
12546 rtx op0 = expand_normal (arg0);
12547 rtx op1 = expand_normal (arg1);
12548 rtx op2 = expand_normal (arg2);
12549 rtx pat, addr;
12550 machine_mode tmode = insn_data[icode].operand[0].mode;
12551 machine_mode smode = insn_data[icode].operand[1].mode;
12552 machine_mode mode1 = Pmode;
12553 machine_mode mode2 = Pmode;
12555 /* Invalid arguments. Bail before doing anything stoopid! */
12556 if (arg0 == error_mark_node
12557 || arg1 == error_mark_node
12558 || arg2 == error_mark_node)
12559 return const0_rtx;
12561 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12562 op0 = copy_to_mode_reg (smode, op0);
12564 op2 = copy_to_mode_reg (mode2, op2);
12566 if (op1 == const0_rtx)
12568 addr = gen_rtx_MEM (tmode, op2);
12570 else
12572 op1 = copy_to_mode_reg (mode1, op1);
12573 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12576 pat = GEN_FCN (icode) (addr, op0);
12577 if (pat)
12578 emit_insn (pat);
12579 return NULL_RTX;
12582 /* Return the appropriate SPR number associated with the given builtin. */
12583 static inline HOST_WIDE_INT
12584 htm_spr_num (enum rs6000_builtins code)
12586 if (code == HTM_BUILTIN_GET_TFHAR
12587 || code == HTM_BUILTIN_SET_TFHAR)
12588 return TFHAR_SPR;
12589 else if (code == HTM_BUILTIN_GET_TFIAR
12590 || code == HTM_BUILTIN_SET_TFIAR)
12591 return TFIAR_SPR;
12592 else if (code == HTM_BUILTIN_GET_TEXASR
12593 || code == HTM_BUILTIN_SET_TEXASR)
12594 return TEXASR_SPR;
12595 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12596 || code == HTM_BUILTIN_SET_TEXASRU);
12597 return TEXASRU_SPR;
12600 /* Return the appropriate SPR regno associated with the given builtin. */
12601 static inline HOST_WIDE_INT
12602 htm_spr_regno (enum rs6000_builtins code)
12604 if (code == HTM_BUILTIN_GET_TFHAR
12605 || code == HTM_BUILTIN_SET_TFHAR)
12606 return TFHAR_REGNO;
12607 else if (code == HTM_BUILTIN_GET_TFIAR
12608 || code == HTM_BUILTIN_SET_TFIAR)
12609 return TFIAR_REGNO;
12610 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12611 || code == HTM_BUILTIN_SET_TEXASR
12612 || code == HTM_BUILTIN_GET_TEXASRU
12613 || code == HTM_BUILTIN_SET_TEXASRU);
12614 return TEXASR_REGNO;
12617 /* Return the correct ICODE value depending on whether we are
12618 setting or reading the HTM SPRs. */
12619 static inline enum insn_code
12620 rs6000_htm_spr_icode (bool nonvoid)
12622 if (nonvoid)
12623 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12624 else
12625 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12628 /* Expand the HTM builtin in EXP and store the result in TARGET.
12629 Store true in *EXPANDEDP if we found a builtin to expand. */
12630 static rtx
12631 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12633 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12634 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12635 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12636 const struct builtin_description *d;
12637 size_t i;
12639 *expandedp = false;
12641 /* Expand the HTM builtins. */
12642 d = bdesc_htm;
12643 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12644 if (d->code == fcode)
12646 rtx op[MAX_HTM_OPERANDS], pat;
12647 int nopnds = 0;
12648 tree arg;
12649 call_expr_arg_iterator iter;
12650 unsigned attr = rs6000_builtin_info[fcode].attr;
12651 enum insn_code icode = d->icode;
12653 if (attr & RS6000_BTC_SPR)
12654 icode = rs6000_htm_spr_icode (nonvoid);
12656 if (nonvoid)
12658 machine_mode tmode = insn_data[icode].operand[0].mode;
12659 if (!target
12660 || GET_MODE (target) != tmode
12661 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12662 target = gen_reg_rtx (tmode);
12663 op[nopnds++] = target;
12666 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12668 const struct insn_operand_data *insn_op;
12670 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12671 return NULL_RTX;
12673 insn_op = &insn_data[icode].operand[nopnds];
12675 op[nopnds] = expand_normal (arg);
12677 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12679 if (!strcmp (insn_op->constraint, "n"))
12681 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12682 if (!CONST_INT_P (op[nopnds]))
12683 error ("argument %d must be an unsigned literal", arg_num);
12684 else
12685 error ("argument %d is an unsigned literal that is "
12686 "out of range", arg_num);
12687 return const0_rtx;
12689 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12692 nopnds++;
12695 /* Handle the builtins for extended mnemonics. These accept
12696 no arguments, but map to builtins that take arguments. */
12697 switch (fcode)
12699 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12700 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12701 op[nopnds++] = GEN_INT (1);
12702 #ifdef ENABLE_CHECKING
12703 attr |= RS6000_BTC_UNARY;
12704 #endif
12705 break;
12706 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12707 op[nopnds++] = GEN_INT (0);
12708 #ifdef ENABLE_CHECKING
12709 attr |= RS6000_BTC_UNARY;
12710 #endif
12711 break;
12712 default:
12713 break;
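/* Editor's note (illustrative): the aliases handled just above let users
   write the extended mnemonics without arguments while sharing the insn
   patterns of the parameterised builtins, exactly as the "Alias for"
   comments say.  With -mhtm:

     __builtin_tendall ();     behaves like  __builtin_tend (1)
     __builtin_tresume ();     behaves like  __builtin_tsr (1)
     __builtin_tsuspend ();    behaves like  __builtin_tsr (0)

   The GEN_INT pushed onto OP[] supplies the operand those patterns
   expect.  */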
12716 /* If this builtin accesses SPRs, then pass in the appropriate
12717 SPR number and SPR regno as the last two operands. */
12718 if (attr & RS6000_BTC_SPR)
12720 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12721 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12724 #ifdef ENABLE_CHECKING
12725 int expected_nopnds = 0;
12726 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12727 expected_nopnds = 1;
12728 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12729 expected_nopnds = 2;
12730 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12731 expected_nopnds = 3;
12732 if (!(attr & RS6000_BTC_VOID))
12733 expected_nopnds += 1;
12734 if (attr & RS6000_BTC_SPR)
12735 expected_nopnds += 2;
12737 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12738 #endif
12740 switch (nopnds)
12742 case 1:
12743 pat = GEN_FCN (icode) (op[0]);
12744 break;
12745 case 2:
12746 pat = GEN_FCN (icode) (op[0], op[1]);
12747 break;
12748 case 3:
12749 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12750 break;
12751 case 4:
12752 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12753 break;
12754 default:
12755 gcc_unreachable ();
12757 if (!pat)
12758 return NULL_RTX;
12759 emit_insn (pat);
12761 *expandedp = true;
12762 if (nonvoid)
12763 return target;
12764 return const0_rtx;
12767 return NULL_RTX;
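/* Editor's note: a minimal usage sketch (not part of the original file)
   of the kind of call this expander handles.  It assumes a POWER8-class
   target built with -mhtm; per the GCC HTM builtin documentation,
   __builtin_tbegin returns nonzero when the transaction starts.

     int
     add_transactionally (int *p, int v)
     {
       if (__builtin_tbegin (0))
         {
           *p += v;
           __builtin_tend (0);
           return 1;
         }
       return 0;
     }

   Both builtins arrive here through bdesc_htm and are matched against
   their insn operands in the loop above.  */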
12770 static rtx
12771 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12773 rtx pat;
12774 tree arg0 = CALL_EXPR_ARG (exp, 0);
12775 tree arg1 = CALL_EXPR_ARG (exp, 1);
12776 tree arg2 = CALL_EXPR_ARG (exp, 2);
12777 rtx op0 = expand_normal (arg0);
12778 rtx op1 = expand_normal (arg1);
12779 rtx op2 = expand_normal (arg2);
12780 machine_mode tmode = insn_data[icode].operand[0].mode;
12781 machine_mode mode0 = insn_data[icode].operand[1].mode;
12782 machine_mode mode1 = insn_data[icode].operand[2].mode;
12783 machine_mode mode2 = insn_data[icode].operand[3].mode;
12785 if (icode == CODE_FOR_nothing)
12786 /* Builtin not supported on this processor. */
12787 return 0;
12789 /* If we got invalid arguments bail out before generating bad rtl. */
12790 if (arg0 == error_mark_node
12791 || arg1 == error_mark_node
12792 || arg2 == error_mark_node)
12793 return const0_rtx;
12795 /* Check and prepare argument depending on the instruction code.
12797 Note that a switch statement instead of the sequence of tests
12798 would be incorrect as many of the CODE_FOR values could be
12799 CODE_FOR_nothing and that would yield multiple alternatives
12800 with identical values. We'd never reach here at runtime in
12801 this case. */
12802 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12803 || icode == CODE_FOR_altivec_vsldoi_v4si
12804 || icode == CODE_FOR_altivec_vsldoi_v8hi
12805 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12807 /* Only allow 4-bit unsigned literals. */
12808 STRIP_NOPS (arg2);
12809 if (TREE_CODE (arg2) != INTEGER_CST
12810 || TREE_INT_CST_LOW (arg2) & ~0xf)
12812 error ("argument 3 must be a 4-bit unsigned literal");
12813 return const0_rtx;
12816 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12817 || icode == CODE_FOR_vsx_xxpermdi_v2di
12818 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12819 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12820 || icode == CODE_FOR_vsx_xxsldwi_v4si
12821 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12822 || icode == CODE_FOR_vsx_xxsldwi_v2di
12823 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12825 /* Only allow 2-bit unsigned literals. */
12826 STRIP_NOPS (arg2);
12827 if (TREE_CODE (arg2) != INTEGER_CST
12828 || TREE_INT_CST_LOW (arg2) & ~0x3)
12830 error ("argument 3 must be a 2-bit unsigned literal");
12831 return const0_rtx;
12834 else if (icode == CODE_FOR_vsx_set_v2df
12835 || icode == CODE_FOR_vsx_set_v2di
12836 || icode == CODE_FOR_bcdadd
12837 || icode == CODE_FOR_bcdadd_lt
12838 || icode == CODE_FOR_bcdadd_eq
12839 || icode == CODE_FOR_bcdadd_gt
12840 || icode == CODE_FOR_bcdsub
12841 || icode == CODE_FOR_bcdsub_lt
12842 || icode == CODE_FOR_bcdsub_eq
12843 || icode == CODE_FOR_bcdsub_gt)
12845 /* Only allow 1-bit unsigned literals. */
12846 STRIP_NOPS (arg2);
12847 if (TREE_CODE (arg2) != INTEGER_CST
12848 || TREE_INT_CST_LOW (arg2) & ~0x1)
12850 error ("argument 3 must be a 1-bit unsigned literal");
12851 return const0_rtx;
12854 else if (icode == CODE_FOR_dfp_ddedpd_dd
12855 || icode == CODE_FOR_dfp_ddedpd_td)
12857 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12858 STRIP_NOPS (arg0);
12859 if (TREE_CODE (arg0) != INTEGER_CST
12860 || TREE_INT_CST_LOW (arg0) & ~0x3)
12862 error ("argument 1 must be 0 or 2");
12863 return const0_rtx;
12866 else if (icode == CODE_FOR_dfp_denbcd_dd
12867 || icode == CODE_FOR_dfp_denbcd_td)
12869 /* Only allow 1-bit unsigned literals. */
12870 STRIP_NOPS (arg0);
12871 if (TREE_CODE (arg0) != INTEGER_CST
12872 || TREE_INT_CST_LOW (arg0) & ~0x1)
12874 error ("argument 1 must be a 1-bit unsigned literal");
12875 return const0_rtx;
12878 else if (icode == CODE_FOR_dfp_dscli_dd
12879 || icode == CODE_FOR_dfp_dscli_td
12880 || icode == CODE_FOR_dfp_dscri_dd
12881 || icode == CODE_FOR_dfp_dscri_td)
12883 /* Only allow 6-bit unsigned literals. */
12884 STRIP_NOPS (arg1);
12885 if (TREE_CODE (arg1) != INTEGER_CST
12886 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12888 error ("argument 2 must be a 6-bit unsigned literal");
12889 return const0_rtx;
12892 else if (icode == CODE_FOR_crypto_vshasigmaw
12893 || icode == CODE_FOR_crypto_vshasigmad)
12895 /* Check whether the 2nd and 3rd arguments are integer constants and in
12896 range and prepare arguments. */
12897 STRIP_NOPS (arg1);
12898 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
12900 error ("argument 2 must be 0 or 1");
12901 return const0_rtx;
12904 STRIP_NOPS (arg2);
12905 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
12907 error ("argument 3 must be in the range 0..15");
12908 return const0_rtx;
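/* Editor's note (illustrative): every branch above enforces the same
   rule -- the indicated argument must be a literal small enough for the
   instruction's immediate field, because it is encoded directly into
   the machine instruction.  For example, with -maltivec and
   <altivec.h>:

     vector signed int
     rotate_pair (vector signed int a, vector signed int b)
     {
       return vec_sld (a, b, 3);
     }

   The literal 3 fits the 4-bit vsldoi field and is accepted; writing
   vec_sld (a, b, 19) instead would be rejected here with "argument 3
   must be a 4-bit unsigned literal".  */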
12912 if (target == 0
12913 || GET_MODE (target) != tmode
12914 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12915 target = gen_reg_rtx (tmode);
12917 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12918 op0 = copy_to_mode_reg (mode0, op0);
12919 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12920 op1 = copy_to_mode_reg (mode1, op1);
12921 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12922 op2 = copy_to_mode_reg (mode2, op2);
12924 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
12925 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
12926 else
12927 pat = GEN_FCN (icode) (target, op0, op1, op2);
12928 if (! pat)
12929 return 0;
12930 emit_insn (pat);
12932 return target;
12935 /* Expand the lvx builtins. */
12936 static rtx
12937 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
12939 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12940 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12941 tree arg0;
12942 machine_mode tmode, mode0;
12943 rtx pat, op0;
12944 enum insn_code icode;
12946 switch (fcode)
12948 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
12949 icode = CODE_FOR_vector_altivec_load_v16qi;
12950 break;
12951 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
12952 icode = CODE_FOR_vector_altivec_load_v8hi;
12953 break;
12954 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
12955 icode = CODE_FOR_vector_altivec_load_v4si;
12956 break;
12957 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
12958 icode = CODE_FOR_vector_altivec_load_v4sf;
12959 break;
12960 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
12961 icode = CODE_FOR_vector_altivec_load_v2df;
12962 break;
12963 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12964 icode = CODE_FOR_vector_altivec_load_v2di;
      break;
12965 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
12966 icode = CODE_FOR_vector_altivec_load_v1ti;
12967 break;
12968 default:
12969 *expandedp = false;
12970 return NULL_RTX;
12973 *expandedp = true;
12975 arg0 = CALL_EXPR_ARG (exp, 0);
12976 op0 = expand_normal (arg0);
12977 tmode = insn_data[icode].operand[0].mode;
12978 mode0 = insn_data[icode].operand[1].mode;
12980 if (target == 0
12981 || GET_MODE (target) != tmode
12982 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12983 target = gen_reg_rtx (tmode);
12985 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12986 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12988 pat = GEN_FCN (icode) (target, op0);
12989 if (! pat)
12990 return 0;
12991 emit_insn (pat);
12992 return target;
12995 /* Expand the stvx builtins. */
12996 static rtx
12997 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12998 bool *expandedp)
13000 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13001 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13002 tree arg0, arg1;
13003 machine_mode mode0, mode1;
13004 rtx pat, op0, op1;
13005 enum insn_code icode;
13007 switch (fcode)
13009 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
13010 icode = CODE_FOR_vector_altivec_store_v16qi;
13011 break;
13012 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
13013 icode = CODE_FOR_vector_altivec_store_v8hi;
13014 break;
13015 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13016 icode = CODE_FOR_vector_altivec_store_v4si;
13017 break;
13018 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13019 icode = CODE_FOR_vector_altivec_store_v4sf;
13020 break;
13021 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13022 icode = CODE_FOR_vector_altivec_store_v2df;
13023 break;
13024 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
13025 icode = CODE_FOR_vector_altivec_store_v2di;
      break;
13026 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13027 icode = CODE_FOR_vector_altivec_store_v1ti;
13028 break;
13029 default:
13030 *expandedp = false;
13031 return NULL_RTX;
13034 arg0 = CALL_EXPR_ARG (exp, 0);
13035 arg1 = CALL_EXPR_ARG (exp, 1);
13036 op0 = expand_normal (arg0);
13037 op1 = expand_normal (arg1);
13038 mode0 = insn_data[icode].operand[0].mode;
13039 mode1 = insn_data[icode].operand[1].mode;
13041 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13042 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13043 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13044 op1 = copy_to_mode_reg (mode1, op1);
13046 pat = GEN_FCN (icode) (op0, op1);
13047 if (pat)
13048 emit_insn (pat);
13050 *expandedp = true;
13051 return NULL_RTX;
13054 /* Expand the dst builtins. */
13055 static rtx
13056 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13057 bool *expandedp)
13059 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13060 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13061 tree arg0, arg1, arg2;
13062 machine_mode mode0, mode1;
13063 rtx pat, op0, op1, op2;
13064 const struct builtin_description *d;
13065 size_t i;
13067 *expandedp = false;
13069 /* Handle DST variants. */
13070 d = bdesc_dst;
13071 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13072 if (d->code == fcode)
13074 arg0 = CALL_EXPR_ARG (exp, 0);
13075 arg1 = CALL_EXPR_ARG (exp, 1);
13076 arg2 = CALL_EXPR_ARG (exp, 2);
13077 op0 = expand_normal (arg0);
13078 op1 = expand_normal (arg1);
13079 op2 = expand_normal (arg2);
13080 mode0 = insn_data[d->icode].operand[0].mode;
13081 mode1 = insn_data[d->icode].operand[1].mode;
13083 /* Invalid arguments, bail out before generating bad rtl. */
13084 if (arg0 == error_mark_node
13085 || arg1 == error_mark_node
13086 || arg2 == error_mark_node)
13087 return const0_rtx;
13089 *expandedp = true;
13090 STRIP_NOPS (arg2);
13091 if (TREE_CODE (arg2) != INTEGER_CST
13092 || TREE_INT_CST_LOW (arg2) & ~0x3)
13094 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13095 return const0_rtx;
13098 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13099 op0 = copy_to_mode_reg (Pmode, op0);
13100 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13101 op1 = copy_to_mode_reg (mode1, op1);
13103 pat = GEN_FCN (d->icode) (op0, op1, op2);
13104 if (pat != 0)
13105 emit_insn (pat);
13107 return NULL_RTX;
13110 return NULL_RTX;
13113 /* Expand vec_init builtin. */
13114 static rtx
13115 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13117 machine_mode tmode = TYPE_MODE (type);
13118 machine_mode inner_mode = GET_MODE_INNER (tmode);
13119 int i, n_elt = GET_MODE_NUNITS (tmode);
13121 gcc_assert (VECTOR_MODE_P (tmode));
13122 gcc_assert (n_elt == call_expr_nargs (exp));
13124 if (!target || !register_operand (target, tmode))
13125 target = gen_reg_rtx (tmode);
13127 /* If we have a vector comprised of a single element, such as V1TImode, do
13128 the initialization directly. */
13129 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13131 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13132 emit_move_insn (target, gen_lowpart (tmode, x));
13134 else
13136 rtvec v = rtvec_alloc (n_elt);
13138 for (i = 0; i < n_elt; ++i)
13140 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13141 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13144 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13147 return target;
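/* Editor's note: an illustrative sketch, hedged.  Vector constructors
   such as

     vector int
     make_counts (int a, int b, int c, int d)
     {
       return (vector int) { a, b, c, d };
     }

   are not required to come through this builtin path, but they end up
   in the same place: rs6000_expand_vector_init, called above with a
   PARALLEL of the four elements.  The n_elt == 1 special case covers
   one-element vectors such as V1TImode, where the value is simply moved
   through gen_lowpart instead.  */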
13150 /* Return the integer constant in ARG. Constrain it to be in the range
13151 of the subparts of VEC_TYPE; issue an error if not. */
13153 static int
13154 get_element_number (tree vec_type, tree arg)
13156 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13158 if (!tree_fits_uhwi_p (arg)
13159 || (elt = tree_to_uhwi (arg), elt > max))
13161 error ("selector must be an integer constant in the range 0..%wi", max);
13162 return 0;
13165 return elt;
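/* Editor's note (illustrative): this is the range check behind the
   constant element-number argument of the vec_insert/vec_extract style
   builtins expanded below.  With -maltivec:

     int
     third_lane (vector signed int v)
     {
       return vec_extract (v, 2);
     }

   Selectors 0..3 are valid for the 4-element vector; anything larger,
   or non-constant, lands in the error above and falls back to
   element 0.  */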
13168 /* Expand vec_set builtin. */
13169 static rtx
13170 altivec_expand_vec_set_builtin (tree exp)
13172 machine_mode tmode, mode1;
13173 tree arg0, arg1, arg2;
13174 int elt;
13175 rtx op0, op1;
13177 arg0 = CALL_EXPR_ARG (exp, 0);
13178 arg1 = CALL_EXPR_ARG (exp, 1);
13179 arg2 = CALL_EXPR_ARG (exp, 2);
13181 tmode = TYPE_MODE (TREE_TYPE (arg0));
13182 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13183 gcc_assert (VECTOR_MODE_P (tmode));
13185 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13186 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13187 elt = get_element_number (TREE_TYPE (arg0), arg2);
13189 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13190 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13192 op0 = force_reg (tmode, op0);
13193 op1 = force_reg (mode1, op1);
13195 rs6000_expand_vector_set (op0, op1, elt);
13197 return op0;
13200 /* Expand vec_ext builtin. */
13201 static rtx
13202 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13204 machine_mode tmode, mode0;
13205 tree arg0, arg1;
13206 int elt;
13207 rtx op0;
13209 arg0 = CALL_EXPR_ARG (exp, 0);
13210 arg1 = CALL_EXPR_ARG (exp, 1);
13212 op0 = expand_normal (arg0);
13213 elt = get_element_number (TREE_TYPE (arg0), arg1);
13215 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13216 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13217 gcc_assert (VECTOR_MODE_P (mode0));
13219 op0 = force_reg (mode0, op0);
13221 if (optimize || !target || !register_operand (target, tmode))
13222 target = gen_reg_rtx (tmode);
13224 rs6000_expand_vector_extract (target, op0, elt);
13226 return target;
13229 /* Expand the builtin in EXP and store the result in TARGET. Store
13230 true in *EXPANDEDP if we found a builtin to expand. */
13231 static rtx
13232 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13234 const struct builtin_description *d;
13235 size_t i;
13236 enum insn_code icode;
13237 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13238 tree arg0;
13239 rtx op0, pat;
13240 machine_mode tmode, mode0;
13241 enum rs6000_builtins fcode
13242 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13244 if (rs6000_overloaded_builtin_p (fcode))
13246 *expandedp = true;
13247 error ("unresolved overload for Altivec builtin %qF", fndecl);
13249 /* Given it is invalid, just generate a normal call. */
13250 return expand_call (exp, target, false);
13253 target = altivec_expand_ld_builtin (exp, target, expandedp);
13254 if (*expandedp)
13255 return target;
13257 target = altivec_expand_st_builtin (exp, target, expandedp);
13258 if (*expandedp)
13259 return target;
13261 target = altivec_expand_dst_builtin (exp, target, expandedp);
13262 if (*expandedp)
13263 return target;
13265 *expandedp = true;
13267 switch (fcode)
13269 case ALTIVEC_BUILTIN_STVX_V2DF:
13270 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13271 case ALTIVEC_BUILTIN_STVX_V2DI:
13272 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13273 case ALTIVEC_BUILTIN_STVX_V4SF:
13274 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13275 case ALTIVEC_BUILTIN_STVX:
13276 case ALTIVEC_BUILTIN_STVX_V4SI:
13277 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13278 case ALTIVEC_BUILTIN_STVX_V8HI:
13279 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13280 case ALTIVEC_BUILTIN_STVX_V16QI:
13281 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13282 case ALTIVEC_BUILTIN_STVEBX:
13283 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13284 case ALTIVEC_BUILTIN_STVEHX:
13285 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13286 case ALTIVEC_BUILTIN_STVEWX:
13287 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13288 case ALTIVEC_BUILTIN_STVXL_V2DF:
13289 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13290 case ALTIVEC_BUILTIN_STVXL_V2DI:
13291 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13292 case ALTIVEC_BUILTIN_STVXL_V4SF:
13293 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13294 case ALTIVEC_BUILTIN_STVXL:
13295 case ALTIVEC_BUILTIN_STVXL_V4SI:
13296 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13297 case ALTIVEC_BUILTIN_STVXL_V8HI:
13298 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13299 case ALTIVEC_BUILTIN_STVXL_V16QI:
13300 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13302 case ALTIVEC_BUILTIN_STVLX:
13303 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13304 case ALTIVEC_BUILTIN_STVLXL:
13305 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13306 case ALTIVEC_BUILTIN_STVRX:
13307 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13308 case ALTIVEC_BUILTIN_STVRXL:
13309 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13311 case VSX_BUILTIN_STXVD2X_V1TI:
13312 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13313 case VSX_BUILTIN_STXVD2X_V2DF:
13314 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13315 case VSX_BUILTIN_STXVD2X_V2DI:
13316 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13317 case VSX_BUILTIN_STXVW4X_V4SF:
13318 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13319 case VSX_BUILTIN_STXVW4X_V4SI:
13320 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13321 case VSX_BUILTIN_STXVW4X_V8HI:
13322 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13323 case VSX_BUILTIN_STXVW4X_V16QI:
13324 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13326 case ALTIVEC_BUILTIN_MFVSCR:
13327 icode = CODE_FOR_altivec_mfvscr;
13328 tmode = insn_data[icode].operand[0].mode;
13330 if (target == 0
13331 || GET_MODE (target) != tmode
13332 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13333 target = gen_reg_rtx (tmode);
13335 pat = GEN_FCN (icode) (target);
13336 if (! pat)
13337 return 0;
13338 emit_insn (pat);
13339 return target;
13341 case ALTIVEC_BUILTIN_MTVSCR:
13342 icode = CODE_FOR_altivec_mtvscr;
13343 arg0 = CALL_EXPR_ARG (exp, 0);
13344 op0 = expand_normal (arg0);
13345 mode0 = insn_data[icode].operand[0].mode;
13347 /* If we got invalid arguments bail out before generating bad rtl. */
13348 if (arg0 == error_mark_node)
13349 return const0_rtx;
13351 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13352 op0 = copy_to_mode_reg (mode0, op0);
13354 pat = GEN_FCN (icode) (op0);
13355 if (pat)
13356 emit_insn (pat);
13357 return NULL_RTX;
13359 case ALTIVEC_BUILTIN_DSSALL:
13360 emit_insn (gen_altivec_dssall ());
13361 return NULL_RTX;
13363 case ALTIVEC_BUILTIN_DSS:
13364 icode = CODE_FOR_altivec_dss;
13365 arg0 = CALL_EXPR_ARG (exp, 0);
13366 STRIP_NOPS (arg0);
13367 op0 = expand_normal (arg0);
13368 mode0 = insn_data[icode].operand[0].mode;
13370 /* If we got invalid arguments bail out before generating bad rtl. */
13371 if (arg0 == error_mark_node)
13372 return const0_rtx;
13374 if (TREE_CODE (arg0) != INTEGER_CST
13375 || TREE_INT_CST_LOW (arg0) & ~0x3)
13377 error ("argument to dss must be a 2-bit unsigned literal");
13378 return const0_rtx;
13381 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13382 op0 = copy_to_mode_reg (mode0, op0);
13384 emit_insn (gen_altivec_dss (op0));
13385 return NULL_RTX;
13387 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13388 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13389 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13390 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13391 case VSX_BUILTIN_VEC_INIT_V2DF:
13392 case VSX_BUILTIN_VEC_INIT_V2DI:
13393 case VSX_BUILTIN_VEC_INIT_V1TI:
13394 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13396 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13397 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13398 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13399 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13400 case VSX_BUILTIN_VEC_SET_V2DF:
13401 case VSX_BUILTIN_VEC_SET_V2DI:
13402 case VSX_BUILTIN_VEC_SET_V1TI:
13403 return altivec_expand_vec_set_builtin (exp);
13405 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13406 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13407 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13408 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13409 case VSX_BUILTIN_VEC_EXT_V2DF:
13410 case VSX_BUILTIN_VEC_EXT_V2DI:
13411 case VSX_BUILTIN_VEC_EXT_V1TI:
13412 return altivec_expand_vec_ext_builtin (exp, target);
13414 default:
13415 break;
13416 /* Fall through. */
13419 /* Expand abs* operations. */
13420 d = bdesc_abs;
13421 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13422 if (d->code == fcode)
13423 return altivec_expand_abs_builtin (d->icode, exp, target);
13425 /* Expand the AltiVec predicates. */
13426 d = bdesc_altivec_preds;
13427 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13428 if (d->code == fcode)
13429 return altivec_expand_predicate_builtin (d->icode, exp, target);
13431 /* LV* are funky. We initialized them differently. */
13432 switch (fcode)
13434 case ALTIVEC_BUILTIN_LVSL:
13435 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13436 exp, target, false);
13437 case ALTIVEC_BUILTIN_LVSR:
13438 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13439 exp, target, false);
13440 case ALTIVEC_BUILTIN_LVEBX:
13441 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13442 exp, target, false);
13443 case ALTIVEC_BUILTIN_LVEHX:
13444 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13445 exp, target, false);
13446 case ALTIVEC_BUILTIN_LVEWX:
13447 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13448 exp, target, false);
13449 case ALTIVEC_BUILTIN_LVXL_V2DF:
13450 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13451 exp, target, false);
13452 case ALTIVEC_BUILTIN_LVXL_V2DI:
13453 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13454 exp, target, false);
13455 case ALTIVEC_BUILTIN_LVXL_V4SF:
13456 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13457 exp, target, false);
13458 case ALTIVEC_BUILTIN_LVXL:
13459 case ALTIVEC_BUILTIN_LVXL_V4SI:
13460 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13461 exp, target, false);
13462 case ALTIVEC_BUILTIN_LVXL_V8HI:
13463 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13464 exp, target, false);
13465 case ALTIVEC_BUILTIN_LVXL_V16QI:
13466 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13467 exp, target, false);
13468 case ALTIVEC_BUILTIN_LVX_V2DF:
13469 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13470 exp, target, false);
13471 case ALTIVEC_BUILTIN_LVX_V2DI:
13472 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13473 exp, target, false);
13474 case ALTIVEC_BUILTIN_LVX_V4SF:
13475 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13476 exp, target, false);
13477 case ALTIVEC_BUILTIN_LVX:
13478 case ALTIVEC_BUILTIN_LVX_V4SI:
13479 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13480 exp, target, false);
13481 case ALTIVEC_BUILTIN_LVX_V8HI:
13482 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13483 exp, target, false);
13484 case ALTIVEC_BUILTIN_LVX_V16QI:
13485 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13486 exp, target, false);
13487 case ALTIVEC_BUILTIN_LVLX:
13488 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13489 exp, target, true);
13490 case ALTIVEC_BUILTIN_LVLXL:
13491 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13492 exp, target, true);
13493 case ALTIVEC_BUILTIN_LVRX:
13494 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13495 exp, target, true);
13496 case ALTIVEC_BUILTIN_LVRXL:
13497 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13498 exp, target, true);
13499 case VSX_BUILTIN_LXVD2X_V1TI:
13500 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13501 exp, target, false);
13502 case VSX_BUILTIN_LXVD2X_V2DF:
13503 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13504 exp, target, false);
13505 case VSX_BUILTIN_LXVD2X_V2DI:
13506 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13507 exp, target, false);
13508 case VSX_BUILTIN_LXVW4X_V4SF:
13509 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13510 exp, target, false);
13511 case VSX_BUILTIN_LXVW4X_V4SI:
13512 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13513 exp, target, false);
13514 case VSX_BUILTIN_LXVW4X_V8HI:
13515 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13516 exp, target, false);
13517 case VSX_BUILTIN_LXVW4X_V16QI:
13518 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13519 exp, target, false);
13520 break;
13521 default:
13522 break;
13523 /* Fall through. */
13526 *expandedp = false;
13527 return NULL_RTX;
13530 /* Expand the builtin in EXP and store the result in TARGET. Store
13531 true in *EXPANDEDP if we found a builtin to expand. */
13532 static rtx
13533 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13535 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13536 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13537 const struct builtin_description *d;
13538 size_t i;
13540 *expandedp = true;
13542 switch (fcode)
13544 case PAIRED_BUILTIN_STX:
13545 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13546 case PAIRED_BUILTIN_LX:
13547 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13548 default:
13549 break;
13550 /* Fall through. */
13553 /* Expand the paired predicates. */
13554 d = bdesc_paired_preds;
13555 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13556 if (d->code == fcode)
13557 return paired_expand_predicate_builtin (d->icode, exp, target);
13559 *expandedp = false;
13560 return NULL_RTX;
13563 /* Binops that need to be initialized manually, but can be expanded
13564 automagically by rs6000_expand_binop_builtin. */
13565 static const struct builtin_description bdesc_2arg_spe[] =
13567 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13568 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13569 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13570 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13571 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13572 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13573 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13574 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13575 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13576 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13577 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13578 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13579 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13580 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13581 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13582 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13583 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13584 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13585 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13586 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13587 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13588 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13591 /* Expand the builtin in EXP and store the result in TARGET. Store
13592 true in *EXPANDEDP if we found a builtin to expand.
13594 This expands the SPE builtins that are not simple unary and binary
13595 operations. */
13596 static rtx
13597 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13599 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13600 tree arg1, arg0;
13601 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13602 enum insn_code icode;
13603 machine_mode tmode, mode0;
13604 rtx pat, op0;
13605 const struct builtin_description *d;
13606 size_t i;
13608 *expandedp = true;
13610 /* Syntax check for a 5-bit unsigned immediate. */
13611 switch (fcode)
13613 case SPE_BUILTIN_EVSTDD:
13614 case SPE_BUILTIN_EVSTDH:
13615 case SPE_BUILTIN_EVSTDW:
13616 case SPE_BUILTIN_EVSTWHE:
13617 case SPE_BUILTIN_EVSTWHO:
13618 case SPE_BUILTIN_EVSTWWE:
13619 case SPE_BUILTIN_EVSTWWO:
13620 arg1 = CALL_EXPR_ARG (exp, 2);
13621 if (TREE_CODE (arg1) != INTEGER_CST
13622 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13624 error ("argument 2 must be a 5-bit unsigned literal");
13625 return const0_rtx;
13627 break;
13628 default:
13629 break;
13632 /* The evsplat*i instructions are not quite generic. */
13633 switch (fcode)
13635 case SPE_BUILTIN_EVSPLATFI:
13636 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13637 exp, target);
13638 case SPE_BUILTIN_EVSPLATI:
13639 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13640 exp, target);
13641 default:
13642 break;
13645 d = bdesc_2arg_spe;
13646 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13647 if (d->code == fcode)
13648 return rs6000_expand_binop_builtin (d->icode, exp, target);
13650 d = bdesc_spe_predicates;
13651 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13652 if (d->code == fcode)
13653 return spe_expand_predicate_builtin (d->icode, exp, target);
13655 d = bdesc_spe_evsel;
13656 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13657 if (d->code == fcode)
13658 return spe_expand_evsel_builtin (d->icode, exp, target);
13660 switch (fcode)
13662 case SPE_BUILTIN_EVSTDDX:
13663 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13664 case SPE_BUILTIN_EVSTDHX:
13665 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13666 case SPE_BUILTIN_EVSTDWX:
13667 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13668 case SPE_BUILTIN_EVSTWHEX:
13669 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13670 case SPE_BUILTIN_EVSTWHOX:
13671 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13672 case SPE_BUILTIN_EVSTWWEX:
13673 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13674 case SPE_BUILTIN_EVSTWWOX:
13675 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13676 case SPE_BUILTIN_EVSTDD:
13677 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13678 case SPE_BUILTIN_EVSTDH:
13679 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13680 case SPE_BUILTIN_EVSTDW:
13681 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13682 case SPE_BUILTIN_EVSTWHE:
13683 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13684 case SPE_BUILTIN_EVSTWHO:
13685 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13686 case SPE_BUILTIN_EVSTWWE:
13687 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13688 case SPE_BUILTIN_EVSTWWO:
13689 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13690 case SPE_BUILTIN_MFSPEFSCR:
13691 icode = CODE_FOR_spe_mfspefscr;
13692 tmode = insn_data[icode].operand[0].mode;
13694 if (target == 0
13695 || GET_MODE (target) != tmode
13696 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13697 target = gen_reg_rtx (tmode);
13699 pat = GEN_FCN (icode) (target);
13700 if (! pat)
13701 return 0;
13702 emit_insn (pat);
13703 return target;
13704 case SPE_BUILTIN_MTSPEFSCR:
13705 icode = CODE_FOR_spe_mtspefscr;
13706 arg0 = CALL_EXPR_ARG (exp, 0);
13707 op0 = expand_normal (arg0);
13708 mode0 = insn_data[icode].operand[0].mode;
13710 if (arg0 == error_mark_node)
13711 return const0_rtx;
13713 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13714 op0 = copy_to_mode_reg (mode0, op0);
13716 pat = GEN_FCN (icode) (op0);
13717 if (pat)
13718 emit_insn (pat);
13719 return NULL_RTX;
13720 default:
13721 break;
13724 *expandedp = false;
13725 return NULL_RTX;
13728 static rtx
13729 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13731 rtx pat, scratch, tmp;
13732 tree form = CALL_EXPR_ARG (exp, 0);
13733 tree arg0 = CALL_EXPR_ARG (exp, 1);
13734 tree arg1 = CALL_EXPR_ARG (exp, 2);
13735 rtx op0 = expand_normal (arg0);
13736 rtx op1 = expand_normal (arg1);
13737 machine_mode mode0 = insn_data[icode].operand[1].mode;
13738 machine_mode mode1 = insn_data[icode].operand[2].mode;
13739 int form_int;
13740 enum rtx_code code;
13742 if (TREE_CODE (form) != INTEGER_CST)
13744 error ("argument 1 of __builtin_paired_predicate must be a constant");
13745 return const0_rtx;
13747 else
13748 form_int = TREE_INT_CST_LOW (form);
13750 gcc_assert (mode0 == mode1);
13752 if (arg0 == error_mark_node || arg1 == error_mark_node)
13753 return const0_rtx;
13755 if (target == 0
13756 || GET_MODE (target) != SImode
13757 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13758 target = gen_reg_rtx (SImode);
13759 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13760 op0 = copy_to_mode_reg (mode0, op0);
13761 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13762 op1 = copy_to_mode_reg (mode1, op1);
13764 scratch = gen_reg_rtx (CCFPmode);
13766 pat = GEN_FCN (icode) (scratch, op0, op1);
13767 if (!pat)
13768 return const0_rtx;
13770 emit_insn (pat);
13772 switch (form_int)
13774 /* LT bit. */
13775 case 0:
13776 code = LT;
13777 break;
13778 /* GT bit. */
13779 case 1:
13780 code = GT;
13781 break;
13782 /* EQ bit. */
13783 case 2:
13784 code = EQ;
13785 break;
13786 /* UN bit. */
13787 case 3:
13788 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13789 return target;
13790 default:
13791 error ("argument 1 of __builtin_paired_predicate is out of range");
13792 return const0_rtx;
13795 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13796 emit_move_insn (target, tmp);
13797 return target;
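/* Editor's note (illustrative, hedged): FORM selects which CR bit of
   the single comparison is read back: 0 maps to LT, 1 to GT, 2 to EQ,
   and 3 to the unordered bit, which is fetched with
   gen_move_from_CR_ov_bit instead of a comparison RTX.  A hypothetical
   predicate call

     r = __some_paired_predicate (2, a, b);

   (name invented purely for illustration) would thus emit one CCFPmode
   compare followed by (eq:SI (reg:CCFP) (const_int 0)) to extract the
   EQ bit into r.  */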
13800 static rtx
13801 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13803 rtx pat, scratch, tmp;
13804 tree form = CALL_EXPR_ARG (exp, 0);
13805 tree arg0 = CALL_EXPR_ARG (exp, 1);
13806 tree arg1 = CALL_EXPR_ARG (exp, 2);
13807 rtx op0 = expand_normal (arg0);
13808 rtx op1 = expand_normal (arg1);
13809 machine_mode mode0 = insn_data[icode].operand[1].mode;
13810 machine_mode mode1 = insn_data[icode].operand[2].mode;
13811 int form_int;
13812 enum rtx_code code;
13814 if (TREE_CODE (form) != INTEGER_CST)
13816 error ("argument 1 of __builtin_spe_predicate must be a constant");
13817 return const0_rtx;
13819 else
13820 form_int = TREE_INT_CST_LOW (form);
13822 gcc_assert (mode0 == mode1);
13824 if (arg0 == error_mark_node || arg1 == error_mark_node)
13825 return const0_rtx;
13827 if (target == 0
13828 || GET_MODE (target) != SImode
13829 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13830 target = gen_reg_rtx (SImode);
13832 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13833 op0 = copy_to_mode_reg (mode0, op0);
13834 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13835 op1 = copy_to_mode_reg (mode1, op1);
13837 scratch = gen_reg_rtx (CCmode);
13839 pat = GEN_FCN (icode) (scratch, op0, op1);
13840 if (! pat)
13841 return const0_rtx;
13842 emit_insn (pat);
13844 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13845 _lower_. We use one compare, but look in different bits of the
13846 CR for each variant.
13848 There are 2 elements in each SPE simd type (upper/lower). The CR
13849 bits are set as follows:
13851  BIT0  | BIT 1  |  BIT 2   |  BIT 3
13852   U    |   L    | (U | L)  | (U & L)
13854 So, for an "all" relationship, BIT 3 would be set.
13855 For an "any" relationship, BIT 2 would be set. Etc.
13857 Following traditional nomenclature, these bits map to:
13859  BIT0  | BIT 1  |  BIT 2   |  BIT 3
13860   LT   |   GT   |    EQ    |    OV
13862 Later, we will generate rtl to look in the OV, EQ, LT or GT bit, depending on the form.
13865 switch (form_int)
13867 /* All variant. OV bit. */
13868 case 0:
13869 /* We need to get to the OV bit, which is the ORDERED bit. We
13870 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13871 that's ugly and will make validate_condition_mode die.
13872 So let's just use another pattern. */
13873 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13874 return target;
13875 /* Any variant. EQ bit. */
13876 case 1:
13877 code = EQ;
13878 break;
13879 /* Upper variant. LT bit. */
13880 case 2:
13881 code = LT;
13882 break;
13883 /* Lower variant. GT bit. */
13884 case 3:
13885 code = GT;
13886 break;
13887 default:
13888 error ("argument 1 of __builtin_spe_predicate is out of range");
13889 return const0_rtx;
13892 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13893 emit_move_insn (target, tmp);
13895 return target;
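/* Editor's note (illustrative): tying the comment's table to the switch
   above -- with lanes U and L, one compare sets LT=U, GT=L, EQ=(U|L)
   and OV=(U&L), so "all" (form 0) reads OV, "any" (form 1) reads EQ,
   "upper" (form 2) reads LT and "lower" (form 3) reads GT.  Assuming
   the builtin spelling used by the SPE descriptor tables, an
   any-greater-than test looks like

     int r = __builtin_spe_evcmpgts (1, a, b);

   which emits one evcmpgts into a CC register and then a single
   (eq:SI (reg:CC) (const_int 0)) bit extraction.  */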
13898 /* The evsel builtins look like this:
13900 e = __builtin_spe_evsel_OP (a, b, c, d);
13902 and work like this:
13904 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13905 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
13908 static rtx
13909 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13911 rtx pat, scratch;
13912 tree arg0 = CALL_EXPR_ARG (exp, 0);
13913 tree arg1 = CALL_EXPR_ARG (exp, 1);
13914 tree arg2 = CALL_EXPR_ARG (exp, 2);
13915 tree arg3 = CALL_EXPR_ARG (exp, 3);
13916 rtx op0 = expand_normal (arg0);
13917 rtx op1 = expand_normal (arg1);
13918 rtx op2 = expand_normal (arg2);
13919 rtx op3 = expand_normal (arg3);
13920 machine_mode mode0 = insn_data[icode].operand[1].mode;
13921 machine_mode mode1 = insn_data[icode].operand[2].mode;
13923 gcc_assert (mode0 == mode1);
13925 if (arg0 == error_mark_node || arg1 == error_mark_node
13926 || arg2 == error_mark_node || arg3 == error_mark_node)
13927 return const0_rtx;
13929 if (target == 0
13930 || GET_MODE (target) != mode0
13931 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
13932 target = gen_reg_rtx (mode0);
13934 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13935 op0 = copy_to_mode_reg (mode0, op0);
13936 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13937 op1 = copy_to_mode_reg (mode0, op1);
13938 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13939 op2 = copy_to_mode_reg (mode0, op2);
13940 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
13941 op3 = copy_to_mode_reg (mode0, op3);
13943 /* Generate the compare. */
13944 scratch = gen_reg_rtx (CCmode);
13945 pat = GEN_FCN (icode) (scratch, op0, op1);
13946 if (! pat)
13947 return const0_rtx;
13948 emit_insn (pat);
13950 if (mode0 == V2SImode)
13951 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
13952 else
13953 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
13955 return target;
13958 /* Raise an error message for a builtin function that is called without the
13959 appropriate target options being set. */
13961 static void
13962 rs6000_invalid_builtin (enum rs6000_builtins fncode)
13964 size_t uns_fncode = (size_t)fncode;
13965 const char *name = rs6000_builtin_info[uns_fncode].name;
13966 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
13968 gcc_assert (name != NULL);
13969 if ((fnmask & RS6000_BTM_CELL) != 0)
13970 error ("Builtin function %s is only valid for the cell processor", name);
13971 else if ((fnmask & RS6000_BTM_VSX) != 0)
13972 error ("Builtin function %s requires the -mvsx option", name);
13973 else if ((fnmask & RS6000_BTM_HTM) != 0)
13974 error ("Builtin function %s requires the -mhtm option", name);
13975 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
13976 error ("Builtin function %s requires the -maltivec option", name);
13977 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
13978 error ("Builtin function %s requires the -mpaired option", name);
13979 else if ((fnmask & RS6000_BTM_SPE) != 0)
13980 error ("Builtin function %s requires the -mspe option", name);
13981 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13982 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13983 error ("Builtin function %s requires the -mhard-dfp and"
13984 " -mpower8-vector options", name);
13985 else if ((fnmask & RS6000_BTM_DFP) != 0)
13986 error ("Builtin function %s requires the -mhard-dfp option", name);
13987 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
13988 error ("Builtin function %s requires the -mpower8-vector option", name);
13989 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13990 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13991 error ("Builtin function %s requires the -mhard-float and"
13992 " -mlong-double-128 options", name);
13993 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
13994 error ("Builtin function %s requires the -mhard-float option", name);
13995 else
13996 error ("Builtin function %s is not supported with the current options",
13997 name);
14000 /* Expand an expression EXP that calls a built-in function,
14001 with result going to TARGET if that's convenient
14002 (and in mode MODE if that's convenient).
14003 SUBTARGET may be used as the target for computing one of EXP's operands.
14004 IGNORE is nonzero if the value is to be ignored. */
14006 static rtx
14007 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14008 machine_mode mode ATTRIBUTE_UNUSED,
14009 int ignore ATTRIBUTE_UNUSED)
14011 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14012 enum rs6000_builtins fcode
14013 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
14014 size_t uns_fcode = (size_t)fcode;
14015 const struct builtin_description *d;
14016 size_t i;
14017 rtx ret;
14018 bool success;
14019 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14020 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
14022 if (TARGET_DEBUG_BUILTIN)
14024 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14025 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14026 const char *name2 = ((icode != CODE_FOR_nothing)
14027 ? get_insn_name ((int)icode)
14028 : "nothing");
14029 const char *name3;
14031 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
14033 default: name3 = "unknown"; break;
14034 case RS6000_BTC_SPECIAL: name3 = "special"; break;
14035 case RS6000_BTC_UNARY: name3 = "unary"; break;
14036 case RS6000_BTC_BINARY: name3 = "binary"; break;
14037 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
14038 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
14039 case RS6000_BTC_ABS: name3 = "abs"; break;
14040 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
14041 case RS6000_BTC_DST: name3 = "dst"; break;
14045 fprintf (stderr,
14046 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
14047 (name1) ? name1 : "---", fcode,
14048 (name2) ? name2 : "---", (int)icode,
14049 name3,
14050 func_valid_p ? "" : ", not valid");
14053 if (!func_valid_p)
14055 rs6000_invalid_builtin (fcode);
14057 /* Given it is invalid, just generate a normal call. */
14058 return expand_call (exp, target, ignore);
14061 switch (fcode)
14063 case RS6000_BUILTIN_RECIP:
14064 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
14066 case RS6000_BUILTIN_RECIPF:
14067 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
14069 case RS6000_BUILTIN_RSQRTF:
14070 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
14072 case RS6000_BUILTIN_RSQRT:
14073 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
14075 case POWER7_BUILTIN_BPERMD:
14076 return rs6000_expand_binop_builtin (((TARGET_64BIT)
14077 ? CODE_FOR_bpermd_di
14078 : CODE_FOR_bpermd_si), exp, target);
14080 case RS6000_BUILTIN_GET_TB:
14081 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
14082 target);
14084 case RS6000_BUILTIN_MFTB:
14085 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14086 ? CODE_FOR_rs6000_mftb_di
14087 : CODE_FOR_rs6000_mftb_si),
14088 target);
14090 case RS6000_BUILTIN_MFFS:
14091 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14093 case RS6000_BUILTIN_MTFSF:
14094 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14096 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14097 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14099 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14100 : (int) CODE_FOR_altivec_lvsl_direct);
14101 machine_mode tmode = insn_data[icode].operand[0].mode;
14102 machine_mode mode = insn_data[icode].operand[1].mode;
14103 tree arg;
14104 rtx op, addr, pat;
14106 gcc_assert (TARGET_ALTIVEC);
14108 arg = CALL_EXPR_ARG (exp, 0);
14109 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14110 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14111 addr = memory_address (mode, op);
14112 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14113 op = addr;
14114 else
14116 /* For the load case we need to negate the address. */
14117 op = gen_reg_rtx (GET_MODE (addr));
14118 emit_insn (gen_rtx_SET (VOIDmode, op,
14119 gen_rtx_NEG (GET_MODE (addr), addr)));
14121 op = gen_rtx_MEM (mode, op);
14123 if (target == 0
14124 || GET_MODE (target) != tmode
14125 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14126 target = gen_reg_rtx (tmode);
14128 pat = GEN_FCN (icode) (target, op);
14129 if (!pat)
14130 return 0;
14131 emit_insn (pat);
14133 return target;
14136 case ALTIVEC_BUILTIN_VCFUX:
14137 case ALTIVEC_BUILTIN_VCFSX:
14138 case ALTIVEC_BUILTIN_VCTUXS:
14139 case ALTIVEC_BUILTIN_VCTSXS:
14140 /* FIXME: There's got to be a nicer way to handle this case than
14141 constructing a new CALL_EXPR. */
14142 if (call_expr_nargs (exp) == 1)
14144 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14145 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14147 break;
14149 default:
14150 break;
14153 if (TARGET_ALTIVEC)
14155 ret = altivec_expand_builtin (exp, target, &success);
14157 if (success)
14158 return ret;
14160 if (TARGET_SPE)
14162 ret = spe_expand_builtin (exp, target, &success);
14164 if (success)
14165 return ret;
14167 if (TARGET_PAIRED_FLOAT)
14169 ret = paired_expand_builtin (exp, target, &success);
14171 if (success)
14172 return ret;
14174 if (TARGET_HTM)
14176 ret = htm_expand_builtin (exp, target, &success);
14178 if (success)
14179 return ret;
14182 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14183 gcc_assert (attr == RS6000_BTC_UNARY
14184 || attr == RS6000_BTC_BINARY
14185 || attr == RS6000_BTC_TERNARY);
14187 /* Handle simple unary operations. */
14188 d = bdesc_1arg;
14189 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14190 if (d->code == fcode)
14191 return rs6000_expand_unop_builtin (d->icode, exp, target);
14193 /* Handle simple binary operations. */
14194 d = bdesc_2arg;
14195 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14196 if (d->code == fcode)
14197 return rs6000_expand_binop_builtin (d->icode, exp, target);
14199 /* Handle simple ternary operations. */
14200 d = bdesc_3arg;
14201 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14202 if (d->code == fcode)
14203 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14205 gcc_unreachable ();
14208 static void
14209 rs6000_init_builtins (void)
14211 tree tdecl;
14212 tree ftype;
14213 machine_mode mode;
14215 if (TARGET_DEBUG_BUILTIN)
14216 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14217 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14218 (TARGET_SPE) ? ", spe" : "",
14219 (TARGET_ALTIVEC) ? ", altivec" : "",
14220 (TARGET_VSX) ? ", vsx" : "");
14222 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14223 V2SF_type_node = build_vector_type (float_type_node, 2);
14224 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14225 V2DF_type_node = build_vector_type (double_type_node, 2);
14226 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14227 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14228 V4SF_type_node = build_vector_type (float_type_node, 4);
14229 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14230 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14232 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14233 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14234 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14235 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14237 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14238 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14239 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14240 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14242 /* We use V1TI mode as a special container to hold __int128_t items that
14243 must live in VSX registers. */
14244 if (intTI_type_node)
14246 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14247 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
14250 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14251 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14252 'vector unsigned short'. */
14254 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14255 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14256 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14257 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14258 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
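/* Editor's note: a short C++ illustration (not from the original file)
   of why distinct type copies matter here.  Overload resolution must be
   able to tell the flavours apart:

     void f (vector bool int);
     void f (vector unsigned int);

     vector bool int m;
     f (m);                        // must select the first overload

   If bool_int_type_node simply aliased unsigned_intSI_type_node, the
   two declarations of f would clash, and 'vector pixel' would likewise
   be indistinguishable from 'vector unsigned short'.  */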
14260 long_integer_type_internal_node = long_integer_type_node;
14261 long_unsigned_type_internal_node = long_unsigned_type_node;
14262 long_long_integer_type_internal_node = long_long_integer_type_node;
14263 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14264 intQI_type_internal_node = intQI_type_node;
14265 uintQI_type_internal_node = unsigned_intQI_type_node;
14266 intHI_type_internal_node = intHI_type_node;
14267 uintHI_type_internal_node = unsigned_intHI_type_node;
14268 intSI_type_internal_node = intSI_type_node;
14269 uintSI_type_internal_node = unsigned_intSI_type_node;
14270 intDI_type_internal_node = intDI_type_node;
14271 uintDI_type_internal_node = unsigned_intDI_type_node;
14272 intTI_type_internal_node = intTI_type_node;
14273 uintTI_type_internal_node = unsigned_intTI_type_node;
14274 float_type_internal_node = float_type_node;
14275 double_type_internal_node = double_type_node;
14276 long_double_type_internal_node = long_double_type_node;
14277 dfloat64_type_internal_node = dfloat64_type_node;
14278 dfloat128_type_internal_node = dfloat128_type_node;
14279 void_type_internal_node = void_type_node;
14281 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14282 tree type node. */
14283 builtin_mode_to_type[QImode][0] = integer_type_node;
14284 builtin_mode_to_type[HImode][0] = integer_type_node;
14285 builtin_mode_to_type[SImode][0] = intSI_type_node;
14286 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14287 builtin_mode_to_type[DImode][0] = intDI_type_node;
14288 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14289 builtin_mode_to_type[TImode][0] = intTI_type_node;
14290 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14291 builtin_mode_to_type[SFmode][0] = float_type_node;
14292 builtin_mode_to_type[DFmode][0] = double_type_node;
14293 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14294 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14295 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14296 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14297 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14298 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14299 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14300 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14301 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14302 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14303 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14304 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14305 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14306 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14307 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14308 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14309 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14310 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
14312 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14313 TYPE_NAME (bool_char_type_node) = tdecl;
14315 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14316 TYPE_NAME (bool_short_type_node) = tdecl;
14318 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14319 TYPE_NAME (bool_int_type_node) = tdecl;
14321 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14322 TYPE_NAME (pixel_type_node) = tdecl;
14324 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14325 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14326 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14327 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14328 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14330 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14331 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14333 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14334 TYPE_NAME (V16QI_type_node) = tdecl;
14336 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14337 TYPE_NAME (bool_V16QI_type_node) = tdecl;
14339 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14340 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14342 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14343 TYPE_NAME (V8HI_type_node) = tdecl;
14345 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14346 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14348 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14349 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14351 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14352 TYPE_NAME (V4SI_type_node) = tdecl;
14354 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14355 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14357 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14358 TYPE_NAME (V4SF_type_node) = tdecl;
14360 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14361 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14363 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14364 TYPE_NAME (V2DF_type_node) = tdecl;
14366 if (TARGET_POWERPC64)
14367 {
14368 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14369 TYPE_NAME (V2DI_type_node) = tdecl;
14371 tdecl = add_builtin_type ("__vector unsigned long",
14372 unsigned_V2DI_type_node);
14373 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14375 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14376 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14377 }
14378 else
14379 {
14380 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14381 TYPE_NAME (V2DI_type_node) = tdecl;
14383 tdecl = add_builtin_type ("__vector unsigned long long",
14384 unsigned_V2DI_type_node);
14385 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14387 tdecl = add_builtin_type ("__vector __bool long long",
14388 bool_V2DI_type_node);
14389 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14390 }
14392 if (V1TI_type_node)
14393 {
14394 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14395 TYPE_NAME (V1TI_type_node) = tdecl;
14397 tdecl = add_builtin_type ("__vector unsigned __int128",
14398 unsigned_V1TI_type_node);
14399 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14400 }
14402 /* Paired and SPE builtins are only available if you build a compiler with
14403 the appropriate options, so only create those builtins with the
14404 appropriate compiler option. Create Altivec and VSX builtins on machines
14405 with at least the general purpose extensions (970 and newer) to allow the
14406 use of the target attribute. */
14407 if (TARGET_PAIRED_FLOAT)
14408 paired_init_builtins ();
14409 if (TARGET_SPE)
14410 spe_init_builtins ();
14411 if (TARGET_EXTRA_BUILTINS)
14412 altivec_init_builtins ();
14413 if (TARGET_HTM)
14414 htm_init_builtins ();
14416 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14417 rs6000_common_init_builtins ();
14419 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14420 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14421 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14423 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14424 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14425 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14427 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14428 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14429 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14431 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14432 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14433 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
14435 mode = (TARGET_64BIT) ? DImode : SImode;
14436 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14437 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14438 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14440 ftype = build_function_type_list (unsigned_intDI_type_node,
14441 NULL_TREE);
14442 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14444 if (TARGET_64BIT)
14445 ftype = build_function_type_list (unsigned_intDI_type_node,
14446 NULL_TREE);
14447 else
14448 ftype = build_function_type_list (unsigned_intSI_type_node,
14449 NULL_TREE);
14450 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
14452 ftype = build_function_type_list (double_type_node, NULL_TREE);
14453 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14455 ftype = build_function_type_list (void_type_node,
14456 intSI_type_node, double_type_node,
14457 NULL_TREE);
14458 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
14460 #if TARGET_XCOFF
14461 /* AIX libm provides clog as __clog. */
14462 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14463 set_user_assembler_name (tdecl, "__clog");
14464 #endif
14466 #ifdef SUBTARGET_INIT_BUILTINS
14467 SUBTARGET_INIT_BUILTINS;
14468 #endif
14471 /* Returns the rs6000 builtin decl for CODE. */
14473 static tree
14474 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14475 {
14476 HOST_WIDE_INT fnmask;
14478 if (code >= RS6000_BUILTIN_COUNT)
14479 return error_mark_node;
14481 fnmask = rs6000_builtin_info[code].mask;
14482 if ((fnmask & rs6000_builtin_mask) != fnmask)
14483 {
14484 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14485 return error_mark_node;
14486 }
14488 return rs6000_builtin_decls[code];
14489 }
14491 static void
14492 spe_init_builtins (void)
14493 {
14494 tree puint_type_node = build_pointer_type (unsigned_type_node);
14495 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14496 const struct builtin_description *d;
14497 size_t i;
14499 tree v2si_ftype_4_v2si
14500 = build_function_type_list (opaque_V2SI_type_node,
14501 opaque_V2SI_type_node,
14502 opaque_V2SI_type_node,
14503 opaque_V2SI_type_node,
14504 opaque_V2SI_type_node,
14505 NULL_TREE);
14507 tree v2sf_ftype_4_v2sf
14508 = build_function_type_list (opaque_V2SF_type_node,
14509 opaque_V2SF_type_node,
14510 opaque_V2SF_type_node,
14511 opaque_V2SF_type_node,
14512 opaque_V2SF_type_node,
14513 NULL_TREE);
14515 tree int_ftype_int_v2si_v2si
14516 = build_function_type_list (integer_type_node,
14517 integer_type_node,
14518 opaque_V2SI_type_node,
14519 opaque_V2SI_type_node,
14520 NULL_TREE);
14522 tree int_ftype_int_v2sf_v2sf
14523 = build_function_type_list (integer_type_node,
14524 integer_type_node,
14525 opaque_V2SF_type_node,
14526 opaque_V2SF_type_node,
14527 NULL_TREE);
14529 tree void_ftype_v2si_puint_int
14530 = build_function_type_list (void_type_node,
14531 opaque_V2SI_type_node,
14532 puint_type_node,
14533 integer_type_node,
14534 NULL_TREE);
14536 tree void_ftype_v2si_puint_char
14537 = build_function_type_list (void_type_node,
14538 opaque_V2SI_type_node,
14539 puint_type_node,
14540 char_type_node,
14541 NULL_TREE);
14543 tree void_ftype_v2si_pv2si_int
14544 = build_function_type_list (void_type_node,
14545 opaque_V2SI_type_node,
14546 opaque_p_V2SI_type_node,
14547 integer_type_node,
14548 NULL_TREE);
14550 tree void_ftype_v2si_pv2si_char
14551 = build_function_type_list (void_type_node,
14552 opaque_V2SI_type_node,
14553 opaque_p_V2SI_type_node,
14554 char_type_node,
14555 NULL_TREE);
14557 tree void_ftype_int
14558 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14560 tree int_ftype_void
14561 = build_function_type_list (integer_type_node, NULL_TREE);
14563 tree v2si_ftype_pv2si_int
14564 = build_function_type_list (opaque_V2SI_type_node,
14565 opaque_p_V2SI_type_node,
14566 integer_type_node,
14567 NULL_TREE);
14569 tree v2si_ftype_puint_int
14570 = build_function_type_list (opaque_V2SI_type_node,
14571 puint_type_node,
14572 integer_type_node,
14573 NULL_TREE);
14575 tree v2si_ftype_pushort_int
14576 = build_function_type_list (opaque_V2SI_type_node,
14577 pushort_type_node,
14578 integer_type_node,
14579 NULL_TREE);
14581 tree v2si_ftype_signed_char
14582 = build_function_type_list (opaque_V2SI_type_node,
14583 signed_char_type_node,
14584 NULL_TREE);
14586 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14588 /* Initialize irregular SPE builtins. */
14590 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14591 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14592 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14593 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14594 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14595 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14596 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14597 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14598 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14599 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14600 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14601 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14602 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14603 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14604 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14605 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14606 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14607 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14609 /* Loads. */
14610 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14611 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14612 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14613 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14614 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14615 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14616 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14617 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14618 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14619 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14620 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14621 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14622 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14623 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14624 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14625 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14626 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14627 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14628 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14629 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14630 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14631 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
14633 /* Predicates. */
14634 d = bdesc_spe_predicates;
14635 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14636 {
14637 tree type;
14639 switch (insn_data[d->icode].operand[1].mode)
14640 {
14641 case V2SImode:
14642 type = int_ftype_int_v2si_v2si;
14643 break;
14644 case V2SFmode:
14645 type = int_ftype_int_v2sf_v2sf;
14646 break;
14647 default:
14648 gcc_unreachable ();
14649 }
14651 def_builtin (d->name, type, d->code);
14652 }
14654 /* Evsel predicates. */
14655 d = bdesc_spe_evsel;
14656 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14657 {
14658 tree type;
14660 switch (insn_data[d->icode].operand[1].mode)
14661 {
14662 case V2SImode:
14663 type = v2si_ftype_4_v2si;
14664 break;
14665 case V2SFmode:
14666 type = v2sf_ftype_4_v2sf;
14667 break;
14668 default:
14669 gcc_unreachable ();
14670 }
14672 def_builtin (d->name, type, d->code);
14673 }
14674 }
14676 static void
14677 paired_init_builtins (void)
14678 {
14679 const struct builtin_description *d;
14680 size_t i;
14682 tree int_ftype_int_v2sf_v2sf
14683 = build_function_type_list (integer_type_node,
14684 integer_type_node,
14685 V2SF_type_node,
14686 V2SF_type_node,
14687 NULL_TREE);
14688 tree pcfloat_type_node =
14689 build_pointer_type (build_qualified_type
14690 (float_type_node, TYPE_QUAL_CONST));
14692 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14693 long_integer_type_node,
14694 pcfloat_type_node,
14695 NULL_TREE);
14696 tree void_ftype_v2sf_long_pcfloat =
14697 build_function_type_list (void_type_node,
14698 V2SF_type_node,
14699 long_integer_type_node,
14700 pcfloat_type_node,
14701 NULL_TREE);
14704 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14705 PAIRED_BUILTIN_LX);
14708 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14709 PAIRED_BUILTIN_STX);
14711 /* Predicates. */
14712 d = bdesc_paired_preds;
14713 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14714 {
14715 tree type;
14717 if (TARGET_DEBUG_BUILTIN)
14718 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14719 (int)i, get_insn_name (d->icode), (int)d->icode,
14720 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14722 switch (insn_data[d->icode].operand[1].mode)
14723 {
14724 case V2SFmode:
14725 type = int_ftype_int_v2sf_v2sf;
14726 break;
14727 default:
14728 gcc_unreachable ();
14729 }
14731 def_builtin (d->name, type, d->code);
14732 }
14733 }
14735 static void
14736 altivec_init_builtins (void)
14737 {
14738 const struct builtin_description *d;
14739 size_t i;
14740 tree ftype;
14741 tree decl;
14743 tree pvoid_type_node = build_pointer_type (void_type_node);
14745 tree pcvoid_type_node
14746 = build_pointer_type (build_qualified_type (void_type_node,
14747 TYPE_QUAL_CONST));
14749 tree int_ftype_opaque
14750 = build_function_type_list (integer_type_node,
14751 opaque_V4SI_type_node, NULL_TREE);
14752 tree opaque_ftype_opaque
14753 = build_function_type_list (integer_type_node, NULL_TREE);
14754 tree opaque_ftype_opaque_int
14755 = build_function_type_list (opaque_V4SI_type_node,
14756 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14757 tree opaque_ftype_opaque_opaque_int
14758 = build_function_type_list (opaque_V4SI_type_node,
14759 opaque_V4SI_type_node, opaque_V4SI_type_node,
14760 integer_type_node, NULL_TREE);
14761 tree int_ftype_int_opaque_opaque
14762 = build_function_type_list (integer_type_node,
14763 integer_type_node, opaque_V4SI_type_node,
14764 opaque_V4SI_type_node, NULL_TREE);
14765 tree int_ftype_int_v4si_v4si
14766 = build_function_type_list (integer_type_node,
14767 integer_type_node, V4SI_type_node,
14768 V4SI_type_node, NULL_TREE);
14769 tree int_ftype_int_v2di_v2di
14770 = build_function_type_list (integer_type_node,
14771 integer_type_node, V2DI_type_node,
14772 V2DI_type_node, NULL_TREE);
14773 tree void_ftype_v4si
14774 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14775 tree v8hi_ftype_void
14776 = build_function_type_list (V8HI_type_node, NULL_TREE);
14777 tree void_ftype_void
14778 = build_function_type_list (void_type_node, NULL_TREE);
14779 tree void_ftype_int
14780 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14782 tree opaque_ftype_long_pcvoid
14783 = build_function_type_list (opaque_V4SI_type_node,
14784 long_integer_type_node, pcvoid_type_node,
14785 NULL_TREE);
14786 tree v16qi_ftype_long_pcvoid
14787 = build_function_type_list (V16QI_type_node,
14788 long_integer_type_node, pcvoid_type_node,
14789 NULL_TREE);
14790 tree v8hi_ftype_long_pcvoid
14791 = build_function_type_list (V8HI_type_node,
14792 long_integer_type_node, pcvoid_type_node,
14793 NULL_TREE);
14794 tree v4si_ftype_long_pcvoid
14795 = build_function_type_list (V4SI_type_node,
14796 long_integer_type_node, pcvoid_type_node,
14797 NULL_TREE);
14798 tree v4sf_ftype_long_pcvoid
14799 = build_function_type_list (V4SF_type_node,
14800 long_integer_type_node, pcvoid_type_node,
14801 NULL_TREE);
14802 tree v2df_ftype_long_pcvoid
14803 = build_function_type_list (V2DF_type_node,
14804 long_integer_type_node, pcvoid_type_node,
14805 NULL_TREE);
14806 tree v2di_ftype_long_pcvoid
14807 = build_function_type_list (V2DI_type_node,
14808 long_integer_type_node, pcvoid_type_node,
14809 NULL_TREE);
14811 tree void_ftype_opaque_long_pvoid
14812 = build_function_type_list (void_type_node,
14813 opaque_V4SI_type_node, long_integer_type_node,
14814 pvoid_type_node, NULL_TREE);
14815 tree void_ftype_v4si_long_pvoid
14816 = build_function_type_list (void_type_node,
14817 V4SI_type_node, long_integer_type_node,
14818 pvoid_type_node, NULL_TREE);
14819 tree void_ftype_v16qi_long_pvoid
14820 = build_function_type_list (void_type_node,
14821 V16QI_type_node, long_integer_type_node,
14822 pvoid_type_node, NULL_TREE);
14823 tree void_ftype_v8hi_long_pvoid
14824 = build_function_type_list (void_type_node,
14825 V8HI_type_node, long_integer_type_node,
14826 pvoid_type_node, NULL_TREE);
14827 tree void_ftype_v4sf_long_pvoid
14828 = build_function_type_list (void_type_node,
14829 V4SF_type_node, long_integer_type_node,
14830 pvoid_type_node, NULL_TREE);
14831 tree void_ftype_v2df_long_pvoid
14832 = build_function_type_list (void_type_node,
14833 V2DF_type_node, long_integer_type_node,
14834 pvoid_type_node, NULL_TREE);
14835 tree void_ftype_v2di_long_pvoid
14836 = build_function_type_list (void_type_node,
14837 V2DI_type_node, long_integer_type_node,
14838 pvoid_type_node, NULL_TREE);
14839 tree int_ftype_int_v8hi_v8hi
14840 = build_function_type_list (integer_type_node,
14841 integer_type_node, V8HI_type_node,
14842 V8HI_type_node, NULL_TREE);
14843 tree int_ftype_int_v16qi_v16qi
14844 = build_function_type_list (integer_type_node,
14845 integer_type_node, V16QI_type_node,
14846 V16QI_type_node, NULL_TREE);
14847 tree int_ftype_int_v4sf_v4sf
14848 = build_function_type_list (integer_type_node,
14849 integer_type_node, V4SF_type_node,
14850 V4SF_type_node, NULL_TREE);
14851 tree int_ftype_int_v2df_v2df
14852 = build_function_type_list (integer_type_node,
14853 integer_type_node, V2DF_type_node,
14854 V2DF_type_node, NULL_TREE);
14855 tree v2di_ftype_v2di
14856 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14857 tree v4si_ftype_v4si
14858 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14859 tree v8hi_ftype_v8hi
14860 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14861 tree v16qi_ftype_v16qi
14862 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14863 tree v4sf_ftype_v4sf
14864 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14865 tree v2df_ftype_v2df
14866 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14867 tree void_ftype_pcvoid_int_int
14868 = build_function_type_list (void_type_node,
14869 pcvoid_type_node, integer_type_node,
14870 integer_type_node, NULL_TREE);
14872 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14873 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14874 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14875 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14876 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14877 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14878 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14879 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14880 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14881 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14882 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14883 ALTIVEC_BUILTIN_LVXL_V2DF);
14884 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14885 ALTIVEC_BUILTIN_LVXL_V2DI);
14886 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14887 ALTIVEC_BUILTIN_LVXL_V4SF);
14888 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14889 ALTIVEC_BUILTIN_LVXL_V4SI);
14890 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
14891 ALTIVEC_BUILTIN_LVXL_V8HI);
14892 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
14893 ALTIVEC_BUILTIN_LVXL_V16QI);
14894 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
14895 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
14896 ALTIVEC_BUILTIN_LVX_V2DF);
14897 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
14898 ALTIVEC_BUILTIN_LVX_V2DI);
14899 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
14900 ALTIVEC_BUILTIN_LVX_V4SF);
14901 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
14902 ALTIVEC_BUILTIN_LVX_V4SI);
14903 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
14904 ALTIVEC_BUILTIN_LVX_V8HI);
14905 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
14906 ALTIVEC_BUILTIN_LVX_V16QI);
14907 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
14908 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
14909 ALTIVEC_BUILTIN_STVX_V2DF);
14910 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
14911 ALTIVEC_BUILTIN_STVX_V2DI);
14912 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
14913 ALTIVEC_BUILTIN_STVX_V4SF);
14914 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
14915 ALTIVEC_BUILTIN_STVX_V4SI);
14916 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
14917 ALTIVEC_BUILTIN_STVX_V8HI);
14918 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
14919 ALTIVEC_BUILTIN_STVX_V16QI);
14920 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
14921 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
14922 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
14923 ALTIVEC_BUILTIN_STVXL_V2DF);
14924 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
14925 ALTIVEC_BUILTIN_STVXL_V2DI);
14926 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
14927 ALTIVEC_BUILTIN_STVXL_V4SF);
14928 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
14929 ALTIVEC_BUILTIN_STVXL_V4SI);
14930 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
14931 ALTIVEC_BUILTIN_STVXL_V8HI);
14932 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
14933 ALTIVEC_BUILTIN_STVXL_V16QI);
14934 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
14935 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
14936 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
14937 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
14938 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
14939 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
14940 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
14941 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
14942 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
14943 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
14944 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
14945 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
14946 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
14947 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
14948 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
14949 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
14951 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
14952 VSX_BUILTIN_LXVD2X_V2DF);
14953 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
14954 VSX_BUILTIN_LXVD2X_V2DI);
14955 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
14956 VSX_BUILTIN_LXVW4X_V4SF);
14957 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
14958 VSX_BUILTIN_LXVW4X_V4SI);
14959 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
14960 VSX_BUILTIN_LXVW4X_V8HI);
14961 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
14962 VSX_BUILTIN_LXVW4X_V16QI);
14963 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
14964 VSX_BUILTIN_STXVD2X_V2DF);
14965 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
14966 VSX_BUILTIN_STXVD2X_V2DI);
14967 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
14968 VSX_BUILTIN_STXVW4X_V4SF);
14969 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
14970 VSX_BUILTIN_STXVW4X_V4SI);
14971 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
14972 VSX_BUILTIN_STXVW4X_V8HI);
14973 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
14974 VSX_BUILTIN_STXVW4X_V16QI);
14975 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
14976 VSX_BUILTIN_VEC_LD);
14977 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
14978 VSX_BUILTIN_VEC_ST);
14980 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
14981 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
14982 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
14984 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
14985 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
14986 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
14987 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
14988 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
14989 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
14990 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
14991 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
14992 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
14993 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
14994 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
14995 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
14997 /* Cell builtins. */
14998 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
14999 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
15000 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
15001 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
15003 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
15004 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
15005 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
15006 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
15008 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
15009 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
15010 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
15011 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
15013 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
15014 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
15015 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
15016 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
15018 /* Add the DST variants. */
15019 d = bdesc_dst;
15020 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15021 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
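/* Usage sketch: every DST variant gets the void_ftype_pcvoid_int_int
   signature above, e.g. (p and ctl are placeholders)
       __builtin_altivec_dst (p, ctl, 0);
   where the last operand selects one of the four data streams.  */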
15023 /* Initialize the predicates. */
15024 d = bdesc_altivec_preds;
15025 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15026 {
15027 machine_mode mode1;
15028 tree type;
15030 if (rs6000_overloaded_builtin_p (d->code))
15031 mode1 = VOIDmode;
15032 else
15033 mode1 = insn_data[d->icode].operand[1].mode;
15035 switch (mode1)
15036 {
15037 case VOIDmode:
15038 type = int_ftype_int_opaque_opaque;
15039 break;
15040 case V2DImode:
15041 type = int_ftype_int_v2di_v2di;
15042 break;
15043 case V4SImode:
15044 type = int_ftype_int_v4si_v4si;
15045 break;
15046 case V8HImode:
15047 type = int_ftype_int_v8hi_v8hi;
15048 break;
15049 case V16QImode:
15050 type = int_ftype_int_v16qi_v16qi;
15051 break;
15052 case V4SFmode:
15053 type = int_ftype_int_v4sf_v4sf;
15054 break;
15055 case V2DFmode:
15056 type = int_ftype_int_v2df_v2df;
15057 break;
15058 default:
15059 gcc_unreachable ();
15060 }
15062 def_builtin (d->name, type, d->code);
15063 }
15065 /* Initialize the abs* operators. */
15066 d = bdesc_abs;
15067 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15068 {
15069 machine_mode mode0;
15070 tree type;
15072 mode0 = insn_data[d->icode].operand[0].mode;
15074 switch (mode0)
15075 {
15076 case V2DImode:
15077 type = v2di_ftype_v2di;
15078 break;
15079 case V4SImode:
15080 type = v4si_ftype_v4si;
15081 break;
15082 case V8HImode:
15083 type = v8hi_ftype_v8hi;
15084 break;
15085 case V16QImode:
15086 type = v16qi_ftype_v16qi;
15087 break;
15088 case V4SFmode:
15089 type = v4sf_ftype_v4sf;
15090 break;
15091 case V2DFmode:
15092 type = v2df_ftype_v2df;
15093 break;
15094 default:
15095 gcc_unreachable ();
15096 }
15098 def_builtin (d->name, type, d->code);
15099 }
15101 /* Initialize the target builtin that implements
15102 targetm.vectorize.builtin_mask_for_load. */
15104 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15105 v16qi_ftype_long_pcvoid,
15106 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15107 BUILT_IN_MD, NULL, NULL_TREE);
15108 TREE_READONLY (decl) = 1;
15109 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15110 altivec_builtin_mask_for_load = decl;
15112 /* Access to the vec_init patterns. */
15113 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15114 integer_type_node, integer_type_node,
15115 integer_type_node, NULL_TREE);
15116 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15118 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15119 short_integer_type_node,
15120 short_integer_type_node,
15121 short_integer_type_node,
15122 short_integer_type_node,
15123 short_integer_type_node,
15124 short_integer_type_node,
15125 short_integer_type_node, NULL_TREE);
15126 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15128 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15129 char_type_node, char_type_node,
15130 char_type_node, char_type_node,
15131 char_type_node, char_type_node,
15132 char_type_node, char_type_node,
15133 char_type_node, char_type_node,
15134 char_type_node, char_type_node,
15135 char_type_node, char_type_node,
15136 char_type_node, NULL_TREE);
15137 def_builtin ("__builtin_vec_init_v16qi", ftype,
15138 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15140 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15141 float_type_node, float_type_node,
15142 float_type_node, NULL_TREE);
15143 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15145 /* VSX builtins. */
15146 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15147 double_type_node, NULL_TREE);
15148 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15150 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15151 intDI_type_node, NULL_TREE);
15152 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15154 /* Access to the vec_set patterns. */
15155 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15156 intSI_type_node,
15157 integer_type_node, NULL_TREE);
15158 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15160 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15161 intHI_type_node,
15162 integer_type_node, NULL_TREE);
15163 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15165 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15166 intQI_type_node,
15167 integer_type_node, NULL_TREE);
15168 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15170 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15171 float_type_node,
15172 integer_type_node, NULL_TREE);
15173 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15175 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15176 double_type_node,
15177 integer_type_node, NULL_TREE);
15178 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15180 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15181 intDI_type_node,
15182 integer_type_node, NULL_TREE);
15183 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15185 /* Access to the vec_extract patterns. */
15186 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15187 integer_type_node, NULL_TREE);
15188 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15190 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15191 integer_type_node, NULL_TREE);
15192 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15194 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15195 integer_type_node, NULL_TREE);
15196 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15198 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15199 integer_type_node, NULL_TREE);
15200 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15202 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15203 integer_type_node, NULL_TREE);
15204 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15206 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15207 integer_type_node, NULL_TREE);
15208 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
15211 if (V1TI_type_node)
15212 {
15213 tree v1ti_ftype_long_pcvoid
15214 = build_function_type_list (V1TI_type_node,
15215 long_integer_type_node, pcvoid_type_node,
15216 NULL_TREE);
15217 tree void_ftype_v1ti_long_pvoid
15218 = build_function_type_list (void_type_node,
15219 V1TI_type_node, long_integer_type_node,
15220 pvoid_type_node, NULL_TREE);
15221 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15222 VSX_BUILTIN_LXVD2X_V1TI);
15223 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15224 VSX_BUILTIN_STXVD2X_V1TI);
15225 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15226 NULL_TREE, NULL_TREE);
15227 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15228 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15229 intTI_type_node,
15230 integer_type_node, NULL_TREE);
15231 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15232 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15233 integer_type_node, NULL_TREE);
15234 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15239 static void
15240 htm_init_builtins (void)
15241 {
15242 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15243 const struct builtin_description *d;
15244 size_t i;
15246 d = bdesc_htm;
15247 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15248 {
15249 tree op[MAX_HTM_OPERANDS], type;
15250 HOST_WIDE_INT mask = d->mask;
15251 unsigned attr = rs6000_builtin_info[d->code].attr;
15252 bool void_func = (attr & RS6000_BTC_VOID);
15253 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15254 int nopnds = 0;
15255 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
15256 : unsigned_type_node;
15258 if ((mask & builtin_mask) != mask)
15259 {
15260 if (TARGET_DEBUG_BUILTIN)
15261 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
15262 continue;
15263 }
15265 if (d->name == 0)
15266 {
15267 if (TARGET_DEBUG_BUILTIN)
15268 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
15269 (long unsigned) i);
15270 continue;
15271 }
15273 op[nopnds++] = (void_func) ? void_type_node : argtype;
15275 if (attr_args == RS6000_BTC_UNARY)
15276 op[nopnds++] = argtype;
15277 else if (attr_args == RS6000_BTC_BINARY)
15278 {
15279 op[nopnds++] = argtype;
15280 op[nopnds++] = argtype;
15281 }
15282 else if (attr_args == RS6000_BTC_TERNARY)
15283 {
15284 op[nopnds++] = argtype;
15285 op[nopnds++] = argtype;
15286 op[nopnds++] = argtype;
15287 }
15289 switch (nopnds)
15290 {
15291 case 1:
15292 type = build_function_type_list (op[0], NULL_TREE);
15293 break;
15294 case 2:
15295 type = build_function_type_list (op[0], op[1], NULL_TREE);
15296 break;
15297 case 3:
15298 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15299 break;
15300 case 4:
15301 type = build_function_type_list (op[0], op[1], op[2], op[3],
15302 NULL_TREE);
15303 break;
15304 default:
15305 gcc_unreachable ();
15306 }
15308 def_builtin (d->name, type, d->code);
15309 }
15310 }
15312 /* Hash function for builtin functions with up to 3 arguments and a return
15313 type. */
15314 hashval_t
15315 builtin_hasher::hash (builtin_hash_struct *bh)
15316 {
15317 unsigned ret = 0;
15318 int i;
15320 for (i = 0; i < 4; i++)
15321 {
15322 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15323 ret = (ret * 2) + bh->uns_p[i];
15324 }
15326 return ret;
15327 }
15329 /* Compare builtin hash entries H1 and H2 for equivalence. */
15330 bool
15331 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15332 {
15333 return ((p1->mode[0] == p2->mode[0])
15334 && (p1->mode[1] == p2->mode[1])
15335 && (p1->mode[2] == p2->mode[2])
15336 && (p1->mode[3] == p2->mode[3])
15337 && (p1->uns_p[0] == p2->uns_p[0])
15338 && (p1->uns_p[1] == p2->uns_p[1])
15339 && (p1->uns_p[2] == p2->uns_p[2])
15340 && (p1->uns_p[3] == p2->uns_p[3]));
15341 }
15343 /* Map types for builtin functions with an explicit return type and up to 3
15344 arguments. Callers pass VOIDmode as the mode of any unused argument
15345 slot. */
15346 static tree
15347 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
15348 machine_mode mode_arg1, machine_mode mode_arg2,
15349 enum rs6000_builtins builtin, const char *name)
15350 {
15351 struct builtin_hash_struct h;
15352 struct builtin_hash_struct *h2;
15353 int num_args = 3;
15354 int i;
15355 tree ret_type = NULL_TREE;
15356 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15358 /* Create builtin_hash_table. */
15359 if (builtin_hash_table == NULL)
15360 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15362 h.type = NULL_TREE;
15363 h.mode[0] = mode_ret;
15364 h.mode[1] = mode_arg0;
15365 h.mode[2] = mode_arg1;
15366 h.mode[3] = mode_arg2;
15367 h.uns_p[0] = 0;
15368 h.uns_p[1] = 0;
15369 h.uns_p[2] = 0;
15370 h.uns_p[3] = 0;
15372 /* If the builtin produces unsigned results or takes unsigned arguments,
15373 and it is returned as a decl for the vectorizer (such as the widening
15374 multiplies and permutes), make sure the arguments and return value
15375 are type correct. */
15376 switch (builtin)
15377 {
15378 /* unsigned 1 argument functions. */
15379 case CRYPTO_BUILTIN_VSBOX:
15380 case P8V_BUILTIN_VGBBD:
15381 case MISC_BUILTIN_CDTBCD:
15382 case MISC_BUILTIN_CBCDTD:
15383 h.uns_p[0] = 1;
15384 h.uns_p[1] = 1;
15385 break;
15387 /* unsigned 2 argument functions. */
15388 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15389 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15390 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15391 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15392 case CRYPTO_BUILTIN_VCIPHER:
15393 case CRYPTO_BUILTIN_VCIPHERLAST:
15394 case CRYPTO_BUILTIN_VNCIPHER:
15395 case CRYPTO_BUILTIN_VNCIPHERLAST:
15396 case CRYPTO_BUILTIN_VPMSUMB:
15397 case CRYPTO_BUILTIN_VPMSUMH:
15398 case CRYPTO_BUILTIN_VPMSUMW:
15399 case CRYPTO_BUILTIN_VPMSUMD:
15400 case CRYPTO_BUILTIN_VPMSUM:
15401 case MISC_BUILTIN_ADDG6S:
15402 case MISC_BUILTIN_DIVWEU:
15403 case MISC_BUILTIN_DIVWEUO:
15404 case MISC_BUILTIN_DIVDEU:
15405 case MISC_BUILTIN_DIVDEUO:
15406 h.uns_p[0] = 1;
15407 h.uns_p[1] = 1;
15408 h.uns_p[2] = 1;
15409 break;
15411 /* unsigned 3 argument functions. */
15412 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15413 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15414 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15415 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15416 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15417 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15418 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15419 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15420 case VSX_BUILTIN_VPERM_16QI_UNS:
15421 case VSX_BUILTIN_VPERM_8HI_UNS:
15422 case VSX_BUILTIN_VPERM_4SI_UNS:
15423 case VSX_BUILTIN_VPERM_2DI_UNS:
15424 case VSX_BUILTIN_XXSEL_16QI_UNS:
15425 case VSX_BUILTIN_XXSEL_8HI_UNS:
15426 case VSX_BUILTIN_XXSEL_4SI_UNS:
15427 case VSX_BUILTIN_XXSEL_2DI_UNS:
15428 case CRYPTO_BUILTIN_VPERMXOR:
15429 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15430 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15431 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15432 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15433 case CRYPTO_BUILTIN_VSHASIGMAW:
15434 case CRYPTO_BUILTIN_VSHASIGMAD:
15435 case CRYPTO_BUILTIN_VSHASIGMA:
15436 h.uns_p[0] = 1;
15437 h.uns_p[1] = 1;
15438 h.uns_p[2] = 1;
15439 h.uns_p[3] = 1;
15440 break;
15442 /* signed permute functions with unsigned char mask. */
15443 case ALTIVEC_BUILTIN_VPERM_16QI:
15444 case ALTIVEC_BUILTIN_VPERM_8HI:
15445 case ALTIVEC_BUILTIN_VPERM_4SI:
15446 case ALTIVEC_BUILTIN_VPERM_4SF:
15447 case ALTIVEC_BUILTIN_VPERM_2DI:
15448 case ALTIVEC_BUILTIN_VPERM_2DF:
15449 case VSX_BUILTIN_VPERM_16QI:
15450 case VSX_BUILTIN_VPERM_8HI:
15451 case VSX_BUILTIN_VPERM_4SI:
15452 case VSX_BUILTIN_VPERM_4SF:
15453 case VSX_BUILTIN_VPERM_2DI:
15454 case VSX_BUILTIN_VPERM_2DF:
15455 h.uns_p[3] = 1;
15456 break;
15458 /* unsigned args, signed return. */
15459 case VSX_BUILTIN_XVCVUXDDP_UNS:
15460 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15461 h.uns_p[1] = 1;
15462 break;
15464 /* signed args, unsigned return. */
15465 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15466 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15467 case MISC_BUILTIN_UNPACK_TD:
15468 case MISC_BUILTIN_UNPACK_V1TI:
15469 h.uns_p[0] = 1;
15470 break;
15472 /* unsigned arguments for 128-bit pack instructions. */
15473 case MISC_BUILTIN_PACK_TD:
15474 case MISC_BUILTIN_PACK_V1TI:
15475 h.uns_p[1] = 1;
15476 h.uns_p[2] = 1;
15477 break;
15479 default:
15480 break;
15481 }
15483 /* Figure out how many args are present. */
15484 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15485 num_args--;
15487 if (num_args == 0)
15488 fatal_error (input_location,
15489 "internal error: builtin function %s had no type", name);
15491 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15492 if (!ret_type && h.uns_p[0])
15493 ret_type = builtin_mode_to_type[h.mode[0]][0];
15495 if (!ret_type)
15496 fatal_error (input_location,
15497 "internal error: builtin function %s had an unexpected "
15498 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15500 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15501 arg_type[i] = NULL_TREE;
15503 for (i = 0; i < num_args; i++)
15504 {
15505 int m = (int) h.mode[i+1];
15506 int uns_p = h.uns_p[i+1];
15508 arg_type[i] = builtin_mode_to_type[m][uns_p];
15509 if (!arg_type[i] && uns_p)
15510 arg_type[i] = builtin_mode_to_type[m][0];
15512 if (!arg_type[i])
15513 fatal_error (input_location,
15514 "internal error: builtin function %s, argument %d "
15515 "had unexpected argument type %s", name, i,
15516 GET_MODE_NAME (m));
15517 }
15519 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15520 if (*found == NULL)
15521 {
15522 h2 = ggc_alloc<builtin_hash_struct> ();
15523 *h2 = h;
15524 *found = h2;
15526 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15527 arg_type[2], NULL_TREE);
15528 }
15530 return (*found)->type;
15531 }
15533 static void
15534 rs6000_common_init_builtins (void)
15535 {
15536 const struct builtin_description *d;
15537 size_t i;
15539 tree opaque_ftype_opaque = NULL_TREE;
15540 tree opaque_ftype_opaque_opaque = NULL_TREE;
15541 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15542 tree v2si_ftype_qi = NULL_TREE;
15543 tree v2si_ftype_v2si_qi = NULL_TREE;
15544 tree v2si_ftype_int_qi = NULL_TREE;
15545 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15547 if (!TARGET_PAIRED_FLOAT)
15548 {
15549 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15550 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15551 }
15553 /* Paired and SPE builtins are only available if you build a compiler with
15554 the appropriate options, so only create those builtins with the
15555 appropriate compiler option. Create Altivec and VSX builtins on machines
15556 with at least the general purpose extensions (970 and newer) to allow the
15557 use of the target attribute. */
15559 if (TARGET_EXTRA_BUILTINS)
15560 builtin_mask |= RS6000_BTM_COMMON;
15562 /* Add the ternary operators. */
15563 d = bdesc_3arg;
15564 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15565 {
15566 tree type;
15567 HOST_WIDE_INT mask = d->mask;
15569 if ((mask & builtin_mask) != mask)
15570 {
15571 if (TARGET_DEBUG_BUILTIN)
15572 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15573 continue;
15574 }
15576 if (rs6000_overloaded_builtin_p (d->code))
15577 {
15578 if (! (type = opaque_ftype_opaque_opaque_opaque))
15579 type = opaque_ftype_opaque_opaque_opaque
15580 = build_function_type_list (opaque_V4SI_type_node,
15581 opaque_V4SI_type_node,
15582 opaque_V4SI_type_node,
15583 opaque_V4SI_type_node,
15584 NULL_TREE);
15585 }
15586 else
15587 {
15588 enum insn_code icode = d->icode;
15589 if (d->name == 0)
15590 {
15591 if (TARGET_DEBUG_BUILTIN)
15592 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
15593 (long unsigned)i);
15595 continue;
15596 }
15598 if (icode == CODE_FOR_nothing)
15599 {
15600 if (TARGET_DEBUG_BUILTIN)
15601 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15602 d->name);
15604 continue;
15605 }
15607 type = builtin_function_type (insn_data[icode].operand[0].mode,
15608 insn_data[icode].operand[1].mode,
15609 insn_data[icode].operand[2].mode,
15610 insn_data[icode].operand[3].mode,
15611 d->code, d->name);
15612 }
15614 def_builtin (d->name, type, d->code);
15615 }
15617 /* Add the binary operators. */
15618 d = bdesc_2arg;
15619 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15620 {
15621 machine_mode mode0, mode1, mode2;
15622 tree type;
15623 HOST_WIDE_INT mask = d->mask;
15625 if ((mask & builtin_mask) != mask)
15626 {
15627 if (TARGET_DEBUG_BUILTIN)
15628 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15629 continue;
15630 }
15632 if (rs6000_overloaded_builtin_p (d->code))
15633 {
15634 if (! (type = opaque_ftype_opaque_opaque))
15635 type = opaque_ftype_opaque_opaque
15636 = build_function_type_list (opaque_V4SI_type_node,
15637 opaque_V4SI_type_node,
15638 opaque_V4SI_type_node,
15639 NULL_TREE);
15640 }
15641 else
15642 {
15643 enum insn_code icode = d->icode;
15644 if (d->name == 0)
15645 {
15646 if (TARGET_DEBUG_BUILTIN)
15647 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
15648 (long unsigned)i);
15650 continue;
15651 }
15653 if (icode == CODE_FOR_nothing)
15654 {
15655 if (TARGET_DEBUG_BUILTIN)
15656 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15657 d->name);
15659 continue;
15660 }
15662 mode0 = insn_data[icode].operand[0].mode;
15663 mode1 = insn_data[icode].operand[1].mode;
15664 mode2 = insn_data[icode].operand[2].mode;
15666 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15667 {
15668 if (! (type = v2si_ftype_v2si_qi))
15669 type = v2si_ftype_v2si_qi
15670 = build_function_type_list (opaque_V2SI_type_node,
15671 opaque_V2SI_type_node,
15672 char_type_node,
15673 NULL_TREE);
15674 }
15676 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15677 && mode2 == QImode)
15678 {
15679 if (! (type = v2si_ftype_int_qi))
15680 type = v2si_ftype_int_qi
15681 = build_function_type_list (opaque_V2SI_type_node,
15682 integer_type_node,
15683 char_type_node,
15684 NULL_TREE);
15685 }
15687 else
15688 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15689 d->code, d->name);
15690 }
15692 def_builtin (d->name, type, d->code);
15693 }
15695 /* Add the simple unary operators. */
15696 d = bdesc_1arg;
15697 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15698 {
15699 machine_mode mode0, mode1;
15700 tree type;
15701 HOST_WIDE_INT mask = d->mask;
15703 if ((mask & builtin_mask) != mask)
15704 {
15705 if (TARGET_DEBUG_BUILTIN)
15706 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15707 continue;
15708 }
15710 if (rs6000_overloaded_builtin_p (d->code))
15711 {
15712 if (! (type = opaque_ftype_opaque))
15713 type = opaque_ftype_opaque
15714 = build_function_type_list (opaque_V4SI_type_node,
15715 opaque_V4SI_type_node,
15716 NULL_TREE);
15717 }
15718 else
15719 {
15720 enum insn_code icode = d->icode;
15721 if (d->name == 0)
15722 {
15723 if (TARGET_DEBUG_BUILTIN)
15724 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
15725 (long unsigned)i);
15727 continue;
15728 }
15730 if (icode == CODE_FOR_nothing)
15731 {
15732 if (TARGET_DEBUG_BUILTIN)
15733 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15734 d->name);
15736 continue;
15737 }
15739 mode0 = insn_data[icode].operand[0].mode;
15740 mode1 = insn_data[icode].operand[1].mode;
15742 if (mode0 == V2SImode && mode1 == QImode)
15743 {
15744 if (! (type = v2si_ftype_qi))
15745 type = v2si_ftype_qi
15746 = build_function_type_list (opaque_V2SI_type_node,
15747 char_type_node,
15748 NULL_TREE);
15749 }
15751 else
15752 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15753 d->code, d->name);
15754 }
15756 def_builtin (d->name, type, d->code);
15757 }
15758 }
15760 static void
15761 rs6000_init_libfuncs (void)
15762 {
15763 if (!TARGET_IEEEQUAD)
15764 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15765 if (!TARGET_XL_COMPAT)
15766 {
15767 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15768 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15769 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15770 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
15772 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15773 {
15774 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15775 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15776 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15777 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15778 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15779 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15780 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15782 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15783 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15784 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15785 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15786 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15787 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15788 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15789 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15790 }
15792 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15793 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15794 }
15795 else
15796 {
15797 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15798 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15799 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15800 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15801 }
15802 else
15803 {
15804 /* 32-bit SVR4 quad floating point routines. */
15806 set_optab_libfunc (add_optab, TFmode, "_q_add");
15807 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15808 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15809 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15810 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15811 if (TARGET_PPC_GPOPT)
15812 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15814 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15815 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15816 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15817 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15818 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15819 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15821 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15822 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15823 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15824 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15825 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15826 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15827 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15828 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
15829 }
15830 }
15833 /* Expand a block clear operation, and return 1 if successful. Return 0
15834 if we should let the compiler generate normal code.
15836 operands[0] is the destination
15837 operands[1] is the length
15838 operands[3] is the alignment */
15840 int
15841 expand_block_clear (rtx operands[])
15842 {
15843 rtx orig_dest = operands[0];
15844 rtx bytes_rtx = operands[1];
15845 rtx align_rtx = operands[3];
15846 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15847 HOST_WIDE_INT align;
15848 HOST_WIDE_INT bytes;
15849 int offset;
15850 int clear_bytes;
15851 int clear_step;
15853 /* If this is not a fixed size clear, just call memset */
15854 if (! constp)
15855 return 0;
15857 /* This must be a fixed size alignment */
15858 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15859 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15861 /* Anything to clear? */
15862 bytes = INTVAL (bytes_rtx);
15863 if (bytes <= 0)
15864 return 1;
15866 /* Use the builtin memset after a point, to avoid huge code bloat.
15867 When optimize_size, avoid any significant code bloat; calling
15868 memset is about 4 instructions, so allow for one instruction to
15869 load zero and three to do clearing. */
15870 if (TARGET_ALTIVEC && align >= 128)
15871 clear_step = 16;
15872 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
15873 clear_step = 8;
15874 else if (TARGET_SPE && align >= 64)
15875 clear_step = 8;
15876 else
15877 clear_step = 4;
15879 if (optimize_size && bytes > 3 * clear_step)
15880 return 0;
15881 if (! optimize_size && bytes > 8 * clear_step)
15882 return 0;
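  /* For example, with Altivec and 128-bit alignment, clear_step is 16,
     so clears of up to 48 bytes under -Os, or up to 128 bytes otherwise,
     are expanded inline; anything larger falls back to letting the
     compiler emit normal code (typically a memset call).  */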
15884 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15886 machine_mode mode = BLKmode;
15887 rtx dest;
15889 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15891 clear_bytes = 16;
15892 mode = V4SImode;
15894 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15896 clear_bytes = 8;
15897 mode = V2SImode;
15899 else if (bytes >= 8 && TARGET_POWERPC64
15900 && (align >= 64 || !STRICT_ALIGNMENT))
15902 clear_bytes = 8;
15903 mode = DImode;
15904 if (offset == 0 && align < 64)
15906 rtx addr;
15908 /* If the address form is reg+offset with offset not a
15909 multiple of four, reload into reg indirect form here
15910 rather than waiting for reload. This way we get one
15911 reload, not one per store. */
15912 addr = XEXP (orig_dest, 0);
15913 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15914 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15915 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15917 addr = copy_addr_to_reg (addr);
15918 orig_dest = replace_equiv_address (orig_dest, addr);
15922 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15923 { /* move 4 bytes */
15924 clear_bytes = 4;
15925 mode = SImode;
15927 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15928 { /* move 2 bytes */
15929 clear_bytes = 2;
15930 mode = HImode;
15932 else /* move 1 byte at a time */
15934 clear_bytes = 1;
15935 mode = QImode;
15938 dest = adjust_address (orig_dest, mode, offset);
15940 emit_move_insn (dest, CONST0_RTX (mode));
15943 return 1;
15947 /* Expand a block move operation, and return 1 if successful. Return 0
15948 if we should let the compiler generate normal code.
15950 operands[0] is the destination
15951 operands[1] is the source
15952 operands[2] is the length
15953 operands[3] is the alignment */
15955 #define MAX_MOVE_REG 4
15957 int
15958 expand_block_move (rtx operands[])
15960 rtx orig_dest = operands[0];
15961 rtx orig_src = operands[1];
15962 rtx bytes_rtx = operands[2];
15963 rtx align_rtx = operands[3];
15964 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
15965 int align;
15966 int bytes;
15967 int offset;
15968 int move_bytes;
15969 rtx stores[MAX_MOVE_REG];
15970 int num_reg = 0;
15972 /* If this is not a fixed size move, just call memcpy */
15973 if (! constp)
15974 return 0;
15976 /* This must be a fixed size alignment */
15977 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15978 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15980 /* Anything to move? */
15981 bytes = INTVAL (bytes_rtx);
15982 if (bytes <= 0)
15983 return 1;
15985 if (bytes > rs6000_block_move_inline_limit)
15986 return 0;
15988 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
15990 union {
15991 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
15992 rtx (*mov) (rtx, rtx);
15993 } gen_func;
15994 machine_mode mode = BLKmode;
15995 rtx src, dest;
15997 /* Altivec first, since it will be faster than a string move
15998 when it applies, and usually not significantly larger. */
15999 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
16001 move_bytes = 16;
16002 mode = V4SImode;
16003 gen_func.mov = gen_movv4si;
16005 else if (TARGET_SPE && bytes >= 8 && align >= 64)
16007 move_bytes = 8;
16008 mode = V2SImode;
16009 gen_func.mov = gen_movv2si;
16011 else if (TARGET_STRING
16012 && bytes > 24 /* move up to 32 bytes at a time */
16013 && ! fixed_regs[5]
16014 && ! fixed_regs[6]
16015 && ! fixed_regs[7]
16016 && ! fixed_regs[8]
16017 && ! fixed_regs[9]
16018 && ! fixed_regs[10]
16019 && ! fixed_regs[11]
16020 && ! fixed_regs[12])
16022 move_bytes = (bytes > 32) ? 32 : bytes;
16023 gen_func.movmemsi = gen_movmemsi_8reg;
16025 else if (TARGET_STRING
16026 && bytes > 16 /* move up to 24 bytes at a time */
16027 && ! fixed_regs[5]
16028 && ! fixed_regs[6]
16029 && ! fixed_regs[7]
16030 && ! fixed_regs[8]
16031 && ! fixed_regs[9]
16032 && ! fixed_regs[10])
16034 move_bytes = (bytes > 24) ? 24 : bytes;
16035 gen_func.movmemsi = gen_movmemsi_6reg;
16037 else if (TARGET_STRING
16038 && bytes > 8 /* move up to 16 bytes at a time */
16039 && ! fixed_regs[5]
16040 && ! fixed_regs[6]
16041 && ! fixed_regs[7]
16042 && ! fixed_regs[8])
16044 move_bytes = (bytes > 16) ? 16 : bytes;
16045 gen_func.movmemsi = gen_movmemsi_4reg;
16047 else if (bytes >= 8 && TARGET_POWERPC64
16048 && (align >= 64 || !STRICT_ALIGNMENT))
16050 move_bytes = 8;
16051 mode = DImode;
16052 gen_func.mov = gen_movdi;
16053 if (offset == 0 && align < 64)
16055 rtx addr;
16057 /* If the address form is reg+offset with offset not a
16058 multiple of four, reload into reg indirect form here
16059 rather than waiting for reload. This way we get one
16060 reload, not one per load and/or store. */
16061 addr = XEXP (orig_dest, 0);
16062 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16063 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16064 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16066 addr = copy_addr_to_reg (addr);
16067 orig_dest = replace_equiv_address (orig_dest, addr);
16069 addr = XEXP (orig_src, 0);
16070 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16071 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16072 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16074 addr = copy_addr_to_reg (addr);
16075 orig_src = replace_equiv_address (orig_src, addr);
16079 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
16080 { /* move up to 8 bytes at a time */
16081 move_bytes = (bytes > 8) ? 8 : bytes;
16082 gen_func.movmemsi = gen_movmemsi_2reg;
16084 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16085 { /* move 4 bytes */
16086 move_bytes = 4;
16087 mode = SImode;
16088 gen_func.mov = gen_movsi;
16090 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16091 { /* move 2 bytes */
16092 move_bytes = 2;
16093 mode = HImode;
16094 gen_func.mov = gen_movhi;
16096 else if (TARGET_STRING && bytes > 1)
16097 { /* move up to 4 bytes at a time */
16098 move_bytes = (bytes > 4) ? 4 : bytes;
16099 gen_func.movmemsi = gen_movmemsi_1reg;
16101 else /* move 1 byte at a time */
16103 move_bytes = 1;
16104 mode = QImode;
16105 gen_func.mov = gen_movqi;
16108 src = adjust_address (orig_src, mode, offset);
16109 dest = adjust_address (orig_dest, mode, offset);
16111 if (mode != BLKmode)
16113 rtx tmp_reg = gen_reg_rtx (mode);
16115 emit_insn ((*gen_func.mov) (tmp_reg, src));
16116 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16119 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16121 int i;
16122 for (i = 0; i < num_reg; i++)
16123 emit_insn (stores[i]);
16124 num_reg = 0;
16127 if (mode == BLKmode)
16129 /* Move the address into scratch registers. The movmemsi
16130 patterns require zero offset. */
16131 if (!REG_P (XEXP (src, 0)))
16133 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16134 src = replace_equiv_address (src, src_reg);
16136 set_mem_size (src, move_bytes);
16138 if (!REG_P (XEXP (dest, 0)))
16140 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16141 dest = replace_equiv_address (dest, dest_reg);
16143 set_mem_size (dest, move_bytes);
16145 emit_insn ((*gen_func.movmemsi) (dest, src,
16146 GEN_INT (move_bytes & 31),
16147 align_rtx));
16151 return 1;
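/* A sketch of the strategy selection above: assuming -mstring, 4-byte
   alignment, registers r5..r12 free, and an inline limit of at least 35
   bytes, a 35-byte copy becomes one 32-byte movmemsi_8reg string move
   followed by one 2-byte (HImode) and one 1-byte (QImode) register
   move.  */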
16155 /* Return a string to perform a load_multiple operation.
16156 operands[0] is the vector.
16157 operands[1] is the source address.
16158 operands[2] is the first destination register. */
16160 const char *
16161 rs6000_output_load_multiple (rtx operands[3])
16163 /* We have to handle the case where the pseudo used to contain the address
16164 is assigned to one of the output registers. */
16165 int i, j;
16166 int words = XVECLEN (operands[0], 0);
16167 rtx xop[10];
16169 if (XVECLEN (operands[0], 0) == 1)
16170 return "lwz %2,0(%1)";
16172 for (i = 0; i < words; i++)
16173 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
16175 if (i == words-1)
16177 xop[0] = GEN_INT (4 * (words-1));
16178 xop[1] = operands[1];
16179 xop[2] = operands[2];
16180 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16181 return "";
16183 else if (i == 0)
16185 xop[0] = GEN_INT (4 * (words-1));
16186 xop[1] = operands[1];
16187 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16188 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16189 return "";
16191 else
16193 for (j = 0; j < words; j++)
16194 if (j != i)
16196 xop[0] = GEN_INT (j * 4);
16197 xop[1] = operands[1];
16198 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16199 output_asm_insn ("lwz %2,%0(%1)", xop);
16201 xop[0] = GEN_INT (i * 4);
16202 xop[1] = operands[1];
16203 output_asm_insn ("lwz %1,%0(%1)", xop);
16204 return "";
16208 return "lswi %2,%1,%N0";
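/* For instance, with a 4-word load where the base address register is
   also the last register written (the i == words-1 case above), we emit
   "lswi %2,%1,12" for the first three words and then "lwz %1,12(%1)",
   so the base register is clobbered only by the final load.  */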
16212 /* A validation routine: say whether CODE, a condition code, and MODE
16213 match. The other alternatives either don't make sense or should
16214 never be generated. */
16216 void
16217 validate_condition_mode (enum rtx_code code, machine_mode mode)
16219 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16220 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16221 && GET_MODE_CLASS (mode) == MODE_CC);
16223 /* These don't make sense. */
16224 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16225 || mode != CCUNSmode);
16227 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16228 || mode == CCUNSmode);
16230 gcc_assert (mode == CCFPmode
16231 || (code != ORDERED && code != UNORDERED
16232 && code != UNEQ && code != LTGT
16233 && code != UNGT && code != UNLT
16234 && code != UNGE && code != UNLE));
16236 /* These should never be generated except for
16237 flag_finite_math_only. */
16238 gcc_assert (mode != CCFPmode
16239 || flag_finite_math_only
16240 || (code != LE && code != GE
16241 && code != UNEQ && code != LTGT
16242 && code != UNGT && code != UNLT));
16244 /* These are invalid; the information is not there. */
16245 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
16249 /* Return 1 if ANDOP is a mask that has no bits set that are not in the
16250 mask required to convert the result of a rotate insn into a shift
16251 left insn of SHIFTOP bits. Both are known to be SImode CONST_INT. */
16253 int
16254 includes_lshift_p (rtx shiftop, rtx andop)
16256 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16258 shift_mask <<= INTVAL (shiftop);
16260 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
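/* A worked example: for SHIFTOP == 4 the low 32 bits of shift_mask are
   0xfffffff0, so ANDOP qualifies only if its low four bits are clear,
   i.e. every bit the AND keeps is a bit a shift left by 4 could
   produce.  */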
16263 /* Similar, but for right shift. */
16265 int
16266 includes_rshift_p (rtx shiftop, rtx andop)
16268 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16270 shift_mask >>= INTVAL (shiftop);
16272 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16275 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16276 to perform a left shift. It must have exactly SHIFTOP least
16277 significant 0's, then one or more 1's, then zero or more 0's. */
16279 int
16280 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16282 if (GET_CODE (andop) == CONST_INT)
16284 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16286 c = INTVAL (andop);
16287 if (c == 0 || c == HOST_WIDE_INT_M1U)
16288 return 0;
16290 shift_mask = HOST_WIDE_INT_M1U;
16291 shift_mask <<= INTVAL (shiftop);
16293 /* Find the least significant one bit. */
16294 lsb = c & -c;
16296 /* It must coincide with the LSB of the shift mask. */
16297 if (-lsb != shift_mask)
16298 return 0;
16300 /* Invert to look for the next transition (if any). */
16301 c = ~c;
16303 /* Remove the low group of ones (originally low group of zeros). */
16304 c &= -lsb;
16306 /* Again find the lsb, and check we have all 1's above. */
16307 lsb = c & -c;
16308 return c == -lsb;
16310 else
16311 return 0;
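/* A worked example, assuming a 64-bit HOST_WIDE_INT: SHIFTOP == 8 with
   ANDOP == 0x00ffff00 is accepted; lsb is 0x100, matching -shift_mask,
   and after inverting and stripping the low run only a single
   contiguous block of ones remains.  ANDOP == 0x00ff0f00 is rejected
   because it contains a second transition.  */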
16314 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16315 to perform a left shift. It must have SHIFTOP or more least
16316 significant 0's, with the remainder of the word 1's. */
16318 int
16319 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16321 if (GET_CODE (andop) == CONST_INT)
16323 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16325 shift_mask = HOST_WIDE_INT_M1U;
16326 shift_mask <<= INTVAL (shiftop);
16327 c = INTVAL (andop);
16329 /* Find the least significant one bit. */
16330 lsb = c & -c;
16332 /* It must be covered by the shift mask.
16333 This test also rejects c == 0. */
16334 if ((lsb & shift_mask) == 0)
16335 return 0;
16337 /* Check we have all 1's above the transition, and reject all 1's. */
16338 return c == -lsb && lsb != 1;
16340 else
16341 return 0;
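/* For example, SHIFTOP == 8 with ANDOP == 0xffffffffffffff00 is
   accepted (exactly eight low zeros, all ones above), while
   0x0000000000ffff00 is rejected because the bits above the run of
   ones are not all ones.  */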
16344 /* Return 1 if the operands will generate valid arguments to an rlwimi
16345 instruction for an insert with right shift in 64-bit mode.  The mask may
16346 not start on the first bit or stop on the last bit because the wrap-around
16347 effects of the instruction do not correspond to the semantics of the RTL insn.  */
16349 int
16350 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16352 if (INTVAL (startop) > 32
16353 && INTVAL (startop) < 64
16354 && INTVAL (sizeop) > 1
16355 && INTVAL (sizeop) + INTVAL (startop) < 64
16356 && INTVAL (shiftop) > 0
16357 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16358 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16359 return 1;
16361 return 0;
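/* E.g. SIZEOP == 16, STARTOP == 40, SHIFTOP == 8 passes every test
   above, so the insertion can use rlwimi; STARTOP == 32 would fail the
   first test, since the field would then touch a boundary where the
   rlwimi wrap-around no longer matches the RTL semantics.  */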
16364 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
16365 for lfq and stfq insns iff the registers are hard registers. */
16367 int
16368 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16370 /* We might have been passed a SUBREG. */
16371 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16372 return 0;
16374   /* We might have been passed non-floating-point registers.  */
16375 if (!FP_REGNO_P (REGNO (reg1))
16376 || !FP_REGNO_P (REGNO (reg2)))
16377 return 0;
16379 return (REGNO (reg1) == REGNO (reg2) - 1);
16382 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16383 addr1 and addr2 must be in consecutive memory locations
16384 (addr2 == addr1 + 8). */
16386 int
16387 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16389 rtx addr1, addr2;
16390 unsigned int reg1, reg2;
16391 int offset1, offset2;
16393 /* The mems cannot be volatile. */
16394 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16395 return 0;
16397 addr1 = XEXP (mem1, 0);
16398 addr2 = XEXP (mem2, 0);
16400 /* Extract an offset (if used) from the first addr. */
16401 if (GET_CODE (addr1) == PLUS)
16403 /* If not a REG, return zero. */
16404 if (GET_CODE (XEXP (addr1, 0)) != REG)
16405 return 0;
16406 else
16408 reg1 = REGNO (XEXP (addr1, 0));
16409 /* The offset must be constant! */
16410 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16411 return 0;
16412 offset1 = INTVAL (XEXP (addr1, 1));
16415 else if (GET_CODE (addr1) != REG)
16416 return 0;
16417 else
16419 reg1 = REGNO (addr1);
16420 /* This was a simple (mem (reg)) expression. Offset is 0. */
16421 offset1 = 0;
16424 /* And now for the second addr. */
16425 if (GET_CODE (addr2) == PLUS)
16427 /* If not a REG, return zero. */
16428 if (GET_CODE (XEXP (addr2, 0)) != REG)
16429 return 0;
16430 else
16432 reg2 = REGNO (XEXP (addr2, 0));
16433 /* The offset must be constant. */
16434 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16435 return 0;
16436 offset2 = INTVAL (XEXP (addr2, 1));
16439 else if (GET_CODE (addr2) != REG)
16440 return 0;
16441 else
16443 reg2 = REGNO (addr2);
16444 /* This was a simple (mem (reg)) expression. Offset is 0. */
16445 offset2 = 0;
16448 /* Both of these must have the same base register. */
16449 if (reg1 != reg2)
16450 return 0;
16452 /* The offset for the second addr must be 8 more than the first addr. */
16453 if (offset2 != offset1 + 8)
16454 return 0;
16456 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16457 instructions. */
16458 return 1;
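/* For example, (mem (plus (reg 3) (const_int 8))) followed by
   (mem (plus (reg 3) (const_int 16))) qualifies: both are non-volatile,
   they share the base register, and the second offset is exactly 8
   larger than the first.  */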
16462 rtx
16463 rs6000_secondary_memory_needed_rtx (machine_mode mode)
16465 static bool eliminated = false;
16466 rtx ret;
16468 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16469 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16470 else
16472 rtx mem = cfun->machine->sdmode_stack_slot;
16473 gcc_assert (mem != NULL_RTX);
16475 if (!eliminated)
16477 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16478 cfun->machine->sdmode_stack_slot = mem;
16479 eliminated = true;
16481 ret = mem;
16484 if (TARGET_DEBUG_ADDR)
16486 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16487 GET_MODE_NAME (mode));
16488 if (!ret)
16489 fprintf (stderr, "\tNULL_RTX\n");
16490 else
16491 debug_rtx (ret);
16494 return ret;
16497 /* Return the mode to be used for memory when a secondary memory
16498 location is needed. For SDmode values we need to use DDmode, in
16499 all other cases we can use the same mode. */
16500 machine_mode
16501 rs6000_secondary_memory_needed_mode (machine_mode mode)
16503 if (lra_in_progress && mode == SDmode)
16504 return DDmode;
16505 return mode;
16508 static tree
16509 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16511 /* Don't walk into types. */
16512 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16514 *walk_subtrees = 0;
16515 return NULL_TREE;
16518 switch (TREE_CODE (*tp))
16520 case VAR_DECL:
16521 case PARM_DECL:
16522 case FIELD_DECL:
16523 case RESULT_DECL:
16524 case SSA_NAME:
16525 case REAL_CST:
16526 case MEM_REF:
16527 case VIEW_CONVERT_EXPR:
16528 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16529 return *tp;
16530 break;
16531 default:
16532 break;
16535 return NULL_TREE;
16538 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16539 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16540 only work on the traditional altivec registers, note if an altivec register
16541 was chosen. */
16543 static enum rs6000_reg_type
16544 register_to_reg_type (rtx reg, bool *is_altivec)
16546 HOST_WIDE_INT regno;
16547 enum reg_class rclass;
16549 if (GET_CODE (reg) == SUBREG)
16550 reg = SUBREG_REG (reg);
16552 if (!REG_P (reg))
16553 return NO_REG_TYPE;
16555 regno = REGNO (reg);
16556 if (regno >= FIRST_PSEUDO_REGISTER)
16558 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16559 return PSEUDO_REG_TYPE;
16561 regno = true_regnum (reg);
16562 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16563 return PSEUDO_REG_TYPE;
16566 gcc_assert (regno >= 0);
16568 if (is_altivec && ALTIVEC_REGNO_P (regno))
16569 *is_altivec = true;
16571 rclass = rs6000_regno_regclass[regno];
16572 return reg_class_to_reg_type[(int)rclass];
16575 /* Helper function to return the cost of adding a TOC entry address. */
16577 static inline int
16578 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
16580 int ret;
16582 if (TARGET_CMODEL != CMODEL_SMALL)
16583 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
16585 else
16586 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
16588 return ret;
16591 /* Helper function for rs6000_secondary_reload to determine whether the memory
16592 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
16593 needs reloading. Return negative if the memory is not handled by the memory
16594 helper functions (so the caller should try a different reload method),
16595 0 if no additional instructions are needed, and positive to give the
16596 extra cost for the memory.  */
16598 static int
16599 rs6000_secondary_reload_memory (rtx addr,
16600 enum reg_class rclass,
16601 enum machine_mode mode)
16603 int extra_cost = 0;
16604 rtx reg, and_arg, plus_arg0, plus_arg1;
16605 addr_mask_type addr_mask;
16606 const char *type = NULL;
16607 const char *fail_msg = NULL;
16609 if (GPR_REG_CLASS_P (rclass))
16610 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
16612 else if (rclass == FLOAT_REGS)
16613 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
16615 else if (rclass == ALTIVEC_REGS)
16616 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
16618 /* For the combined VSX_REGS, turn off Altivec AND -16. */
16619 else if (rclass == VSX_REGS)
16620 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
16621 & ~RELOAD_REG_AND_M16);
16623 else
16625 if (TARGET_DEBUG_ADDR)
16626 fprintf (stderr,
16627 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16628 "class is not GPR, FPR, VMX\n",
16629 GET_MODE_NAME (mode), reg_class_names[rclass]);
16631 return -1;
16634 /* If the register isn't valid in this register class, just return now. */
16635 if ((addr_mask & RELOAD_REG_VALID) == 0)
16637 if (TARGET_DEBUG_ADDR)
16638 fprintf (stderr,
16639 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16640 "not valid in class\n",
16641 GET_MODE_NAME (mode), reg_class_names[rclass]);
16643 return -1;
16646 switch (GET_CODE (addr))
16648     /* Does the register class support auto-update forms for this mode?  We
16649 don't need a scratch register, since the powerpc only supports
16650 PRE_INC, PRE_DEC, and PRE_MODIFY. */
16651 case PRE_INC:
16652 case PRE_DEC:
16653 reg = XEXP (addr, 0);
16654 if (!base_reg_operand (addr, GET_MODE (reg)))
16656 fail_msg = "no base register #1";
16657 extra_cost = -1;
16660 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
16662 extra_cost = 1;
16663 type = "update";
16665 break;
16667 case PRE_MODIFY:
16668 reg = XEXP (addr, 0);
16669 plus_arg1 = XEXP (addr, 1);
16670 if (!base_reg_operand (reg, GET_MODE (reg))
16671 || GET_CODE (plus_arg1) != PLUS
16672 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
16674 fail_msg = "bad PRE_MODIFY";
16675 extra_cost = -1;
16678 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
16680 extra_cost = 1;
16681 type = "update";
16683 break;
16685 /* Do we need to simulate AND -16 to clear the bottom address bits used
16686 in VMX load/stores? Only allow the AND for vector sizes. */
16687 case AND:
16688 and_arg = XEXP (addr, 0);
16689 if (GET_MODE_SIZE (mode) != 16
16690 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16691 || INTVAL (XEXP (addr, 1)) != -16)
16693 fail_msg = "bad Altivec AND #1";
16694 extra_cost = -1;
16697 if (rclass != ALTIVEC_REGS)
16699 if (legitimate_indirect_address_p (and_arg, false))
16700 extra_cost = 1;
16702 else if (legitimate_indexed_address_p (and_arg, false))
16703 extra_cost = 2;
16705 else
16707 fail_msg = "bad Altivec AND #2";
16708 extra_cost = -1;
16711 type = "and";
16713 break;
16715 /* If this is an indirect address, make sure it is a base register. */
16716 case REG:
16717 case SUBREG:
16718 if (!legitimate_indirect_address_p (addr, false))
16720 extra_cost = 1;
16721 type = "move";
16723 break;
16725 /* If this is an indexed address, make sure the register class can handle
16726 indexed addresses for this mode. */
16727 case PLUS:
16728 plus_arg0 = XEXP (addr, 0);
16729 plus_arg1 = XEXP (addr, 1);
16731 /* (plus (plus (reg) (constant)) (constant)) is generated during
16732 push_reload processing, so handle it now. */
16733 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
16735 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16737 extra_cost = 1;
16738 type = "offset";
16742 /* (plus (plus (reg) (constant)) (reg)) is also generated during
16743 push_reload processing, so handle it now. */
16744 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
16746 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
16748 extra_cost = 1;
16749 type = "indexed #2";
16753 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
16755 fail_msg = "no base register #2";
16756 extra_cost = -1;
16759 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
16761 if ((addr_mask & RELOAD_REG_INDEXED) == 0
16762 || !legitimate_indexed_address_p (addr, false))
16764 extra_cost = 1;
16765 type = "indexed";
16769 /* Make sure the register class can handle offset addresses. */
16770 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
16772 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16774 extra_cost = 1;
16775 type = "offset";
16779 else
16781 fail_msg = "bad PLUS";
16782 extra_cost = -1;
16785 break;
16787 case LO_SUM:
16788 if (!legitimate_lo_sum_address_p (mode, addr, false))
16790 fail_msg = "bad LO_SUM";
16791 extra_cost = -1;
16794 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16796 extra_cost = 1;
16797 type = "lo_sum";
16799 break;
16801 /* Static addresses need to create a TOC entry. */
16802 case CONST:
16803 case SYMBOL_REF:
16804 case LABEL_REF:
16805 type = "address";
16806 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
16807 break;
16809 /* TOC references look like offsetable memory. */
16810 case UNSPEC:
16811 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
16813 fail_msg = "bad UNSPEC";
16814 extra_cost = -1;
16817 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16819 extra_cost = 1;
16820 type = "toc reference";
16822 break;
16824 default:
16826 fail_msg = "bad address";
16827 extra_cost = -1;
16831 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
16833 if (extra_cost < 0)
16834 fprintf (stderr,
16835 "rs6000_secondary_reload_memory error: mode = %s, "
16836 "class = %s, addr_mask = '%s', %s\n",
16837 GET_MODE_NAME (mode),
16838 reg_class_names[rclass],
16839 rs6000_debug_addr_mask (addr_mask, false),
16840 (fail_msg != NULL) ? fail_msg : "<bad address>");
16842 else
16843 fprintf (stderr,
16844 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16845 "addr_mask = '%s', extra cost = %d, %s\n",
16846 GET_MODE_NAME (mode),
16847 reg_class_names[rclass],
16848 rs6000_debug_addr_mask (addr_mask, false),
16849 extra_cost,
16850 (type) ? type : "<none>");
16852 debug_rtx (addr);
16855 return extra_cost;
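/* For instance, reloading a 16-byte vector whose address is
   (and (plus (reg) (reg)) (const_int -16)) for a non-Altivec register
   class takes the AND case above and is charged an extra cost of 2:
   one insn to form the indexed sum and one to apply the AND.  */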
16858 /* Helper function for rs6000_secondary_reload to return true if a move to a
16859 different register class is really a simple move.  */
16861 static bool
16862 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16863 enum rs6000_reg_type from_type,
16864 machine_mode mode)
16866 int size;
16868 /* Add support for various direct moves available. In this function, we only
16869 look at cases where we don't need any extra registers, and one or more
16870 simple move insns are issued. At present, 32-bit integers are not allowed
16871 in FPR/VSX registers.  A single-precision binary float is not a simple
16872 move because we need to convert to the single-precision memory layout.
16873 The 4-byte SDmode can be moved. */
16874 size = GET_MODE_SIZE (mode);
16875 if (TARGET_DIRECT_MOVE
16876 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16877 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16878 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16879 return true;
16881 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16882 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16883 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16884 return true;
16886 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16887 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16888 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16889 return true;
16891 return false;
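/* As a concrete case: on a 64-bit power8 target with direct moves
   enabled, a DImode copy between a GPR and a VSX register is a simple
   move (a single mtvsrd or mfvsrd), so no secondary reload is
   needed.  */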
16894 /* Power8 helper function for rs6000_secondary_reload, handle all of the
16895 special direct moves that involve allocating an extra register.  Return
16896 true if such a move is handled, recording the insn code and extra cost
16897 in SRI; return false if not.  */
16899 static bool
16900 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16901 enum rs6000_reg_type from_type,
16902 machine_mode mode,
16903 secondary_reload_info *sri,
16904 bool altivec_p)
16906 bool ret = false;
16907 enum insn_code icode = CODE_FOR_nothing;
16908 int cost = 0;
16909 int size = GET_MODE_SIZE (mode);
16911 if (TARGET_POWERPC64)
16913 if (size == 16)
16915 /* Handle moving 128-bit values from GPRs to VSX registers on
16916 power8 when running in 64-bit mode using XXPERMDI to glue the two
16917 64-bit values back together. */
16918 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16920 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16921 icode = reg_addr[mode].reload_vsx_gpr;
16924 /* Handle moving 128-bit values from VSX registers to GPRs on
16925 power8 when running in 64-bit mode using XXPERMDI to get access to the
16926 bottom 64-bit value. */
16927 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16929 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16930 icode = reg_addr[mode].reload_gpr_vsx;
16934 else if (mode == SFmode)
16936 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16938 cost = 3; /* xscvdpspn, mfvsrd, and. */
16939 icode = reg_addr[mode].reload_gpr_vsx;
16942 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16944 cost = 2; /* mtvsrz, xscvspdpn. */
16945 icode = reg_addr[mode].reload_vsx_gpr;
16950 if (TARGET_POWERPC64 && size == 16)
16952 /* Handle moving 128-bit values from GPRs to VSX registers on
16953 power8 when running in 64-bit mode using XXPERMDI to glue the two
16954 64-bit values back together. */
16955 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16957 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16958 icode = reg_addr[mode].reload_vsx_gpr;
16961 /* Handle moving 128-bit values from VSX registers to GPRs on
16962 power8 when running in 64-bit mode using XXPERMDI to get access to the
16963 bottom 64-bit value. */
16964 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16966 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16967 icode = reg_addr[mode].reload_gpr_vsx;
16971 else if (!TARGET_POWERPC64 && size == 8)
16973 /* Handle moving 64-bit values from GPRs to floating point registers on
16974 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16975 values back together. Altivec register classes must be handled
16976 specially since a different instruction is used, and the secondary
16977 reload support requires a single instruction class in the scratch
16978 register constraint. However, right now TFmode is not allowed in
16979 Altivec registers, so the pattern will never match. */
16980 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16982 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16983 icode = reg_addr[mode].reload_fpr_gpr;
16987 if (icode != CODE_FOR_nothing)
16989 ret = true;
16990 if (sri)
16992 sri->icode = icode;
16993 sri->extra_cost = cost;
16997 return ret;
17000 /* Return whether a move between two register classes can be done either
17001 directly (simple move) or via a pattern that uses a single extra temporary
17002 (using power8's direct move in this case).  */
17004 static bool
17005 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
17006 enum rs6000_reg_type from_type,
17007 machine_mode mode,
17008 secondary_reload_info *sri,
17009 bool altivec_p)
17011 /* Fall back to load/store reloads if either type is not a register. */
17012 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
17013 return false;
17015 /* If we haven't allocated registers yet, assume the move can be done for the
17016 standard register types. */
17017 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
17018 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
17019 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
17020 return true;
17022   /* A move within the same set of registers is a simple move for non-specialized
17023 registers. */
17024 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
17025 return true;
17027 /* Check whether a simple move can be done directly. */
17028 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
17030 if (sri)
17032 sri->icode = CODE_FOR_nothing;
17033 sri->extra_cost = 0;
17035 return true;
17038 /* Now check if we can do it in a few steps. */
17039 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
17040 altivec_p);
17043 /* Inform reload about cases where moving X with a mode MODE to a register in
17044 RCLASS requires an extra scratch or immediate register. Return the class
17045 needed for the immediate register.
17047 For VSX and Altivec, we may need a register to convert sp+offset into
17048 reg+sp.
17050 For misaligned 64-bit gpr loads and stores we need a register to
17051 convert an offset address to indirect. */
17053 static reg_class_t
17054 rs6000_secondary_reload (bool in_p,
17055 rtx x,
17056 reg_class_t rclass_i,
17057 machine_mode mode,
17058 secondary_reload_info *sri)
17060 enum reg_class rclass = (enum reg_class) rclass_i;
17061 reg_class_t ret = ALL_REGS;
17062 enum insn_code icode;
17063 bool default_p = false;
17064 bool done_p = false;
17066 /* Allow subreg of memory before/during reload. */
17067 bool memory_p = (MEM_P (x)
17068 || (!reload_completed && GET_CODE (x) == SUBREG
17069 && MEM_P (SUBREG_REG (x))));
17071 sri->icode = CODE_FOR_nothing;
17072 sri->extra_cost = 0;
17073 icode = ((in_p)
17074 ? reg_addr[mode].reload_load
17075 : reg_addr[mode].reload_store);
17077 if (REG_P (x) || register_operand (x, mode))
17079 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
17080 bool altivec_p = (rclass == ALTIVEC_REGS);
17081 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
17083 if (!in_p)
17085 enum rs6000_reg_type exchange = to_type;
17086 to_type = from_type;
17087 from_type = exchange;
17090 /* Can we do a direct move of some sort? */
17091 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
17092 altivec_p))
17094 icode = (enum insn_code)sri->icode;
17095 default_p = false;
17096 done_p = true;
17097 ret = NO_REGS;
17101 /* Make sure 0.0 is not reloaded or forced into memory. */
17102 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17104 ret = NO_REGS;
17105 default_p = false;
17106 done_p = true;
17109 /* If this is a scalar floating point value and we want to load it into the
17110 traditional Altivec registers, do it via a move via a traditional floating
17111 point register. Also make sure that non-zero constants use a FPR. */
17112 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17113 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17114 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17116 ret = FLOAT_REGS;
17117 default_p = false;
17118 done_p = true;
17121 /* Handle reload of load/stores if we have reload helper functions. */
17122 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17124 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17125 mode);
17127 if (extra_cost >= 0)
17129 done_p = true;
17130 ret = NO_REGS;
17131 if (extra_cost > 0)
17133 sri->extra_cost = extra_cost;
17134 sri->icode = icode;
17139 /* Handle unaligned loads and stores of integer registers. */
17140 if (!done_p && TARGET_POWERPC64
17141 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17142 && memory_p
17143 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17145 rtx addr = XEXP (x, 0);
17146 rtx off = address_offset (addr);
17148 if (off != NULL_RTX)
17150 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17151 unsigned HOST_WIDE_INT offset = INTVAL (off);
17153 /* We need a secondary reload when our legitimate_address_p
17154 says the address is good (as otherwise the entire address
17155 will be reloaded), and the offset is not a multiple of
17156 four or we have an address wrap. Address wrap will only
17157 occur for LO_SUMs since legitimate_offset_address_p
17158 rejects addresses for 16-byte mems that will wrap. */
17159 if (GET_CODE (addr) == LO_SUM
17160 ? (1 /* legitimate_address_p allows any offset for lo_sum */
17161 && ((offset & 3) != 0
17162 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
17163 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17164 && (offset & 3) != 0))
17166 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
17167 if (in_p)
17168 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
17169 : CODE_FOR_reload_di_load);
17170 else
17171 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
17172 : CODE_FOR_reload_di_store);
17173 sri->extra_cost = 2;
17174 ret = NO_REGS;
17175 done_p = true;
17177 else
17178 default_p = true;
17180 else
17181 default_p = true;
17184 if (!done_p && !TARGET_POWERPC64
17185 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17186 && memory_p
17187 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17189 rtx addr = XEXP (x, 0);
17190 rtx off = address_offset (addr);
17192 if (off != NULL_RTX)
17194 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17195 unsigned HOST_WIDE_INT offset = INTVAL (off);
17197 /* We need a secondary reload when our legitimate_address_p
17198 says the address is good (as otherwise the entire address
17199 will be reloaded), and we have a wrap.
17201 legitimate_lo_sum_address_p allows LO_SUM addresses to
17202 have any offset so test for wrap in the low 16 bits.
17204 legitimate_offset_address_p checks for the range
17205 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17206 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17207 [0x7ff4,0x7fff] respectively, so test for the
17208 intersection of these ranges, [0x7ffc,0x7fff] and
17209 [0x7ff4,0x7ff7] respectively.
17211 Note that the address we see here may have been
17212 manipulated by legitimize_reload_address. */
17213 if (GET_CODE (addr) == LO_SUM
17214 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17215 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17217 if (in_p)
17218 sri->icode = CODE_FOR_reload_si_load;
17219 else
17220 sri->icode = CODE_FOR_reload_si_store;
17221 sri->extra_cost = 2;
17222 ret = NO_REGS;
17223 done_p = true;
17225 else
17226 default_p = true;
17228 else
17229 default_p = true;
17232 if (!done_p)
17233 default_p = true;
17235 if (default_p)
17236 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17238 gcc_assert (ret != ALL_REGS);
17240 if (TARGET_DEBUG_ADDR)
17242 fprintf (stderr,
17243 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17244 "mode = %s",
17245 reg_class_names[ret],
17246 in_p ? "true" : "false",
17247 reg_class_names[rclass],
17248 GET_MODE_NAME (mode));
17250 if (reload_completed)
17251 fputs (", after reload", stderr);
17253 if (!done_p)
17254 fputs (", done_p not set", stderr);
17256 if (default_p)
17257 fputs (", default secondary reload", stderr);
17259 if (sri->icode != CODE_FOR_nothing)
17260 fprintf (stderr, ", reload func = %s, extra cost = %d",
17261 insn_data[sri->icode].name, sri->extra_cost);
17263 fputs ("\n", stderr);
17264 debug_rtx (x);
17267 return ret;
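/* Example of the unaligned-GPR path above, assuming a 64-bit target:
   a DImode load from (plus (reg) (const_int 9)) -- an offset that is
   not a multiple of 4 -- returns NO_REGS but records
   CODE_FOR_reload_di_load with an extra cost of 2, so reload routes
   the access through the helper pattern.  */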
17270 /* Better tracing for rs6000_secondary_reload_inner. */
17272 static void
17273 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17274 bool store_p)
17276 rtx set, clobber;
17278 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17280 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17281 store_p ? "store" : "load");
17283 if (store_p)
17284 set = gen_rtx_SET (VOIDmode, mem, reg);
17285 else
17286 set = gen_rtx_SET (VOIDmode, reg, mem);
17288 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17289 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17292 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17293 ATTRIBUTE_NORETURN;
17295 static void
17296 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17297 bool store_p)
17299 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17300 gcc_unreachable ();
17303 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17304 reload helper functions. These were identified in
17305 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17306 reload, it calls the insns:
17307 reload_<RELOAD:mode>_<P:mptrsize>_store
17308 reload_<RELOAD:mode>_<P:mptrsize>_load
17310 which in turn calls this function, to do whatever is necessary to create
17311 valid addresses. */
17313 void
17314 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17316 int regno = true_regnum (reg);
17317 machine_mode mode = GET_MODE (reg);
17318 addr_mask_type addr_mask;
17319 rtx addr;
17320 rtx new_addr;
17321 rtx op_reg, op0, op1;
17322 rtx and_op;
17323 rtx cc_clobber;
17324 rtvec rv;
17326 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17327 || !base_reg_operand (scratch, GET_MODE (scratch)))
17328 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17330 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17331 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17333 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17334 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17336 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17337 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17339 else
17340 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17342 /* Make sure the mode is valid in this register class. */
17343 if ((addr_mask & RELOAD_REG_VALID) == 0)
17344 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17346 if (TARGET_DEBUG_ADDR)
17347 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
17349 new_addr = addr = XEXP (mem, 0);
17350 switch (GET_CODE (addr))
17352     /* Does the register class support auto-update forms for this mode?  If
17353 not, do the update now. We don't need a scratch register, since the
17354 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
17355 case PRE_INC:
17356 case PRE_DEC:
17357 op_reg = XEXP (addr, 0);
17358 if (!base_reg_operand (op_reg, Pmode))
17359 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17361 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17363 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
17364 new_addr = op_reg;
17366 break;
17368 case PRE_MODIFY:
17369 op0 = XEXP (addr, 0);
17370 op1 = XEXP (addr, 1);
17371 if (!base_reg_operand (op0, Pmode)
17372 || GET_CODE (op1) != PLUS
17373 || !rtx_equal_p (op0, XEXP (op1, 0)))
17374 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17376 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17378 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17379 new_addr = reg;
17381 break;
17383 /* Do we need to simulate AND -16 to clear the bottom address bits used
17384 in VMX load/stores? */
17385 case AND:
17386 op0 = XEXP (addr, 0);
17387 op1 = XEXP (addr, 1);
17388 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
17390 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
17391 op_reg = op0;
17393 else if (GET_CODE (op1) == PLUS)
17395 emit_insn (gen_rtx_SET (VOIDmode, scratch, op1));
17396 op_reg = scratch;
17399 else
17400 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17402 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
17403 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
17404 rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
17405 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
17406 new_addr = scratch;
17408 break;
17410 /* If this is an indirect address, make sure it is a base register. */
17411 case REG:
17412 case SUBREG:
17413 if (!base_reg_operand (addr, GET_MODE (addr)))
17415 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17416 new_addr = scratch;
17418 break;
17420 /* If this is an indexed address, make sure the register class can handle
17421 indexed addresses for this mode. */
17422 case PLUS:
17423 op0 = XEXP (addr, 0);
17424 op1 = XEXP (addr, 1);
17425 if (!base_reg_operand (op0, Pmode))
17426 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17428 else if (int_reg_operand (op1, Pmode))
17430 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17432 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17433 new_addr = scratch;
17437 /* Make sure the register class can handle offset addresses. */
17438 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17440 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17442 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17443 new_addr = scratch;
17447 else
17448 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17450 break;
17452 case LO_SUM:
17453 op0 = XEXP (addr, 0);
17454 op1 = XEXP (addr, 1);
17455 if (!base_reg_operand (op0, Pmode))
17456 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17458 else if (int_reg_operand (op1, Pmode))
17460 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17462 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17463 new_addr = scratch;
17467 /* Make sure the register class can handle offset addresses. */
17468 else if (legitimate_lo_sum_address_p (mode, addr, false))
17470 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17472 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17473 new_addr = scratch;
17477 else
17478 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17480 break;
17482 case SYMBOL_REF:
17483 case CONST:
17484 case LABEL_REF:
17485 rs6000_emit_move (scratch, addr, Pmode);
17486 new_addr = scratch;
17487 break;
17489 default:
17490 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17493 /* Adjust the address if it changed. */
17494 if (addr != new_addr)
17496 mem = replace_equiv_address_nv (mem, new_addr);
17497 if (TARGET_DEBUG_ADDR)
17498 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17501 /* Now create the move. */
17502 if (store_p)
17503 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17504 else
17505 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17507 return;
17510 /* Convert reloads involving 64-bit gprs and misaligned offset
17511 addressing, or multiple 32-bit gprs and offsets that are too large,
17512 to use indirect addressing. */
17514 void
17515 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17517 int regno = true_regnum (reg);
17518 enum reg_class rclass;
17519 rtx addr;
17520 rtx scratch_or_premodify = scratch;
17522 if (TARGET_DEBUG_ADDR)
17524 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17525 store_p ? "store" : "load");
17526 fprintf (stderr, "reg:\n");
17527 debug_rtx (reg);
17528 fprintf (stderr, "mem:\n");
17529 debug_rtx (mem);
17530 fprintf (stderr, "scratch:\n");
17531 debug_rtx (scratch);
17534 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17535 gcc_assert (GET_CODE (mem) == MEM);
17536 rclass = REGNO_REG_CLASS (regno);
17537 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17538 addr = XEXP (mem, 0);
17540 if (GET_CODE (addr) == PRE_MODIFY)
17542 scratch_or_premodify = XEXP (addr, 0);
17543 gcc_assert (REG_P (scratch_or_premodify));
17544 addr = XEXP (addr, 1);
17546 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17548 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17550 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17552 /* Now create the move. */
17553 if (store_p)
17554 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17555 else
17556 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17558 return;
17561 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17562 this function has any SDmode references. If we are on a power7 or later, we
17563 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17564 can load/store the value. */
17566 static void
17567 rs6000_alloc_sdmode_stack_slot (void)
17569 tree t;
17570 basic_block bb;
17571 gimple_stmt_iterator gsi;
17573 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17574 /* We use a different approach for dealing with the secondary
17575 memory in LRA. */
17576 if (ira_use_lra_p)
17577 return;
17579 if (TARGET_NO_SDMODE_STACK)
17580 return;
17582 FOR_EACH_BB_FN (bb, cfun)
17583 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17585 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17586 if (ret)
17588 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17589 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17590 SDmode, 0);
17591 return;
17595 /* Check for any SDmode parameters of the function. */
17596 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17598 if (TREE_TYPE (t) == error_mark_node)
17599 continue;
17601 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17602 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17604 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17605 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17606 SDmode, 0);
17607 return;
17612 static void
17613 rs6000_instantiate_decls (void)
17615 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17616 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17619 /* Given an rtx X being reloaded into a reg required to be
17620 in class CLASS, return the class of reg to actually use.
17621 In general this is just CLASS; but on some machines
17622 in some cases it is preferable to use a more restrictive class.
17624 On the RS/6000, we have to return NO_REGS when we want to reload a
17625 floating-point CONST_DOUBLE to force it to be copied to memory.
17627 We also don't want to reload integer values into floating-point
17628 registers if we can at all help it. In fact, this can
17629 cause reload to die, if it tries to generate a reload of CTR
17630 into a FP register and discovers it doesn't have the memory location
17631 required.
17633 ??? Would it be a good idea to have reload do the converse, that is
17634 try to reload floating modes into FP registers if possible?
17637 static enum reg_class
17638 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17640 machine_mode mode = GET_MODE (x);
17641 bool is_constant = CONSTANT_P (x);
17643   /* Do VSX tests before handling traditional floating point registers.  */
17644 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17646 if (is_constant)
17648 /* Zero is always allowed in all VSX registers. */
17649 if (x == CONST0_RTX (mode))
17650 return rclass;
17652 /* If this is a vector constant that can be formed with a few Altivec
17653 instructions, we want altivec registers. */
17654 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
17655 return ALTIVEC_REGS;
17657 /* Force constant to memory. */
17658 return NO_REGS;
17661 /* If this is a scalar floating point value, prefer the traditional
17662 floating point registers so that we can use D-form (register+offset)
17663 addressing. */
17664 if (GET_MODE_SIZE (mode) < 16)
17665 return FLOAT_REGS;
17667 /* Prefer the Altivec registers if Altivec is handling the vector
17668 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
17669 loads. */
17670 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17671 || mode == V1TImode)
17672 return ALTIVEC_REGS;
17674 return rclass;
17677 if (is_constant || GET_CODE (x) == PLUS)
17679 if (reg_class_subset_p (GENERAL_REGS, rclass))
17680 return GENERAL_REGS;
17681 if (reg_class_subset_p (BASE_REGS, rclass))
17682 return BASE_REGS;
17683 return NO_REGS;
17686 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17687 return GENERAL_REGS;
17689 return rclass;
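/* For example, under VSX a DFmode pseudo asked to go in VSX_REGS is
   steered to FLOAT_REGS above so D-form (register+offset) addressing
   stays available, while a nonzero DFmode constant returns NO_REGS and
   is forced to memory.  */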
17692 /* Debug version of rs6000_preferred_reload_class. */
17693 static enum reg_class
17694 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17696 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17698 fprintf (stderr,
17699 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17700 "mode = %s, x:\n",
17701 reg_class_names[ret], reg_class_names[rclass],
17702 GET_MODE_NAME (GET_MODE (x)));
17703 debug_rtx (x);
17705 return ret;
17708 /* If we are copying between FP or AltiVec registers and anything else, we need
17709 a memory location. The exception is when we are targeting ppc64 and the
17710 move to/from fpr to gpr instructions are available. Also, under VSX, you
17711 can copy vector registers from the FP register set to the Altivec register
17712 set and vice versa. */
17714 static bool
17715 rs6000_secondary_memory_needed (enum reg_class from_class,
17716 enum reg_class to_class,
17717 machine_mode mode)
17719 enum rs6000_reg_type from_type, to_type;
17720 bool altivec_p = ((from_class == ALTIVEC_REGS)
17721 || (to_class == ALTIVEC_REGS));
17723 /* If a simple/direct move is available, we don't need secondary memory */
17724 from_type = reg_class_to_reg_type[(int)from_class];
17725 to_type = reg_class_to_reg_type[(int)to_class];
17727 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17728 (secondary_reload_info *)0, altivec_p))
17729 return false;
17731 /* If we have a floating point or vector register class, we need to use
17732 memory to transfer the data. */
17733 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17734 return true;
17736 return false;
17739 /* Debug version of rs6000_secondary_memory_needed. */
17740 static bool
17741 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17742 enum reg_class to_class,
17743 machine_mode mode)
17745 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17747 fprintf (stderr,
17748 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17749 "to_class = %s, mode = %s\n",
17750 ret ? "true" : "false",
17751 reg_class_names[from_class],
17752 reg_class_names[to_class],
17753 GET_MODE_NAME (mode));
17755 return ret;
17758 /* Return the register class of a scratch register needed to copy IN into
17759 or out of a register in RCLASS in MODE. If it can be done directly,
17760 NO_REGS is returned. */
17762 static enum reg_class
17763 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
17764 rtx in)
17766 int regno;
17768 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17769 #if TARGET_MACHO
17770 && MACHOPIC_INDIRECT
17771 #endif
17774 /* We cannot copy a symbolic operand directly into anything
17775 other than BASE_REGS for TARGET_ELF. So indicate that a
17776 register from BASE_REGS is needed as an intermediate
17777 register.
17779 On Darwin, pic addresses require a load from memory, which
17780 needs a base register. */
17781 if (rclass != BASE_REGS
17782 && (GET_CODE (in) == SYMBOL_REF
17783 || GET_CODE (in) == HIGH
17784 || GET_CODE (in) == LABEL_REF
17785 || GET_CODE (in) == CONST))
17786 return BASE_REGS;
17789 if (GET_CODE (in) == REG)
17791 regno = REGNO (in);
17792 if (regno >= FIRST_PSEUDO_REGISTER)
17794 regno = true_regnum (in);
17795 if (regno >= FIRST_PSEUDO_REGISTER)
17796 regno = -1;
17799 else if (GET_CODE (in) == SUBREG)
17801 regno = true_regnum (in);
17802 if (regno >= FIRST_PSEUDO_REGISTER)
17803 regno = -1;
17805 else
17806 regno = -1;
17808 /* If we have VSX register moves, prefer moving scalar values between
17809 Altivec registers and GPR by going via an FPR (and then via memory)
17810 instead of reloading the secondary memory address for Altivec moves. */
17811 if (TARGET_VSX
17812 && GET_MODE_SIZE (mode) < 16
17813 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
17814 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
17815 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17816 && (regno >= 0 && INT_REGNO_P (regno)))))
17817 return FLOAT_REGS;
17819 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17820 into anything. */
17821 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17822 || (regno >= 0 && INT_REGNO_P (regno)))
17823 return NO_REGS;
17825 /* Constants, memory, and VSX registers can go into VSX registers (both the
17826 traditional floating point and the altivec registers). */
17827 if (rclass == VSX_REGS
17828 && (regno == -1 || VSX_REGNO_P (regno)))
17829 return NO_REGS;
17831 /* Constants, memory, and FP registers can go into FP registers. */
17832 if ((regno == -1 || FP_REGNO_P (regno))
17833 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17834 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17836 /* Memory, and AltiVec registers can go into AltiVec registers. */
17837 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17838 && rclass == ALTIVEC_REGS)
17839 return NO_REGS;
17841 /* We can copy among the CR registers. */
17842 if ((rclass == CR_REGS || rclass == CR0_REGS)
17843 && regno >= 0 && CR_REGNO_P (regno))
17844 return NO_REGS;
17846 /* Otherwise, we need GENERAL_REGS. */
17847 return GENERAL_REGS;
17850 /* Debug version of rs6000_secondary_reload_class. */
17851 static enum reg_class
17852 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17853 machine_mode mode, rtx in)
17855 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17856 fprintf (stderr,
17857 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17858 "mode = %s, input rtx:\n",
17859 reg_class_names[ret], reg_class_names[rclass],
17860 GET_MODE_NAME (mode));
17861 debug_rtx (in);
17863 return ret;
17866 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
17868 static bool
17869 rs6000_cannot_change_mode_class (machine_mode from,
17870 machine_mode to,
17871 enum reg_class rclass)
17873 unsigned from_size = GET_MODE_SIZE (from);
17874 unsigned to_size = GET_MODE_SIZE (to);
17876 if (from_size != to_size)
17878 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17880 if (reg_classes_intersect_p (xclass, rclass))
17882 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17883 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17885 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17886 single register under VSX because the scalar part of the register
17887 is in the upper 64 bits, and not the lower 64 bits. Types like
17888 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
17889 IEEE floating point can't overlap, and neither can small
17890 values. */
17892 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17893 return true;
17895 /* TDmode in floating-mode registers must always go into a register
17896 pair with the most significant word in the even-numbered register
17897 to match ISA requirements. In little-endian mode, this does not
17898 match subreg numbering, so we cannot allow subregs. */
17899 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17900 return true;
17902 if (from_size < 8 || to_size < 8)
17903 return true;
17905 if (from_size == 8 && (8 * to_nregs) != to_size)
17906 return true;
17908 if (to_size == 8 && (8 * from_nregs) != from_size)
17909 return true;
17911 return false;
17913 else
17914 return false;
17917 if (TARGET_E500_DOUBLE
17918 && ((((to) == DFmode) + ((from) == DFmode)) == 1
17919 || (((to) == TFmode) + ((from) == TFmode)) == 1
17920 || (((to) == DDmode) + ((from) == DDmode)) == 1
17921 || (((to) == TDmode) + ((from) == TDmode)) == 1
17922 || (((to) == DImode) + ((from) == DImode)) == 1))
17923 return true;
17925 /* Since the VSX register set includes traditional floating point registers
17926 and altivec registers, just check for the size being different instead of
17927 trying to check whether the modes are vector modes. Otherwise it won't
17928 allow, say, DF and DI to change classes. For types like TFmode and TDmode
17929 that take 2 64-bit registers, rather than a single 128-bit register, don't
17930 allow subregs of those types to other 128-bit types. */
17931 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17933 unsigned num_regs = (from_size + 15) / 16;
17934 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17935 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17936 return true;
17938 return (from_size != 8 && from_size != 16);
17941 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17942 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17943 return true;
17945 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
17946 && reg_classes_intersect_p (GENERAL_REGS, rclass))
17947 return true;
17949 return false;
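/* For example, a mode change between DFmode and V2DFmode in a VSX
   register class fails the size checks above: V2DFmode fills one
   16-byte register while the DFmode scalar occupies only its upper
   64 bits, so the two layouts do not line up for subregs.  */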
17952 /* Debug version of rs6000_cannot_change_mode_class. */
17953 static bool
17954 rs6000_debug_cannot_change_mode_class (machine_mode from,
17955 machine_mode to,
17956 enum reg_class rclass)
17958 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
17960 fprintf (stderr,
17961 "rs6000_cannot_change_mode_class, return %s, from = %s, "
17962 "to = %s, rclass = %s\n",
17963 ret ? "true" : "false",
17964 GET_MODE_NAME (from), GET_MODE_NAME (to),
17965 reg_class_names[rclass]);
17967 return ret;
17970 /* Return a string to do a move operation of 128 bits of data. */
17972 const char *
17973 rs6000_output_move_128bit (rtx operands[])
17975 rtx dest = operands[0];
17976 rtx src = operands[1];
17977 machine_mode mode = GET_MODE (dest);
17978 int dest_regno;
17979 int src_regno;
17980 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17981 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17983 if (REG_P (dest))
17985 dest_regno = REGNO (dest);
17986 dest_gpr_p = INT_REGNO_P (dest_regno);
17987 dest_fp_p = FP_REGNO_P (dest_regno);
17988 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
17989 dest_vsx_p = dest_fp_p | dest_vmx_p;
17991 else
17993 dest_regno = -1;
17994 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
17997 if (REG_P (src))
17999 src_regno = REGNO (src);
18000 src_gpr_p = INT_REGNO_P (src_regno);
18001 src_fp_p = FP_REGNO_P (src_regno);
18002 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
18003 src_vsx_p = src_fp_p | src_vmx_p;
18005 else
18007 src_regno = -1;
18008 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
18011 /* Register moves. */
18012 if (dest_regno >= 0 && src_regno >= 0)
18014 if (dest_gpr_p)
18016 if (src_gpr_p)
18017 return "#";
18019 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
18020 return "#";
18023 else if (TARGET_VSX && dest_vsx_p)
18025 if (src_vsx_p)
18026 return "xxlor %x0,%x1,%x1";
18028 else if (TARGET_DIRECT_MOVE && src_gpr_p)
18029 return "#";
18032 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
18033 return "vor %0,%1,%1";
18035 else if (dest_fp_p && src_fp_p)
18036 return "#";
18039 /* Loads. */
18040 else if (dest_regno >= 0 && MEM_P (src))
18042 if (dest_gpr_p)
18044 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18045 return "lq %0,%1";
18046 else
18047 return "#";
18050 else if (TARGET_ALTIVEC && dest_vmx_p
18051 && altivec_indexed_or_indirect_operand (src, mode))
18052 return "lvx %0,%y1";
18054 else if (TARGET_VSX && dest_vsx_p)
18056 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18057 return "lxvw4x %x0,%y1";
18058 else
18059 return "lxvd2x %x0,%y1";
18062 else if (TARGET_ALTIVEC && dest_vmx_p)
18063 return "lvx %0,%y1";
18065 else if (dest_fp_p)
18066 return "#";
18069 /* Stores. */
18070 else if (src_regno >= 0 && MEM_P (dest))
18072 if (src_gpr_p)
18074 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18075 return "stq %1,%0";
18076 else
18077 return "#";
18080 else if (TARGET_ALTIVEC && src_vmx_p
18081 && altivec_indexed_or_indirect_operand (src, mode))
18082 return "stvx %1,%y0";
18084 else if (TARGET_VSX && src_vsx_p)
18086 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18087 return "stxvw4x %x1,%y0";
18088 else
18089 return "stxvd2x %x1,%y0";
18092 else if (TARGET_ALTIVEC && src_vmx_p)
18093 return "stvx %1,%y0";
18095 else if (src_fp_p)
18096 return "#";
18099 /* Constants. */
18100 else if (dest_regno >= 0
18101 && (GET_CODE (src) == CONST_INT
18102 || GET_CODE (src) == CONST_WIDE_INT
18103 || GET_CODE (src) == CONST_DOUBLE
18104 || GET_CODE (src) == CONST_VECTOR))
18106 if (dest_gpr_p)
18107 return "#";
18109 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18110 return "xxlxor %x0,%x0,%x0";
18112 else if (TARGET_ALTIVEC && dest_vmx_p)
18113 return output_vec_const_move (operands);
18116 if (TARGET_DEBUG_ADDR)
18118 fprintf (stderr, "\n===== Bad 128 bit move:\n");
18119 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
18122 gcc_unreachable ();
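/* For example, the function above emits "xxlor %x0,%x1,%x1" for a
   VSX-to-VSX register copy, "lxvw4x %x0,%y1" for a V4SImode load into
   a VSX register, and returns "#" for a GPR-to-GPR copy so that the
   move is split into word-sized pieces after reload.  */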
18125 /* Validate a 128-bit move. */
18126 bool
18127 rs6000_move_128bit_ok_p (rtx operands[])
18129 machine_mode mode = GET_MODE (operands[0]);
18130 return (gpc_reg_operand (operands[0], mode)
18131 || gpc_reg_operand (operands[1], mode));
18134 /* Return true if a 128-bit move needs to be split. */
18135 bool
18136 rs6000_split_128bit_ok_p (rtx operands[])
18138 if (!reload_completed)
18139 return false;
18141 if (!gpr_or_gpr_p (operands[0], operands[1]))
18142 return false;
18144 if (quad_load_store_p (operands[0], operands[1]))
18145 return false;
18147 return true;
18151 /* Given a comparison operation, return the bit number in CCR to test. The
18152 caller normally guarantees this is a valid comparison.
18154 SCC_P is 1 if this is for an scc. That means that %D will have been
18155 used instead of %C, so the bits will be in different places.
18157 Return -1 if OP isn't a valid comparison for some reason. */
18159 int
18160 ccr_bit (rtx op, int scc_p)
18162 enum rtx_code code = GET_CODE (op);
18163 machine_mode cc_mode;
18164 int cc_regnum;
18165 int base_bit;
18166 rtx reg;
18168 if (!COMPARISON_P (op))
18169 return -1;
18171 reg = XEXP (op, 0);
18173 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18175 cc_mode = GET_MODE (reg);
18176 cc_regnum = REGNO (reg);
18177 base_bit = 4 * (cc_regnum - CR0_REGNO);
18179 validate_condition_mode (code, cc_mode);
18181 /* When generating a sCOND operation, only positive conditions are
18182 allowed. */
18183 gcc_assert (!scc_p
18184 || code == EQ || code == GT || code == LT || code == UNORDERED
18185 || code == GTU || code == LTU);
18187 switch (code)
18189 case NE:
18190 return scc_p ? base_bit + 3 : base_bit + 2;
18191 case EQ:
18192 return base_bit + 2;
18193 case GT: case GTU: case UNLE:
18194 return base_bit + 1;
18195 case LT: case LTU: case UNGE:
18196 return base_bit;
18197 case ORDERED: case UNORDERED:
18198 return base_bit + 3;
18200 case GE: case GEU:
18201 /* If scc, we will have done a cror to put the bit in the
18202 unordered position. So test that bit. For integer, this is ! LT
18203 unless this is an scc insn. */
18204 return scc_p ? base_bit + 3 : base_bit;
18206 case LE: case LEU:
18207 return scc_p ? base_bit + 3 : base_bit + 1;
18209 default:
18210 gcc_unreachable ();
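/* For example, an EQ test on CR2 with scc_p == 0 gives
   base_bit == 4 * 2 == 8, so ccr_bit returns 8 + 2 == 10, the EQ bit
   of that CR field.  */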
18214 /* Return the GOT register. */
18216 rtx
18217 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18219 /* The second flow pass currently (June 1999) can't update
18220 regs_ever_live without disturbing other parts of the compiler, so
18221 update it here to make the prolog/epilogue code happy. */
18222 if (!can_create_pseudo_p ()
18223 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18224 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18226 crtl->uses_pic_offset_table = 1;
18228 return pic_offset_table_rtx;
18231 static rs6000_stack_t stack_info;
18233 /* Function to init struct machine_function.
18234 This will be called, via a pointer variable,
18235 from push_function_context. */
18237 static struct machine_function *
18238 rs6000_init_machine_status (void)
18240 stack_info.reload_completed = 0;
18241 return ggc_cleared_alloc<machine_function> ();
18244 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18246 int
18247 extract_MB (rtx op)
18249 int i;
18250 unsigned long val = INTVAL (op);
18252 /* If the high bit is zero, the value is the first 1 bit we find
18253 from the left. */
18254 if ((val & 0x80000000) == 0)
18256 gcc_assert (val & 0xffffffff);
18258 i = 1;
18259 while (((val <<= 1) & 0x80000000) == 0)
18260 ++i;
18261 return i;
18264 /* If the high bit is set and the low bit is not, or the mask is all
18265 1's, the value is zero. */
18266 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
18267 return 0;
18269 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18270 from the right. */
18271 i = 31;
18272 while (((val >>= 1) & 1) != 0)
18273 --i;
18275 return i;
18278 int
18279 extract_ME (rtx op)
18281 int i;
18282 unsigned long val = INTVAL (op);
18284 /* If the low bit is zero, the value is the first 1 bit we find from
18285 the right. */
18286 if ((val & 1) == 0)
18288 gcc_assert (val & 0xffffffff);
18290 i = 30;
18291 while (((val >>= 1) & 1) == 0)
18292 --i;
18294 return i;
18297 /* If the low bit is set and the high bit is not, or the mask is all
18298 1's, the value is 31. */
18299 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
18300 return 31;
18302 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18303 from the left. */
18304 i = 0;
18305 while (((val <<= 1) & 0x80000000) != 0)
18306 ++i;
18308 return i;
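/* Worked example for the two extractors above: for the rlwinm mask
   0x0fff0000 (bits 4..15 set, counting from the MSB as bit 0),
   extract_MB yields 4 and extract_ME yields 15; for the wrap-around
   mask 0xff0000ff they yield 24 and 7, i.e. the mask runs from bit 24
   through bit 7, wrapping past bit 31.  */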
18311 /* Write out a function code label. */
18313 void
18314 rs6000_output_function_entry (FILE *file, const char *fname)
18316 if (fname[0] != '.')
18318 switch (DEFAULT_ABI)
18320 default:
18321 gcc_unreachable ();
18323 case ABI_AIX:
18324 if (DOT_SYMBOLS)
18325 putc ('.', file);
18326 else
18327 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18328 break;
18330 case ABI_ELFv2:
18331 case ABI_V4:
18332 case ABI_DARWIN:
18333 break;
18337 RS6000_OUTPUT_BASENAME (file, fname);
18340 /* Print an operand. Recognize special options, documented below. */
18342 #if TARGET_ELF
18343 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18344 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18345 #else
18346 #define SMALL_DATA_RELOC "sda21"
18347 #define SMALL_DATA_REG 0
18348 #endif
18350 void
18351 print_operand (FILE *file, rtx x, int code)
18353 int i;
18354 unsigned HOST_WIDE_INT uval;
18356 switch (code)
18358 /* %a is output_address. */
18360 case 'b':
18361 /* If constant, low-order 16 bits of constant, unsigned.
18362 Otherwise, write normally. */
18363 if (INT_P (x))
18364 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18365 else
18366 print_operand (file, x, 0);
18367 return;
18369 case 'B':
18370 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18371 for 64-bit mask direction. */
18372 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18373 return;
18375 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18376 output_operand. */
18378 case 'D':
18379 /* Like 'J' but get to the GT bit only. */
18380 gcc_assert (REG_P (x));
18382 /* Bit 1 is GT bit. */
18383 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18385 /* Add one for shift count in rlinm for scc. */
18386 fprintf (file, "%d", i + 1);
18387 return;
18389 case 'e':
18390 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18391 if (! INT_P (x))
18393 output_operand_lossage ("invalid %%e value");
18394 return;
18397 uval = INTVAL (x);
18398 if ((uval & 0xffff) == 0 && uval != 0)
18399 putc ('s', file);
18400 return;
18402 case 'E':
18403 /* X is a CR register. Print the number of the EQ bit of the CR. */
18404 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18405 output_operand_lossage ("invalid %%E value");
18406 else
18407 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18408 return;
18410 case 'f':
18411 /* X is a CR register. Print the shift count needed to move it
18412 to the high-order four bits. */
18413 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18414 output_operand_lossage ("invalid %%f value");
18415 else
18416 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18417 return;
18419 case 'F':
18420 /* Similar, but print the count for the rotate in the opposite
18421 direction. */
18422 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18423 output_operand_lossage ("invalid %%F value");
18424 else
18425 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18426 return;
18428 case 'G':
18429 /* X is a constant integer. If it is negative, print "m",
18430 otherwise print "z". This is to make an aze or ame insn. */
18431 if (GET_CODE (x) != CONST_INT)
18432 output_operand_lossage ("invalid %%G value");
18433 else if (INTVAL (x) >= 0)
18434 putc ('z', file);
18435 else
18436 putc ('m', file);
18437 return;
18439 case 'h':
18440 /* If constant, output low-order five bits. Otherwise, write
18441 normally. */
18442 if (INT_P (x))
18443 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18444 else
18445 print_operand (file, x, 0);
18446 return;
18448 case 'H':
18449 /* If constant, output low-order six bits. Otherwise, write
18450 normally. */
18451 if (INT_P (x))
18452 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18453 else
18454 print_operand (file, x, 0);
18455 return;
18457 case 'I':
18458 /* Print `i' if this is a constant, else nothing. */
18459 if (INT_P (x))
18460 putc ('i', file);
18461 return;
18463 case 'j':
18464 /* Write the bit number in CCR for jump. */
18465 i = ccr_bit (x, 0);
18466 if (i == -1)
18467 output_operand_lossage ("invalid %%j code");
18468 else
18469 fprintf (file, "%d", i);
18470 return;
18472 case 'J':
18473 /* Similar, but add one for shift count in rlinm for scc and pass
18474 scc flag to `ccr_bit'. */
18475 i = ccr_bit (x, 1);
18476 if (i == -1)
18477 output_operand_lossage ("invalid %%J code");
18478 else
18479 /* If we want bit 31, write a shift count of zero, not 32. */
18480 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18481 return;
18483 case 'k':
18484 /* X must be a constant. Write the 1's complement of the
18485 constant. */
18486 if (! INT_P (x))
18487 output_operand_lossage ("invalid %%k value");
18488 else
18489 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18490 return;
18492 case 'K':
18493 /* X must be a symbolic constant on ELF. Write an
18494 expression suitable for an 'addi' that adds in the low 16
18495 bits of the MEM. */
18496 if (GET_CODE (x) == CONST)
18498 if (GET_CODE (XEXP (x, 0)) != PLUS
18499 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18500 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18501 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18502 output_operand_lossage ("invalid %%K value");
18504 print_operand_address (file, x);
18505 fputs ("@l", file);
18506 return;
18508 /* %l is output_asm_label. */
18510 case 'L':
18511 /* Write second word of DImode or DFmode reference. Works on register
18512 or non-indexed memory only. */
18513 if (REG_P (x))
18514 fputs (reg_names[REGNO (x) + 1], file);
18515 else if (MEM_P (x))
18517 /* Handle possible auto-increment. Since it is pre-increment and
18518 we have already done it, we can just use an offset of one word. */
18519 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18520 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18521 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18522 UNITS_PER_WORD));
18523 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18524 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18525 UNITS_PER_WORD));
18526 else
18527 output_address (XEXP (adjust_address_nv (x, SImode,
18528 UNITS_PER_WORD),
18529 0));
18531 if (small_data_operand (x, GET_MODE (x)))
18532 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18533 reg_names[SMALL_DATA_REG]);
18535 return;
18537 case 'm':
18538 /* MB value for a mask operand. */
18539 if (! mask_operand (x, SImode))
18540 output_operand_lossage ("invalid %%m value");
18542 fprintf (file, "%d", extract_MB (x));
18543 return;
18545 case 'M':
18546 /* ME value for a mask operand. */
18547 if (! mask_operand (x, SImode))
18548 output_operand_lossage ("invalid %%M value");
18550 fprintf (file, "%d", extract_ME (x));
18551 return;
18553 /* %n outputs the negative of its operand. */
18555 case 'N':
18556 /* Write the number of elements in the vector times 4. */
18557 if (GET_CODE (x) != PARALLEL)
18558 output_operand_lossage ("invalid %%N value");
18559 else
18560 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18561 return;
18563 case 'O':
18564 /* Similar, but subtract 1 first. */
18565 if (GET_CODE (x) != PARALLEL)
18566 output_operand_lossage ("invalid %%O value");
18567 else
18568 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18569 return;
18571 case 'p':
18572 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18573 if (! INT_P (x)
18574 || INTVAL (x) < 0
18575 || (i = exact_log2 (INTVAL (x))) < 0)
18576 output_operand_lossage ("invalid %%p value");
18577 else
18578 fprintf (file, "%d", i);
18579 return;
18581 case 'P':
18582 /* The operand must be an indirect memory reference. The result
18583 is the register name. */
18584 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18585 || REGNO (XEXP (x, 0)) >= 32)
18586 output_operand_lossage ("invalid %%P value");
18587 else
18588 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18589 return;
18591 case 'q':
18592 /* This outputs the logical code corresponding to a boolean
18593 expression. The expression may have one or both operands
18594 negated (if one, only the first one). For condition register
18595 logical operations, it will also treat the negated
18596 CR codes as NOTs, but not handle NOTs of them. */
18598 const char *const *t = 0;
18599 const char *s;
18600 enum rtx_code code = GET_CODE (x);
18601 static const char * const tbl[3][3] = {
18602 { "and", "andc", "nor" },
18603 { "or", "orc", "nand" },
18604 { "xor", "eqv", "xor" } };
18606 if (code == AND)
18607 t = tbl[0];
18608 else if (code == IOR)
18609 t = tbl[1];
18610 else if (code == XOR)
18611 t = tbl[2];
18612 else
18613 output_operand_lossage ("invalid %%q value");
18615 if (GET_CODE (XEXP (x, 0)) != NOT)
18616 s = t[0];
18617 else
18619 if (GET_CODE (XEXP (x, 1)) == NOT)
18620 s = t[2];
18621 else
18622 s = t[1];
18625 fputs (s, file);
18627 return;
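/* For example, %q prints "andc" for (and (not A) B), which selects
   tbl[0][1], and "nand" for (ior (not A) (not B)), which selects
   tbl[1][2].  */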
18629 case 'Q':
18630 if (! TARGET_MFCRF)
18631 return;
18632 fputc (',', file);
18633 /* FALLTHRU */
18635 case 'R':
18636 /* X is a CR register. Print the mask for `mtcrf'. */
18637 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18638 output_operand_lossage ("invalid %%R value");
18639 else
18640 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18641 return;
18643 case 's':
18644 /* Low 5 bits of 32 - value. */
18645 if (! INT_P (x))
18646 output_operand_lossage ("invalid %%s value");
18647 else
18648 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18649 return;
18651 case 'S':
18652 /* PowerPC64 mask position. All 0's is excluded.
18653 CONST_INT 32-bit mask is considered sign-extended so any
18654 transition must occur within the CONST_INT, not on the boundary. */
18655 if (! mask64_operand (x, DImode))
18656 output_operand_lossage ("invalid %%S value");
18658 uval = INTVAL (x);
18660 if (uval & 1) /* Clear Left */
18662 #if HOST_BITS_PER_WIDE_INT > 64
18663 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18664 #endif
18665 i = 64;
18667 else /* Clear Right */
18669 uval = ~uval;
18670 #if HOST_BITS_PER_WIDE_INT > 64
18671 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18672 #endif
18673 i = 63;
18675 while (uval != 0)
18676 --i, uval >>= 1;
18677 gcc_assert (i >= 0);
18678 fprintf (file, "%d", i);
18679 return;
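/* For example, %S prints 60 for the "clear left" mask 0xf (the MB
   operand of an rldicl) and 31 for the "clear right" mask
   0xffffffff00000000 (the ME operand of an rldicr).  */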
18681 case 't':
18682 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18683 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18685 /* Bit 3 is OV bit. */
18686 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18688 /* If we want bit 31, write a shift count of zero, not 32. */
18689 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18690 return;
18692 case 'T':
18693 /* Print the symbolic name of a branch target register. */
18694 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18695 && REGNO (x) != CTR_REGNO))
18696 output_operand_lossage ("invalid %%T value");
18697 else if (REGNO (x) == LR_REGNO)
18698 fputs ("lr", file);
18699 else
18700 fputs ("ctr", file);
18701 return;
18703 case 'u':
18704 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18705 for use in unsigned operand. */
18706 if (! INT_P (x))
18708 output_operand_lossage ("invalid %%u value");
18709 return;
18712 uval = INTVAL (x);
18713 if ((uval & 0xffff) == 0)
18714 uval >>= 16;
18716 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18717 return;
18719 case 'v':
18720 /* High-order 16 bits of constant for use in signed operand. */
18721 if (! INT_P (x))
18722 output_operand_lossage ("invalid %%v value");
18723 else
18724 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18725 (INTVAL (x) >> 16) & 0xffff);
18726 return;
18728 case 'U':
18729 /* Print `u' if this has an auto-increment or auto-decrement. */
18730 if (MEM_P (x)
18731 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18732 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18733 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18734 putc ('u', file);
18735 return;
18737 case 'V':
18738 /* Print the trap code for this operand. */
18739 switch (GET_CODE (x))
18741 case EQ:
18742 fputs ("eq", file); /* 4 */
18743 break;
18744 case NE:
18745 fputs ("ne", file); /* 24 */
18746 break;
18747 case LT:
18748 fputs ("lt", file); /* 16 */
18749 break;
18750 case LE:
18751 fputs ("le", file); /* 20 */
18752 break;
18753 case GT:
18754 fputs ("gt", file); /* 8 */
18755 break;
18756 case GE:
18757 fputs ("ge", file); /* 12 */
18758 break;
18759 case LTU:
18760 fputs ("llt", file); /* 2 */
18761 break;
18762 case LEU:
18763 fputs ("lle", file); /* 6 */
18764 break;
18765 case GTU:
18766 fputs ("lgt", file); /* 1 */
18767 break;
18768 case GEU:
18769 fputs ("lge", file); /* 5 */
18770 break;
18771 default:
18772 gcc_unreachable ();
18774 break;
18776 case 'w':
18777 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18778 normally. */
18779 if (INT_P (x))
18780 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18781 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18782 else
18783 print_operand (file, x, 0);
18784 return;
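/* The xor/subtract pair above sign-extends the low 16 bits: 0x15678
   prints as 22136 and 0x18000 as -32768.  A minimal standalone sketch
   of the arithmetic, kept out of the build:  */
#if 0
#include <stdio.h>
int
main (void)
{
  long v[] = { 0x15678L, 0x18000L };
  int i;
  for (i = 0; i < 2; i++)
    /* Same as (short) (v[i] & 0xffff) on a two's complement target.  */
    printf ("%ld\n", ((v[i] & 0xffff) ^ 0x8000) - 0x8000);
  return 0;
}
#endif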
18786 case 'W':
18787 /* MB value for a PowerPC64 rldic operand. */
18788 i = clz_hwi (INTVAL (x));
18790 fprintf (file, "%d", i);
18791 return;
18793 case 'x':
18794 /* X is a FPR or Altivec register used in a VSX context. */
18795 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18796 output_operand_lossage ("invalid %%x value");
18797 else
18799 int reg = REGNO (x);
18800 int vsx_reg = (FP_REGNO_P (reg)
18801 ? reg - 32
18802 : reg - FIRST_ALTIVEC_REGNO + 32);
18804 #ifdef TARGET_REGNAMES
18805 if (TARGET_REGNAMES)
18806 fprintf (file, "%%vs%d", vsx_reg);
18807 else
18808 #endif
18809 fprintf (file, "%d", vsx_reg);
18811 return;
18813 case 'X':
18814 if (MEM_P (x)
18815 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18816 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18817 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18818 putc ('x', file);
18819 return;
18821 case 'Y':
18822 /* Like 'L', for third word of TImode/PTImode. */
18823 if (REG_P (x))
18824 fputs (reg_names[REGNO (x) + 2], file);
18825 else if (MEM_P (x))
18827 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18828 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18829 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18830 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18831 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18832 else
18833 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18834 if (small_data_operand (x, GET_MODE (x)))
18835 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18836 reg_names[SMALL_DATA_REG]);
18838 return;
18840 case 'z':
18841 /* X is a SYMBOL_REF. Write out the name preceded by a
18842 period and without any trailing data in brackets. Used for function
18843 names. If we are configured for System V (or the embedded ABI) on
18844 the PowerPC, do not emit the period, since those systems do not use
18845 TOCs and the like. */
18846 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18848 /* For macho, check to see if we need a stub. */
18849 if (TARGET_MACHO)
18851 const char *name = XSTR (x, 0);
18852 #if TARGET_MACHO
18853 if (darwin_emit_branch_islands
18854 && MACHOPIC_INDIRECT
18855 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18856 name = machopic_indirection_name (x, /*stub_p=*/true);
18857 #endif
18858 assemble_name (file, name);
18860 else if (!DOT_SYMBOLS)
18861 assemble_name (file, XSTR (x, 0));
18862 else
18863 rs6000_output_function_entry (file, XSTR (x, 0));
18864 return;
18866 case 'Z':
18867 /* Like 'L', for last word of TImode/PTImode. */
18868 if (REG_P (x))
18869 fputs (reg_names[REGNO (x) + 3], file);
18870 else if (MEM_P (x))
18872 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18873 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18874 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18875 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18876 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18877 else
18878 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18879 if (small_data_operand (x, GET_MODE (x)))
18880 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18881 reg_names[SMALL_DATA_REG]);
18883 return;
18885 /* Print AltiVec or SPE memory operand. */
18886 case 'y':
18888 rtx tmp;
18890 gcc_assert (MEM_P (x));
18892 tmp = XEXP (x, 0);
18894 /* Ugly hack because %y is overloaded. */
18895 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18896 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18897 || GET_MODE (x) == TFmode
18898 || GET_MODE (x) == TImode
18899 || GET_MODE (x) == PTImode))
18901 /* Handle [reg]. */
18902 if (REG_P (tmp))
18904 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18905 break;
18907 /* Handle [reg+UIMM]. */
18908 else if (GET_CODE (tmp) == PLUS
18909 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18911 int offset;
18913 gcc_assert (REG_P (XEXP (tmp, 0)));
18915 offset = INTVAL (XEXP (tmp, 1));
18916 fprintf (file, "%d(%s)", offset, reg_names[REGNO (XEXP (tmp, 0))]);
18917 break;
18920 /* Fall through. Must be [reg+reg]. */
18922 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
18923 && GET_CODE (tmp) == AND
18924 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
18925 && INTVAL (XEXP (tmp, 1)) == -16)
18926 tmp = XEXP (tmp, 0);
18927 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
18928 && GET_CODE (tmp) == PRE_MODIFY)
18929 tmp = XEXP (tmp, 1);
18930 if (REG_P (tmp))
18931 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
18932 else
18934 if (GET_CODE (tmp) != PLUS
18935 || !REG_P (XEXP (tmp, 0))
18936 || !REG_P (XEXP (tmp, 1)))
18938 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
18939 break;
18942 if (REGNO (XEXP (tmp, 0)) == 0)
18943 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
18944 reg_names[ REGNO (XEXP (tmp, 0)) ]);
18945 else
18946 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
18947 reg_names[ REGNO (XEXP (tmp, 1)) ]);
18949 break;
18952 case 0:
18953 if (REG_P (x))
18954 fprintf (file, "%s", reg_names[REGNO (x)]);
18955 else if (MEM_P (x))
18957 /* We need to handle PRE_INC and PRE_DEC here, since we need to
18958 know the width from the mode. */
18959 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
18960 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
18961 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18962 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
18963 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
18964 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18965 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18966 output_address (XEXP (XEXP (x, 0), 1));
18967 else
18968 output_address (XEXP (x, 0));
18970 else
18972 if (toc_relative_expr_p (x, false))
18973 /* This hack along with a corresponding hack in
18974 rs6000_output_addr_const_extra arranges to output addends
18975 where the assembler expects to find them, e.g.
18976 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
18977 without this hack would be output as "x@toc+4". We
18978 want "x+4@toc". */
18979 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18980 else
18981 output_addr_const (file, x);
18983 return;
18985 case '&':
18986 if (const char *name = get_some_local_dynamic_name ())
18987 assemble_name (file, name);
18988 else
18989 output_operand_lossage ("'%%&' used without any "
18990 "local dynamic TLS references");
18991 return;
18993 default:
18994 output_operand_lossage ("invalid %%xn code");
18998 /* Print the address of an operand. */
19000 void
19001 print_operand_address (FILE *file, rtx x)
19003 if (REG_P (x))
19004 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
19005 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
19006 || GET_CODE (x) == LABEL_REF)
19008 output_addr_const (file, x);
19009 if (small_data_operand (x, GET_MODE (x)))
19010 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19011 reg_names[SMALL_DATA_REG]);
19012 else
19013 gcc_assert (!TARGET_TOC);
19015 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19016 && REG_P (XEXP (x, 1)))
19018 if (REGNO (XEXP (x, 0)) == 0)
19019 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
19020 reg_names[ REGNO (XEXP (x, 0)) ]);
19021 else
19022 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
19023 reg_names[ REGNO (XEXP (x, 1)) ]);
19025 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19026 && GET_CODE (XEXP (x, 1)) == CONST_INT)
19027 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
19028 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
19029 #if TARGET_MACHO
19030 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19031 && CONSTANT_P (XEXP (x, 1)))
19033 fprintf (file, "lo16(");
19034 output_addr_const (file, XEXP (x, 1));
19035 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19037 #endif
19038 #if TARGET_ELF
19039 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19040 && CONSTANT_P (XEXP (x, 1)))
19042 output_addr_const (file, XEXP (x, 1));
19043 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19045 #endif
19046 else if (toc_relative_expr_p (x, false))
19048 /* This hack along with a corresponding hack in
19049 rs6000_output_addr_const_extra arranges to output addends
19050 where the assembler expects to find them, e.g.
19051 (lo_sum (reg 9)
19052 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
19053 without this hack would be output as "x@toc+8@l(9)". We
19054 want "x+8@toc@l(9)". */
19055 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
19056 if (GET_CODE (x) == LO_SUM)
19057 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
19058 else
19059 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
19061 else
19062 gcc_unreachable ();
19065 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
19067 static bool
19068 rs6000_output_addr_const_extra (FILE *file, rtx x)
19070 if (GET_CODE (x) == UNSPEC)
19071 switch (XINT (x, 1))
19073 case UNSPEC_TOCREL:
19074 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
19075 && REG_P (XVECEXP (x, 0, 1))
19076 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
19077 output_addr_const (file, XVECEXP (x, 0, 0));
19078 if (x == tocrel_base && tocrel_offset != const0_rtx)
19080 if (INTVAL (tocrel_offset) >= 0)
19081 fprintf (file, "+");
19082 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
19084 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
19086 putc ('-', file);
19087 assemble_name (file, toc_label_name);
19089 else if (TARGET_ELF)
19090 fputs ("@toc", file);
19091 return true;
19093 #if TARGET_MACHO
19094 case UNSPEC_MACHOPIC_OFFSET:
19095 output_addr_const (file, XVECEXP (x, 0, 0));
19096 putc ('-', file);
19097 machopic_output_function_base_name (file);
19098 return true;
19099 #endif
19101 return false;
19104 /* Target hook for assembling integer objects. The PowerPC version has
19105 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19106 is defined. It also needs to handle DI-mode objects on 64-bit
19107 targets. */
19109 static bool
19110 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19112 #ifdef RELOCATABLE_NEEDS_FIXUP
19113 /* Special handling for SI values. */
19114 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19116 static int recurse = 0;
19118 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19119 the .fixup section. Since the TOC section is already relocated, we
19120 don't need to mark it here. We used to skip the text section, but it
19121 should never be valid for relocated addresses to be placed in the text
19122 section. */
19123 if (TARGET_RELOCATABLE
19124 && in_section != toc_section
19125 && !recurse
19126 && !CONST_SCALAR_INT_P (x)
19127 && CONSTANT_P (x))
19129 char buf[256];
19131 recurse = 1;
19132 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19133 fixuplabelno++;
19134 ASM_OUTPUT_LABEL (asm_out_file, buf);
19135 fprintf (asm_out_file, "\t.long\t(");
19136 output_addr_const (asm_out_file, x);
19137 fprintf (asm_out_file, ")@fixup\n");
19138 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19139 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19140 fprintf (asm_out_file, "\t.long\t");
19141 assemble_name (asm_out_file, buf);
19142 fprintf (asm_out_file, "\n\t.previous\n");
19143 recurse = 0;
19144 return true;
19146 /* Remove initial .'s to turn a -mcall-aixdesc function
19147 address into the address of the descriptor, not the function
19148 itself. */
19149 else if (GET_CODE (x) == SYMBOL_REF
19150 && XSTR (x, 0)[0] == '.'
19151 && DEFAULT_ABI == ABI_AIX)
19153 const char *name = XSTR (x, 0);
19154 while (*name == '.')
19155 name++;
19157 fprintf (asm_out_file, "\t.long\t%s\n", name);
19158 return true;
19161 #endif /* RELOCATABLE_NEEDS_FIXUP */
19162 return default_assemble_integer (x, size, aligned_p);
19165 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19166 /* Emit an assembler directive to set symbol visibility for DECL to
19167 VISIBILITY_TYPE. */
19169 static void
19170 rs6000_assemble_visibility (tree decl, int vis)
19172 if (TARGET_XCOFF)
19173 return;
19175 /* Functions need to have their entry point symbol visibility set as
19176 well as their descriptor symbol visibility. */
19177 if (DEFAULT_ABI == ABI_AIX
19178 && DOT_SYMBOLS
19179 && TREE_CODE (decl) == FUNCTION_DECL)
19181 static const char * const visibility_types[] = {
19182 NULL, "internal", "hidden", "protected"
19185 const char *name, *type;
19187 name = ((* targetm.strip_name_encoding)
19188 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19189 type = visibility_types[vis];
19191 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19192 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19194 else
19195 default_assemble_visibility (decl, vis);
19197 #endif
19199 enum rtx_code
19200 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19202 /* Reversal of FP compares requires care -- an ordered compare
19203 becomes an unordered compare and vice versa. */
19204 if (mode == CCFPmode
19205 && (!flag_finite_math_only
19206 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19207 || code == UNEQ || code == LTGT))
19208 return reverse_condition_maybe_unordered (code);
19209 else
19210 return reverse_condition (code);
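/* For example, reversing GE in CCFPmode yields UNLT rather than LT,
   so the reversed test is still true when an operand is a NaN,
   exactly the cases in which the original GE was false.  */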
19213 /* Generate a compare for CODE. Return a brand-new rtx that
19214 represents the result of the compare. */
19216 static rtx
19217 rs6000_generate_compare (rtx cmp, machine_mode mode)
19219 machine_mode comp_mode;
19220 rtx compare_result;
19221 enum rtx_code code = GET_CODE (cmp);
19222 rtx op0 = XEXP (cmp, 0);
19223 rtx op1 = XEXP (cmp, 1);
19225 if (FLOAT_MODE_P (mode))
19226 comp_mode = CCFPmode;
19227 else if (code == GTU || code == LTU
19228 || code == GEU || code == LEU)
19229 comp_mode = CCUNSmode;
19230 else if ((code == EQ || code == NE)
19231 && unsigned_reg_p (op0)
19232 && (unsigned_reg_p (op1)
19233 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19234 /* These are unsigned values; perhaps there will be a later
19235 ordering compare that can be shared with this one. */
19236 comp_mode = CCUNSmode;
19237 else
19238 comp_mode = CCmode;
19240 /* If we have an unsigned compare, make sure we don't have a signed value as
19241 an immediate. */
19242 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19243 && INTVAL (op1) < 0)
19245 op0 = copy_rtx_if_shared (op0);
19246 op1 = force_reg (GET_MODE (op0), op1);
19247 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19250 /* First, the compare. */
19251 compare_result = gen_reg_rtx (comp_mode);
19253 /* E500 FP compare instructions on the GPRs. Yuck! */
19254 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19255 && FLOAT_MODE_P (mode))
19257 rtx cmp, or_result, compare_result2;
19258 machine_mode op_mode = GET_MODE (op0);
19259 bool reverse_p;
19261 if (op_mode == VOIDmode)
19262 op_mode = GET_MODE (op1);
19264 /* First reverse the condition codes that aren't directly supported. */
19265 switch (code)
19267 case NE:
19268 case UNLT:
19269 case UNLE:
19270 case UNGT:
19271 case UNGE:
19272 code = reverse_condition_maybe_unordered (code);
19273 reverse_p = true;
19274 break;
19276 case EQ:
19277 case LT:
19278 case LE:
19279 case GT:
19280 case GE:
19281 reverse_p = false;
19282 break;
19284 default:
19285 gcc_unreachable ();
19288 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19289 This explains the following mess. */
19291 switch (code)
19293 case EQ:
19294 switch (op_mode)
19296 case SFmode:
19297 cmp = (flag_finite_math_only && !flag_trapping_math)
19298 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19299 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19300 break;
19302 case DFmode:
19303 cmp = (flag_finite_math_only && !flag_trapping_math)
19304 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19305 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19306 break;
19308 case TFmode:
19309 cmp = (flag_finite_math_only && !flag_trapping_math)
19310 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19311 : gen_cmptfeq_gpr (compare_result, op0, op1);
19312 break;
19314 default:
19315 gcc_unreachable ();
19317 break;
19319 case GT:
19320 case GE:
19321 switch (op_mode)
19323 case SFmode:
19324 cmp = (flag_finite_math_only && !flag_trapping_math)
19325 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19326 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19327 break;
19329 case DFmode:
19330 cmp = (flag_finite_math_only && !flag_trapping_math)
19331 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19332 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19333 break;
19335 case TFmode:
19336 cmp = (flag_finite_math_only && !flag_trapping_math)
19337 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19338 : gen_cmptfgt_gpr (compare_result, op0, op1);
19339 break;
19341 default:
19342 gcc_unreachable ();
19344 break;
19346 case LT:
19347 case LE:
19348 switch (op_mode)
19350 case SFmode:
19351 cmp = (flag_finite_math_only && !flag_trapping_math)
19352 ? gen_tstsflt_gpr (compare_result, op0, op1)
19353 : gen_cmpsflt_gpr (compare_result, op0, op1);
19354 break;
19356 case DFmode:
19357 cmp = (flag_finite_math_only && !flag_trapping_math)
19358 ? gen_tstdflt_gpr (compare_result, op0, op1)
19359 : gen_cmpdflt_gpr (compare_result, op0, op1);
19360 break;
19362 case TFmode:
19363 cmp = (flag_finite_math_only && !flag_trapping_math)
19364 ? gen_tsttflt_gpr (compare_result, op0, op1)
19365 : gen_cmptflt_gpr (compare_result, op0, op1);
19366 break;
19368 default:
19369 gcc_unreachable ();
19371 break;
19373 default:
19374 gcc_unreachable ();
19377 /* Synthesize LE and GE from LT/GT || EQ. */
19378 if (code == LE || code == GE)
19380 emit_insn (cmp);
19382 compare_result2 = gen_reg_rtx (CCFPmode);
19384 /* Do the EQ. */
19385 switch (op_mode)
19387 case SFmode:
19388 cmp = (flag_finite_math_only && !flag_trapping_math)
19389 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19390 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19391 break;
19393 case DFmode:
19394 cmp = (flag_finite_math_only && !flag_trapping_math)
19395 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19396 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19397 break;
19399 case TFmode:
19400 cmp = (flag_finite_math_only && !flag_trapping_math)
19401 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19402 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19403 break;
19405 default:
19406 gcc_unreachable ();
19409 emit_insn (cmp);
19411 /* OR them together. */
19412 or_result = gen_reg_rtx (CCFPmode);
19413 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19414 compare_result2);
19415 compare_result = or_result;
19418 code = reverse_p ? NE : EQ;
19420 emit_insn (cmp);
19422 else
19424 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19425 CLOBBERs to match cmptf_internal2 pattern. */
19426 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19427 && GET_MODE (op0) == TFmode
19428 && !TARGET_IEEEQUAD
19429 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19430 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19431 gen_rtvec (10,
19432 gen_rtx_SET (VOIDmode,
19433 compare_result,
19434 gen_rtx_COMPARE (comp_mode, op0, op1)),
19435 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19436 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19437 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19438 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19439 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19440 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19441 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19442 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19443 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19444 else if (GET_CODE (op1) == UNSPEC
19445 && XINT (op1, 1) == UNSPEC_SP_TEST)
19447 rtx op1b = XVECEXP (op1, 0, 0);
19448 comp_mode = CCEQmode;
19449 compare_result = gen_reg_rtx (CCEQmode);
19450 if (TARGET_64BIT)
19451 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19452 else
19453 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19455 else
19456 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19457 gen_rtx_COMPARE (comp_mode, op0, op1)));
19460 /* Some kinds of FP comparisons need an OR operation;
19461 under flag_finite_math_only we don't bother. */
19462 if (FLOAT_MODE_P (mode)
19463 && !flag_finite_math_only
19464 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19465 && (code == LE || code == GE
19466 || code == UNEQ || code == LTGT
19467 || code == UNGT || code == UNLT))
19469 enum rtx_code or1, or2;
19470 rtx or1_rtx, or2_rtx, compare2_rtx;
19471 rtx or_result = gen_reg_rtx (CCEQmode);
19473 switch (code)
19475 case LE: or1 = LT; or2 = EQ; break;
19476 case GE: or1 = GT; or2 = EQ; break;
19477 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19478 case LTGT: or1 = LT; or2 = GT; break;
19479 case UNGT: or1 = UNORDERED; or2 = GT; break;
19480 case UNLT: or1 = UNORDERED; or2 = LT; break;
19481 default: gcc_unreachable ();
19483 validate_condition_mode (or1, comp_mode);
19484 validate_condition_mode (or2, comp_mode);
19485 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19486 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19487 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19488 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19489 const_true_rtx);
19490 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19492 compare_result = or_result;
19493 code = EQ;
19496 validate_condition_mode (code, GET_MODE (compare_result));
19498 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
19502 /* Emit the RTL for an sISEL pattern. */
19504 void
19505 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19507 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19510 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
19511 can be used as that dest register. Return the dest register. */
19513 rtx
19514 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
19516 if (op2 == const0_rtx)
19517 return op1;
19519 if (GET_CODE (scratch) == SCRATCH)
19520 scratch = gen_reg_rtx (mode);
19522 if (logical_operand (op2, mode))
19523 emit_insn (gen_rtx_SET (VOIDmode, scratch, gen_rtx_XOR (mode, op1, op2)));
19524 else
19525 emit_insn (gen_rtx_SET (VOIDmode, scratch,
19526 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
19528 return scratch;
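/* For example, testing r3 == 0x20 takes the logical_operand path:
   scratch = r3 ^ 0x20 is emitted (typically a single xori), and the
   caller then compares scratch against zero.  */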
19531 void
19532 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
19534 rtx condition_rtx;
19535 machine_mode op_mode;
19536 enum rtx_code cond_code;
19537 rtx result = operands[0];
19539 condition_rtx = rs6000_generate_compare (operands[1], mode);
19540 cond_code = GET_CODE (condition_rtx);
19542 if (FLOAT_MODE_P (mode)
19543 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19545 rtx t;
19547 PUT_MODE (condition_rtx, SImode);
19548 t = XEXP (condition_rtx, 0);
19550 gcc_assert (cond_code == NE || cond_code == EQ);
19552 if (cond_code == NE)
19553 emit_insn (gen_e500_flip_gt_bit (t, t));
19555 emit_insn (gen_move_from_CR_gt_bit (result, t));
19556 return;
19559 if (cond_code == NE
19560 || cond_code == GE || cond_code == LE
19561 || cond_code == GEU || cond_code == LEU
19562 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19564 rtx not_result = gen_reg_rtx (CCEQmode);
19565 rtx not_op, rev_cond_rtx;
19566 machine_mode cc_mode;
19568 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19570 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19571 SImode, XEXP (condition_rtx, 0), const0_rtx);
19572 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19573 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19574 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19577 op_mode = GET_MODE (XEXP (operands[1], 0));
19578 if (op_mode == VOIDmode)
19579 op_mode = GET_MODE (XEXP (operands[1], 1));
19581 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19583 PUT_MODE (condition_rtx, DImode);
19584 convert_move (result, condition_rtx, 0);
19586 else
19588 PUT_MODE (condition_rtx, SImode);
19589 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19593 /* Emit a branch of kind CODE to location LOC. */
19595 void
19596 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
19598 rtx condition_rtx, loc_ref;
19600 condition_rtx = rs6000_generate_compare (operands[0], mode);
19601 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19602 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19603 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19604 loc_ref, pc_rtx)));
19607 /* Return the string to output a conditional branch to LABEL, which is
19608 the operand template of the label, or NULL if the branch is really a
19609 conditional return.
19611 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19612 condition code register and its mode specifies what kind of
19613 comparison we made.
19615 REVERSED is nonzero if we should reverse the sense of the comparison.
19617 INSN is the insn. */
19619 char *
19620 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19622 static char string[64];
19623 enum rtx_code code = GET_CODE (op);
19624 rtx cc_reg = XEXP (op, 0);
19625 machine_mode mode = GET_MODE (cc_reg);
19626 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19627 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19628 int really_reversed = reversed ^ need_longbranch;
19629 char *s = string;
19630 const char *ccode;
19631 const char *pred;
19632 rtx note;
19634 validate_condition_mode (code, mode);
19636 /* Work out which way this really branches. We could use
19637 reverse_condition_maybe_unordered here always but this
19638 makes the resulting assembler clearer. */
19639 if (really_reversed)
19641 /* Reversal of FP compares requires care -- an ordered compare
19642 becomes an unordered compare and vice versa. */
19643 if (mode == CCFPmode)
19644 code = reverse_condition_maybe_unordered (code);
19645 else
19646 code = reverse_condition (code);
19649 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19651 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19652 to the GT bit. */
19653 switch (code)
19655 case EQ:
19656 /* Opposite of GT. */
19657 code = GT;
19658 break;
19660 case NE:
19661 code = UNLE;
19662 break;
19664 default:
19665 gcc_unreachable ();
19669 switch (code)
19671 /* Not all of these are actually distinct opcodes, but
19672 we distinguish them for clarity of the resulting assembler. */
19673 case NE: case LTGT:
19674 ccode = "ne"; break;
19675 case EQ: case UNEQ:
19676 ccode = "eq"; break;
19677 case GE: case GEU:
19678 ccode = "ge"; break;
19679 case GT: case GTU: case UNGT:
19680 ccode = "gt"; break;
19681 case LE: case LEU:
19682 ccode = "le"; break;
19683 case LT: case LTU: case UNLT:
19684 ccode = "lt"; break;
19685 case UNORDERED: ccode = "un"; break;
19686 case ORDERED: ccode = "nu"; break;
19687 case UNGE: ccode = "nl"; break;
19688 case UNLE: ccode = "ng"; break;
19689 default:
19690 gcc_unreachable ();
19693 /* Maybe we have a guess as to how likely the branch is. */
19694 pred = "";
19695 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19696 if (note != NULL_RTX)
19698 /* PROB is the difference from 50%. */
19699 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19701 /* Only hint for highly probable/improbable branches on newer
19702 cpus as static prediction overrides processor dynamic
19703 prediction. For older cpus we may as well always hint, but
19704 assume not taken for branches that are very close to 50% as a
19705 mispredicted taken branch is more expensive than a
19706 mispredicted not-taken branch. */
19707 if (rs6000_always_hint
19708 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19709 && br_prob_note_reliable_p (note)))
19711 if (abs (prob) > REG_BR_PROB_BASE / 20
19712 && ((prob > 0) ^ need_longbranch))
19713 pred = "+";
19714 else
19715 pred = "-";
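/* For example, with REG_BR_PROB_BASE == 10000 a note value of 9900
   gives prob == 4900, which clears both thresholds above, so a short
   branch predicted 99% taken is printed with the "+" hint.  */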
19719 if (label == NULL)
19720 s += sprintf (s, "b%slr%s ", ccode, pred);
19721 else
19722 s += sprintf (s, "b%s%s ", ccode, pred);
19724 /* We need to escape any '%' characters in the reg_names string.
19725 Assume they'd only be the first character.... */
19726 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19727 *s++ = '%';
19728 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19730 if (label != NULL)
19732 /* If the branch distance was too far, we may have to use an
19733 unconditional branch to go the distance. */
19734 if (need_longbranch)
19735 s += sprintf (s, ",$+8\n\tb %s", label);
19736 else
19737 s += sprintf (s, ",%s", label);
19740 return string;
19743 /* Return the string to flip the GT bit on a CR. */
19744 char *
19745 output_e500_flip_gt_bit (rtx dst, rtx src)
19747 static char string[64];
19748 int a, b;
19750 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19751 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19753 /* GT bit. */
19754 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19755 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19757 sprintf (string, "crnot %d,%d", a, b);
19758 return string;
19761 /* Return insn for VSX or Altivec comparisons. */
19763 static rtx
19764 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19766 rtx mask;
19767 machine_mode mode = GET_MODE (op0);
19769 switch (code)
19771 default:
19772 break;
19774 case GE:
19775 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19776 return NULL_RTX;
19778 case EQ:
19779 case GT:
19780 case GTU:
19781 case ORDERED:
19782 case UNORDERED:
19783 case UNEQ:
19784 case LTGT:
19785 mask = gen_reg_rtx (mode);
19786 emit_insn (gen_rtx_SET (VOIDmode,
19787 mask,
19788 gen_rtx_fmt_ee (code, mode, op0, op1)));
19789 return mask;
19792 return NULL_RTX;
19795 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19796 DMODE is the expected destination mode. This is a recursive function. */
19798 static rtx
19799 rs6000_emit_vector_compare (enum rtx_code rcode,
19800 rtx op0, rtx op1,
19801 machine_mode dmode)
19803 rtx mask;
19804 bool swap_operands = false;
19805 bool try_again = false;
19807 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19808 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19810 /* See if the comparison works as is. */
19811 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19812 if (mask)
19813 return mask;
19815 switch (rcode)
19817 case LT:
19818 rcode = GT;
19819 swap_operands = true;
19820 try_again = true;
19821 break;
19822 case LTU:
19823 rcode = GTU;
19824 swap_operands = true;
19825 try_again = true;
19826 break;
19827 case NE:
19828 case UNLE:
19829 case UNLT:
19830 case UNGE:
19831 case UNGT:
19832 /* Invert condition and try again.
19833 e.g., A != B becomes ~(A==B). */
19835 enum rtx_code rev_code;
19836 enum insn_code nor_code;
19837 rtx mask2;
19839 rev_code = reverse_condition_maybe_unordered (rcode);
19840 if (rev_code == UNKNOWN)
19841 return NULL_RTX;
19843 nor_code = optab_handler (one_cmpl_optab, dmode);
19844 if (nor_code == CODE_FOR_nothing)
19845 return NULL_RTX;
19847 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19848 if (!mask2)
19849 return NULL_RTX;
19851 mask = gen_reg_rtx (dmode);
19852 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19853 return mask;
19855 break;
19856 case GE:
19857 case GEU:
19858 case LE:
19859 case LEU:
19860 /* Try GT/GTU/LT/LTU OR EQ */
19862 rtx c_rtx, eq_rtx;
19863 enum insn_code ior_code;
19864 enum rtx_code new_code;
19866 switch (rcode)
19868 case GE:
19869 new_code = GT;
19870 break;
19872 case GEU:
19873 new_code = GTU;
19874 break;
19876 case LE:
19877 new_code = LT;
19878 break;
19880 case LEU:
19881 new_code = LTU;
19882 break;
19884 default:
19885 gcc_unreachable ();
19888 ior_code = optab_handler (ior_optab, dmode);
19889 if (ior_code == CODE_FOR_nothing)
19890 return NULL_RTX;
19892 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
19893 if (!c_rtx)
19894 return NULL_RTX;
19896 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
19897 if (!eq_rtx)
19898 return NULL_RTX;
19900 mask = gen_reg_rtx (dmode);
19901 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
19902 return mask;
19904 break;
19905 default:
19906 return NULL_RTX;
19909 if (try_again)
19911 if (swap_operands)
19912 std::swap (op0, op1);
19914 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19915 if (mask)
19916 return mask;
19919 /* You only get two chances. */
19920 return NULL_RTX;
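/* For example, LE on V4SImode is not directly available, so it is
   built as (ior (gt op1 op0) (eq op0 op1)): the LT leg recurses once
   and comes back as a GT with swapped operands (vcmpgtsw), while the
   EQ leg maps directly to vcmpequw.  */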
19923 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
19924 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
19925 operands for the relation operation COND. */
19927 int
19928 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
19929 rtx cond, rtx cc_op0, rtx cc_op1)
19931 machine_mode dest_mode = GET_MODE (dest);
19932 machine_mode mask_mode = GET_MODE (cc_op0);
19933 enum rtx_code rcode = GET_CODE (cond);
19934 machine_mode cc_mode = CCmode;
19935 rtx mask;
19936 rtx cond2;
19937 rtx tmp;
19938 bool invert_move = false;
19940 if (VECTOR_UNIT_NONE_P (dest_mode))
19941 return 0;
19943 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
19944 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
19946 switch (rcode)
19948 /* Swap operands if we can, and fall back to doing the operation as
19949 specified, using a NOR to invert the test.  */
19950 case NE:
19951 case UNLE:
19952 case UNLT:
19953 case UNGE:
19954 case UNGT:
19955 /* Invert condition and try again.
19956 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
19957 invert_move = true;
19958 rcode = reverse_condition_maybe_unordered (rcode);
19959 if (rcode == UNKNOWN)
19960 return 0;
19961 break;
19963 /* Mark unsigned tests with CCUNSmode. */
19964 case GTU:
19965 case GEU:
19966 case LTU:
19967 case LEU:
19968 cc_mode = CCUNSmode;
19969 break;
19971 default:
19972 break;
19975 /* Get the vector mask for the given relational operations. */
19976 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
19978 if (!mask)
19979 return 0;
19981 if (invert_move)
19983 tmp = op_true;
19984 op_true = op_false;
19985 op_false = tmp;
19988 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
19989 CONST0_RTX (dest_mode));
19990 emit_insn (gen_rtx_SET (VOIDmode,
19991 dest,
19992 gen_rtx_IF_THEN_ELSE (dest_mode,
19993 cond2,
19994 op_true,
19995 op_false)));
19996 return 1;
19999 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
20000 operands of the last comparison is nonzero/true, FALSE_COND if it
20001 is zero/false. Return 0 if the hardware has no such operation. */
20004 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20006 enum rtx_code code = GET_CODE (op);
20007 rtx op0 = XEXP (op, 0);
20008 rtx op1 = XEXP (op, 1);
20009 REAL_VALUE_TYPE c1;
20010 machine_mode compare_mode = GET_MODE (op0);
20011 machine_mode result_mode = GET_MODE (dest);
20012 rtx temp;
20013 bool is_against_zero;
20015 /* These modes should always match. */
20016 if (GET_MODE (op1) != compare_mode
20017 /* In the isel case however, we can use a compare immediate, so
20018 op1 may be a small constant. */
20019 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
20020 return 0;
20021 if (GET_MODE (true_cond) != result_mode)
20022 return 0;
20023 if (GET_MODE (false_cond) != result_mode)
20024 return 0;
20026 /* Don't allow using floating point comparisons for integer results for
20027 now. */
20028 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
20029 return 0;
20031 /* First, work out if the hardware can do this at all, or
20032 if it's too slow.... */
20033 if (!FLOAT_MODE_P (compare_mode))
20035 if (TARGET_ISEL)
20036 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
20037 return 0;
20039 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
20040 && SCALAR_FLOAT_MODE_P (compare_mode))
20041 return 0;
20043 is_against_zero = op1 == CONST0_RTX (compare_mode);
20045 /* A floating-point subtract might overflow, underflow, or produce
20046 an inexact result, thus changing the floating-point flags, so it
20047 can't be generated if we care about that. It's safe if one side
20048 of the construct is zero, since then no subtract will be
20049 generated. */
20050 if (SCALAR_FLOAT_MODE_P (compare_mode)
20051 && flag_trapping_math && ! is_against_zero)
20052 return 0;
20054 /* Eliminate half of the comparisons by switching operands, this
20055 makes the remaining code simpler. */
20056 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
20057 || code == LTGT || code == LT || code == UNLE)
20059 code = reverse_condition_maybe_unordered (code);
20060 temp = true_cond;
20061 true_cond = false_cond;
20062 false_cond = temp;
20065 /* UNEQ and LTGT take four instructions for a comparison with zero, so
20066 it'll probably be faster to use a branch here too. */
20067 if (code == UNEQ && HONOR_NANS (compare_mode))
20068 return 0;
20070 if (GET_CODE (op1) == CONST_DOUBLE)
20071 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
20073 /* We're going to try to implement comparisons by performing
20074 a subtract, then comparing against zero. Unfortunately,
20075 Inf - Inf is NaN which is not zero, and so if we don't
20076 know that the operand is finite and the comparison
20077 would treat EQ differently from UNORDERED, we can't do it. */
20078 if (HONOR_INFINITIES (compare_mode)
20079 && code != GT && code != UNGE
20080 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
20081 /* Constructs of the form (a OP b ? a : b) are safe. */
20082 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
20083 || (! rtx_equal_p (op0, true_cond)
20084 && ! rtx_equal_p (op1, true_cond))))
20085 return 0;
20087 /* At this point we know we can use fsel. */
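/* As background (a sketch of the hardware semantics): fsel computes

       fsel fD,fA,fC,fB     # fD = (fA >= 0.0) ? fC : fB

so each case below massages the comparison into the canonical
(op0 GE 0) ? true_cond : false_cond shape.  */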
20089 /* Reduce the comparison to a comparison against zero. */
20090 if (! is_against_zero)
20092 temp = gen_reg_rtx (compare_mode);
20093 emit_insn (gen_rtx_SET (VOIDmode, temp,
20094 gen_rtx_MINUS (compare_mode, op0, op1)));
20095 op0 = temp;
20096 op1 = CONST0_RTX (compare_mode);
20099 /* If we don't care about NaNs we can reduce some of the comparisons
20100 down to faster ones. */
20101 if (! HONOR_NANS (compare_mode))
20102 switch (code)
20104 case GT:
20105 code = LE;
20106 temp = true_cond;
20107 true_cond = false_cond;
20108 false_cond = temp;
20109 break;
20110 case UNGE:
20111 code = GE;
20112 break;
20113 case UNEQ:
20114 code = EQ;
20115 break;
20116 default:
20117 break;
20120 /* Now, reduce everything down to a GE. */
20121 switch (code)
20123 case GE:
20124 break;
20126 case LE:
20127 temp = gen_reg_rtx (compare_mode);
20128 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20129 op0 = temp;
20130 break;
20132 case ORDERED:
20133 temp = gen_reg_rtx (compare_mode);
20134 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
20135 op0 = temp;
20136 break;
20138 case EQ:
20139 temp = gen_reg_rtx (compare_mode);
20140 emit_insn (gen_rtx_SET (VOIDmode, temp,
20141 gen_rtx_NEG (compare_mode,
20142 gen_rtx_ABS (compare_mode, op0))));
20143 op0 = temp;
20144 break;
20146 case UNGE:
20147 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
20148 temp = gen_reg_rtx (result_mode);
20149 emit_insn (gen_rtx_SET (VOIDmode, temp,
20150 gen_rtx_IF_THEN_ELSE (result_mode,
20151 gen_rtx_GE (VOIDmode,
20152 op0, op1),
20153 true_cond, false_cond)));
20154 false_cond = true_cond;
20155 true_cond = temp;
20157 temp = gen_reg_rtx (compare_mode);
20158 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20159 op0 = temp;
20160 break;
20162 case GT:
20163 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
20164 temp = gen_reg_rtx (result_mode);
20165 emit_insn (gen_rtx_SET (VOIDmode, temp,
20166 gen_rtx_IF_THEN_ELSE (result_mode,
20167 gen_rtx_GE (VOIDmode,
20168 op0, op1),
20169 true_cond, false_cond)));
20170 true_cond = false_cond;
20171 false_cond = temp;
20173 temp = gen_reg_rtx (compare_mode);
20174 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20175 op0 = temp;
20176 break;
20178 default:
20179 gcc_unreachable ();
20182 emit_insn (gen_rtx_SET (VOIDmode, dest,
20183 gen_rtx_IF_THEN_ELSE (result_mode,
20184 gen_rtx_GE (VOIDmode,
20185 op0, op1),
20186 true_cond, false_cond)));
20187 return 1;
20190 /* Same as above, but for ints (isel). */
20192 static int
20193 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20195 rtx condition_rtx, cr;
20196 machine_mode mode = GET_MODE (dest);
20197 enum rtx_code cond_code;
20198 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
20199 bool signedp;
20201 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
20202 return 0;
20204 /* We still have to do the compare, because isel doesn't do a
20205 compare; it just looks at the CRx bits set by a previous compare
20206 instruction. */
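/* For reference (a sketch of the instruction's behavior):

       isel rD,rA,rB,BC     # rD = CR[BC] ? rA : rB

which is why only codes that map onto a single CR bit need no
massaging below.  */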
20207 condition_rtx = rs6000_generate_compare (op, mode);
20208 cond_code = GET_CODE (condition_rtx);
20209 cr = XEXP (condition_rtx, 0);
20210 signedp = GET_MODE (cr) == CCmode;
20212 isel_func = (mode == SImode
20213 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
20214 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
20216 switch (cond_code)
20218 case LT: case GT: case LTU: case GTU: case EQ:
20219 /* isel handles these directly. */
20220 break;
20222 default:
20223 /* We need to swap the sense of the comparison. */
20225 std::swap (false_cond, true_cond);
20226 PUT_CODE (condition_rtx, reverse_condition (cond_code));
20228 break;
20231 false_cond = force_reg (mode, false_cond);
20232 if (true_cond != const0_rtx)
20233 true_cond = force_reg (mode, true_cond);
20235 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
20237 return 1;
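/* Output the isel assembly for a conditional move described by
OPERANDS, reversing the condition (and swapping the two data
operands) when it is not one that isel can test directly.  */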
20240 const char *
20241 output_isel (rtx *operands)
20243 enum rtx_code code;
20245 code = GET_CODE (operands[1]);
20247 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
20249 gcc_assert (GET_CODE (operands[2]) == REG
20250 && GET_CODE (operands[3]) == REG);
20251 PUT_CODE (operands[1], reverse_condition (code));
20252 return "isel %0,%3,%2,%j1";
20255 return "isel %0,%2,%3,%j1";
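/* Emit code computing DEST = max/min (OP0, OP1), where CODE selects
the signed/unsigned integer or floating-point variant
(SMAX/SMIN/UMAX/UMIN), using a direct vector min/max insn when one
exists and a conditional move otherwise.  */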
20258 void
20259 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20261 machine_mode mode = GET_MODE (op0);
20262 enum rtx_code c;
20263 rtx target;
20265 /* VSX/altivec have direct min/max insns. */
20266 if ((code == SMAX || code == SMIN)
20267 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
20268 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
20270 emit_insn (gen_rtx_SET (VOIDmode,
20271 dest,
20272 gen_rtx_fmt_ee (code, mode, op0, op1)));
20273 return;
20276 if (code == SMAX || code == SMIN)
20277 c = GE;
20278 else
20279 c = GEU;
20281 if (code == SMAX || code == UMAX)
20282 target = emit_conditional_move (dest, c, op0, op1, mode,
20283 op0, op1, mode, 0);
20284 else
20285 target = emit_conditional_move (dest, c, op0, op1, mode,
20286 op1, op0, mode, 0);
20287 gcc_assert (target);
20288 if (target != dest)
20289 emit_move_insn (dest, target);
20292 /* A subroutine of the atomic operation splitters. Jump to LABEL if
20293 COND is true. Mark the jump as unlikely to be taken. */
20295 static void
20296 emit_unlikely_jump (rtx cond, rtx label)
20298 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
20299 rtx x;
20301 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
20302 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
20303 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
20306 /* A subroutine of the atomic operation splitters. Emit a load-locked
20307 instruction in MODE. For QI/HImode, possibly use a pattern that includes
20308 the zero_extend operation. */
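/* The modes map directly onto the load-and-reserve family: lbarx,
lharx, lwarx, ldarx and lqarx for QI/HI/SI/DI/TImode respectively.  */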
20310 static void
20311 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
20313 rtx (*fn) (rtx, rtx) = NULL;
20315 switch (mode)
20317 case QImode:
20318 fn = gen_load_lockedqi;
20319 break;
20320 case HImode:
20321 fn = gen_load_lockedhi;
20322 break;
20323 case SImode:
20324 if (GET_MODE (mem) == QImode)
20325 fn = gen_load_lockedqi_si;
20326 else if (GET_MODE (mem) == HImode)
20327 fn = gen_load_lockedhi_si;
20328 else
20329 fn = gen_load_lockedsi;
20330 break;
20331 case DImode:
20332 fn = gen_load_lockeddi;
20333 break;
20334 case TImode:
20335 fn = gen_load_lockedti;
20336 break;
20337 default:
20338 gcc_unreachable ();
20340 emit_insn (fn (reg, mem));
20343 /* A subroutine of the atomic operation splitters. Emit a store-conditional
20344 instruction in MODE. */
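/* As with the loads, the modes map onto the store-conditional family:
stbcx., sthcx., stwcx., stdcx. and stqcx. respectively.  */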
20346 static void
20347 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
20349 rtx (*fn) (rtx, rtx, rtx) = NULL;
20351 switch (mode)
20353 case QImode:
20354 fn = gen_store_conditionalqi;
20355 break;
20356 case HImode:
20357 fn = gen_store_conditionalhi;
20358 break;
20359 case SImode:
20360 fn = gen_store_conditionalsi;
20361 break;
20362 case DImode:
20363 fn = gen_store_conditionaldi;
20364 break;
20365 case TImode:
20366 fn = gen_store_conditionalti;
20367 break;
20368 default:
20369 gcc_unreachable ();
20372 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
20373 if (PPC405_ERRATUM77)
20374 emit_insn (gen_hwsync ());
20376 emit_insn (fn (res, mem, val));
20379 /* Expand barriers before and after a load_locked/store_cond sequence. */
20381 static rtx
20382 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
20384 rtx addr = XEXP (mem, 0);
20385 int strict_p = (reload_in_progress || reload_completed);
20387 if (!legitimate_indirect_address_p (addr, strict_p)
20388 && !legitimate_indexed_address_p (addr, strict_p))
20390 addr = force_reg (Pmode, addr);
20391 mem = replace_equiv_address_nv (mem, addr);
20394 switch (model)
20396 case MEMMODEL_RELAXED:
20397 case MEMMODEL_CONSUME:
20398 case MEMMODEL_ACQUIRE:
20399 break;
20400 case MEMMODEL_RELEASE:
20401 case MEMMODEL_ACQ_REL:
20402 emit_insn (gen_lwsync ());
20403 break;
20404 case MEMMODEL_SEQ_CST:
20405 emit_insn (gen_hwsync ());
20406 break;
20407 default:
20408 gcc_unreachable ();
20410 return mem;
20413 static void
20414 rs6000_post_atomic_barrier (enum memmodel model)
20416 switch (model)
20418 case MEMMODEL_RELAXED:
20419 case MEMMODEL_CONSUME:
20420 case MEMMODEL_RELEASE:
20421 break;
20422 case MEMMODEL_ACQUIRE:
20423 case MEMMODEL_ACQ_REL:
20424 case MEMMODEL_SEQ_CST:
20425 emit_insn (gen_isync ());
20426 break;
20427 default:
20428 gcc_unreachable ();
20432 /* A subroutine of the various atomic expanders. For sub-word operations,
20433 we must adjust things to operate on SImode. Given the original MEM,
20434 return a new aligned memory. Also build and return the quantities by
20435 which to shift and mask. */
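/* A worked example, assuming little-endian byte order: for a QImode
location at address ADDR, the containing aligned word is at
(ADDR & -4), the byte sits SHIFT = (ADDR & 3) * 8 bits up within
that word, and MASK = 0xff << SHIFT selects it.  On big-endian
targets the shift is XORed with 0x18 to count from the other end.  */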
20437 static rtx
20438 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
20440 rtx addr, align, shift, mask, mem;
20441 HOST_WIDE_INT shift_mask;
20442 machine_mode mode = GET_MODE (orig_mem);
20444 /* For smaller modes, we have to implement this via SImode. */
20445 shift_mask = (mode == QImode ? 0x18 : 0x10);
20447 addr = XEXP (orig_mem, 0);
20448 addr = force_reg (GET_MODE (addr), addr);
20450 /* Aligned memory containing subword. Generate a new memory. We
20451 do not want any of the existing MEM_ATTR data, as we're now
20452 accessing memory outside the original object. */
20453 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
20454 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20455 mem = gen_rtx_MEM (SImode, align);
20456 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
20457 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
20458 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20460 /* Shift amount for subword relative to aligned word. */
20461 shift = gen_reg_rtx (SImode);
20462 addr = gen_lowpart (SImode, addr);
20463 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
20464 if (BYTES_BIG_ENDIAN)
20465 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20466 shift, 1, OPTAB_LIB_WIDEN);
20467 *pshift = shift;
20469 /* Mask for insertion. */
20470 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20471 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20472 *pmask = mask;
20474 return mem;
20477 /* A subroutine of the various atomic expanders. For sub-word operands,
20478 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
20480 static rtx
20481 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20483 rtx x;
20485 x = gen_reg_rtx (SImode);
20486 emit_insn (gen_rtx_SET (VOIDmode, x,
20487 gen_rtx_AND (SImode,
20488 gen_rtx_NOT (SImode, mask),
20489 oldval)));
20491 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20493 return x;
20496 /* A subroutine of the various atomic expanders. For sub-word operands,
20497 extract WIDE to NARROW via SHIFT. */
20499 static void
20500 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20502 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20503 wide, 1, OPTAB_LIB_WIDEN);
20504 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
20507 /* Expand an atomic compare and swap operation. */
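/* For a strong SImode compare-and-swap with SEQ_CST ordering, the
emitted sequence corresponds roughly to this assembly sketch:

    hwsync
 1: lwarx   ret,0,mem       # load-locked
    cmpw    cr0,ret,oldval
    bne-    cr0,2f          # values differ -> fail
    stwcx.  newval,0,mem    # store-conditional
    bne-    cr0,1b          # lost the reservation -> retry
 2: isync

with CR0 left holding EQ on success and NE on failure.  */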
20509 void
20510 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20512 rtx boolval, retval, mem, oldval, newval, cond;
20513 rtx label1, label2, x, mask, shift;
20514 machine_mode mode, orig_mode;
20515 enum memmodel mod_s, mod_f;
20516 bool is_weak;
20518 boolval = operands[0];
20519 retval = operands[1];
20520 mem = operands[2];
20521 oldval = operands[3];
20522 newval = operands[4];
20523 is_weak = (INTVAL (operands[5]) != 0);
20524 mod_s = (enum memmodel) INTVAL (operands[6]);
20525 mod_f = (enum memmodel) INTVAL (operands[7]);
20526 orig_mode = mode = GET_MODE (mem);
20528 mask = shift = NULL_RTX;
20529 if (mode == QImode || mode == HImode)
20531 /* Before power8, we didn't have access to lbarx/lharx, so we generate
20532 lwarx plus shift/mask operations. With power8, we need to do the
20533 comparison in SImode, but the store is still done in QI/HImode. */
20534 oldval = convert_modes (SImode, mode, oldval, 1);
20536 if (!TARGET_SYNC_HI_QI)
20538 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20540 /* Shift and mask OLDVAL into position within the word. */
20541 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20542 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20544 /* Shift and mask NEWVAL into position within the word. */
20545 newval = convert_modes (SImode, mode, newval, 1);
20546 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20547 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20550 /* Prepare to adjust the return value. */
20551 retval = gen_reg_rtx (SImode);
20552 mode = SImode;
20554 else if (reg_overlap_mentioned_p (retval, oldval))
20555 oldval = copy_to_reg (oldval);
20557 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20559 label1 = NULL_RTX;
20560 if (!is_weak)
20562 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20563 emit_label (XEXP (label1, 0));
20565 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20567 emit_load_locked (mode, retval, mem);
20569 x = retval;
20570 if (mask)
20572 x = expand_simple_binop (SImode, AND, retval, mask,
20573 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20576 cond = gen_reg_rtx (CCmode);
20577 /* If we have TImode, synthesize a comparison. */
20578 if (mode != TImode)
20579 x = gen_rtx_COMPARE (CCmode, x, oldval);
20580 else
20582 rtx xor1_result = gen_reg_rtx (DImode);
20583 rtx xor2_result = gen_reg_rtx (DImode);
20584 rtx or_result = gen_reg_rtx (DImode);
20585 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20586 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20587 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20588 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20590 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20591 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20592 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20593 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20596 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
20598 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20599 emit_unlikely_jump (x, label2);
20601 x = newval;
20602 if (mask)
20603 x = rs6000_mask_atomic_subword (retval, newval, mask);
20605 emit_store_conditional (orig_mode, cond, mem, x);
20607 if (!is_weak)
20609 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20610 emit_unlikely_jump (x, label1);
20613 if (mod_f != MEMMODEL_RELAXED)
20614 emit_label (XEXP (label2, 0));
20616 rs6000_post_atomic_barrier (mod_s);
20618 if (mod_f == MEMMODEL_RELAXED)
20619 emit_label (XEXP (label2, 0));
20621 if (shift)
20622 rs6000_finish_atomic_subword (operands[1], retval, shift);
20623 else if (mode != GET_MODE (operands[1]))
20624 convert_move (operands[1], retval, 1);
20626 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20627 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20628 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
20631 /* Expand an atomic exchange operation. */
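/* For SImode and a relaxed memory model, the loop reduces to roughly:

 1: lwarx   ret,0,mem
    stwcx.  val,0,mem
    bne-    cr0,1b          # lost the reservation -> retry  */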
20633 void
20634 rs6000_expand_atomic_exchange (rtx operands[])
20636 rtx retval, mem, val, cond;
20637 machine_mode mode;
20638 enum memmodel model;
20639 rtx label, x, mask, shift;
20641 retval = operands[0];
20642 mem = operands[1];
20643 val = operands[2];
20644 model = (enum memmodel) INTVAL (operands[3]);
20645 mode = GET_MODE (mem);
20647 mask = shift = NULL_RTX;
20648 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20650 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20652 /* Shift and mask VAL into position within the word. */
20653 val = convert_modes (SImode, mode, val, 1);
20654 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20655 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20657 /* Prepare to adjust the return value. */
20658 retval = gen_reg_rtx (SImode);
20659 mode = SImode;
20662 mem = rs6000_pre_atomic_barrier (mem, model);
20664 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20665 emit_label (XEXP (label, 0));
20667 emit_load_locked (mode, retval, mem);
20669 x = val;
20670 if (mask)
20671 x = rs6000_mask_atomic_subword (retval, val, mask);
20673 cond = gen_reg_rtx (CCmode);
20674 emit_store_conditional (mode, cond, mem, x);
20676 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20677 emit_unlikely_jump (x, label);
20679 rs6000_post_atomic_barrier (model);
20681 if (shift)
20682 rs6000_finish_atomic_subword (operands[0], retval, shift);
20685 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20686 to perform. MEM is the memory on which to operate. VAL is the second
20687 operand of the binary operator. BEFORE and AFTER are optional locations to
20688 return the value of MEM either before or after the operation. MODEL_RTX
20689 is a CONST_INT containing the memory model to use. */
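/* E.g. a relaxed SImode atomic fetch-and-add comes out roughly as:

 1: lwarx   before,0,mem
    add     after,before,val
    stwcx.  after,0,mem
    bne-    cr0,1b          # lost the reservation -> retry  */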
20691 void
20692 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20693 rtx orig_before, rtx orig_after, rtx model_rtx)
20695 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20696 machine_mode mode = GET_MODE (mem);
20697 machine_mode store_mode = mode;
20698 rtx label, x, cond, mask, shift;
20699 rtx before = orig_before, after = orig_after;
20701 mask = shift = NULL_RTX;
20702 /* On power8, we want to use SImode for the operation. On previous systems,
20703 do the operation within a word and shift/mask to get the proper byte or
20704 halfword. */
20705 if (mode == QImode || mode == HImode)
20707 if (TARGET_SYNC_HI_QI)
20709 val = convert_modes (SImode, mode, val, 1);
20711 /* Prepare to adjust the return value. */
20712 before = gen_reg_rtx (SImode);
20713 if (after)
20714 after = gen_reg_rtx (SImode);
20715 mode = SImode;
20717 else
20719 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20721 /* Shift and mask VAL into position within the word. */
20722 val = convert_modes (SImode, mode, val, 1);
20723 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20724 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20726 switch (code)
20728 case IOR:
20729 case XOR:
20730 /* We've already zero-extended VAL. That is sufficient to
20731 make certain that it does not affect other bits. */
20732 mask = NULL;
20733 break;
20735 case AND:
20736 /* If we make certain that all of the other bits in VAL are
20737 set, that will be sufficient to not affect other bits. */
20738 x = gen_rtx_NOT (SImode, mask);
20739 x = gen_rtx_IOR (SImode, x, val);
20740 emit_insn (gen_rtx_SET (VOIDmode, val, x));
20741 mask = NULL;
20742 break;
20744 case NOT:
20745 case PLUS:
20746 case MINUS:
20747 /* These will all affect bits outside the field and need
20748 adjustment via MASK within the loop. */
20749 break;
20751 default:
20752 gcc_unreachable ();
20755 /* Prepare to adjust the return value. */
20756 before = gen_reg_rtx (SImode);
20757 if (after)
20758 after = gen_reg_rtx (SImode);
20759 store_mode = mode = SImode;
20763 mem = rs6000_pre_atomic_barrier (mem, model);
20765 label = gen_label_rtx ();
20766 emit_label (label);
20767 label = gen_rtx_LABEL_REF (VOIDmode, label);
20769 if (before == NULL_RTX)
20770 before = gen_reg_rtx (mode);
20772 emit_load_locked (mode, before, mem);
20774 if (code == NOT)
20776 x = expand_simple_binop (mode, AND, before, val,
20777 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20778 after = expand_simple_unop (mode, NOT, x, after, 1);
20780 else
20782 after = expand_simple_binop (mode, code, before, val,
20783 after, 1, OPTAB_LIB_WIDEN);
20786 x = after;
20787 if (mask)
20789 x = expand_simple_binop (SImode, AND, after, mask,
20790 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20791 x = rs6000_mask_atomic_subword (before, x, mask);
20793 else if (store_mode != mode)
20794 x = convert_modes (store_mode, mode, x, 1);
20796 cond = gen_reg_rtx (CCmode);
20797 emit_store_conditional (store_mode, cond, mem, x);
20799 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20800 emit_unlikely_jump (x, label);
20802 rs6000_post_atomic_barrier (model);
20804 if (shift)
20806 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20807 then do the calculations in a SImode register. */
20808 if (orig_before)
20809 rs6000_finish_atomic_subword (orig_before, before, shift);
20810 if (orig_after)
20811 rs6000_finish_atomic_subword (orig_after, after, shift);
20813 else if (store_mode != mode)
20815 /* QImode/HImode on machines with lbarx/lharx where we do the native
20816 operation and then do the calculations in a SImode register. */
20817 if (orig_before)
20818 convert_move (orig_before, before, 1);
20819 if (orig_after)
20820 convert_move (orig_after, after, 1);
20822 else if (orig_after && after != orig_after)
20823 emit_move_insn (orig_after, after);
20826 /* Emit instructions to move SRC to DST. Called by splitters for
20827 multi-register moves. It will emit at most one instruction for
20828 each register that is accessed; that is, it won't emit li/lis pairs
20829 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20830 register. */
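/* For instance, a TImode register-to-register move in 32-bit mode
becomes four SImode moves; when the source and destination ranges
overlap destructively (REGNO (src) < REGNO (dst)), the constituent
words are moved highest-numbered first so that no input register is
clobbered before it has been read.  */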
20832 void
20833 rs6000_split_multireg_move (rtx dst, rtx src)
20835 /* The register number of the first register being moved. */
20836 int reg;
20837 /* The mode that is to be moved. */
20838 machine_mode mode;
20839 /* The mode that the move is being done in, and its size. */
20840 machine_mode reg_mode;
20841 int reg_mode_size;
20842 /* The number of registers that will be moved. */
20843 int nregs;
20845 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20846 mode = GET_MODE (dst);
20847 nregs = hard_regno_nregs[reg][mode];
20848 if (FP_REGNO_P (reg))
20849 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20850 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20851 else if (ALTIVEC_REGNO_P (reg))
20852 reg_mode = V16QImode;
20853 else if (TARGET_E500_DOUBLE && mode == TFmode)
20854 reg_mode = DFmode;
20855 else
20856 reg_mode = word_mode;
20857 reg_mode_size = GET_MODE_SIZE (reg_mode);
20859 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20861 /* TDmode residing in FP registers is special, since the ISA requires that
20862 the lower-numbered word of a register pair is always the most significant
20863 word, even in little-endian mode. This does not match the usual subreg
20864 semantics, so we cannot use simplify_gen_subreg in those cases. Access
20865 the appropriate constituent registers "by hand" in little-endian mode.
20867 Note we do not need to check for destructive overlap here since TDmode
20868 can only reside in even/odd register pairs. */
20869 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
20871 rtx p_src, p_dst;
20872 int i;
20874 for (i = 0; i < nregs; i++)
20876 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
20877 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
20878 else
20879 p_src = simplify_gen_subreg (reg_mode, src, mode,
20880 i * reg_mode_size);
20882 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
20883 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
20884 else
20885 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
20886 i * reg_mode_size);
20888 emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
20891 return;
20894 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
20896 /* Move register range backwards, if we might have destructive
20897 overlap. */
20898 int i;
20899 for (i = nregs - 1; i >= 0; i--)
20900 emit_insn (gen_rtx_SET (VOIDmode,
20901 simplify_gen_subreg (reg_mode, dst, mode,
20902 i * reg_mode_size),
20903 simplify_gen_subreg (reg_mode, src, mode,
20904 i * reg_mode_size)));
20906 else
20908 int i;
20909 int j = -1;
20910 bool used_update = false;
20911 rtx restore_basereg = NULL_RTX;
20913 if (MEM_P (src) && INT_REGNO_P (reg))
20915 rtx breg;
20917 if (GET_CODE (XEXP (src, 0)) == PRE_INC
20918 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
20920 rtx delta_rtx;
20921 breg = XEXP (XEXP (src, 0), 0);
20922 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
20923 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
20924 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
20925 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20926 src = replace_equiv_address (src, breg);
20928 else if (! rs6000_offsettable_memref_p (src, reg_mode))
20930 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
20932 rtx basereg = XEXP (XEXP (src, 0), 0);
20933 if (TARGET_UPDATE)
20935 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
20936 emit_insn (gen_rtx_SET (VOIDmode, ndst,
20937 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
20938 used_update = true;
20940 else
20941 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20942 XEXP (XEXP (src, 0), 1)));
20943 src = replace_equiv_address (src, basereg);
20945 else
20947 rtx basereg = gen_rtx_REG (Pmode, reg);
20948 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
20949 src = replace_equiv_address (src, basereg);
20953 breg = XEXP (src, 0);
20954 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
20955 breg = XEXP (breg, 0);
20957 /* If the base register we are using to address memory is
20958 also a destination reg, then change that register last. */
20959 if (REG_P (breg)
20960 && REGNO (breg) >= REGNO (dst)
20961 && REGNO (breg) < REGNO (dst) + nregs)
20962 j = REGNO (breg) - REGNO (dst);
20964 else if (MEM_P (dst) && INT_REGNO_P (reg))
20966 rtx breg;
20968 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
20969 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
20971 rtx delta_rtx;
20972 breg = XEXP (XEXP (dst, 0), 0);
20973 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
20974 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
20975 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
20977 /* We have to update the breg before doing the store.
20978 Use store with update, if available. */
20980 if (TARGET_UPDATE)
20982 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20983 emit_insn (TARGET_32BIT
20984 ? (TARGET_POWERPC64
20985 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
20986 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
20987 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
20988 used_update = true;
20990 else
20991 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20992 dst = replace_equiv_address (dst, breg);
20994 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
20995 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
20997 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
20999 rtx basereg = XEXP (XEXP (dst, 0), 0);
21000 if (TARGET_UPDATE)
21002 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
21003 emit_insn (gen_rtx_SET (VOIDmode,
21004 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
21005 used_update = true;
21007 else
21008 emit_insn (gen_rtx_SET (VOIDmode, basereg,
21009 XEXP (XEXP (dst, 0), 1)));
21010 dst = replace_equiv_address (dst, basereg);
21012 else
21014 rtx basereg = XEXP (XEXP (dst, 0), 0);
21015 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
21016 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
21017 && REG_P (basereg)
21018 && REG_P (offsetreg)
21019 && REGNO (basereg) != REGNO (offsetreg));
21020 if (REGNO (basereg) == 0)
21022 rtx tmp = offsetreg;
21023 offsetreg = basereg;
21024 basereg = tmp;
21026 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
21027 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
21028 dst = replace_equiv_address (dst, basereg);
21031 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
21032 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
21035 for (i = 0; i < nregs; i++)
21037 /* Calculate index to next subword. */
21038 ++j;
21039 if (j == nregs)
21040 j = 0;
21042 /* If compiler already emitted move of first word by
21043 store with update, no need to do anything. */
21044 if (j == 0 && used_update)
21045 continue;
21047 emit_insn (gen_rtx_SET (VOIDmode,
21048 simplify_gen_subreg (reg_mode, dst, mode,
21049 j * reg_mode_size),
21050 simplify_gen_subreg (reg_mode, src, mode,
21051 j * reg_mode_size)));
21053 if (restore_basereg != NULL_RTX)
21054 emit_insn (restore_basereg);
21059 /* This page contains routines that are used to determine what the
21060 function prologue and epilogue code will do and write them out. */
21062 static inline bool
21063 save_reg_p (int r)
21065 return !call_used_regs[r] && df_regs_ever_live_p (r);
21068 /* Return the first fixed-point register that is required to be
21069 saved. 32 if none. */
21072 first_reg_to_save (void)
21074 int first_reg;
21076 /* Find lowest numbered live register. */
21077 for (first_reg = 13; first_reg <= 31; first_reg++)
21078 if (save_reg_p (first_reg))
21079 break;
21081 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
21082 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
21083 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
21084 || (TARGET_TOC && TARGET_MINIMAL_TOC))
21085 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21086 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
21088 #if TARGET_MACHO
21089 if (flag_pic
21090 && crtl->uses_pic_offset_table
21091 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
21092 return RS6000_PIC_OFFSET_TABLE_REGNUM;
21093 #endif
21095 return first_reg;
21098 /* Similar, for FP regs. */
21101 first_fp_reg_to_save (void)
21103 int first_reg;
21105 /* Find lowest numbered live register. */
21106 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
21107 if (save_reg_p (first_reg))
21108 break;
21110 return first_reg;
21113 /* Similar, for AltiVec regs. */
21115 static int
21116 first_altivec_reg_to_save (void)
21118 int i;
21120 /* Stack frame remains as is unless we are in AltiVec ABI. */
21121 if (! TARGET_ALTIVEC_ABI)
21122 return LAST_ALTIVEC_REGNO + 1;
21124 /* On Darwin, the unwind routines are compiled without
21125 TARGET_ALTIVEC, and use save_world to save/restore the
21126 altivec registers when necessary. */
21127 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21128 && ! TARGET_ALTIVEC)
21129 return FIRST_ALTIVEC_REGNO + 20;
21131 /* Find lowest numbered live register. */
21132 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
21133 if (save_reg_p (i))
21134 break;
21136 return i;
21139 /* Return a 32-bit mask of the AltiVec registers we need to set in
21140 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
21141 the 32-bit word is 0. */
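/* A small example under that numbering (assuming ALTIVEC_REG_BIT maps
Vn to 0x80000000 >> n): a function in which only V20 is live, and in
which V20 is neither an argument nor the return value, yields a mask
of 0x00000800.  */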
21143 static unsigned int
21144 compute_vrsave_mask (void)
21146 unsigned int i, mask = 0;
21148 /* On Darwin, the unwind routines are compiled without
21149 TARGET_ALTIVEC, and use save_world to save/restore the
21150 call-saved altivec registers when necessary. */
21151 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21152 && ! TARGET_ALTIVEC)
21153 mask |= 0xFFF;
21155 /* First, find out if we use _any_ altivec registers. */
21156 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21157 if (df_regs_ever_live_p (i))
21158 mask |= ALTIVEC_REG_BIT (i);
21160 if (mask == 0)
21161 return mask;
21163 /* Next, remove the argument registers from the set. These must
21164 be in the VRSAVE mask set by the caller, so we don't need to add
21165 them in again. More importantly, the mask we compute here is
21166 used to generate CLOBBERs in the set_vrsave insn, and we do not
21167 wish the argument registers to die. */
21168 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
21169 mask &= ~ALTIVEC_REG_BIT (i);
21171 /* Similarly, remove the return value from the set. */
21173 bool yes = false;
21174 diddle_return_value (is_altivec_return_reg, &yes);
21175 if (yes)
21176 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
21179 return mask;
21182 /* For a very restricted set of circumstances, we can cut down the
21183 size of prologues/epilogues by calling our own save/restore-the-world
21184 routines. */
21186 static void
21187 compute_save_world_info (rs6000_stack_t *info_ptr)
21189 info_ptr->world_save_p = 1;
21190 info_ptr->world_save_p
21191 = (WORLD_SAVE_P (info_ptr)
21192 && DEFAULT_ABI == ABI_DARWIN
21193 && !cfun->has_nonlocal_label
21194 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
21195 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
21196 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
21197 && info_ptr->cr_save_p);
21199 /* This will not work in conjunction with sibcalls. Make sure there
21200 are none. (This check is expensive, but seldom executed.) */
21201 if (WORLD_SAVE_P (info_ptr))
21203 rtx_insn *insn;
21204 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
21205 if (CALL_P (insn) && SIBLING_CALL_P (insn))
21207 info_ptr->world_save_p = 0;
21208 break;
21212 if (WORLD_SAVE_P (info_ptr))
21214 /* Even if we're not touching VRsave, make sure there's room on the
21215 stack for it, if it looks like we're calling SAVE_WORLD, which
21216 will attempt to save it. */
21217 info_ptr->vrsave_size = 4;
21219 /* If we are going to save the world, we need to save the link register too. */
21220 info_ptr->lr_save_p = 1;
21222 /* "Save" the VRsave register too if we're saving the world. */
21223 if (info_ptr->vrsave_mask == 0)
21224 info_ptr->vrsave_mask = compute_vrsave_mask ();
21226 /* Because the Darwin register save/restore routines only handle
21227 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
21228 check. */
21229 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
21230 && (info_ptr->first_altivec_reg_save
21231 >= FIRST_SAVED_ALTIVEC_REGNO));
21233 return;
21237 static void
21238 is_altivec_return_reg (rtx reg, void *xyes)
21240 bool *yes = (bool *) xyes;
21241 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
21242 *yes = true;
21246 /* Look for user-defined global regs in the range FIRST to LAST-1.
21247 We should not restore these, and so cannot use lmw or out-of-line
21248 restore functions if there are any. We also can't save them
21249 (well, emit frame notes for them), because frame unwinding during
21250 exception handling will restore saved registers. */
21252 static bool
21253 global_regs_p (unsigned first, unsigned last)
21255 while (first < last)
21256 if (global_regs[first++])
21257 return true;
21258 return false;
21261 /* Determine the strategy for saving/restoring registers. */
21263 enum {
21264 SAVRES_MULTIPLE = 0x1,
21265 SAVE_INLINE_FPRS = 0x2,
21266 SAVE_INLINE_GPRS = 0x4,
21267 REST_INLINE_FPRS = 0x8,
21268 REST_INLINE_GPRS = 0x10,
21269 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
21270 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
21271 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
21272 SAVE_INLINE_VRS = 0x100,
21273 REST_INLINE_VRS = 0x200
21276 static int
21277 rs6000_savres_strategy (rs6000_stack_t *info,
21278 bool using_static_chain_p)
21280 int strategy = 0;
21281 bool lr_save_p;
21283 if (TARGET_MULTIPLE
21284 && !TARGET_POWERPC64
21285 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
21286 && info->first_gp_reg_save < 31
21287 && !global_regs_p (info->first_gp_reg_save, 32))
21288 strategy |= SAVRES_MULTIPLE;
21290 if (crtl->calls_eh_return
21291 || cfun->machine->ra_need_lr)
21292 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
21293 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
21294 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21296 if (info->first_fp_reg_save == 64
21297 /* The out-of-line FP routines use double-precision stores;
21298 we can't use those routines if we don't have such stores. */
21299 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
21300 || global_regs_p (info->first_fp_reg_save, 64))
21301 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21303 if (info->first_gp_reg_save == 32
21304 || (!(strategy & SAVRES_MULTIPLE)
21305 && global_regs_p (info->first_gp_reg_save, 32)))
21306 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21308 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
21309 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
21310 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21312 /* Define cutoff for using out-of-line functions to save registers. */
21313 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
21315 if (!optimize_size)
21317 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21318 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21319 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21321 else
21323 /* Prefer out-of-line restore if it will exit. */
21324 if (info->first_fp_reg_save > 61)
21325 strategy |= SAVE_INLINE_FPRS;
21326 if (info->first_gp_reg_save > 29)
21328 if (info->first_fp_reg_save == 64)
21329 strategy |= SAVE_INLINE_GPRS;
21330 else
21331 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21333 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
21334 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21337 else if (DEFAULT_ABI == ABI_DARWIN)
21339 if (info->first_fp_reg_save > 60)
21340 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21341 if (info->first_gp_reg_save > 29)
21342 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21343 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21345 else
21347 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21348 if (info->first_fp_reg_save > 61)
21349 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21350 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21351 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21354 /* Don't bother to try to save things out-of-line if r11 is occupied
21355 by the static chain. It would require too much fiddling and the
21356 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
21357 pointer on Darwin, and AIX uses r1 or r12. */
21358 if (using_static_chain_p
21359 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
21360 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
21361 | SAVE_INLINE_GPRS
21362 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21364 /* We can only use the out-of-line routines to restore if we've
21365 saved all the registers from first_fp_reg_save in the prologue.
21366 Otherwise, we risk loading garbage. */
21367 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
21369 int i;
21371 for (i = info->first_fp_reg_save; i < 64; i++)
21372 if (!save_reg_p (i))
21374 strategy |= REST_INLINE_FPRS;
21375 break;
21379 /* If we are going to use store multiple, then don't even bother
21380 with the out-of-line routines, since the store-multiple
21381 instruction will always be smaller. */
21382 if ((strategy & SAVRES_MULTIPLE))
21383 strategy |= SAVE_INLINE_GPRS;
21385 /* info->lr_save_p isn't yet set if the only reason lr needs to be
21386 saved is an out-of-line save or restore. Set up the value for
21387 the next test (excluding out-of-line gpr restore). */
21388 lr_save_p = (info->lr_save_p
21389 || !(strategy & SAVE_INLINE_GPRS)
21390 || !(strategy & SAVE_INLINE_FPRS)
21391 || !(strategy & SAVE_INLINE_VRS)
21392 || !(strategy & REST_INLINE_FPRS)
21393 || !(strategy & REST_INLINE_VRS));
21395 /* The situation is more complicated with load multiple. We'd
21396 prefer to use the out-of-line routines for restores, since the
21397 "exit" out-of-line routines can handle the restore of LR and the
21398 frame teardown. However, it doesn't make sense to use the
21399 out-of-line routine if that is the only reason we'd need to save
21400 LR, and we can't use the "exit" out-of-line gpr restore if we
21401 have saved some fprs; in those cases it is advantageous to use
21402 load multiple when available. */
21403 if ((strategy & SAVRES_MULTIPLE)
21404 && (!lr_save_p
21405 || info->first_fp_reg_save != 64))
21406 strategy |= REST_INLINE_GPRS;
21408 /* Saving CR interferes with the exit routines used on the SPE, so
21409 just punt here. */
21410 if (TARGET_SPE_ABI
21411 && info->spe_64bit_regs_used
21412 && info->cr_save_p)
21413 strategy |= REST_INLINE_GPRS;
21415 /* We can only use load multiple or the out-of-line routines to
21416 restore if we've used store multiple or out-of-line routines
21417 in the prologue, i.e. if we've saved all the registers from
21418 first_gp_reg_save. Otherwise, we risk loading garbage. */
21419 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
21420 == SAVE_INLINE_GPRS)
21422 int i;
21424 for (i = info->first_gp_reg_save; i < 32; i++)
21425 if (!save_reg_p (i))
21427 strategy |= REST_INLINE_GPRS;
21428 break;
21432 if (TARGET_ELF && TARGET_64BIT)
21434 if (!(strategy & SAVE_INLINE_FPRS))
21435 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21436 else if (!(strategy & SAVE_INLINE_GPRS)
21437 && info->first_fp_reg_save == 64)
21438 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
21440 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
21441 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
21443 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
21444 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21446 return strategy;
21449 /* Calculate the stack information for the current function. This is
21450 complicated by having two separate calling sequences, the AIX calling
21451 sequence and the V.4 calling sequence.
21453 AIX (and Darwin/Mac OS X) stack frames look like:
21454 32-bit 64-bit
21455 SP----> +---------------------------------------+
21456 | back chain to caller | 0 0
21457 +---------------------------------------+
21458 | saved CR | 4 8 (8-11)
21459 +---------------------------------------+
21460 | saved LR | 8 16
21461 +---------------------------------------+
21462 | reserved for compilers | 12 24
21463 +---------------------------------------+
21464 | reserved for binders | 16 32
21465 +---------------------------------------+
21466 | saved TOC pointer | 20 40
21467 +---------------------------------------+
21468 | Parameter save area (P) | 24 48
21469 +---------------------------------------+
21470 | Alloca space (A) | 24+P etc.
21471 +---------------------------------------+
21472 | Local variable space (L) | 24+P+A
21473 +---------------------------------------+
21474 | Float/int conversion temporary (X) | 24+P+A+L
21475 +---------------------------------------+
21476 | Save area for AltiVec registers (W) | 24+P+A+L+X
21477 +---------------------------------------+
21478 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21479 +---------------------------------------+
21480 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21481 +---------------------------------------+
21482 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
21483 +---------------------------------------+
21484 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
21485 +---------------------------------------+
21486 old SP->| back chain to caller's caller |
21487 +---------------------------------------+
21489 The required alignment for AIX configurations is two words (i.e., 8
21490 or 16 bytes).
21492 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21494 SP----> +---------------------------------------+
21495 | Back chain to caller | 0
21496 +---------------------------------------+
21497 | Save area for CR | 8
21498 +---------------------------------------+
21499 | Saved LR | 16
21500 +---------------------------------------+
21501 | Saved TOC pointer | 24
21502 +---------------------------------------+
21503 | Parameter save area (P) | 32
21504 +---------------------------------------+
21505 | Alloca space (A) | 32+P
21506 +---------------------------------------+
21507 | Local variable space (L) | 32+P+A
21508 +---------------------------------------+
21509 | Save area for AltiVec registers (W) | 32+P+A+L
21510 +---------------------------------------+
21511 | AltiVec alignment padding (Y) | 32+P+A+L+W
21512 +---------------------------------------+
21513 | Save area for GP registers (G) | 32+P+A+L+W+Y
21514 +---------------------------------------+
21515 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21516 +---------------------------------------+
21517 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21518 +---------------------------------------+
21521 V.4 stack frames look like:
21523 SP----> +---------------------------------------+
21524 | back chain to caller | 0
21525 +---------------------------------------+
21526 | caller's saved LR | 4
21527 +---------------------------------------+
21528 | Parameter save area (P) | 8
21529 +---------------------------------------+
21530 | Alloca space (A) | 8+P
21531 +---------------------------------------+
21532 | Varargs save area (V) | 8+P+A
21533 +---------------------------------------+
21534 | Local variable space (L) | 8+P+A+V
21535 +---------------------------------------+
21536 | Float/int conversion temporary (X) | 8+P+A+V+L
21537 +---------------------------------------+
21538 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21539 +---------------------------------------+
21540 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21541 +---------------------------------------+
21542 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21543 +---------------------------------------+
21544 | SPE: area for 64-bit GP registers |
21545 +---------------------------------------+
21546 | SPE alignment padding |
21547 +---------------------------------------+
21548 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21549 +---------------------------------------+
21550 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21551 +---------------------------------------+
21552 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21553 +---------------------------------------+
21554 old SP->| back chain to caller's caller |
21555 +---------------------------------------+
21557 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21558 given. (But note below and in sysv4.h that we require only 8 and
21559 may round up the size of our stack frame anyway. The historical
21560 reason is early versions of powerpc-linux which didn't properly
21561 align the stack at program startup. A happy side-effect is that
21562 -mno-eabi libraries can be used with -meabi programs.)
21564 The EABI configuration defaults to the V.4 layout. However,
21565 the stack alignment requirements may differ. If -mno-eabi is not
21566 given, the required stack alignment is 8 bytes; if -mno-eabi is
21567 given, the required alignment is 16 bytes. (But see V.4 comment
21568 above.) */
21570 #ifndef ABI_STACK_BOUNDARY
21571 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21572 #endif
21574 static rs6000_stack_t *
21575 rs6000_stack_info (void)
21577 /* We should never be called for thunks; we are not set up for that. */
21578 gcc_assert (!cfun->is_thunk);
21580 rs6000_stack_t *info_ptr = &stack_info;
21581 int reg_size = TARGET_32BIT ? 4 : 8;
21582 int ehrd_size;
21583 int ehcr_size;
21584 int save_align;
21585 int first_gp;
21586 HOST_WIDE_INT non_fixed_size;
21587 bool using_static_chain_p;
21589 if (reload_completed && info_ptr->reload_completed)
21590 return info_ptr;
21592 memset (info_ptr, 0, sizeof (*info_ptr));
21593 info_ptr->reload_completed = reload_completed;
21595 if (TARGET_SPE)
21597 /* Cache value so we don't rescan instruction chain over and over. */
21598 if (cfun->machine->insn_chain_scanned_p == 0)
21599 cfun->machine->insn_chain_scanned_p
21600 = spe_func_has_64bit_regs_p () + 1;
21601 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21604 /* Select which calling sequence. */
21605 info_ptr->abi = DEFAULT_ABI;
21607 /* Calculate which registers need to be saved & save area size. */
21608 info_ptr->first_gp_reg_save = first_reg_to_save ();
21609 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21610 even if it currently looks like we won't. Reload may need it to
21611 get at a constant; if so, it will have already created a constant
21612 pool entry for it. */
21613 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21614 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21615 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21616 && crtl->uses_const_pool
21617 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21618 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21619 else
21620 first_gp = info_ptr->first_gp_reg_save;
21622 info_ptr->gp_size = reg_size * (32 - first_gp);
21624 /* For the SPE, we have an additional upper 32-bits on each GPR.
21625 Ideally we should save the entire 64-bits only when the upper
21626 half is used in SIMD instructions. Since we only record
21627 registers live (not the size they are used in), this proves
21628 difficult because we'd have to traverse the instruction chain at
21629 the right time, taking reload into account. This is a real pain,
21630 so we opt to always save the GPRs in 64-bits if even one register
21631 gets used in 64-bits. Otherwise, all the registers in the frame
21632 get saved in 32-bits.
21634 So... when we save all GPRs (except the SP) in 64-bits, the
21635 traditional GP save area will be empty. */
21636 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21637 info_ptr->gp_size = 0;
21639 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21640 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21642 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21643 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21644 - info_ptr->first_altivec_reg_save);
21646 /* Does this function call anything? */
21647 info_ptr->calls_p = (! crtl->is_leaf
21648 || cfun->machine->ra_needs_full_frame);
21650 /* Determine if we need to save the condition code registers. */
21651 if (df_regs_ever_live_p (CR2_REGNO)
21652 || df_regs_ever_live_p (CR3_REGNO)
21653 || df_regs_ever_live_p (CR4_REGNO))
21655 info_ptr->cr_save_p = 1;
21656 if (DEFAULT_ABI == ABI_V4)
21657 info_ptr->cr_size = reg_size;
21660 /* If the current function calls __builtin_eh_return, then we need
21661 to allocate stack space for registers that will hold data for
21662 the exception handler. */
21663 if (crtl->calls_eh_return)
21665 unsigned int i;
21666 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21667 continue;
21669 /* SPE saves EH registers in 64-bits. */
21670 ehrd_size = i * (TARGET_SPE_ABI
21671 && info_ptr->spe_64bit_regs_used != 0
21672 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21674 else
21675 ehrd_size = 0;
21677 /* In the ELFv2 ABI, we also need to allocate space for separate
21678 CR field save areas if the function calls __builtin_eh_return. */
21679 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21681 /* This hard-codes that we have three call-saved CR fields. */
21682 ehcr_size = 3 * reg_size;
21683 /* We do *not* use the regular CR save mechanism. */
21684 info_ptr->cr_save_p = 0;
21686 else
21687 ehcr_size = 0;
21689 /* Determine various sizes. */
21690 info_ptr->reg_size = reg_size;
21691 info_ptr->fixed_size = RS6000_SAVE_AREA;
21692 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21693 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21694 TARGET_ALTIVEC ? 16 : 8);
21695 if (FRAME_GROWS_DOWNWARD)
21696 info_ptr->vars_size
21697 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21698 + info_ptr->parm_size,
21699 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21700 - (info_ptr->fixed_size + info_ptr->vars_size
21701 + info_ptr->parm_size);
21703 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21704 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21705 else
21706 info_ptr->spe_gp_size = 0;
21708 if (TARGET_ALTIVEC_ABI)
21709 info_ptr->vrsave_mask = compute_vrsave_mask ();
21710 else
21711 info_ptr->vrsave_mask = 0;
21713 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21714 info_ptr->vrsave_size = 4;
21715 else
21716 info_ptr->vrsave_size = 0;
21718 compute_save_world_info (info_ptr);
21720 /* Calculate the offsets. */
21721 switch (DEFAULT_ABI)
21723 case ABI_NONE:
21724 default:
21725 gcc_unreachable ();
21727 case ABI_AIX:
21728 case ABI_ELFv2:
21729 case ABI_DARWIN:
21730 info_ptr->fp_save_offset = - info_ptr->fp_size;
21731 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21733 if (TARGET_ALTIVEC_ABI)
21735 info_ptr->vrsave_save_offset
21736 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21738 /* Align stack so vector save area is on a quadword boundary.
21739 The padding goes above the vectors. */
21740 if (info_ptr->altivec_size != 0)
21741 info_ptr->altivec_padding_size
21742 = info_ptr->vrsave_save_offset & 0xF;
21743 else
21744 info_ptr->altivec_padding_size = 0;
21746 info_ptr->altivec_save_offset
21747 = info_ptr->vrsave_save_offset
21748 - info_ptr->altivec_padding_size
21749 - info_ptr->altivec_size;
21750 gcc_assert (info_ptr->altivec_size == 0
21751 || info_ptr->altivec_save_offset % 16 == 0);
21753 /* Adjust for AltiVec case. */
21754 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21756 else
21757 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21759 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21760 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21761 info_ptr->lr_save_offset = 2*reg_size;
21762 break;
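/* A rough picture of the offsets just computed, relative to the
   frame base (the incoming stack pointer); positive offsets land in
   the caller's frame header, negative ones in this frame:

     2*reg_size          LR save word
     reg_size            CR save word
     -fp_size            FPR save area
     -fp_size - gp_size  GPR save area
     further down        VRSAVE word, AltiVec padding and save area,
                         then the ehrd and ehcr save areas  */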
21764 case ABI_V4:
21765 info_ptr->fp_save_offset = - info_ptr->fp_size;
21766 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21767 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21769 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21771 /* Align stack so SPE GPR save area is aligned on a
21772 double-word boundary. */
21773 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21774 info_ptr->spe_padding_size
21775 = 8 - (-info_ptr->cr_save_offset % 8);
21776 else
21777 info_ptr->spe_padding_size = 0;
21779 info_ptr->spe_gp_save_offset
21780 = info_ptr->cr_save_offset
21781 - info_ptr->spe_padding_size
21782 - info_ptr->spe_gp_size;
21784 /* Adjust for SPE case. */
21785 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21787 else if (TARGET_ALTIVEC_ABI)
21789 info_ptr->vrsave_save_offset
21790 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21792 /* Align stack so vector save area is on a quadword boundary. */
21793 if (info_ptr->altivec_size != 0)
21794 info_ptr->altivec_padding_size
21795 = 16 - (-info_ptr->vrsave_save_offset % 16);
21796 else
21797 info_ptr->altivec_padding_size = 0;
21799 info_ptr->altivec_save_offset
21800 = info_ptr->vrsave_save_offset
21801 - info_ptr->altivec_padding_size
21802 - info_ptr->altivec_size;
21804 /* Adjust for AltiVec case. */
21805 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21807 else
21808 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21809 info_ptr->ehrd_offset -= ehrd_size;
21810 info_ptr->lr_save_offset = reg_size;
21811 break;
21814 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21815 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21816 + info_ptr->gp_size
21817 + info_ptr->altivec_size
21818 + info_ptr->altivec_padding_size
21819 + info_ptr->spe_gp_size
21820 + info_ptr->spe_padding_size
21821 + ehrd_size
21822 + ehcr_size
21823 + info_ptr->cr_size
21824 + info_ptr->vrsave_size,
21825 save_align);
21827 non_fixed_size = (info_ptr->vars_size
21828 + info_ptr->parm_size
21829 + info_ptr->save_size);
21831 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21832 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
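/* Continuing with made-up sizes: non_fixed_size = 250 plus
   fixed_size = 48 gives 298, which RS6000_ALIGN rounds up to 304 for
   a 16-byte ABI_STACK_BOUNDARY.  */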
21834 /* Determine if we need to save the link register. */
21835 if (info_ptr->calls_p
21836 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21837 && crtl->profile
21838 && !TARGET_PROFILE_KERNEL)
21839 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21840 #ifdef TARGET_RELOCATABLE
21841 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21842 #endif
21843 || rs6000_ra_ever_killed ())
21844 info_ptr->lr_save_p = 1;
21846 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21847 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21848 && call_used_regs[STATIC_CHAIN_REGNUM]);
21849 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21850 using_static_chain_p);
21852 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21853 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21854 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21855 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21856 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21857 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21858 info_ptr->lr_save_p = 1;
21860 if (info_ptr->lr_save_p)
21861 df_set_regs_ever_live (LR_REGNO, true);
21863 /* Determine if we need to allocate any stack frame:
21865 For AIX we need to push the stack if a frame pointer is needed
21866 (because the stack might be dynamically adjusted), if we are
21867 debugging, if we make calls, or if the sum of fp_save, gp_save,
21868 and local variables is more than the space needed to save all
21869 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21870 + 18*8 = 288 (GPR13 reserved).
21872 For V.4 we don't have the stack cushion that AIX uses, but assume
21873 that the debugger can handle stackless frames. */
21875 if (info_ptr->calls_p)
21876 info_ptr->push_p = 1;
21878 else if (DEFAULT_ABI == ABI_V4)
21879 info_ptr->push_p = non_fixed_size != 0;
21881 else if (frame_pointer_needed)
21882 info_ptr->push_p = 1;
21884 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21885 info_ptr->push_p = 1;
21887 else
21888 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
21890 /* Zero offsets if we're not saving those registers. */
21891 if (info_ptr->fp_size == 0)
21892 info_ptr->fp_save_offset = 0;
21894 if (info_ptr->gp_size == 0)
21895 info_ptr->gp_save_offset = 0;
21897 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21898 info_ptr->altivec_save_offset = 0;
21900 /* Zero VRSAVE offset if not saved and restored. */
21901 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21902 info_ptr->vrsave_save_offset = 0;
21904 if (! TARGET_SPE_ABI
21905 || info_ptr->spe_64bit_regs_used == 0
21906 || info_ptr->spe_gp_size == 0)
21907 info_ptr->spe_gp_save_offset = 0;
21909 if (! info_ptr->lr_save_p)
21910 info_ptr->lr_save_offset = 0;
21912 if (! info_ptr->cr_save_p)
21913 info_ptr->cr_save_offset = 0;
21915 return info_ptr;
21918 /* Return true if the current function uses any GPRs in 64-bit SIMD
21919 mode. */
21921 static bool
21922 spe_func_has_64bit_regs_p (void)
21924 rtx_insn *insns, *insn;
21926 /* Functions that save and restore all the call-saved registers will
21927 need to save/restore the registers in 64-bits. */
21928 if (crtl->calls_eh_return
21929 || cfun->calls_setjmp
21930 || crtl->has_nonlocal_goto)
21931 return true;
21933 insns = get_insns ();
21935 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
21937 if (INSN_P (insn))
21939 rtx i;
21941 /* FIXME: This should be implemented with attributes, e.g.
21943 (set_attr "spe64" "true") ... and then
21944 if (get_spe64 (insn)) return true;
21946 That is the only reliable way to do the checks below. */
21948 i = PATTERN (insn);
21949 if (GET_CODE (i) == SET)
21951 machine_mode mode = GET_MODE (SET_SRC (i));
21953 if (SPE_VECTOR_MODE (mode))
21954 return true;
21955 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
21956 return true;
21961 return false;
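/* Illustrative case: with TARGET_E500_DOUBLE a plain double
   assignment expands to DFmode SETs, so the scan above finds one and
   the prologue switches to 64-bit register saves and restores.  */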
21964 static void
21965 debug_stack_info (rs6000_stack_t *info)
21967 const char *abi_string;
21969 if (! info)
21970 info = rs6000_stack_info ();
21972 fprintf (stderr, "\nStack information for function %s:\n",
21973 ((current_function_decl && DECL_NAME (current_function_decl))
21974 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
21975 : "<unknown>"));
21977 switch (info->abi)
21979 default: abi_string = "Unknown"; break;
21980 case ABI_NONE: abi_string = "NONE"; break;
21981 case ABI_AIX: abi_string = "AIX"; break;
21982 case ABI_ELFv2: abi_string = "ELFv2"; break;
21983 case ABI_DARWIN: abi_string = "Darwin"; break;
21984 case ABI_V4: abi_string = "V.4"; break;
21987 fprintf (stderr, "\tABI = %5s\n", abi_string);
21989 if (TARGET_ALTIVEC_ABI)
21990 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
21992 if (TARGET_SPE_ABI)
21993 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
21995 if (info->first_gp_reg_save != 32)
21996 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
21998 if (info->first_fp_reg_save != 64)
21999 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
22001 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
22002 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
22003 info->first_altivec_reg_save);
22005 if (info->lr_save_p)
22006 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
22008 if (info->cr_save_p)
22009 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
22011 if (info->vrsave_mask)
22012 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
22014 if (info->push_p)
22015 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
22017 if (info->calls_p)
22018 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
22020 if (info->gp_save_offset)
22021 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
22023 if (info->fp_save_offset)
22024 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
22026 if (info->altivec_save_offset)
22027 fprintf (stderr, "\taltivec_save_offset = %5d\n",
22028 info->altivec_save_offset);
22030 if (info->spe_gp_save_offset)
22031 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
22032 info->spe_gp_save_offset);
22034 if (info->vrsave_save_offset)
22035 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
22036 info->vrsave_save_offset);
22038 if (info->lr_save_offset)
22039 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
22041 if (info->cr_save_offset)
22042 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
22044 if (info->varargs_save_offset)
22045 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
22047 if (info->total_size)
22048 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC "\n",
22049 info->total_size);
22051 if (info->vars_size)
22052 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC "\n",
22053 info->vars_size);
22055 if (info->parm_size)
22056 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
22058 if (info->fixed_size)
22059 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
22061 if (info->gp_size)
22062 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
22064 if (info->spe_gp_size)
22065 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
22067 if (info->fp_size)
22068 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
22070 if (info->altivec_size)
22071 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
22073 if (info->vrsave_size)
22074 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
22076 if (info->altivec_padding_size)
22077 fprintf (stderr, "\taltivec_padding_size= %5d\n",
22078 info->altivec_padding_size);
22080 if (info->spe_padding_size)
22081 fprintf (stderr, "\tspe_padding_size = %5d\n",
22082 info->spe_padding_size);
22084 if (info->cr_size)
22085 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
22087 if (info->save_size)
22088 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
22090 if (info->reg_size != 4)
22091 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
22093 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
22095 fprintf (stderr, "\n");
22099 rtx rs6000_return_addr (int count, rtx frame)
22101 /* Currently we don't optimize very well between prologue and body
22102 code, and for PIC code the result can actually be quite bad, so
22103 don't try to be too clever here. */
22104 if (count != 0
22105 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
22107 cfun->machine->ra_needs_full_frame = 1;
22109 return
22110 gen_rtx_MEM
22111 (Pmode,
22112 memory_address
22113 (Pmode,
22114 plus_constant (Pmode,
22115 copy_to_reg
22116 (gen_rtx_MEM (Pmode,
22117 memory_address (Pmode, frame))),
22118 RETURN_ADDRESS_OFFSET)));
22121 cfun->machine->ra_need_lr = 1;
22122 return get_hard_reg_initial_val (Pmode, LR_REGNO);
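/* Usage sketch: RETURN_ADDR_RTX expands to this function, so a
   source construct such as

     void *ra = __builtin_return_address (0);

   arrives here with COUNT == 0 and takes the LR path just above,
   while nonzero counts (or PIC on V.4/Darwin) take the full-frame
   walk earlier in the function.  */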
22125 /* Say whether a function is a candidate for sibcall handling or not. */
22127 static bool
22128 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22130 tree fntype;
22132 if (decl)
22133 fntype = TREE_TYPE (decl);
22134 else
22135 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22137 /* We can't do it if the called function has more vector parameters
22138 than the current function; there's nowhere to put the VRsave code. */
22139 if (TARGET_ALTIVEC_ABI
22140 && TARGET_ALTIVEC_VRSAVE
22141 && !(decl && decl == current_function_decl))
22143 function_args_iterator args_iter;
22144 tree type;
22145 int nvreg = 0;
22147 /* Functions with vector parameters are required to have a
22148 prototype, so the argument type info must be available
22149 here. */
22150 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
22151 if (TREE_CODE (type) == VECTOR_TYPE
22152 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22153 nvreg++;
22155 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
22156 if (TREE_CODE (type) == VECTOR_TYPE
22157 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22158 nvreg--;
22160 if (nvreg > 0)
22161 return false;
22164 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22165 functions, because the callee may have a different TOC pointer from
22166 the caller's and there's no way to ensure we restore the TOC when
22167 we return. With the secure-plt SYSV ABI we can't make non-local
22168 calls when -fpic/-fPIC because the plt call stubs use r30. */
22169 if (DEFAULT_ABI == ABI_DARWIN
22170 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22171 && decl
22172 && !DECL_EXTERNAL (decl)
22173 && (*targetm.binds_local_p) (decl))
22174 || (DEFAULT_ABI == ABI_V4
22175 && (!TARGET_SECURE_PLT
22176 || !flag_pic
22177 || (decl
22178 && (*targetm.binds_local_p) (decl)))))
22180 tree attr_list = TYPE_ATTRIBUTES (fntype);
22182 if (!lookup_attribute ("longcall", attr_list)
22183 || lookup_attribute ("shortcall", attr_list))
22184 return true;
22187 return false;
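/* Illustrative (hypothetical functions): under AIX or ELFv2,

     static int helper (int x);
     int wrapper (int x) { return helper (x); }

   can use a sibcall because HELPER binds locally and shares the TOC,
   whereas a tail call to an external DSO function fails the checks
   above and returns false.  */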
22190 static int
22191 rs6000_ra_ever_killed (void)
22193 rtx_insn *top;
22194 rtx reg;
22195 rtx_insn *insn;
22197 if (cfun->is_thunk)
22198 return 0;
22200 if (cfun->machine->lr_save_state)
22201 return cfun->machine->lr_save_state - 1;
22203 /* regs_ever_live has LR marked as used if any sibcalls are present,
22204 but this should not force saving and restoring in the
22205 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22206 clobbers LR, so that is inappropriate. */
22208 /* Also, the prologue can generate a store into LR that
22209 doesn't really count, like this:
22211 move LR->R0
22212 bcl to set PIC register
22213 move LR->R31
22214 move R0->LR
22216 When we're called from the epilogue, we need to avoid counting
22217 this as a store. */
22219 push_topmost_sequence ();
22220 top = get_insns ();
22221 pop_topmost_sequence ();
22222 reg = gen_rtx_REG (Pmode, LR_REGNO);
22224 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22226 if (INSN_P (insn))
22228 if (CALL_P (insn))
22230 if (!SIBLING_CALL_P (insn))
22231 return 1;
22233 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22234 return 1;
22235 else if (set_of (reg, insn) != NULL_RTX
22236 && !prologue_epilogue_contains (insn))
22237 return 1;
22240 return 0;
22243 /* Emit instructions needed to load the TOC register.
22244 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
22245 and there is a constant pool, or for SVR4 -fpic. */
22247 void
22248 rs6000_emit_load_toc_table (int fromprolog)
22250 rtx dest;
22251 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22253 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22255 char buf[30];
22256 rtx lab, tmp1, tmp2, got;
22258 lab = gen_label_rtx ();
22259 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22260 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22261 if (flag_pic == 2)
22262 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22263 else
22264 got = rs6000_got_sym ();
22265 tmp1 = tmp2 = dest;
22266 if (!fromprolog)
22268 tmp1 = gen_reg_rtx (Pmode);
22269 tmp2 = gen_reg_rtx (Pmode);
22271 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22272 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22273 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22274 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22276 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22278 emit_insn (gen_load_toc_v4_pic_si ());
22279 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22281 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22283 char buf[30];
22284 rtx temp0 = (fromprolog
22285 ? gen_rtx_REG (Pmode, 0)
22286 : gen_reg_rtx (Pmode));
22288 if (fromprolog)
22290 rtx symF, symL;
22292 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22293 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22295 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22296 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22298 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22299 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22300 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22302 else
22304 rtx tocsym, lab;
22306 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22307 lab = gen_label_rtx ();
22308 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22309 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22310 if (TARGET_LINK_STACK)
22311 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22312 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22314 emit_insn (gen_addsi3 (dest, temp0, dest));
22316 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22318 /* This is for AIX-style TOC code running in non-PIC ELF32. */
22319 char buf[30];
22320 rtx realsym;
22321 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22322 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22324 emit_insn (gen_elf_high (dest, realsym));
22325 emit_insn (gen_elf_low (dest, dest, realsym));
22327 else
22329 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22331 if (TARGET_32BIT)
22332 emit_insn (gen_load_toc_aix_si (dest));
22333 else
22334 emit_insn (gen_load_toc_aix_di (dest));
22338 /* Emit instructions to restore the link register after determining where
22339 its value has been stored. */
22341 void
22342 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
22344 rs6000_stack_t *info = rs6000_stack_info ();
22345 rtx operands[2];
22347 operands[0] = source;
22348 operands[1] = scratch;
22350 if (info->lr_save_p)
22352 rtx frame_rtx = stack_pointer_rtx;
22353 HOST_WIDE_INT sp_offset = 0;
22354 rtx tmp;
22356 if (frame_pointer_needed
22357 || cfun->calls_alloca
22358 || info->total_size > 32767)
22360 tmp = gen_frame_mem (Pmode, frame_rtx);
22361 emit_move_insn (operands[1], tmp);
22362 frame_rtx = operands[1];
22364 else if (info->push_p)
22365 sp_offset = info->total_size;
22367 tmp = plus_constant (Pmode, frame_rtx,
22368 info->lr_save_offset + sp_offset);
22369 tmp = gen_frame_mem (Pmode, tmp);
22370 emit_move_insn (tmp, operands[0]);
22372 else
22373 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22375 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22376 state of lr_save_p so any change from here on would be a bug. In
22377 particular, stop rs6000_ra_ever_killed from considering the SET
22378 of lr we may have added just above. */
22379 cfun->machine->lr_save_state = info->lr_save_p + 1;
22382 static GTY(()) alias_set_type set = -1;
22384 alias_set_type
22385 get_TOC_alias_set (void)
22387 if (set == -1)
22388 set = new_alias_set ();
22389 return set;
22392 /* This returns nonzero if the current function uses the TOC. This is
22393 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22394 is generated by the ABI_V4 load_toc_* patterns. */
22395 #if TARGET_ELF
22396 static int
22397 uses_TOC (void)
22399 rtx_insn *insn;
22401 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22402 if (INSN_P (insn))
22404 rtx pat = PATTERN (insn);
22405 int i;
22407 if (GET_CODE (pat) == PARALLEL)
22408 for (i = 0; i < XVECLEN (pat, 0); i++)
22410 rtx sub = XVECEXP (pat, 0, i);
22411 if (GET_CODE (sub) == USE)
22413 sub = XEXP (sub, 0);
22414 if (GET_CODE (sub) == UNSPEC
22415 && XINT (sub, 1) == UNSPEC_TOC)
22416 return 1;
22420 return 0;
22422 #endif
22425 rtx create_TOC_reference (rtx symbol, rtx largetoc_reg)
22427 rtx tocrel, tocreg, hi;
22429 if (TARGET_DEBUG_ADDR)
22431 if (GET_CODE (symbol) == SYMBOL_REF)
22432 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22433 XSTR (symbol, 0));
22434 else
22436 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22437 GET_RTX_NAME (GET_CODE (symbol)));
22438 debug_rtx (symbol);
22442 if (!can_create_pseudo_p ())
22443 df_set_regs_ever_live (TOC_REGISTER, true);
22445 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22446 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22447 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22448 return tocrel;
22450 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22451 if (largetoc_reg != NULL)
22453 emit_move_insn (largetoc_reg, hi);
22454 hi = largetoc_reg;
22456 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
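/* Shape of the result, roughly (assuming 64-bit with r2 as the TOC
   pointer): under CMODEL_SMALL the bare UNSPEC_TOCREL above becomes
   a single TOC-relative d-form access, while the HIGH/LO_SUM pair
   built here becomes an addis off r2 (sym@toc@ha) followed by a
   sym@toc@l access, with LARGETOC_REG holding the addis result when
   one is supplied.  */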
22459 /* Issue assembly directives that create a reference to the given DWARF
22460 FRAME_TABLE_LABEL from the current function section. */
22461 void
22462 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22464 fprintf (asm_out_file, "\t.ref %s\n",
22465 (* targetm.strip_name_encoding) (frame_table_label));
22468 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22469 and the change to the stack pointer. */
22471 static void
22472 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22474 rtvec p;
22475 int i;
22476 rtx regs[3];
22478 i = 0;
22479 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22480 if (hard_frame_needed)
22481 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22482 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22483 || (hard_frame_needed
22484 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22485 regs[i++] = fp;
22487 p = rtvec_alloc (i);
22488 while (--i >= 0)
22490 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22491 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22494 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
22497 /* Emit the correct code for allocating stack space, as insns.
22498 If COPY_REG, leave a copy of the old stack pointer (plus COPY_OFF)
22499 in it. The generated code may use hard register 0 as a temporary. */
22501 static void
22502 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22504 rtx_insn *insn;
22505 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22506 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22507 rtx todec = gen_int_mode (-size, Pmode);
22508 rtx par, set, mem;
22510 if (INTVAL (todec) != -size)
22512 warning (0, "stack frame too large");
22513 emit_insn (gen_trap ());
22514 return;
22517 if (crtl->limit_stack)
22519 if (REG_P (stack_limit_rtx)
22520 && REGNO (stack_limit_rtx) > 1
22521 && REGNO (stack_limit_rtx) <= 31)
22523 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22524 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22525 const0_rtx));
22527 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22528 && TARGET_32BIT
22529 && DEFAULT_ABI == ABI_V4)
22531 rtx toload = gen_rtx_CONST (VOIDmode,
22532 gen_rtx_PLUS (Pmode,
22533 stack_limit_rtx,
22534 GEN_INT (size)));
22536 emit_insn (gen_elf_high (tmp_reg, toload));
22537 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22538 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22539 const0_rtx));
22541 else
22542 warning (0, "stack limit expression is not supported");
22545 if (copy_reg)
22547 if (copy_off != 0)
22548 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22549 else
22550 emit_move_insn (copy_reg, stack_reg);
22553 if (size > 32767)
22555 /* Need a note here so that try_split doesn't get confused. */
22556 if (get_last_insn () == NULL_RTX)
22557 emit_note (NOTE_INSN_DELETED);
22558 insn = emit_move_insn (tmp_reg, todec);
22559 try_split (PATTERN (insn), insn, 0);
22560 todec = tmp_reg;
22563 insn = emit_insn (TARGET_32BIT
22564 ? gen_movsi_update_stack (stack_reg, stack_reg,
22565 todec, stack_reg)
22566 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22567 todec, stack_reg));
22568 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22569 it now and set the alias set/attributes. The above gen_*_update
22570 calls will generate a PARALLEL with the MEM set being the first
22571 operation. */
22572 par = PATTERN (insn);
22573 gcc_assert (GET_CODE (par) == PARALLEL);
22574 set = XVECEXP (par, 0, 0);
22575 gcc_assert (GET_CODE (set) == SET);
22576 mem = SET_DEST (set);
22577 gcc_assert (MEM_P (mem));
22578 MEM_NOTRAP_P (mem) = 1;
22579 set_mem_alias_set (mem, get_frame_alias_set ());
22581 RTX_FRAME_RELATED_P (insn) = 1;
22582 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22583 gen_rtx_SET (VOIDmode, stack_reg,
22584 gen_rtx_PLUS (Pmode, stack_reg,
22585 GEN_INT (-size))));
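/* In outline, and assuming 64-bit (32-bit uses stwu/stwux): a frame
   of at most 32767 bytes allocates with a single "stdu r1,-size(r1)",
   while larger frames load -size into r0 and use "stdux r1,r1,r0";
   either way the back chain is stored as part of the update.  */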
22588 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22590 #if PROBE_INTERVAL > 32768
22591 #error Cannot use indexed addressing mode for stack probing
22592 #endif
22594 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22595 inclusive. These are offsets from the current stack pointer. */
22597 static void
22598 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22600 /* See if we have a constant small number of probes to generate. If so,
22601 that's the easy case. */
22602 if (first + size <= 32768)
22604 HOST_WIDE_INT i;
22606 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22607 it exceeds SIZE. If only one probe is needed, this will not
22608 generate any code. Then probe at FIRST + SIZE. */
22609 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22610 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22611 -(first + i)));
22613 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22614 -(first + size)));
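/* Worked example (made-up values): FIRST = 16384, SIZE = 8192 and a
   4096-byte PROBE_INTERVAL emit exactly two probes, at sp - 20480
   from the loop and at sp - 24576 for FIRST + SIZE.  */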
22617 /* Otherwise, do the same as above, but in a loop. Note that we must be
22618 extra careful with variables wrapping around because we might be at
22619 the very top (or the very bottom) of the address space and we have
22620 to be able to handle this case properly; in particular, we use an
22621 equality test for the loop condition. */
22622 else
22624 HOST_WIDE_INT rounded_size;
22625 rtx r12 = gen_rtx_REG (Pmode, 12);
22626 rtx r0 = gen_rtx_REG (Pmode, 0);
22628 /* Sanity check for the addressing mode we're going to use. */
22629 gcc_assert (first <= 32768);
22631 /* Step 1: round SIZE to the previous multiple of the interval. */
22633 rounded_size = size & -PROBE_INTERVAL;
22636 /* Step 2: compute initial and final value of the loop counter. */
22638 /* TEST_ADDR = SP + FIRST. */
22639 emit_insn (gen_rtx_SET (VOIDmode, r12,
22640 plus_constant (Pmode, stack_pointer_rtx,
22641 -first)));
22643 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22644 if (rounded_size > 32768)
22646 emit_move_insn (r0, GEN_INT (-rounded_size));
22647 emit_insn (gen_rtx_SET (VOIDmode, r0,
22648 gen_rtx_PLUS (Pmode, r12, r0)));
22650 else
22651 emit_insn (gen_rtx_SET (VOIDmode, r0,
22652 plus_constant (Pmode, r12, -rounded_size)));
22655 /* Step 3: the loop
22657 while (TEST_ADDR != LAST_ADDR)
22659 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22660 probe at TEST_ADDR
22663 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22664 until it is equal to ROUNDED_SIZE. */
22666 if (TARGET_64BIT)
22667 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22668 else
22669 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22672 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22673 that SIZE is equal to ROUNDED_SIZE. */
22675 if (size != rounded_size)
22676 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
22680 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22681 absolute addresses. */
22683 const char *
22684 output_probe_stack_range (rtx reg1, rtx reg2)
22686 static int labelno = 0;
22687 char loop_lab[32], end_lab[32];
22688 rtx xops[2];
22690 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22691 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22693 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22695 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22696 xops[0] = reg1;
22697 xops[1] = reg2;
22698 if (TARGET_64BIT)
22699 output_asm_insn ("cmpd 0,%0,%1", xops);
22700 else
22701 output_asm_insn ("cmpw 0,%0,%1", xops);
22703 fputs ("\tbeq 0,", asm_out_file);
22704 assemble_name_raw (asm_out_file, end_lab);
22705 fputc ('\n', asm_out_file);
22707 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22708 xops[1] = GEN_INT (-PROBE_INTERVAL);
22709 output_asm_insn ("addi %0,%0,%1", xops);
22711 /* Probe at TEST_ADDR and branch. */
22712 xops[1] = gen_rtx_REG (Pmode, 0);
22713 output_asm_insn ("stw %1,0(%0)", xops);
22714 fprintf (asm_out_file, "\tb ");
22715 assemble_name_raw (asm_out_file, loop_lab);
22716 fputc ('\n', asm_out_file);
22718 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22720 return "";
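/* The emitted loop therefore looks roughly like this (32-bit
   mnemonics, 4096-byte interval; label spelling varies by target):

     .LPSRL0: cmpw 0,r12,r0
              beq 0,.LPSRE0
              addi r12,r12,-4096
              stw r0,0(r12)
              b .LPSRL0
     .LPSRE0:

   assuming the r12/r0 test and limit registers set up by
   rs6000_emit_probe_stack_range above.  */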
22723 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22724 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22725 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22726 deduce these equivalences by itself so it wasn't necessary to hold
22727 its hand so much. Don't be tempted to always supply d2_f_d_e with
22728 the actual cfa register, i.e. r31 when we are using a hard frame
22729 pointer. That fails when saving regs off r1, and sched moves the
22730 r31 setup past the reg saves. */
22732 static rtx
22733 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22734 rtx reg2, rtx rreg, rtx split_reg)
22736 rtx real, temp;
22738 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22740 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22741 int i;
22743 gcc_checking_assert (val == 0);
22744 real = PATTERN (insn);
22745 if (GET_CODE (real) == PARALLEL)
22746 for (i = 0; i < XVECLEN (real, 0); i++)
22747 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22749 rtx set = XVECEXP (real, 0, i);
22751 RTX_FRAME_RELATED_P (set) = 1;
22753 RTX_FRAME_RELATED_P (insn) = 1;
22754 return insn;
22757 /* copy_rtx will not make unique copies of registers, so we need to
22758 ensure we don't have unwanted sharing here. */
22759 if (reg == reg2)
22760 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22762 if (reg == rreg)
22763 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22765 real = copy_rtx (PATTERN (insn));
22767 if (reg2 != NULL_RTX)
22768 real = replace_rtx (real, reg2, rreg);
22770 if (REGNO (reg) == STACK_POINTER_REGNUM)
22771 gcc_checking_assert (val == 0);
22772 else
22773 real = replace_rtx (real, reg,
22774 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22775 STACK_POINTER_REGNUM),
22776 GEN_INT (val)));
22778 /* We expect that 'real' is either a SET or a PARALLEL containing
22779 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22780 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22782 if (GET_CODE (real) == SET)
22784 rtx set = real;
22786 temp = simplify_rtx (SET_SRC (set));
22787 if (temp)
22788 SET_SRC (set) = temp;
22789 temp = simplify_rtx (SET_DEST (set));
22790 if (temp)
22791 SET_DEST (set) = temp;
22792 if (GET_CODE (SET_DEST (set)) == MEM)
22794 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22795 if (temp)
22796 XEXP (SET_DEST (set), 0) = temp;
22799 else
22801 int i;
22803 gcc_assert (GET_CODE (real) == PARALLEL);
22804 for (i = 0; i < XVECLEN (real, 0); i++)
22805 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22807 rtx set = XVECEXP (real, 0, i);
22809 temp = simplify_rtx (SET_SRC (set));
22810 if (temp)
22811 SET_SRC (set) = temp;
22812 temp = simplify_rtx (SET_DEST (set));
22813 if (temp)
22814 SET_DEST (set) = temp;
22815 if (GET_CODE (SET_DEST (set)) == MEM)
22817 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22818 if (temp)
22819 XEXP (SET_DEST (set), 0) = temp;
22821 RTX_FRAME_RELATED_P (set) = 1;
22825 /* If a store insn has been split into multiple insns, the
22826 true source register is given by split_reg. */
22827 if (split_reg != NULL_RTX)
22828 real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22830 RTX_FRAME_RELATED_P (insn) = 1;
22831 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22833 return insn;
22836 /* Returns an insn that has a vrsave set operation with the
22837 appropriate CLOBBERs. */
22839 static rtx
22840 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22842 int nclobs, i;
22843 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22844 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22846 clobs[0]
22847 = gen_rtx_SET (VOIDmode,
22848 vrsave,
22849 gen_rtx_UNSPEC_VOLATILE (SImode,
22850 gen_rtvec (2, reg, vrsave),
22851 UNSPECV_SET_VRSAVE));
22853 nclobs = 1;
22855 /* We need to clobber the registers in the mask so the scheduler
22856 does not move sets to VRSAVE before sets of AltiVec registers.
22858 However, if the function receives nonlocal gotos, reload will set
22859 all call saved registers live. We will end up with:
22861 (set (reg 999) (mem))
22862 (parallel [ (set (reg vrsave) (unspec blah))
22863 (clobber (reg 999))])
22865 The clobber will cause the store into reg 999 to be dead, and
22866 flow will attempt to delete an epilogue insn. In this case, we
22867 need an unspec use/set of the register. */
22869 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22870 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22872 if (!epiloguep || call_used_regs [i])
22873 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22874 gen_rtx_REG (V4SImode, i));
22875 else
22877 rtx reg = gen_rtx_REG (V4SImode, i);
22879 clobs[nclobs++]
22880 = gen_rtx_SET (VOIDmode,
22881 reg,
22882 gen_rtx_UNSPEC (V4SImode,
22883 gen_rtvec (1, reg), 27));
22887 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22889 for (i = 0; i < nclobs; ++i)
22890 XVECEXP (insn, 0, i) = clobs[i];
22892 return insn;
22895 static rtx
22896 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22898 rtx addr, mem;
22900 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22901 mem = gen_frame_mem (GET_MODE (reg), addr);
22902 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22905 static rtx
22906 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22908 return gen_frame_set (reg, frame_reg, offset, false);
22911 static rtx
22912 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22914 return gen_frame_set (reg, frame_reg, offset, true);
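/* For example, gen_frame_store (gen_rtx_REG (DImode, 0), r1, 16)
   yields (set (mem/c:DI (plus (reg 1) (const_int 16))) (reg:DI 0));
   gen_frame_load builds the same pattern with the operands swapped.  */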
22917 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22918 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
22920 static rtx
22921 emit_frame_save (rtx frame_reg, machine_mode mode,
22922 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
22924 rtx reg, insn;
22926 /* Some cases that need register indexed addressing. */
22927 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
22928 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22929 || (TARGET_E500_DOUBLE && mode == DFmode)
22930 || (TARGET_SPE_ABI
22931 && SPE_VECTOR_MODE (mode)
22932 && !SPE_CONST_OFFSET_OK (offset))));
22934 reg = gen_rtx_REG (mode, regno);
22935 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22936 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22937 NULL_RTX, NULL_RTX, NULL_RTX);
22940 /* Emit an offset memory reference suitable for a frame store, while
22941 converting to a valid addressing mode. */
22943 static rtx
22944 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
22946 rtx int_rtx, offset_rtx;
22948 int_rtx = GEN_INT (offset);
22950 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
22951 || (TARGET_E500_DOUBLE && mode == DFmode))
22953 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
22954 emit_move_insn (offset_rtx, int_rtx);
22956 else
22957 offset_rtx = int_rtx;
22959 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
22962 #ifndef TARGET_FIX_AND_CONTINUE
22963 #define TARGET_FIX_AND_CONTINUE 0
22964 #endif
22966 /* The lowest such register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */
22967 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
22968 #define LAST_SAVRES_REGISTER 31
22969 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
22971 enum {
22972 SAVRES_LR = 0x1,
22973 SAVRES_SAVE = 0x2,
22974 SAVRES_REG = 0x0c,
22975 SAVRES_GPR = 0,
22976 SAVRES_FPR = 4,
22977 SAVRES_VR = 8
22980 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
22982 /* Temporary holding space for an out-of-line register save/restore
22983 routine name. */
22984 static char savres_routine_name[30];
22986 /* Return the name for an out-of-line register save/restore routine.
22987 The register class and save/restore direction are encoded in SEL. */
22989 static char *
22990 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
22992 const char *prefix = "";
22993 const char *suffix = "";
22995 /* Different targets are supposed to define
22996 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
22997 routine name could be defined with:
22999 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
23001 This is a nice idea in theory, but in reality, things are
23002 complicated in several ways:
23004 - ELF targets have save/restore routines for GPRs.
23006 - SPE targets use different prefixes for 32/64-bit registers, and
23007 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
23009 - PPC64 ELF targets have routines for save/restore of GPRs that
23010 differ in what they do with the link register, so having a set
23011 prefix doesn't work. (We only use one of the save routines at
23012 the moment, though.)
23014 - PPC32 ELF targets have "exit" versions of the restore routines
23015 that restore the link register and can save some extra space.
23016 These require an extra suffix. (There are also "tail" versions
23017 of the restore routines and "GOT" versions of the save routines,
23018 but we don't generate those at present. Same problems apply,
23019 though.)
23021 We deal with all this by synthesizing our own prefix/suffix and
23022 using that for the simple sprintf call shown above. */
23023 if (TARGET_SPE)
23025 /* No floating point saves on the SPE. */
23026 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
23028 if ((sel & SAVRES_SAVE))
23029 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
23030 else
23031 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
23033 if ((sel & SAVRES_LR))
23034 suffix = "_x";
23036 else if (DEFAULT_ABI == ABI_V4)
23038 if (TARGET_64BIT)
23039 goto aix_names;
23041 if ((sel & SAVRES_REG) == SAVRES_GPR)
23042 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
23043 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23044 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
23045 else if ((sel & SAVRES_REG) == SAVRES_VR)
23046 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23047 else
23048 abort ();
23050 if ((sel & SAVRES_LR))
23051 suffix = "_x";
23053 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23055 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
23056 /* No out-of-line save/restore routines for GPRs on AIX. */
23057 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
23058 #endif
23060 aix_names:
23061 if ((sel & SAVRES_REG) == SAVRES_GPR)
23062 prefix = ((sel & SAVRES_SAVE)
23063 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
23064 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
23065 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23067 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
23068 if ((sel & SAVRES_LR))
23069 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
23070 else
23071 #endif
23073 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
23074 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
23077 else if ((sel & SAVRES_REG) == SAVRES_VR)
23078 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23079 else
23080 abort ();
23083 if (DEFAULT_ABI == ABI_DARWIN)
23085 /* The Darwin approach is (slightly) different, in order to be
23086 compatible with code generated by the system toolchain. There is a
23087 single symbol for the start of save sequence, and the code here
23088 embeds an offset into that code on the basis of the first register
23089 to be saved. */
23090 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
23091 if ((sel & SAVRES_REG) == SAVRES_GPR)
23092 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
23093 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
23094 (regno - 13) * 4, prefix, regno);
23095 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23096 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
23097 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
23098 else if ((sel & SAVRES_REG) == SAVRES_VR)
23099 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
23100 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
23101 else
23102 abort ();
23104 else
23105 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
23107 return savres_routine_name;
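/* Example names (hypothetical inputs): 64-bit ELF saving GPRs from
   r14 with the LR variant yields "_savegpr0_14"; the 32-bit V.4
   restore-with-exit of GPRs from r30 yields "_restgpr_30_x".  */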
23110 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore
23111 routine, selected by the SAVRES_* flags in SEL. */
23113 static rtx
23114 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
23116 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
23117 ? info->first_gp_reg_save
23118 : (sel & SAVRES_REG) == SAVRES_FPR
23119 ? info->first_fp_reg_save - 32
23120 : (sel & SAVRES_REG) == SAVRES_VR
23121 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23122 : -1);
23123 rtx sym;
23124 int select = sel;
23126 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23127 versions of the gpr routines. */
23128 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23129 && info->spe_64bit_regs_used)
23130 select ^= SAVRES_FPR ^ SAVRES_GPR;
23132 /* Don't generate bogus routine names. */
23133 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23134 && regno <= LAST_SAVRES_REGISTER
23135 && select >= 0 && select <= 12);
23137 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23139 if (sym == NULL)
23141 char *name;
23143 name = rs6000_savres_routine_name (info, regno, sel);
23145 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23146 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23147 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23150 return sym;
23153 /* Emit a sequence of insns, including a stack tie if needed, for
23154 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23155 reset the stack pointer, but move the base of the frame into
23156 reg UPDT_REGNO for use by out-of-line register restore routines. */
23158 static rtx
23159 rs6000_emit_stack_reset (rs6000_stack_t *info,
23160 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23161 unsigned updt_regno)
23163 rtx updt_reg_rtx;
23165 /* This blockage is needed so that sched doesn't decide to move
23166 the sp change before the register restores. */
23167 if (DEFAULT_ABI == ABI_V4
23168 || (TARGET_SPE_ABI
23169 && info->spe_64bit_regs_used != 0
23170 && info->first_gp_reg_save != 32))
23171 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23173 /* If we are restoring registers out-of-line, we will be using the
23174 "exit" variants of the restore routines, which will reset the
23175 stack for us. But we do need to point updt_reg into the
23176 right place for those routines. */
23177 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23179 if (frame_off != 0)
23180 return emit_insn (gen_add3_insn (updt_reg_rtx,
23181 frame_reg_rtx, GEN_INT (frame_off)));
23182 else if (REGNO (frame_reg_rtx) != updt_regno)
23183 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23185 return NULL_RTX;
23188 /* Return the register number used as a pointer by out-of-line
23189 save/restore functions. */
23191 static inline unsigned
23192 ptr_regno_for_savres (int sel)
23194 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23195 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23196 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
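/* In other words: AIX/ELFv2 routines use r1 when saving FPRs or when
   LR is involved and r12 otherwise; Darwin uses r1 for FPRs; all
   remaining cases (V.4, SPE) use r11.  */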
23199 /* Construct a parallel rtx describing the effect of a call to an
23200 out-of-line register save/restore routine, and emit the insn
23201 or jump_insn as appropriate. */
23203 static rtx
23204 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23205 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23206 machine_mode reg_mode, int sel)
23208 int i;
23209 int offset, start_reg, end_reg, n_regs, use_reg;
23210 int reg_size = GET_MODE_SIZE (reg_mode);
23211 rtx sym;
23212 rtvec p;
23213 rtx par, insn;
23215 offset = 0;
23216 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23217 ? info->first_gp_reg_save
23218 : (sel & SAVRES_REG) == SAVRES_FPR
23219 ? info->first_fp_reg_save
23220 : (sel & SAVRES_REG) == SAVRES_VR
23221 ? info->first_altivec_reg_save
23222 : -1);
23223 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23224 ? 32
23225 : (sel & SAVRES_REG) == SAVRES_FPR
23226 ? 64
23227 : (sel & SAVRES_REG) == SAVRES_VR
23228 ? LAST_ALTIVEC_REGNO + 1
23229 : -1);
23230 n_regs = end_reg - start_reg;
23231 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23232 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23233 + n_regs);
23235 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23236 RTVEC_ELT (p, offset++) = ret_rtx;
23238 RTVEC_ELT (p, offset++)
23239 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23241 sym = rs6000_savres_routine_sym (info, sel);
23242 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23244 use_reg = ptr_regno_for_savres (sel);
23245 if ((sel & SAVRES_REG) == SAVRES_VR)
23247 /* Vector regs are saved/restored using [reg+reg] addressing. */
23248 RTVEC_ELT (p, offset++)
23249 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23250 RTVEC_ELT (p, offset++)
23251 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23253 else
23254 RTVEC_ELT (p, offset++)
23255 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23257 for (i = 0; i < end_reg - start_reg; i++)
23258 RTVEC_ELT (p, i + offset)
23259 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23260 frame_reg_rtx, save_area_offset + reg_size * i,
23261 (sel & SAVRES_SAVE) != 0);
23263 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23264 RTVEC_ELT (p, i + offset)
23265 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23267 par = gen_rtx_PARALLEL (VOIDmode, p);
23269 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23271 insn = emit_jump_insn (par);
23272 JUMP_LABEL (insn) = ret_rtx;
23274 else
23275 insn = emit_insn (par);
23276 return insn;
23279 /* Emit code to store CR fields that need to be saved into REG. */
23281 static void
23282 rs6000_emit_move_from_cr (rtx reg)
23284 /* Only the ELFv2 ABI allows storing only selected fields. */
23285 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23287 int i, cr_reg[8], count = 0;
23289 /* Collect CR fields that must be saved. */
23290 for (i = 0; i < 8; i++)
23291 if (save_reg_p (CR0_REGNO + i))
23292 cr_reg[count++] = i;
23294 /* If it's just a single one, use mfcrf. */
23295 if (count == 1)
23297 rtvec p = rtvec_alloc (1);
23298 rtvec r = rtvec_alloc (2);
23299 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23300 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23301 RTVEC_ELT (p, 0)
23302 = gen_rtx_SET (VOIDmode, reg,
23303 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23305 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23306 return;
23309 /* ??? It might be better to handle the count == 2 and count == 3
23310 cases here as well, using logical operations to combine the values. */
23313 emit_insn (gen_movesi_from_cr (reg));
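/* Mask arithmetic for the single-field case above: if only CR2 needs
   saving, cr_reg[0] == 2 and the mask is 1 << (7 - 2) == 0x20, the
   FXM bit mfcrf expects for field 2.  */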
23316 /* Determine whether the GP register REG is really used. */
23318 static bool
23319 rs6000_reg_live_or_pic_offset_p (int reg)
23321 /* If the function calls eh_return, claim used all the registers that would
23322 be checked for liveness otherwise. This is required for the PIC offset
23323 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
23324 register allocation purposes in this case. */
23326 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23327 && (!call_used_regs[reg]
23328 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23329 && !TARGET_SINGLE_PIC_BASE
23330 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23331 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23332 && !TARGET_SINGLE_PIC_BASE
23333 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23334 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
23337 /* Emit function prologue as insns. */
23339 void
23340 rs6000_emit_prologue (void)
23342 rs6000_stack_t *info = rs6000_stack_info ();
23343 machine_mode reg_mode = Pmode;
23344 int reg_size = TARGET_32BIT ? 4 : 8;
23345 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23346 rtx frame_reg_rtx = sp_reg_rtx;
23347 unsigned int cr_save_regno;
23348 rtx cr_save_rtx = NULL_RTX;
23349 rtx insn;
23350 int strategy;
23351 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23352 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23353 && call_used_regs[STATIC_CHAIN_REGNUM]);
23354 /* Offset to top of frame for frame_reg and sp respectively. */
23355 HOST_WIDE_INT frame_off = 0;
23356 HOST_WIDE_INT sp_off = 0;
23358 #ifdef ENABLE_CHECKING
23359 /* Track and check usage of r0, r11, r12. */
23360 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
23361 #define START_USE(R) do \
23363 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23364 reg_inuse |= 1 << (R); \
23365 } while (0)
23366 #define END_USE(R) do \
23368 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
23369 reg_inuse &= ~(1 << (R)); \
23370 } while (0)
23371 #define NOT_INUSE(R) do \
23373 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23374 } while (0)
23375 #else
23376 #define START_USE(R) do {} while (0)
23377 #define END_USE(R) do {} while (0)
23378 #define NOT_INUSE(R) do {} while (0)
23379 #endif
23381 if (DEFAULT_ABI == ABI_ELFv2)
23383 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23385 /* With -mminimal-toc we may generate an extra use of r2 below. */
23386 if (!TARGET_SINGLE_PIC_BASE
23387 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23388 cfun->machine->r2_setup_needed = true;
23392 if (flag_stack_usage_info)
23393 current_function_static_stack_size = info->total_size;
23395 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23397 HOST_WIDE_INT size = info->total_size;
23399 if (crtl->is_leaf && !cfun->calls_alloca)
23401 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23402 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23403 size - STACK_CHECK_PROTECT);
23405 else if (size > 0)
23406 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23409 if (TARGET_FIX_AND_CONTINUE)
23411 /* gdb on darwin arranges to forward a function from the old
23412 address by modifying the first 5 instructions of the function
23413 to branch to the overriding function. This is necessary to
23414 permit function pointers that point to the old function to
23415 actually forward to the new function. */
23416 emit_insn (gen_nop ());
23417 emit_insn (gen_nop ());
23418 emit_insn (gen_nop ());
23419 emit_insn (gen_nop ());
23420 emit_insn (gen_nop ());
23423 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23425 reg_mode = V2SImode;
23426 reg_size = 8;
23429 /* Handle world saves specially here. */
23430 if (WORLD_SAVE_P (info))
23432 int i, j, sz;
23433 rtx treg;
23434 rtvec p;
23435 rtx reg0;
23437 /* save_world expects lr in r0. */
23438 reg0 = gen_rtx_REG (Pmode, 0);
23439 if (info->lr_save_p)
23441 insn = emit_move_insn (reg0,
23442 gen_rtx_REG (Pmode, LR_REGNO));
23443 RTX_FRAME_RELATED_P (insn) = 1;
23446 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23447 assumptions about the offsets of various bits of the stack
23448 frame. */
23449 gcc_assert (info->gp_save_offset == -220
23450 && info->fp_save_offset == -144
23451 && info->lr_save_offset == 8
23452 && info->cr_save_offset == 4
23453 && info->push_p
23454 && info->lr_save_p
23455 && (!crtl->calls_eh_return
23456 || info->ehrd_offset == -432)
23457 && info->vrsave_save_offset == -224
23458 && info->altivec_save_offset == -416);
23460 treg = gen_rtx_REG (SImode, 11);
23461 emit_move_insn (treg, GEN_INT (-info->total_size));
23463 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23464 in R11. It also clobbers R12, so beware! */
23466 /* Preserve CR2 for save_world prologues */
23467 sz = 5;
23468 sz += 32 - info->first_gp_reg_save;
23469 sz += 64 - info->first_fp_reg_save;
23470 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23471 p = rtvec_alloc (sz);
23472 j = 0;
23473 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23474 gen_rtx_REG (SImode,
23475 LR_REGNO));
23476 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23477 gen_rtx_SYMBOL_REF (Pmode,
23478 "*save_world"));
23479 /* We do floats first so that the instruction pattern matches
23480 properly. */
23481 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23482 RTVEC_ELT (p, j++)
23483 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23484 ? DFmode : SFmode,
23485 info->first_fp_reg_save + i),
23486 frame_reg_rtx,
23487 info->fp_save_offset + frame_off + 8 * i);
23488 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23489 RTVEC_ELT (p, j++)
23490 = gen_frame_store (gen_rtx_REG (V4SImode,
23491 info->first_altivec_reg_save + i),
23492 frame_reg_rtx,
23493 info->altivec_save_offset + frame_off + 16 * i);
23494 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23495 RTVEC_ELT (p, j++)
23496 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23497 frame_reg_rtx,
23498 info->gp_save_offset + frame_off + reg_size * i);
23500 /* CR register traditionally saved as CR2. */
23501 RTVEC_ELT (p, j++)
23502 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23503 frame_reg_rtx, info->cr_save_offset + frame_off);
23504 /* Explain about use of R0. */
23505 if (info->lr_save_p)
23506 RTVEC_ELT (p, j++)
23507 = gen_frame_store (reg0,
23508 frame_reg_rtx, info->lr_save_offset + frame_off);
23509 /* Explain what happens to the stack pointer. */
23511 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23512 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23515 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23516 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23517 treg, GEN_INT (-info->total_size), NULL_RTX);
23518 sp_off = frame_off = info->total_size;
23521 strategy = info->savres_strategy;
23523 /* For V.4, update stack before we do any saving and set back pointer. */
23524 if (! WORLD_SAVE_P (info)
23525 && info->push_p
23526 && (DEFAULT_ABI == ABI_V4
23527 || crtl->calls_eh_return))
23529 bool need_r11 = (TARGET_SPE
23530 ? (!(strategy & SAVE_INLINE_GPRS)
23531 && info->spe_64bit_regs_used == 0)
23532 : (!(strategy & SAVE_INLINE_FPRS)
23533 || !(strategy & SAVE_INLINE_GPRS)
23534 || !(strategy & SAVE_INLINE_VRS)));
23535 int ptr_regno = -1;
23536 rtx ptr_reg = NULL_RTX;
23537 int ptr_off = 0;
23539 if (info->total_size < 32767)
23540 frame_off = info->total_size;
23541 else if (need_r11)
23542 ptr_regno = 11;
23543 else if (info->cr_save_p
23544 || info->lr_save_p
23545 || info->first_fp_reg_save < 64
23546 || info->first_gp_reg_save < 32
23547 || info->altivec_size != 0
23548 || info->vrsave_mask != 0
23549 || crtl->calls_eh_return)
23550 ptr_regno = 12;
23551 else
23553 /* The prologue won't be saving any regs so there is no need
23554 to set up a frame register to access any frame save area.
23555 We also won't be using frame_off anywhere below, but set
23556 the correct value anyway to protect against future
23557 changes to this function. */
23558 frame_off = info->total_size;
23560 if (ptr_regno != -1)
23562 /* Set up the frame offset to that needed by the first
23563 out-of-line save function. */
23564 START_USE (ptr_regno);
23565 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23566 frame_reg_rtx = ptr_reg;
23567 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23568 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23569 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23570 ptr_off = info->gp_save_offset + info->gp_size;
23571 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23572 ptr_off = info->altivec_save_offset + info->altivec_size;
23573 frame_off = -ptr_off;
23575 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23576 sp_off = info->total_size;
23577 if (frame_reg_rtx != sp_reg_rtx)
23578 rs6000_emit_stack_tie (frame_reg_rtx, false);
23581 /* If we use the link register, get it into r0. */
23582 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23584 rtx addr, reg, mem;
23586 reg = gen_rtx_REG (Pmode, 0);
23587 START_USE (0);
23588 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23589 RTX_FRAME_RELATED_P (insn) = 1;
23591 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23592 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23594 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23595 GEN_INT (info->lr_save_offset + frame_off));
23596 mem = gen_rtx_MEM (Pmode, addr);
23597	 /* This should not be in rs6000_sr_alias_set, because of
23598	 __builtin_return_address. */
23600 insn = emit_move_insn (mem, reg);
23601 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23602 NULL_RTX, NULL_RTX, NULL_RTX);
23603 END_USE (0);
23607 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23608	 r12 will be needed by out-of-line gpr save. */
23609 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23610 && !(strategy & (SAVE_INLINE_GPRS
23611 | SAVE_NOINLINE_GPRS_SAVES_LR))
23612 ? 11 : 12);
23613 if (!WORLD_SAVE_P (info)
23614 && info->cr_save_p
23615 && REGNO (frame_reg_rtx) != cr_save_regno
23616 && !(using_static_chain_p && cr_save_regno == 11))
23618 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23619 START_USE (cr_save_regno);
23620 rs6000_emit_move_from_cr (cr_save_rtx);
23623 /* Do any required saving of fpr's. If only one or two to save, do
23624 it ourselves. Otherwise, call function. */
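	 /* Illustrative example, assuming a 32-bit ELF target (out-of-line
	 routine names vary by ABI): instead of one stfd per register, the
	 out-of-line path emits a single call such as
	 bl _savefpr_14          # stores f14..f31 relative to r11
	 trading a little speed for considerable code size. */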
23625 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23627 int i;
23628 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23629 if (save_reg_p (info->first_fp_reg_save + i))
23630 emit_frame_save (frame_reg_rtx,
23631 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23632 ? DFmode : SFmode),
23633 info->first_fp_reg_save + i,
23634 info->fp_save_offset + frame_off + 8 * i,
23635 sp_off - frame_off);
23637 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23639 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23640 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23641 unsigned ptr_regno = ptr_regno_for_savres (sel);
23642 rtx ptr_reg = frame_reg_rtx;
23644 if (REGNO (frame_reg_rtx) == ptr_regno)
23645 gcc_checking_assert (frame_off == 0);
23646 else
23648 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23649 NOT_INUSE (ptr_regno);
23650 emit_insn (gen_add3_insn (ptr_reg,
23651 frame_reg_rtx, GEN_INT (frame_off)));
23653 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23654 info->fp_save_offset,
23655 info->lr_save_offset,
23656 DFmode, sel);
23657 rs6000_frame_related (insn, ptr_reg, sp_off,
23658 NULL_RTX, NULL_RTX, NULL_RTX);
23659 if (lr)
23660 END_USE (0);
23663 /* Save GPRs. This is done as a PARALLEL if we are using
23664 the store-multiple instructions. */
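	 /* Illustrative example (offset assumed): on a 32-bit target with
	 first_gp_reg_save == 26 the store-multiple form is a single
	 stmw 26,-24(1)          # stores r26..r31 in one insn
	 rather than six separate stw insns. */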
23665 if (!WORLD_SAVE_P (info)
23666 && TARGET_SPE_ABI
23667 && info->spe_64bit_regs_used != 0
23668 && info->first_gp_reg_save != 32)
23670 int i;
23671 rtx spe_save_area_ptr;
23672 HOST_WIDE_INT save_off;
23673 int ool_adjust = 0;
23675 /* Determine whether we can address all of the registers that need
23676 to be saved with an offset from frame_reg_rtx that fits in
23677 the small const field for SPE memory instructions. */
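	 /* (evldd/evstdd encode a 5-bit offset scaled by 8, so only
	 offsets 0..248 from the base register are directly
	 addressable -- assumed detail.) */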
23678 int spe_regs_addressable
23679 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23680 + reg_size * (32 - info->first_gp_reg_save - 1))
23681 && (strategy & SAVE_INLINE_GPRS));
23683 if (spe_regs_addressable)
23685 spe_save_area_ptr = frame_reg_rtx;
23686 save_off = frame_off;
23688 else
23690 /* Make r11 point to the start of the SPE save area. We need
23691 to be careful here if r11 is holding the static chain. If
23692 it is, then temporarily save it in r0. */
23693 HOST_WIDE_INT offset;
23695 if (!(strategy & SAVE_INLINE_GPRS))
23696 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23697 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23698 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23699 save_off = frame_off - offset;
23701 if (using_static_chain_p)
23703 rtx r0 = gen_rtx_REG (Pmode, 0);
23705 START_USE (0);
23706 gcc_assert (info->first_gp_reg_save > 11);
23708 emit_move_insn (r0, spe_save_area_ptr);
23710 else if (REGNO (frame_reg_rtx) != 11)
23711 START_USE (11);
23713 emit_insn (gen_addsi3 (spe_save_area_ptr,
23714 frame_reg_rtx, GEN_INT (offset)));
23715 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23716 frame_off = -info->spe_gp_save_offset + ool_adjust;
23719 if ((strategy & SAVE_INLINE_GPRS))
23721 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23722 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23723 emit_frame_save (spe_save_area_ptr, reg_mode,
23724 info->first_gp_reg_save + i,
23725 (info->spe_gp_save_offset + save_off
23726 + reg_size * i),
23727 sp_off - save_off);
23729 else
23731 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23732 info->spe_gp_save_offset + save_off,
23733 0, reg_mode,
23734 SAVRES_SAVE | SAVRES_GPR);
23736 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23737 NULL_RTX, NULL_RTX, NULL_RTX);
23740 /* Move the static chain pointer back. */
23741 if (!spe_regs_addressable)
23743 if (using_static_chain_p)
23745 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23746 END_USE (0);
23748 else if (REGNO (frame_reg_rtx) != 11)
23749 END_USE (11);
23752 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23754 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23755 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23756 unsigned ptr_regno = ptr_regno_for_savres (sel);
23757 rtx ptr_reg = frame_reg_rtx;
23758 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23759 int end_save = info->gp_save_offset + info->gp_size;
23760 int ptr_off;
23762 if (!ptr_set_up)
23763 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23765 /* Need to adjust r11 (r12) if we saved any FPRs. */
23766 if (end_save + frame_off != 0)
23768 rtx offset = GEN_INT (end_save + frame_off);
23770 if (ptr_set_up)
23771 frame_off = -end_save;
23772 else
23773 NOT_INUSE (ptr_regno);
23774 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23776 else if (!ptr_set_up)
23778 NOT_INUSE (ptr_regno);
23779 emit_move_insn (ptr_reg, frame_reg_rtx);
23781 ptr_off = -end_save;
23782 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23783 info->gp_save_offset + ptr_off,
23784 info->lr_save_offset + ptr_off,
23785 reg_mode, sel);
23786 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23787 NULL_RTX, NULL_RTX, NULL_RTX);
23788 if (lr)
23789 END_USE (0);
23791 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23793 rtvec p;
23794 int i;
23795 p = rtvec_alloc (32 - info->first_gp_reg_save);
23796 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23797 RTVEC_ELT (p, i)
23798 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23799 frame_reg_rtx,
23800 info->gp_save_offset + frame_off + reg_size * i);
23801 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23802 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23803 NULL_RTX, NULL_RTX, NULL_RTX);
23805 else if (!WORLD_SAVE_P (info))
23807 int i;
23808 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23809 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23810 emit_frame_save (frame_reg_rtx, reg_mode,
23811 info->first_gp_reg_save + i,
23812 info->gp_save_offset + frame_off + reg_size * i,
23813 sp_off - frame_off);
23816 if (crtl->calls_eh_return)
23818 unsigned int i;
23819 rtvec p;
23821 for (i = 0; ; ++i)
23823 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23824 if (regno == INVALID_REGNUM)
23825 break;
23828 p = rtvec_alloc (i);
23830 for (i = 0; ; ++i)
23832 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23833 if (regno == INVALID_REGNUM)
23834 break;
23836 insn
23837 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23838 sp_reg_rtx,
23839 info->ehrd_offset + sp_off + reg_size * (int) i);
23840 RTVEC_ELT (p, i) = insn;
23841 RTX_FRAME_RELATED_P (insn) = 1;
23844 insn = emit_insn (gen_blockage ());
23845 RTX_FRAME_RELATED_P (insn) = 1;
23846 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23849	 /* In the AIX ABI we need to make sure r2 is really saved. */
23850 if (TARGET_AIX && crtl->calls_eh_return)
23852 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23853 rtx save_insn, join_insn, note;
23854 long toc_restore_insn;
23856 tmp_reg = gen_rtx_REG (Pmode, 11);
23857 tmp_reg_si = gen_rtx_REG (SImode, 11);
23858 if (using_static_chain_p)
23860 START_USE (0);
23861 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23863 else
23864 START_USE (11);
23865 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23866 /* Peek at instruction to which this function returns. If it's
23867 restoring r2, then we know we've already saved r2. We can't
23868 unconditionally save r2 because the value we have will already
23869 be updated if we arrived at this function via a plt call or
23870 toc adjusting stub. */
23871 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23872 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23873 + RS6000_TOC_SAVE_SLOT);
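	 /* Decoding the constants above: 0x80410000 is the base opcode of
	 "lwz 2,N(1)" and 0xE8410000 that of "ld 2,N(1)", so adding
	 RS6000_TOC_SAVE_SLOT reconstructs the exact instruction a
	 toc-adjusting stub places at the return address to reload r2. */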
23874 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23875 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23876 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23877 validate_condition_mode (EQ, CCUNSmode);
23878 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23879 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23880 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23881 toc_save_done = gen_label_rtx ();
23882 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23883 gen_rtx_EQ (VOIDmode, compare_result,
23884 const0_rtx),
23885 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23886 pc_rtx);
23887 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23888 JUMP_LABEL (jump) = toc_save_done;
23889 LABEL_NUSES (toc_save_done) += 1;
23891 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23892 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23893 sp_off - frame_off);
23895 emit_label (toc_save_done);
23897 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
23898 have a CFG that has different saves along different paths.
23899 Move the note to a dummy blockage insn, which describes that
23900 R2 is unconditionally saved after the label. */
23901	 /* ??? An alternate representation might be a special insn pattern
23902	 containing both the branch and the store. That might give the
23903	 code that minimizes the number of DW_CFA_advance opcodes more
23904	 freedom in placing the annotations. */
23905 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23906 if (note)
23907 remove_note (save_insn, note);
23908 else
23909 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23910 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23911 RTX_FRAME_RELATED_P (save_insn) = 0;
23913 join_insn = emit_insn (gen_blockage ());
23914 REG_NOTES (join_insn) = note;
23915 RTX_FRAME_RELATED_P (join_insn) = 1;
23917 if (using_static_chain_p)
23919 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
23920 END_USE (0);
23922 else
23923 END_USE (11);
23926 /* Save CR if we use any that must be preserved. */
23927 if (!WORLD_SAVE_P (info) && info->cr_save_p)
23929 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23930 GEN_INT (info->cr_save_offset + frame_off));
23931 rtx mem = gen_frame_mem (SImode, addr);
23933 /* If we didn't copy cr before, do so now using r0. */
23934 if (cr_save_rtx == NULL_RTX)
23936 START_USE (0);
23937 cr_save_rtx = gen_rtx_REG (SImode, 0);
23938 rs6000_emit_move_from_cr (cr_save_rtx);
23941 /* Saving CR requires a two-instruction sequence: one instruction
23942 to move the CR to a general-purpose register, and a second
23943 instruction that stores the GPR to memory.
23945 We do not emit any DWARF CFI records for the first of these,
23946 because we cannot properly represent the fact that CR is saved in
23947 a register. One reason is that we cannot express that multiple
23948 CR fields are saved; another reason is that on 64-bit, the size
23949 of the CR register in DWARF (4 bytes) differs from the size of
23950 a general-purpose register.
23952 This means if any intervening instruction were to clobber one of
23953 the call-saved CR fields, we'd have incorrect CFI. To prevent
23954 this from happening, we mark the store to memory as a use of
23955 those CR fields, which prevents any such instruction from being
23956 scheduled in between the two instructions. */
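	 /* An illustrative 32-bit instance of the sequence (offset assumed):
	 mfcr 12                 # CR -> GPR, no CFI attached here
	 stw 12,8(1)             # GPR -> save slot, CFI attached here
	 where the store is wrapped in a PARALLEL with a (use (reg:CC 68))
	 and so on for each call-saved CR field. */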
23957 rtx crsave_v[9];
23958 int n_crsave = 0;
23959 int i;
23961 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
23962 for (i = 0; i < 8; i++)
23963 if (save_reg_p (CR0_REGNO + i))
23964 crsave_v[n_crsave++]
23965 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23967 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
23968 gen_rtvec_v (n_crsave, crsave_v)));
23969 END_USE (REGNO (cr_save_rtx));
23971 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
23972 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
23973 so we need to construct a frame expression manually. */
23974 RTX_FRAME_RELATED_P (insn) = 1;
23976 /* Update address to be stack-pointer relative, like
23977 rs6000_frame_related would do. */
23978 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
23979 GEN_INT (info->cr_save_offset + sp_off));
23980 mem = gen_frame_mem (SImode, addr);
23982 if (DEFAULT_ABI == ABI_ELFv2)
23984 /* In the ELFv2 ABI we generate separate CFI records for each
23985 CR field that was actually saved. They all point to the
23986 same 32-bit stack slot. */
23987 rtx crframe[8];
23988 int n_crframe = 0;
23990 for (i = 0; i < 8; i++)
23991 if (save_reg_p (CR0_REGNO + i))
23993 crframe[n_crframe]
23994 = gen_rtx_SET (VOIDmode, mem,
23995 gen_rtx_REG (SImode, CR0_REGNO + i));
23997 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
23998 n_crframe++;
24001 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24002 gen_rtx_PARALLEL (VOIDmode,
24003 gen_rtvec_v (n_crframe, crframe)));
24005 else
24007 /* In other ABIs, by convention, we use a single CR regnum to
24008 represent the fact that all call-saved CR fields are saved.
24009 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
24010 rtx set = gen_rtx_SET (VOIDmode, mem,
24011 gen_rtx_REG (SImode, CR2_REGNO));
24012 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
24016 /* In the ELFv2 ABI we need to save all call-saved CR fields into
24017 *separate* slots if the routine calls __builtin_eh_return, so
24018 that they can be independently restored by the unwinder. */
24019 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24021 int i, cr_off = info->ehcr_offset;
24022 rtx crsave;
24024 /* ??? We might get better performance by using multiple mfocrf
24025 instructions. */
24026 crsave = gen_rtx_REG (SImode, 0);
24027 emit_insn (gen_movesi_from_cr (crsave));
24029 for (i = 0; i < 8; i++)
24030 if (!call_used_regs[CR0_REGNO + i])
24032 rtvec p = rtvec_alloc (2);
24033 RTVEC_ELT (p, 0)
24034 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
24035 RTVEC_ELT (p, 1)
24036 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
24038 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24040 RTX_FRAME_RELATED_P (insn) = 1;
24041 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24042 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
24043 sp_reg_rtx, cr_off + sp_off));
24045 cr_off += reg_size;
24049 /* Update stack and set back pointer unless this is V.4,
24050 for which it was done previously. */
24051 if (!WORLD_SAVE_P (info) && info->push_p
24052 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
24054 rtx ptr_reg = NULL;
24055 int ptr_off = 0;
24057 /* If saving altivec regs we need to be able to address all save
24058 locations using a 16-bit offset. */
24059 if ((strategy & SAVE_INLINE_VRS) == 0
24060 || (info->altivec_size != 0
24061 && (info->altivec_save_offset + info->altivec_size - 16
24062 + info->total_size - frame_off) > 32767)
24063 || (info->vrsave_size != 0
24064 && (info->vrsave_save_offset
24065 + info->total_size - frame_off) > 32767))
24067 int sel = SAVRES_SAVE | SAVRES_VR;
24068 unsigned ptr_regno = ptr_regno_for_savres (sel);
24070 if (using_static_chain_p
24071 && ptr_regno == STATIC_CHAIN_REGNUM)
24072 ptr_regno = 12;
24073 if (REGNO (frame_reg_rtx) != ptr_regno)
24074 START_USE (ptr_regno);
24075 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24076 frame_reg_rtx = ptr_reg;
24077 ptr_off = info->altivec_save_offset + info->altivec_size;
24078 frame_off = -ptr_off;
24080 else if (REGNO (frame_reg_rtx) == 1)
24081 frame_off = info->total_size;
24082 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
24083 sp_off = info->total_size;
24084 if (frame_reg_rtx != sp_reg_rtx)
24085 rs6000_emit_stack_tie (frame_reg_rtx, false);
24088 /* Set frame pointer, if needed. */
24089 if (frame_pointer_needed)
24091 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
24092 sp_reg_rtx);
24093 RTX_FRAME_RELATED_P (insn) = 1;
24096 /* Save AltiVec registers if needed. Save here because the red zone does
24097 not always include AltiVec registers. */
24098 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24099 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
24101 int end_save = info->altivec_save_offset + info->altivec_size;
24102 int ptr_off;
24103 /* Oddly, the vector save/restore functions point r0 at the end
24104 of the save area, then use r11 or r12 to load offsets for
24105 [reg+reg] addressing. */
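	 /* Illustratively, an out-of-line routine such as _savevr_29 (name
	 assumed; it varies by ABI) can be as simple as
	 li 11,-48
	 stvx 29,11,0            # EA = r11 + r0, 48 bytes below r0
	 with r0 carrying the end of the save area, since AltiVec stores
	 accept only [reg+reg] addresses. */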
24106 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24107 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
24108 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24110 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24111 NOT_INUSE (0);
24112 if (end_save + frame_off != 0)
24114 rtx offset = GEN_INT (end_save + frame_off);
24116 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24118 else
24119 emit_move_insn (ptr_reg, frame_reg_rtx);
24121 ptr_off = -end_save;
24122 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24123 info->altivec_save_offset + ptr_off,
24124 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24125 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24126 NULL_RTX, NULL_RTX, NULL_RTX);
24127 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24129 /* The oddity mentioned above clobbered our frame reg. */
24130 emit_move_insn (frame_reg_rtx, ptr_reg);
24131 frame_off = ptr_off;
24134 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24135 && info->altivec_size != 0)
24137 int i;
24139 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24140 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24142 rtx areg, savereg, mem, split_reg;
24143 int offset;
24145 offset = (info->altivec_save_offset + frame_off
24146 + 16 * (i - info->first_altivec_reg_save));
24148 savereg = gen_rtx_REG (V4SImode, i);
24150 NOT_INUSE (0);
24151 areg = gen_rtx_REG (Pmode, 0);
24152 emit_move_insn (areg, GEN_INT (offset));
24154 /* AltiVec addressing mode is [reg+reg]. */
24155 mem = gen_frame_mem (V4SImode,
24156 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
24158 insn = emit_move_insn (mem, savereg);
24160 /* When we split a VSX store into two insns, we need to make
24161 sure the DWARF info knows which register we are storing.
24162 Pass it in to be used on the appropriate note. */
24163 if (!BYTES_BIG_ENDIAN
24164 && GET_CODE (PATTERN (insn)) == SET
24165 && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
24166 split_reg = savereg;
24167 else
24168 split_reg = NULL_RTX;
24170 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24171 areg, GEN_INT (offset), split_reg);
24175 /* VRSAVE is a bit vector representing which AltiVec registers
24176 are used. The OS uses this to determine which vector
24177 registers to save on a context switch. We need to save
24178 VRSAVE on the stack frame, add whatever AltiVec registers we
24179 used in this function, and do the corresponding magic in the
24180 epilogue. */
24182 if (!WORLD_SAVE_P (info)
24183 && TARGET_ALTIVEC
24184 && TARGET_ALTIVEC_VRSAVE
24185 && info->vrsave_mask != 0)
24187 rtx reg, vrsave;
24188 int offset;
24189 int save_regno;
24191 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
24192 be using r12 as frame_reg_rtx and r11 as the static chain
24193 pointer for nested functions. */
24194 save_regno = 12;
24195 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24196 && !using_static_chain_p)
24197 save_regno = 11;
24198 else if (REGNO (frame_reg_rtx) == 12)
24200 save_regno = 11;
24201 if (using_static_chain_p)
24202 save_regno = 0;
24205 NOT_INUSE (save_regno);
24206 reg = gen_rtx_REG (SImode, save_regno);
24207 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24208 if (TARGET_MACHO)
24209 emit_insn (gen_get_vrsave_internal (reg));
24210 else
24211 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
24213 /* Save VRSAVE. */
24214 offset = info->vrsave_save_offset + frame_off;
24215 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24217 /* Include the registers in the mask. */
24218 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24220 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24223 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24224 if (!TARGET_SINGLE_PIC_BASE
24225 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24226 || (DEFAULT_ABI == ABI_V4
24227 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24228 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24230 /* If emit_load_toc_table will use the link register, we need to save
24231 it. We use R12 for this purpose because emit_load_toc_table
24232 can use register 0. This allows us to use a plain 'blr' to return
24233 from the procedure more often. */
24234 int save_LR_around_toc_setup = (TARGET_ELF
24235 && DEFAULT_ABI == ABI_V4
24236 && flag_pic
24237 && ! info->lr_save_p
24238 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24239 if (save_LR_around_toc_setup)
24241 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24242 rtx tmp = gen_rtx_REG (Pmode, 12);
24244 insn = emit_move_insn (tmp, lr);
24245 RTX_FRAME_RELATED_P (insn) = 1;
24247 rs6000_emit_load_toc_table (TRUE);
24249 insn = emit_move_insn (lr, tmp);
24250 add_reg_note (insn, REG_CFA_RESTORE, lr);
24251 RTX_FRAME_RELATED_P (insn) = 1;
24253 else
24254 rs6000_emit_load_toc_table (TRUE);
24257 #if TARGET_MACHO
24258 if (!TARGET_SINGLE_PIC_BASE
24259 && DEFAULT_ABI == ABI_DARWIN
24260 && flag_pic && crtl->uses_pic_offset_table)
24262 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24263 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24265 /* Save and restore LR locally around this call (in R0). */
24266 if (!info->lr_save_p)
24267 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24269 emit_insn (gen_load_macho_picbase (src));
24271 emit_move_insn (gen_rtx_REG (Pmode,
24272 RS6000_PIC_OFFSET_TABLE_REGNUM),
24273 lr);
24275 if (!info->lr_save_p)
24276 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
24278 #endif
24280 /* If we need to, save the TOC register after doing the stack setup.
24281 Do not emit eh frame info for this save. The unwinder wants info,
24282 conceptually attached to instructions in this function, about
24283 register values in the caller of this function. This R2 may have
24284 already been changed from the value in the caller.
24285 We don't attempt to write accurate DWARF EH frame info for R2
24286 because code emitted by gcc for a (non-pointer) function call
24287 doesn't save and restore R2. Instead, R2 is managed out-of-line
24288 by a linker generated plt call stub when the function resides in
24289 a shared library. This behaviour is costly to describe in DWARF,
24290 both in terms of the size of DWARF info and the time taken in the
24291 unwinder to interpret it. R2 changes, apart from the
24292 calls_eh_return case earlier in this function, are handled by
24293 linux-unwind.h frob_update_context. */
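	 /* For reference, in the ELFv2 case a cross-module call site looks like
	 bl foo
	 nop
	 and the linker, when foo ends up in a shared library, rewrites the
	 nop to "ld 2,24(1)", reloading r2 from the slot stored below (the
	 slot offset differs on ELFv1 and 32-bit targets). */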
24294 if (rs6000_save_toc_in_prologue_p ())
24296 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
24297 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
24301 /* Output .extern statements for the save/restore routines we use. */
24303 static void
24304 rs6000_output_savres_externs (FILE *file)
24306 rs6000_stack_t *info = rs6000_stack_info ();
24308 if (TARGET_DEBUG_STACK)
24309 debug_stack_info (info);
24311 /* Write .extern for any function we will call to save and restore
24312 fp values. */
24313 if (info->first_fp_reg_save < 64
24314 && !TARGET_MACHO
24315 && !TARGET_ELF)
24317 char *name;
24318 int regno = info->first_fp_reg_save - 32;
24320 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
24322 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24323 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24324 name = rs6000_savres_routine_name (info, regno, sel);
24325 fprintf (file, "\t.extern %s\n", name);
24327 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
24329 bool lr = (info->savres_strategy
24330 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24331 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24332 name = rs6000_savres_routine_name (info, regno, sel);
24333 fprintf (file, "\t.extern %s\n", name);
24338 /* Write function prologue. */
24340 static void
24341 rs6000_output_function_prologue (FILE *file,
24342 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24344 if (!cfun->is_thunk)
24345 rs6000_output_savres_externs (file);
24347 /* ELFv2 ABI r2 setup code and local entry point. This must follow
24348 immediately after the global entry point label. */
24349 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
24351 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24353 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
24354 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
24356 fputs ("\t.localentry\t", file);
24357 assemble_name (file, name);
24358 fputs (",.-", file);
24359 assemble_name (file, name);
24360 fputs ("\n", file);
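	 /* Taken together, the fprintfs above emit, for a function foo:
	 0:      addis 2,12,.TOC.-0b@ha
	         addi 2,2,.TOC.-0b@l
	         .localentry     foo,.-foo
	 The global entry point computes r2 from r12, which the ELFv2 ABI
	 guarantees holds the global entry address; same-TOC callers branch
	 to the local entry point just past these two insns. */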
24363 /* Output -mprofile-kernel code. This needs to be done here instead of
24364 in output_function_profile since it must go after the ELFv2 ABI
24365 local entry point. */
24366 if (TARGET_PROFILE_KERNEL && crtl->profile)
24368 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24369 gcc_assert (!TARGET_32BIT);
24371 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
24372 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
24374 /* In the ELFv2 ABI we have no compiler stack word. It must be
24375	 the responsibility of _mcount to preserve the static chain
24376 register if required. */
24377 if (DEFAULT_ABI != ABI_ELFv2
24378 && cfun->static_chain_decl != NULL)
24380 asm_fprintf (file, "\tstd %s,24(%s)\n",
24381 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24382 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24383 asm_fprintf (file, "\tld %s,24(%s)\n",
24384 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24386 else
24387 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24390 rs6000_pic_labelno++;
24393 /* Non-zero if vmx regs are restored before the frame pop, zero if
24394 we restore after the pop when possible. */
24395 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24397 /* Restoring cr is a two step process: loading a reg from the frame
24398 save, then moving the reg to cr. For ABI_V4 we must let the
24399 unwinder know that the stack location is no longer valid at or
24400 before the stack deallocation, but we can't emit a cfa_restore for
24401 cr at the stack deallocation like we do for other registers.
24402 The trouble is that it is possible for the move to cr to be
24403 scheduled after the stack deallocation. So say exactly where cr
24404 is located on each of the two insns. */
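	 /* The two insns in question are, schematically (offset and mask
	 assumed):
	 lwz 12,8(1)             # load saved CR image from the frame
	 mtcrf 0xff,12           # move it back into the CR fields
	 and the annotations described above are attached to both, so cr
	 stays describable even if the mtcrf drifts past the deallocation. */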
24406 static rtx
24407 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24409 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24410 rtx reg = gen_rtx_REG (SImode, regno);
24411 rtx_insn *insn = emit_move_insn (reg, mem);
24413 if (!exit_func && DEFAULT_ABI == ABI_V4)
24415 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24416 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
24418 add_reg_note (insn, REG_CFA_REGISTER, set);
24419 RTX_FRAME_RELATED_P (insn) = 1;
24421 return reg;
24424 /* Reload CR from REG. */
24426 static void
24427 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24429 int count = 0;
24430 int i;
24432 if (using_mfcr_multiple)
24434 for (i = 0; i < 8; i++)
24435 if (save_reg_p (CR0_REGNO + i))
24436 count++;
24437 gcc_assert (count);
24440 if (using_mfcr_multiple && count > 1)
24442 rtx_insn *insn;
24443 rtvec p;
24444 int ndx;
24446 p = rtvec_alloc (count);
24448 ndx = 0;
24449 for (i = 0; i < 8; i++)
24450 if (save_reg_p (CR0_REGNO + i))
24452 rtvec r = rtvec_alloc (2);
24453 RTVEC_ELT (r, 0) = reg;
24454 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
24455 RTVEC_ELT (p, ndx) =
24456 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
24457 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24458 ndx++;
24460 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24461 gcc_assert (ndx == count);
24463 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24464 CR field separately. */
24465 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24467 for (i = 0; i < 8; i++)
24468 if (save_reg_p (CR0_REGNO + i))
24469 add_reg_note (insn, REG_CFA_RESTORE,
24470 gen_rtx_REG (SImode, CR0_REGNO + i));
24472 RTX_FRAME_RELATED_P (insn) = 1;
24475 else
24476 for (i = 0; i < 8; i++)
24477 if (save_reg_p (CR0_REGNO + i))
24479 rtx insn = emit_insn (gen_movsi_to_cr_one
24480 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24482 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24483 CR field separately, attached to the insn that in fact
24484 restores this particular CR field. */
24485 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24487 add_reg_note (insn, REG_CFA_RESTORE,
24488 gen_rtx_REG (SImode, CR0_REGNO + i));
24490 RTX_FRAME_RELATED_P (insn) = 1;
24494 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24495 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24496 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24498 rtx_insn *insn = get_last_insn ();
24499 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24501 add_reg_note (insn, REG_CFA_RESTORE, cr);
24502 RTX_FRAME_RELATED_P (insn) = 1;
24506 /* Like cr, the move to lr instruction can be scheduled after the
24507 stack deallocation, but unlike cr, its stack frame save is still
24508 valid. So we only need to emit the cfa_restore on the correct
24509 instruction. */
24511 static void
24512 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24514 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24515 rtx reg = gen_rtx_REG (Pmode, regno);
24517 emit_move_insn (reg, mem);
24520 static void
24521 restore_saved_lr (int regno, bool exit_func)
24523 rtx reg = gen_rtx_REG (Pmode, regno);
24524 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24525 rtx_insn *insn = emit_move_insn (lr, reg);
24527 if (!exit_func && flag_shrink_wrap)
24529 add_reg_note (insn, REG_CFA_RESTORE, lr);
24530 RTX_FRAME_RELATED_P (insn) = 1;
24534 static rtx
24535 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24537 if (DEFAULT_ABI == ABI_ELFv2)
24539 int i;
24540 for (i = 0; i < 8; i++)
24541 if (save_reg_p (CR0_REGNO + i))
24543 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24544 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24545 cfa_restores);
24548 else if (info->cr_save_p)
24549 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24550 gen_rtx_REG (SImode, CR2_REGNO),
24551 cfa_restores);
24553 if (info->lr_save_p)
24554 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24555 gen_rtx_REG (Pmode, LR_REGNO),
24556 cfa_restores);
24557 return cfa_restores;
24560 /* Return true if OFFSET from stack pointer can be clobbered by signals.
24561	 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
24562	 below the stack pointer that are not clobbered by signals. */
24564 static inline bool
24565 offset_below_red_zone_p (HOST_WIDE_INT offset)
24567	 return offset < (DEFAULT_ABI == ABI_V4
24568	 ? 0
24569	 : TARGET_32BIT ? -220 : -288);
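	 /* For example, on a 64-bit AIX/ELF target offset -288 still lies
	 inside the protected area (result false), while -289 is below it and
	 may be clobbered (result true); under the V.4 ABI any negative
	 offset is unprotected. */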
24572 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24574 static void
24575 emit_cfa_restores (rtx cfa_restores)
24577 rtx_insn *insn = get_last_insn ();
24578 rtx *loc = &REG_NOTES (insn);
24580 while (*loc)
24581 loc = &XEXP (*loc, 1);
24582 *loc = cfa_restores;
24583 RTX_FRAME_RELATED_P (insn) = 1;
24586 /* Emit function epilogue as insns. */
24588 void
24589 rs6000_emit_epilogue (int sibcall)
24591 rs6000_stack_t *info;
24592 int restoring_GPRs_inline;
24593 int restoring_FPRs_inline;
24594 int using_load_multiple;
24595 int using_mtcr_multiple;
24596 int use_backchain_to_restore_sp;
24597 int restore_lr;
24598 int strategy;
24599 HOST_WIDE_INT frame_off = 0;
24600 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24601 rtx frame_reg_rtx = sp_reg_rtx;
24602 rtx cfa_restores = NULL_RTX;
24603 rtx insn;
24604 rtx cr_save_reg = NULL_RTX;
24605 machine_mode reg_mode = Pmode;
24606 int reg_size = TARGET_32BIT ? 4 : 8;
24607 int i;
24608 bool exit_func;
24609 unsigned ptr_regno;
24611 info = rs6000_stack_info ();
24613 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24615 reg_mode = V2SImode;
24616 reg_size = 8;
24619 strategy = info->savres_strategy;
24620 using_load_multiple = strategy & SAVRES_MULTIPLE;
24621 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24622 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24623 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24624 || rs6000_cpu == PROCESSOR_PPC603
24625 || rs6000_cpu == PROCESSOR_PPC750
24626 || optimize_size);
24627 /* Restore via the backchain when we have a large frame, since this
24628 is more efficient than an addis, addi pair. The second condition
24629	 here will not trigger at the moment; we don't actually need a
24630 frame pointer for alloca, but the generic parts of the compiler
24631 give us one anyway. */
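	 /* Concretely, since the backchain word at offset 0 of the frame holds
	 the old stack pointer, a single
	 ld 11,0(1)              # lwz on 32-bit targets
	 recovers it, versus an addis/addi pair to materialize a frame size
	 that does not fit in 16 bits. */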
24632 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24633 || (cfun->calls_alloca
24634 && !frame_pointer_needed));
24635 restore_lr = (info->lr_save_p
24636 && (restoring_FPRs_inline
24637 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24638 && (restoring_GPRs_inline
24639 || info->first_fp_reg_save < 64));
24641 if (WORLD_SAVE_P (info))
24643 int i, j;
24644 char rname[30];
24645 const char *alloc_rname;
24646 rtvec p;
24648 /* eh_rest_world_r10 will return to the location saved in the LR
24649	 stack slot (which is not likely to be our caller).
24650 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24651 rest_world is similar, except any R10 parameter is ignored.
24652 The exception-handling stuff that was here in 2.95 is no
24653 longer necessary. */
24655 p = rtvec_alloc (9
24657 + 32 - info->first_gp_reg_save
24658 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24659 + 63 + 1 - info->first_fp_reg_save);
24661 strcpy (rname, ((crtl->calls_eh_return) ?
24662 "*eh_rest_world_r10" : "*rest_world"));
24663 alloc_rname = ggc_strdup (rname);
24665 j = 0;
24666 RTVEC_ELT (p, j++) = ret_rtx;
24667 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24668 gen_rtx_REG (Pmode,
24669 LR_REGNO));
24670 RTVEC_ELT (p, j++)
24671 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24672 /* The instruction pattern requires a clobber here;
24673 it is shared with the restVEC helper. */
24674 RTVEC_ELT (p, j++)
24675 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24678 /* CR register traditionally saved as CR2. */
24679 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24680 RTVEC_ELT (p, j++)
24681 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24682 if (flag_shrink_wrap)
24684 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24685 gen_rtx_REG (Pmode, LR_REGNO),
24686 cfa_restores);
24687 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24691 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24693 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24694 RTVEC_ELT (p, j++)
24695 = gen_frame_load (reg,
24696 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24697 if (flag_shrink_wrap)
24698 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24700 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24702 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24703 RTVEC_ELT (p, j++)
24704 = gen_frame_load (reg,
24705 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24706 if (flag_shrink_wrap)
24707 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24709 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24711 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24712 ? DFmode : SFmode),
24713 info->first_fp_reg_save + i);
24714 RTVEC_ELT (p, j++)
24715 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24716 if (flag_shrink_wrap)
24717 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24719 RTVEC_ELT (p, j++)
24720 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24721 RTVEC_ELT (p, j++)
24722 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24723 RTVEC_ELT (p, j++)
24724 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24725 RTVEC_ELT (p, j++)
24726 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24727 RTVEC_ELT (p, j++)
24728 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24729 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24731 if (flag_shrink_wrap)
24733 REG_NOTES (insn) = cfa_restores;
24734 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24735 RTX_FRAME_RELATED_P (insn) = 1;
24737 return;
24740 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24741 if (info->push_p)
24742 frame_off = info->total_size;
24744 /* Restore AltiVec registers if we must do so before adjusting the
24745 stack. */
24746 if (TARGET_ALTIVEC_ABI
24747 && info->altivec_size != 0
24748 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24749 || (DEFAULT_ABI != ABI_V4
24750 && offset_below_red_zone_p (info->altivec_save_offset))))
24752 int i;
24753 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24755 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24756 if (use_backchain_to_restore_sp)
24758 int frame_regno = 11;
24760 if ((strategy & REST_INLINE_VRS) == 0)
24762 /* Of r11 and r12, select the one not clobbered by an
24763 out-of-line restore function for the frame register. */
24764 frame_regno = 11 + 12 - scratch_regno;
24766 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24767 emit_move_insn (frame_reg_rtx,
24768 gen_rtx_MEM (Pmode, sp_reg_rtx));
24769 frame_off = 0;
24771 else if (frame_pointer_needed)
24772 frame_reg_rtx = hard_frame_pointer_rtx;
24774 if ((strategy & REST_INLINE_VRS) == 0)
24776 int end_save = info->altivec_save_offset + info->altivec_size;
24777 int ptr_off;
24778 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24779 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24781 if (end_save + frame_off != 0)
24783 rtx offset = GEN_INT (end_save + frame_off);
24785 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24787 else
24788 emit_move_insn (ptr_reg, frame_reg_rtx);
24790 ptr_off = -end_save;
24791 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24792 info->altivec_save_offset + ptr_off,
24793 0, V4SImode, SAVRES_VR);
24795 else
24797 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24798 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24800 rtx addr, areg, mem, reg;
24802 areg = gen_rtx_REG (Pmode, 0);
24803 emit_move_insn
24804 (areg, GEN_INT (info->altivec_save_offset
24805 + frame_off
24806 + 16 * (i - info->first_altivec_reg_save)));
24808 /* AltiVec addressing mode is [reg+reg]. */
24809 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24810 mem = gen_frame_mem (V4SImode, addr);
24812 reg = gen_rtx_REG (V4SImode, i);
24813 emit_move_insn (reg, mem);
24817 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24818 if (((strategy & REST_INLINE_VRS) == 0
24819 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24820 && (flag_shrink_wrap
24821 || (offset_below_red_zone_p
24822 (info->altivec_save_offset
24823 + 16 * (i - info->first_altivec_reg_save)))))
24825 rtx reg = gen_rtx_REG (V4SImode, i);
24826 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24830 /* Restore VRSAVE if we must do so before adjusting the stack. */
24831 if (TARGET_ALTIVEC
24832 && TARGET_ALTIVEC_VRSAVE
24833 && info->vrsave_mask != 0
24834 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24835 || (DEFAULT_ABI != ABI_V4
24836 && offset_below_red_zone_p (info->vrsave_save_offset))))
24838 rtx reg;
24840 if (frame_reg_rtx == sp_reg_rtx)
24842 if (use_backchain_to_restore_sp)
24844 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24845 emit_move_insn (frame_reg_rtx,
24846 gen_rtx_MEM (Pmode, sp_reg_rtx));
24847 frame_off = 0;
24849 else if (frame_pointer_needed)
24850 frame_reg_rtx = hard_frame_pointer_rtx;
24853 reg = gen_rtx_REG (SImode, 12);
24854 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24855 info->vrsave_save_offset + frame_off));
24857 emit_insn (generate_set_vrsave (reg, info, 1));
24860 insn = NULL_RTX;
24861 /* If we have a large stack frame, restore the old stack pointer
24862 using the backchain. */
24863 if (use_backchain_to_restore_sp)
24865 if (frame_reg_rtx == sp_reg_rtx)
24867 /* Under V.4, don't reset the stack pointer until after we're done
24868 loading the saved registers. */
24869 if (DEFAULT_ABI == ABI_V4)
24870 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24872 insn = emit_move_insn (frame_reg_rtx,
24873 gen_rtx_MEM (Pmode, sp_reg_rtx));
24874 frame_off = 0;
24876 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24877 && DEFAULT_ABI == ABI_V4)
24878 /* frame_reg_rtx has been set up by the altivec restore. */
24880 else
24882 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24883 frame_reg_rtx = sp_reg_rtx;
24886 /* If we have a frame pointer, we can restore the old stack pointer
24887 from it. */
24888 else if (frame_pointer_needed)
24890 frame_reg_rtx = sp_reg_rtx;
24891 if (DEFAULT_ABI == ABI_V4)
24892 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24893 /* Prevent reordering memory accesses against stack pointer restore. */
24894 else if (cfun->calls_alloca
24895 || offset_below_red_zone_p (-info->total_size))
24896 rs6000_emit_stack_tie (frame_reg_rtx, true);
24898 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24899 GEN_INT (info->total_size)));
24900 frame_off = 0;
24902 else if (info->push_p
24903 && DEFAULT_ABI != ABI_V4
24904 && !crtl->calls_eh_return)
24906 /* Prevent reordering memory accesses against stack pointer restore. */
24907 if (cfun->calls_alloca
24908 || offset_below_red_zone_p (-info->total_size))
24909 rs6000_emit_stack_tie (frame_reg_rtx, false);
24910 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24911 GEN_INT (info->total_size)));
24912 frame_off = 0;
24914 if (insn && frame_reg_rtx == sp_reg_rtx)
24916 if (cfa_restores)
24918 REG_NOTES (insn) = cfa_restores;
24919 cfa_restores = NULL_RTX;
24921 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24922 RTX_FRAME_RELATED_P (insn) = 1;
24925 /* Restore AltiVec registers if we have not done so already. */
24926 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24927 && TARGET_ALTIVEC_ABI
24928 && info->altivec_size != 0
24929 && (DEFAULT_ABI == ABI_V4
24930 || !offset_below_red_zone_p (info->altivec_save_offset)))
24932 int i;
24934 if ((strategy & REST_INLINE_VRS) == 0)
24936 int end_save = info->altivec_save_offset + info->altivec_size;
24937 int ptr_off;
24938 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24939 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24940 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24942 if (end_save + frame_off != 0)
24944 rtx offset = GEN_INT (end_save + frame_off);
24946 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24948 else
24949 emit_move_insn (ptr_reg, frame_reg_rtx);
24951 ptr_off = -end_save;
24952 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24953 info->altivec_save_offset + ptr_off,
24954 0, V4SImode, SAVRES_VR);
24955 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24957 /* Frame reg was clobbered by out-of-line save. Restore it
24958 from ptr_reg, and if we are calling out-of-line gpr or
24959 fpr restore set up the correct pointer and offset. */
24960 unsigned newptr_regno = 1;
24961 if (!restoring_GPRs_inline)
24963 bool lr = info->gp_save_offset + info->gp_size == 0;
24964 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24965 newptr_regno = ptr_regno_for_savres (sel);
24966 end_save = info->gp_save_offset + info->gp_size;
24968 else if (!restoring_FPRs_inline)
24970 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
24971 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24972 newptr_regno = ptr_regno_for_savres (sel);
24973	 end_save = info->fp_save_offset + info->fp_size;
24976 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
24977 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
24979 if (end_save + ptr_off != 0)
24981 rtx offset = GEN_INT (end_save + ptr_off);
24983 frame_off = -end_save;
24984 emit_insn (gen_add3_insn (frame_reg_rtx, ptr_reg, offset));
24986 else
24988 frame_off = ptr_off;
24989 emit_move_insn (frame_reg_rtx, ptr_reg);
24993 else
24995 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24996 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24998 rtx addr, areg, mem, reg;
25000 areg = gen_rtx_REG (Pmode, 0);
25001 emit_move_insn
25002 (areg, GEN_INT (info->altivec_save_offset
25003 + frame_off
25004 + 16 * (i - info->first_altivec_reg_save)));
25006 /* AltiVec addressing mode is [reg+reg]. */
25007 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
25008 mem = gen_frame_mem (V4SImode, addr);
25010 reg = gen_rtx_REG (V4SImode, i);
25011 emit_move_insn (reg, mem);
25015 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25016 if (((strategy & REST_INLINE_VRS) == 0
25017 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
25018 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
25020 rtx reg = gen_rtx_REG (V4SImode, i);
25021 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25025 /* Restore VRSAVE if we have not done so already. */
25026 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25027 && TARGET_ALTIVEC
25028 && TARGET_ALTIVEC_VRSAVE
25029 && info->vrsave_mask != 0
25030 && (DEFAULT_ABI == ABI_V4
25031 || !offset_below_red_zone_p (info->vrsave_save_offset)))
25033 rtx reg;
25035 reg = gen_rtx_REG (SImode, 12);
25036 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25037 info->vrsave_save_offset + frame_off));
25039 emit_insn (generate_set_vrsave (reg, info, 1));
25042 /* If we exit by an out-of-line restore function on ABI_V4 then that
25043 function will deallocate the stack, so we don't need to worry
25044 about the unwinder restoring cr from an invalid stack frame
25045 location. */
25046 exit_func = (!restoring_FPRs_inline
25047 || (!restoring_GPRs_inline
25048 && info->first_fp_reg_save == 64));
25050 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
25051 *separate* slots if the routine calls __builtin_eh_return, so
25052 that they can be independently restored by the unwinder. */
25053 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25055 int i, cr_off = info->ehcr_offset;
25057 for (i = 0; i < 8; i++)
25058 if (!call_used_regs[CR0_REGNO + i])
25060 rtx reg = gen_rtx_REG (SImode, 0);
25061 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25062 cr_off + frame_off));
25064 insn = emit_insn (gen_movsi_to_cr_one
25065 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
25067 if (!exit_func && flag_shrink_wrap)
25069 add_reg_note (insn, REG_CFA_RESTORE,
25070 gen_rtx_REG (SImode, CR0_REGNO + i));
25072 RTX_FRAME_RELATED_P (insn) = 1;
25075 cr_off += reg_size;
25079 /* Get the old lr if we saved it. If we are restoring registers
25080 out-of-line, then the out-of-line routines can do this for us. */
25081 if (restore_lr && restoring_GPRs_inline)
25082 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25084 /* Get the old cr if we saved it. */
25085 if (info->cr_save_p)
25087 unsigned cr_save_regno = 12;
25089 if (!restoring_GPRs_inline)
25091 /* Ensure we don't use the register used by the out-of-line
25092 gpr register restore below. */
25093 bool lr = info->gp_save_offset + info->gp_size == 0;
25094 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25095 int gpr_ptr_regno = ptr_regno_for_savres (sel);
25097 if (gpr_ptr_regno == 12)
25098 cr_save_regno = 11;
25099 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
25101 else if (REGNO (frame_reg_rtx) == 12)
25102 cr_save_regno = 11;
25104 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
25105 info->cr_save_offset + frame_off,
25106 exit_func);
25109 /* Set LR here to try to overlap restores below. */
25110 if (restore_lr && restoring_GPRs_inline)
25111 restore_saved_lr (0, exit_func);
25113 /* Load exception handler data registers, if needed. */
25114 if (crtl->calls_eh_return)
25116 unsigned int i, regno;
25118 if (TARGET_AIX)
25120 rtx reg = gen_rtx_REG (reg_mode, 2);
25121 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25122 frame_off + RS6000_TOC_SAVE_SLOT));
25125 for (i = 0; ; ++i)
25127 rtx mem;
25129 regno = EH_RETURN_DATA_REGNO (i);
25130 if (regno == INVALID_REGNUM)
25131 break;
25133 /* Note: possible use of r0 here to address SPE regs. */
25134 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25135 info->ehrd_offset + frame_off
25136 + reg_size * (int) i);
25138 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25142 /* Restore GPRs. This is done as a PARALLEL if we are using
25143 the load-multiple instructions. */
25144 if (TARGET_SPE_ABI
25145 && info->spe_64bit_regs_used
25146 && info->first_gp_reg_save != 32)
25148 /* Determine whether we can address all of the registers that need
25149 to be saved with an offset from frame_reg_rtx that fits in
25150 the small const field for SPE memory instructions. */
25151 int spe_regs_addressable
25152 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25153 + reg_size * (32 - info->first_gp_reg_save - 1))
25154 && restoring_GPRs_inline);
25156 if (!spe_regs_addressable)
25158 int ool_adjust = 0;
25159 rtx old_frame_reg_rtx = frame_reg_rtx;
25160 /* Make r11 point to the start of the SPE save area. We worried about
25161 not clobbering it when we were saving registers in the prologue.
25162 There's no need to worry here because the static chain is passed
25163 anew to every function. */
25165 if (!restoring_GPRs_inline)
25166 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25167 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25168 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25169 GEN_INT (info->spe_gp_save_offset
25170 + frame_off
25171 - ool_adjust)));
25172 /* Keep the invariant that frame_reg_rtx + frame_off points
25173 at the top of the stack frame. */
25174 frame_off = -info->spe_gp_save_offset + ool_adjust;
25177 if (restoring_GPRs_inline)
25179 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25181 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25182 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25184 rtx offset, addr, mem, reg;
25186 /* We're doing all this to ensure that the immediate offset
25187 fits into the immediate field of 'evldd'. */
25188 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25190 offset = GEN_INT (spe_offset + reg_size * i);
25191 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25192 mem = gen_rtx_MEM (V2SImode, addr);
25193 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25195 emit_move_insn (reg, mem);
25198 else
25199 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25200 info->spe_gp_save_offset + frame_off,
25201 info->lr_save_offset + frame_off,
25202 reg_mode,
25203 SAVRES_GPR | SAVRES_LR);
25205 else if (!restoring_GPRs_inline)
25207 /* We are jumping to an out-of-line function. */
25208 rtx ptr_reg;
25209 int end_save = info->gp_save_offset + info->gp_size;
25210 bool can_use_exit = end_save == 0;
25211 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25212 int ptr_off;
25214 /* Emit stack reset code if we need it. */
25215 ptr_regno = ptr_regno_for_savres (sel);
25216 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25217 if (can_use_exit)
25218 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25219 else if (end_save + frame_off != 0)
25220 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25221 GEN_INT (end_save + frame_off)));
25222 else if (REGNO (frame_reg_rtx) != ptr_regno)
25223 emit_move_insn (ptr_reg, frame_reg_rtx);
25224 if (REGNO (frame_reg_rtx) == ptr_regno)
25225 frame_off = -end_save;
25227 if (can_use_exit && info->cr_save_p)
25228 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25230 ptr_off = -end_save;
25231 rs6000_emit_savres_rtx (info, ptr_reg,
25232 info->gp_save_offset + ptr_off,
25233 info->lr_save_offset + ptr_off,
25234 reg_mode, sel);
25236 else if (using_load_multiple)
25238 rtvec p;
25239 p = rtvec_alloc (32 - info->first_gp_reg_save);
25240 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25241 RTVEC_ELT (p, i)
25242 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25243 frame_reg_rtx,
25244 info->gp_save_offset + frame_off + reg_size * i);
25245 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25247 else
25249 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25250 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25251 emit_insn (gen_frame_load
25252 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25253 frame_reg_rtx,
25254 info->gp_save_offset + frame_off + reg_size * i));
25257 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25259 /* If the frame pointer was used then we can't delay emitting
25260 a REG_CFA_DEF_CFA note. This must happen on the insn that
25261 restores the frame pointer, r31. We may have already emitted
25262	 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
25263 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
25264 be harmless if emitted. */
25265 if (frame_pointer_needed)
25267 insn = get_last_insn ();
25268 add_reg_note (insn, REG_CFA_DEF_CFA,
25269 plus_constant (Pmode, frame_reg_rtx, frame_off));
25270 RTX_FRAME_RELATED_P (insn) = 1;
25273 /* Set up cfa_restores. We always need these when
25274 shrink-wrapping. If not shrink-wrapping then we only need
25275 the cfa_restore when the stack location is no longer valid.
25276 The cfa_restores must be emitted on or before the insn that
25277 invalidates the stack, and of course must not be emitted
25278 before the insn that actually does the restore. The latter
25279 is why it is a bad idea to emit the cfa_restores as a group
25280 on the last instruction here that actually does a restore:
25281 That insn may be reordered with respect to others doing
25282 restores. */
25283 if (flag_shrink_wrap
25284 && !restoring_GPRs_inline
25285 && info->first_fp_reg_save == 64)
25286 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25288 for (i = info->first_gp_reg_save; i < 32; i++)
25289 if (!restoring_GPRs_inline
25290 || using_load_multiple
25291 || rs6000_reg_live_or_pic_offset_p (i))
25293 rtx reg = gen_rtx_REG (reg_mode, i);
25295 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25299 if (!restoring_GPRs_inline
25300 && info->first_fp_reg_save == 64)
25302 /* We are jumping to an out-of-line function. */
25303 if (cfa_restores)
25304 emit_cfa_restores (cfa_restores);
25305 return;
25308 if (restore_lr && !restoring_GPRs_inline)
25310 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25311 restore_saved_lr (0, exit_func);
25314 /* Restore fpr's if we need to do it without calling a function. */
25315 if (restoring_FPRs_inline)
25316 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25317 if (save_reg_p (info->first_fp_reg_save + i))
25319 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25320 ? DFmode : SFmode),
25321 info->first_fp_reg_save + i);
25322 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25323 info->fp_save_offset + frame_off + 8 * i));
25324 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25325 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25328 /* If we saved cr, restore it here. Just those that were used. */
25329 if (info->cr_save_p)
25330 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
25332 /* If this is V.4, unwind the stack pointer after all of the loads
25333 have been done, or set up r11 if we are restoring fp out of line. */
25334 ptr_regno = 1;
25335 if (!restoring_FPRs_inline)
25337 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25338 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25339 ptr_regno = ptr_regno_for_savres (sel);
25342 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25343 if (REGNO (frame_reg_rtx) == ptr_regno)
25344 frame_off = 0;
25346 if (insn && restoring_FPRs_inline)
25348 if (cfa_restores)
25350 REG_NOTES (insn) = cfa_restores;
25351 cfa_restores = NULL_RTX;
25353 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25354 RTX_FRAME_RELATED_P (insn) = 1;
25357 if (crtl->calls_eh_return)
25359 rtx sa = EH_RETURN_STACKADJ_RTX;
25360 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
25363 if (!sibcall)
25365 rtvec p;
25366 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25367 if (! restoring_FPRs_inline)
25369 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
25370 RTVEC_ELT (p, 0) = ret_rtx;
25372 else
25374 if (cfa_restores)
25376 /* We can't hang the cfa_restores off a simple return,
25377 since the shrink-wrap code sometimes uses an existing
25378 return. This means there might be a path from
25379 pre-prologue code to this return, and dwarf2cfi code
25380 wants the eh_frame unwinder state to be the same on
25381 all paths to any point. So we need to emit the
25382 cfa_restores before the return. For -m64 we really
25383 don't need epilogue cfa_restores at all, except for
25384 this irritating dwarf2cfi with shrink-wrap
25385 requirement; the stack red-zone means eh_frame info
25386 from the prologue telling the unwinder to restore
25387 from the stack is perfectly good right to the end of
25388 the function. */
25389 emit_insn (gen_blockage ());
25390 emit_cfa_restores (cfa_restores);
25391 cfa_restores = NULL_RTX;
25393 p = rtvec_alloc (2);
25394 RTVEC_ELT (p, 0) = simple_return_rtx;
25397 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25398 ? gen_rtx_USE (VOIDmode,
25399 gen_rtx_REG (Pmode, LR_REGNO))
25400 : gen_rtx_CLOBBER (VOIDmode,
25401 gen_rtx_REG (Pmode, LR_REGNO)));
25403 /* If we have to restore more than two FP registers, branch to the
25404 restore function. It will return to our caller. */
25405 if (! restoring_FPRs_inline)
25407 int i;
25408 int reg;
25409 rtx sym;
25411 if (flag_shrink_wrap)
25412 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25414 sym = rs6000_savres_routine_sym (info,
25415 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25416 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25417 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
25418 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25420 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25422 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25424 RTVEC_ELT (p, i + 4)
25425 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25426 if (flag_shrink_wrap)
25427 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25428 cfa_restores);
25432 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25435 if (cfa_restores)
25437 if (sibcall)
25438 /* Ensure the cfa_restores are hung off an insn that won't
25439 be reordered above other restores. */
25440 emit_insn (gen_blockage ());
25442 emit_cfa_restores (cfa_restores);
25446 /* Write function epilogue. */
25448 static void
25449 rs6000_output_function_epilogue (FILE *file,
25450 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25452 #if TARGET_MACHO
25453 macho_branch_islands ();
25454 /* Mach-O doesn't support labels at the end of objects, so if
25455 it looks like we might want one, insert a NOP. */
25457 rtx_insn *insn = get_last_insn ();
25458 rtx_insn *deleted_debug_label = NULL;
25459 while (insn
25460 && NOTE_P (insn)
25461 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25463 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
25464 notes only, instead set their CODE_LABEL_NUMBER to -1,
25465 otherwise there would be code generation differences
25466 in between -g and -g0. */
25467 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25468 deleted_debug_label = insn;
25469 insn = PREV_INSN (insn);
25471 if (insn
25472 && (LABEL_P (insn)
25473 || (NOTE_P (insn)
25474 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25475 fputs ("\tnop\n", file);
25476 else if (deleted_debug_label)
25477 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25478 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25479 CODE_LABEL_NUMBER (insn) = -1;
25481 #endif
25483 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25484 on its format.
25486 We don't output a traceback table if -finhibit-size-directive was
25487 used. The documentation for -finhibit-size-directive reads
25488 ``don't output a @code{.size} assembler directive, or anything
25489 else that would cause trouble if the function is split in the
25490 middle, and the two halves are placed at locations far apart in
25491 memory.'' The traceback table has this property, since it
25492 includes the offset from the start of the function to the
25493 traceback table itself.
25495 System V.4 PowerPC targets (and the embedded ABI derived from them) use a
25496 different traceback table. */
25497 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25498 && ! flag_inhibit_size_directive
25499 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25501 const char *fname = NULL;
25502 const char *language_string = lang_hooks.name;
25503 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25504 int i;
25505 int optional_tbtab;
25506 rs6000_stack_t *info = rs6000_stack_info ();
25508 if (rs6000_traceback == traceback_full)
25509 optional_tbtab = 1;
25510 else if (rs6000_traceback == traceback_part)
25511 optional_tbtab = 0;
25512 else
25513 optional_tbtab = !optimize_size && !TARGET_ELF;
25515 if (optional_tbtab)
25517 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25518 while (*fname == '.') /* V.4 encodes . in the name */
25519 fname++;
25521 /* Need label immediately before tbtab, so we can compute
25522 its offset from the function start. */
25523 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25524 ASM_OUTPUT_LABEL (file, fname);
25527 /* The .tbtab pseudo-op can only be used for the first eight
25528 expressions, since it can't handle the possibly variable
25529 length fields that follow. However, if you omit the optional
25530 fields, the assembler outputs zeros for all optional fields
25531 anyway, giving each variable-length field its minimum length
25532 (as defined in sys/debug.h). Thus we cannot use the .tbtab
25533 pseudo-op at all. */
25535 /* An all-zero word flags the start of the tbtab, for debuggers
25536 that have to find it by searching forward from the entry
25537 point or from the current pc. */
25538 fputs ("\t.long 0\n", file);
25540 /* Tbtab format type. Use format type 0. */
25541 fputs ("\t.byte 0,", file);
25543 /* Language type. Unfortunately, there does not seem to be any
25544 official way to discover the language being compiled, so we
25545 use language_string.
25546 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25547 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25548 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
25549 either, so for now use 0. */
25550 if (lang_GNU_C ()
25551 || ! strcmp (language_string, "GNU GIMPLE")
25552 || ! strcmp (language_string, "GNU Go")
25553 || ! strcmp (language_string, "libgccjit"))
25554 i = 0;
25555 else if (! strcmp (language_string, "GNU F77")
25556 || ! strcmp (language_string, "GNU Fortran"))
25557 i = 1;
25558 else if (! strcmp (language_string, "GNU Pascal"))
25559 i = 2;
25560 else if (! strcmp (language_string, "GNU Ada"))
25561 i = 3;
25562 else if (lang_GNU_CXX ()
25563 || ! strcmp (language_string, "GNU Objective-C++"))
25564 i = 9;
25565 else if (! strcmp (language_string, "GNU Java"))
25566 i = 13;
25567 else if (! strcmp (language_string, "GNU Objective-C"))
25568 i = 14;
25569 else
25570 gcc_unreachable ();
25571 fprintf (file, "%d,", i);
25573 /* 8 single bit fields: global linkage (not set for C extern linkage,
25574 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25575 from start of procedure stored in tbtab, internal function, function
25576 has controlled storage, function has no toc, function uses fp,
25577 function logs/aborts fp operations. */
25578 /* Assume that fp operations are used if any fp reg must be saved. */
25579 fprintf (file, "%d,",
25580 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
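/* For example (hypothetical values): with optional_tbtab == 1 and one
   FPR saved (info->first_fp_reg_save == 63), the byte emitted above is
   (1 << 5) | (1 << 1) == 34.  */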
25582 /* 6 bitfields: function is interrupt handler, name present in
25583 proc table, function calls alloca, on condition directives
25584 (controls stack walks, 3 bits), saves condition reg, saves
25585 link reg. */
25586 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25587 set up as a frame pointer, even when there is no alloca call. */
25588 fprintf (file, "%d,",
25589 ((optional_tbtab << 6)
25590 | ((optional_tbtab & frame_pointer_needed) << 5)
25591 | (info->cr_save_p << 1)
25592 | (info->lr_save_p)));
25594 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25595 (6 bits). */
25596 fprintf (file, "%d,",
25597 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25599 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25600 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25602 if (optional_tbtab)
25604 /* Compute the parameter info from the function decl argument
25605 list. */
25606 tree decl;
25607 int next_parm_info_bit = 31;
25609 for (decl = DECL_ARGUMENTS (current_function_decl);
25610 decl; decl = DECL_CHAIN (decl))
25612 rtx parameter = DECL_INCOMING_RTL (decl);
25613 machine_mode mode = GET_MODE (parameter);
25615 if (GET_CODE (parameter) == REG)
25617 if (SCALAR_FLOAT_MODE_P (mode))
25619 int bits;
25621 float_parms++;
25623 switch (mode)
25625 case SFmode:
25626 case SDmode:
25627 bits = 0x2;
25628 break;
25630 case DFmode:
25631 case DDmode:
25632 case TFmode:
25633 case TDmode:
25634 bits = 0x3;
25635 break;
25637 default:
25638 gcc_unreachable ();
25641 /* If only one bit will fit, don't or in this entry. */
25642 if (next_parm_info_bit > 0)
25643 parm_info |= (bits << (next_parm_info_bit - 1));
25644 next_parm_info_bit -= 2;
25646 else
25648 fixed_parms += ((GET_MODE_SIZE (mode)
25649 + (UNITS_PER_WORD - 1))
25650 / UNITS_PER_WORD);
25651 next_parm_info_bit -= 1;
25657 /* Number of fixed point parameters. */
25658 /* This is actually the number of words of fixed point parameters; thus
25659 an 8 byte struct counts as 2, and thus the maximum value is 8. */
25660 fprintf (file, "%d,", fixed_parms);
25662 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25663 all on stack. */
25664 /* This is actually the number of fp registers that hold parameters;
25665 and thus the maximum value is 13. */
25666 /* Set parameters on stack bit if parameters are not in their original
25667 registers, regardless of whether they are on the stack? Xlc
25668 seems to set the bit when not optimizing. */
25669 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25671 if (! optional_tbtab)
25672 return;
25674 /* Optional fields follow. Some are variable length. */
25676 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25677 11 double float. */
25678 /* There is an entry for each parameter in a register, in the order that
25679 they occur in the parameter list. Any intervening arguments on the
25680 stack are ignored. If the list overflows a long (max possible length
25681 34 bits) then completely leave off all elements that don't fit. */
25682 /* Only emit this long if there was at least one parameter. */
25683 if (fixed_parms || float_parms)
25684 fprintf (file, "\t.long %d\n", parm_info);
25686 /* Offset from start of code to tb table. */
25687 fputs ("\t.long ", file);
25688 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25689 RS6000_OUTPUT_BASENAME (file, fname);
25690 putc ('-', file);
25691 rs6000_output_function_entry (file, fname);
25692 putc ('\n', file);
25694 /* Interrupt handler mask. */
25695 /* Omit this long, since we never set the interrupt handler bit
25696 above. */
25698 /* Number of CTL (controlled storage) anchors. */
25699 /* Omit this long, since the has_ctl bit is never set above. */
25701 /* Displacement into stack of each CTL anchor. */
25702 /* Omit this list of longs, because there are no CTL anchors. */
25704 /* Length of function name. */
25705 if (*fname == '*')
25706 ++fname;
25707 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25709 /* Function name. */
25710 assemble_string (fname, strlen (fname));
25712 /* Register for alloca automatic storage; this is always reg 31.
25713 Only emit this if the alloca bit was set above. */
25714 if (frame_pointer_needed)
25715 fputs ("\t.byte 31\n", file);
25717 fputs ("\t.align 2\n", file);
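/* Taken together, the directives above emit assembly along these lines
   for a function "foo" (a sketch only; the label spelling, byte values
   and the optional trailing fields vary with target and traceback
   settings):

	LT..foo:
		.long 0
		.byte 0,0,34,65,128,0,0,3
		.long LT..foo-foo
		.short 3
		.byte "foo"
		.align 2
*/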
25721 /* A C compound statement that outputs the assembler code for a thunk
25722 function, used to implement C++ virtual function calls with
25723 multiple inheritance. The thunk acts as a wrapper around a virtual
25724 function, adjusting the implicit object parameter before handing
25725 control off to the real function.
25727 First, emit code to add the integer DELTA to the location that
25728 contains the incoming first argument. Assume that this argument
25729 contains a pointer, and is the one used to pass the `this' pointer
25730 in C++. This is the incoming argument *before* the function
25731 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25732 values of all other incoming arguments.
25734 After the addition, emit code to jump to FUNCTION, which is a
25735 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25736 not touch the return address. Hence returning from FUNCTION will
25737 return to whoever called the current `thunk'.
25739 The effect must be as if FUNCTION had been called directly with the
25740 adjusted first argument. This macro is responsible for emitting
25741 all of the code for a thunk function; output_function_prologue()
25742 and output_function_epilogue() are not invoked.
25744 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25745 been extracted from it.) It might possibly be useful on some
25746 targets, but probably not.
25748 If you do not define this macro, the target-independent code in the
25749 C++ frontend will generate a less efficient heavyweight thunk that
25750 calls FUNCTION instead of jumping to it. The generic approach does
25751 not support varargs. */
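/* In C terms the generated thunk behaves roughly like the sketch below
   (illustrative only; the code that follows emits the equivalent RTL
   directly):

	this += delta;
	if (vcall_offset)
	  this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
	goto function;	/* tail call; the return address is untouched */
*/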
25753 static void
25754 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25755 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25756 tree function)
25758 rtx this_rtx, funexp;
25759 rtx_insn *insn;
25761 reload_completed = 1;
25762 epilogue_completed = 1;
25764 /* Mark the end of the (empty) prologue. */
25765 emit_note (NOTE_INSN_PROLOGUE_END);
25767 /* Find the "this" pointer. If the function returns a structure,
25768 the structure return pointer is in r3. */
25769 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25770 this_rtx = gen_rtx_REG (Pmode, 4);
25771 else
25772 this_rtx = gen_rtx_REG (Pmode, 3);
25774 /* Apply the constant offset, if required. */
25775 if (delta)
25776 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25778 /* Apply the offset from the vtable, if required. */
25779 if (vcall_offset)
25781 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25782 rtx tmp = gen_rtx_REG (Pmode, 12);
25784 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25785 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25787 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25788 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25790 else
25792 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25794 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25796 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25799 /* Generate a tail call to the target function. */
25800 if (!TREE_USED (function))
25802 assemble_external (function);
25803 TREE_USED (function) = 1;
25805 funexp = XEXP (DECL_RTL (function), 0);
25806 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25808 #if TARGET_MACHO
25809 if (MACHOPIC_INDIRECT)
25810 funexp = machopic_indirect_call_target (funexp);
25811 #endif
25813 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25814 generate sibcall RTL explicitly. */
25815 insn = emit_call_insn (
25816 gen_rtx_PARALLEL (VOIDmode,
25817 gen_rtvec (4,
25818 gen_rtx_CALL (VOIDmode,
25819 funexp, const0_rtx),
25820 gen_rtx_USE (VOIDmode, const0_rtx),
25821 gen_rtx_USE (VOIDmode,
25822 gen_rtx_REG (SImode,
25823 LR_REGNO)),
25824 simple_return_rtx)));
25825 SIBLING_CALL_P (insn) = 1;
25826 emit_barrier ();
25828 /* Ensure we have a global entry point for the thunk. ??? We could
25829 avoid that if the target routine doesn't need a global entry point,
25830 but we do not know whether this is the case at this point. */
25831 if (DEFAULT_ABI == ABI_ELFv2)
25832 cfun->machine->r2_setup_needed = true;
25834 /* Run just enough of rest_of_compilation to get the insns emitted.
25835 There's not really enough bulk here to make other passes such as
25836 instruction scheduling worth while. Note that use_thunk calls
25837 assemble_start_function and assemble_end_function. */
25838 insn = get_insns ();
25839 shorten_branches (insn);
25840 final_start_function (insn, file, 1);
25841 final (insn, file, 1);
25842 final_end_function ();
25844 reload_completed = 0;
25845 epilogue_completed = 0;
25848 /* A quick summary of the various types of 'constant-pool tables'
25849 under PowerPC:
25851 Target Flags Name One table per
25852 AIX (none) AIX TOC object file
25853 AIX -mfull-toc AIX TOC object file
25854 AIX -mminimal-toc AIX minimal TOC translation unit
25855 SVR4/EABI (none) SVR4 SDATA object file
25856 SVR4/EABI -fpic SVR4 pic object file
25857 SVR4/EABI -fPIC SVR4 PIC translation unit
25858 SVR4/EABI -mrelocatable EABI TOC function
25859 SVR4/EABI -maix AIX TOC object file
25860 SVR4/EABI -maix -mminimal-toc
25861 AIX minimal TOC translation unit
25863 Name Reg. Set by entries contains:
25864 made by addrs? fp? sum?
25866 AIX TOC 2 crt0 as Y option option
25867 AIX minimal TOC 30 prolog gcc Y Y option
25868 SVR4 SDATA 13 crt0 gcc N Y N
25869 SVR4 pic 30 prolog ld Y not yet N
25870 SVR4 PIC 30 prolog gcc Y option option
25871 EABI TOC 30 prolog gcc Y option option
25875 /* Hash functions for the hash table. */
25877 static unsigned
25878 rs6000_hash_constant (rtx k)
25880 enum rtx_code code = GET_CODE (k);
25881 machine_mode mode = GET_MODE (k);
25882 unsigned result = (code << 3) ^ mode;
25883 const char *format;
25884 int flen, fidx;
25886 format = GET_RTX_FORMAT (code);
25887 flen = strlen (format);
25888 fidx = 0;
25890 switch (code)
25892 case LABEL_REF:
25893 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25895 case CONST_WIDE_INT:
25897 int i;
25898 flen = CONST_WIDE_INT_NUNITS (k);
25899 for (i = 0; i < flen; i++)
25900 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
25901 return result;
25904 case CONST_DOUBLE:
25905 if (mode != VOIDmode)
25906 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25907 flen = 2;
25908 break;
25910 case CODE_LABEL:
25911 fidx = 3;
25912 break;
25914 default:
25915 break;
25918 for (; fidx < flen; fidx++)
25919 switch (format[fidx])
25921 case 's':
25923 unsigned i, len;
25924 const char *str = XSTR (k, fidx);
25925 len = strlen (str);
25926 result = result * 613 + len;
25927 for (i = 0; i < len; i++)
25928 result = result * 613 + (unsigned) str[i];
25929 break;
25931 case 'u':
25932 case 'e':
25933 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
25934 break;
25935 case 'i':
25936 case 'n':
25937 result = result * 613 + (unsigned) XINT (k, fidx);
25938 break;
25939 case 'w':
25940 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
25941 result = result * 613 + (unsigned) XWINT (k, fidx);
25942 else
25944 size_t i;
25945 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
25946 result = result * 613 + (unsigned) (XWINT (k, fidx)
25947 >> CHAR_BIT * i);
25949 break;
25950 case '0':
25951 break;
25952 default:
25953 gcc_unreachable ();
25956 return result;
25959 hashval_t
25960 toc_hasher::hash (toc_hash_struct *thc)
25962 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
25965 /* Compare H1 and H2 for equivalence. */
25967 bool
25968 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
25970 rtx r1 = h1->key;
25971 rtx r2 = h2->key;
25973 if (h1->key_mode != h2->key_mode)
25974 return 0;
25976 return rtx_equal_p (r1, r2);
25979 /* These are the names given by the C++ front-end to vtables and
25980 vtable-like objects. Ideally, this logic should not be here;
25981 instead, there should be some programmatic way of inquiring as
25982 to whether or not an object is a vtable. */
25984 #define VTABLE_NAME_P(NAME) \
25985 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
25986 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
25987 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
25988 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
25989 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
25991 #ifdef NO_DOLLAR_IN_LABEL
25992 /* Return a GGC-allocated character string translating dollar signs in
25993 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
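/* For example, a NAME of "foo$bar$baz" yields "foo_bar_baz"; a NAME whose
   only '$' is its first character is returned unchanged.  */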
25995 const char *
25996 rs6000_xcoff_strip_dollar (const char *name)
25998 char *strip, *p;
25999 const char *q;
26000 size_t len;
26002 q = (const char *) strchr (name, '$');
26004 if (q == 0 || q == name)
26005 return name;
26007 len = strlen (name);
26008 strip = XALLOCAVEC (char, len + 1);
26009 strcpy (strip, name);
26010 p = strip + (q - name);
26011 while (p)
26013 *p = '_';
26014 p = strchr (p + 1, '$');
26017 return ggc_alloc_string (strip, len);
26019 #endif
26021 void
26022 rs6000_output_symbol_ref (FILE *file, rtx x)
26024 /* Currently C++ toc references to vtables can be emitted before it
26025 is decided whether the vtable is public or private. If this is
26026 the case, then the linker will eventually complain that there is
26027 a reference to an unknown section. Thus, for vtables only,
26028 we emit the TOC reference to reference the symbol and not the
26029 section. */
26030 const char *name = XSTR (x, 0);
26032 if (VTABLE_NAME_P (name))
26034 RS6000_OUTPUT_BASENAME (file, name);
26036 else
26037 assemble_name (file, name);
26040 /* Output a TOC entry. We derive the entry name from what is being
26041 written. */
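/* For instance, on AIX a SYMBOL_REF for "foo" with label number 3 would
   produce roughly (hypothetical label spelling):

	LC..3:
		.tc foo[TC],foo

   whereas TARGET_ELF or TARGET_MINIMAL_TOC emit a bare .long (32-bit) or
   DOUBLE_INT_ASM_OP (64-bit) entry instead of a .tc line.  */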
26043 void
26044 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
26046 char buf[256];
26047 const char *name = buf;
26048 rtx base = x;
26049 HOST_WIDE_INT offset = 0;
26051 gcc_assert (!TARGET_NO_TOC);
26053 /* When the linker won't eliminate them, don't output duplicate
26054 TOC entries (this happens on AIX if there is any kind of TOC,
26055 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
26056 CODE_LABELs. */
26057 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
26059 struct toc_hash_struct *h;
26061 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
26062 time because GGC is not initialized at that point. */
26063 if (toc_hash_table == NULL)
26064 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
26066 h = ggc_alloc<toc_hash_struct> ();
26067 h->key = x;
26068 h->key_mode = mode;
26069 h->labelno = labelno;
26071 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
26072 if (*found == NULL)
26073 *found = h;
26074 else /* This is indeed a duplicate.
26075 Set this label equal to that label. */
26077 fputs ("\t.set ", file);
26078 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26079 fprintf (file, "%d,", labelno);
26080 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26081 fprintf (file, "%d\n", ((*found)->labelno));
26083 #ifdef HAVE_AS_TLS
26084 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
26085 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
26086 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
26088 fputs ("\t.set ", file);
26089 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26090 fprintf (file, "%d,", labelno);
26091 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26092 fprintf (file, "%d\n", ((*found)->labelno));
26094 #endif
26095 return;
26099 /* If we're going to put a double constant in the TOC, make sure it's
26100 aligned properly when strict alignment is on. */
26101 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
26102 && STRICT_ALIGNMENT
26103 && GET_MODE_BITSIZE (mode) >= 64
26104 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
26105 ASM_OUTPUT_ALIGN (file, 3);
26108 (*targetm.asm_out.internal_label) (file, "LC", labelno);
26110 /* Handle FP constants specially. Note that if we have a minimal
26111 TOC, things we put here aren't actually in the TOC, so we can allow
26112 FP constants. */
26113 if (GET_CODE (x) == CONST_DOUBLE
26114 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
26116 REAL_VALUE_TYPE rv;
26117 long k[4];
26119 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26120 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26121 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
26122 else
26123 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
26125 if (TARGET_64BIT)
26127 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26128 fputs (DOUBLE_INT_ASM_OP, file);
26129 else
26130 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26131 k[0] & 0xffffffff, k[1] & 0xffffffff,
26132 k[2] & 0xffffffff, k[3] & 0xffffffff);
26133 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
26134 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26135 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
26136 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
26137 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
26138 return;
26140 else
26142 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26143 fputs ("\t.long ", file);
26144 else
26145 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26146 k[0] & 0xffffffff, k[1] & 0xffffffff,
26147 k[2] & 0xffffffff, k[3] & 0xffffffff);
26148 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
26149 k[0] & 0xffffffff, k[1] & 0xffffffff,
26150 k[2] & 0xffffffff, k[3] & 0xffffffff);
26151 return;
26154 else if (GET_CODE (x) == CONST_DOUBLE
26155 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
26157 REAL_VALUE_TYPE rv;
26158 long k[2];
26160 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26162 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26163 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
26164 else
26165 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
26167 if (TARGET_64BIT)
26169 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26170 fputs (DOUBLE_INT_ASM_OP, file);
26171 else
26172 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26173 k[0] & 0xffffffff, k[1] & 0xffffffff);
26174 fprintf (file, "0x%lx%08lx\n",
26175 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26176 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
26177 return;
26179 else
26181 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26182 fputs ("\t.long ", file);
26183 else
26184 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26185 k[0] & 0xffffffff, k[1] & 0xffffffff);
26186 fprintf (file, "0x%lx,0x%lx\n",
26187 k[0] & 0xffffffff, k[1] & 0xffffffff);
26188 return;
26191 else if (GET_CODE (x) == CONST_DOUBLE
26192 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
26194 REAL_VALUE_TYPE rv;
26195 long l;
26197 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26198 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26199 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
26200 else
26201 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26203 if (TARGET_64BIT)
26205 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26206 fputs (DOUBLE_INT_ASM_OP, file);
26207 else
26208 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26209 if (WORDS_BIG_ENDIAN)
26210 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
26211 else
26212 fprintf (file, "0x%lx\n", l & 0xffffffff);
26213 return;
26215 else
26217 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26218 fputs ("\t.long ", file);
26219 else
26220 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26221 fprintf (file, "0x%lx\n", l & 0xffffffff);
26222 return;
26225 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
26227 unsigned HOST_WIDE_INT low;
26228 HOST_WIDE_INT high;
26230 low = INTVAL (x) & 0xffffffff;
26231 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
26233 /* TOC entries are always Pmode-sized, so when big-endian
26234 smaller integer constants in the TOC need to be padded.
26235 (This is still a win over putting the constants in
26236 a separate constant pool, because then we'd have
26237 to have both a TOC entry _and_ the actual constant.)
26239 For a 32-bit target, CONST_INT values are loaded and shifted
26240 entirely within `low' and can be stored in one TOC entry. */
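/* Worked example (hypothetical): a positive SImode constant 0x12345678
   in a 64-bit big-endian TOC is left-justified by the shift below, so
   the entry is emitted as 0x1234567800000000.  */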
26242 /* It would be easy to make this work, but it doesn't now. */
26243 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
26245 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
26247 low |= high << 32;
26248 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
26249 high = (HOST_WIDE_INT) low >> 32;
26250 low &= 0xffffffff;
26253 if (TARGET_64BIT)
26255 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26256 fputs (DOUBLE_INT_ASM_OP, file);
26257 else
26258 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26259 (long) high & 0xffffffff, (long) low & 0xffffffff);
26260 fprintf (file, "0x%lx%08lx\n",
26261 (long) high & 0xffffffff, (long) low & 0xffffffff);
26262 return;
26264 else
26266 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
26268 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26269 fputs ("\t.long ", file);
26270 else
26271 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26272 (long) high & 0xffffffff, (long) low & 0xffffffff);
26273 fprintf (file, "0x%lx,0x%lx\n",
26274 (long) high & 0xffffffff, (long) low & 0xffffffff);
26276 else
26278 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26279 fputs ("\t.long ", file);
26280 else
26281 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
26282 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
26284 return;
26288 if (GET_CODE (x) == CONST)
26290 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
26291 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
26293 base = XEXP (XEXP (x, 0), 0);
26294 offset = INTVAL (XEXP (XEXP (x, 0), 1));
26297 switch (GET_CODE (base))
26299 case SYMBOL_REF:
26300 name = XSTR (base, 0);
26301 break;
26303 case LABEL_REF:
26304 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
26305 CODE_LABEL_NUMBER (XEXP (base, 0)));
26306 break;
26308 case CODE_LABEL:
26309 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
26310 break;
26312 default:
26313 gcc_unreachable ();
26316 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26317 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
26318 else
26320 fputs ("\t.tc ", file);
26321 RS6000_OUTPUT_BASENAME (file, name);
26323 if (offset < 0)
26324 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
26325 else if (offset)
26326 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
26328 /* Mark large TOC symbols on AIX with [TE] so they are mapped
26329 after other TOC symbols, reducing overflow of small TOC access
26330 to [TC] symbols. */
26331 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
26332 ? "[TE]," : "[TC],", file);
26335 /* Currently C++ toc references to vtables can be emitted before it
26336 is decided whether the vtable is public or private. If this is
26337 the case, then the linker will eventually complain that there is
26338 a TOC reference to an unknown section. Thus, for vtables only,
26339 we emit the TOC reference to reference the symbol and not the
26340 section. */
26341 if (VTABLE_NAME_P (name))
26343 RS6000_OUTPUT_BASENAME (file, name);
26344 if (offset < 0)
26345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
26346 else if (offset > 0)
26347 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
26349 else
26350 output_addr_const (file, x);
26352 #if HAVE_AS_TLS
26353 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
26354 && SYMBOL_REF_TLS_MODEL (base) != 0)
26356 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
26357 fputs ("@le", file);
26358 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
26359 fputs ("@ie", file);
26360 /* Use global-dynamic for local-dynamic. */
26361 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
26362 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
26364 putc ('\n', file);
26365 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
26366 fputs ("\t.tc .", file);
26367 RS6000_OUTPUT_BASENAME (file, name);
26368 fputs ("[TC],", file);
26369 output_addr_const (file, x);
26370 fputs ("@m", file);
26373 #endif
26375 putc ('\n', file);
26378 /* Output an assembler pseudo-op to write an ASCII string of N characters
26379 starting at P to FILE.
26381 On the RS/6000, we have to do this using the .byte operation and
26382 write out special characters outside the quoted string.
26383 Also, the assembler is broken; very long strings are truncated,
26384 so we must artificially break them up early. */
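/* As an example, the three input bytes "Hi\n" come out as:

	.byte "Hi"
	.byte 10
*/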
26386 void
26387 output_ascii (FILE *file, const char *p, int n)
26389 char c;
26390 int i, count_string;
26391 const char *for_string = "\t.byte \"";
26392 const char *for_decimal = "\t.byte ";
26393 const char *to_close = NULL;
26395 count_string = 0;
26396 for (i = 0; i < n; i++)
26398 c = *p++;
26399 if (c >= ' ' && c < 0177)
26401 if (for_string)
26402 fputs (for_string, file);
26403 putc (c, file);
26405 /* Write two quotes to get one. */
26406 if (c == '"')
26408 putc (c, file);
26409 ++count_string;
26412 for_string = NULL;
26413 for_decimal = "\"\n\t.byte ";
26414 to_close = "\"\n";
26415 ++count_string;
26417 if (count_string >= 512)
26419 fputs (to_close, file);
26421 for_string = "\t.byte \"";
26422 for_decimal = "\t.byte ";
26423 to_close = NULL;
26424 count_string = 0;
26427 else
26429 if (for_decimal)
26430 fputs (for_decimal, file);
26431 fprintf (file, "%d", c);
26433 for_string = "\n\t.byte \"";
26434 for_decimal = ", ";
26435 to_close = "\n";
26436 count_string = 0;
26440 /* Now close the string if we have written one. Then end the line. */
26441 if (to_close)
26442 fputs (to_close, file);
26445 /* Generate a unique section name for FILENAME for a section type
26446 represented by SECTION_DESC. Output goes into BUF.
26448 SECTION_DESC can be any string, as long as it is different for each
26449 possible section type.
26451 We name the section in the same manner as xlc. The name begins with an
26452 underscore followed by the filename (after stripping any leading directory
26453 names) with the last period replaced by the string SECTION_DESC. If
26454 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26455 the name. */
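/* For example (hypothetical arguments), FILENAME "src/foo.c" with
   SECTION_DESC "bss_" produces the section name "_foobss_".  */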
26457 void
26458 rs6000_gen_section_name (char **buf, const char *filename,
26459 const char *section_desc)
26461 const char *q, *after_last_slash, *last_period = 0;
26462 char *p;
26463 int len;
26465 after_last_slash = filename;
26466 for (q = filename; *q; q++)
26468 if (*q == '/')
26469 after_last_slash = q + 1;
26470 else if (*q == '.')
26471 last_period = q;
26474 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26475 *buf = (char *) xmalloc (len);
26477 p = *buf;
26478 *p++ = '_';
26480 for (q = after_last_slash; *q; q++)
26482 if (q == last_period)
26484 strcpy (p, section_desc);
26485 p += strlen (section_desc);
26486 break;
26489 else if (ISALNUM (*q))
26490 *p++ = *q;
26493 if (last_period == 0)
26494 strcpy (p, section_desc);
26495 else
26496 *p = '\0';
26499 /* Emit profile function. */
26501 void
26502 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26504 /* Non-standard profiling for kernels, which just saves LR then calls
26505 _mcount without worrying about arg saves. The idea is to change
26506 the function prologue as little as possible as it isn't easy to
26507 account for arg save/restore code added just for _mcount. */
26508 if (TARGET_PROFILE_KERNEL)
26509 return;
26511 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26513 #ifndef NO_PROFILE_COUNTERS
26514 # define NO_PROFILE_COUNTERS 0
26515 #endif
26516 if (NO_PROFILE_COUNTERS)
26517 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26518 LCT_NORMAL, VOIDmode, 0);
26519 else
26521 char buf[30];
26522 const char *label_name;
26523 rtx fun;
26525 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26526 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26527 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26529 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26530 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26533 else if (DEFAULT_ABI == ABI_DARWIN)
26535 const char *mcount_name = RS6000_MCOUNT;
26536 int caller_addr_regno = LR_REGNO;
26538 /* Be conservative and always set this, at least for now. */
26539 crtl->uses_pic_offset_table = 1;
26541 #if TARGET_MACHO
26542 /* For PIC code, set up a stub and collect the caller's address
26543 from r0, which is where the prologue puts it. */
26544 if (MACHOPIC_INDIRECT
26545 && crtl->uses_pic_offset_table)
26546 caller_addr_regno = 0;
26547 #endif
26548 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26549 LCT_NORMAL, VOIDmode, 1,
26550 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26554 /* Write function profiler code. */
26556 void
26557 output_function_profiler (FILE *file, int labelno)
26559 char buf[100];
26561 switch (DEFAULT_ABI)
26563 default:
26564 gcc_unreachable ();
26566 case ABI_V4:
26567 if (!TARGET_32BIT)
26569 warning (0, "no profiling of 64-bit code for this ABI");
26570 return;
26572 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26573 fprintf (file, "\tmflr %s\n", reg_names[0]);
26574 if (NO_PROFILE_COUNTERS)
26576 asm_fprintf (file, "\tstw %s,4(%s)\n",
26577 reg_names[0], reg_names[1]);
26579 else if (TARGET_SECURE_PLT && flag_pic)
26581 if (TARGET_LINK_STACK)
26583 char name[32];
26584 get_ppc476_thunk_name (name);
26585 asm_fprintf (file, "\tbl %s\n", name);
26587 else
26588 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26589 asm_fprintf (file, "\tstw %s,4(%s)\n",
26590 reg_names[0], reg_names[1]);
26591 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26592 asm_fprintf (file, "\taddis %s,%s,",
26593 reg_names[12], reg_names[12]);
26594 assemble_name (file, buf);
26595 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26596 assemble_name (file, buf);
26597 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26599 else if (flag_pic == 1)
26601 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26602 asm_fprintf (file, "\tstw %s,4(%s)\n",
26603 reg_names[0], reg_names[1]);
26604 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26605 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26606 assemble_name (file, buf);
26607 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26609 else if (flag_pic > 1)
26611 asm_fprintf (file, "\tstw %s,4(%s)\n",
26612 reg_names[0], reg_names[1]);
26613 /* Now, we need to get the address of the label. */
26614 if (TARGET_LINK_STACK)
26616 char name[32];
26617 get_ppc476_thunk_name (name);
26618 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26619 assemble_name (file, buf);
26620 fputs ("-.\n1:", file);
26621 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26622 asm_fprintf (file, "\taddi %s,%s,4\n",
26623 reg_names[11], reg_names[11]);
26625 else
26627 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26628 assemble_name (file, buf);
26629 fputs ("-.\n1:", file);
26630 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26632 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26633 reg_names[0], reg_names[11]);
26634 asm_fprintf (file, "\tadd %s,%s,%s\n",
26635 reg_names[0], reg_names[0], reg_names[11]);
26637 else
26639 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26640 assemble_name (file, buf);
26641 fputs ("@ha\n", file);
26642 asm_fprintf (file, "\tstw %s,4(%s)\n",
26643 reg_names[0], reg_names[1]);
26644 asm_fprintf (file, "\tla %s,", reg_names[0]);
26645 assemble_name (file, buf);
26646 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26649 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26650 fprintf (file, "\tbl %s%s\n",
26651 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26652 break;
26654 case ABI_AIX:
26655 case ABI_ELFv2:
26656 case ABI_DARWIN:
26657 /* Don't do anything, done in output_profile_hook (). */
26658 break;
26664 /* The following variable value is the last issued insn. */
26666 static rtx last_scheduled_insn;
26668 /* The following variable helps to balance issuing of load and
26669 store instructions. */
26671 static int load_store_pendulum;
26673 /* Power4 load update and store update instructions are cracked into a
26674 load or store and an integer insn which are executed in the same cycle.
26675 Branches have their own dispatch slot which does not count against the
26676 GCC issue rate, but it changes the program flow so there are no other
26677 instructions to issue in this cycle. */
26679 static int
26680 rs6000_variable_issue_1 (rtx_insn *insn, int more)
26682 last_scheduled_insn = insn;
26683 if (GET_CODE (PATTERN (insn)) == USE
26684 || GET_CODE (PATTERN (insn)) == CLOBBER)
26686 cached_can_issue_more = more;
26687 return cached_can_issue_more;
26690 if (insn_terminates_group_p (insn, current_group))
26692 cached_can_issue_more = 0;
26693 return cached_can_issue_more;
26696 /* If INSN has no reservation but we reach here anyway, leave the issue count unchanged. */
26697 if (recog_memoized (insn) < 0)
26698 return more;
26700 if (rs6000_sched_groups)
26702 if (is_microcoded_insn (insn))
26703 cached_can_issue_more = 0;
26704 else if (is_cracked_insn (insn))
26705 cached_can_issue_more = more > 2 ? more - 2 : 0;
26706 else
26707 cached_can_issue_more = more - 1;
26709 return cached_can_issue_more;
26712 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26713 return 0;
26715 cached_can_issue_more = more - 1;
26716 return cached_can_issue_more;
26719 static int
26720 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
26722 int r = rs6000_variable_issue_1 (insn, more);
26723 if (verbose)
26724 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26725 return r;
26728 /* Adjust the cost of a scheduling dependency. Return the new cost of
26729 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
26731 static int
26732 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26734 enum attr_type attr_type;
26736 if (! recog_memoized (insn))
26737 return 0;
26739 switch (REG_NOTE_KIND (link))
26741 case REG_DEP_TRUE:
26743 /* Data dependency; DEP_INSN writes a register that INSN reads
26744 some cycles later. */
26746 /* Separate a load from a narrower, dependent store. */
26747 if (rs6000_sched_groups
26748 && GET_CODE (PATTERN (insn)) == SET
26749 && GET_CODE (PATTERN (dep_insn)) == SET
26750 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26751 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26752 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26753 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26754 return cost + 14;
26756 attr_type = get_attr_type (insn);
26758 switch (attr_type)
26760 case TYPE_JMPREG:
26761 /* Tell the first scheduling pass about the latency between
26762 a mtctr and bctr (and mtlr and br/blr). The first
26763 scheduling pass will not know about this latency since
26764 the mtctr instruction, which has the latency associated
26765 to it, will be generated by reload. */
26766 return 4;
26767 case TYPE_BRANCH:
26768 /* Leave some extra cycles between a compare and its
26769 dependent branch, to inhibit expensive mispredicts. */
26770 if ((rs6000_cpu_attr == CPU_PPC603
26771 || rs6000_cpu_attr == CPU_PPC604
26772 || rs6000_cpu_attr == CPU_PPC604E
26773 || rs6000_cpu_attr == CPU_PPC620
26774 || rs6000_cpu_attr == CPU_PPC630
26775 || rs6000_cpu_attr == CPU_PPC750
26776 || rs6000_cpu_attr == CPU_PPC7400
26777 || rs6000_cpu_attr == CPU_PPC7450
26778 || rs6000_cpu_attr == CPU_PPCE5500
26779 || rs6000_cpu_attr == CPU_PPCE6500
26780 || rs6000_cpu_attr == CPU_POWER4
26781 || rs6000_cpu_attr == CPU_POWER5
26782 || rs6000_cpu_attr == CPU_POWER7
26783 || rs6000_cpu_attr == CPU_POWER8
26784 || rs6000_cpu_attr == CPU_CELL)
26785 && recog_memoized (dep_insn)
26786 && (INSN_CODE (dep_insn) >= 0))
26788 switch (get_attr_type (dep_insn))
26790 case TYPE_CMP:
26791 case TYPE_FPCOMPARE:
26792 case TYPE_CR_LOGICAL:
26793 case TYPE_DELAYED_CR:
26794 return cost + 2;
26795 case TYPE_EXTS:
26796 case TYPE_MUL:
26797 if (get_attr_dot (dep_insn) == DOT_YES)
26798 return cost + 2;
26799 else
26800 break;
26801 case TYPE_SHIFT:
26802 if (get_attr_dot (dep_insn) == DOT_YES
26803 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
26804 return cost + 2;
26805 else
26806 break;
26807 default:
26808 break;
26810 break;
26812 case TYPE_STORE:
26813 case TYPE_FPSTORE:
26814 if ((rs6000_cpu == PROCESSOR_POWER6)
26815 && recog_memoized (dep_insn)
26816 && (INSN_CODE (dep_insn) >= 0))
26819 if (GET_CODE (PATTERN (insn)) != SET)
26820 /* If this happens, we have to extend this to schedule
26821 optimally. Return default for now. */
26822 return cost;
26824 /* Adjust the cost for the case where the value written
26825 by a fixed point operation is used as the address
26826 gen value on a store. */
26827 switch (get_attr_type (dep_insn))
26829 case TYPE_LOAD:
26830 case TYPE_CNTLZ:
26832 if (! store_data_bypass_p (dep_insn, insn))
26833 return get_attr_sign_extend (dep_insn)
26834 == SIGN_EXTEND_YES ? 6 : 4;
26835 break;
26837 case TYPE_SHIFT:
26839 if (! store_data_bypass_p (dep_insn, insn))
26840 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26841 6 : 3;
26842 break;
26844 case TYPE_INTEGER:
26845 case TYPE_ADD:
26846 case TYPE_LOGICAL:
26847 case TYPE_EXTS:
26848 case TYPE_INSERT:
26850 if (! store_data_bypass_p (dep_insn, insn))
26851 return 3;
26852 break;
26854 case TYPE_STORE:
26855 case TYPE_FPLOAD:
26856 case TYPE_FPSTORE:
26858 if (get_attr_update (dep_insn) == UPDATE_YES
26859 && ! store_data_bypass_p (dep_insn, insn))
26860 return 3;
26861 break;
26863 case TYPE_MUL:
26865 if (! store_data_bypass_p (dep_insn, insn))
26866 return 17;
26867 break;
26869 case TYPE_DIV:
26871 if (! store_data_bypass_p (dep_insn, insn))
26872 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26873 break;
26875 default:
26876 break;
26879 break;
26881 case TYPE_LOAD:
26882 if ((rs6000_cpu == PROCESSOR_POWER6)
26883 && recog_memoized (dep_insn)
26884 && (INSN_CODE (dep_insn) >= 0))
26887 /* Adjust the cost for the case where the value written
26888 by a fixed point instruction is used within the address
26889 gen portion of a subsequent load(u)(x). */
26890 switch (get_attr_type (dep_insn))
26892 case TYPE_LOAD:
26893 case TYPE_CNTLZ:
26895 if (set_to_load_agen (dep_insn, insn))
26896 return get_attr_sign_extend (dep_insn)
26897 == SIGN_EXTEND_YES ? 6 : 4;
26898 break;
26900 case TYPE_SHIFT:
26902 if (set_to_load_agen (dep_insn, insn))
26903 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26904 6 : 3;
26905 break;
26907 case TYPE_INTEGER:
26908 case TYPE_ADD:
26909 case TYPE_LOGICAL:
26910 case TYPE_EXTS:
26911 case TYPE_INSERT:
26913 if (set_to_load_agen (dep_insn, insn))
26914 return 3;
26915 break;
26917 case TYPE_STORE:
26918 case TYPE_FPLOAD:
26919 case TYPE_FPSTORE:
26921 if (get_attr_update (dep_insn) == UPDATE_YES
26922 && set_to_load_agen (dep_insn, insn))
26923 return 3;
26924 break;
26926 case TYPE_MUL:
26928 if (set_to_load_agen (dep_insn, insn))
26929 return 17;
26930 break;
26932 case TYPE_DIV:
26934 if (set_to_load_agen (dep_insn, insn))
26935 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26936 break;
26938 default:
26939 break;
26942 break;
26944 case TYPE_FPLOAD:
26945 if ((rs6000_cpu == PROCESSOR_POWER6)
26946 && get_attr_update (insn) == UPDATE_NO
26947 && recog_memoized (dep_insn)
26948 && (INSN_CODE (dep_insn) >= 0)
26949 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
26950 return 2;
26952 default:
26953 break;
26956 /* Fall out to return default cost. */
26958 break;
26960 case REG_DEP_OUTPUT:
26961 /* Output dependency; DEP_INSN writes a register that INSN writes some
26962 cycles later. */
26963 if ((rs6000_cpu == PROCESSOR_POWER6)
26964 && recog_memoized (dep_insn)
26965 && (INSN_CODE (dep_insn) >= 0))
26967 attr_type = get_attr_type (insn);
26969 switch (attr_type)
26971 case TYPE_FP:
26972 if (get_attr_type (dep_insn) == TYPE_FP)
26973 return 1;
26974 break;
26975 case TYPE_FPLOAD:
26976 if (get_attr_update (insn) == UPDATE_NO
26977 && get_attr_type (dep_insn) == TYPE_MFFGPR)
26978 return 2;
26979 break;
26980 default:
26981 break;
26984 case REG_DEP_ANTI:
26985 /* Anti dependency; DEP_INSN reads a register that INSN writes some
26986 cycles later. */
26987 return 0;
26989 default:
26990 gcc_unreachable ();
26993 return cost;
26996 /* Debug version of rs6000_adjust_cost. */
26998 static int
26999 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
27000 int cost)
27002 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
27004 if (ret != cost)
27006 const char *dep;
27008 switch (REG_NOTE_KIND (link))
27010 default: dep = "unknown dependency"; break;
27011 case REG_DEP_TRUE: dep = "data dependency"; break;
27012 case REG_DEP_OUTPUT: dep = "output dependency"; break;
27013 case REG_DEP_ANTI: dep = "anti dependency"; break;
27016 fprintf (stderr,
27017 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
27018 "%s, insn:\n", ret, cost, dep);
27020 debug_rtx (insn);
27023 return ret;
27026 /* The function returns true if INSN is microcoded.
27027 Return false otherwise. */
27029 static bool
27030 is_microcoded_insn (rtx_insn *insn)
27032 if (!insn || !NONDEBUG_INSN_P (insn)
27033 || GET_CODE (PATTERN (insn)) == USE
27034 || GET_CODE (PATTERN (insn)) == CLOBBER)
27035 return false;
27037 if (rs6000_cpu_attr == CPU_CELL)
27038 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
27040 if (rs6000_sched_groups
27041 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27043 enum attr_type type = get_attr_type (insn);
27044 if ((type == TYPE_LOAD
27045 && get_attr_update (insn) == UPDATE_YES
27046 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
27047 || ((type == TYPE_LOAD || type == TYPE_STORE)
27048 && get_attr_update (insn) == UPDATE_YES
27049 && get_attr_indexed (insn) == INDEXED_YES)
27050 || type == TYPE_MFCR)
27051 return true;
27054 return false;
27057 /* The function returns true if INSN is cracked into 2 instructions
27058 by the processor (and therefore occupies 2 issue slots). */
27060 static bool
27061 is_cracked_insn (rtx_insn *insn)
27063 if (!insn || !NONDEBUG_INSN_P (insn)
27064 || GET_CODE (PATTERN (insn)) == USE
27065 || GET_CODE (PATTERN (insn)) == CLOBBER)
27066 return false;
27068 if (rs6000_sched_groups
27069 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27071 enum attr_type type = get_attr_type (insn);
27072 if ((type == TYPE_LOAD
27073 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27074 && get_attr_update (insn) == UPDATE_NO)
27075 || (type == TYPE_LOAD
27076 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
27077 && get_attr_update (insn) == UPDATE_YES
27078 && get_attr_indexed (insn) == INDEXED_NO)
27079 || (type == TYPE_STORE
27080 && get_attr_update (insn) == UPDATE_YES
27081 && get_attr_indexed (insn) == INDEXED_NO)
27082 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
27083 && get_attr_update (insn) == UPDATE_YES)
27084 || type == TYPE_DELAYED_CR
27085 || (type == TYPE_EXTS
27086 && get_attr_dot (insn) == DOT_YES)
27087 || (type == TYPE_SHIFT
27088 && get_attr_dot (insn) == DOT_YES
27089 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
27090 || (type == TYPE_MUL
27091 && get_attr_dot (insn) == DOT_YES)
27092 || type == TYPE_DIV
27093 || (type == TYPE_INSERT
27094 && get_attr_size (insn) == SIZE_32))
27095 return true;
27098 return false;
27101 /* The function returns true if INSN can be issued only from
27102 the branch slot. */
27104 static bool
27105 is_branch_slot_insn (rtx_insn *insn)
27107 if (!insn || !NONDEBUG_INSN_P (insn)
27108 || GET_CODE (PATTERN (insn)) == USE
27109 || GET_CODE (PATTERN (insn)) == CLOBBER)
27110 return false;
27112 if (rs6000_sched_groups)
27114 enum attr_type type = get_attr_type (insn);
27115 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
27116 return true;
27117 return false;
27120 return false;
27123 /* Return true if OUT_INSN sets a value that is
27124 used in the address generation computation of IN_INSN. */
27125 static bool
27126 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
27128 rtx out_set, in_set;
27130 /* For performance reasons, only handle the simple case where
27131 both loads are a single_set. */
27132 out_set = single_set (out_insn);
27133 if (out_set)
27135 in_set = single_set (in_insn);
27136 if (in_set)
27137 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
27140 return false;
27143 /* Try to determine base/offset/size parts of the given MEM.
27144 Return true if successful, false if all the values couldn't
27145 be determined.
27147 This function only looks for REG or REG+CONST address forms.
27148 REG+REG address form will return false. */
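/* For example, (mem:DI (plus (reg 9) (const_int 16))) with a known
   8-byte size yields *BASE = (reg 9), *OFFSET = 16 and *SIZE = 8,
   while a REG+REG address makes the function return false.  */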
27150 static bool
27151 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
27152 HOST_WIDE_INT *size)
27154 rtx addr_rtx;
27155 if (MEM_SIZE_KNOWN_P (mem))
27156 *size = MEM_SIZE (mem);
27157 else
27158 return false;
27160 if (GET_CODE (XEXP (mem, 0)) == PRE_MODIFY)
27161 addr_rtx = XEXP (XEXP (mem, 0), 1);
27162 else
27163 addr_rtx = (XEXP (mem, 0));
27165 if (GET_CODE (addr_rtx) == REG)
27167 *base = addr_rtx;
27168 *offset = 0;
27170 else if (GET_CODE (addr_rtx) == PLUS
27171 && CONST_INT_P (XEXP (addr_rtx, 1)))
27173 *base = XEXP (addr_rtx, 0);
27174 *offset = INTVAL (XEXP (addr_rtx, 1));
27176 else
27177 return false;
27179 return true;
27182 /* Return true if the target storage location of MEM1 is adjacent
27183 to the target storage location of MEM2. */
27186 static bool
27187 adjacent_mem_locations (rtx mem1, rtx mem2)
27189 rtx reg1, reg2;
27190 HOST_WIDE_INT off1, size1, off2, size2;
27192 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27193 && get_memref_parts (mem2, &reg2, &off2, &size2))
27194 return ((REGNO (reg1) == REGNO (reg2))
27195 && ((off1 + size1 == off2)
27196 || (off2 + size2 == off1)));
27198 return false;
27201 /* This function returns true if it can be determined that the two MEM
27202 locations overlap by at least 1 byte based on base reg/offset/size. */
27204 static bool
27205 mem_locations_overlap (rtx mem1, rtx mem2)
27207 rtx reg1, reg2;
27208 HOST_WIDE_INT off1, size1, off2, size2;
27210 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27211 && get_memref_parts (mem2, &reg2, &off2, &size2))
27212 return ((REGNO (reg1) == REGNO (reg2))
27213 && (((off1 <= off2) && (off1 + size1 > off2))
27214 || ((off2 <= off1) && (off2 + size2 > off1))));
27216 return false;
27219 /* A C statement (sans semicolon) to update the integer scheduling
27220 priority INSN_PRIORITY (INSN). Increase the priority to execute the
27221 INSN earlier, reduce the priority to execute INSN later. Do not
27222 define this macro if you do not need to adjust the scheduling
27223 priorities of insns. */
27225 static int
27226 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
27228 rtx load_mem, str_mem;
27229 /* On machines (like the 750) which have asymmetric integer units,
27230 where one integer unit can do multiplies and divides and the other
27231 can't, reduce the priority of multiply/divide so it is scheduled
27232 before other integer operations. */
27234 #if 0
27235 if (! INSN_P (insn))
27236 return priority;
27238 if (GET_CODE (PATTERN (insn)) == USE)
27239 return priority;
27241 switch (rs6000_cpu_attr) {
27242 case CPU_PPC750:
27243 switch (get_attr_type (insn))
27245 default:
27246 break;
27248 case TYPE_MUL:
27249 case TYPE_DIV:
27250 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
27251 priority, priority);
27252 if (priority >= 0 && priority < 0x01000000)
27253 priority >>= 3;
27254 break;
27257 #endif
27259 if (insn_must_be_first_in_group (insn)
27260 && reload_completed
27261 && current_sched_info->sched_max_insns_priority
27262 && rs6000_sched_restricted_insns_priority)
27265 /* Prioritize insns that can be dispatched only in the first
27266 dispatch slot. */
27267 if (rs6000_sched_restricted_insns_priority == 1)
27268 /* Attach highest priority to insn. This means that in
27269 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
27270 precede 'priority' (critical path) considerations. */
27271 return current_sched_info->sched_max_insns_priority;
27272 else if (rs6000_sched_restricted_insns_priority == 2)
27273 /* Increase priority of insn by a minimal amount. This means that in
27274 haifa-sched.c:ready_sort(), only 'priority' (critical path)
27275 considerations precede dispatch-slot restriction considerations. */
27276 return (priority + 1);
27279 if (rs6000_cpu == PROCESSOR_POWER6
27280 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
27281 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
27282 /* Attach highest priority to insn if the scheduler has just issued two
27283 stores and this instruction is a load, or two loads and this instruction
27284 is a store. Power6 wants loads and stores scheduled alternately
27285 when possible.  */
27286 return current_sched_info->sched_max_insns_priority;
27288 return priority;
27291 /* Return true if the instruction is nonpipelined on the Cell. */
27292 static bool
27293 is_nonpipeline_insn (rtx_insn *insn)
27295 enum attr_type type;
27296 if (!insn || !NONDEBUG_INSN_P (insn)
27297 || GET_CODE (PATTERN (insn)) == USE
27298 || GET_CODE (PATTERN (insn)) == CLOBBER)
27299 return false;
27301 type = get_attr_type (insn);
27302 if (type == TYPE_MUL
27303 || type == TYPE_DIV
27304 || type == TYPE_SDIV
27305 || type == TYPE_DDIV
27306 || type == TYPE_SSQRT
27307 || type == TYPE_DSQRT
27308 || type == TYPE_MFCR
27309 || type == TYPE_MFCRF
27310 || type == TYPE_MFJMPR)
27312 return true;
27314 return false;
27318 /* Return how many instructions the machine can issue per cycle. */
27320 static int
27321 rs6000_issue_rate (void)
27323 /* Unless scheduling for register pressure, use issue rate of 1 for
27324 first scheduling pass to decrease degradation. */
27325 if (!reload_completed && !flag_sched_pressure)
27326 return 1;
27328 switch (rs6000_cpu_attr) {
27329 case CPU_RS64A:
27330 case CPU_PPC601: /* ? */
27331 case CPU_PPC7450:
27332 return 3;
27333 case CPU_PPC440:
27334 case CPU_PPC603:
27335 case CPU_PPC750:
27336 case CPU_PPC7400:
27337 case CPU_PPC8540:
27338 case CPU_PPC8548:
27339 case CPU_CELL:
27340 case CPU_PPCE300C2:
27341 case CPU_PPCE300C3:
27342 case CPU_PPCE500MC:
27343 case CPU_PPCE500MC64:
27344 case CPU_PPCE5500:
27345 case CPU_PPCE6500:
27346 case CPU_TITAN:
27347 return 2;
27348 case CPU_PPC476:
27349 case CPU_PPC604:
27350 case CPU_PPC604E:
27351 case CPU_PPC620:
27352 case CPU_PPC630:
27353 return 4;
27354 case CPU_POWER4:
27355 case CPU_POWER5:
27356 case CPU_POWER6:
27357 case CPU_POWER7:
27358 return 5;
27359 case CPU_POWER8:
27360 return 7;
27361 default:
27362 return 1;
27366 /* Return how many instructions to look ahead for better insn
27367 scheduling. */
27369 static int
27370 rs6000_use_sched_lookahead (void)
27372 switch (rs6000_cpu_attr)
27374 case CPU_PPC8540:
27375 case CPU_PPC8548:
27376 return 4;
27378 case CPU_CELL:
27379 return (reload_completed ? 8 : 0);
27381 default:
27382 return 0;
27386 /* We are choosing insn from the ready queue. Return zero if INSN can be
27387 chosen. */
27388 static int
27389 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27391 if (ready_index == 0)
27392 return 0;
27394 if (rs6000_cpu_attr != CPU_CELL)
27395 return 0;
27397 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27399 if (!reload_completed
27400 || is_nonpipeline_insn (insn)
27401 || is_microcoded_insn (insn))
27402 return 1;
27404 return 0;
27407 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27408 and return true. */
27410 static bool
27411 find_mem_ref (rtx pat, rtx *mem_ref)
27413 const char * fmt;
27414 int i, j;
27416 /* stack_tie does not produce any real memory traffic. */
27417 if (tie_operand (pat, VOIDmode))
27418 return false;
27420 if (GET_CODE (pat) == MEM)
27422 *mem_ref = pat;
27423 return true;
27426 /* Recursively process the pattern. */
27427 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27429 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27431 if (fmt[i] == 'e')
27433 if (find_mem_ref (XEXP (pat, i), mem_ref))
27434 return true;
27436 else if (fmt[i] == 'E')
27437 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27439 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27440 return true;
27444 return false;
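/* For illustration: applied to the pattern
     (set (reg 3) (mem (plus (reg 9) (const_int 8))))
   the recursive walk above descends through the SET and PLUS operands and
   returns the (mem ...) subexpression in *MEM_REF; a stack_tie pattern is
   rejected up front because it produces no real memory traffic.  */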
27447 /* Determine if PAT is a PATTERN of a load insn. */
27449 static bool
27450 is_load_insn1 (rtx pat, rtx *load_mem)
27452 if (!pat || pat == NULL_RTX)
27453 return false;
27455 if (GET_CODE (pat) == SET)
27456 return find_mem_ref (SET_SRC (pat), load_mem);
27458 if (GET_CODE (pat) == PARALLEL)
27460 int i;
27462 for (i = 0; i < XVECLEN (pat, 0); i++)
27463 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27464 return true;
27467 return false;
27470 /* Determine if INSN loads from memory. */
27472 static bool
27473 is_load_insn (rtx insn, rtx *load_mem)
27475 if (!insn || !INSN_P (insn))
27476 return false;
27478 if (CALL_P (insn))
27479 return false;
27481 return is_load_insn1 (PATTERN (insn), load_mem);
27484 /* Determine if PAT is a PATTERN of a store insn. */
27486 static bool
27487 is_store_insn1 (rtx pat, rtx *str_mem)
27489 if (!pat || pat == NULL_RTX)
27490 return false;
27492 if (GET_CODE (pat) == SET)
27493 return find_mem_ref (SET_DEST (pat), str_mem);
27495 if (GET_CODE (pat) == PARALLEL)
27497 int i;
27499 for (i = 0; i < XVECLEN (pat, 0); i++)
27500 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27501 return true;
27504 return false;
27507 /* Determine if INSN stores to memory. */
27509 static bool
27510 is_store_insn (rtx insn, rtx *str_mem)
27512 if (!insn || !INSN_P (insn))
27513 return false;
27515 return is_store_insn1 (PATTERN (insn), str_mem);
27518 /* Returns whether the dependence between INSN and NEXT is considered
27519 costly by the given target. */
27521 static bool
27522 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27524 rtx insn;
27525 rtx next;
27526 rtx load_mem, str_mem;
27528 /* If the flag is not enabled - no dependence is considered costly;
27529 allow all dependent insns in the same group.
27530 This is the most aggressive option. */
27531 if (rs6000_sched_costly_dep == no_dep_costly)
27532 return false;
27534 /* If the flag is set to 1 - a dependence is always considered costly;
27535 do not allow dependent instructions in the same group.
27536 This is the most conservative option. */
27537 if (rs6000_sched_costly_dep == all_deps_costly)
27538 return true;
27540 insn = DEP_PRO (dep);
27541 next = DEP_CON (dep);
27543 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27544 && is_load_insn (next, &load_mem)
27545 && is_store_insn (insn, &str_mem))
27546 /* Prevent load after store in the same group. */
27547 return true;
27549 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27550 && is_load_insn (next, &load_mem)
27551 && is_store_insn (insn, &str_mem)
27552 && DEP_TYPE (dep) == REG_DEP_TRUE
27553 && mem_locations_overlap(str_mem, load_mem))
27554 /* Prevent load after store in the same group if it is a true
27555 dependence. */
27556 return true;
27558 /* The flag is set to X; dependences with latency >= X are considered costly,
27559 and will not be scheduled in the same group. */
27560 if (rs6000_sched_costly_dep <= max_dep_latency
27561 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27562 return true;
27564 return false;
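/* Worked example (illustration only): for a store to bytes [4,12) off r9
   followed by a true-dependent load from bytes [8,16) off r9, the two
   locations overlap, so true_store_to_load_dep_costly keeps the pair out
   of a single dispatch group; store_to_load_dep_costly would do so for
   any store->load dependence, overlapping or not.  */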
27567 /* Return the next insn after INSN that is found before TAIL is reached,
27568 skipping any "non-active" insns - insns that will not actually occupy
27569 an issue slot. Return NULL_RTX if such an insn is not found. */
27571 static rtx_insn *
27572 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27574 if (insn == NULL_RTX || insn == tail)
27575 return NULL;
27577 while (1)
27579 insn = NEXT_INSN (insn);
27580 if (insn == NULL_RTX || insn == tail)
27581 return NULL;
27583 if (CALL_P (insn)
27584 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27585 || (NONJUMP_INSN_P (insn)
27586 && GET_CODE (PATTERN (insn)) != USE
27587 && GET_CODE (PATTERN (insn)) != CLOBBER
27588 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27589 break;
27591 return insn;
27594 /* We are about to begin issuing insns for this clock cycle. */
27596 static int
27597 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27598 rtx_insn **ready ATTRIBUTE_UNUSED,
27599 int *pn_ready ATTRIBUTE_UNUSED,
27600 int clock_var ATTRIBUTE_UNUSED)
27602 int n_ready = *pn_ready;
27604 if (sched_verbose)
27605 fprintf (dump, "// rs6000_sched_reorder :\n");
27607 /* Reorder the ready list, if the last ready insn
27608    is a nonpipeline insn.  */
27609 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27611 if (is_nonpipeline_insn (ready[n_ready - 1])
27612 && (recog_memoized (ready[n_ready - 2]) > 0))
27613 /* Simply swap first two insns. */
27614 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
27617 if (rs6000_cpu == PROCESSOR_POWER6)
27618 load_store_pendulum = 0;
27620 return rs6000_issue_rate ();
27623 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27625 static int
27626 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
27627 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27629 if (sched_verbose)
27630 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27632 /* For Power6, we need to handle some special cases to try and keep the
27633 store queue from overflowing and triggering expensive flushes.
27635 This code monitors how load and store instructions are being issued
27636 and skews the ready list one way or the other to increase the likelihood
27637 that a desired instruction is issued at the proper time.
27639 A couple of things are done. First, we maintain a "load_store_pendulum"
27640 to track the current state of load/store issue.
27642 - If the pendulum is at zero, then no loads or stores have been
27643 issued in the current cycle so we do nothing.
27645 - If the pendulum is 1, then a single load has been issued in this
27646 cycle and we attempt to locate another load in the ready list to
27647 issue with it.
27649 - If the pendulum is -2, then two stores have already been
27650 issued in this cycle, so we increase the priority of the first load
27651 in the ready list to increase its likelihood of being chosen first
27652 in the next cycle.
27654 - If the pendulum is -1, then a single store has been issued in this
27655 cycle and we attempt to locate another store in the ready list to
27656 issue with it, preferring a store to an adjacent memory location to
27657 facilitate store pairing in the store queue.
27659 - If the pendulum is 2, then two loads have already been
27660 issued in this cycle, so we increase the priority of the first store
27661 in the ready list to increase its likelihood of being chosen first
27662 in the next cycle.
27664 - If the pendulum < -2 or > 2, then do nothing.
27666 Note: This code covers the most common scenarios. There exist
27667 non-load/store instructions which make use of the LSU and which
27668 would need to be accounted for to strictly model the behavior
27669 of the machine. Those instructions are currently unaccounted
27670 for to help minimize compile time overhead of this code.  */
27672 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27674 int pos;
27675 int i;
27676 rtx_insn *tmp;
27677 rtx load_mem, str_mem;
27679 if (is_store_insn (last_scheduled_insn, &str_mem))
27680 /* Issuing a store, swing the load_store_pendulum to the left */
27681 load_store_pendulum--;
27682 else if (is_load_insn (last_scheduled_insn, &load_mem))
27683 /* Issuing a load, swing the load_store_pendulum to the right */
27684 load_store_pendulum++;
27685 else
27686 return cached_can_issue_more;
27688 /* If the pendulum is balanced, or there is only one instruction on
27689 the ready list, then all is well, so return. */
27690 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27691 return cached_can_issue_more;
27693 if (load_store_pendulum == 1)
27695 /* A load has been issued in this cycle. Scan the ready list
27696 for another load to issue with it */
27697 pos = *pn_ready-1;
27699 while (pos >= 0)
27701 if (is_load_insn (ready[pos], &load_mem))
27703 /* Found a load. Move it to the head of the ready list,
27704 and adjust its priority so that it is more likely to
27705 stay there.  */
27706 tmp = ready[pos];
27707 for (i=pos; i<*pn_ready-1; i++)
27708 ready[i] = ready[i + 1];
27709 ready[*pn_ready-1] = tmp;
27711 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27712 INSN_PRIORITY (tmp)++;
27713 break;
27715 pos--;
27718 else if (load_store_pendulum == -2)
27720 /* Two stores have been issued in this cycle. Increase the
27721 priority of the first load in the ready list to favor it for
27722 issuing in the next cycle. */
27723 pos = *pn_ready-1;
27725 while (pos >= 0)
27727 if (is_load_insn (ready[pos], &load_mem)
27728 && !sel_sched_p ()
27729 && INSN_PRIORITY_KNOWN (ready[pos]))
27731 INSN_PRIORITY (ready[pos])++;
27733 /* Adjust the pendulum to account for the fact that a load
27734 was found and increased in priority. This is to prevent
27735 increasing the priority of multiple loads */
27736 load_store_pendulum--;
27738 break;
27740 pos--;
27743 else if (load_store_pendulum == -1)
27745 /* A store has been issued in this cycle. Scan the ready list for
27746 another store to issue with it, preferring a store to an adjacent
27747 memory location */
27748 int first_store_pos = -1;
27750 pos = *pn_ready-1;
27752 while (pos >= 0)
27754 if (is_store_insn (ready[pos], &str_mem))
27756 rtx str_mem2;
27757 /* Maintain the index of the first store found on the
27758 list */
27759 if (first_store_pos == -1)
27760 first_store_pos = pos;
27762 if (is_store_insn (last_scheduled_insn, &str_mem2)
27763 && adjacent_mem_locations (str_mem, str_mem2))
27765 /* Found an adjacent store. Move it to the head of the
27766 ready list, and adjust its priority so that it is
27767 more likely to stay there.  */
27768 tmp = ready[pos];
27769 for (i=pos; i<*pn_ready-1; i++)
27770 ready[i] = ready[i + 1];
27771 ready[*pn_ready-1] = tmp;
27773 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27774 INSN_PRIORITY (tmp)++;
27776 first_store_pos = -1;
27778 break;
27781 pos--;
27784 if (first_store_pos >= 0)
27786 /* An adjacent store wasn't found, but a non-adjacent store was,
27787 so move the non-adjacent store to the front of the ready
27788 list, and adjust its priority so that it is more likely to
27789 stay there. */
27790 tmp = ready[first_store_pos];
27791 for (i=first_store_pos; i<*pn_ready-1; i++)
27792 ready[i] = ready[i + 1];
27793 ready[*pn_ready-1] = tmp;
27794 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27795 INSN_PRIORITY (tmp)++;
27798 else if (load_store_pendulum == 2)
27800 /* Two loads have been issued in this cycle. Increase the priority
27801 of the first store in the ready list to favor it for issuing in
27802 the next cycle. */
27803 pos = *pn_ready-1;
27805 while (pos >= 0)
27807 if (is_store_insn (ready[pos], &str_mem)
27808 && !sel_sched_p ()
27809 && INSN_PRIORITY_KNOWN (ready[pos]))
27811 INSN_PRIORITY (ready[pos])++;
27813 /* Adjust the pendulum to account for the fact that a store
27814 was found and increased in priority. This is to prevent
27815 increasing the priority of multiple stores */
27816 load_store_pendulum++;
27818 break;
27820 pos--;
27825 return cached_can_issue_more;
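/* Example trace (illustration only): starting a cycle with the pendulum
   at 0, issuing a store moves it to -1, so the ready list is scanned for
   a second (preferably adjacent) store to pair with it; issuing another
   store moves the pendulum to -2, and the first load on the ready list
   gets a priority bump so that the next cycle leads with a load.  */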
27828 /* Return whether the presence of INSN causes a dispatch group termination
27829 of group WHICH_GROUP.
27831 If WHICH_GROUP == current_group, this function will return true if INSN
27832 causes the termination of the current group (i.e., the dispatch group to
27833 which INSN belongs). This means that INSN will be the last insn in the
27834 group it belongs to.
27836 If WHICH_GROUP == previous_group, this function will return true if INSN
27837 causes the termination of the previous group (i.e., the dispatch group that
27838 precedes the group to which INSN belongs). This means that INSN will be
27839 the first insn in the group it belongs to.  */
27841 static bool
27842 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
27844 bool first, last;
27846 if (! insn)
27847 return false;
27849 first = insn_must_be_first_in_group (insn);
27850 last = insn_must_be_last_in_group (insn);
27852 if (first && last)
27853 return true;
27855 if (which_group == current_group)
27856 return last;
27857 else if (which_group == previous_group)
27858 return first;
27860 return false;
27864 static bool
27865 insn_must_be_first_in_group (rtx_insn *insn)
27867 enum attr_type type;
27869 if (!insn
27870 || NOTE_P (insn)
27871 || DEBUG_INSN_P (insn)
27872 || GET_CODE (PATTERN (insn)) == USE
27873 || GET_CODE (PATTERN (insn)) == CLOBBER)
27874 return false;
27876 switch (rs6000_cpu)
27878 case PROCESSOR_POWER5:
27879 if (is_cracked_insn (insn))
27880 return true;
27881 case PROCESSOR_POWER4:
27882 if (is_microcoded_insn (insn))
27883 return true;
27885 if (!rs6000_sched_groups)
27886 return false;
27888 type = get_attr_type (insn);
27890 switch (type)
27892 case TYPE_MFCR:
27893 case TYPE_MFCRF:
27894 case TYPE_MTCR:
27895 case TYPE_DELAYED_CR:
27896 case TYPE_CR_LOGICAL:
27897 case TYPE_MTJMPR:
27898 case TYPE_MFJMPR:
27899 case TYPE_DIV:
27900 case TYPE_LOAD_L:
27901 case TYPE_STORE_C:
27902 case TYPE_ISYNC:
27903 case TYPE_SYNC:
27904 return true;
27905 default:
27906 break;
27908 break;
27909 case PROCESSOR_POWER6:
27910 type = get_attr_type (insn);
27912 switch (type)
27914 case TYPE_EXTS:
27915 case TYPE_CNTLZ:
27916 case TYPE_TRAP:
27917 case TYPE_MUL:
27918 case TYPE_INSERT:
27919 case TYPE_FPCOMPARE:
27920 case TYPE_MFCR:
27921 case TYPE_MTCR:
27922 case TYPE_MFJMPR:
27923 case TYPE_MTJMPR:
27924 case TYPE_ISYNC:
27925 case TYPE_SYNC:
27926 case TYPE_LOAD_L:
27927 case TYPE_STORE_C:
27928 return true;
27929 case TYPE_SHIFT:
27930 if (get_attr_dot (insn) == DOT_NO
27931 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27932 return true;
27933 else
27934 break;
27935 case TYPE_DIV:
27936 if (get_attr_size (insn) == SIZE_32)
27937 return true;
27938 else
27939 break;
27940 case TYPE_LOAD:
27941 case TYPE_STORE:
27942 case TYPE_FPLOAD:
27943 case TYPE_FPSTORE:
27944 if (get_attr_update (insn) == UPDATE_YES)
27945 return true;
27946 else
27947 break;
27948 default:
27949 break;
27951 break;
27952 case PROCESSOR_POWER7:
27953 type = get_attr_type (insn);
27955 switch (type)
27957 case TYPE_CR_LOGICAL:
27958 case TYPE_MFCR:
27959 case TYPE_MFCRF:
27960 case TYPE_MTCR:
27961 case TYPE_DIV:
27962 case TYPE_ISYNC:
27963 case TYPE_LOAD_L:
27964 case TYPE_STORE_C:
27965 case TYPE_MFJMPR:
27966 case TYPE_MTJMPR:
27967 return true;
27968 case TYPE_MUL:
27969 case TYPE_SHIFT:
27970 case TYPE_EXTS:
27971 if (get_attr_dot (insn) == DOT_YES)
27972 return true;
27973 else
27974 break;
27975 case TYPE_LOAD:
27976 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27977 || get_attr_update (insn) == UPDATE_YES)
27978 return true;
27979 else
27980 break;
27981 case TYPE_STORE:
27982 case TYPE_FPLOAD:
27983 case TYPE_FPSTORE:
27984 if (get_attr_update (insn) == UPDATE_YES)
27985 return true;
27986 else
27987 break;
27988 default:
27989 break;
27991 break;
27992 case PROCESSOR_POWER8:
27993 type = get_attr_type (insn);
27995 switch (type)
27997 case TYPE_CR_LOGICAL:
27998 case TYPE_DELAYED_CR:
27999 case TYPE_MFCR:
28000 case TYPE_MFCRF:
28001 case TYPE_MTCR:
28002 case TYPE_SYNC:
28003 case TYPE_ISYNC:
28004 case TYPE_LOAD_L:
28005 case TYPE_STORE_C:
28006 case TYPE_VECSTORE:
28007 case TYPE_MFJMPR:
28008 case TYPE_MTJMPR:
28009 return true;
28010 case TYPE_SHIFT:
28011 case TYPE_EXTS:
28012 case TYPE_MUL:
28013 if (get_attr_dot (insn) == DOT_YES)
28014 return true;
28015 else
28016 break;
28017 case TYPE_LOAD:
28018 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28019 || get_attr_update (insn) == UPDATE_YES)
28020 return true;
28021 else
28022 break;
28023 case TYPE_STORE:
28024 if (get_attr_update (insn) == UPDATE_YES
28025 && get_attr_indexed (insn) == INDEXED_YES)
28026 return true;
28027 else
28028 break;
28029 default:
28030 break;
28032 break;
28033 default:
28034 break;
28037 return false;
28040 static bool
28041 insn_must_be_last_in_group (rtx_insn *insn)
28043 enum attr_type type;
28045 if (!insn
28046 || NOTE_P (insn)
28047 || DEBUG_INSN_P (insn)
28048 || GET_CODE (PATTERN (insn)) == USE
28049 || GET_CODE (PATTERN (insn)) == CLOBBER)
28050 return false;
28052 switch (rs6000_cpu) {
28053 case PROCESSOR_POWER4:
28054 case PROCESSOR_POWER5:
28055 if (is_microcoded_insn (insn))
28056 return true;
28058 if (is_branch_slot_insn (insn))
28059 return true;
28061 break;
28062 case PROCESSOR_POWER6:
28063 type = get_attr_type (insn);
28065 switch (type)
28067 case TYPE_EXTS:
28068 case TYPE_CNTLZ:
28069 case TYPE_TRAP:
28070 case TYPE_MUL:
28071 case TYPE_FPCOMPARE:
28072 case TYPE_MFCR:
28073 case TYPE_MTCR:
28074 case TYPE_MFJMPR:
28075 case TYPE_MTJMPR:
28076 case TYPE_ISYNC:
28077 case TYPE_SYNC:
28078 case TYPE_LOAD_L:
28079 case TYPE_STORE_C:
28080 return true;
28081 case TYPE_SHIFT:
28082 if (get_attr_dot (insn) == DOT_NO
28083 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
28084 return true;
28085 else
28086 break;
28087 case TYPE_DIV:
28088 if (get_attr_size (insn) == SIZE_32)
28089 return true;
28090 else
28091 break;
28092 default:
28093 break;
28095 break;
28096 case PROCESSOR_POWER7:
28097 type = get_attr_type (insn);
28099 switch (type)
28101 case TYPE_ISYNC:
28102 case TYPE_SYNC:
28103 case TYPE_LOAD_L:
28104 case TYPE_STORE_C:
28105 return true;
28106 case TYPE_LOAD:
28107 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28108 && get_attr_update (insn) == UPDATE_YES)
28109 return true;
28110 else
28111 break;
28112 case TYPE_STORE:
28113 if (get_attr_update (insn) == UPDATE_YES
28114 && get_attr_indexed (insn) == INDEXED_YES)
28115 return true;
28116 else
28117 break;
28118 default:
28119 break;
28121 break;
28122 case PROCESSOR_POWER8:
28123 type = get_attr_type (insn);
28125 switch (type)
28127 case TYPE_MFCR:
28128 case TYPE_MTCR:
28129 case TYPE_ISYNC:
28130 case TYPE_SYNC:
28131 case TYPE_LOAD_L:
28132 case TYPE_STORE_C:
28133 return true;
28134 case TYPE_LOAD:
28135 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28136 && get_attr_update (insn) == UPDATE_YES)
28137 return true;
28138 else
28139 break;
28140 case TYPE_STORE:
28141 if (get_attr_update (insn) == UPDATE_YES
28142 && get_attr_indexed (insn) == INDEXED_YES)
28143 return true;
28144 else
28145 break;
28146 default:
28147 break;
28149 break;
28150 default:
28151 break;
28154 return false;
28157 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
28158 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
28160 static bool
28161 is_costly_group (rtx *group_insns, rtx next_insn)
28163 int i;
28164 int issue_rate = rs6000_issue_rate ();
28166 for (i = 0; i < issue_rate; i++)
28168 sd_iterator_def sd_it;
28169 dep_t dep;
28170 rtx insn = group_insns[i];
28172 if (!insn)
28173 continue;
28175 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
28177 rtx next = DEP_CON (dep);
28179 if (next == next_insn
28180 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
28181 return true;
28185 return false;
28188 /* Utility of the function redefine_groups.
28189 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
28190 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
28191 to keep it "far" (in a separate group) from GROUP_INSNS, following
28192 one of the following schemes, depending on the value of the flag
28193 -minsert_sched_nops = X:
28194 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
28195 in order to force NEXT_INSN into a separate group.
28196 (2) X < sched_finish_regroup_exact: insert exactly X nops.
28197 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
28198 insertion (has a group just ended, how many vacant issue slots remain in the
28199 last group, and how many dispatch groups were encountered so far). */
28201 static int
28202 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
28203 rtx_insn *next_insn, bool *group_end, int can_issue_more,
28204 int *group_count)
28206 rtx nop;
28207 bool force;
28208 int issue_rate = rs6000_issue_rate ();
28209 bool end = *group_end;
28210 int i;
28212 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
28213 return can_issue_more;
28215 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
28216 return can_issue_more;
28218 force = is_costly_group (group_insns, next_insn);
28219 if (!force)
28220 return can_issue_more;
28222 if (sched_verbose > 6)
28223 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
28224 *group_count, can_issue_more);
28226 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
28228 if (*group_end)
28229 can_issue_more = 0;
28231 /* Since only a branch can be issued in the last issue_slot, it is
28232 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
28233 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
28234 in this case the last nop will start a new group and the branch
28235 will be forced to the new group. */
28236 if (can_issue_more && !is_branch_slot_insn (next_insn))
28237 can_issue_more--;
28239 /* Do we have a special group ending nop? */
28240 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
28241 || rs6000_cpu_attr == CPU_POWER8)
28243 nop = gen_group_ending_nop ();
28244 emit_insn_before (nop, next_insn);
28245 can_issue_more = 0;
28247 else
28248 while (can_issue_more > 0)
28250 nop = gen_nop ();
28251 emit_insn_before (nop, next_insn);
28252 can_issue_more--;
28255 *group_end = true;
28256 return 0;
28259 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
28261 int n_nops = rs6000_sched_insert_nops;
28263 /* Nops can't be issued from the branch slot, so the effective
28264 issue_rate for nops is 'issue_rate - 1'. */
28265 if (can_issue_more == 0)
28266 can_issue_more = issue_rate;
28267 can_issue_more--;
28268 if (can_issue_more == 0)
28270 can_issue_more = issue_rate - 1;
28271 (*group_count)++;
28272 end = true;
28273 for (i = 0; i < issue_rate; i++)
28275 group_insns[i] = 0;
28279 while (n_nops > 0)
28281 nop = gen_nop ();
28282 emit_insn_before (nop, next_insn);
28283 if (can_issue_more == issue_rate - 1) /* new group begins */
28284 end = false;
28285 can_issue_more--;
28286 if (can_issue_more == 0)
28288 can_issue_more = issue_rate - 1;
28289 (*group_count)++;
28290 end = true;
28291 for (i = 0; i < issue_rate; i++)
28293 group_insns[i] = 0;
28296 n_nops--;
28299 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
28300 can_issue_more++;
28302 /* Is next_insn going to start a new group? */
28303 *group_end
28304 = (end
28305 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28306 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28307 || (can_issue_more < issue_rate &&
28308 insn_terminates_group_p (next_insn, previous_group)));
28309 if (*group_end && end)
28310 (*group_count)--;
28312 if (sched_verbose > 6)
28313 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
28314 *group_count, can_issue_more);
28315 return can_issue_more;
28318 return can_issue_more;
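/* Worked example (illustration only): with
   -minsert-sched-nops=sched_finish_regroup_exact, can_issue_more == 3 and
   a non-branch NEXT_INSN, the nop loop above emits can_issue_more - 1 == 2
   nops, leaving only the branch slot vacant so that NEXT_INSN starts a
   new group; were NEXT_INSN a branch, a third nop would be emitted.  */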
28321 /* This function tries to synch the dispatch groups that the compiler "sees"
28322 with the dispatch groups that the processor dispatcher is expected to
28323 form in practice. It tries to achieve this synchronization by forcing the
28324 estimated processor grouping on the compiler (as opposed to the function
28325 'pad_groups' which tries to force the scheduler's grouping on the processor).
28327 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
28328 examines the (estimated) dispatch groups that will be formed by the processor
28329 dispatcher. It marks these group boundaries to reflect the estimated
28330 processor grouping, overriding the grouping that the scheduler had marked.
28331 Depending on the value of the flag '-minsert-sched-nops' this function can
28332 force certain insns into separate groups or force a certain distance between
28333 them by inserting nops, for example, if there exists a "costly dependence"
28334 between the insns.
28336 The function estimates the group boundaries that the processor will form as
28337 follows: It keeps track of how many vacant issue slots are available after
28338 each insn. A subsequent insn will start a new group if one of the following
28339 4 cases applies:
28340 - no more vacant issue slots remain in the current dispatch group.
28341 - only the last issue slot, which is the branch slot, is vacant, but the next
28342 insn is not a branch.
28343 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
28344 which means that a cracked insn (which occupies two issue slots) can't be
28345 issued in this group.
28346 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
28347 start a new group. */
28349 static int
28350 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28351 rtx_insn *tail)
28353 rtx_insn *insn, *next_insn;
28354 int issue_rate;
28355 int can_issue_more;
28356 int slot, i;
28357 bool group_end;
28358 int group_count = 0;
28359 rtx *group_insns;
28361 /* Initialize. */
28362 issue_rate = rs6000_issue_rate ();
28363 group_insns = XALLOCAVEC (rtx, issue_rate);
28364 for (i = 0; i < issue_rate; i++)
28366 group_insns[i] = 0;
28368 can_issue_more = issue_rate;
28369 slot = 0;
28370 insn = get_next_active_insn (prev_head_insn, tail);
28371 group_end = false;
28373 while (insn != NULL_RTX)
28375 slot = (issue_rate - can_issue_more);
28376 group_insns[slot] = insn;
28377 can_issue_more =
28378 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28379 if (insn_terminates_group_p (insn, current_group))
28380 can_issue_more = 0;
28382 next_insn = get_next_active_insn (insn, tail);
28383 if (next_insn == NULL_RTX)
28384 return group_count + 1;
28386 /* Is next_insn going to start a new group? */
28387 group_end
28388 = (can_issue_more == 0
28389 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28390 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28391 || (can_issue_more < issue_rate &&
28392 insn_terminates_group_p (next_insn, previous_group)));
28394 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28395 next_insn, &group_end, can_issue_more,
28396 &group_count);
28398 if (group_end)
28400 group_count++;
28401 can_issue_more = 0;
28402 for (i = 0; i < issue_rate; i++)
28404 group_insns[i] = 0;
28408 if (GET_MODE (next_insn) == TImode && can_issue_more)
28409 PUT_MODE (next_insn, VOIDmode);
28410 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28411 PUT_MODE (next_insn, TImode);
28413 insn = next_insn;
28414 if (can_issue_more == 0)
28415 can_issue_more = issue_rate;
28416 } /* while */
28418 return group_count;
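/* For illustration: at issue_rate == 5 (POWER4/POWER5 above), four insns
   leave can_issue_more == 1, i.e. only the branch slot; per the rules
   above a non-branch successor then begins a new group, and TImode is
   stamped on each insn estimated to start a group so later passes can
   see the boundaries.  */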
28421 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28422 dispatch group boundaries that the scheduler had marked. Pad with nops
28423 any dispatch groups which have vacant issue slots, in order to force the
28424 scheduler's grouping on the processor dispatcher. The function
28425 returns the number of dispatch groups found. */
28427 static int
28428 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28429 rtx_insn *tail)
28431 rtx_insn *insn, *next_insn;
28432 rtx nop;
28433 int issue_rate;
28434 int can_issue_more;
28435 int group_end;
28436 int group_count = 0;
28438 /* Initialize issue_rate. */
28439 issue_rate = rs6000_issue_rate ();
28440 can_issue_more = issue_rate;
28442 insn = get_next_active_insn (prev_head_insn, tail);
28443 next_insn = get_next_active_insn (insn, tail);
28445 while (insn != NULL_RTX)
28447 can_issue_more =
28448 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28450 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28452 if (next_insn == NULL_RTX)
28453 break;
28455 if (group_end)
28457 /* If the scheduler had marked group termination at this location
28458 (between insn and next_insn), and neither insn nor next_insn will
28459 force group termination, pad the group with nops to force group
28460 termination. */
28461 if (can_issue_more
28462 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28463 && !insn_terminates_group_p (insn, current_group)
28464 && !insn_terminates_group_p (next_insn, previous_group))
28466 if (!is_branch_slot_insn (next_insn))
28467 can_issue_more--;
28469 while (can_issue_more)
28471 nop = gen_nop ();
28472 emit_insn_before (nop, next_insn);
28473 can_issue_more--;
28477 can_issue_more = issue_rate;
28478 group_count++;
28481 insn = next_insn;
28482 next_insn = get_next_active_insn (insn, tail);
28485 return group_count;
28488 /* We're beginning a new block. Initialize data structures as necessary. */
28490 static void
28491 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28492 int sched_verbose ATTRIBUTE_UNUSED,
28493 int max_ready ATTRIBUTE_UNUSED)
28495 last_scheduled_insn = NULL_RTX;
28496 load_store_pendulum = 0;
28499 /* The following function is called at the end of scheduling BB.
28500 After reload, it inserts nops to enforce insn group bundling.  */
28502 static void
28503 rs6000_sched_finish (FILE *dump, int sched_verbose)
28505 int n_groups;
28507 if (sched_verbose)
28508 fprintf (dump, "=== Finishing schedule.\n");
28510 if (reload_completed && rs6000_sched_groups)
28512 /* Do not run sched_finish hook when selective scheduling is enabled.  */
28513 if (sel_sched_p ())
28514 return;
28516 if (rs6000_sched_insert_nops == sched_finish_none)
28517 return;
28519 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28520 n_groups = pad_groups (dump, sched_verbose,
28521 current_sched_info->prev_head,
28522 current_sched_info->next_tail);
28523 else
28524 n_groups = redefine_groups (dump, sched_verbose,
28525 current_sched_info->prev_head,
28526 current_sched_info->next_tail);
28528 if (sched_verbose >= 6)
28530 fprintf (dump, "ngroups = %d\n", n_groups);
28531 print_rtl (dump, current_sched_info->prev_head);
28532 fprintf (dump, "Done finish_sched\n");
28537 struct _rs6000_sched_context
28539 short cached_can_issue_more;
28540 rtx last_scheduled_insn;
28541 int load_store_pendulum;
28544 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28545 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28547 /* Allocate store for new scheduling context. */
28548 static void *
28549 rs6000_alloc_sched_context (void)
28551 return xmalloc (sizeof (rs6000_sched_context_def));
28554 /* If CLEAN_P is true, initialize _SC with clean data;
28555 otherwise initialize it from the global context.  */
28556 static void
28557 rs6000_init_sched_context (void *_sc, bool clean_p)
28559 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28561 if (clean_p)
28563 sc->cached_can_issue_more = 0;
28564 sc->last_scheduled_insn = NULL_RTX;
28565 sc->load_store_pendulum = 0;
28567 else
28569 sc->cached_can_issue_more = cached_can_issue_more;
28570 sc->last_scheduled_insn = last_scheduled_insn;
28571 sc->load_store_pendulum = load_store_pendulum;
28575 /* Sets the global scheduling context to the one pointed to by _SC. */
28576 static void
28577 rs6000_set_sched_context (void *_sc)
28579 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28581 gcc_assert (sc != NULL);
28583 cached_can_issue_more = sc->cached_can_issue_more;
28584 last_scheduled_insn = sc->last_scheduled_insn;
28585 load_store_pendulum = sc->load_store_pendulum;
28588 /* Free _SC. */
28589 static void
28590 rs6000_free_sched_context (void *_sc)
28592 gcc_assert (_sc != NULL);
28594 free (_sc);
28598 /* Length in units of the trampoline for entering a nested function. */
28600 int
28601 rs6000_trampoline_size (void)
28603 int ret = 0;
28605 switch (DEFAULT_ABI)
28607 default:
28608 gcc_unreachable ();
28610 case ABI_AIX:
28611 ret = (TARGET_32BIT) ? 12 : 24;
28612 break;
28614 case ABI_ELFv2:
28615 gcc_assert (!TARGET_32BIT);
28616 ret = 32;
28617 break;
28619 case ABI_DARWIN:
28620 case ABI_V4:
28621 ret = (TARGET_32BIT) ? 40 : 48;
28622 break;
28625 return ret;
28628 /* Emit RTL insns to initialize the variable parts of a trampoline.
28629 FNADDR is an RTX for the address of the function's pure code.
28630 CXT is an RTX for the static chain value for the function. */
28632 static void
28633 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28635 int regsize = (TARGET_32BIT) ? 4 : 8;
28636 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28637 rtx ctx_reg = force_reg (Pmode, cxt);
28638 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28640 switch (DEFAULT_ABI)
28642 default:
28643 gcc_unreachable ();
28645 /* Under AIX, just build the 3-word function descriptor.  */
28646 case ABI_AIX:
28648 rtx fnmem, fn_reg, toc_reg;
28650 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28651 error ("You cannot take the address of a nested function if you use "
28652 "the -mno-pointers-to-nested-functions option.");
28654 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28655 fn_reg = gen_reg_rtx (Pmode);
28656 toc_reg = gen_reg_rtx (Pmode);
28658 /* Macro to shorten the code expansions below. */
28659 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28661 m_tramp = replace_equiv_address (m_tramp, addr);
28663 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28664 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28665 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28666 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28667 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28669 # undef MEM_PLUS
28671 break;
28673 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28674 case ABI_ELFv2:
28675 case ABI_DARWIN:
28676 case ABI_V4:
28677 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28678 LCT_NORMAL, VOIDmode, 4,
28679 addr, Pmode,
28680 GEN_INT (rs6000_trampoline_size ()), SImode,
28681 fnaddr, Pmode,
28682 ctx_reg, Pmode);
28683 break;
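/* For illustration: under ABI_AIX with -m32 (regsize == 4), the code
   above lays the trampoline out as three words at offsets 0, 4 and 8:
   the function's code address and TOC pointer, both copied from the
   target's function descriptor, followed by the static chain value CXT.  */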
28688 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28689 identifier as an argument, so the front end shouldn't look it up. */
28691 static bool
28692 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28694 return is_attribute_p ("altivec", attr_id);
28697 /* Handle the "altivec" attribute. The attribute may have
28698 arguments as follows:
28700 __attribute__((altivec(vector__)))
28701 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28702 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28704 and may appear more than once (e.g., 'vector bool char') in a
28705 given declaration. */
28707 static tree
28708 rs6000_handle_altivec_attribute (tree *node,
28709 tree name ATTRIBUTE_UNUSED,
28710 tree args,
28711 int flags ATTRIBUTE_UNUSED,
28712 bool *no_add_attrs)
28714 tree type = *node, result = NULL_TREE;
28715 machine_mode mode;
28716 int unsigned_p;
28717 char altivec_type
28718 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28719 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28720 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28721 : '?');
28723 while (POINTER_TYPE_P (type)
28724 || TREE_CODE (type) == FUNCTION_TYPE
28725 || TREE_CODE (type) == METHOD_TYPE
28726 || TREE_CODE (type) == ARRAY_TYPE)
28727 type = TREE_TYPE (type);
28729 mode = TYPE_MODE (type);
28731 /* Check for invalid AltiVec type qualifiers. */
28732 if (type == long_double_type_node)
28733 error ("use of %<long double%> in AltiVec types is invalid");
28734 else if (type == boolean_type_node)
28735 error ("use of boolean types in AltiVec types is invalid");
28736 else if (TREE_CODE (type) == COMPLEX_TYPE)
28737 error ("use of %<complex%> in AltiVec types is invalid");
28738 else if (DECIMAL_FLOAT_MODE_P (mode))
28739 error ("use of decimal floating point types in AltiVec types is invalid");
28740 else if (!TARGET_VSX)
28742 if (type == long_unsigned_type_node || type == long_integer_type_node)
28744 if (TARGET_64BIT)
28745 error ("use of %<long%> in AltiVec types is invalid for "
28746 "64-bit code without -mvsx");
28747 else if (rs6000_warn_altivec_long)
28748 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28749 "use %<int%>");
28751 else if (type == long_long_unsigned_type_node
28752 || type == long_long_integer_type_node)
28753 error ("use of %<long long%> in AltiVec types is invalid without "
28754 "-mvsx");
28755 else if (type == double_type_node)
28756 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28759 switch (altivec_type)
28761 case 'v':
28762 unsigned_p = TYPE_UNSIGNED (type);
28763 switch (mode)
28765 case TImode:
28766 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28767 break;
28768 case DImode:
28769 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28770 break;
28771 case SImode:
28772 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28773 break;
28774 case HImode:
28775 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28776 break;
28777 case QImode:
28778 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28779 break;
28780 case SFmode: result = V4SF_type_node; break;
28781 case DFmode: result = V2DF_type_node; break;
28782 /* If the user says 'vector int bool', we may be handed the 'bool'
28783 attribute _before_ the 'vector' attribute, and so select the
28784 proper type in the 'b' case below. */
28785 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28786 case V2DImode: case V2DFmode:
28787 result = type;
28788 default: break;
28790 break;
28791 case 'b':
28792 switch (mode)
28794 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28795 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28796 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28797 case QImode: case V16QImode: result = bool_V16QI_type_node;
28798 default: break;
28800 break;
28801 case 'p':
28802 switch (mode)
28804 case V8HImode: result = pixel_V8HI_type_node;
28805 default: break;
28807 default: break;
28810 /* Propagate qualifiers attached to the element type
28811 onto the vector type. */
28812 if (result && result != type && TYPE_QUALS (type))
28813 result = build_qualified_type (result, TYPE_QUALS (type));
28815 *no_add_attrs = true; /* No need to hang on to the attribute. */
28817 if (result)
28818 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28820 return NULL_TREE;
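/* Usage example (illustration only; assumes the AltiVec keywords map onto
   this attribute as described above):
     __attribute__((altivec(vector__))) unsigned int vi;
   hits the 'v' case with SImode and yields unsigned_V4SI_type_node, i.e.
   'vector unsigned int'; 'vector bool short' reaches the 'b' case via
   HImode and yields bool_V8HI_type_node.  */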
28823 /* AltiVec defines four built-in scalar types that serve as vector
28824 elements; we must teach the compiler how to mangle them. */
28826 static const char *
28827 rs6000_mangle_type (const_tree type)
28829 type = TYPE_MAIN_VARIANT (type);
28831 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28832 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28833 return NULL;
28835 if (type == bool_char_type_node) return "U6__boolc";
28836 if (type == bool_short_type_node) return "U6__bools";
28837 if (type == pixel_type_node) return "u7__pixel";
28838 if (type == bool_int_type_node) return "U6__booli";
28839 if (type == bool_long_type_node) return "U6__booll";
28841 /* Mangle IBM extended float long double as `g' (__float128) on
28842 powerpc*-linux where long-double-64 previously was the default. */
28843 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28844 && TARGET_ELF
28845 && TARGET_LONG_DOUBLE_128
28846 && !TARGET_IEEEQUAD)
28847 return "g";
28849 /* For all other types, use normal C++ mangling. */
28850 return NULL;
28853 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28854 struct attribute_spec.handler. */
28856 static tree
28857 rs6000_handle_longcall_attribute (tree *node, tree name,
28858 tree args ATTRIBUTE_UNUSED,
28859 int flags ATTRIBUTE_UNUSED,
28860 bool *no_add_attrs)
28862 if (TREE_CODE (*node) != FUNCTION_TYPE
28863 && TREE_CODE (*node) != FIELD_DECL
28864 && TREE_CODE (*node) != TYPE_DECL)
28866 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28867 name);
28868 *no_add_attrs = true;
28871 return NULL_TREE;
28874 /* Set longcall attributes on all functions declared when
28875 rs6000_default_long_calls is true. */
28876 static void
28877 rs6000_set_default_type_attributes (tree type)
28879 if (rs6000_default_long_calls
28880 && (TREE_CODE (type) == FUNCTION_TYPE
28881 || TREE_CODE (type) == METHOD_TYPE))
28882 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28883 NULL_TREE,
28884 TYPE_ATTRIBUTES (type));
28886 #if TARGET_MACHO
28887 darwin_set_default_type_attributes (type);
28888 #endif
28891 /* Return a reference suitable for calling a function with the
28892 longcall attribute. */
28894 rtx
28895 rs6000_longcall_ref (rtx call_ref)
28897 const char *call_name;
28898 tree node;
28900 if (GET_CODE (call_ref) != SYMBOL_REF)
28901 return call_ref;
28903 /* System V adds '.' to the internal name, so skip any leading dots.  */
28904 call_name = XSTR (call_ref, 0);
28905 if (*call_name == '.')
28907 while (*call_name == '.')
28908 call_name++;
28910 node = get_identifier (call_name);
28911 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
28914 return force_reg (Pmode, call_ref);
28917 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
28918 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
28919 #endif
28921 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28922 struct attribute_spec.handler. */
28923 static tree
28924 rs6000_handle_struct_attribute (tree *node, tree name,
28925 tree args ATTRIBUTE_UNUSED,
28926 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28928 tree *type = NULL;
28929 if (DECL_P (*node))
28931 if (TREE_CODE (*node) == TYPE_DECL)
28932 type = &TREE_TYPE (*node);
28934 else
28935 type = node;
28937 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28938 || TREE_CODE (*type) == UNION_TYPE)))
28940 warning (OPT_Wattributes, "%qE attribute ignored", name);
28941 *no_add_attrs = true;
28944 else if ((is_attribute_p ("ms_struct", name)
28945 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28946 || ((is_attribute_p ("gcc_struct", name)
28947 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28949 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28950 name);
28951 *no_add_attrs = true;
28954 return NULL_TREE;
28957 static bool
28958 rs6000_ms_bitfield_layout_p (const_tree record_type)
28960 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
28961 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28962 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
28965 #ifdef USING_ELFOS_H
28967 /* A get_unnamed_section callback, used for switching to toc_section. */
28969 static void
28970 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28972 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28973 && TARGET_MINIMAL_TOC
28974 && !TARGET_RELOCATABLE)
28976 if (!toc_initialized)
28978 toc_initialized = 1;
28979 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28980 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
28981 fprintf (asm_out_file, "\t.tc ");
28982 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
28983 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28984 fprintf (asm_out_file, "\n");
28986 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28987 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28988 fprintf (asm_out_file, " = .+32768\n");
28990 else
28991 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28993 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28994 && !TARGET_RELOCATABLE)
28995 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28996 else
28998 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28999 if (!toc_initialized)
29001 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29002 fprintf (asm_out_file, " = .+32768\n");
29003 toc_initialized = 1;
29008 /* Implement TARGET_ASM_INIT_SECTIONS. */
29010 static void
29011 rs6000_elf_asm_init_sections (void)
29013 toc_section
29014 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
29016 sdata2_section
29017 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
29018 SDATA2_SECTION_ASM_OP);
29021 /* Implement TARGET_SELECT_RTX_SECTION. */
29023 static section *
29024 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
29025 unsigned HOST_WIDE_INT align)
29027 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29028 return toc_section;
29029 else
29030 return default_elf_select_rtx_section (mode, x, align);
29033 /* For a SYMBOL_REF, set generic flags and then perform some
29034 target-specific processing.
29036 When the AIX ABI is requested on a non-AIX system, replace the
29037 function name with the real name (with a leading .) rather than the
29038 function descriptor name. This saves a lot of overriding code to
29039 read the prefixes. */
29041 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
29042 static void
29043 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
29045 default_encode_section_info (decl, rtl, first);
29047 if (first
29048 && TREE_CODE (decl) == FUNCTION_DECL
29049 && !TARGET_AIX
29050 && DEFAULT_ABI == ABI_AIX)
29052 rtx sym_ref = XEXP (rtl, 0);
29053 size_t len = strlen (XSTR (sym_ref, 0));
29054 char *str = XALLOCAVEC (char, len + 2);
29055 str[0] = '.';
29056 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
29057 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
29061 static inline bool
29062 compare_section_name (const char *section, const char *templ)
29064 int len;
29066 len = strlen (templ);
29067 return (strncmp (section, templ, len) == 0
29068 && (section[len] == 0 || section[len] == '.'));
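/* For illustration: compare_section_name (".sdata.foo", ".sdata") and
   compare_section_name (".sdata", ".sdata") both return true, while
   (".sdata2", ".sdata") returns false because the character after the
   matched prefix is '2' rather than '.' or the terminator -- which is
   why ".sdata2" is tested separately below.  */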
29071 bool
29072 rs6000_elf_in_small_data_p (const_tree decl)
29074 if (rs6000_sdata == SDATA_NONE)
29075 return false;
29077 /* We want to merge strings, so we never consider them small data. */
29078 if (TREE_CODE (decl) == STRING_CST)
29079 return false;
29081 /* Functions are never in the small data area. */
29082 if (TREE_CODE (decl) == FUNCTION_DECL)
29083 return false;
29085 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
29087 const char *section = DECL_SECTION_NAME (decl);
29088 if (compare_section_name (section, ".sdata")
29089 || compare_section_name (section, ".sdata2")
29090 || compare_section_name (section, ".gnu.linkonce.s")
29091 || compare_section_name (section, ".sbss")
29092 || compare_section_name (section, ".sbss2")
29093 || compare_section_name (section, ".gnu.linkonce.sb")
29094 || strcmp (section, ".PPC.EMB.sdata0") == 0
29095 || strcmp (section, ".PPC.EMB.sbss0") == 0)
29096 return true;
29098 else
29100 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
29102 if (size > 0
29103 && size <= g_switch_value
29104 /* If it's not public, and we're not going to reference it there,
29105 there's no need to put it in the small data section. */
29106 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
29107 return true;
29110 return false;
29113 #endif /* USING_ELFOS_H */
29115 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
29117 static bool
29118 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
29120 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
29123 /* Do not place thread-local symbol refs in the object blocks.  */
29125 static bool
29126 rs6000_use_blocks_for_decl_p (const_tree decl)
29128 return !DECL_THREAD_LOCAL_P (decl);
29131 /* Return a REG that occurs in ADDR with coefficient 1.
29132 ADDR can be effectively incremented by incrementing REG.
29134 r0 is special and we must not select it as an address
29135 register by this routine since our caller will try to
29136 increment the returned register via an "la" instruction. */
29138 rtx
29139 find_addr_reg (rtx addr)
29141 while (GET_CODE (addr) == PLUS)
29143 if (GET_CODE (XEXP (addr, 0)) == REG
29144 && REGNO (XEXP (addr, 0)) != 0)
29145 addr = XEXP (addr, 0);
29146 else if (GET_CODE (XEXP (addr, 1)) == REG
29147 && REGNO (XEXP (addr, 1)) != 0)
29148 addr = XEXP (addr, 1);
29149 else if (CONSTANT_P (XEXP (addr, 0)))
29150 addr = XEXP (addr, 1);
29151 else if (CONSTANT_P (XEXP (addr, 1)))
29152 addr = XEXP (addr, 0);
29153 else
29154 gcc_unreachable ();
29156 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
29157 return addr;
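/* Worked example (illustration only): for
     (plus (plus (reg 9) (reg 10)) (const_int 8))
   the CONSTANT_P arm first strips the constant, and the next iteration
   selects (reg 9), the first non-r0 register operand; r0 is excluded
   because "la" would read it as the literal value zero.  */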
29160 void
29161 rs6000_fatal_bad_address (rtx op)
29163 fatal_insn ("bad address", op);
29166 #if TARGET_MACHO
29168 typedef struct branch_island_d {
29169 tree function_name;
29170 tree label_name;
29171 int line_number;
29172 } branch_island;
29175 static vec<branch_island, va_gc> *branch_islands;
29177 /* Remember to generate a branch island for far calls to the given
29178 function. */
29180 static void
29181 add_compiler_branch_island (tree label_name, tree function_name,
29182 int line_number)
29184 branch_island bi = {function_name, label_name, line_number};
29185 vec_safe_push (branch_islands, bi);
29188 /* Generate far-jump branch islands for everything recorded in
29189 branch_islands. Invoked immediately after the last instruction of
29190 the epilogue has been emitted; the branch islands must be appended
29191 to, and contiguous with, the function body. Mach-O stubs are
29192 generated in machopic_output_stub(). */
29194 static void
29195 macho_branch_islands (void)
29197 char tmp_buf[512];
29199 while (!vec_safe_is_empty (branch_islands))
29201 branch_island *bi = &branch_islands->last ();
29202 const char *label = IDENTIFIER_POINTER (bi->label_name);
29203 const char *name = IDENTIFIER_POINTER (bi->function_name);
29204 char name_buf[512];
29205 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
29206 if (name[0] == '*' || name[0] == '&')
29207 strcpy (name_buf, name+1);
29208 else
29210 name_buf[0] = '_';
29211 strcpy (name_buf+1, name);
29213 strcpy (tmp_buf, "\n");
29214 strcat (tmp_buf, label);
29215 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29216 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29217 dbxout_stabd (N_SLINE, bi->line_number);
29218 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29219 if (flag_pic)
29221 if (TARGET_LINK_STACK)
29223 char name[32];
29224 get_ppc476_thunk_name (name);
29225 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
29226 strcat (tmp_buf, name);
29227 strcat (tmp_buf, "\n");
29228 strcat (tmp_buf, label);
29229 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29231 else
29233 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
29234 strcat (tmp_buf, label);
29235 strcat (tmp_buf, "_pic\n");
29236 strcat (tmp_buf, label);
29237 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29240 strcat (tmp_buf, "\taddis r11,r11,ha16(");
29241 strcat (tmp_buf, name_buf);
29242 strcat (tmp_buf, " - ");
29243 strcat (tmp_buf, label);
29244 strcat (tmp_buf, "_pic)\n");
29246 strcat (tmp_buf, "\tmtlr r0\n");
29248 strcat (tmp_buf, "\taddi r12,r11,lo16(");
29249 strcat (tmp_buf, name_buf);
29250 strcat (tmp_buf, " - ");
29251 strcat (tmp_buf, label);
29252 strcat (tmp_buf, "_pic)\n");
29254 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
29256 else
29258 strcat (tmp_buf, ":\nlis r12,hi16(");
29259 strcat (tmp_buf, name_buf);
29260 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
29261 strcat (tmp_buf, name_buf);
29262 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
29264 output_asm_insn (tmp_buf, 0);
29265 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29266 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29267 dbxout_stabd (N_SLINE, bi->line_number);
29268 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29269 branch_islands->pop ();
29273 /* NO_PREVIOUS_DEF checks whether the function name is already in the
29274 branch island list.  */
29276 static int
29277 no_previous_def (tree function_name)
29279 branch_island *bi;
29280 unsigned ix;
29282 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29283 if (function_name == bi->function_name)
29284 return 0;
29285 return 1;
29288 /* GET_PREV_LABEL gets the label name from the previous definition of
29289 the function. */
29291 static tree
29292 get_prev_label (tree function_name)
29294 branch_island *bi;
29295 unsigned ix;
29297 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29298 if (function_name == bi->function_name)
29299 return bi->label_name;
29300 return NULL_TREE;
29303 /* INSN is either a function call or a millicode call. It may have an
29304 unconditional jump in its delay slot.
29306 CALL_DEST is the routine we are calling. */
29308 char *
29309 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
29310 int cookie_operand_number)
29312 static char buf[256];
29313 if (darwin_emit_branch_islands
29314 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
29315 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
29317 tree labelname;
29318 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
29320 if (no_previous_def (funname))
29322 rtx label_rtx = gen_label_rtx ();
29323 char *label_buf, temp_buf[256];
29324 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
29325 CODE_LABEL_NUMBER (label_rtx));
29326 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
29327 labelname = get_identifier (label_buf);
29328 add_compiler_branch_island (labelname, funname, insn_line (insn));
29330 else
29331 labelname = get_prev_label (funname);
29333 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
29334 instruction will reach 'foo', otherwise link as 'bl L42'".
29335 "L42" should be a 'branch island', that will do a far jump to
29336 'foo'. Branch islands are generated in
29337 macho_branch_islands(). */
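/* Illustrative sketch (an assumption about the assembled output, not
   emitted from this spot): a long call to foo () might assemble as

	jbsr _foo,L42

   with the island emitted later by macho_branch_islands () looking,
   in the non-PIC case, roughly like

   L42:	lis r12,hi16(_foo)
	ori r12,r12,lo16(_foo)
	mtctr r12
	bctr

   matching the sequence built with strcat above.  */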
29338 sprintf (buf, "jbsr %%z%d,%.246s",
29339 dest_operand_number, IDENTIFIER_POINTER (labelname));
29341 else
29342 sprintf (buf, "bl %%z%d", dest_operand_number);
29343 return buf;
29346 /* Generate PIC and indirect symbol stubs. */
29348 void
29349 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29351 unsigned int length;
29352 char *symbol_name, *lazy_ptr_name;
29353 char *local_label_0;
29354 static int label = 0;
29356 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29357 symb = (*targetm.strip_name_encoding) (symb);
29360 length = strlen (symb);
29361 symbol_name = XALLOCAVEC (char, length + 32);
29362 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29364 lazy_ptr_name = XALLOCAVEC (char, length + 32);
29365 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
29367 if (flag_pic == 2)
29368 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
29369 else
29370 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
29372 if (flag_pic == 2)
29374 fprintf (file, "\t.align 5\n");
29376 fprintf (file, "%s:\n", stub);
29377 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29379 label++;
29380 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29381 sprintf (local_label_0, "\"L%011d$spb\"", label);
29383 fprintf (file, "\tmflr r0\n");
29384 if (TARGET_LINK_STACK)
29386 char name[32];
29387 get_ppc476_thunk_name (name);
29388 fprintf (file, "\tbl %s\n", name);
29389 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29391 else
29393 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29394 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29396 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29397 lazy_ptr_name, local_label_0);
29398 fprintf (file, "\tmtlr r0\n");
29399 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29400 (TARGET_64BIT ? "ldu" : "lwzu"),
29401 lazy_ptr_name, local_label_0);
29402 fprintf (file, "\tmtctr r12\n");
29403 fprintf (file, "\tbctr\n");
29405 else
29407 fprintf (file, "\t.align 4\n");
29409 fprintf (file, "%s:\n", stub);
29410 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29412 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29413 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29414 (TARGET_64BIT ? "ldu" : "lwzu"),
29415 lazy_ptr_name);
29416 fprintf (file, "\tmtctr r12\n");
29417 fprintf (file, "\tbctr\n");
29420 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29421 fprintf (file, "%s:\n", lazy_ptr_name);
29422 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29423 fprintf (file, "%sdyld_stub_binding_helper\n",
29424 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
29427 /* Legitimize PIC addresses. If the address is already
29428 position-independent, we return ORIG. Newly generated
29429 position-independent addresses go into a reg. This is REG if nonzero;
29430 otherwise we allocate register(s) as necessary. */
29432 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
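/* Illustration: SMALL_INT accepts exactly the signed 16-bit range.
   Adding 0x8000 maps [-0x8000, 0x7fff] onto [0, 0xffff], so -32768
   and 32767 pass while 32768 and -32769 do not -- i.e. the offset
   fits in a D-form displacement field.  */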
29434 rtx
29435 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
29436 rtx reg)
29438 rtx base, offset;
29440 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29441 reg = gen_reg_rtx (Pmode);
29443 if (GET_CODE (orig) == CONST)
29445 rtx reg_temp;
29447 if (GET_CODE (XEXP (orig, 0)) == PLUS
29448 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29449 return orig;
29451 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29453 /* Use a different reg for the intermediate value, as
29454 it will be marked UNCHANGING. */
29455 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29456 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29457 Pmode, reg_temp);
29458 offset =
29459 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29460 Pmode, reg);
29462 if (GET_CODE (offset) == CONST_INT)
29464 if (SMALL_INT (offset))
29465 return plus_constant (Pmode, base, INTVAL (offset));
29466 else if (! reload_in_progress && ! reload_completed)
29467 offset = force_reg (Pmode, offset);
29468 else
29470 rtx mem = force_const_mem (Pmode, orig);
29471 return machopic_legitimize_pic_address (mem, Pmode, reg);
29474 return gen_rtx_PLUS (Pmode, base, offset);
29477 /* Fall back on generic machopic code. */
29478 return machopic_legitimize_pic_address (orig, mode, reg);
29481 /* Output a .machine directive for the Darwin assembler, and call
29482 the generic start_file routine. */
29484 static void
29485 rs6000_darwin_file_start (void)
29487 static const struct
29489 const char *arg;
29490 const char *name;
29491 HOST_WIDE_INT if_set;
29492 } mapping[] = {
29493 { "ppc64", "ppc64", MASK_64BIT },
29494 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29495 { "power4", "ppc970", 0 },
29496 { "G5", "ppc970", 0 },
29497 { "7450", "ppc7450", 0 },
29498 { "7400", "ppc7400", MASK_ALTIVEC },
29499 { "G4", "ppc7400", 0 },
29500 { "750", "ppc750", 0 },
29501 { "740", "ppc750", 0 },
29502 { "G3", "ppc750", 0 },
29503 { "604e", "ppc604e", 0 },
29504 { "604", "ppc604", 0 },
29505 { "603e", "ppc603", 0 },
29506 { "603", "ppc603", 0 },
29507 { "601", "ppc601", 0 },
29508 { NULL, "ppc", 0 } };
29509 const char *cpu_id = "";
29510 size_t i;
29512 rs6000_file_start ();
29513 darwin_file_start ();
29515 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29517 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29518 cpu_id = rs6000_default_cpu;
29520 if (global_options_set.x_rs6000_cpu_index)
29521 cpu_id = processor_target_table[rs6000_cpu_index].name;
29523 /* Look through the mapping array. Pick the first name that either
29524 matches the argument, has a bit set in IF_SET that is also set
29525 in the target flags, or has a NULL name. */
29527 i = 0;
29528 while (mapping[i].arg != NULL
29529 && strcmp (mapping[i].arg, cpu_id) != 0
29530 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29531 i++;
29533 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
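/* Worked example (under the table above): with -mcpu=G5 the search
   stops at an entry whose name is "ppc970" (either via the "G5"
   argument match or via an earlier ISA-flag match on the "970"
   entry) and emits "\t.machine ppc970"; with no -mcpu and none of
   the listed flags set, it falls through to the { NULL, "ppc", 0 }
   sentinel and emits "\t.machine ppc".  */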
29536 #endif /* TARGET_MACHO */
29538 #if TARGET_ELF
29539 static int
29540 rs6000_elf_reloc_rw_mask (void)
29542 if (flag_pic)
29543 return 3;
29544 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29545 return 2;
29546 else
29547 return 0;
29550 /* Record an element in the table of global constructors. SYMBOL is
29551 a SYMBOL_REF of the function to be called; PRIORITY is a number
29552 between 0 and MAX_INIT_PRIORITY.
29554 This differs from default_named_section_asm_out_constructor in
29555 that we have special handling for -mrelocatable. */
29557 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29558 static void
29559 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29561 const char *section = ".ctors";
29562 char buf[16];
29564 if (priority != DEFAULT_INIT_PRIORITY)
29566 sprintf (buf, ".ctors.%.5u",
29567 /* Invert the numbering so the linker puts us in the proper
29568 order; constructors are run from right to left, and the
29569 linker sorts in increasing order. */
29570 MAX_INIT_PRIORITY - priority);
29571 section = buf;
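/* Example of the inversion (assuming MAX_INIT_PRIORITY == 65535):
   priority 1 maps to ".ctors.65534" and priority 65535 to
   ".ctors.00000".  The linker sorts these in increasing order, and
   since the .ctors list executes right to left, the priority-1
   constructor runs before the priority-65535 one, as required.  */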
29574 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29575 assemble_align (POINTER_SIZE);
29577 if (TARGET_RELOCATABLE)
29579 fputs ("\t.long (", asm_out_file);
29580 output_addr_const (asm_out_file, symbol);
29581 fputs (")@fixup\n", asm_out_file);
29583 else
29584 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29587 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29588 static void
29589 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29591 const char *section = ".dtors";
29592 char buf[16];
29594 if (priority != DEFAULT_INIT_PRIORITY)
29596 sprintf (buf, ".dtors.%.5u",
29597 /* Invert the numbering so the linker puts us in the proper
29598 order; destructors are run from left to right, and the
29599 linker sorts in increasing order. */
29600 MAX_INIT_PRIORITY - priority);
29601 section = buf;
29604 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29605 assemble_align (POINTER_SIZE);
29607 if (TARGET_RELOCATABLE)
29609 fputs ("\t.long (", asm_out_file);
29610 output_addr_const (asm_out_file, symbol);
29611 fputs (")@fixup\n", asm_out_file);
29613 else
29614 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29617 void
29618 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29620 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29622 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29623 ASM_OUTPUT_LABEL (file, name);
29624 fputs (DOUBLE_INT_ASM_OP, file);
29625 rs6000_output_function_entry (file, name);
29626 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29627 if (DOT_SYMBOLS)
29629 fputs ("\t.size\t", file);
29630 assemble_name (file, name);
29631 fputs (",24\n\t.type\t.", file);
29632 assemble_name (file, name);
29633 fputs (",@function\n", file);
29634 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29636 fputs ("\t.globl\t.", file);
29637 assemble_name (file, name);
29638 putc ('\n', file);
29641 else
29642 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29643 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29644 rs6000_output_function_entry (file, name);
29645 fputs (":\n", file);
29646 return;
29649 if (TARGET_RELOCATABLE
29650 && !TARGET_SECURE_PLT
29651 && (get_pool_size () != 0 || crtl->profile)
29652 && uses_TOC ())
29654 char buf[256];
29656 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29658 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29659 fprintf (file, "\t.long ");
29660 assemble_name (file, buf);
29661 putc ('-', file);
29662 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29663 assemble_name (file, buf);
29664 putc ('\n', file);
29667 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29668 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29670 if (DEFAULT_ABI == ABI_AIX)
29672 const char *desc_name, *orig_name;
29674 orig_name = (*targetm.strip_name_encoding) (name);
29675 desc_name = orig_name;
29676 while (*desc_name == '.')
29677 desc_name++;
29679 if (TREE_PUBLIC (decl))
29680 fprintf (file, "\t.globl %s\n", desc_name);
29682 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29683 fprintf (file, "%s:\n", desc_name);
29684 fprintf (file, "\t.long %s\n", orig_name);
29685 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29686 fputs ("\t.long 0\n", file);
29687 fprintf (file, "\t.previous\n");
29689 ASM_OUTPUT_LABEL (file, name);
29692 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29693 static void
29694 rs6000_elf_file_end (void)
29696 #ifdef HAVE_AS_GNU_ATTRIBUTE
29697 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29699 if (rs6000_passes_float)
29700 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29701 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29702 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29703 : 2));
29704 if (rs6000_passes_vector)
29705 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29706 (TARGET_ALTIVEC_ABI ? 2
29707 : TARGET_SPE_ABI ? 3
29708 : 1));
29709 if (rs6000_returns_struct)
29710 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29711 aix_struct_return ? 2 : 1);
29713 #endif
29714 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29715 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29716 file_end_indicate_exec_stack ();
29717 #endif
29719 #endif
29721 #if TARGET_XCOFF
29722 static void
29723 rs6000_xcoff_asm_output_anchor (rtx symbol)
29725 char buffer[100];
29727 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29728 SYMBOL_REF_BLOCK_OFFSET (symbol));
29729 fprintf (asm_out_file, "%s", SET_ASM_OP);
29730 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
29731 fprintf (asm_out_file, ",");
29732 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
29733 fprintf (asm_out_file, "\n");
29736 static void
29737 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29739 fputs (GLOBAL_ASM_OP, stream);
29740 RS6000_OUTPUT_BASENAME (stream, name);
29741 putc ('\n', stream);
29744 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
29745 points to the section string variable. */
29747 static void
29748 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29750 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29751 *(const char *const *) directive,
29752 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29755 /* Likewise for read-write sections. */
29757 static void
29758 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29760 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29761 *(const char *const *) directive,
29762 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29765 static void
29766 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29768 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29769 *(const char *const *) directive,
29770 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29773 /* A get_unnamed_section callback, used for switching to toc_section. */
29775 static void
29776 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29778 if (TARGET_MINIMAL_TOC)
29780 /* toc_section is always selected at least once from
29781 rs6000_xcoff_file_start, so this is guaranteed to be
29782 defined exactly once in each file. */
29783 if (!toc_initialized)
29785 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29786 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29787 toc_initialized = 1;
29789 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29790 (TARGET_32BIT ? "" : ",3"));
29792 else
29793 fputs ("\t.toc\n", asm_out_file);
29796 /* Implement TARGET_ASM_INIT_SECTIONS. */
29798 static void
29799 rs6000_xcoff_asm_init_sections (void)
29801 read_only_data_section
29802 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29803 &xcoff_read_only_section_name);
29805 private_data_section
29806 = get_unnamed_section (SECTION_WRITE,
29807 rs6000_xcoff_output_readwrite_section_asm_op,
29808 &xcoff_private_data_section_name);
29810 tls_data_section
29811 = get_unnamed_section (SECTION_TLS,
29812 rs6000_xcoff_output_tls_section_asm_op,
29813 &xcoff_tls_data_section_name);
29815 tls_private_data_section
29816 = get_unnamed_section (SECTION_TLS,
29817 rs6000_xcoff_output_tls_section_asm_op,
29818 &xcoff_private_data_section_name);
29820 read_only_private_data_section
29821 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29822 &xcoff_private_data_section_name);
29824 toc_section
29825 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29827 readonly_data_section = read_only_data_section;
29828 exception_section = data_section;
29831 static int
29832 rs6000_xcoff_reloc_rw_mask (void)
29834 return 3;
29837 static void
29838 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29839 tree decl ATTRIBUTE_UNUSED)
29841 int smclass;
29842 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29844 if (flags & SECTION_CODE)
29845 smclass = 0;
29846 else if (flags & SECTION_TLS)
29847 smclass = 3;
29848 else if (flags & SECTION_WRITE)
29849 smclass = 2;
29850 else
29851 smclass = 1;
29853 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29854 (flags & SECTION_CODE) ? "." : "",
29855 name, suffix[smclass], flags & SECTION_ENTSIZE);
29858 #define IN_NAMED_SECTION(DECL) \
29859 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29860 && DECL_SECTION_NAME (DECL) != NULL)
29862 static section *
29863 rs6000_xcoff_select_section (tree decl, int reloc,
29864 unsigned HOST_WIDE_INT align)
29866 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29867 named section. */
29868 if (align > BIGGEST_ALIGNMENT)
29870 resolve_unique_section (decl, reloc, true);
29871 if (IN_NAMED_SECTION (decl))
29872 return get_named_section (decl, NULL, reloc);
29875 if (decl_readonly_section (decl, reloc))
29877 if (TREE_PUBLIC (decl))
29878 return read_only_data_section;
29879 else
29880 return read_only_private_data_section;
29882 else
29884 #if HAVE_AS_TLS
29885 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29887 if (TREE_PUBLIC (decl))
29888 return tls_data_section;
29889 else if (bss_initializer_p (decl))
29891 /* Convert to COMMON to emit in BSS. */
29892 DECL_COMMON (decl) = 1;
29893 return tls_comm_section;
29895 else
29896 return tls_private_data_section;
29898 else
29899 #endif
29900 if (TREE_PUBLIC (decl))
29901 return data_section;
29902 else
29903 return private_data_section;
29907 static void
29908 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
29910 const char *name;
29912 /* Use select_section for private data and uninitialized data with
29913 alignment <= BIGGEST_ALIGNMENT. */
29914 if (!TREE_PUBLIC (decl)
29915 || DECL_COMMON (decl)
29916 || (DECL_INITIAL (decl) == NULL_TREE
29917 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
29918 || DECL_INITIAL (decl) == error_mark_node
29919 || (flag_zero_initialized_in_bss
29920 && initializer_zerop (DECL_INITIAL (decl))))
29921 return;
29923 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29924 name = (*targetm.strip_name_encoding) (name);
29925 set_decl_section_name (decl, name);
29928 /* Select section for constant in constant pool.
29930 On RS/6000, all constants are in the private read-only data area.
29931 However, if this is being placed in the TOC it must be output as a
29932 toc entry. */
29934 static section *
29935 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
29936 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
29938 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29939 return toc_section;
29940 else
29941 return read_only_private_data_section;
29944 /* Remove any trailing [DS] or the like from the symbol name. */
29946 static const char *
29947 rs6000_xcoff_strip_name_encoding (const char *name)
29949 size_t len;
29950 if (*name == '*')
29951 name++;
29952 len = strlen (name);
29953 if (name[len - 1] == ']')
29954 return ggc_alloc_string (name, len - 4);
29955 else
29956 return name;
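/* Illustration: both "foo[DS]" and "*foo[DS]" strip to "foo"; the
   leading '*' is skipped and a trailing "[XX]" suffix is assumed to
   be exactly four characters long, hence the len - 4.  */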
29959 /* Section attributes. AIX is always PIC. */
29961 static unsigned int
29962 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
29964 unsigned int align;
29965 unsigned int flags = default_section_type_flags (decl, name, reloc);
29967 /* Align to at least UNIT size. */
29968 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
29969 align = MIN_UNITS_PER_WORD;
29970 else
29971 /* Increase alignment of large objects if not already stricter. */
29972 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
29973 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
29974 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
29976 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
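/* Sketch of the encoding (an assumption about how the bits are read
   back): a decl aligned to 16 bytes yields exact_log2 (16) == 4 in
   the SECTION_ENTSIZE bits, which later reappears as the trailing
   alignment operand printed by rs6000_xcoff_asm_named_section above
   ("...,4").  */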
29979 /* Output at beginning of assembler file.
29981 Initialize the section names for the RS/6000 at this point.
29983 Specify filename, including full path, to assembler.
29985 We want to go into the TOC section so at least one .toc will be emitted.
29986 Also, in order to output proper .bs/.es pairs, we need at least one static
29987 [RW] section emitted.
29989 Finally, declare mcount when profiling to make the assembler happy. */
29991 static void
29992 rs6000_xcoff_file_start (void)
29994 rs6000_gen_section_name (&xcoff_bss_section_name,
29995 main_input_filename, ".bss_");
29996 rs6000_gen_section_name (&xcoff_private_data_section_name,
29997 main_input_filename, ".rw_");
29998 rs6000_gen_section_name (&xcoff_read_only_section_name,
29999 main_input_filename, ".ro_");
30000 rs6000_gen_section_name (&xcoff_tls_data_section_name,
30001 main_input_filename, ".tls_");
30002 rs6000_gen_section_name (&xcoff_tbss_section_name,
30003 main_input_filename, ".tbss_[UL]");
30005 fputs ("\t.file\t", asm_out_file);
30006 output_quoted_string (asm_out_file, main_input_filename);
30007 fputc ('\n', asm_out_file);
30008 if (write_symbols != NO_DEBUG)
30009 switch_to_section (private_data_section);
30010 switch_to_section (text_section);
30011 if (profile_flag)
30012 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
30013 rs6000_file_start ();
30016 /* Output at end of assembler file.
30017 On the RS/6000, referencing data should automatically pull in text. */
30019 static void
30020 rs6000_xcoff_file_end (void)
30022 switch_to_section (text_section);
30023 fputs ("_section_.text:\n", asm_out_file);
30024 switch_to_section (data_section);
30025 fputs (TARGET_32BIT
30026 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
30027 asm_out_file);
30030 struct declare_alias_data
30032 FILE *file;
30033 bool function_descriptor;
30036 /* Declare alias N. A helper callback for call_for_symbol_and_aliases. */
30038 static bool
30039 rs6000_declare_alias (struct symtab_node *n, void *d)
30041 struct declare_alias_data *data = (struct declare_alias_data *)d;
30042 /* Main symbol is output specially, because varasm machinery does part of
30043 the job for us - we do not need to declare .globl/.lglobl and such. */
30044 if (!n->alias || n->weakref)
30045 return false;
30047 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
30048 return false;
30050 /* Prevent assemble_alias from trying to use .set pseudo operation
30051 that does not behave as expected by the middle-end. */
30052 TREE_ASM_WRITTEN (n->decl) = true;
30054 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
30055 char *buffer = (char *) alloca (strlen (name) + 2);
30056 char *p;
30057 int dollar_inside = 0;
30059 strcpy (buffer, name);
30060 p = strchr (buffer, '$');
30061 while (p) {
30062 *p = '_';
30063 dollar_inside++;
30064 p = strchr (p + 1, '$');
30066 if (TREE_PUBLIC (n->decl))
30068 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
30070 if (dollar_inside) {
30071 if (data->function_descriptor)
30072 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30073 else
30074 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30076 if (data->function_descriptor)
30077 fputs ("\t.globl .", data->file);
30078 else
30079 fputs ("\t.globl ", data->file);
30080 RS6000_OUTPUT_BASENAME (data->file, buffer);
30081 putc ('\n', data->file);
30083 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
30084 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
30086 else
30088 if (dollar_inside)
30090 if (data->function_descriptor)
30091 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30092 else
30093 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30095 if (data->function_descriptor)
30096 fputs ("\t.lglobl .", data->file);
30097 else
30098 fputs ("\t.lglobl ", data->file);
30099 RS6000_OUTPUT_BASENAME (data->file, buffer);
30100 putc ('\n', data->file);
30102 if (data->function_descriptor)
30103 fputs (".", data->file);
30104 RS6000_OUTPUT_BASENAME (data->file, buffer);
30105 fputs (":\n", data->file);
30106 return false;
30109 /* This macro produces the initial definition of a function name.
30110 On the RS/6000, we need to place an extra '.' in the function name and
30111 output the function descriptor.
30112 Dollar signs are converted to underscores.
30114 The csect for the function will have already been created when
30115 text_section was selected. We do have to go back to that csect, however.
30117 The third and fourth parameters to the .function pseudo-op (16 and 044)
30118 are placeholders which no longer have any use.
30120 Because AIX assembler's .set command has unexpected semantics, we output
30121 all aliases as alternative labels in front of the definition. */
30123 void
30124 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
30126 char *buffer = (char *) alloca (strlen (name) + 1);
30127 char *p;
30128 int dollar_inside = 0;
30129 struct declare_alias_data data = {file, false};
30131 strcpy (buffer, name);
30132 p = strchr (buffer, '$');
30133 while (p) {
30134 *p = '_';
30135 dollar_inside++;
30136 p = strchr (p + 1, '$');
30138 if (TREE_PUBLIC (decl))
30140 if (!RS6000_WEAK || !DECL_WEAK (decl))
30142 if (dollar_inside) {
30143 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30144 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30146 fputs ("\t.globl .", file);
30147 RS6000_OUTPUT_BASENAME (file, buffer);
30148 putc ('\n', file);
30151 else
30153 if (dollar_inside) {
30154 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30155 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30157 fputs ("\t.lglobl .", file);
30158 RS6000_OUTPUT_BASENAME (file, buffer);
30159 putc ('\n', file);
30161 fputs ("\t.csect ", file);
30162 RS6000_OUTPUT_BASENAME (file, buffer);
30163 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
30164 RS6000_OUTPUT_BASENAME (file, buffer);
30165 fputs (":\n", file);
30166 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30167 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
30168 RS6000_OUTPUT_BASENAME (file, buffer);
30169 fputs (", TOC[tc0], 0\n", file);
30170 in_section = NULL;
30171 switch_to_section (function_section (decl));
30172 putc ('.', file);
30173 RS6000_OUTPUT_BASENAME (file, buffer);
30174 fputs (":\n", file);
30175 data.function_descriptor = true;
30176 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30177 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
30178 xcoffout_declare_function (file, decl, buffer);
30179 return;
30182 /* This macro produces the initial definition of an object (variable) name.
30183 Because AIX assembler's .set command has unexpected semantics, we output
30184 all aliases as alternative labels in front of the definition. */
30186 void
30187 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
30189 struct declare_alias_data data = {file, false};
30190 RS6000_OUTPUT_BASENAME (file, name);
30191 fputs (":\n", file);
30192 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30195 #ifdef HAVE_AS_TLS
30196 static void
30197 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
30199 rtx symbol;
30200 int flags;
30202 default_encode_section_info (decl, rtl, first);
30204 /* Careful not to prod global register variables. */
30205 if (!MEM_P (rtl))
30206 return;
30207 symbol = XEXP (rtl, 0);
30208 if (GET_CODE (symbol) != SYMBOL_REF)
30209 return;
30211 flags = SYMBOL_REF_FLAGS (symbol);
30213 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30214 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
30216 SYMBOL_REF_FLAGS (symbol) = flags;
30218 #endif /* HAVE_AS_TLS */
30219 #endif /* TARGET_XCOFF */
30221 /* Compute a (partial) cost for rtx X. Return true if the complete
30222 cost has been computed, and false if subexpressions should be
30223 scanned. In either case, *TOTAL contains the cost result. */
30225 static bool
30226 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
30227 int *total, bool speed)
30229 machine_mode mode = GET_MODE (x);
30231 switch (code)
30233 /* On the RS/6000, if it is valid in the insn, it is free. */
30234 case CONST_INT:
30235 if (((outer_code == SET
30236 || outer_code == PLUS
30237 || outer_code == MINUS)
30238 && (satisfies_constraint_I (x)
30239 || satisfies_constraint_L (x)))
30240 || (outer_code == AND
30241 && (satisfies_constraint_K (x)
30242 || (mode == SImode
30243 ? satisfies_constraint_L (x)
30244 : satisfies_constraint_J (x))
30245 || mask_operand (x, mode)
30246 || (mode == DImode
30247 && mask64_operand (x, DImode))))
30248 || ((outer_code == IOR || outer_code == XOR)
30249 && (satisfies_constraint_K (x)
30250 || (mode == SImode
30251 ? satisfies_constraint_L (x)
30252 : satisfies_constraint_J (x))))
30253 || outer_code == ASHIFT
30254 || outer_code == ASHIFTRT
30255 || outer_code == LSHIFTRT
30256 || outer_code == ROTATE
30257 || outer_code == ROTATERT
30258 || outer_code == ZERO_EXTRACT
30259 || (outer_code == MULT
30260 && satisfies_constraint_I (x))
30261 || ((outer_code == DIV || outer_code == UDIV
30262 || outer_code == MOD || outer_code == UMOD)
30263 && exact_log2 (INTVAL (x)) >= 0)
30264 || (outer_code == COMPARE
30265 && (satisfies_constraint_I (x)
30266 || satisfies_constraint_K (x)))
30267 || ((outer_code == EQ || outer_code == NE)
30268 && (satisfies_constraint_I (x)
30269 || satisfies_constraint_K (x)
30270 || (mode == SImode
30271 ? satisfies_constraint_L (x)
30272 : satisfies_constraint_J (x))))
30273 || (outer_code == GTU
30274 && satisfies_constraint_I (x))
30275 || (outer_code == LTU
30276 && satisfies_constraint_P (x)))
30278 *total = 0;
30279 return true;
30281 else if ((outer_code == PLUS
30282 && reg_or_add_cint_operand (x, VOIDmode))
30283 || (outer_code == MINUS
30284 && reg_or_sub_cint_operand (x, VOIDmode))
30285 || ((outer_code == SET
30286 || outer_code == IOR
30287 || outer_code == XOR)
30288 && (INTVAL (x)
30289 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
30291 *total = COSTS_N_INSNS (1);
30292 return true;
30294 /* FALLTHRU */
30296 case CONST_DOUBLE:
30297 case CONST_WIDE_INT:
30298 case CONST:
30299 case HIGH:
30300 case SYMBOL_REF:
30301 case MEM:
30302 /* When optimizing for size, MEM should be slightly more expensive
30303 than generating an address, e.g., (plus (reg) (const)).
30304 L1 cache latency is about two instructions. */
30305 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
30306 return true;
30308 case LABEL_REF:
30309 *total = 0;
30310 return true;
30312 case PLUS:
30313 case MINUS:
30314 if (FLOAT_MODE_P (mode))
30315 *total = rs6000_cost->fp;
30316 else
30317 *total = COSTS_N_INSNS (1);
30318 return false;
30320 case MULT:
30321 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30322 && satisfies_constraint_I (XEXP (x, 1)))
30324 if (INTVAL (XEXP (x, 1)) >= -256
30325 && INTVAL (XEXP (x, 1)) <= 255)
30326 *total = rs6000_cost->mulsi_const9;
30327 else
30328 *total = rs6000_cost->mulsi_const;
30330 else if (mode == SFmode)
30331 *total = rs6000_cost->fp;
30332 else if (FLOAT_MODE_P (mode))
30333 *total = rs6000_cost->dmul;
30334 else if (mode == DImode)
30335 *total = rs6000_cost->muldi;
30336 else
30337 *total = rs6000_cost->mulsi;
30338 return false;
30340 case FMA:
30341 if (mode == SFmode)
30342 *total = rs6000_cost->fp;
30343 else
30344 *total = rs6000_cost->dmul;
30345 break;
30347 case DIV:
30348 case MOD:
30349 if (FLOAT_MODE_P (mode))
30351 *total = mode == DFmode ? rs6000_cost->ddiv
30352 : rs6000_cost->sdiv;
30353 return false;
30355 /* FALLTHRU */
30357 case UDIV:
30358 case UMOD:
30359 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30360 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
30362 if (code == DIV || code == MOD)
30363 /* Shift, addze */
30364 *total = COSTS_N_INSNS (2);
30365 else
30366 /* Shift */
30367 *total = COSTS_N_INSNS (1);
30369 else
30371 if (GET_MODE (XEXP (x, 1)) == DImode)
30372 *total = rs6000_cost->divdi;
30373 else
30374 *total = rs6000_cost->divsi;
30376 /* Add in shift and subtract for MOD. */
30377 if (code == MOD || code == UMOD)
30378 *total += COSTS_N_INSNS (2);
30379 return false;
30381 case CTZ:
30382 case FFS:
30383 *total = COSTS_N_INSNS (4);
30384 return false;
30386 case POPCOUNT:
30387 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30388 return false;
30390 case PARITY:
30391 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30392 return false;
30394 case NOT:
30395 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30397 *total = 0;
30398 return false;
30400 /* FALLTHRU */
30402 case AND:
30403 case CLZ:
30404 case IOR:
30405 case XOR:
30406 case ZERO_EXTRACT:
30407 *total = COSTS_N_INSNS (1);
30408 return false;
30410 case ASHIFT:
30411 case ASHIFTRT:
30412 case LSHIFTRT:
30413 case ROTATE:
30414 case ROTATERT:
30415 /* Handle mul_highpart. */
30416 if (outer_code == TRUNCATE
30417 && GET_CODE (XEXP (x, 0)) == MULT)
30419 if (mode == DImode)
30420 *total = rs6000_cost->muldi;
30421 else
30422 *total = rs6000_cost->mulsi;
30423 return true;
30425 else if (outer_code == AND)
30426 *total = 0;
30427 else
30428 *total = COSTS_N_INSNS (1);
30429 return false;
30431 case SIGN_EXTEND:
30432 case ZERO_EXTEND:
30433 if (GET_CODE (XEXP (x, 0)) == MEM)
30434 *total = 0;
30435 else
30436 *total = COSTS_N_INSNS (1);
30437 return false;
30439 case COMPARE:
30440 case NEG:
30441 case ABS:
30442 if (!FLOAT_MODE_P (mode))
30444 *total = COSTS_N_INSNS (1);
30445 return false;
30447 /* FALLTHRU */
30449 case FLOAT:
30450 case UNSIGNED_FLOAT:
30451 case FIX:
30452 case UNSIGNED_FIX:
30453 case FLOAT_TRUNCATE:
30454 *total = rs6000_cost->fp;
30455 return false;
30457 case FLOAT_EXTEND:
30458 if (mode == DFmode)
30459 *total = 0;
30460 else
30461 *total = rs6000_cost->fp;
30462 return false;
30464 case UNSPEC:
30465 switch (XINT (x, 1))
30467 case UNSPEC_FRSP:
30468 *total = rs6000_cost->fp;
30469 return true;
30471 default:
30472 break;
30474 break;
30476 case CALL:
30477 case IF_THEN_ELSE:
30478 if (!speed)
30480 *total = COSTS_N_INSNS (1);
30481 return true;
30483 else if (FLOAT_MODE_P (mode)
30484 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30486 *total = rs6000_cost->fp;
30487 return false;
30489 break;
30491 case NE:
30492 case EQ:
30493 case GTU:
30494 case LTU:
30495 /* Carry bit requires mode == Pmode.
30496 NEG or PLUS already counted so only add one. */
30497 if (mode == Pmode
30498 && (outer_code == NEG || outer_code == PLUS))
30500 *total = COSTS_N_INSNS (1);
30501 return true;
30503 if (outer_code == SET)
30505 if (XEXP (x, 1) == const0_rtx)
30507 if (TARGET_ISEL && !TARGET_MFCRF)
30508 *total = COSTS_N_INSNS (8);
30509 else
30510 *total = COSTS_N_INSNS (2);
30511 return true;
30513 else if (mode == Pmode)
30515 *total = COSTS_N_INSNS (3);
30516 return false;
30519 /* FALLTHRU */
30521 case GT:
30522 case LT:
30523 case UNORDERED:
30524 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30526 if (TARGET_ISEL && !TARGET_MFCRF)
30527 *total = COSTS_N_INSNS (8);
30528 else
30529 *total = COSTS_N_INSNS (2);
30530 return true;
30532 /* CC COMPARE. */
30533 if (outer_code == COMPARE)
30535 *total = 0;
30536 return true;
30538 break;
30540 default:
30541 break;
30544 return false;
30547 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
30549 static bool
30550 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30551 bool speed)
30553 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30555 fprintf (stderr,
30556 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30557 "opno = %d, total = %d, speed = %s, x:\n",
30558 ret ? "complete" : "scan inner",
30559 GET_RTX_NAME (code),
30560 GET_RTX_NAME (outer_code),
30561 opno,
30562 *total,
30563 speed ? "true" : "false");
30565 debug_rtx (x);
30567 return ret;
30570 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30572 static int
30573 rs6000_debug_address_cost (rtx x, machine_mode mode,
30574 addr_space_t as, bool speed)
30576 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30578 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30579 ret, speed ? "true" : "false");
30580 debug_rtx (x);
30582 return ret;
30586 /* A C expression returning the cost of moving data from a register of class
30587 CLASS1 to one of CLASS2. */
30589 static int
30590 rs6000_register_move_cost (machine_mode mode,
30591 reg_class_t from, reg_class_t to)
30593 int ret;
30595 if (TARGET_DEBUG_COST)
30596 dbg_cost_ctrl++;
30598 /* Moves from/to GENERAL_REGS. */
30599 if (reg_classes_intersect_p (to, GENERAL_REGS)
30600 || reg_classes_intersect_p (from, GENERAL_REGS))
30602 reg_class_t rclass = from;
30604 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30605 rclass = to;
30607 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30608 ret = (rs6000_memory_move_cost (mode, rclass, false)
30609 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30611 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30612 shift. */
30613 else if (rclass == CR_REGS)
30614 ret = 4;
30616 /* For those processors that have slow LR/CTR moves, make them more
30617 expensive than memory in order to bias spills to memory. */
30618 else if ((rs6000_cpu == PROCESSOR_POWER6
30619 || rs6000_cpu == PROCESSOR_POWER7
30620 || rs6000_cpu == PROCESSOR_POWER8)
30621 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
30622 ret = 6 * hard_regno_nregs[0][mode];
30624 else
30625 /* A move will cost one instruction per GPR moved. */
30626 ret = 2 * hard_regno_nregs[0][mode];
30629 /* If we have VSX, we can easily move between FPR or Altivec registers. */
30630 else if (VECTOR_MEM_VSX_P (mode)
30631 && reg_classes_intersect_p (to, VSX_REGS)
30632 && reg_classes_intersect_p (from, VSX_REGS))
30633 ret = 2 * hard_regno_nregs[32][mode];
30635 /* Moving between two similar registers is just one instruction. */
30636 else if (reg_classes_intersect_p (to, from))
30637 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
30639 /* Everything else has to go through GENERAL_REGS. */
30640 else
30641 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
30642 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
30644 if (TARGET_DEBUG_COST)
30646 if (dbg_cost_ctrl == 1)
30647 fprintf (stderr,
30648 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
30649 ret, GET_MODE_NAME (mode), reg_class_names[from],
30650 reg_class_names[to]);
30651 dbg_cost_ctrl--;
30654 return ret;
30657 /* A C expression returning the cost of moving data of MODE from a register to
30658 or from memory. */
30660 static int
30661 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
30662 bool in ATTRIBUTE_UNUSED)
30664 int ret;
30666 if (TARGET_DEBUG_COST)
30667 dbg_cost_ctrl++;
30669 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30670 ret = 4 * hard_regno_nregs[0][mode];
30671 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30672 || reg_classes_intersect_p (rclass, VSX_REGS)))
30673 ret = 4 * hard_regno_nregs[32][mode];
30674 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30675 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30676 else
30677 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30679 if (TARGET_DEBUG_COST)
30681 if (dbg_cost_ctrl == 1)
30682 fprintf (stderr,
30683 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30684 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30685 dbg_cost_ctrl--;
30688 return ret;
30691 /* Returns a code for a target-specific builtin that implements
30692 reciprocal of the function, or NULL_TREE if not available. */
30694 static tree
30695 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30696 bool sqrt ATTRIBUTE_UNUSED)
30698 if (optimize_insn_for_size_p ())
30699 return NULL_TREE;
30701 if (md_fn)
30702 switch (fn)
30704 case VSX_BUILTIN_XVSQRTDP:
30705 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30706 return NULL_TREE;
30708 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30710 case VSX_BUILTIN_XVSQRTSP:
30711 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30712 return NULL_TREE;
30714 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30716 default:
30717 return NULL_TREE;
30720 else
30721 switch (fn)
30723 case BUILT_IN_SQRT:
30724 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30725 return NULL_TREE;
30727 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30729 case BUILT_IN_SQRTF:
30730 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30731 return NULL_TREE;
30733 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30735 default:
30736 return NULL_TREE;
30740 /* Load up a constant. If the mode is a vector mode, splat the value across
30741 all of the vector elements. */
30743 static rtx
30744 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
30746 rtx reg;
30748 if (mode == SFmode || mode == DFmode)
30750 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30751 reg = force_reg (mode, d);
30753 else if (mode == V4SFmode)
30755 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30756 rtvec v = gen_rtvec (4, d, d, d, d);
30757 reg = gen_reg_rtx (mode);
30758 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30760 else if (mode == V2DFmode)
30762 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30763 rtvec v = gen_rtvec (2, d, d);
30764 reg = gen_reg_rtx (mode);
30765 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30767 else
30768 gcc_unreachable ();
30770 return reg;
30773 /* Generate an FMA instruction. */
30775 static void
30776 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30778 machine_mode mode = GET_MODE (target);
30779 rtx dst;
30781 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30782 gcc_assert (dst != NULL);
30784 if (dst != target)
30785 emit_move_insn (target, dst);
30788 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
30790 static void
30791 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30793 machine_mode mode = GET_MODE (target);
30794 rtx dst;
30796 /* Altivec does not support fms directly;
30797 generate in terms of fma in that case. */
30798 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30799 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30800 else
30802 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30803 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30805 gcc_assert (dst != NULL);
30807 if (dst != target)
30808 emit_move_insn (target, dst);
30811 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
30813 static void
30814 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30816 machine_mode mode = GET_MODE (dst);
30817 rtx r;
30819 /* This is a tad more complicated, since the fnma_optab is for
30820 a different expression: fma(-m1, m2, a), which is the same
30821 thing except in the case of signed zeros.
30823 Fortunately we know that if FMA is supported that FNMSUB is
30824 also supported in the ISA. Just expand it directly. */
30826 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30828 r = gen_rtx_NEG (mode, a);
30829 r = gen_rtx_FMA (mode, m1, m2, r);
30830 r = gen_rtx_NEG (mode, r);
30831 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
30834 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30835 add a reg_note saying that this was a division. Support both scalar and
30836 vector divide. Assumes no trapping math and finite arguments. */
30838 void
30839 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30841 machine_mode mode = GET_MODE (dst);
30842 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30843 int i;
30845 /* Low precision estimates guarantee 5 bits of accuracy. High
30846 precision estimates guarantee 14 bits of accuracy. SFmode
30847 requires 23 bits of accuracy. DFmode requires 52 bits of
30848 accuracy. Each pass at least doubles the accuracy, leading
30849 to the following. */
30850 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30851 if (mode == DFmode || mode == V2DFmode)
30852 passes++;
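/* Checking the pass counts against the comment above: a 14-bit
   estimate doubles to >= 28 bits after 1 pass (enough for the 23
   bits SFmode requires) and to >= 56 after 2 (enough for the 52
   DFmode requires); a 5-bit estimate needs 3 passes for SFmode
   (5 -> 10 -> 20 -> 40) and 4 for DFmode.  */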
30854 enum insn_code code = optab_handler (smul_optab, mode);
30855 insn_gen_fn gen_mul = GEN_FCN (code);
30857 gcc_assert (code != CODE_FOR_nothing);
30859 one = rs6000_load_constant_and_splat (mode, dconst1);
30861 /* x0 = 1./d estimate */
30862 x0 = gen_reg_rtx (mode);
30863 emit_insn (gen_rtx_SET (VOIDmode, x0,
30864 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30865 UNSPEC_FRES)));
30867 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
30868 if (passes > 1) {
30870 /* e0 = 1. - d * x0 */
30871 e0 = gen_reg_rtx (mode);
30872 rs6000_emit_nmsub (e0, d, x0, one);
30874 /* x1 = x0 + e0 * x0 */
30875 x1 = gen_reg_rtx (mode);
30876 rs6000_emit_madd (x1, e0, x0, x0);
30878 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30879 ++i, xprev = xnext, eprev = enext) {
30881 /* enext = eprev * eprev */
30882 enext = gen_reg_rtx (mode);
30883 emit_insn (gen_mul (enext, eprev, eprev));
30885 /* xnext = xprev + enext * xprev */
30886 xnext = gen_reg_rtx (mode);
30887 rs6000_emit_madd (xnext, enext, xprev, xprev);
30890 } else
30891 xprev = x0;
30893 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30895 /* u = n * xprev */
30896 u = gen_reg_rtx (mode);
30897 emit_insn (gen_mul (u, n, xprev));
30899 /* v = n - (d * u) */
30900 v = gen_reg_rtx (mode);
30901 rs6000_emit_nmsub (v, d, u, n);
30903 /* dst = (v * xprev) + u */
30904 rs6000_emit_madd (dst, v, xprev, u);
30906 if (note_p)
30907 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
30910 /* Newton-Raphson approximation of single/double-precision floating point
30911 rsqrt. Assumes no trapping math and finite arguments. */
30913 void
30914 rs6000_emit_swrsqrt (rtx dst, rtx src)
30916 machine_mode mode = GET_MODE (src);
30917 rtx x0 = gen_reg_rtx (mode);
30918 rtx y = gen_reg_rtx (mode);
30920 /* Low precision estimates guarantee 5 bits of accuracy. High
30921 precision estimates guarantee 14 bits of accuracy. SFmode
30922 requires 23 bits of accuracy. DFmode requires 52 bits of
30923 accuracy. Each pass at least doubles the accuracy, leading
30924 to the following. */
30925 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30926 if (mode == DFmode || mode == V2DFmode)
30927 passes++;
30929 REAL_VALUE_TYPE dconst3_2;
30930 int i;
30931 rtx halfthree;
30932 enum insn_code code = optab_handler (smul_optab, mode);
30933 insn_gen_fn gen_mul = GEN_FCN (code);
30935 gcc_assert (code != CODE_FOR_nothing);
30937 /* Load up the constant 1.5 either as a scalar, or as a vector. */
30938 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
30939 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
30941 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
30943 /* x0 = rsqrt estimate */
30944 emit_insn (gen_rtx_SET (VOIDmode, x0,
30945 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
30946 UNSPEC_RSQRT)));
30948 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
30949 rs6000_emit_msub (y, src, halfthree, src);
30951 for (i = 0; i < passes; i++)
30953 rtx x1 = gen_reg_rtx (mode);
30954 rtx u = gen_reg_rtx (mode);
30955 rtx v = gen_reg_rtx (mode);
30957 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
30958 emit_insn (gen_mul (u, x0, x0));
30959 rs6000_emit_nmsub (v, y, u, halfthree);
30960 emit_insn (gen_mul (x1, x0, v));
30961 x0 = x1;
30964 emit_move_insn (dst, x0);
30965 return;
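/* Derivation of the loop body above (for illustration): Newton's
   method on f(x) = 1/x^2 - src gives
     x1 = x0 * (3 - src * x0^2) / 2 = x0 * (1.5 - (0.5 * src) * x0^2),
   which is exactly x1 = x0 * (halfthree - y * (x0 * x0)) with
   y = 0.5 * src as computed before the loop.  */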
30968 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
30969 (Power7) targets. DST is the target, and SRC is the argument operand. */
30971 void
30972 rs6000_emit_popcount (rtx dst, rtx src)
30974 machine_mode mode = GET_MODE (dst);
30975 rtx tmp1, tmp2;
30977 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
30978 if (TARGET_POPCNTD)
30980 if (mode == SImode)
30981 emit_insn (gen_popcntdsi2 (dst, src));
30982 else
30983 emit_insn (gen_popcntddi2 (dst, src));
30984 return;
30987 tmp1 = gen_reg_rtx (mode);
30989 if (mode == SImode)
30991 emit_insn (gen_popcntbsi2 (tmp1, src));
30992 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
30993 NULL_RTX, 0);
30994 tmp2 = force_reg (SImode, tmp2);
30995 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
30997 else
30999 emit_insn (gen_popcntbdi2 (tmp1, src));
31000 tmp2 = expand_mult (DImode, tmp1,
31001 GEN_INT ((HOST_WIDE_INT)
31002 0x01010101 << 32 | 0x01010101),
31003 NULL_RTX, 0);
31004 tmp2 = force_reg (DImode, tmp2);
31005 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
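/* How the multiply trick works (illustration): after popcntb each
   byte holds its own population count, and multiplying by
   0x0101010101010101 sums all eight byte counts into the top byte;
   the shift by 56 extracts it.  E.g. if every byte of SRC is 0xff,
   the counts are all 8 and
     (0x0808080808080808 * 0x0101010101010101) >> 56 == 64.  */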
31010 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
31011 target, and SRC is the argument operand. */
31013 void
31014 rs6000_emit_parity (rtx dst, rtx src)
31016 machine_mode mode = GET_MODE (dst);
31017 rtx tmp;
31019 tmp = gen_reg_rtx (mode);
31021 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
31022 if (TARGET_CMPB)
31024 if (mode == SImode)
31026 emit_insn (gen_popcntbsi2 (tmp, src));
31027 emit_insn (gen_paritysi2_cmpb (dst, tmp));
31029 else
31031 emit_insn (gen_popcntbdi2 (tmp, src));
31032 emit_insn (gen_paritydi2_cmpb (dst, tmp));
31034 return;
31037 if (mode == SImode)
31039 /* Is mult+shift >= shift+xor+shift+xor? */
31040 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
31042 rtx tmp1, tmp2, tmp3, tmp4;
31044 tmp1 = gen_reg_rtx (SImode);
31045 emit_insn (gen_popcntbsi2 (tmp1, src));
31047 tmp2 = gen_reg_rtx (SImode);
31048 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
31049 tmp3 = gen_reg_rtx (SImode);
31050 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
31052 tmp4 = gen_reg_rtx (SImode);
31053 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
31054 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
31056 else
31057 rs6000_emit_popcount (tmp, src);
31058 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
31060 else
31062 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
31063 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
31065 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
31067 tmp1 = gen_reg_rtx (DImode);
31068 emit_insn (gen_popcntbdi2 (tmp1, src));
31070 tmp2 = gen_reg_rtx (DImode);
31071 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
31072 tmp3 = gen_reg_rtx (DImode);
31073 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
31075 tmp4 = gen_reg_rtx (DImode);
31076 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
31077 tmp5 = gen_reg_rtx (DImode);
31078 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
31080 tmp6 = gen_reg_rtx (DImode);
31081 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
31082 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
31084 else
31085 rs6000_emit_popcount (tmp, src);
31086 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
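/* Illustration of the fold above: XOR preserves each bit's parity,
   so folding the byte counts at 32-, 16- and 8-bit granularity
   leaves a low byte whose least significant bit equals the sum of
   all byte counts modulo 2, i.e. the parity of SRC; the final AND
   with 1 extracts that bit.  */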
31090 /* Expand an Altivec constant permutation for little endian mode.
31091 There are two issues: First, the two input operands must be
31092 swapped so that together they form a double-wide array in LE
31093 order. Second, the vperm instruction has surprising behavior
31094 in LE mode: it interprets the elements of the source vectors
31095 in BE mode ("left to right") and interprets the elements of
31096 the destination vector in LE mode ("right to left"). To
31097 correct for this, we must subtract each element of the permute
31098 control vector from 31.
31100 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
31101 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
31102 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
31103 serve as the permute control vector. Then, in BE mode,
31105 vperm 9,10,11,12
31107 places the desired result in vr9. However, in LE mode the
31108 vector contents will be
31110 vr10 = 00000003 00000002 00000001 00000000
31111 vr11 = 00000007 00000006 00000005 00000004
31113 The result of the vperm using the same permute control vector is
31115 vr9 = 05000000 07000000 01000000 03000000
31117 That is, the leftmost 4 bytes of vr10 are interpreted as the
31118 source for the rightmost 4 bytes of vr9, and so on.
31120 If we change the permute control vector to
31122 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
31124 and issue
31126 vperm 9,11,10,12
31128 we get the desired
31130 vr9 = 00000006 00000004 00000002 00000000. */
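/* The adjustment performed below, in isolation (illustrative only):

     for (i = 0; i < 16; ++i)
       perm[i] = GEN_INT (31 - (INTVAL (XVECEXP (sel, 0, i)) & 31));

   applied to {0,1,2,3,8,...,27} this produces the
   {31,30,29,28,23,...,4} control vector shown above.  */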
31132 void
31133 altivec_expand_vec_perm_const_le (rtx operands[4])
31135 unsigned int i;
31136 rtx perm[16];
31137 rtx constv, unspec;
31138 rtx target = operands[0];
31139 rtx op0 = operands[1];
31140 rtx op1 = operands[2];
31141 rtx sel = operands[3];
31143 /* Unpack and adjust the constant selector. */
31144 for (i = 0; i < 16; ++i)
31146 rtx e = XVECEXP (sel, 0, i);
31147 unsigned int elt = 31 - (INTVAL (e) & 31);
31148 perm[i] = GEN_INT (elt);
31151 /* Expand to a permute, swapping the inputs and using the
31152 adjusted selector. */
31153 if (!REG_P (op0))
31154 op0 = force_reg (V16QImode, op0);
31155 if (!REG_P (op1))
31156 op1 = force_reg (V16QImode, op1);
31158 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
31159 constv = force_reg (V16QImode, constv);
31160 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
31161 UNSPEC_VPERM);
31162 if (!REG_P (target))
31164 rtx tmp = gen_reg_rtx (V16QImode);
31165 emit_move_insn (tmp, unspec);
31166 unspec = tmp;
31169 emit_move_insn (target, unspec);
31172 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
31173 permute control vector. But here it's not a constant, so we must
31174 generate a vector NAND or NOR to do the adjustment. */
31176 void
31177 altivec_expand_vec_perm_le (rtx operands[4])
31179 rtx notx, iorx, unspec;
31180 rtx target = operands[0];
31181 rtx op0 = operands[1];
31182 rtx op1 = operands[2];
31183 rtx sel = operands[3];
31184 rtx tmp = target;
31185 rtx norreg = gen_reg_rtx (V16QImode);
31186 machine_mode mode = GET_MODE (target);
31188 /* Get everything in regs so the pattern matches. */
31189 if (!REG_P (op0))
31190 op0 = force_reg (mode, op0);
31191 if (!REG_P (op1))
31192 op1 = force_reg (mode, op1);
31193 if (!REG_P (sel))
31194 sel = force_reg (V16QImode, sel);
31195 if (!REG_P (target))
31196 tmp = gen_reg_rtx (mode);
31198 /* Invert the selector with a VNAND if available, else a VNOR.
31199 The VNAND is preferred for future fusion opportunities. */
31200 notx = gen_rtx_NOT (V16QImode, sel);
31201 iorx = (TARGET_P8_VECTOR
31202 ? gen_rtx_IOR (V16QImode, notx, notx)
31203 : gen_rtx_AND (V16QImode, notx, notx));
31204 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
31206 /* Permute with operands reversed and adjusted selector. */
31207 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
31208 UNSPEC_VPERM);
31210 /* Copy into target, possibly by way of a register. */
31211 if (!REG_P (target))
31213 emit_move_insn (tmp, unspec);
31214 unspec = tmp;
31217 emit_move_insn (target, unspec);
31220 /* Expand an Altivec constant permutation. Return true if we match
31221 an efficient implementation; false to fall back to VPERM. */
31223 bool
31224 altivec_expand_vec_perm_const (rtx operands[4])
31226 struct altivec_perm_insn {
31227 HOST_WIDE_INT mask;
31228 enum insn_code impl;
31229 unsigned char perm[16];
31231 static const struct altivec_perm_insn patterns[] = {
31232 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
31233 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
31234 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
31235 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
31236 { OPTION_MASK_ALTIVEC,
31237 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
31238 : CODE_FOR_altivec_vmrglb_direct),
31239 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
31240 { OPTION_MASK_ALTIVEC,
31241 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
31242 : CODE_FOR_altivec_vmrglh_direct),
31243 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
31244 { OPTION_MASK_ALTIVEC,
31245 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
31246 : CODE_FOR_altivec_vmrglw_direct),
31247 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
31248 { OPTION_MASK_ALTIVEC,
31249 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
31250 : CODE_FOR_altivec_vmrghb_direct),
31251 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
31252 { OPTION_MASK_ALTIVEC,
31253 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
31254 : CODE_FOR_altivec_vmrghh_direct),
31255 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
31256 { OPTION_MASK_ALTIVEC,
31257 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
31258 : CODE_FOR_altivec_vmrghw_direct),
31259 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
31260 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
31261 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
31262 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
31263 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
31266 unsigned int i, j, elt, which;
31267 unsigned char perm[16];
31268 rtx target, op0, op1, sel, x;
31269 bool one_vec;
31271 target = operands[0];
31272 op0 = operands[1];
31273 op1 = operands[2];
31274 sel = operands[3];
31276 /* Unpack the constant selector. */
31277 for (i = which = 0; i < 16; ++i)
31279 rtx e = XVECEXP (sel, 0, i);
31280 elt = INTVAL (e) & 31;
31281 which |= (elt < 16 ? 1 : 2);
31282 perm[i] = elt;
31285 /* Simplify the constant selector based on operands. */
31286 switch (which)
31288 default:
31289 gcc_unreachable ();
31291 case 3:
31292 one_vec = false;
31293 if (!rtx_equal_p (op0, op1))
31294 break;
31295 /* FALLTHRU */
31297 case 2:
31298 for (i = 0; i < 16; ++i)
31299 perm[i] &= 15;
31300 op0 = op1;
31301 one_vec = true;
31302 break;
31304 case 1:
31305 op1 = op0;
31306 one_vec = true;
31307 break;
31310 /* Look for splat patterns. */
31311 if (one_vec)
31313 elt = perm[0];
31315 for (i = 0; i < 16; ++i)
31316 if (perm[i] != elt)
31317 break;
31318 if (i == 16)
31320 if (!BYTES_BIG_ENDIAN)
31321 elt = 15 - elt;
31322 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
31323 return true;
31326 if (elt % 2 == 0)
31328 for (i = 0; i < 16; i += 2)
31329 if (perm[i] != elt || perm[i + 1] != elt + 1)
31330 break;
31331 if (i == 16)
31333 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
31334 x = gen_reg_rtx (V8HImode);
31335 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
31336 GEN_INT (field)));
31337 emit_move_insn (target, gen_lowpart (V16QImode, x));
31338 return true;
31342 if (elt % 4 == 0)
31344 for (i = 0; i < 16; i += 4)
31345 if (perm[i] != elt
31346 || perm[i + 1] != elt + 1
31347 || perm[i + 2] != elt + 2
31348 || perm[i + 3] != elt + 3)
31349 break;
31350 if (i == 16)
31352 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
31353 x = gen_reg_rtx (V4SImode);
31354 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
31355 GEN_INT (field)));
31356 emit_move_insn (target, gen_lowpart (V16QImode, x));
31357 return true;
31362 /* Look for merge and pack patterns. */
31363 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
31365 bool swapped;
31367 if ((patterns[j].mask & rs6000_isa_flags) == 0)
31368 continue;
31370 elt = patterns[j].perm[0];
31371 if (perm[0] == elt)
31372 swapped = false;
31373 else if (perm[0] == elt + 16)
31374 swapped = true;
31375 else
31376 continue;
31377 for (i = 1; i < 16; ++i)
31379 elt = patterns[j].perm[i];
31380 if (swapped)
31381 elt = (elt >= 16 ? elt - 16 : elt + 16);
31382 else if (one_vec && elt >= 16)
31383 elt -= 16;
31384 if (perm[i] != elt)
31385 break;
31387 if (i == 16)
31389 enum insn_code icode = patterns[j].impl;
31390 machine_mode omode = insn_data[icode].operand[0].mode;
31391 machine_mode imode = insn_data[icode].operand[1].mode;
31393 /* For little-endian, don't use vpkuwum and vpkuhum if the
31394 underlying vector type is not V4SI or V8HI, respectively.
31395 For example, using vpkuwum with a V8HI picks up the even
31396 halfwords (BE numbering) when the even halfwords (LE
31397 numbering) are what we need. */
31398 if (!BYTES_BIG_ENDIAN
31399 && icode == CODE_FOR_altivec_vpkuwum_direct
31400 && ((GET_CODE (op0) == REG
31401 && GET_MODE (op0) != V4SImode)
31402 || (GET_CODE (op0) == SUBREG
31403 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31404 continue;
31405 if (!BYTES_BIG_ENDIAN
31406 && icode == CODE_FOR_altivec_vpkuhum_direct
31407 && ((GET_CODE (op0) == REG
31408 && GET_MODE (op0) != V8HImode)
31409 || (GET_CODE (op0) == SUBREG
31410 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31411 continue;
31413 /* For little-endian, the two input operands must be swapped
31414 (or swapped back) to ensure proper right-to-left numbering
31415 from 0 to 2N-1. */
31416 if (swapped ^ !BYTES_BIG_ENDIAN)
31417 std::swap (op0, op1);
31418 if (imode != V16QImode)
31420 op0 = gen_lowpart (imode, op0);
31421 op1 = gen_lowpart (imode, op1);
31423 if (omode == V16QImode)
31424 x = target;
31425 else
31426 x = gen_reg_rtx (omode);
31427 emit_insn (GEN_FCN (icode) (x, op0, op1));
31428 if (omode != V16QImode)
31429 emit_move_insn (target, gen_lowpart (V16QImode, x));
31430 return true;
31434 if (!BYTES_BIG_ENDIAN)
31436 altivec_expand_vec_perm_const_le (operands);
31437 return true;
31440 return false;
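/* Editorial sketch (plain C, illustrative helpers -- not GCC APIs): the
   selector analysis performed above, in isolation.  selector_inputs
   classifies which inputs a 16-byte constant selector reads (1 = first
   only, 2 = second only, 3 = both); byte_splat_p tests whether a
   single-vector selector replicates one byte. */
#include <stdbool.h>
#include <stdio.h>

static int
selector_inputs (const unsigned char perm[16])
{
  int which = 0;
  for (int i = 0; i < 16; i++)
    which |= (perm[i] & 31) < 16 ? 1 : 2;
  return which;
}

static bool
byte_splat_p (const unsigned char perm[16])
{
  for (int i = 1; i < 16; i++)
    if ((perm[i] & 15) != (perm[0] & 15))
      return false;
  return true;
}

int
main (void)
{
  unsigned char sel[16] = { 5, 5, 5, 5, 5, 5, 5, 5,
                            5, 5, 5, 5, 5, 5, 5, 5 };
  printf ("inputs=%d splat=%d\n",
          selector_inputs (sel), byte_splat_p (sel)); /* inputs=1 splat=1 */
  return 0;
}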
31443 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31444 Return true if we match an efficient implementation. */
31446 static bool
31447 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31448 unsigned char perm0, unsigned char perm1)
31450 rtx x;
31452 /* If both selectors come from the same operand, fold to a single operand. */
31453 if ((perm0 & 2) == (perm1 & 2))
31455 if (perm0 & 2)
31456 op0 = op1;
31457 else
31458 op1 = op0;
31460 /* If both operands are equal, fold to a simpler permutation. */
31461 if (rtx_equal_p (op0, op1))
31463 perm0 = perm0 & 1;
31464 perm1 = (perm1 & 1) + 2;
31466 /* If the first selector comes from the second operand, swap. */
31467 else if (perm0 & 2)
31469 if (perm1 & 2)
31470 return false;
31471 perm0 -= 2;
31472 perm1 += 2;
31473 std::swap (op0, op1);
31475 /* If the second selector does not come from the second operand, fail. */
31476 else if ((perm1 & 2) == 0)
31477 return false;
31479 /* Success! */
31480 if (target != NULL)
31482 machine_mode vmode, dmode;
31483 rtvec v;
31485 vmode = GET_MODE (target);
31486 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31487 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31488 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31489 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31490 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31491 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31493 return true;
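/* Editorial sketch (plain C model, illustrative names): the folding rules
   above for a two-element permute.  Selectors are 0..3 over the
   concatenation of two 2-element vectors; the underlying instruction
   wants element 0 taken from the first input and element 1 from the
   second.  Returns 1 and the canonical form on success, 0 when a single
   merge insn cannot express the permutation. */
#include <stdio.h>

static int
fold_perm2 (unsigned p0, unsigned p1, int ops_equal,
            unsigned *out0, unsigned *out1, int *swap_ops)
{
  *swap_ops = 0;
  if ((p0 & 2) == (p1 & 2))   /* both halves read the same input, so the */
    ops_equal = 1;            /* caller duplicates that input */
  if (ops_equal)
    {
      p0 &= 1;
      p1 = (p1 & 1) + 2;
    }
  else if (p0 & 2)            /* first half from the second input: swap */
    {
      if (p1 & 2)
        return 0;
      p0 -= 2;
      p1 += 2;
      *swap_ops = 1;
    }
  else if ((p1 & 2) == 0)     /* second half not from the second input */
    return 0;
  *out0 = p0;
  *out1 = p1;
  return 1;
}

int
main (void)
{
  unsigned a, b;
  int swap;
  if (fold_perm2 (3, 1, 0, &a, &b, &swap))
    printf ("perm=(%u,%u) swap=%d\n", a, b, swap); /* perm=(1,3) swap=1 */
  return 0;
}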
31496 bool
31497 rs6000_expand_vec_perm_const (rtx operands[4])
31499 rtx target, op0, op1, sel;
31500 unsigned char perm0, perm1;
31502 target = operands[0];
31503 op0 = operands[1];
31504 op1 = operands[2];
31505 sel = operands[3];
31507 /* Unpack the constant selector. */
31508 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31509 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31511 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31514 /* Test whether a constant permutation is supported. */
31516 static bool
31517 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
31518 const unsigned char *sel)
31520 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31521 if (TARGET_ALTIVEC)
31522 return true;
31524 /* Check for ps_merge* or evmerge* insns. */
31525 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31526 || (TARGET_SPE && vmode == V2SImode))
31528 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31529 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31530 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31533 return false;
31536 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31538 static void
31539 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31540 machine_mode vmode, unsigned nelt, rtx perm[])
31542 machine_mode imode;
31543 rtx x;
31545 imode = vmode;
31546 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31548 imode = GET_MODE_INNER (vmode);
31549 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31550 imode = mode_for_vector (imode, nelt);
31553 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31554 x = expand_vec_perm (vmode, op0, op1, x, target);
31555 if (x != target)
31556 emit_move_insn (target, x);
31559 /* Expand an extract even operation. */
31561 void
31562 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31564 machine_mode vmode = GET_MODE (target);
31565 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31566 rtx perm[16];
31568 for (i = 0; i < nelt; i++)
31569 perm[i] = GEN_INT (i * 2);
31571 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
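/* Editorial sketch: the even-extract selector built above, in plain C.
   For a 4-element vector it is { 0, 2, 4, 6 }, i.e. the even-numbered
   elements of the concatenation of OP0 and OP1. */
#include <stdio.h>

int
main (void)
{
  unsigned nelt = 4, i;
  for (i = 0; i < nelt; i++)
    printf ("%u ", i * 2);     /* prints: 0 2 4 6 */
  printf ("\n");
  return 0;
}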
31574 /* Expand a vector interleave operation. */
31576 void
31577 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31579 machine_mode vmode = GET_MODE (target);
31580 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31581 rtx perm[16];
31583 high = (highp ? 0 : nelt / 2);
31584 for (i = 0; i < nelt / 2; i++)
31586 perm[i * 2] = GEN_INT (i + high);
31587 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31590 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
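/* Editorial sketch: the interleave selectors built above, in plain C.
   With nelt = 4, highp yields { 0, 4, 1, 5 } (high halves of OP0 and OP1
   interleaved) and !highp yields { 2, 6, 3, 7 }. */
#include <stdio.h>

static void
print_interleave_sel (unsigned nelt, int highp)
{
  unsigned base = highp ? 0 : nelt / 2;
  for (unsigned i = 0; i < nelt / 2; i++)
    printf ("%u %u ", i + base, i + nelt + base);
  printf ("\n");
}

int
main (void)
{
  print_interleave_sel (4, 1);  /* 0 4 1 5 */
  print_interleave_sel (4, 0);  /* 2 6 3 7 */
  return 0;
}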
31593 /* Scale a V2DF vector SRC by two raised to the power SCALE, and place the result in TGT. */
31594 void
31595 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31597 HOST_WIDE_INT hwi_scale (scale);
31598 REAL_VALUE_TYPE r_pow;
31599 rtvec v = rtvec_alloc (2);
31600 rtx elt;
31601 rtx scale_vec = gen_reg_rtx (V2DFmode);
31602 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31603 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31604 RTVEC_ELT (v, 0) = elt;
31605 RTVEC_ELT (v, 1) = elt;
31606 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31607 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
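/* Editorial sketch: what the V2DF scaling above computes, element-wise,
   in scalar C -- each lane is multiplied by 2**SCALE (here via ldexp). */
#include <math.h>
#include <stdio.h>

static void
scale_2_doubles (double v[2], int scale)
{
  double factor = ldexp (1.0, scale);  /* 2 raised to SCALE */
  v[0] *= factor;
  v[1] *= factor;
}

int
main (void)
{
  double v[2] = { 1.5, -3.0 };
  scale_2_doubles (v, 3);
  printf ("%g %g\n", v[0], v[1]);      /* 12 -24 */
  return 0;
}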
31610 /* Return an RTX representing where to find the function value of a
31611 function returning MODE. */
31612 static rtx
31613 rs6000_complex_function_value (machine_mode mode)
31615 unsigned int regno;
31616 rtx r1, r2;
31617 machine_mode inner = GET_MODE_INNER (mode);
31618 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31620 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31621 regno = FP_ARG_RETURN;
31622 else
31624 regno = GP_ARG_RETURN;
31626 /* 32-bit is OK since it'll go in r3/r4. */
31627 if (TARGET_32BIT && inner_bytes >= 4)
31628 return gen_rtx_REG (mode, regno);
31631 if (inner_bytes >= 8)
31632 return gen_rtx_REG (mode, regno);
31634 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
31635 const0_rtx);
31636 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
31637 GEN_INT (inner_bytes));
31638 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
31641 /* Return an rtx describing a return value of MODE as a PARALLEL
31642 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
31643 stride REG_STRIDE. */
31645 static rtx
31646 rs6000_parallel_return (machine_mode mode,
31647 int n_elts, machine_mode elt_mode,
31648 unsigned int regno, unsigned int reg_stride)
31650 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
31652 int i;
31653 for (i = 0; i < n_elts; i++)
31655 rtx r = gen_rtx_REG (elt_mode, regno);
31656 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31657 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31658 regno += reg_stride;
31661 return par;
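/* Editorial sketch (illustrative struct, not a GCC type): the
   (register, byte-offset) pairs the PARALLEL above describes.  E.g. a
   16-byte value split across two 8-byte registers starting at regno 3
   with stride 1 yields (3,0) and (4,8). */
#include <stdio.h>

struct ret_piece { unsigned regno; unsigned offset; };

static void
parallel_return_pieces (struct ret_piece *out, int n_elts,
                        unsigned elt_size, unsigned regno, unsigned stride)
{
  for (int i = 0; i < n_elts; i++)
    {
      out[i].regno = regno;
      out[i].offset = i * elt_size;
      regno += stride;
    }
}

int
main (void)
{
  struct ret_piece p[2];
  parallel_return_pieces (p, 2, 8, 3, 1);
  printf ("(%u,%u) (%u,%u)\n", p[0].regno, p[0].offset,
          p[1].regno, p[1].offset);   /* (3,0) (4,8) */
  return 0;
}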
31664 /* Target hook for TARGET_FUNCTION_VALUE.
31666 On the SPE, both FPs and vectors are returned in r3.
31668 On RS/6000 an integer value is in r3 and a floating-point value is in
31669 fp1, unless -msoft-float. */
31671 static rtx
31672 rs6000_function_value (const_tree valtype,
31673 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
31674 bool outgoing ATTRIBUTE_UNUSED)
31676 machine_mode mode;
31677 unsigned int regno;
31678 machine_mode elt_mode;
31679 int n_elts;
31681 /* Special handling for structs in darwin64. */
31682 if (TARGET_MACHO
31683 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
31685 CUMULATIVE_ARGS valcum;
31686 rtx valret;
31688 valcum.words = 0;
31689 valcum.fregno = FP_ARG_MIN_REG;
31690 valcum.vregno = ALTIVEC_ARG_MIN_REG;
31691 /* Do a trial code generation as if this were going to be passed as
31692 an argument; if any part goes in memory, we return NULL. */
31693 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
31694 if (valret)
31695 return valret;
31696 /* Otherwise fall through to standard ABI rules. */
31699 mode = TYPE_MODE (valtype);
31701 /* The ELFv2 ABI returns homogeneous floating-point and vector aggregates in registers. */
31702 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
31704 int first_reg, n_regs;
31706 if (SCALAR_FLOAT_MODE_P (elt_mode))
31708 /* _Decimal128 must use even/odd register pairs. */
31709 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31710 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31712 else
31714 first_reg = ALTIVEC_ARG_RETURN;
31715 n_regs = 1;
31718 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
31721 /* Some return value types need to be split under the 32-bit ABI with -mpowerpc64. */
31722 if (TARGET_32BIT && TARGET_POWERPC64)
31723 switch (mode)
31725 default:
31726 break;
31727 case DImode:
31728 case SCmode:
31729 case DCmode:
31730 case TCmode:
31731 int count = GET_MODE_SIZE (mode) / 4;
31732 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
31735 if ((INTEGRAL_TYPE_P (valtype)
31736 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
31737 || (POINTER_TYPE_P (valtype) && !upc_shared_type_p (TREE_TYPE (valtype))))
31738 mode = TARGET_32BIT ? SImode : DImode;
31740 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31741 /* _Decimal128 must use an even/odd register pair. */
31742 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31743 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31744 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31745 regno = FP_ARG_RETURN;
31746 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31747 && targetm.calls.split_complex_arg)
31748 return rs6000_complex_function_value (mode);
31749 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31750 return register is used in both cases, and we won't see V2DImode/V2DFmode
31751 for pure altivec, combine the two cases. */
31752 else if (TREE_CODE (valtype) == VECTOR_TYPE
31753 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31754 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31755 regno = ALTIVEC_ARG_RETURN;
31756 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31757 && (mode == DFmode || mode == DCmode
31758 || mode == TFmode || mode == TCmode))
31759 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31760 else
31761 regno = GP_ARG_RETURN;
31763 return gen_rtx_REG (mode, regno);
31766 /* Define how to find the value returned by a library function
31767 assuming the value has mode MODE. */
31769 rs6000_libcall_value (machine_mode mode)
31771 unsigned int regno;
31773 /* A long long return value needs to be split under the 32-bit ABI with -mpowerpc64. */
31774 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31775 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
31777 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31778 /* _Decimal128 must use an even/odd register pair. */
31779 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31780 else if (SCALAR_FLOAT_MODE_P (mode)
31781 && TARGET_HARD_FLOAT && TARGET_FPRS
31782 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31783 regno = FP_ARG_RETURN;
31784 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31785 return register is used in both cases, and we won't see V2DImode/V2DFmode
31786 for pure altivec, combine the two cases. */
31787 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31788 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31789 regno = ALTIVEC_ARG_RETURN;
31790 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31791 return rs6000_complex_function_value (mode);
31792 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31793 && (mode == DFmode || mode == DCmode
31794 || mode == TFmode || mode == TCmode))
31795 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31796 else
31797 regno = GP_ARG_RETURN;
31799 return gen_rtx_REG (mode, regno);
31803 /* Return true if we use LRA instead of the reload pass. */
31804 static bool
31805 rs6000_lra_p (void)
31807 return rs6000_lra_flag;
31810 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31811 Frame pointer elimination is automatically handled.
31813 For the RS/6000, if frame pointer elimination is being done, we would like
31814 to convert ap into fp, not sp.
31816 We need r30 if -mminimal-toc was specified, and there are constant pool
31817 references. */
31819 static bool
31820 rs6000_can_eliminate (const int from, const int to)
31822 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31823 ? ! frame_pointer_needed
31824 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31825 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31826 : true);
31829 /* Define the offset between two registers, FROM to be eliminated and its
31830 replacement TO, at the start of a routine. */
31831 HOST_WIDE_INT
31832 rs6000_initial_elimination_offset (int from, int to)
31834 rs6000_stack_t *info = rs6000_stack_info ();
31835 HOST_WIDE_INT offset;
31837 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31838 offset = info->push_p ? 0 : -info->total_size;
31839 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31841 offset = info->push_p ? 0 : -info->total_size;
31842 if (FRAME_GROWS_DOWNWARD)
31843 offset += info->fixed_size + info->vars_size + info->parm_size;
31845 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31846 offset = FRAME_GROWS_DOWNWARD
31847 ? info->fixed_size + info->vars_size + info->parm_size
31848 : 0;
31849 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31850 offset = info->total_size;
31851 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31852 offset = info->push_p ? info->total_size : 0;
31853 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31854 offset = 0;
31855 else
31856 gcc_unreachable ();
31858 return offset;
31861 static rtx
31862 rs6000_dwarf_register_span (rtx reg)
31864 rtx parts[8];
31865 int i, words;
31866 unsigned regno = REGNO (reg);
31867 machine_mode mode = GET_MODE (reg);
31869 if (TARGET_SPE
31870 && regno < 32
31871 && (SPE_VECTOR_MODE (GET_MODE (reg))
31872 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31873 && mode != SFmode && mode != SDmode && mode != SCmode)))
31875 else
31876 return NULL_RTX;
31878 regno = REGNO (reg);
31880 /* The duality of the SPE register size wreaks all kinds of havoc.
31881 This is a way of distinguishing r0 in 32-bits from r0 in
31882 64-bits. */
31883 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31884 gcc_assert (words <= 4);
31885 for (i = 0; i < words; i++, regno++)
31887 if (BYTES_BIG_ENDIAN)
31889 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31890 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31892 else
31894 parts[2 * i] = gen_rtx_REG (SImode, regno);
31895 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31899 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
31902 /* Fill in sizes for SPE register high parts in table used by unwinder. */
31904 static void
31905 rs6000_init_dwarf_reg_sizes_extra (tree address)
31907 if (TARGET_SPE)
31909 int i;
31910 machine_mode mode = TYPE_MODE (char_type_node);
31911 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31912 rtx mem = gen_rtx_MEM (BLKmode, addr);
31913 rtx value = gen_int_mode (4, mode);
31915 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
31917 int column = DWARF_REG_TO_UNWIND_COLUMN
31918 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31919 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31921 emit_move_insn (adjust_address (mem, mode, offset), value);
31925 if (TARGET_MACHO && ! TARGET_ALTIVEC)
31927 int i;
31928 machine_mode mode = TYPE_MODE (char_type_node);
31929 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31930 rtx mem = gen_rtx_MEM (BLKmode, addr);
31931 rtx value = gen_int_mode (16, mode);
31933 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
31934 The unwinder still needs to know the size of Altivec registers. */
31936 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
31938 int column = DWARF_REG_TO_UNWIND_COLUMN
31939 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31940 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31942 emit_move_insn (adjust_address (mem, mode, offset), value);
31947 /* Map internal gcc register numbers to debug format register numbers.
31948 FORMAT specifies the type of debug register number to use:
31949 0 -- debug information, except for frame-related sections
31950 1 -- DWARF .debug_frame section
31951 2 -- DWARF .eh_frame section */
31953 unsigned int
31954 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
31956 /* We never use the GCC internal number for SPE high registers.
31957 Those are mapped to the 1200..1231 range for all debug formats. */
31958 if (SPE_HIGH_REGNO_P (regno))
31959 return regno - FIRST_SPE_HIGH_REGNO + 1200;
31961 /* Except for the above, we use the internal number for non-DWARF
31962 debug information, and also for .eh_frame. */
31963 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
31964 return regno;
31966 /* On some platforms, we use the standard DWARF register
31967 numbering for .debug_info and .debug_frame. */
31968 #ifdef RS6000_USE_DWARF_NUMBERING
31969 if (regno <= 63)
31970 return regno;
31971 if (regno == LR_REGNO)
31972 return 108;
31973 if (regno == CTR_REGNO)
31974 return 109;
31975 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
31976 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
31977 The actual code emitted saves the whole of CR, so we map CR2_REGNO
31978 to the DWARF reg for CR. */
31979 if (format == 1 && regno == CR2_REGNO)
31980 return 64;
31981 if (CR_REGNO_P (regno))
31982 return regno - CR0_REGNO + 86;
31983 if (regno == CA_REGNO)
31984 return 101; /* XER */
31985 if (ALTIVEC_REGNO_P (regno))
31986 return regno - FIRST_ALTIVEC_REGNO + 1124;
31987 if (regno == VRSAVE_REGNO)
31988 return 356;
31989 if (regno == VSCR_REGNO)
31990 return 67;
31991 if (regno == SPE_ACC_REGNO)
31992 return 99;
31993 if (regno == SPEFSCR_REGNO)
31994 return 612;
31995 #endif
31996 return regno;
31999 /* Target hook for eh_return_filter_mode. */
32000 static machine_mode
32001 rs6000_eh_return_filter_mode (void)
32003 return TARGET_32BIT ? SImode : word_mode;
32006 /* Target hook for scalar_mode_supported_p. */
32007 static bool
32008 rs6000_scalar_mode_supported_p (machine_mode mode)
32010 /* -m32 does not support TImode. This is the default, from
32011 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
32012 same ABI as for -m32. But default_scalar_mode_supported_p allows
32013 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
32014 for -mpowerpc64. */
32015 if (TARGET_32BIT && mode == TImode)
32016 return false;
32018 if (DECIMAL_FLOAT_MODE_P (mode))
32019 return default_decimal_float_supported_p ();
32020 else
32021 return default_scalar_mode_supported_p (mode);
32024 /* Target hook for vector_mode_supported_p. */
32025 static bool
32026 rs6000_vector_mode_supported_p (machine_mode mode)
32029 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
32030 return true;
32032 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
32033 return true;
32035 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
32036 return true;
32038 else
32039 return false;
32042 /* Target hook for invalid_arg_for_unprototyped_fn. */
32043 static const char *
32044 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
32046 return (!rs6000_darwin64_abi
32047 && typelist == 0
32048 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
32049 && (funcdecl == NULL_TREE
32050 || (TREE_CODE (funcdecl) == FUNCTION_DECL
32051 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
32052 ? N_("AltiVec argument passed to unprototyped function")
32053 : NULL;
32056 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
32057 setup by using the hidden function __stack_chk_fail_local instead of
32058 calling __stack_chk_fail directly. Otherwise it is better to call
32059 __stack_chk_fail directly. */
32061 static tree ATTRIBUTE_UNUSED
32062 rs6000_stack_protect_fail (void)
32064 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
32065 ? default_hidden_stack_protect_fail ()
32066 : default_external_stack_protect_fail ();
32069 void
32070 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
32071 int num_operands ATTRIBUTE_UNUSED)
32073 if (rs6000_warn_cell_microcode)
32075 const char *temp;
32076 int insn_code_number = recog_memoized (insn);
32077 location_t location = INSN_LOCATION (insn);
32079 /* Punt on insns we cannot recognize. */
32080 if (insn_code_number < 0)
32081 return;
32083 temp = get_insn_template (insn_code_number, insn);
32085 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
32086 warning_at (location, OPT_mwarn_cell_microcode,
32087 "emitting microcode insn %s\t[%s] #%d",
32088 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32089 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
32090 warning_at (location, OPT_mwarn_cell_microcode,
32091 "emitting conditional microcode insn %s\t[%s] #%d",
32092 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32096 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32098 #if TARGET_ELF
32099 static unsigned HOST_WIDE_INT
32100 rs6000_asan_shadow_offset (void)
32102 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
32104 #endif
32106 /* Mask options that we want to support inside attribute((target)) and
32107 #pragma GCC target operations. Note that we do not include things like
32108 64/32-bit, endianness, hard/soft floating point, etc. that would have
32109 different calling sequences. */
32111 struct rs6000_opt_mask {
32112 const char *name; /* option name */
32113 HOST_WIDE_INT mask; /* mask to set */
32114 bool invert; /* invert sense of mask */
32115 bool valid_target; /* option is a target option */
32118 static struct rs6000_opt_mask const rs6000_opt_masks[] =
32120 { "altivec", OPTION_MASK_ALTIVEC, false, true },
32121 { "cmpb", OPTION_MASK_CMPB, false, true },
32122 { "crypto", OPTION_MASK_CRYPTO, false, true },
32123 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
32124 { "dlmzb", OPTION_MASK_DLMZB, false, true },
32125 { "fprnd", OPTION_MASK_FPRND, false, true },
32126 { "hard-dfp", OPTION_MASK_DFP, false, true },
32127 { "htm", OPTION_MASK_HTM, false, true },
32128 { "isel", OPTION_MASK_ISEL, false, true },
32129 { "mfcrf", OPTION_MASK_MFCRF, false, true },
32130 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
32131 { "mulhw", OPTION_MASK_MULHW, false, true },
32132 { "multiple", OPTION_MASK_MULTIPLE, false, true },
32133 { "popcntb", OPTION_MASK_POPCNTB, false, true },
32134 { "popcntd", OPTION_MASK_POPCNTD, false, true },
32135 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
32136 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
32137 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
32138 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
32139 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
32140 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
32141 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
32142 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
32143 { "string", OPTION_MASK_STRING, false, true },
32144 { "update", OPTION_MASK_NO_UPDATE, true , true },
32145 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
32146 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
32147 { "vsx", OPTION_MASK_VSX, false, true },
32148 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
32149 #ifdef OPTION_MASK_64BIT
32150 #if TARGET_AIX_OS
32151 { "aix64", OPTION_MASK_64BIT, false, false },
32152 { "aix32", OPTION_MASK_64BIT, true, false },
32153 #else
32154 { "64", OPTION_MASK_64BIT, false, false },
32155 { "32", OPTION_MASK_64BIT, true, false },
32156 #endif
32157 #endif
32158 #ifdef OPTION_MASK_EABI
32159 { "eabi", OPTION_MASK_EABI, false, false },
32160 #endif
32161 #ifdef OPTION_MASK_LITTLE_ENDIAN
32162 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
32163 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
32164 #endif
32165 #ifdef OPTION_MASK_RELOCATABLE
32166 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
32167 #endif
32168 #ifdef OPTION_MASK_STRICT_ALIGN
32169 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
32170 #endif
32171 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
32172 { "string", OPTION_MASK_STRING, false, false },
32175 /* Builtin mask mapping for printing the flags. */
32176 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
32178 { "altivec", RS6000_BTM_ALTIVEC, false, false },
32179 { "vsx", RS6000_BTM_VSX, false, false },
32180 { "spe", RS6000_BTM_SPE, false, false },
32181 { "paired", RS6000_BTM_PAIRED, false, false },
32182 { "fre", RS6000_BTM_FRE, false, false },
32183 { "fres", RS6000_BTM_FRES, false, false },
32184 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
32185 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
32186 { "popcntd", RS6000_BTM_POPCNTD, false, false },
32187 { "cell", RS6000_BTM_CELL, false, false },
32188 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
32189 { "crypto", RS6000_BTM_CRYPTO, false, false },
32190 { "htm", RS6000_BTM_HTM, false, false },
32191 { "hard-dfp", RS6000_BTM_DFP, false, false },
32192 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
32193 { "long-double-128", RS6000_BTM_LDBL128, false, false },
32196 /* Option variables that we want to support inside attribute((target)) and
32197 #pragma GCC target operations. */
32199 struct rs6000_opt_var {
32200 const char *name; /* option name */
32201 size_t global_offset; /* offset of the option in global_options. */
32202 size_t target_offset; /* offset of the option in target options. */
32205 static struct rs6000_opt_var const rs6000_opt_vars[] =
32207 { "friz",
32208 offsetof (struct gcc_options, x_TARGET_FRIZ),
32209 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
32210 { "avoid-indexed-addresses",
32211 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
32212 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
32213 { "paired",
32214 offsetof (struct gcc_options, x_rs6000_paired_float),
32215 offsetof (struct cl_target_option, x_rs6000_paired_float), },
32216 { "longcall",
32217 offsetof (struct gcc_options, x_rs6000_default_long_calls),
32218 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
32221 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
32222 parsing. Return true if there were no errors. */
32224 static bool
32225 rs6000_inner_target_options (tree args, bool attr_p)
32227 bool ret = true;
32229 if (args == NULL_TREE)
32232 else if (TREE_CODE (args) == STRING_CST)
32234 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32235 char *q;
32237 while ((q = strtok (p, ",")) != NULL)
32239 bool error_p = false;
32240 bool not_valid_p = false;
32241 const char *cpu_opt = NULL;
32243 p = NULL;
32244 if (strncmp (q, "cpu=", 4) == 0)
32246 int cpu_index = rs6000_cpu_name_lookup (q+4);
32247 if (cpu_index >= 0)
32248 rs6000_cpu_index = cpu_index;
32249 else
32251 error_p = true;
32252 cpu_opt = q+4;
32255 else if (strncmp (q, "tune=", 5) == 0)
32257 int tune_index = rs6000_cpu_name_lookup (q+5);
32258 if (tune_index >= 0)
32259 rs6000_tune_index = tune_index;
32260 else
32262 error_p = true;
32263 cpu_opt = q+5;
32266 else
32268 size_t i;
32269 bool invert = false;
32270 char *r = q;
32272 error_p = true;
32273 if (strncmp (r, "no-", 3) == 0)
32275 invert = true;
32276 r += 3;
32279 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
32280 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
32282 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
32284 if (!rs6000_opt_masks[i].valid_target)
32285 not_valid_p = true;
32286 else
32288 error_p = false;
32289 rs6000_isa_flags_explicit |= mask;
32291 /* VSX needs altivec, so -mvsx automagically sets
32292 altivec. */
32293 if (mask == OPTION_MASK_VSX && !invert)
32294 mask |= OPTION_MASK_ALTIVEC;
32296 if (rs6000_opt_masks[i].invert)
32297 invert = !invert;
32299 if (invert)
32300 rs6000_isa_flags &= ~mask;
32301 else
32302 rs6000_isa_flags |= mask;
32304 break;
32307 if (error_p && !not_valid_p)
32309 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
32310 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
32312 size_t j = rs6000_opt_vars[i].global_offset;
32313 *((int *) ((char *)&global_options + j)) = !invert;
32314 error_p = false;
32315 break;
32320 if (error_p)
32322 const char *eprefix, *esuffix;
32324 ret = false;
32325 if (attr_p)
32327 eprefix = "__attribute__((__target__(";
32328 esuffix = ")))";
32330 else
32332 eprefix = "#pragma GCC target ";
32333 esuffix = "";
32336 if (cpu_opt)
32337 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
32338 q, esuffix);
32339 else if (not_valid_p)
32340 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
32341 else
32342 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
32347 else if (TREE_CODE (args) == TREE_LIST)
32351 tree value = TREE_VALUE (args);
32352 if (value)
32354 bool ret2 = rs6000_inner_target_options (value, attr_p);
32355 if (!ret2)
32356 ret = false;
32358 args = TREE_CHAIN (args);
32360 while (args != NULL_TREE);
32363 else
32364 gcc_unreachable ();
32366 return ret;
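/* Editorial sketch: the kind of source-level input the parser above
   accepts -- a comma-separated option string, with "no-" negation and
   cpu=/tune= prefixes (the specific options shown are just examples): */
#pragma GCC target ("vsx,no-crypto")
void power8_variant (void) __attribute__ ((__target__ ("cpu=power8")));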
32369 /* Print out the target options as a list for -mdebug=target. */
32371 static void
32372 rs6000_debug_target_options (tree args, const char *prefix)
32374 if (args == NULL_TREE)
32375 fprintf (stderr, "%s<NULL>", prefix);
32377 else if (TREE_CODE (args) == STRING_CST)
32379 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32380 char *q;
32382 while ((q = strtok (p, ",")) != NULL)
32384 p = NULL;
32385 fprintf (stderr, "%s\"%s\"", prefix, q);
32386 prefix = ", ";
32390 else if (TREE_CODE (args) == TREE_LIST)
32394 tree value = TREE_VALUE (args);
32395 if (value)
32397 rs6000_debug_target_options (value, prefix);
32398 prefix = ", ";
32400 args = TREE_CHAIN (args);
32402 while (args != NULL_TREE);
32405 else
32406 gcc_unreachable ();
32408 return;
32412 /* Hook to validate attribute((target("..."))). */
32414 static bool
32415 rs6000_valid_attribute_p (tree fndecl,
32416 tree ARG_UNUSED (name),
32417 tree args,
32418 int flags)
32420 struct cl_target_option cur_target;
32421 bool ret;
32422 tree old_optimize = build_optimization_node (&global_options);
32423 tree new_target, new_optimize;
32424 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32426 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32428 if (TARGET_DEBUG_TARGET)
32430 tree tname = DECL_NAME (fndecl);
32431 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32432 if (tname)
32433 fprintf (stderr, "function: %.*s\n",
32434 (int) IDENTIFIER_LENGTH (tname),
32435 IDENTIFIER_POINTER (tname));
32436 else
32437 fprintf (stderr, "function: unknown\n");
32439 fprintf (stderr, "args:");
32440 rs6000_debug_target_options (args, " ");
32441 fprintf (stderr, "\n");
32443 if (flags)
32444 fprintf (stderr, "flags: 0x%x\n", flags);
32446 fprintf (stderr, "--------------------\n");
32449 old_optimize = build_optimization_node (&global_options);
32450 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32452 /* If the function changed the optimization levels as well as setting target
32453 options, start with the optimizations specified. */
32454 if (func_optimize && func_optimize != old_optimize)
32455 cl_optimization_restore (&global_options,
32456 TREE_OPTIMIZATION (func_optimize));
32458 /* The target attributes may also change some optimization flags, so update
32459 the optimization options if necessary. */
32460 cl_target_option_save (&cur_target, &global_options);
32461 rs6000_cpu_index = rs6000_tune_index = -1;
32462 ret = rs6000_inner_target_options (args, true);
32464 /* Set up any additional state. */
32465 if (ret)
32467 ret = rs6000_option_override_internal (false);
32468 new_target = build_target_option_node (&global_options);
32470 else
32471 new_target = NULL;
32473 new_optimize = build_optimization_node (&global_options);
32475 if (!new_target)
32476 ret = false;
32478 else if (fndecl)
32480 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32482 if (old_optimize != new_optimize)
32483 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32486 cl_target_option_restore (&global_options, &cur_target);
32488 if (old_optimize != new_optimize)
32489 cl_optimization_restore (&global_options,
32490 TREE_OPTIMIZATION (old_optimize));
32492 return ret;
32496 /* Hook to validate the current #pragma GCC target and set the state, and
32497 update the macros based on what was changed. If ARGS is NULL, then
32498 POP_TARGET is used to reset the options. */
32500 bool
32501 rs6000_pragma_target_parse (tree args, tree pop_target)
32503 tree prev_tree = build_target_option_node (&global_options);
32504 tree cur_tree;
32505 struct cl_target_option *prev_opt, *cur_opt;
32506 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32507 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32509 if (TARGET_DEBUG_TARGET)
32511 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32512 fprintf (stderr, "args:");
32513 rs6000_debug_target_options (args, " ");
32514 fprintf (stderr, "\n");
32516 if (pop_target)
32518 fprintf (stderr, "pop_target:\n");
32519 debug_tree (pop_target);
32521 else
32522 fprintf (stderr, "pop_target: <NULL>\n");
32524 fprintf (stderr, "--------------------\n");
32527 if (! args)
32529 cur_tree = ((pop_target)
32530 ? pop_target
32531 : target_option_default_node);
32532 cl_target_option_restore (&global_options,
32533 TREE_TARGET_OPTION (cur_tree));
32535 else
32537 rs6000_cpu_index = rs6000_tune_index = -1;
32538 if (!rs6000_inner_target_options (args, false)
32539 || !rs6000_option_override_internal (false)
32540 || (cur_tree = build_target_option_node (&global_options))
32541 == NULL_TREE)
32543 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32544 fprintf (stderr, "invalid pragma\n");
32546 return false;
32550 target_option_current_node = cur_tree;
32552 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32553 change the macros that are defined. */
32554 if (rs6000_target_modify_macros_ptr)
32556 prev_opt = TREE_TARGET_OPTION (prev_tree);
32557 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32558 prev_flags = prev_opt->x_rs6000_isa_flags;
32560 cur_opt = TREE_TARGET_OPTION (cur_tree);
32561 cur_flags = cur_opt->x_rs6000_isa_flags;
32562 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32564 diff_bumask = (prev_bumask ^ cur_bumask);
32565 diff_flags = (prev_flags ^ cur_flags);
32567 if ((diff_flags != 0) || (diff_bumask != 0))
32569 /* Delete old macros. */
32570 rs6000_target_modify_macros_ptr (false,
32571 prev_flags & diff_flags,
32572 prev_bumask & diff_bumask);
32574 /* Define new macros. */
32575 rs6000_target_modify_macros_ptr (true,
32576 cur_flags & diff_flags,
32577 cur_bumask & diff_bumask);
32581 return true;
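/* Editorial sketch (illustrative names): the XOR/AND bookkeeping above in
   isolation.  XOR yields the bits that changed; ANDing the changed bits
   with the previous and current masks selects, respectively, the macros
   to undefine and the macros to define. */
#include <stdio.h>

static void
update_macros (unsigned long prev, unsigned long cur)
{
  unsigned long diff = prev ^ cur;
  if (diff != 0)
    {
      printf ("undef bits:  %#lx\n", prev & diff);
      printf ("define bits: %#lx\n", cur & diff);
    }
}

int
main (void)
{
  update_macros (0x5UL, 0x6UL); /* undef 0x1, define 0x2; 0x4 unchanged */
  return 0;
}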
32585 /* Remember the last target of rs6000_set_current_function. */
32586 static GTY(()) tree rs6000_previous_fndecl;
32588 /* Establish appropriate back-end context for processing the function
32589 FNDECL. The argument might be NULL to indicate processing at top
32590 level, outside of any function scope. */
32591 static void
32592 rs6000_set_current_function (tree fndecl)
32594 tree old_tree = (rs6000_previous_fndecl
32595 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
32596 : NULL_TREE);
32598 tree new_tree = (fndecl
32599 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
32600 : NULL_TREE);
32602 if (TARGET_DEBUG_TARGET)
32604 bool print_final = false;
32605 fprintf (stderr, "\n==================== rs6000_set_current_function");
32607 if (fndecl)
32608 fprintf (stderr, ", fndecl %s (%p)",
32609 (DECL_NAME (fndecl)
32610 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
32611 : "<unknown>"), (void *)fndecl);
32613 if (rs6000_previous_fndecl)
32614 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
32616 fprintf (stderr, "\n");
32617 if (new_tree)
32619 fprintf (stderr, "\nnew fndecl target specific options:\n");
32620 debug_tree (new_tree);
32621 print_final = true;
32624 if (old_tree)
32626 fprintf (stderr, "\nold fndecl target specific options:\n");
32627 debug_tree (old_tree);
32628 print_final = true;
32631 if (print_final)
32632 fprintf (stderr, "--------------------\n");
32635 /* Only change the context if the function changes. This hook is called
32636 several times in the course of compiling a function, and we don't want to
32637 slow things down too much or call target_reinit when it isn't safe. */
32638 if (fndecl && fndecl != rs6000_previous_fndecl)
32640 rs6000_previous_fndecl = fndecl;
32641 if (old_tree == new_tree)
32644 else if (new_tree && new_tree != target_option_default_node)
32646 cl_target_option_restore (&global_options,
32647 TREE_TARGET_OPTION (new_tree));
32648 if (TREE_TARGET_GLOBALS (new_tree))
32649 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32650 else
32651 TREE_TARGET_GLOBALS (new_tree)
32652 = save_target_globals_default_opts ();
32655 else if (old_tree && old_tree != target_option_default_node)
32657 new_tree = target_option_current_node;
32658 cl_target_option_restore (&global_options,
32659 TREE_TARGET_OPTION (new_tree));
32660 if (TREE_TARGET_GLOBALS (new_tree))
32661 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32662 else if (new_tree == target_option_default_node)
32663 restore_target_globals (&default_target_globals);
32664 else
32665 TREE_TARGET_GLOBALS (new_tree)
32666 = save_target_globals_default_opts ();
32672 /* Save the current options. */
32674 static void
32675 rs6000_function_specific_save (struct cl_target_option *ptr,
32676 struct gcc_options *opts)
32678 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
32679 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
32682 /* Restore the current options. */
32684 static void
32685 rs6000_function_specific_restore (struct gcc_options *opts,
32686 struct cl_target_option *ptr)
32689 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
32690 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
32691 (void) rs6000_option_override_internal (false);
32694 /* Print the current options. */
32696 static void
32697 rs6000_function_specific_print (FILE *file, int indent,
32698 struct cl_target_option *ptr)
32700 rs6000_print_isa_options (file, indent, "Isa options set",
32701 ptr->x_rs6000_isa_flags);
32703 rs6000_print_isa_options (file, indent, "Isa options explicit",
32704 ptr->x_rs6000_isa_flags_explicit);
32707 /* Helper function to print the current isa or misc options on a line. */
32709 static void
32710 rs6000_print_options_internal (FILE *file,
32711 int indent,
32712 const char *string,
32713 HOST_WIDE_INT flags,
32714 const char *prefix,
32715 const struct rs6000_opt_mask *opts,
32716 size_t num_elements)
32718 size_t i;
32719 size_t start_column = 0;
32720 size_t cur_column;
32721 size_t max_column = 76;
32722 const char *comma = "";
32724 if (indent)
32725 start_column += fprintf (file, "%*s", indent, "");
32727 if (!flags)
32729 fprintf (file, DEBUG_FMT_S, string, "<none>");
32730 return;
32733 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
32735 /* Print the various mask options. */
32736 cur_column = start_column;
32737 for (i = 0; i < num_elements; i++)
32739 if ((flags & opts[i].mask) != 0)
32741 const char *no_str = opts[i].invert ? "no-" : "";
32742 size_t len = (strlen (comma)
32743 + strlen (prefix)
32744 + strlen (no_str)
32745 + strlen (opts[i].name));
32747 cur_column += len;
32748 if (cur_column > max_column)
32750 fprintf (file, ", \\\n%*s", (int)start_column, "");
32751 cur_column = start_column + len;
32752 comma = "";
32755 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32756 opts[i].name);
32757 flags &= ~ opts[i].mask;
32758 comma = ", ";
32762 fputs ("\n", file);
32765 /* Helper function to print the current isa options on a line. */
32767 static void
32768 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32769 HOST_WIDE_INT flags)
32771 rs6000_print_options_internal (file, indent, string, flags, "-m",
32772 &rs6000_opt_masks[0],
32773 ARRAY_SIZE (rs6000_opt_masks));
32776 static void
32777 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32778 HOST_WIDE_INT flags)
32780 rs6000_print_options_internal (file, indent, string, flags, "",
32781 &rs6000_builtin_mask_names[0],
32782 ARRAY_SIZE (rs6000_builtin_mask_names));
32786 /* Hook to determine if one function can safely inline another. */
32788 static bool
32789 rs6000_can_inline_p (tree caller, tree callee)
32791 bool ret = false;
32792 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32793 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32795 /* If callee has no option attributes, then it is ok to inline. */
32796 if (!callee_tree)
32797 ret = true;
32799 /* If caller has no option attributes, but callee does then it is not ok to
32800 inline. */
32801 else if (!caller_tree)
32802 ret = false;
32804 else
32806 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32807 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32809 /* The callee's options must be a subset of the caller's, i.e. a vsx function
32810 can inline an altivec function but a non-vsx function can't inline a
32811 vsx function. */
32812 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32813 == callee_opts->x_rs6000_isa_flags)
32814 ret = true;
32817 if (TARGET_DEBUG_TARGET)
32818 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32819 (DECL_NAME (caller)
32820 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32821 : "<unknown>"),
32822 (DECL_NAME (callee)
32823 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32824 : "<unknown>"),
32825 (ret ? "can" : "cannot"));
32827 return ret;
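/* Editorial sketch: the inlining criterion above reduced to its mask
   arithmetic -- the callee's ISA flags must be a subset of the caller's. */
#include <stdio.h>

static int
flags_subset_p (unsigned long caller_flags, unsigned long callee_flags)
{
  return (caller_flags & callee_flags) == callee_flags;
}

int
main (void)
{
  /* A caller with {altivec,vsx} can inline a callee with {altivec}... */
  printf ("%d\n", flags_subset_p (0x3, 0x1));  /* 1 */
  /* ...but a caller with {altivec} cannot inline a callee with {vsx}. */
  printf ("%d\n", flags_subset_p (0x1, 0x2));  /* 0 */
  return 0;
}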
32830 /* Allocate a stack temp and fix up the address so that it meets the
32831 particular memory requirements (either offsettable or REG+REG addressing). */
32834 rs6000_allocate_stack_temp (machine_mode mode,
32835 bool offsettable_p,
32836 bool reg_reg_p)
32838 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32839 rtx addr = XEXP (stack, 0);
32840 int strict_p = (reload_in_progress || reload_completed);
32842 if (!legitimate_indirect_address_p (addr, strict_p))
32844 if (offsettable_p
32845 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32846 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32848 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32849 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32852 return stack;
32855 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
32856 to such a form to deal with memory reference instructions like STFIWX that
32857 only take reg+reg addressing. */
32860 rs6000_address_for_fpconvert (rtx x)
32862 int strict_p = (reload_in_progress || reload_completed);
32863 rtx addr;
32865 gcc_assert (MEM_P (x));
32866 addr = XEXP (x, 0);
32867 if (! legitimate_indirect_address_p (addr, strict_p)
32868 && ! legitimate_indexed_address_p (addr, strict_p))
32870 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32872 rtx reg = XEXP (addr, 0);
32873 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32874 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32875 gcc_assert (REG_P (reg));
32876 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32877 addr = reg;
32879 else if (GET_CODE (addr) == PRE_MODIFY)
32881 rtx reg = XEXP (addr, 0);
32882 rtx expr = XEXP (addr, 1);
32883 gcc_assert (REG_P (reg));
32884 gcc_assert (GET_CODE (expr) == PLUS);
32885 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32886 addr = reg;
32889 x = replace_equiv_address (x, copy_addr_to_reg (addr));
32892 return x;
32895 /* Given a memory reference, if it is not in the form for altivec memory
32896 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
32897 convert to the altivec format. */
32900 rs6000_address_for_altivec (rtx x)
32902 gcc_assert (MEM_P (x));
32903 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
32905 rtx addr = XEXP (x, 0);
32906 int strict_p = (reload_in_progress || reload_completed);
32908 if (!legitimate_indexed_address_p (addr, strict_p)
32909 && !legitimate_indirect_address_p (addr, strict_p))
32910 addr = copy_to_mode_reg (Pmode, addr);
32912 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
32913 x = change_address (x, GET_MODE (x), addr);
32916 return x;
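/* Editorial sketch: the effect of the AND with -16 above, shown on a
   plain integer address -- it clears the low four bits, forcing the
   16-byte alignment that AltiVec loads and stores assume. */
#include <stdint.h>
#include <stdio.h>

static uintptr_t
altivec_mask_address (uintptr_t addr)
{
  return addr & ~(uintptr_t) 15;   /* same as addr & (uintptr_t) -16 */
}

int
main (void)
{
  printf ("%#lx\n", (unsigned long) altivec_mask_address (0x1007)); /* 0x1000 */
  return 0;
}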
32919 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32921 On the RS/6000, all integer constants are acceptable, though most won't
32922 be valid for particular insns. Only easy FP constants are acceptable. */
32924 static bool
32925 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32927 if (TARGET_ELF && tls_referenced_p (x))
32928 return false;
32930 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
32931 || GET_MODE (x) == VOIDmode
32932 || (TARGET_POWERPC64 && mode == DImode)
32933 || easy_fp_constant (x, mode)
32934 || easy_vector_constant (x, mode));
32939 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32941 void
32942 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32944 const bool direct_call_p
32945 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
32946 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32947 rtx toc_load = NULL_RTX;
32948 rtx toc_restore = NULL_RTX;
32949 rtx func_addr;
32950 rtx abi_reg = NULL_RTX;
32951 rtx call[4];
32952 int n_call;
32953 rtx insn;
32955 /* Handle longcall attributes. */
32956 if (INTVAL (cookie) & CALL_LONG)
32957 func_desc = rs6000_longcall_ref (func_desc);
32959 /* Handle indirect calls. */
32960 if (GET_CODE (func_desc) != SYMBOL_REF
32961 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
32963 /* Save the TOC into its reserved slot before the call,
32964 and prepare to restore it after the call. */
32965 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32966 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32967 rtx stack_toc_mem = gen_frame_mem (Pmode,
32968 gen_rtx_PLUS (Pmode, stack_ptr,
32969 stack_toc_offset));
32970 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
32971 gen_rtvec (1, stack_toc_offset),
32972 UNSPEC_TOCSLOT);
32973 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_unspec);
32975 /* Can we optimize saving the TOC in the prologue or
32976 do we need to do it at every call? */
32977 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32978 cfun->machine->save_toc_in_prologue = true;
32979 else
32981 MEM_VOLATILE_P (stack_toc_mem) = 1;
32982 emit_move_insn (stack_toc_mem, toc_reg);
32985 if (DEFAULT_ABI == ABI_ELFv2)
32987 /* A function pointer in the ELFv2 ABI is just a plain address, but
32988 the ABI requires it to be loaded into r12 before the call. */
32989 func_addr = gen_rtx_REG (Pmode, 12);
32990 emit_move_insn (func_addr, func_desc);
32991 abi_reg = func_addr;
32993 else
32995 /* A function pointer under AIX is a pointer to a data area whose
32996 first word contains the actual address of the function, whose
32997 second word contains a pointer to its TOC, and whose third word
32998 contains a value to place in the static chain register (r11).
32999 Note that if we load the static chain, our "trampoline" need
33000 not have any executable code. */
33002 /* Load up address of the actual function. */
33003 func_desc = force_reg (Pmode, func_desc);
33004 func_addr = gen_reg_rtx (Pmode);
33005 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
33007 /* Prepare to load the TOC of the called function. Note that the
33008 TOC load must happen immediately before the actual call so
33009 that unwinding the TOC registers works correctly. See the
33010 comment in frob_update_context. */
33011 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
33012 rtx func_toc_mem = gen_rtx_MEM (Pmode,
33013 gen_rtx_PLUS (Pmode, func_desc,
33014 func_toc_offset));
33015 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
33017 /* If we have a static chain, load it up. But, if the call was
33018 originally direct, the 3rd word has not been written since no
33019 trampoline has been built, so we ought not to load it, lest we
33020 overwrite a static chain value. */
33021 if (!direct_call_p && TARGET_POINTERS_TO_NESTED_FUNCTIONS)
33023 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
33024 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
33025 rtx func_sc_mem = gen_rtx_MEM (Pmode,
33026 gen_rtx_PLUS (Pmode, func_desc,
33027 func_sc_offset));
33028 emit_move_insn (sc_reg, func_sc_mem);
33029 abi_reg = sc_reg;
33033 else
33035 /* Direct calls use the TOC: for local calls, the callee will
33036 assume the TOC register is set; for non-local calls, the
33037 PLT stub needs the TOC register. */
33038 abi_reg = toc_reg;
33039 func_addr = func_desc;
33042 /* Create the call. */
33043 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
33044 if (value != NULL_RTX)
33045 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
33046 n_call = 1;
33048 if (toc_load)
33049 call[n_call++] = toc_load;
33050 if (toc_restore)
33051 call[n_call++] = toc_restore;
33053 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
33055 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
33056 insn = emit_call_insn (insn);
33058 /* Mention all registers defined by the ABI to hold information
33059 as uses in CALL_INSN_FUNCTION_USAGE. */
33060 if (abi_reg)
33061 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
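/* Editorial sketch (layout as described in the comments above; this is
   not a GCC declaration): the three-word AIX function descriptor that an
   indirect call dereferences.  Word sizes follow the ABI's pointer size. */
struct aix_function_descriptor
{
  void *entry_point;   /* actual address of the function's code */
  void *toc_pointer;   /* TOC (r2) value for the callee */
  void *static_chain;  /* value to load into r11, if written */
};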
33064 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
33066 void
33067 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
33069 rtx call[2];
33070 rtx insn;
33072 gcc_assert (INTVAL (cookie) == 0);
33074 /* Create the call. */
33075 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
33076 if (value != NULL_RTX)
33077 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
33079 call[1] = simple_return_rtx;
33081 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
33082 insn = emit_call_insn (insn);
33084 /* Note use of the TOC register. */
33085 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
33086 /* We need to also mark a use of the link register since the function we
33087 sibling-call to will use it to return to our caller. */
33088 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
33091 /* Return whether we need to always update the saved TOC pointer when we update
33092 the stack pointer. */
33094 static bool
33095 rs6000_save_toc_in_prologue_p (void)
33097 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
33100 #ifdef HAVE_GAS_HIDDEN
33101 # define USE_HIDDEN_LINKONCE 1
33102 #else
33103 # define USE_HIDDEN_LINKONCE 0
33104 #endif
33106 /* Fills in the label name that should be used for a 476 link stack thunk. */
33108 void
33109 get_ppc476_thunk_name (char name[32])
33111 gcc_assert (TARGET_LINK_STACK);
33113 if (USE_HIDDEN_LINKONCE)
33114 sprintf (name, "__ppc476.get_thunk");
33115 else
33116 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
33119 /* This function emits the simple thunk routine that is used to preserve
33120 the link stack on the 476 cpu. */
33122 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
33123 static void
33124 rs6000_code_end (void)
33126 char name[32];
33127 tree decl;
33129 if (!TARGET_LINK_STACK)
33130 return;
33132 get_ppc476_thunk_name (name);
33134 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
33135 build_function_type_list (void_type_node, NULL_TREE));
33136 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
33137 NULL_TREE, void_type_node);
33138 TREE_PUBLIC (decl) = 1;
33139 TREE_STATIC (decl) = 1;
33141 #if RS6000_WEAK
33142 if (USE_HIDDEN_LINKONCE)
33144 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
33145 targetm.asm_out.unique_section (decl, 0);
33146 switch_to_section (get_named_section (decl, NULL, 0));
33147 DECL_WEAK (decl) = 1;
33148 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
33149 targetm.asm_out.globalize_label (asm_out_file, name);
33150 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
33151 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
33153 else
33154 #endif
33156 switch_to_section (text_section);
33157 ASM_OUTPUT_LABEL (asm_out_file, name);
33160 DECL_INITIAL (decl) = make_node (BLOCK);
33161 current_function_decl = decl;
33162 init_function_start (decl);
33163 first_function_block_is_cold = false;
33164 /* Make sure unwind info is emitted for the thunk if needed. */
33165 final_start_function (emit_barrier (), asm_out_file, 1);
33167 fputs ("\tblr\n", asm_out_file);
33169 final_end_function ();
33170 init_insn_lengths ();
33171 free_after_compilation (cfun);
33172 set_cfun (NULL);
33173 current_function_decl = NULL;
33176 /* Add r30 to hard reg set if the prologue sets it up and it is not
33177 pic_offset_table_rtx. */
33179 static void
33180 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
33182 if (!TARGET_SINGLE_PIC_BASE
33183 && TARGET_TOC
33184 && TARGET_MINIMAL_TOC
33185 && get_pool_size () != 0)
33186 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
33190 /* Helper function for rs6000_split_logical to emit a logical instruction after
   splitting the operation into single GPR registers.
33193 DEST is the destination register.
33194 OP1 and OP2 are the input source registers.
33195 CODE is the base operation (AND, IOR, XOR, NOT).
33196 MODE is the machine mode.
33197 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33198 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33199 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33201 static void
33202 rs6000_split_logical_inner (rtx dest,
33203 rtx op1,
33204 rtx op2,
33205 enum rtx_code code,
33206 machine_mode mode,
33207 bool complement_final_p,
33208 bool complement_op1_p,
33209 bool complement_op2_p)
33211 rtx bool_rtx;
33213 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
33214 if (op2 && GET_CODE (op2) == CONST_INT
33215 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
33216 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33218 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
33219 HOST_WIDE_INT value = INTVAL (op2) & mask;
33221 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
33222 if (code == AND)
33224 if (value == 0)
33226 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
33227 return;
33230 else if (value == mask)
33232 if (!rtx_equal_p (dest, op1))
33233 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33234 return;
33238 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
   into separate ORI/ORIS or XORI/XORIS instructions.  */
33240 else if (code == IOR || code == XOR)
33242 if (value == 0)
33244 if (!rtx_equal_p (dest, op1))
33245 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33246 return;
33251 if (code == AND && mode == SImode
33252 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33254 emit_insn (gen_andsi3 (dest, op1, op2));
33255 return;
33258 if (complement_op1_p)
33259 op1 = gen_rtx_NOT (mode, op1);
33261 if (complement_op2_p)
33262 op2 = gen_rtx_NOT (mode, op2);
33264 /* For canonical RTL, if only one arm is inverted it is the first. */
33265 if (!complement_op1_p && complement_op2_p)
33266 std::swap (op1, op2);
33268 bool_rtx = ((code == NOT)
33269 ? gen_rtx_NOT (mode, op1)
33270 : gen_rtx_fmt_ee (code, mode, op1, op2));
33272 if (complement_final_p)
33273 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
33275 emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
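/* Illustrative example (not taken from the md files): a call with
   code == AND and complement_op1_p set emits
   (set dest (and (not op1) op2)), which is already in canonical
   operand order and matches the andc pattern; with complement_final_p
   set instead, the emitted RTL is (set dest (not (and op1 op2))),
   matching nand.  */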
33278 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
33279 operations are split immediately during RTL generation to allow for more
33280 optimizations of the AND/IOR/XOR.
33282 OPERANDS is an array containing the destination and two input operands.
33283 CODE is the base operation (AND, IOR, XOR, NOT).
33284 MODE is the machine mode.
33285 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33286 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33287 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
33288 CLOBBER_REG is either NULL or a scratch register of type CC to allow
33289 formation of the AND instructions. */
33291 static void
33292 rs6000_split_logical_di (rtx operands[3],
33293 enum rtx_code code,
33294 bool complement_final_p,
33295 bool complement_op1_p,
33296 bool complement_op2_p)
33298 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
33299 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
33300 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
33301 enum hi_lo { hi = 0, lo = 1 };
33302 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
33303 size_t i;
33305 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
33306 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
33307 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
33308 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
33310 if (code == NOT)
33311 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
33312 else
33314 if (GET_CODE (operands[2]) != CONST_INT)
33316 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
33317 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
33319 else
33321 HOST_WIDE_INT value = INTVAL (operands[2]);
33322 HOST_WIDE_INT value_hi_lo[2];
33324 gcc_assert (!complement_final_p);
33325 gcc_assert (!complement_op1_p);
33326 gcc_assert (!complement_op2_p);
33328 value_hi_lo[hi] = value >> 32;
33329 value_hi_lo[lo] = value & lower_32bits;
33331 for (i = 0; i < 2; i++)
33333 HOST_WIDE_INT sub_value = value_hi_lo[i];
33335 if (sub_value & sign_bit)
33336 sub_value |= upper_32bits;
33338 op2_hi_lo[i] = GEN_INT (sub_value);
33340 /* If this is an AND instruction, check to see if we need to load
33341 the value in a register. */
33342 if (code == AND && sub_value != -1 && sub_value != 0
33343 && !and_operand (op2_hi_lo[i], SImode))
33344 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
33349 for (i = 0; i < 2; i++)
33351 /* Split large IOR/XOR operations. */
33352 if ((code == IOR || code == XOR)
33353 && GET_CODE (op2_hi_lo[i]) == CONST_INT
33354 && !complement_final_p
33355 && !complement_op1_p
33356 && !complement_op2_p
33357 && !logical_const_operand (op2_hi_lo[i], SImode))
33359 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
33360 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
33361 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
33362 rtx tmp = gen_reg_rtx (SImode);
33364 /* Make sure the constant is sign extended. */
33365 if ((hi_16bits & sign_bit) != 0)
33366 hi_16bits |= upper_32bits;
33368 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
33369 code, SImode, false, false, false);
33371 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33372 code, SImode, false, false, false);
33374 else
33375 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33376 code, SImode, complement_final_p,
33377 complement_op1_p, complement_op2_p);
33380 return;
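/* Worked example (illustrative): splitting "x |= 0x12345678" in DImode
   on a 32-bit target.  The upper 32-bit half becomes an IOR with 0,
   which rs6000_split_logical_inner reduces to a move (or to nothing
   when the registers already match).  The lower half's constant is not
   a logical_const_operand, so the loop above splits it into an IOR
   with 0x12340000 (an oris) followed by an IOR with 0x5678 (an ori)
   through a fresh scratch register.  */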
33383 /* Split the insns that make up boolean operations operating on multiple GPR
33384 registers. The boolean MD patterns ensure that the inputs either are
33385 exactly the same as the output registers, or there is no overlap.
33387 OPERANDS is an array containing the destination and two input operands.
33388 CODE is the base operation (AND, IOR, XOR, NOT).
33389 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33390 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33391 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33393 void
33394 rs6000_split_logical (rtx operands[3],
33395 enum rtx_code code,
33396 bool complement_final_p,
33397 bool complement_op1_p,
33398 bool complement_op2_p)
33400 machine_mode mode = GET_MODE (operands[0]);
33401 machine_mode sub_mode;
33402 rtx op0, op1, op2;
33403 int sub_size, regno0, regno1, nregs, i;
33405 /* If this is DImode, use the specialized version that can run before
33406 register allocation. */
33407 if (mode == DImode && !TARGET_POWERPC64)
33409 rs6000_split_logical_di (operands, code, complement_final_p,
33410 complement_op1_p, complement_op2_p);
33411 return;
33414 op0 = operands[0];
33415 op1 = operands[1];
33416 op2 = (code == NOT) ? NULL_RTX : operands[2];
33417 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33418 sub_size = GET_MODE_SIZE (sub_mode);
33419 regno0 = REGNO (op0);
33420 regno1 = REGNO (op1);
33422 gcc_assert (reload_completed);
33423 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33424 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33426 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33427 gcc_assert (nregs > 1);
33429 if (op2 && REG_P (op2))
33430 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33432 for (i = 0; i < nregs; i++)
33434 int offset = i * sub_size;
33435 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33436 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33437 rtx sub_op2 = ((code == NOT)
33438 ? NULL_RTX
33439 : simplify_subreg (sub_mode, op2, mode, offset));
33441 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33442 complement_final_p, complement_op1_p,
33443 complement_op2_p);
33446 return;
33450 /* Return true if the peephole2 can combine a load involving a combination of
33451 an addis instruction and a load with an offset that can be fused together on
33452 a power8. */
33454 bool
33455 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33456 rtx addis_value, /* addis value. */
33457 rtx target, /* target register that is loaded. */
33458 rtx mem) /* bottom part of the memory addr. */
33460 rtx addr;
33461 rtx base_reg;
33463 /* Validate arguments. */
33464 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33465 return false;
33467 if (!base_reg_operand (target, GET_MODE (target)))
33468 return false;
33470 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33471 return false;
33473 /* Allow sign/zero extension. */
33474 if (GET_CODE (mem) == ZERO_EXTEND
33475 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33476 mem = XEXP (mem, 0);
33478 if (!MEM_P (mem))
33479 return false;
33481 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33482 return false;
33484 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33485 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33486 return false;
33488 /* Validate that the register used to load the high value is either the
33489 register being loaded, or we can safely replace its use.
33491 This function is only called from the peephole2 pass and we assume that
33492 there are 2 instructions in the peephole (addis and load), so we want to
   check that the target register is not used in the memory address and
   that the register holding the addis result is dead after the peephole.  */
33495 if (REGNO (addis_reg) != REGNO (target))
33497 if (reg_mentioned_p (target, mem))
33498 return false;
33500 if (!peep2_reg_dead_p (2, addis_reg))
33501 return false;
33503 /* If the target register being loaded is the stack pointer, we must
33504 avoid loading any other value into it, even temporarily. */
33505 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33506 return false;
33509 base_reg = XEXP (addr, 0);
33510 return REGNO (addis_reg) == REGNO (base_reg);
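/* For illustration, the two-insn peephole candidate might look like
   this (register numbers and symbol assumed):

	addis 9,2,sym@toc@ha	<- addis_reg is r9, addis_value the high part
	lwz 10,sym@toc@l(9)	<- target is r10, mem the D-form reference

   which passes the checks above provided r9 is dead after the load and
   r10 does not appear in the address.  */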
33513 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33514 sequence. We adjust the addis register to use the target register. If the
33515 load sign extends, we adjust the code to do the zero extending load, and an
33516 explicit sign extension later since the fusion only covers zero extending
33517 loads.
33519 The operands are:
33520 operands[0] register set with addis (to be replaced with target)
33521 operands[1] value set via addis
33522 operands[2] target register being loaded
33523 operands[3] D-form memory reference using operands[0]. */
33525 void
33526 expand_fusion_gpr_load (rtx *operands)
33528 rtx addis_value = operands[1];
33529 rtx target = operands[2];
33530 rtx orig_mem = operands[3];
33531 rtx new_addr, new_mem, orig_addr, offset;
33532 enum rtx_code plus_or_lo_sum;
33533 machine_mode target_mode = GET_MODE (target);
33534 machine_mode extend_mode = target_mode;
33535 machine_mode ptr_mode = Pmode;
33536 enum rtx_code extend = UNKNOWN;
33538 if (GET_CODE (orig_mem) == ZERO_EXTEND
33539 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33541 extend = GET_CODE (orig_mem);
33542 orig_mem = XEXP (orig_mem, 0);
33543 target_mode = GET_MODE (orig_mem);
33546 gcc_assert (MEM_P (orig_mem));
33548 orig_addr = XEXP (orig_mem, 0);
33549 plus_or_lo_sum = GET_CODE (orig_addr);
33550 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33552 offset = XEXP (orig_addr, 1);
33553 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33554 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33556 if (extend != UNKNOWN)
33557 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33559 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33560 UNSPEC_FUSION_GPR);
33561 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
33563 if (extend == SIGN_EXTEND)
33565 int sub_off = ((BYTES_BIG_ENDIAN)
33566 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33567 : 0);
33568 rtx sign_reg
33569 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33571 emit_insn (gen_rtx_SET (VOIDmode, target,
33572 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33575 return;
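/* For example (sketch): a sign-extending fused halfword load is
   rewritten as the zero-extending fused load of the target, wrapped in
   UNSPEC_FUSION_GPR, followed by a separate
   (set target (sign_extend ...)) of the loaded HImode subreg, since
   power8 load fusion only covers the zero-extending forms.  */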
/* Return a string to fuse an addis instruction with a gpr load into the
   same register that the addis instruction set.  The address that is used
33580 is the logical address that was formed during peephole2:
33581 (lo_sum (high) (low-part))
33583 The code is complicated, so we call output_asm_insn directly, and just
33584 return "". */
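/* For instance, on an ELF target a TOC-relative int load might be
   printed as (symbol and register numbers assumed for illustration):

	addis 10,2,var@toc@ha		# gpr load fusion, type int
	lwz 10,var@toc@l(10)  */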
33586 const char *
33587 emit_fusion_gpr_load (rtx target, rtx mem)
33589 rtx addis_value;
33590 rtx fuse_ops[10];
33591 rtx addr;
33592 rtx load_offset;
33593 const char *addis_str = NULL;
33594 const char *load_str = NULL;
33595 const char *mode_name = NULL;
33596 char insn_template[80];
33597 machine_mode mode;
33598 const char *comment_str = ASM_COMMENT_START;
33600 if (GET_CODE (mem) == ZERO_EXTEND)
33601 mem = XEXP (mem, 0);
33603 gcc_assert (REG_P (target) && MEM_P (mem));
33605 if (*comment_str == ' ')
33606 comment_str++;
33608 addr = XEXP (mem, 0);
33609 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33610 gcc_unreachable ();
33612 addis_value = XEXP (addr, 0);
33613 load_offset = XEXP (addr, 1);
33615 /* Now emit the load instruction to the same register. */
33616 mode = GET_MODE (mem);
33617 switch (mode)
33619 case QImode:
33620 mode_name = "char";
33621 load_str = "lbz";
33622 break;
33624 case HImode:
33625 mode_name = "short";
33626 load_str = "lhz";
33627 break;
33629 case SImode:
33630 mode_name = "int";
33631 load_str = "lwz";
33632 break;
33634 case DImode:
33635 gcc_assert (TARGET_POWERPC64);
33636 mode_name = "long";
33637 load_str = "ld";
33638 break;
33640 default:
33641 gcc_unreachable ();
33644 /* Emit the addis instruction. */
33645 fuse_ops[0] = target;
33646 if (satisfies_constraint_L (addis_value))
33648 fuse_ops[1] = addis_value;
33649 addis_str = "lis %0,%v1";
33652 else if (GET_CODE (addis_value) == PLUS)
33654 rtx op0 = XEXP (addis_value, 0);
33655 rtx op1 = XEXP (addis_value, 1);
33657 if (REG_P (op0) && CONST_INT_P (op1)
33658 && satisfies_constraint_L (op1))
33660 fuse_ops[1] = op0;
33661 fuse_ops[2] = op1;
33662 addis_str = "addis %0,%1,%v2";
33666 else if (GET_CODE (addis_value) == HIGH)
33668 rtx value = XEXP (addis_value, 0);
33669 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33671 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33672 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33673 if (TARGET_ELF)
33674 addis_str = "addis %0,%2,%1@toc@ha";
33676 else if (TARGET_XCOFF)
33677 addis_str = "addis %0,%1@u(%2)";
33679 else
33680 gcc_unreachable ();
33683 else if (GET_CODE (value) == PLUS)
33685 rtx op0 = XEXP (value, 0);
33686 rtx op1 = XEXP (value, 1);
33688 if (GET_CODE (op0) == UNSPEC
33689 && XINT (op0, 1) == UNSPEC_TOCREL
33690 && CONST_INT_P (op1))
33692 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33693 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33694 fuse_ops[3] = op1;
33695 if (TARGET_ELF)
33696 addis_str = "addis %0,%2,%1+%3@toc@ha";
33698 else if (TARGET_XCOFF)
33699 addis_str = "addis %0,%1+%3@u(%2)";
33701 else
33702 gcc_unreachable ();
33706 else if (satisfies_constraint_L (value))
33708 fuse_ops[1] = value;
33709 addis_str = "lis %0,%v1";
33712 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33714 fuse_ops[1] = value;
33715 addis_str = "lis %0,%1@ha";
33719 if (!addis_str)
33720 fatal_insn ("Could not generate addis value for fusion", addis_value);
33722 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33723 comment_str, mode_name);
33724 output_asm_insn (insn_template, fuse_ops);
33726 /* Emit the D-form load instruction. */
33727 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33729 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33730 fuse_ops[1] = load_offset;
33731 output_asm_insn (insn_template, fuse_ops);
33734 else if (GET_CODE (load_offset) == UNSPEC
33735 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33737 if (TARGET_ELF)
33738 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33740 else if (TARGET_XCOFF)
33741 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33743 else
33744 gcc_unreachable ();
33746 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33747 output_asm_insn (insn_template, fuse_ops);
33750 else if (GET_CODE (load_offset) == PLUS
33751 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33752 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33753 && CONST_INT_P (XEXP (load_offset, 1)))
33755 rtx tocrel_unspec = XEXP (load_offset, 0);
33756 if (TARGET_ELF)
33757 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33759 else if (TARGET_XCOFF)
33760 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33762 else
33763 gcc_unreachable ();
33765 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33766 fuse_ops[2] = XEXP (load_offset, 1);
33767 output_asm_insn (insn_template, fuse_ops);
33770 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33772 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33774 fuse_ops[1] = load_offset;
33775 output_asm_insn (insn_template, fuse_ops);
33778 else
33779 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33781 return "";
33784 /* Analyze vector computations and remove unnecessary doubleword
33785 swaps (xxswapdi instructions). This pass is performed only
33786 for little-endian VSX code generation.
33788 For this specific case, loads and stores of 4x32 and 2x64 vectors
   are inefficient.  These are implemented using the lxvd2x and
   stxvd2x instructions, which invert the order of doublewords in
33791 a vector register. Thus the code generation inserts an xxswapdi
33792 after each such load, and prior to each such store. (For spill
33793 code after register assignment, an additional xxswapdi is inserted
33794 following each store in order to return a hard register to its
33795 unpermuted value.)
33797 The extra xxswapdi instructions reduce performance. This can be
33798 particularly bad for vectorized code. The purpose of this pass
33799 is to reduce the number of xxswapdi instructions required for
33800 correctness.
33802 The primary insight is that much code that operates on vectors
33803 does not care about the relative order of elements in a register,
33804 so long as the correct memory order is preserved. If we have
   a computation where all input values are provided by lxvd2x/xxswapdi
   sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
33807 and all intermediate computations are pure SIMD (independent of
33808 element order), then all the xxswapdi's associated with the loads
33809 and stores may be removed.
33811 This pass uses some of the infrastructure and logical ideas from
33812 the "web" pass in web.c. We create maximal webs of computations
33813 fitting the description above using union-find. Each such web is
33814 then optimized by removing its unnecessary xxswapdi instructions.
33816 The pass is placed prior to global optimization so that we can
33817 perform the optimization in the safest and simplest way possible;
33818 that is, by replacing each xxswapdi insn with a register copy insn.
33819 Subsequent forward propagation will remove copies where possible.
33821 There are some operations sensitive to element order for which we
33822 can still allow the operation, provided we modify those operations.
33823 These include CONST_VECTORs, for which we must swap the first and
33824 second halves of the constant vector; and SUBREGs, for which we
33825 must adjust the byte offset to account for the swapped doublewords.
33826 A remaining opportunity would be non-immediate-form splats, for
33827 which we should adjust the selected lane of the input. We should
33828 also make code generation adjustments for sum-across operations,
33829 since this is a common vectorizer reduction.
33831 Because we run prior to the first split, we can see loads and stores
33832 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
33833 vector loads and stores that have not yet been split into a permuting
33834 load/store and a swap. (One way this can happen is with a builtin
33835 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
33836 than deleting a swap, we convert the load/store into a permuting
33837 load/store (which effectively removes the swap). */
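/* A schematic example of the payoff (registers and code invented for
   illustration; vadduwm on VR0/VR1 overlaps VSR32/VSR33):

	lxvd2x 32,0,3		# load a, doublewords reversed
	xxswapdi 32,32		# restore element order
	lxvd2x 33,0,4		# load b, doublewords reversed
	xxswapdi 33,33
	vadduwm 0,0,1		# element-order-independent add
	xxswapdi 32,32		# reverse again for the store
	stxvd2x 32,0,5

   Because the add treats every lane alike, all three xxswapdi's can be
   removed and the add performed directly on the swapped doublewords
   with the same memory-visible result.  */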
33839 /* Notes on Permutes
33841 We do not currently handle computations that contain permutes. There
33842 is a general transformation that can be performed correctly, but it
33843 may introduce more expensive code than it replaces. To handle these
33844 would require a cost model to determine when to perform the optimization.
33845 This commentary records how this could be done if desired.
33847 The most general permute is something like this (example for V16QI):
33849 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
                     (parallel [(const_int a0) (const_int a1)
                                ...
                                (const_int a14) (const_int a15)]))
33854 where a0,...,a15 are in [0,31] and select elements from op1 and op2
   to produce the result.
33857 Regardless of mode, we can convert the PARALLEL to a mask of 16
33858 byte-element selectors. Let's call this M, with M[i] representing
33859 the ith byte-element selector value. Then if we swap doublewords
33860 throughout the computation, we can get correct behavior by replacing
33861 M with M' as follows:
              { M[i+8]+8 : i < 8,  M[i+8] in [0,7]  U [16,23]
     M'[i] =  { M[i+8]-8 : i < 8,  M[i+8] in [8,15] U [24,31]
              { M[i-8]+8 : i >= 8, M[i-8] in [0,7]  U [16,23]
              { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
33868 This seems promising at first, since we are just replacing one mask
33869 with another. But certain masks are preferable to others. If M
33870 is a mask that matches a vmrghh pattern, for example, M' certainly
33871 will not. Instead of a single vmrghh, we would generate a load of
33872 M' and a vperm. So we would need to know how many xxswapd's we can
33873 remove as a result of this transformation to determine if it's
33874 profitable; and preferably the logic would need to be aware of all
33875 the special preferable masks.
33877 Another form of permute is an UNSPEC_VPERM, in which the mask is
33878 already in a register. In some cases, this mask may be a constant
33879 that we can discover with ud-chains, in which case the above
33880 transformation is ok. However, the common usage here is for the
33881 mask to be produced by an UNSPEC_LVSL, in which case the mask
33882 cannot be known at compile time. In such a case we would have to
33883 generate several instructions to compute M' as above at run time,
33884 and a cost model is needed again. */
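/* A worked example of the mask rewrite above: the (big-endian) vmrghb
   mask M = {0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23} becomes

   M' = {12,28,13,29,14,30,15,31,8,24,9,25,10,26,11,27},

   which matches no single merge instruction, so it would have to be
   loaded as a constant and applied with a vperm; hence the need for a
   cost model before attempting this.  */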
33886 /* This is based on the union-find logic in web.c. web_entry_base is
33887 defined in df.h. */
class swap_web_entry : public web_entry_base
{
 public:
33891 /* Pointer to the insn. */
33892 rtx_insn *insn;
33893 /* Set if insn contains a mention of a vector register. All other
33894 fields are undefined if this field is unset. */
33895 unsigned int is_relevant : 1;
33896 /* Set if insn is a load. */
33897 unsigned int is_load : 1;
33898 /* Set if insn is a store. */
33899 unsigned int is_store : 1;
33900 /* Set if insn is a doubleword swap. This can either be a register swap
33901 or a permuting load or store (test is_load and is_store for this). */
33902 unsigned int is_swap : 1;
33903 /* Set if the insn has a live-in use of a parameter register. */
33904 unsigned int is_live_in : 1;
33905 /* Set if the insn has a live-out def of a return register. */
33906 unsigned int is_live_out : 1;
33907 /* Set if the insn contains a subreg reference of a vector register. */
33908 unsigned int contains_subreg : 1;
33909 /* Set if the insn contains a 128-bit integer operand. */
33910 unsigned int is_128_int : 1;
33911 /* Set if this is a call-insn. */
33912 unsigned int is_call : 1;
33913 /* Set if this insn does not perform a vector operation for which
33914 element order matters, or if we know how to fix it up if it does.
33915 Undefined if is_swap is set. */
33916 unsigned int is_swappable : 1;
33917 /* A nonzero value indicates what kind of special handling for this
33918 insn is required if doublewords are swapped. Undefined if
33919 is_swappable is not set. */
33920 unsigned int special_handling : 3;
33921 /* Set if the web represented by this entry cannot be optimized. */
33922 unsigned int web_not_optimizable : 1;
33923 /* Set if this insn should be deleted. */
  unsigned int will_delete : 1;
};
33927 enum special_handling_values {
33928 SH_NONE = 0,
33929 SH_CONST_VECTOR,
33930 SH_SUBREG,
33931 SH_NOSWAP_LD,
33932 SH_NOSWAP_ST,
33933 SH_EXTRACT,
  SH_SPLAT
};
33937 /* Union INSN with all insns containing definitions that reach USE.
33938 Detect whether USE is live-in to the current function. */
33939 static void
33940 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
33942 struct df_link *link = DF_REF_CHAIN (use);
33944 if (!link)
33945 insn_entry[INSN_UID (insn)].is_live_in = 1;
33947 while (link)
33949 if (DF_REF_IS_ARTIFICIAL (link->ref))
33950 insn_entry[INSN_UID (insn)].is_live_in = 1;
33952 if (DF_REF_INSN_INFO (link->ref))
33954 rtx def_insn = DF_REF_INSN (link->ref);
33955 (void)unionfind_union (insn_entry + INSN_UID (insn),
33956 insn_entry + INSN_UID (def_insn));
33959 link = link->next;
33963 /* Union INSN with all insns containing uses reached from DEF.
33964 Detect whether DEF is live-out from the current function. */
33965 static void
33966 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
33968 struct df_link *link = DF_REF_CHAIN (def);
33970 if (!link)
33971 insn_entry[INSN_UID (insn)].is_live_out = 1;
33973 while (link)
33975 /* This could be an eh use or some other artificial use;
33976 we treat these all the same (killing the optimization). */
33977 if (DF_REF_IS_ARTIFICIAL (link->ref))
33978 insn_entry[INSN_UID (insn)].is_live_out = 1;
33980 if (DF_REF_INSN_INFO (link->ref))
33982 rtx use_insn = DF_REF_INSN (link->ref);
33983 (void)unionfind_union (insn_entry + INSN_UID (insn),
33984 insn_entry + INSN_UID (use_insn));
33987 link = link->next;
33991 /* Return 1 iff INSN is a load insn, including permuting loads that
   represent an lxvd2x instruction; else return 0.  */
static unsigned int
insn_is_load_p (rtx insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) == SET)
    {
      if (GET_CODE (SET_SRC (body)) == MEM)
	return 1;

      if (GET_CODE (SET_SRC (body)) == VEC_SELECT
	  && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
	return 1;

      return 0;
    }

  if (GET_CODE (body) != PARALLEL)
    return 0;

  rtx set = XVECEXP (body, 0, 0);

  if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
    return 1;

  return 0;
}
34021 /* Return 1 iff INSN is a store insn, including permuting stores that
   represent an stxvd2x instruction; else return 0.  */
static unsigned int
insn_is_store_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
    return 1;
  if (GET_CODE (body) != PARALLEL)
    return 0;
  rtx set = XVECEXP (body, 0, 0);
  if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
    return 1;
  return 0;
}
34037 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
34038 a permuting load, or a permuting store. */
static unsigned int
insn_is_swap_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return 0;
  rtx rhs = SET_SRC (body);
  if (GET_CODE (rhs) != VEC_SELECT)
    return 0;
  rtx parallel = XEXP (rhs, 1);
  if (GET_CODE (parallel) != PARALLEL)
    return 0;
  unsigned int len = XVECLEN (parallel, 0);
  if (len != 2 && len != 4 && len != 8 && len != 16)
    return 0;
  for (unsigned int i = 0; i < len / 2; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
	return 0;
    }
  for (unsigned int i = len / 2; i < len; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
	return 0;
    }
  return 1;
}
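/* For example, the V2DI form of the swap recognized above looks like

	(set (reg:V2DI 0)
	     (vec_select:V2DI (reg:V2DI 1)
			      (parallel [(const_int 1) (const_int 0)])))

   and the V4SI form uses (parallel [2 3 0 1]).  Register numbers are
   illustrative.  */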
34069 /* Return 1 iff OP is an operand that will not be affected by having
34070 vector doublewords swapped in memory. */
34071 static unsigned int
34072 rtx_is_swappable_p (rtx op, unsigned int *special)
34074 enum rtx_code code = GET_CODE (op);
34075 int i, j;
34076 rtx parallel;
34078 switch (code)
34080 case LABEL_REF:
34081 case SYMBOL_REF:
34082 case CLOBBER:
34083 case REG:
34084 return 1;
34086 case VEC_CONCAT:
34087 case ASM_INPUT:
34088 case ASM_OPERANDS:
34089 return 0;
34091 case CONST_VECTOR:
34093 *special = SH_CONST_VECTOR;
34094 return 1;
34097 case VEC_DUPLICATE:
34098 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
34099 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
34100 it represents a vector splat for which we can do special
34101 handling. */
34102 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
34103 return 1;
34104 else if (GET_CODE (XEXP (op, 0)) == REG
34105 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
34106 /* This catches V2DF and V2DI splat, at a minimum. */
34107 return 1;
34108 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
34109 /* If the duplicated item is from a select, defer to the select
34110 processing to see if we can change the lane for the splat. */
34111 return rtx_is_swappable_p (XEXP (op, 0), special);
34112 else
34113 return 0;
34115 case VEC_SELECT:
34116 /* A vec_extract operation is ok if we change the lane. */
34117 if (GET_CODE (XEXP (op, 0)) == REG
34118 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
34119 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
34120 && XVECLEN (parallel, 0) == 1
34121 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
34123 *special = SH_EXTRACT;
34124 return 1;
34126 else
34127 return 0;
34129 case UNSPEC:
34131 /* Various operations are unsafe for this optimization, at least
34132 without significant additional work. Permutes are obviously
34133 problematic, as both the permute control vector and the ordering
34134 of the target values are invalidated by doubleword swapping.
34135 Vector pack and unpack modify the number of vector lanes.
34136 Merge-high/low will not operate correctly on swapped operands.
34137 Vector shifts across element boundaries are clearly uncool,
34138 as are vector select and concatenate operations. Vector
34139 sum-across instructions define one operand with a specific
34140 order-dependent element, so additional fixup code would be
34141 needed to make those work. Vector set and non-immediate-form
34142 vector splat are element-order sensitive. A few of these
34143 cases might be workable with special handling if required. */
34144 int val = XINT (op, 1);
34145 switch (val)
34147 default:
34148 break;
34149 case UNSPEC_VMRGH_DIRECT:
34150 case UNSPEC_VMRGL_DIRECT:
34151 case UNSPEC_VPACK_SIGN_SIGN_SAT:
34152 case UNSPEC_VPACK_SIGN_UNS_SAT:
34153 case UNSPEC_VPACK_UNS_UNS_MOD:
34154 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
34155 case UNSPEC_VPACK_UNS_UNS_SAT:
34156 case UNSPEC_VPERM:
34157 case UNSPEC_VPERM_UNS:
34158 case UNSPEC_VPERMHI:
34159 case UNSPEC_VPERMSI:
34160 case UNSPEC_VPKPX:
34161 case UNSPEC_VSLDOI:
34162 case UNSPEC_VSLO:
34163 case UNSPEC_VSRO:
34164 case UNSPEC_VSUM2SWS:
34165 case UNSPEC_VSUM4S:
34166 case UNSPEC_VSUM4UBS:
34167 case UNSPEC_VSUMSWS:
34168 case UNSPEC_VSUMSWS_DIRECT:
34169 case UNSPEC_VSX_CONCAT:
34170 case UNSPEC_VSX_SET:
34171 case UNSPEC_VSX_SLDWI:
34172 case UNSPEC_VUNPACK_HI_SIGN:
34173 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
34174 case UNSPEC_VUNPACK_LO_SIGN:
34175 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
34176 case UNSPEC_VUPKHPX:
34177 case UNSPEC_VUPKHS_V4SF:
34178 case UNSPEC_VUPKHU_V4SF:
34179 case UNSPEC_VUPKLPX:
34180 case UNSPEC_VUPKLS_V4SF:
34181 case UNSPEC_VUPKLU_V4SF:
34182 /* The following could be handled as an idiom with XXSPLTW.
34183 These place a scalar in BE element zero, but the XXSPLTW
34184 will currently expect it in BE element 2 in a swapped
34185 region. When one of these feeds an XXSPLTW with no other
34186 defs/uses either way, we can avoid the lane change for
34187 XXSPLTW and things will be correct. TBD. */
34188 case UNSPEC_VSX_CVDPSPN:
34189 case UNSPEC_VSX_CVSPDP:
34190 case UNSPEC_VSX_CVSPDPN:
34191 return 0;
34192 case UNSPEC_VSPLT_DIRECT:
34193 *special = SH_SPLAT;
34194 return 1;
34198 default:
34199 break;
34202 const char *fmt = GET_RTX_FORMAT (code);
34203 int ok = 1;
34205 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34206 if (fmt[i] == 'e' || fmt[i] == 'u')
34208 unsigned int special_op = SH_NONE;
34209 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
34210 /* Ensure we never have two kinds of special handling
34211 for the same insn. */
34212 if (*special != SH_NONE && special_op != SH_NONE
34213 && *special != special_op)
34214 return 0;
34215 *special = special_op;
34217 else if (fmt[i] == 'E')
34218 for (j = 0; j < XVECLEN (op, i); ++j)
34220 unsigned int special_op = SH_NONE;
34221 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
34222 /* Ensure we never have two kinds of special handling
34223 for the same insn. */
34224 if (*special != SH_NONE && special_op != SH_NONE
34225 && *special != special_op)
34226 return 0;
34227 *special = special_op;
34230 return ok;
/* Return 1 iff INSN is an insn that will not be affected by
34234 having vector doublewords swapped in memory (in which case
34235 *SPECIAL is unchanged), or that can be modified to be correct
34236 if vector doublewords are swapped in memory (in which case
34237 *SPECIAL is changed to a value indicating how). */
34238 static unsigned int
34239 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
34240 unsigned int *special)
34242 /* Calls are always bad. */
34243 if (GET_CODE (insn) == CALL_INSN)
34244 return 0;
34246 /* Loads and stores seen here are not permuting, but we can still
34247 fix them up by converting them to permuting ones. Exceptions:
34248 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
34249 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
34250 for the SET source. */
34251 rtx body = PATTERN (insn);
34252 int i = INSN_UID (insn);
34254 if (insn_entry[i].is_load)
34256 if (GET_CODE (body) == SET)
34258 *special = SH_NOSWAP_LD;
34259 return 1;
34261 else
34262 return 0;
34265 if (insn_entry[i].is_store)
34267 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
34269 *special = SH_NOSWAP_ST;
34270 return 1;
34272 else
34273 return 0;
34276 /* Otherwise check the operands for vector lane violations. */
34277 return rtx_is_swappable_p (body, special);
34280 enum chain_purpose { FOR_LOADS, FOR_STORES };
34282 /* Return true if the UD or DU chain headed by LINK is non-empty,
34283 and every entry on the chain references an insn that is a
34284 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
34285 register swap must have only permuting loads as reaching defs.
34286 If PURPOSE is FOR_STORES, each such register swap must have only
34287 register swaps or permuting stores as reached uses. */
34288 static bool
34289 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
34290 enum chain_purpose purpose)
34292 if (!link)
34293 return false;
34295 for (; link; link = link->next)
34297 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
34298 continue;
34300 if (DF_REF_IS_ARTIFICIAL (link->ref))
34301 return false;
34303 rtx reached_insn = DF_REF_INSN (link->ref);
34304 unsigned uid = INSN_UID (reached_insn);
34305 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
34307 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
34308 || insn_entry[uid].is_store)
34309 return false;
34311 if (purpose == FOR_LOADS)
34313 df_ref use;
34314 FOR_EACH_INSN_INFO_USE (use, insn_info)
34316 struct df_link *swap_link = DF_REF_CHAIN (use);
34318 while (swap_link)
          if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34321 return false;
34323 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
34324 unsigned uid2 = INSN_UID (swap_def_insn);
34326 /* Only permuting loads are allowed. */
34327 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
34328 return false;
34330 swap_link = swap_link->next;
34334 else if (purpose == FOR_STORES)
34336 df_ref def;
34337 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34339 struct df_link *swap_link = DF_REF_CHAIN (def);
34341 while (swap_link)
          if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34344 return false;
34346 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
34347 unsigned uid2 = INSN_UID (swap_use_insn);
34349 /* Permuting stores or register swaps are allowed. */
34350 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
34351 return false;
34353 swap_link = swap_link->next;
34359 return true;
34362 /* Mark the xxswapdi instructions associated with permuting loads and
34363 stores for removal. Note that we only flag them for deletion here,
34364 as there is a possibility of a swap being reached from multiple
34365 loads, etc. */
34366 static void
34367 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
34369 rtx insn = insn_entry[i].insn;
34370 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34372 if (insn_entry[i].is_load)
34374 df_ref def;
34375 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34377 struct df_link *link = DF_REF_CHAIN (def);
34379 /* We know by now that these are swaps, so we can delete
34380 them confidently. */
34381 while (link)
34383 rtx use_insn = DF_REF_INSN (link->ref);
34384 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34385 link = link->next;
34389 else if (insn_entry[i].is_store)
34391 df_ref use;
34392 FOR_EACH_INSN_INFO_USE (use, insn_info)
34394 /* Ignore uses for addressability. */
34395 machine_mode mode = GET_MODE (DF_REF_REG (use));
34396 if (!VECTOR_MODE_P (mode))
34397 continue;
34399 struct df_link *link = DF_REF_CHAIN (use);
34401 /* We know by now that these are swaps, so we can delete
34402 them confidently. */
34403 while (link)
34405 rtx def_insn = DF_REF_INSN (link->ref);
34406 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34407 link = link->next;
34413 /* OP is either a CONST_VECTOR or an expression containing one.
34414 Swap the first half of the vector with the second in the first
34415 case. Recurse to find it in the second. */
34416 static void
34417 swap_const_vector_halves (rtx op)
34419 int i;
34420 enum rtx_code code = GET_CODE (op);
34421 if (GET_CODE (op) == CONST_VECTOR)
34423 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34424 for (i = 0; i < half_units; ++i)
34426 rtx temp = CONST_VECTOR_ELT (op, i);
34427 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34428 CONST_VECTOR_ELT (op, i + half_units) = temp;
34431 else
34433 int j;
34434 const char *fmt = GET_RTX_FORMAT (code);
34435 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34436 if (fmt[i] == 'e' || fmt[i] == 'u')
34437 swap_const_vector_halves (XEXP (op, i));
34438 else if (fmt[i] == 'E')
34439 for (j = 0; j < XVECLEN (op, i); ++j)
34440 swap_const_vector_halves (XVECEXP (op, i, j));
34444 /* Find all subregs of a vector expression that perform a narrowing,
34445 and adjust the subreg index to account for doubleword swapping. */
34446 static void
34447 adjust_subreg_index (rtx op)
34449 enum rtx_code code = GET_CODE (op);
34450 if (code == SUBREG
34451 && (GET_MODE_SIZE (GET_MODE (op))
34452 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34454 unsigned int index = SUBREG_BYTE (op);
34455 if (index < 8)
34456 index += 8;
34457 else
34458 index -= 8;
34459 SUBREG_BYTE (op) = index;
34462 const char *fmt = GET_RTX_FORMAT (code);
34463 int i,j;
34464 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34465 if (fmt[i] == 'e' || fmt[i] == 'u')
34466 adjust_subreg_index (XEXP (op, i));
34467 else if (fmt[i] == 'E')
34468 for (j = 0; j < XVECLEN (op, i); ++j)
34469 adjust_subreg_index (XVECEXP (op, i, j));
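/* For instance (illustrative), a narrowing (subreg:DF (reg:V2DF n) 0)
   names the doubleword that the swap has moved to byte offset 8, so
   the subreg must become (subreg:DF (reg:V2DF n) 8), and vice versa.  */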
34472 /* Convert the non-permuting load INSN to a permuting one. */
34473 static void
34474 permute_load (rtx_insn *insn)
34476 rtx body = PATTERN (insn);
34477 rtx mem_op = SET_SRC (body);
34478 rtx tgt_reg = SET_DEST (body);
34479 machine_mode mode = GET_MODE (tgt_reg);
34480 int n_elts = GET_MODE_NUNITS (mode);
34481 int half_elts = n_elts / 2;
34482 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34483 int i, j;
34484 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34485 XVECEXP (par, 0, i) = GEN_INT (j);
34486 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34487 XVECEXP (par, 0, i) = GEN_INT (j);
34488 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34489 SET_SRC (body) = sel;
34490 INSN_CODE (insn) = -1; /* Force re-recognition. */
34491 df_insn_rescan (insn);
34493 if (dump_file)
34494 fprintf (dump_file, "Replacing load %d with permuted load\n",
34495 INSN_UID (insn));
34498 /* Convert the non-permuting store INSN to a permuting one. */
34499 static void
34500 permute_store (rtx_insn *insn)
34502 rtx body = PATTERN (insn);
34503 rtx src_reg = SET_SRC (body);
34504 machine_mode mode = GET_MODE (src_reg);
34505 int n_elts = GET_MODE_NUNITS (mode);
34506 int half_elts = n_elts / 2;
34507 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34508 int i, j;
34509 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34510 XVECEXP (par, 0, i) = GEN_INT (j);
34511 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34512 XVECEXP (par, 0, i) = GEN_INT (j);
34513 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
34514 SET_SRC (body) = sel;
34515 INSN_CODE (insn) = -1; /* Force re-recognition. */
34516 df_insn_rescan (insn);
34518 if (dump_file)
34519 fprintf (dump_file, "Replacing store %d with permuted store\n",
34520 INSN_UID (insn));
/* Given INSN that contains a vector extract operation, adjust the index
34524 of the extracted lane to account for the doubleword swap. */
34525 static void
34526 adjust_extract (rtx_insn *insn)
34528 rtx src = SET_SRC (PATTERN (insn));
34529 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
34530 account for that. */
34531 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
34532 rtx par = XEXP (sel, 1);
34533 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
34534 int lane = INTVAL (XVECEXP (par, 0, 0));
34535 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34536 XVECEXP (par, 0, 0) = GEN_INT (lane);
34537 INSN_CODE (insn) = -1; /* Force re-recognition. */
34538 df_insn_rescan (insn);
34540 if (dump_file)
34541 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
/* Given INSN that contains a vector direct-splat operation, adjust the index
34545 of the source lane to account for the doubleword swap. */
34546 static void
34547 adjust_splat (rtx_insn *insn)
34549 rtx body = PATTERN (insn);
34550 rtx unspec = XEXP (body, 1);
34551 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
34552 int lane = INTVAL (XVECEXP (unspec, 0, 1));
34553 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34554 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
34555 INSN_CODE (insn) = -1; /* Force re-recognition. */
34556 df_insn_rescan (insn);
34558 if (dump_file)
34559 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
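/* E.g., for a V4SI direct splat, source lane 0 becomes lane 2 and lane
   3 becomes lane 1, since the doubleword holding the selected element
   has moved to the other half of the register.  */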
34562 /* The insn described by INSN_ENTRY[I] can be swapped, but only
34563 with special handling. Take care of that here. */
34564 static void
34565 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
34567 rtx_insn *insn = insn_entry[i].insn;
34568 rtx body = PATTERN (insn);
34570 switch (insn_entry[i].special_handling)
34572 default:
34573 gcc_unreachable ();
34574 case SH_CONST_VECTOR:
34576 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
34577 gcc_assert (GET_CODE (body) == SET);
34578 rtx rhs = SET_SRC (body);
34579 swap_const_vector_halves (rhs);
34580 if (dump_file)
34581 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
34582 break;
34584 case SH_SUBREG:
34585 /* A subreg of the same size is already safe. For subregs that
34586 select a smaller portion of a reg, adjust the index for
34587 swapped doublewords. */
34588 adjust_subreg_index (body);
34589 if (dump_file)
34590 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
34591 break;
34592 case SH_NOSWAP_LD:
34593 /* Convert a non-permuting load to a permuting one. */
34594 permute_load (insn);
34595 break;
34596 case SH_NOSWAP_ST:
34597 /* Convert a non-permuting store to a permuting one. */
34598 permute_store (insn);
34599 break;
34600 case SH_EXTRACT:
34601 /* Change the lane on an extract operation. */
34602 adjust_extract (insn);
34603 break;
34604 case SH_SPLAT:
34605 /* Change the lane on a direct-splat operation. */
34606 adjust_splat (insn);
34607 break;
34611 /* Find the insn from the Ith table entry, which is known to be a
34612 register swap Y = SWAP(X). Replace it with a copy Y = X. */
34613 static void
34614 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
34616 rtx_insn *insn = insn_entry[i].insn;
34617 rtx body = PATTERN (insn);
34618 rtx src_reg = XEXP (SET_SRC (body), 0);
34619 rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
34620 rtx_insn *new_insn = emit_insn_before (copy, insn);
34621 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
34622 df_insn_rescan (new_insn);
34624 if (dump_file)
34626 unsigned int new_uid = INSN_UID (new_insn);
34627 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
34630 df_insn_delete (insn);
34631 remove_insn (insn);
34632 insn->set_deleted ();
34635 /* Dump the swap table to DUMP_FILE. */
34636 static void
34637 dump_swap_insn_table (swap_web_entry *insn_entry)
34639 int e = get_max_uid ();
34640 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
34642 for (int i = 0; i < e; ++i)
34643 if (insn_entry[i].is_relevant)
34645 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
34646 fprintf (dump_file, "%6d %6d ", i,
34647 pred_entry && pred_entry->insn
34648 ? INSN_UID (pred_entry->insn) : 0);
34649 if (insn_entry[i].is_load)
34650 fputs ("load ", dump_file);
34651 if (insn_entry[i].is_store)
34652 fputs ("store ", dump_file);
34653 if (insn_entry[i].is_swap)
34654 fputs ("swap ", dump_file);
34655 if (insn_entry[i].is_live_in)
34656 fputs ("live-in ", dump_file);
34657 if (insn_entry[i].is_live_out)
34658 fputs ("live-out ", dump_file);
34659 if (insn_entry[i].contains_subreg)
34660 fputs ("subreg ", dump_file);
34661 if (insn_entry[i].is_128_int)
34662 fputs ("int128 ", dump_file);
34663 if (insn_entry[i].is_call)
34664 fputs ("call ", dump_file);
34665 if (insn_entry[i].is_swappable)
34667 fputs ("swappable ", dump_file);
34668 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
34669 fputs ("special:constvec ", dump_file);
34670 else if (insn_entry[i].special_handling == SH_SUBREG)
34671 fputs ("special:subreg ", dump_file);
34672 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
34673 fputs ("special:load ", dump_file);
34674 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
34675 fputs ("special:store ", dump_file);
34676 else if (insn_entry[i].special_handling == SH_EXTRACT)
34677 fputs ("special:extract ", dump_file);
34678 else if (insn_entry[i].special_handling == SH_SPLAT)
34679 fputs ("special:splat ", dump_file);
34681 if (insn_entry[i].web_not_optimizable)
34682 fputs ("unoptimizable ", dump_file);
34683 if (insn_entry[i].will_delete)
34684 fputs ("delete ", dump_file);
34685 fputs ("\n", dump_file);
34687 fputs ("\n", dump_file);
34690 /* Main entry point for this pass. */
34691 unsigned int
34692 rs6000_analyze_swaps (function *fun)
34694 swap_web_entry *insn_entry;
34695 basic_block bb;
34696 rtx_insn *insn;
34698 /* Dataflow analysis for use-def chains. */
34699 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
34700 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
34701 df_analyze ();
34702 df_set_flags (DF_DEFER_INSN_RESCAN);
34704 /* Allocate structure to represent webs of insns. */
34705 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
34707 /* Walk the insns to gather basic data. */
34708 FOR_ALL_BB_FN (bb, fun)
34709 FOR_BB_INSNS (bb, insn)
34711 unsigned int uid = INSN_UID (insn);
34712 if (NONDEBUG_INSN_P (insn))
34714 insn_entry[uid].insn = insn;
34716 if (GET_CODE (insn) == CALL_INSN)
34717 insn_entry[uid].is_call = 1;
34719 /* Walk the uses and defs to see if we mention vector regs.
34720 Record any constraints on optimization of such mentions. */
34721 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34722 df_ref mention;
34723 FOR_EACH_INSN_INFO_USE (mention, insn_info)
34725 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34726 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34728 /* If a use gets its value from a call insn, it will be
34729 a hard register and will look like (reg:V4SI 3 3).
34730 The df analysis creates two mentions for GPR3 and GPR4,
34731 both DImode. We must recognize this and treat it as a
34732 vector mention to ensure the call is unioned with this
34733 use. */
34734 if (mode == DImode && DF_REF_INSN_INFO (mention))
34736 rtx feeder = DF_REF_INSN (mention);
34737 /* FIXME: It is pretty hard to get from the df mention
34738 to the mode of the use in the insn. We arbitrarily
34739 pick a vector mode here, even though the use might
34740 be a real DImode. We can be too conservative
34741 (create a web larger than necessary) because of
34742 this, so consider eventually fixing this. */
34743 if (GET_CODE (feeder) == CALL_INSN)
34744 mode = V4SImode;
34747 if (VECTOR_MODE_P (mode))
34749 insn_entry[uid].is_relevant = 1;
34750 if (mode == TImode || mode == V1TImode)
34751 insn_entry[uid].is_128_int = 1;
34752 if (DF_REF_INSN_INFO (mention))
34753 insn_entry[uid].contains_subreg
34754 = !rtx_equal_p (DF_REF_REG (mention),
34755 DF_REF_REAL_REG (mention));
34756 union_defs (insn_entry, insn, mention);
34759 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
34761 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34762 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34764 /* If we're loading up a hard vector register for a call,
34765 it looks like (set (reg:V4SI 9 9) (...)). The df
34766 analysis creates two mentions for GPR9 and GPR10, both
34767 DImode. So relying on the mode from the mentions
34768 isn't sufficient to ensure we union the call into the
34769 web with the parameter setup code. */
              rtx pat = PATTERN (insn);
              if (mode == DImode && GET_CODE (pat) == SET
                  && VECTOR_MODE_P (GET_MODE (SET_DEST (pat))))
                mode = GET_MODE (SET_DEST (pat));
34774 if (VECTOR_MODE_P (mode))
34776 insn_entry[uid].is_relevant = 1;
34777 if (mode == TImode || mode == V1TImode)
34778 insn_entry[uid].is_128_int = 1;
34779 if (DF_REF_INSN_INFO (mention))
34780 insn_entry[uid].contains_subreg
34781 = !rtx_equal_p (DF_REF_REG (mention),
34782 DF_REF_REAL_REG (mention));
34783 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
34784 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
34785 insn_entry[uid].is_live_out = 1;
34786 union_uses (insn_entry, insn, mention);
34790 if (insn_entry[uid].is_relevant)
34792 /* Determine if this is a load or store. */
34793 insn_entry[uid].is_load = insn_is_load_p (insn);
34794 insn_entry[uid].is_store = insn_is_store_p (insn);
34796 /* Determine if this is a doubleword swap. If not,
34797 determine whether it can legally be swapped. */
34798 if (insn_is_swap_p (insn))
34799 insn_entry[uid].is_swap = 1;
34800 else
34802 unsigned int special = SH_NONE;
34803 insn_entry[uid].is_swappable
34804 = insn_is_swappable_p (insn_entry, insn, &special);
34805 if (special != SH_NONE && insn_entry[uid].contains_subreg)
34806 insn_entry[uid].is_swappable = 0;
34807 else if (special != SH_NONE)
34808 insn_entry[uid].special_handling = special;
34809 else if (insn_entry[uid].contains_subreg)
34810 insn_entry[uid].special_handling = SH_SUBREG;
34816 if (dump_file)
34818 fprintf (dump_file, "\nSwap insn entry table when first built\n");
34819 dump_swap_insn_table (insn_entry);
34822 /* Record unoptimizable webs. */
34823 unsigned e = get_max_uid (), i;
34824 for (i = 0; i < e; ++i)
34826 if (!insn_entry[i].is_relevant)
34827 continue;
34829 swap_web_entry *root
34830 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
34832 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
34833 || (insn_entry[i].contains_subreg
34834 && insn_entry[i].special_handling != SH_SUBREG)
34835 || insn_entry[i].is_128_int || insn_entry[i].is_call
34836 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
34837 root->web_not_optimizable = 1;
34839 /* If we have loads or stores that aren't permuting then the
34840 optimization isn't appropriate. */
34841 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
34842 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
34843 root->web_not_optimizable = 1;
34845 /* If we have permuting loads or stores that are not accompanied
34846 by a register swap, the optimization isn't appropriate. */
34847 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
34849 rtx insn = insn_entry[i].insn;
34850 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34851 df_ref def;
34853 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34855 struct df_link *link = DF_REF_CHAIN (def);
34857 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
34859 root->web_not_optimizable = 1;
34860 break;
34864 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
34866 rtx insn = insn_entry[i].insn;
34867 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34868 df_ref use;
34870 FOR_EACH_INSN_INFO_USE (use, insn_info)
34872 struct df_link *link = DF_REF_CHAIN (use);
34874 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
34876 root->web_not_optimizable = 1;
34877 break;
34883 if (dump_file)
34885 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
34886 dump_swap_insn_table (insn_entry);
34889 /* For each load and store in an optimizable web (which implies
34890 the loads and stores are permuting), find the associated
34891 register swaps and mark them for removal. Due to various
34892 optimizations we may mark the same swap more than once. Also
34893 perform special handling for swappable insns that require it. */
34894 for (i = 0; i < e; ++i)
34895 if ((insn_entry[i].is_load || insn_entry[i].is_store)
34896 && insn_entry[i].is_swap)
34898 swap_web_entry* root_entry
34899 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34900 if (!root_entry->web_not_optimizable)
34901 mark_swaps_for_removal (insn_entry, i);
34903 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
34905 swap_web_entry* root_entry
34906 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34907 if (!root_entry->web_not_optimizable)
34908 handle_special_swappables (insn_entry, i);
34911 /* Now delete the swaps marked for removal. */
34912 for (i = 0; i < e; ++i)
34913 if (insn_entry[i].will_delete)
34914 replace_swap_with_copy (insn_entry, i);
34916 /* Clean up. */
34917 free (insn_entry);
34918 return 0;
const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
34924 "swaps", /* name */
34925 OPTGROUP_NONE, /* optinfo_flags */
34926 TV_NONE, /* tv_id */
34927 0, /* properties_required */
34928 0, /* properties_provided */
34929 0, /* properties_destroyed */
34930 0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	    && rs6000_optimize_swaps);
  }

  virtual unsigned int execute (function *fun)
  {
    return rs6000_analyze_swaps (fun);
  }
34953 }; // class pass_analyze_swaps
rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
34961 #ifdef RS6000_GLIBC_ATOMIC_FENV
34962 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
34963 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
34964 #endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var (double_type_node);
      mark_addressable (fenv_var);
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */
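
  /* mffs places the FPSCR in the low 32 bits of a double-precision
     image, and mtfsf with a field mask of 0xff writes all eight 4-bit
     fields of that low word back from the source register; the upper
     word of the image is simply carried through by the masking
     below.  */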

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);
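  /* In the mffs image the FPSCR occupies the low 32 bits, so the three
     low-order bits kept by this mask are FPSCR bit 61 (NI, non-IEEE
     mode) and bits 62-63 (RN, the rounding mode).  */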

  tree fenv_var = create_tmp_var (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire lower word of the FPSCR image, i.e. all
     exception flags and enables together with the rounding modes and
     the non-IEEE arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clean_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
                                (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);
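  /* Roughly: update_exception_mask drops, from the FPSCR as read after
     the operation, the control byte (exception enables, NI, RN) and the
     non-writable FX/FEX/VX summary bits; new_exception_mask selects the
     control byte and the sticky exception bits from the saved
     environment.  ORing the two restores the saved control bits while
     preserving exceptions raised in the meantime, as feupdateenv
     requires.  */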

  tree old_fenv = create_tmp_var (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
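
/* For reference, a sketch (an assumption about the caller, not part of
   this file) of how the middle end stitches the three sequences around
   an atomic compound assignment such as "x *= y" on an atomic double
   (see build_atomic_assign in the C front end):

     <*hold>                               // save env, clear exceptions
   retry:
     old = atomic load of x;
     new = old * y;
     if (!atomic_compare_exchange (&x, &old, new))
       {
         <*clear>;                         // discard exceptions from the
         goto retry;                       // failed attempt
       }
     <*update>                             // restore env, raise exceptions  */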

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"