/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "output.h"
#include "dbxout.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs  */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in save_size */
  int vrsave_size;		/* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;	/* size of altivec alignment padding if
				   not in save_size */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable; we call through it
   to get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use a variant of the AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and AltiVec
   registers here, along with an ANY field that is the OR of the three
   register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes
   mapping into real registers, and skip the ANY class, which is just an OR
   of the bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */

/* Per-mode table of the reload insns and the valid addressing-mode masks
   for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
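
/* Usage note (editorial, not from the original file): callers gate
   auto-increment address generation on these predicates, conceptually:

     if (mode_supports_pre_incdec_p (DFmode))
       ... a (pre_dec (reg)) address is legitimate for DFmode ...

   Since the check is made against RELOAD_REG_ANY, which is the OR of the
   GPR/FPR/VMX masks, each predicate asks whether any register class
   supports the update form for the given mode.  */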
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add).  */
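
/* Editorial note (assumption about the cost macro): COSTS_N_INSNS comes
   from rtl.h and scales its argument by the cost of one fast instruction
   (COSTS_N_INSNS (N) expands to (N) * 4), so every entry below is in
   add-equivalents; e.g. a divsi entry of COSTS_N_INSNS (18) models an
   SImode divide as costing about as much as 18 adds.  */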
/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};
/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on Cell processors.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),    /* mulsi_const */
  COSTS_N_INSNS (6/2),    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),   /* divsi */
  COSTS_N_INSNS (70/2),   /* divdi */
  COSTS_N_INSNS (10/2),   /* fp */
  COSTS_N_INSNS (10/2),   /* dmul */
  COSTS_N_INSNS (74/2),   /* sdiv */
  COSTS_N_INSNS (74/2),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  6,			/* streams */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
};
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
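
/* Editorial note on the X-macro scheme above (the .def entry shown is
   hypothetical): each RS6000_BUILTIN_<n> macro is first #undef'd, then
   redefined to emit a table initializer, and rs6000-builtin.def is
   #included, so that a line in the .def file such as

     RS6000_BUILTIN_2 (FOO, "__builtin_foo", MASK, ATTR, CODE_FOR_foo)

   expands to

     { "__builtin_foo", CODE_FOR_foo, MASK, ATTR },

   building rs6000_builtin_info[] mechanically from one master list.  */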
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_hasher<toc_hash_struct *>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
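
/* Illustrative sketch (editorial, not from the original file) of how a
   hash_table<toc_hasher> is typically consulted: fill in a key object and
   use find_slot to locate or create the entry.  The variables x, mode and
   labelno are placeholders here.  */
#if 0
{
  toc_hash_struct *h, **slot;
  h = ggc_alloc<toc_hash_struct> ();
  h->key = x;			/* the constant being placed in the TOC */
  h->key_mode = mode;
  h->labelno = labelno;
  slot = toc_hash_table->find_slot (h, INSERT);
  if (*slot == NULL)
    *slot = h;			/* first sighting: record the new entry */
}
#endif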
/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr", "ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
    "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
   "%rh8",  "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
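
/* Worked example (editorial): ALTIVEC_REG_BIT maps %v0 to the most
   significant VRSAVE bit and %v31 to the least significant one:

     ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO)      == 0x80000000
     ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) == 0x00000001  */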
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS rs6000_md_asm_clobbers

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
   The PowerPC architecture requires only weak consistency among
   processors--that is, memory accesses between processors need not be
   sequentially consistent and memory accesses among processors can occur
   in any order.  The ability to order memory accesses weakly provides
   opportunities for more efficient use of the system bus.  Unless a
   dependency exists, the 604e allows read operations to precede store
   operations.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true
1596 #ifdef HAVE_AS_TLS
1597 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1598 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1599 #endif
1601 /* Use a 32-bit anchor range. This leads to sequences like:
1603 addis tmp,anchor,high
1604 add dest,tmp,low
1606 where tmp itself acts as an anchor, and can be shared between
1607 accesses to the same 64k page. */
1608 #undef TARGET_MIN_ANCHOR_OFFSET
1609 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1610 #undef TARGET_MAX_ANCHOR_OFFSET
1611 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
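/* Illustrative sketch (editorial, not part of the build): with section
   anchors enabled, nearby statics such as

     static int a, b;
     int f (void) { return a + b; }

   can both be addressed from one shared anchor register instead of
   materializing two separate addresses, using the addis/add sequence
   shown in the comment above.  */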
1612 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1613 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1614 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1615 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1617 #undef TARGET_BUILTIN_RECIPROCAL
1618 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1620 #undef TARGET_EXPAND_TO_RTL_HOOK
1621 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1623 #undef TARGET_INSTANTIATE_DECLS
1624 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1626 #undef TARGET_SECONDARY_RELOAD
1627 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1629 #undef TARGET_LEGITIMATE_ADDRESS_P
1630 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1632 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1633 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1635 #undef TARGET_LRA_P
1636 #define TARGET_LRA_P rs6000_lra_p
1638 #undef TARGET_CAN_ELIMINATE
1639 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1641 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1642 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1644 #undef TARGET_TRAMPOLINE_INIT
1645 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1647 #undef TARGET_FUNCTION_VALUE
1648 #define TARGET_FUNCTION_VALUE rs6000_function_value
1650 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1651 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1653 #undef TARGET_OPTION_SAVE
1654 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1656 #undef TARGET_OPTION_RESTORE
1657 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1659 #undef TARGET_OPTION_PRINT
1660 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1662 #undef TARGET_CAN_INLINE_P
1663 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1665 #undef TARGET_SET_CURRENT_FUNCTION
1666 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1668 #undef TARGET_LEGITIMATE_CONSTANT_P
1669 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1671 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1672 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1674 #undef TARGET_CAN_USE_DOLOOP_P
1675 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1677 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1678 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1680 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1681 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1682 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1683 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1684 #undef TARGET_UNWIND_WORD_MODE
1685 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1688 /* Processor table. */
1689 struct rs6000_ptt
1691 const char *const name; /* Canonical processor name. */
1692 const enum processor_type processor; /* Processor type enum value. */
1693 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1696 static struct rs6000_ptt const processor_target_table[] =
1698 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1699 #include "rs6000-cpus.def"
1700 #undef RS6000_CPU
1703 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1704 name is invalid. */
1706 static int
1707 rs6000_cpu_name_lookup (const char *name)
1709 size_t i;
1711 if (name != NULL)
1713 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1714 if (! strcmp (name, processor_target_table[i].name))
1715 return (int)i;
1718 return -1;
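/* Usage sketch (editorial): a caller resolving -mcpu=power7 would do
   something like

     int idx = rs6000_cpu_name_lookup ("power7");
     if (idx >= 0)
       flags = processor_target_table[idx].target_enable;

   while an unrecognized name yields -1 and is diagnosed by the caller.  */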
1722 /* Return number of consecutive hard regs needed starting at reg REGNO
1723 to hold something of mode MODE.
1724 This is ordinarily the length in words of a value of mode MODE
1725 but can be less for certain modes in special long registers.
1727 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1728 scalar instructions. The upper 32 bits are only available to the
1729 SIMD instructions.
1731 POWER and PowerPC GPRs hold 32 bits worth;
1732 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1734 static int
1735 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1737 unsigned HOST_WIDE_INT reg_size;
1739 /* TF/TD modes are special in that they always take 2 registers. */
1740 if (FP_REGNO_P (regno))
1741 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1742 ? UNITS_PER_VSX_WORD
1743 : UNITS_PER_FP_WORD);
1745 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1746 reg_size = UNITS_PER_SPE_WORD;
1748 else if (ALTIVEC_REGNO_P (regno))
1749 reg_size = UNITS_PER_ALTIVEC_WORD;
1751 /* The value returned for SCmode in the E500 double case is 2 for
1752 ABI compatibility; storing an SCmode value in a single register
1753 would require function_arg and rs6000_spe_function_arg to handle
1754 SCmode so as to pass the value correctly in a pair of
1755 registers. */
1756 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1757 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1758 reg_size = UNITS_PER_FP_WORD;
1760 else
1761 reg_size = UNITS_PER_WORD;
1763 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
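/* Worked example (editorial, assuming hard float): DFmode (8 bytes) in a
   32-bit GPR gives (8 + 4 - 1) / 4 = 2 registers, while the same value in
   an FPR gives (8 + 8 - 1) / 8 = 1 register, since UNITS_PER_FP_WORD is 8.  */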
1766 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1767 MODE. */
1768 static int
1769 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1771 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1773 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1774 register combinations; we use PTImode where we need to deal with quad
1775 word memory operations. Don't allow quad words in the argument or frame
1776 pointer registers, just registers 0..31. */
1777 if (mode == PTImode)
1778 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1779 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1780 && ((regno & 1) == 0));
1782 /* VSX registers that overlap the FPR registers are larger than on non-VSX
1783 implementations. Don't allow an item to be split between an FP register
1784 and an Altivec register. Allow TImode in all VSX registers if the user
1785 asked for it. */
1786 if (TARGET_VSX && VSX_REGNO_P (regno)
1787 && (VECTOR_MEM_VSX_P (mode)
1788 || reg_addr[mode].scalar_in_vmx_p
1789 || (TARGET_VSX_TIMODE && mode == TImode)
1790 || (TARGET_VADDUQM && mode == V1TImode)))
1792 if (FP_REGNO_P (regno))
1793 return FP_REGNO_P (last_regno);
1795 if (ALTIVEC_REGNO_P (regno))
1797 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1798 return 0;
1800 return ALTIVEC_REGNO_P (last_regno);
1804 /* The GPRs can hold any mode, but values bigger than one register
1805 cannot go past R31. */
1806 if (INT_REGNO_P (regno))
1807 return INT_REGNO_P (last_regno);
1809 /* The float registers (except for VSX vector modes) can only hold floating
1810 modes and DImode. */
1811 if (FP_REGNO_P (regno))
1813 if (SCALAR_FLOAT_MODE_P (mode)
1814 && (mode != TDmode || (regno % 2) == 0)
1815 && FP_REGNO_P (last_regno))
1816 return 1;
1818 if (GET_MODE_CLASS (mode) == MODE_INT
1819 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1820 return 1;
1822 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1823 && PAIRED_VECTOR_MODE (mode))
1824 return 1;
1826 return 0;
1829 /* The CR register can only hold CC modes. */
1830 if (CR_REGNO_P (regno))
1831 return GET_MODE_CLASS (mode) == MODE_CC;
1833 if (CA_REGNO_P (regno))
1834 return mode == Pmode || mode == SImode;
1836 /* AltiVec modes only in AltiVec registers. */
1837 if (ALTIVEC_REGNO_P (regno))
1838 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1839 || mode == V1TImode);
1841 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1842 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1843 return 1;
1845 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1846 registers, and the value must fit within the register set. */
1848 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
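/* Example (editorial, 64-bit target): PTImode at r31 fails both the
   even-register and the "fits in r0..r31" tests above, while PTImode at
   r30 passes: r30 is even and the pair r30/r31 stays within the GPRs.  */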
1851 /* Print interesting facts about registers. */
1852 static void
1853 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1855 int r, m;
1857 for (r = first_regno; r <= last_regno; ++r)
1859 const char *comma = "";
1860 int len;
1862 if (first_regno == last_regno)
1863 fprintf (stderr, "%s:\t", reg_name);
1864 else
1865 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1867 len = 8;
1868 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1869 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1871 if (len > 70)
1873 fprintf (stderr, ",\n\t");
1874 len = 8;
1875 comma = "";
1878 if (rs6000_hard_regno_nregs[m][r] > 1)
1879 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1880 rs6000_hard_regno_nregs[m][r]);
1881 else
1882 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1884 comma = ", ";
1887 if (call_used_regs[r])
1889 if (len > 70)
1891 fprintf (stderr, ",\n\t");
1892 len = 8;
1893 comma = "";
1896 len += fprintf (stderr, "%s%s", comma, "call-used");
1897 comma = ", ";
1900 if (fixed_regs[r])
1902 if (len > 70)
1904 fprintf (stderr, ",\n\t");
1905 len = 8;
1906 comma = "";
1909 len += fprintf (stderr, "%s%s", comma, "fixed");
1910 comma = ", ";
1913 if (len > 70)
1915 fprintf (stderr, ",\n\t");
1916 comma = "";
1919 len += fprintf (stderr, "%sreg-class = %s", comma,
1920 reg_class_names[(int)rs6000_regno_regclass[r]]);
1921 comma = ", ";
1923 if (len > 70)
1925 fprintf (stderr, ",\n\t");
1926 comma = "";
1929 fprintf (stderr, "%sregno = %d\n", comma, r);
1933 static const char *
1934 rs6000_debug_vector_unit (enum rs6000_vector v)
1936 const char *ret;
1938 switch (v)
1940 case VECTOR_NONE: ret = "none"; break;
1941 case VECTOR_ALTIVEC: ret = "altivec"; break;
1942 case VECTOR_VSX: ret = "vsx"; break;
1943 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1944 case VECTOR_PAIRED: ret = "paired"; break;
1945 case VECTOR_SPE: ret = "spe"; break;
1946 case VECTOR_OTHER: ret = "other"; break;
1947 default: ret = "unknown"; break;
1950 return ret;
1953 /* Inner function printing just the address mask for a particular reload
1954 register class. */
1955 DEBUG_FUNCTION char *
1956 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1958 static char ret[8];
1959 char *p = ret;
1961 if ((mask & RELOAD_REG_VALID) != 0)
1962 *p++ = 'v';
1963 else if (keep_spaces)
1964 *p++ = ' ';
1966 if ((mask & RELOAD_REG_MULTIPLE) != 0)
1967 *p++ = 'm';
1968 else if (keep_spaces)
1969 *p++ = ' ';
1971 if ((mask & RELOAD_REG_INDEXED) != 0)
1972 *p++ = 'i';
1973 else if (keep_spaces)
1974 *p++ = ' ';
1976 if ((mask & RELOAD_REG_OFFSET) != 0)
1977 *p++ = 'o';
1978 else if (keep_spaces)
1979 *p++ = ' ';
1981 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
1982 *p++ = '+';
1983 else if (keep_spaces)
1984 *p++ = ' ';
1986 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
1987 *p++ = '+';
1988 else if (keep_spaces)
1989 *p++ = ' ';
1991 if ((mask & RELOAD_REG_AND_M16) != 0)
1992 *p++ = '&';
1993 else if (keep_spaces)
1994 *p++ = ' ';
1996 *p = '\0';
1998 return ret;
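/* Sketch of the output (editorial): the mask letters appear in the fixed
   order v, m, i, o, +, +, &.  For instance, a GPR mask with VALID, INDEXED,
   OFFSET and both update forms set prints as "v io++ " when keep_spaces is
   true, or "vio++" when it is false.  */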
2001 /* Print the address masks in a human readable fashion. */
2002 DEBUG_FUNCTION void
2003 rs6000_debug_print_mode (ssize_t m)
2005 ssize_t rc;
2007 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2008 for (rc = 0; rc < N_RELOAD_REG; rc++)
2009 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2010 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2012 if (rs6000_vector_unit[m] != VECTOR_NONE
2013 || rs6000_vector_mem[m] != VECTOR_NONE
2014 || (reg_addr[m].reload_store != CODE_FOR_nothing)
2015 || (reg_addr[m].reload_load != CODE_FOR_nothing)
2016 || reg_addr[m].scalar_in_vmx_p)
2018 fprintf (stderr,
2019 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
2020 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2021 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
2022 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2023 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
2024 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
2027 fputs ("\n", stderr);
2030 #define DEBUG_FMT_ID "%-32s= "
2031 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2032 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2033 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
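/* Example (editorial): with the formats above,

     fprintf (stderr, DEBUG_FMT_D, "tls_size", 32);

   prints a line such as "tls_size                        = 32", i.e. the
   label is left-justified in a 32-character field before the '=' sign.  */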
2035 /* Print various interesting information with -mdebug=reg. */
2036 static void
2037 rs6000_debug_reg_global (void)
2039 static const char *const tf[2] = { "false", "true" };
2040 const char *nl = (const char *)0;
2041 int m;
2042 size_t m1, m2, v;
2043 char costly_num[20];
2044 char nop_num[20];
2045 char flags_buffer[40];
2046 const char *costly_str;
2047 const char *nop_str;
2048 const char *trace_str;
2049 const char *abi_str;
2050 const char *cmodel_str;
2051 struct cl_target_option cl_opts;
2053 /* Modes we want tieable information on. */
2054 static const machine_mode print_tieable_modes[] = {
2055 QImode,
2056 HImode,
2057 SImode,
2058 DImode,
2059 TImode,
2060 PTImode,
2061 SFmode,
2062 DFmode,
2063 TFmode,
2064 SDmode,
2065 DDmode,
2066 TDmode,
2067 V8QImode,
2068 V4HImode,
2069 V2SImode,
2070 V16QImode,
2071 V8HImode,
2072 V4SImode,
2073 V2DImode,
2074 V1TImode,
2075 V32QImode,
2076 V16HImode,
2077 V8SImode,
2078 V4DImode,
2079 V2TImode,
2080 V2SFmode,
2081 V4SFmode,
2082 V2DFmode,
2083 V8SFmode,
2084 V4DFmode,
2085 CCmode,
2086 CCUNSmode,
2087 CCEQmode,
2090 /* Virtual regs we are interested in. */
2091 static const struct {
2092 int regno; /* register number. */
2093 const char *name; /* register name. */
2094 } virtual_regs[] = {
2095 { STACK_POINTER_REGNUM, "stack pointer:" },
2096 { TOC_REGNUM, "toc: " },
2097 { STATIC_CHAIN_REGNUM, "static chain: " },
2098 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2099 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2100 { ARG_POINTER_REGNUM, "arg pointer: " },
2101 { FRAME_POINTER_REGNUM, "frame pointer:" },
2102 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2103 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2104 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2105 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2106 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2107 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2108 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2109 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2110 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2113 fputs ("\nHard register information:\n", stderr);
2114 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2115 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2116 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2117 LAST_ALTIVEC_REGNO,
2118 "vs");
2119 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2120 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2121 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2122 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2123 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2124 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2125 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2126 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2128 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2129 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2130 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2132 fprintf (stderr,
2133 "\n"
2134 "d reg_class = %s\n"
2135 "f reg_class = %s\n"
2136 "v reg_class = %s\n"
2137 "wa reg_class = %s\n"
2138 "wd reg_class = %s\n"
2139 "wf reg_class = %s\n"
2140 "wg reg_class = %s\n"
2141 "wh reg_class = %s\n"
2142 "wi reg_class = %s\n"
2143 "wj reg_class = %s\n"
2144 "wk reg_class = %s\n"
2145 "wl reg_class = %s\n"
2146 "wm reg_class = %s\n"
2147 "wr reg_class = %s\n"
2148 "ws reg_class = %s\n"
2149 "wt reg_class = %s\n"
2150 "wu reg_class = %s\n"
2151 "wv reg_class = %s\n"
2152 "ww reg_class = %s\n"
2153 "wx reg_class = %s\n"
2154 "wy reg_class = %s\n"
2155 "wz reg_class = %s\n"
2156 "\n",
2157 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2158 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2159 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2160 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2161 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2162 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2163 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2164 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2165 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2166 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2167 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2168 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2169 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2170 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2171 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2172 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2173 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2174 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2175 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2176 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2177 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2178 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2180 nl = "\n";
2181 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2182 rs6000_debug_print_mode (m);
2184 fputs ("\n", stderr);
2186 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2188 machine_mode mode1 = print_tieable_modes[m1];
2189 bool first_time = true;
2191 nl = (const char *)0;
2192 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2194 machine_mode mode2 = print_tieable_modes[m2];
2195 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2197 if (first_time)
2199 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2200 nl = "\n";
2201 first_time = false;
2204 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2208 if (!first_time)
2209 fputs ("\n", stderr);
2212 if (nl)
2213 fputs (nl, stderr);
2215 if (rs6000_recip_control)
2217 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2219 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2220 if (rs6000_recip_bits[m])
2222 fprintf (stderr,
2223 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2224 GET_MODE_NAME (m),
2225 (RS6000_RECIP_AUTO_RE_P (m)
2226 ? "auto"
2227 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2228 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2229 ? "auto"
2230 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2233 fputs ("\n", stderr);
2236 if (rs6000_cpu_index >= 0)
2238 const char *name = processor_target_table[rs6000_cpu_index].name;
2239 HOST_WIDE_INT flags
2240 = processor_target_table[rs6000_cpu_index].target_enable;
2242 sprintf (flags_buffer, "-mcpu=%s flags", name);
2243 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2245 else
2246 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2248 if (rs6000_tune_index >= 0)
2250 const char *name = processor_target_table[rs6000_tune_index].name;
2251 HOST_WIDE_INT flags
2252 = processor_target_table[rs6000_tune_index].target_enable;
2254 sprintf (flags_buffer, "-mtune=%s flags", name);
2255 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2257 else
2258 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2260 cl_target_option_save (&cl_opts, &global_options);
2261 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2262 rs6000_isa_flags);
2264 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2265 rs6000_isa_flags_explicit);
2267 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2268 rs6000_builtin_mask);
2270 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2272 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2273 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2275 switch (rs6000_sched_costly_dep)
2277 case max_dep_latency:
2278 costly_str = "max_dep_latency";
2279 break;
2281 case no_dep_costly:
2282 costly_str = "no_dep_costly";
2283 break;
2285 case all_deps_costly:
2286 costly_str = "all_deps_costly";
2287 break;
2289 case true_store_to_load_dep_costly:
2290 costly_str = "true_store_to_load_dep_costly";
2291 break;
2293 case store_to_load_dep_costly:
2294 costly_str = "store_to_load_dep_costly";
2295 break;
2297 default:
2298 costly_str = costly_num;
2299 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2300 break;
2303 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2305 switch (rs6000_sched_insert_nops)
2307 case sched_finish_regroup_exact:
2308 nop_str = "sched_finish_regroup_exact";
2309 break;
2311 case sched_finish_pad_groups:
2312 nop_str = "sched_finish_pad_groups";
2313 break;
2315 case sched_finish_none:
2316 nop_str = "sched_finish_none";
2317 break;
2319 default:
2320 nop_str = nop_num;
2321 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2322 break;
2325 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2327 switch (rs6000_sdata)
2329 default:
2330 case SDATA_NONE:
2331 break;
2333 case SDATA_DATA:
2334 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2335 break;
2337 case SDATA_SYSV:
2338 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2339 break;
2341 case SDATA_EABI:
2342 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2343 break;
2347 switch (rs6000_traceback)
2349 case traceback_default: trace_str = "default"; break;
2350 case traceback_none: trace_str = "none"; break;
2351 case traceback_part: trace_str = "part"; break;
2352 case traceback_full: trace_str = "full"; break;
2353 default: trace_str = "unknown"; break;
2356 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2358 switch (rs6000_current_cmodel)
2360 case CMODEL_SMALL: cmodel_str = "small"; break;
2361 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2362 case CMODEL_LARGE: cmodel_str = "large"; break;
2363 default: cmodel_str = "unknown"; break;
2366 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2368 switch (rs6000_current_abi)
2370 case ABI_NONE: abi_str = "none"; break;
2371 case ABI_AIX: abi_str = "aix"; break;
2372 case ABI_ELFv2: abi_str = "ELFv2"; break;
2373 case ABI_V4: abi_str = "V4"; break;
2374 case ABI_DARWIN: abi_str = "darwin"; break;
2375 default: abi_str = "unknown"; break;
2378 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2380 if (rs6000_altivec_abi)
2381 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2383 if (rs6000_spe_abi)
2384 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2386 if (rs6000_darwin64_abi)
2387 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2389 if (rs6000_float_gprs)
2390 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2392 fprintf (stderr, DEBUG_FMT_S, "fprs",
2393 (TARGET_FPRS ? "true" : "false"));
2395 fprintf (stderr, DEBUG_FMT_S, "single_float",
2396 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2398 fprintf (stderr, DEBUG_FMT_S, "double_float",
2399 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2401 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2402 (TARGET_SOFT_FLOAT ? "true" : "false"));
2404 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2405 (TARGET_E500_SINGLE ? "true" : "false"));
2407 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2408 (TARGET_E500_DOUBLE ? "true" : "false"));
2410 if (TARGET_LINK_STACK)
2411 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2413 if (targetm.lra_p ())
2414 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2416 if (TARGET_P8_FUSION)
2417 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2418 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2420 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2421 TARGET_SECURE_PLT ? "secure" : "bss");
2422 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2423 aix_struct_return ? "aix" : "sysv");
2424 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2425 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2426 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2427 tf[!!rs6000_align_branch_targets]);
2428 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2429 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2430 rs6000_long_double_type_size);
2431 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2432 (int)rs6000_sched_restricted_insns_priority);
2433 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2434 (int)END_BUILTINS);
2435 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2436 (int)RS6000_BUILTIN_COUNT);
2438 if (TARGET_VSX)
2439 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2440 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2444 /* Update the addr mask bits in reg_addr to help the secondary reload and
2445 legitimate address support figure out the appropriate addressing to
2446 use. */
2448 static void
2449 rs6000_setup_reg_addr_masks (void)
2451 ssize_t rc, reg, m, nregs;
2452 addr_mask_type any_addr_mask, addr_mask;
2454 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2456 machine_mode m2 = (machine_mode)m;
2458 /* SDmode is special in that we want to access it only via REG+REG
2459 addressing on power7 and above, since we want to use the LFIWZX and
2460 STFIWX instructions to access it. */
2461 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2463 any_addr_mask = 0;
2464 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2466 addr_mask = 0;
2467 reg = reload_reg_map[rc].reg;
2469 /* Can mode values go in the GPR/FPR/Altivec registers? */
2470 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2472 nregs = rs6000_hard_regno_nregs[m][reg];
2473 addr_mask |= RELOAD_REG_VALID;
2475 /* Indicate if the mode takes more than 1 physical register. If
2476 it takes a single register, indicate it can do REG+REG
2477 addressing. */
2478 if (nregs > 1 || m == BLKmode)
2479 addr_mask |= RELOAD_REG_MULTIPLE;
2480 else
2481 addr_mask |= RELOAD_REG_INDEXED;
2483 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2484 addressing. Restrict addressing on SPE for 64-bit types
2485 because of the SUBREG hackery used to address 64-bit floats in
2486 '32-bit' GPRs. */
2488 if (TARGET_UPDATE
2489 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2490 && GET_MODE_SIZE (m2) <= 8
2491 && !VECTOR_MODE_P (m2)
2492 && !COMPLEX_MODE_P (m2)
2493 && !indexed_only_p
2494 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
2496 addr_mask |= RELOAD_REG_PRE_INCDEC;
2498 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2499 we don't allow PRE_MODIFY for some multi-register
2500 operations. */
2501 switch (m)
2503 default:
2504 addr_mask |= RELOAD_REG_PRE_MODIFY;
2505 break;
2507 case DImode:
2508 if (TARGET_POWERPC64)
2509 addr_mask |= RELOAD_REG_PRE_MODIFY;
2510 break;
2512 case DFmode:
2513 case DDmode:
2514 if (TARGET_DF_INSN)
2515 addr_mask |= RELOAD_REG_PRE_MODIFY;
2516 break;
2521 /* GPR and FPR registers can do REG+OFFSET addressing, except
2522 possibly for SDmode. */
2523 if ((addr_mask != 0) && !indexed_only_p
2524 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2525 addr_mask |= RELOAD_REG_OFFSET;
2527 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2528 addressing on 128-bit types. */
2529 if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
2530 && (addr_mask & RELOAD_REG_VALID) != 0)
2531 addr_mask |= RELOAD_REG_AND_M16;
2533 reg_addr[m].addr_mask[rc] = addr_mask;
2534 any_addr_mask |= addr_mask;
2537 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
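/* Example of the result (editorial): on a 64-bit VSX target, DFmode in the
   FPR class typically ends up with the VALID, INDEXED, OFFSET and update
   bits set, while a 16-byte vector mode in the VMX class gets VALID,
   INDEXED and AND_M16 but not OFFSET, matching the (REG & -16) forms of
   Altivec addressing described above.  */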
2542 /* Initialize the various global tables that are based on register size. */
2543 static void
2544 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2546 ssize_t r, m, c;
2547 int align64;
2548 int align32;
2550 /* Precalculate REGNO_REG_CLASS. */
2551 rs6000_regno_regclass[0] = GENERAL_REGS;
2552 for (r = 1; r < 32; ++r)
2553 rs6000_regno_regclass[r] = BASE_REGS;
2555 for (r = 32; r < 64; ++r)
2556 rs6000_regno_regclass[r] = FLOAT_REGS;
2558 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2559 rs6000_regno_regclass[r] = NO_REGS;
2561 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2562 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2564 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2565 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2566 rs6000_regno_regclass[r] = CR_REGS;
2568 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2569 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2570 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2571 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2572 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2573 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2574 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2575 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2576 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2577 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2578 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2579 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2581 /* Precalculate the mapping from register class to the simpler reload
2582 register class. We don't need all of the register classes that are
2583 combinations of different classes, just the simple ones that have constraint letters. */
2584 for (c = 0; c < N_REG_CLASSES; c++)
2585 reg_class_to_reg_type[c] = NO_REG_TYPE;
2587 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2588 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2589 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2590 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2591 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2592 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2593 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2594 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2595 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2596 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2597 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2598 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2600 if (TARGET_VSX)
2602 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2603 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2605 else
2607 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2608 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2611 /* Precalculate the valid memory formats as well as the vector information;
2612 this must be set up before the rs6000_hard_regno_nregs_internal calls
2613 below. */
2614 gcc_assert ((int)VECTOR_NONE == 0);
2615 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2616 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2618 gcc_assert ((int)CODE_FOR_nothing == 0);
2619 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2621 gcc_assert ((int)NO_REGS == 0);
2622 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2624 /* The VSX hardware allows native alignment for vectors, but we control whether
2625 the compiler believes it can use native alignment or must still use 128-bit alignment. */
2626 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2628 align64 = 64;
2629 align32 = 32;
2631 else
2633 align64 = 128;
2634 align32 = 128;
2637 /* V2DF mode, VSX only. */
2638 if (TARGET_VSX)
2640 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2641 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2642 rs6000_vector_align[V2DFmode] = align64;
2645 /* V4SF mode, either VSX or Altivec. */
2646 if (TARGET_VSX)
2648 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2649 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2650 rs6000_vector_align[V4SFmode] = align32;
2652 else if (TARGET_ALTIVEC)
2654 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2655 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2656 rs6000_vector_align[V4SFmode] = align32;
2659 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2660 and stores. */
2661 if (TARGET_ALTIVEC)
2663 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2664 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2665 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2666 rs6000_vector_align[V4SImode] = align32;
2667 rs6000_vector_align[V8HImode] = align32;
2668 rs6000_vector_align[V16QImode] = align32;
2670 if (TARGET_VSX)
2672 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2673 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2674 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2676 else
2678 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2679 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2680 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2684 /* V2DImode: full support depends on the ISA 2.07 vector unit. Allow it under VSX to
2685 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2686 if (TARGET_VSX)
2688 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2689 rs6000_vector_unit[V2DImode]
2690 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2691 rs6000_vector_align[V2DImode] = align64;
2693 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2694 rs6000_vector_unit[V1TImode]
2695 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2696 rs6000_vector_align[V1TImode] = 128;
2699 /* DFmode, see if we want to use the VSX unit. Memory is handled
2700 differently, so don't set rs6000_vector_mem. */
2701 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2703 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2704 rs6000_vector_align[DFmode] = 64;
2707 /* SFmode, see if we want to use the VSX unit. */
2708 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2710 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2711 rs6000_vector_align[SFmode] = 32;
2714 /* Allow TImode in VSX register and set the VSX memory macros. */
2715 if (TARGET_VSX && TARGET_VSX_TIMODE)
2717 rs6000_vector_mem[TImode] = VECTOR_VSX;
2718 rs6000_vector_align[TImode] = align64;
2721 /* TODO add SPE and paired floating point vector support. */
2723 /* Register class constraints for the constraints that depend on compile
2724 switches. When the VSX code was added, different constraints were added
2725 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2726 of the VSX registers are used. The register classes for scalar floating
2727 point types are set based on whether we allow that type into the upper
2728 (Altivec) registers. GCC has register classes to target the Altivec
2729 registers for load/store operations, to select using a VSX memory
2730 operation instead of the traditional floating point operation. The
2731 constraints are:
2733 d - Register class to use with traditional DFmode instructions.
2734 f - Register class to use with traditional SFmode instructions.
2735 v - Altivec register.
2736 wa - Any VSX register.
2737 wc - Reserved to represent individual CR bits (used in LLVM).
2738 wd - Preferred register class for V2DFmode.
2739 wf - Preferred register class for V4SFmode.
2740 wg - Float register for power6x move insns.
2741 wh - FP register for direct move instructions.
2742 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2743 wj - FP or VSX register to hold 64-bit integers for direct moves.
2744 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2745 wl - Float register if we can do 32-bit signed int loads.
2746 wm - VSX register for ISA 2.07 direct move operations.
2747 wn - always NO_REGS.
2748 wr - GPR if 64-bit mode is permitted.
2749 ws - Register class to do ISA 2.06 DF operations.
2750 wt - VSX register for TImode in VSX registers.
2751 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2752 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2753 ww - Register class to do SF conversions in with VSX operations.
2754 wx - Float register if we can do 32-bit int stores.
2755 wy - Register class to do ISA 2.07 SF operations.
2756 wz - Float register if we can do 32-bit unsigned int loads. */
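/* For instance (user-level sketch, not part of this file's logic), once VSX
   is enabled the "wa" constraint can be used from inline asm:

     __vector double v;
     __asm__ ("xvabsdp %x0,%x1" : "=wa" (v) : "wa" (v));

   and resolves to VSX_REGS via the table initialized below.  */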
2758 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2759 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2761 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2762 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2764 if (TARGET_VSX)
2766 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2767 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2768 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2769 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2771 if (TARGET_VSX_TIMODE)
2772 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2774 if (TARGET_UPPER_REGS_DF) /* DFmode */
2776 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2777 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2779 else
2780 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2783 /* Add conditional constraints based on various options, to allow us to
2784 collapse multiple insn patterns. */
2785 if (TARGET_ALTIVEC)
2786 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2788 if (TARGET_MFPGPR) /* DFmode */
2789 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2791 if (TARGET_LFIWAX)
2792 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2794 if (TARGET_DIRECT_MOVE)
2796 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2797 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2798 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2799 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2800 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2801 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2804 if (TARGET_POWERPC64)
2805 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2807 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2809 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2810 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2811 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2813 else if (TARGET_P8_VECTOR)
2815 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2816 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2818 else if (TARGET_VSX)
2819 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2821 if (TARGET_STFIWX)
2822 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2824 if (TARGET_LFIWZX)
2825 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2827 /* Set up the reload helper and direct move functions. */
2828 if (TARGET_VSX || TARGET_ALTIVEC)
2830 if (TARGET_64BIT)
2832 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2833 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2834 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2835 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2836 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2837 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2838 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2839 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2840 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2841 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2842 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2843 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2844 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2845 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2846 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2847 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2848 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2849 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2850 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2851 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2852 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2853 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2855 if (TARGET_VSX_TIMODE)
2857 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2858 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2861 if (TARGET_DIRECT_MOVE)
2863 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2864 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2865 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2866 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2867 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2868 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2869 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2870 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2871 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2873 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2874 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2875 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2876 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2877 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2878 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2879 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2880 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2881 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2884 else
2886 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2887 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2888 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2889 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2890 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2891 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2892 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2893 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2894 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2895 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2896 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2897 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2898 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2899 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2900 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2901 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2902 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2903 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2904 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2905 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2906 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2907 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2909 if (TARGET_VSX_TIMODE)
2911 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2912 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2915 if (TARGET_DIRECT_MOVE)
2917 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2918 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2919 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2923 if (TARGET_UPPER_REGS_DF)
2924 reg_addr[DFmode].scalar_in_vmx_p = true;
2926 if (TARGET_UPPER_REGS_SF)
2927 reg_addr[SFmode].scalar_in_vmx_p = true;
2930 /* Precalculate HARD_REGNO_NREGS. */
2931 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2932 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2933 rs6000_hard_regno_nregs[m][r]
2934 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
2936 /* Precalculate HARD_REGNO_MODE_OK. */
2937 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2938 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2939 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
2940 rs6000_hard_regno_mode_ok_p[m][r] = true;
2942 /* Precalculate CLASS_MAX_NREGS sizes. */
2943 for (c = 0; c < LIM_REG_CLASSES; ++c)
2945 int reg_size;
2947 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2948 reg_size = UNITS_PER_VSX_WORD;
2950 else if (c == ALTIVEC_REGS)
2951 reg_size = UNITS_PER_ALTIVEC_WORD;
2953 else if (c == FLOAT_REGS)
2954 reg_size = UNITS_PER_FP_WORD;
2956 else
2957 reg_size = UNITS_PER_WORD;
2959 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2961 machine_mode m2 = (machine_mode)m;
2962 int reg_size2 = reg_size;
2964 /* TFmode/TDmode always takes 2 registers, even in VSX. */
2965 if (TARGET_VSX && VSX_REG_CLASS_P (c)
2966 && (m == TDmode || m == TFmode))
2967 reg_size2 = UNITS_PER_FP_WORD;
2969 rs6000_class_max_nregs[m][c]
2970 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
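/* Worked example (editorial): with VSX, V2DFmode in a VSX class takes
   (16 + 16 - 1) / 16 = 1 register, while TFmode is forced to the 8-byte
   FP word size by the special case above and so takes
   (16 + 8 - 1) / 8 = 2 registers.  */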
2974 if (TARGET_E500_DOUBLE)
2975 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
2977 /* Calculate which modes to automatically generate code to use the
2978 reciprocal divide and square root instructions. In the future, possibly
2979 automatically generate the instructions even if the user did not specify
2980 -mrecip. The older machines' double precision reciprocal sqrt estimate is
2981 not accurate enough. */
2982 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
2983 if (TARGET_FRES)
2984 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2985 if (TARGET_FRE)
2986 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2987 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2988 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2989 if (VECTOR_UNIT_VSX_P (V2DFmode))
2990 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2992 if (TARGET_FRSQRTES)
2993 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2994 if (TARGET_FRSQRTE)
2995 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2996 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2997 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2998 if (VECTOR_UNIT_VSX_P (V2DFmode))
2999 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3001 if (rs6000_recip_control)
3003 if (!flag_finite_math_only)
3004 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3005 if (flag_trapping_math)
3006 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3007 if (!flag_reciprocal_math)
3008 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3009 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3011 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3012 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3013 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3015 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3016 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3017 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3019 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3020 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3021 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3023 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3024 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3025 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3027 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3028 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3029 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3031 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3032 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3033 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3035 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3036 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3037 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3039 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3040 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3041 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3045 /* Update the addr mask bits in reg_addr to help the secondary reload and
3046 legitimate address support figure out the appropriate addressing to
3047 use. */
3048 rs6000_setup_reg_addr_masks ();
3050 if (global_init_p || TARGET_DEBUG_TARGET)
3052 if (TARGET_DEBUG_REG)
3053 rs6000_debug_reg_global ();
3055 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3056 fprintf (stderr,
3057 "SImode variable mult cost = %d\n"
3058 "SImode constant mult cost = %d\n"
3059 "SImode short constant mult cost = %d\n"
3060 "DImode multipliciation cost = %d\n"
3061 "SImode division cost = %d\n"
3062 "DImode division cost = %d\n"
3063 "Simple fp operation cost = %d\n"
3064 "DFmode multiplication cost = %d\n"
3065 "SFmode division cost = %d\n"
3066 "DFmode division cost = %d\n"
3067 "cache line size = %d\n"
3068 "l1 cache size = %d\n"
3069 "l2 cache size = %d\n"
3070 "simultaneous prefetches = %d\n"
3071 "\n",
3072 rs6000_cost->mulsi,
3073 rs6000_cost->mulsi_const,
3074 rs6000_cost->mulsi_const9,
3075 rs6000_cost->muldi,
3076 rs6000_cost->divsi,
3077 rs6000_cost->divdi,
3078 rs6000_cost->fp,
3079 rs6000_cost->dmul,
3080 rs6000_cost->sdiv,
3081 rs6000_cost->ddiv,
3082 rs6000_cost->cache_line_size,
3083 rs6000_cost->l1_cache_size,
3084 rs6000_cost->l2_cache_size,
3085 rs6000_cost->simultaneous_prefetches);
3089 #if TARGET_MACHO
3090 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3092 static void
3093 darwin_rs6000_override_options (void)
3095 /* The Darwin ABI always includes AltiVec, and can't be (validly) turned
3096 off. */
3097 rs6000_altivec_abi = 1;
3098 TARGET_ALTIVEC_VRSAVE = 1;
3099 rs6000_current_abi = ABI_DARWIN;
3101 if (DEFAULT_ABI == ABI_DARWIN
3102 && TARGET_64BIT)
3103 darwin_one_byte_bool = 1;
3105 if (TARGET_64BIT && ! TARGET_POWERPC64)
3107 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3108 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3110 if (flag_mkernel)
3112 rs6000_default_long_calls = 1;
3113 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3116 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3117 Altivec. */
3118 if (!flag_mkernel && !flag_apple_kext
3119 && TARGET_64BIT
3120 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3121 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3123 /* Unless the user (not the configurer) has explicitly overridden
3124 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3125 G4 unless targeting the kernel. */
3126 if (!flag_mkernel
3127 && !flag_apple_kext
3128 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3129 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3130 && ! global_options_set.x_rs6000_cpu_index)
3132 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3135 #endif
3137 /* If not otherwise specified by a target, make 'long double' equivalent to
3138 'double'. */
3140 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3141 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3142 #endif
3144 /* Return the builtin mask for the various options that could affect which
3145 builtins are enabled. In the past we used target_flags, but we've run out of
3146 bits, and some options like SPE and PAIRED are no longer in
3147 target_flags. */
3149 HOST_WIDE_INT
3150 rs6000_builtin_mask_calculate (void)
3152 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3153 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3154 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3155 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3156 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3157 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3158 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3159 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3160 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3161 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3162 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3163 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3164 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3165 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3166 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3167 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
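/* Example (editorial): on a generic power8 configuration one would expect
   this mask to include at least RS6000_BTM_ALTIVEC, RS6000_BTM_VSX,
   RS6000_BTM_P8_VECTOR and RS6000_BTM_POPCNTD, since -mcpu=power8 turns
   those features on by default.  */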
3170 /* Implement TARGET_MD_ASM_CLOBBERS. All asm statements are considered
3171 to clobber the XER[CA] bit because clobbering that bit without telling
3172 the compiler worked just fine with versions of GCC before GCC 5, and
3173 breaking a lot of older code in ways that are hard to track down is
3174 not such a great idea. */
3176 static tree
3177 rs6000_md_asm_clobbers (tree, tree, tree clobbers)
3179 tree s = build_string (strlen (reg_names[CA_REGNO]), reg_names[CA_REGNO]);
3180 return tree_cons (NULL_TREE, s, clobbers);
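/* Effect (editorial sketch): user code such as

     unsigned long in = 1, out;
     __asm__ ("addic %0,%1,1" : "=r" (out) : "r" (in));

   keeps working even though addic sets XER[CA] and the statement never
   declares that clobber; the hook adds it behind the scenes.  */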
3183 /* Override command line options. Mostly we process the processor type and
3184 sometimes adjust other TARGET_ options. */
3186 static bool
3187 rs6000_option_override_internal (bool global_init_p)
3189 bool ret = true;
3190 bool have_cpu = false;
3192 /* The default cpu requested at configure time, if any. */
3193 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3195 HOST_WIDE_INT set_masks;
3196 int cpu_index;
3197 int tune_index;
3198 struct cl_target_option *main_target_opt
3199 = ((global_init_p || target_option_default_node == NULL)
3200 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3202 /* Remember the explicit arguments. */
3203 if (global_init_p)
3204 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3206 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3207 library functions, so warn about it. The flag may be useful for
3208 performance studies from time to time though, so don't disable it
3209 entirely. */
3210 if (global_options_set.x_rs6000_alignment_flags
3211 && rs6000_alignment_flags == MASK_ALIGN_POWER
3212 && DEFAULT_ABI == ABI_DARWIN
3213 && TARGET_64BIT)
3214 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3215 " it is incompatible with the installed C and C++ libraries");
3217 /* Numerous experiments show that IRA-based loop pressure
3218 calculation works better for RTL loop invariant motion on targets
3219 with enough (>= 32) registers. It is an expensive optimization,
3220 so it is enabled only when optimizing for peak performance. */
3221 if (optimize >= 3 && global_init_p
3222 && !global_options_set.x_flag_ira_loop_pressure)
3223 flag_ira_loop_pressure = 1;
3225 /* Set the pointer size. */
3226 if (TARGET_64BIT)
3228 rs6000_pmode = (int)DImode;
3229 rs6000_pointer_size = 64;
3231 else
3233 rs6000_pmode = (int)SImode;
3234 rs6000_pointer_size = 32;
3237 /* Some OSs don't support saving the high part of 64-bit registers on context
3238 switch. Other OSs don't support saving Altivec registers. On those OSs,
3239 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3240 if the user wants either, the user must explicitly specify them and we
3241 won't interfere with the user's specification. */
3243 set_masks = POWERPC_MASKS;
3244 #ifdef OS_MISSING_POWERPC64
3245 if (OS_MISSING_POWERPC64)
3246 set_masks &= ~OPTION_MASK_POWERPC64;
3247 #endif
3248 #ifdef OS_MISSING_ALTIVEC
3249 if (OS_MISSING_ALTIVEC)
3250 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3251 #endif
3253 /* Don't override by the processor default if given explicitly. */
3254 set_masks &= ~rs6000_isa_flags_explicit;
3256 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3257 the cpu in a target attribute or pragma, but did not specify a tuning
3258 option, use the cpu for the tuning option rather than the option specified
3259 with -mtune on the command line. Process a '--with-cpu' configuration
3260 request as an implicit -mcpu. */
3261 if (rs6000_cpu_index >= 0)
3263 cpu_index = rs6000_cpu_index;
3264 have_cpu = true;
3266 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3268 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3269 have_cpu = true;
3271 else if (implicit_cpu)
3273 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3274 have_cpu = true;
3276 else
3278 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3279 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3280 have_cpu = false;
3283 gcc_assert (cpu_index >= 0);
3285 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3286 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3287 with those from the cpu, except for options that were explicitly set. If
3288 we don't have a cpu, do not override the target bits set in
3289 TARGET_DEFAULT. */
3290 if (have_cpu)
3292 rs6000_isa_flags &= ~set_masks;
3293 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3294 & set_masks);
3296 else
3297 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3298 & ~rs6000_isa_flags_explicit);
3300 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3301 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3302 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3303 to using rs6000_isa_flags, we need to do the initialization here. */
3304 if (!have_cpu)
3305 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
3307 if (rs6000_tune_index >= 0)
3308 tune_index = rs6000_tune_index;
3309 else if (have_cpu)
3310 rs6000_tune_index = tune_index = cpu_index;
3311 else
3313 size_t i;
3314 enum processor_type tune_proc
3315 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3317 tune_index = -1;
3318 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3319 if (processor_target_table[i].processor == tune_proc)
3321 rs6000_tune_index = tune_index = i;
3322 break;
3326 gcc_assert (tune_index >= 0);
3327 rs6000_cpu = processor_target_table[tune_index].processor;
3329 /* Pick defaults for SPE-related control flags. Do this early to make sure
3330 that the TARGET_ macros are representative ASAP. */
3332 int spe_capable_cpu =
3333 (rs6000_cpu == PROCESSOR_PPC8540
3334 || rs6000_cpu == PROCESSOR_PPC8548);
3336 if (!global_options_set.x_rs6000_spe_abi)
3337 rs6000_spe_abi = spe_capable_cpu;
3339 if (!global_options_set.x_rs6000_spe)
3340 rs6000_spe = spe_capable_cpu;
3342 if (!global_options_set.x_rs6000_float_gprs)
3343 rs6000_float_gprs =
3344 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3345 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3346 : 0);
3349 if (global_options_set.x_rs6000_spe_abi
3350 && rs6000_spe_abi
3351 && !TARGET_SPE_ABI)
3352 error ("not configured for SPE ABI");
3354 if (global_options_set.x_rs6000_spe
3355 && rs6000_spe
3356 && !TARGET_SPE)
3357 error ("not configured for SPE instruction set");
3359 if (main_target_opt != NULL
3360 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3361 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3362 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3363 error ("target attribute or pragma changes SPE ABI");
3365 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3366 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3367 || rs6000_cpu == PROCESSOR_PPCE5500)
3369 if (TARGET_ALTIVEC)
3370 error ("AltiVec not supported in this target");
3371 if (TARGET_SPE)
3372 error ("SPE not supported in this target");
3374 if (rs6000_cpu == PROCESSOR_PPCE6500)
3376 if (TARGET_SPE)
3377 error ("SPE not supported in this target");
3380 /* Disable Cell microcode if we are optimizing for the Cell
3381 and not optimizing for size. */
3382 if (rs6000_gen_cell_microcode == -1)
3383 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3384 && !optimize_size);
3386 /* If we are optimizing big endian systems for space and it's OK to
3387 use instructions that would be microcoded on the Cell, use the
3388 load/store multiple and string instructions. */
3389 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3390 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3391 | OPTION_MASK_STRING);
3393 /* Don't allow -mmultiple or -mstring on little endian systems
3394 unless the cpu is a 750, because the hardware doesn't support the
3395 instructions used in little endian mode, and using them causes an
3396 alignment trap. The 750 does not cause an alignment trap (except when
3397 the target is unaligned). */
3399 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3401 if (TARGET_MULTIPLE)
3403 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3404 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3405 warning (0, "-mmultiple is not supported on little endian systems");
3408 if (TARGET_STRING)
3410 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3411 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3412 warning (0, "-mstring is not supported on little endian systems");
3416 /* If little-endian, default to -mstrict-align on older processors.
3417 Testing for htm matches power8 and later. */
3418 if (!BYTES_BIG_ENDIAN
3419 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3420 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3422 /* -maltivec={le,be} implies -maltivec. */
3423 if (rs6000_altivec_element_order != 0)
3424 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3426 /* Disallow -maltivec=le in big endian mode for now. This is not
3427 known to be useful for anyone. */
3428 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3430 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3431 rs6000_altivec_element_order = 0;
3434 /* Add some warnings for VSX. */
3435 if (TARGET_VSX)
3437 const char *msg = NULL;
3438 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3439 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3441 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3442 msg = N_("-mvsx requires hardware floating point");
3443 else
3445 rs6000_isa_flags &= ~OPTION_MASK_VSX;
3446 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3449 else if (TARGET_PAIRED_FLOAT)
3450 msg = N_("-mvsx and -mpaired are incompatible");
3451 else if (TARGET_AVOID_XFORM > 0)
3452 msg = N_("-mvsx needs indexed addressing");
3453 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3454 & OPTION_MASK_ALTIVEC))
3456 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3457 msg = N_("-mvsx and -mno-altivec are incompatible");
3458 else
3459 msg = N_("-mno-altivec disables vsx");
3462 if (msg)
3464 warning (0, msg);
3465 rs6000_isa_flags &= ~OPTION_MASK_VSX;
3466 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3470 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3471 the -mcpu setting to enable options that conflict. */
3472 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3473 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3474 | OPTION_MASK_ALTIVEC
3475 | OPTION_MASK_VSX)) != 0)
3476 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3477 | OPTION_MASK_DIRECT_MOVE)
3478 & ~rs6000_isa_flags_explicit);
3480 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3481 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3483 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3484 unless the user explicitly used the -mno-<option> to disable the code. */
3485 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3486 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3487 else if (TARGET_VSX)
3488 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3489 else if (TARGET_POPCNTD)
3490 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3491 else if (TARGET_DFP)
3492 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3493 else if (TARGET_CMPB)
3494 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3495 else if (TARGET_FPRND)
3496 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3497 else if (TARGET_POPCNTB)
3498 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3499 else if (TARGET_ALTIVEC)
3500 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
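/* To illustrate the cascade above: plain -mvsx falls into the TARGET_VSX arm
and ors in ISA_2_6_MASKS_SERVER, which in turn builds on the ISA 2.5 masks
and AltiVec, minus any bit the user disabled explicitly with -mno-<option>. */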
3502 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3504 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3505 error ("-mcrypto requires -maltivec");
3506 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3509 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3511 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3512 error ("-mdirect-move requires -mvsx");
3513 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3516 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3518 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3519 error ("-mpower8-vector requires -maltivec");
3520 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3523 if (TARGET_P8_VECTOR && !TARGET_VSX)
3525 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3526 error ("-mpower8-vector requires -mvsx");
3527 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3530 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3532 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3533 error ("-mvsx-timode requires -mvsx");
3534 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3537 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3539 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3540 error ("-mhard-dfp requires -mhard-float");
3541 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3544 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3545 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3546 the individual option. */
3547 if (TARGET_UPPER_REGS > 0)
3549 if (TARGET_VSX
3550 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3552 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3553 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3555 if (TARGET_P8_VECTOR
3556 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3558 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3559 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3562 else if (TARGET_UPPER_REGS == 0)
3564 if (TARGET_VSX
3565 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3567 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3568 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3570 if (TARGET_P8_VECTOR
3571 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3573 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3574 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3578 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3580 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3581 error ("-mupper-regs-df requires -mvsx");
3582 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3585 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3587 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3588 error ("-mupper-regs-sf requires -mpower8-vector");
3589 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3592 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3593 silently turn off quad memory mode. */
3594 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3596 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3597 warning (0, N_("-mquad-memory requires 64-bit mode"));
3599 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3600 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3602 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3603 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3606 /* Non-atomic quad memory loads/stores are disabled for little endian,
3607 since the words are reversed, but atomic operations can still be done
3608 by swapping the words. */
3609 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3611 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3612 warning (0, N_("-mquad-memory is not available in little endian mode"));
3614 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3617 /* Assume that if the user asked for normal quad memory instructions, they
3618 want the atomic versions as well, unless they explicitly told us not to
3619 use quad word atomic instructions. */
3620 if (TARGET_QUAD_MEMORY
3621 && !TARGET_QUAD_MEMORY_ATOMIC
3622 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3623 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3625 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3626 generating power8 instructions. */
3627 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3628 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3629 & OPTION_MASK_P8_FUSION);
3631 /* Power8 does not fuse sign extended loads with the addis. If we are
3632 optimizing at high levels for speed, convert a sign extended load into a
3633 zero extending load, and an explicit sign extension. */
3634 if (TARGET_P8_FUSION
3635 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3636 && optimize_function_for_speed_p (cfun)
3637 && optimize >= 3)
3638 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
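/* For example (illustrative instruction pair): a sign-extending lwa, which
power8 will not fuse with a preceding addis, can instead be emitted as a
fusable zero-extending lwz followed by an explicit extsw. */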
3640 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3641 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3643 /* E500mc does "better" if we inline more aggressively. Respect the
3644 user's opinion, though. */
3645 if (rs6000_block_move_inline_limit == 0
3646 && (rs6000_cpu == PROCESSOR_PPCE500MC
3647 || rs6000_cpu == PROCESSOR_PPCE500MC64
3648 || rs6000_cpu == PROCESSOR_PPCE5500
3649 || rs6000_cpu == PROCESSOR_PPCE6500))
3650 rs6000_block_move_inline_limit = 128;
3652 /* store_one_arg depends on expand_block_move to handle at least the
3653 size of reg_parm_stack_space. */
3654 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3655 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3657 if (global_init_p)
3659 /* If the appropriate debug option is enabled, replace the target hooks
3660 with debug versions that call the real version and then print
3661 debugging information. */
3662 if (TARGET_DEBUG_COST)
3664 targetm.rtx_costs = rs6000_debug_rtx_costs;
3665 targetm.address_cost = rs6000_debug_address_cost;
3666 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3669 if (TARGET_DEBUG_ADDR)
3671 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3672 targetm.legitimize_address = rs6000_debug_legitimize_address;
3673 rs6000_secondary_reload_class_ptr
3674 = rs6000_debug_secondary_reload_class;
3675 rs6000_secondary_memory_needed_ptr
3676 = rs6000_debug_secondary_memory_needed;
3677 rs6000_cannot_change_mode_class_ptr
3678 = rs6000_debug_cannot_change_mode_class;
3679 rs6000_preferred_reload_class_ptr
3680 = rs6000_debug_preferred_reload_class;
3681 rs6000_legitimize_reload_address_ptr
3682 = rs6000_debug_legitimize_reload_address;
3683 rs6000_mode_dependent_address_ptr
3684 = rs6000_debug_mode_dependent_address;
3687 if (rs6000_veclibabi_name)
3689 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3690 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3691 else
3693 error ("unknown vectorization library ABI type (%s) for "
3694 "-mveclibabi= switch", rs6000_veclibabi_name);
3695 ret = false;
3700 if (!global_options_set.x_rs6000_long_double_type_size)
3702 if (main_target_opt != NULL
3703 && (main_target_opt->x_rs6000_long_double_type_size
3704 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3705 error ("target attribute or pragma changes long double size");
3706 else
3707 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3710 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3711 if (!global_options_set.x_rs6000_ieeequad)
3712 rs6000_ieeequad = 1;
3713 #endif
3715 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3716 target attribute or pragma which automatically enables both options,
3717 unless the altivec ABI was set. That ABI is set by default for 64-bit,
3718 but not for 32-bit. */
3719 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3720 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3721 & ~rs6000_isa_flags_explicit);
3723 /* Enable Altivec ABI for AIX -maltivec. */
3724 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3726 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3727 error ("target attribute or pragma changes AltiVec ABI");
3728 else
3729 rs6000_altivec_abi = 1;
3732 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3733 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3734 be explicitly overridden in either case. */
3735 if (TARGET_ELF)
3737 if (!global_options_set.x_rs6000_altivec_abi
3738 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3740 if (main_target_opt != NULL
3741 && !main_target_opt->x_rs6000_altivec_abi)
3742 error ("target attribute or pragma changes AltiVec ABI");
3743 else
3744 rs6000_altivec_abi = 1;
3748 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3749 So far, the only darwin64 targets are also MACH-O. */
3750 if (TARGET_MACHO
3751 && DEFAULT_ABI == ABI_DARWIN
3752 && TARGET_64BIT)
3754 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3755 error ("target attribute or pragma changes darwin64 ABI");
3756 else
3758 rs6000_darwin64_abi = 1;
3759 /* Default to natural alignment, for better performance. */
3760 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3764 /* Place FP constants in the constant pool instead of TOC
3765 if section anchors are enabled. */
3766 if (flag_section_anchors
3767 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3768 TARGET_NO_FP_IN_TOC = 1;
3770 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3771 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3773 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3774 SUBTARGET_OVERRIDE_OPTIONS;
3775 #endif
3776 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3777 SUBSUBTARGET_OVERRIDE_OPTIONS;
3778 #endif
3779 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3780 SUB3TARGET_OVERRIDE_OPTIONS;
3781 #endif
3783 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3784 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3786 /* For the E500 family of cores, reset the single/double FP flags to let us
3787 check that they remain constant across attributes or pragmas. Also,
3788 clear a possible request for string instructions, which are not supported
3789 and which we might have silently enabled above for -Os.
3791 For other families, clear ISEL in case it was set implicitly. */
3794 switch (rs6000_cpu)
3796 case PROCESSOR_PPC8540:
3797 case PROCESSOR_PPC8548:
3798 case PROCESSOR_PPCE500MC:
3799 case PROCESSOR_PPCE500MC64:
3800 case PROCESSOR_PPCE5500:
3801 case PROCESSOR_PPCE6500:
3803 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3804 rs6000_double_float = TARGET_E500_DOUBLE;
3806 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3808 break;
3810 default:
3812 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3813 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3815 break;
3818 if (main_target_opt)
3820 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3821 error ("target attribute or pragma changes single precision floating "
3822 "point");
3823 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3824 error ("target attribute or pragma changes double precision floating "
3825 "point");
3828 /* Detect invalid option combinations with E500. */
3829 CHECK_E500_OPTIONS;
3831 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3832 && rs6000_cpu != PROCESSOR_POWER5
3833 && rs6000_cpu != PROCESSOR_POWER6
3834 && rs6000_cpu != PROCESSOR_POWER7
3835 && rs6000_cpu != PROCESSOR_POWER8
3836 && rs6000_cpu != PROCESSOR_PPCA2
3837 && rs6000_cpu != PROCESSOR_CELL
3838 && rs6000_cpu != PROCESSOR_PPC476);
3839 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3840 || rs6000_cpu == PROCESSOR_POWER5
3841 || rs6000_cpu == PROCESSOR_POWER7
3842 || rs6000_cpu == PROCESSOR_POWER8);
3843 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3844 || rs6000_cpu == PROCESSOR_POWER5
3845 || rs6000_cpu == PROCESSOR_POWER6
3846 || rs6000_cpu == PROCESSOR_POWER7
3847 || rs6000_cpu == PROCESSOR_POWER8
3848 || rs6000_cpu == PROCESSOR_PPCE500MC
3849 || rs6000_cpu == PROCESSOR_PPCE500MC64
3850 || rs6000_cpu == PROCESSOR_PPCE5500
3851 || rs6000_cpu == PROCESSOR_PPCE6500);
3853 /* Allow debug switches to override the above settings. These are set to -1
3854 in rs6000.opt to indicate the user hasn't directly set the switch. */
3855 if (TARGET_ALWAYS_HINT >= 0)
3856 rs6000_always_hint = TARGET_ALWAYS_HINT;
3858 if (TARGET_SCHED_GROUPS >= 0)
3859 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3861 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3862 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3864 rs6000_sched_restricted_insns_priority
3865 = (rs6000_sched_groups ? 1 : 0);
3867 /* Handle -msched-costly-dep option. */
3868 rs6000_sched_costly_dep
3869 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3871 if (rs6000_sched_costly_dep_str)
3873 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3874 rs6000_sched_costly_dep = no_dep_costly;
3875 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3876 rs6000_sched_costly_dep = all_deps_costly;
3877 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3878 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3879 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3880 rs6000_sched_costly_dep = store_to_load_dep_costly;
3881 else
3882 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3883 atoi (rs6000_sched_costly_dep_str));
3886 /* Handle -minsert-sched-nops option. */
3887 rs6000_sched_insert_nops
3888 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3890 if (rs6000_sched_insert_nops_str)
3892 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3893 rs6000_sched_insert_nops = sched_finish_none;
3894 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3895 rs6000_sched_insert_nops = sched_finish_pad_groups;
3896 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3897 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3898 else
3899 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3900 atoi (rs6000_sched_insert_nops_str));
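/* The accepted spellings come straight from the strcmp calls above, e.g.
-msched-costly-dep=true_store_to_load or -minsert-sched-nops=regroup_exact;
any other string falls through to atoi and is used as a raw numeric
setting by the scheduler hooks. */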
3903 if (global_init_p)
3905 #ifdef TARGET_REGNAMES
3906 /* If the user desires alternate register names, copy in the
3907 alternate names now. */
3908 if (TARGET_REGNAMES)
3909 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3910 #endif
3912 /* Set aix_struct_return last, after the ABI is determined.
3913 If -maix-struct-return or -msvr4-struct-return was explicitly
3914 used, don't override with the ABI default. */
3915 if (!global_options_set.x_aix_struct_return)
3916 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3918 #if 0
3919 /* IBM XL compiler defaults to unsigned bitfields. */
3920 if (TARGET_XL_COMPAT)
3921 flag_signed_bitfields = 0;
3922 #endif
3924 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3925 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3927 if (TARGET_TOC)
3928 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3930 /* We can only guarantee the availability of DI pseudo-ops when
3931 assembling for 64-bit targets. */
3932 if (!TARGET_64BIT)
3934 targetm.asm_out.aligned_op.di = NULL;
3935 targetm.asm_out.unaligned_op.di = NULL;
3939 /* Set branch target alignment, if not optimizing for size. */
3940 if (!optimize_size)
3942 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
3943 8-byte aligned to avoid misprediction by the branch predictor. */
3944 if (rs6000_cpu == PROCESSOR_TITAN
3945 || rs6000_cpu == PROCESSOR_CELL)
3947 if (align_functions <= 0)
3948 align_functions = 8;
3949 if (align_jumps <= 0)
3950 align_jumps = 8;
3951 if (align_loops <= 0)
3952 align_loops = 8;
3954 if (rs6000_align_branch_targets)
3956 if (align_functions <= 0)
3957 align_functions = 16;
3958 if (align_jumps <= 0)
3959 align_jumps = 16;
3960 if (align_loops <= 0)
3962 can_override_loop_align = 1;
3963 align_loops = 16;
3966 if (align_jumps_max_skip <= 0)
3967 align_jumps_max_skip = 15;
3968 if (align_loops_max_skip <= 0)
3969 align_loops_max_skip = 15;
3972 /* Arrange to save and restore machine status around nested functions. */
3973 init_machine_status = rs6000_init_machine_status;
3975 /* We should always be splitting complex arguments, but we can't break
3976 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3977 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
3978 targetm.calls.split_complex_arg = NULL;
3981 /* Initialize rs6000_cost with the appropriate target costs. */
3982 if (optimize_size)
3983 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3984 else
3985 switch (rs6000_cpu)
3987 case PROCESSOR_RS64A:
3988 rs6000_cost = &rs64a_cost;
3989 break;
3991 case PROCESSOR_MPCCORE:
3992 rs6000_cost = &mpccore_cost;
3993 break;
3995 case PROCESSOR_PPC403:
3996 rs6000_cost = &ppc403_cost;
3997 break;
3999 case PROCESSOR_PPC405:
4000 rs6000_cost = &ppc405_cost;
4001 break;
4003 case PROCESSOR_PPC440:
4004 rs6000_cost = &ppc440_cost;
4005 break;
4007 case PROCESSOR_PPC476:
4008 rs6000_cost = &ppc476_cost;
4009 break;
4011 case PROCESSOR_PPC601:
4012 rs6000_cost = &ppc601_cost;
4013 break;
4015 case PROCESSOR_PPC603:
4016 rs6000_cost = &ppc603_cost;
4017 break;
4019 case PROCESSOR_PPC604:
4020 rs6000_cost = &ppc604_cost;
4021 break;
4023 case PROCESSOR_PPC604e:
4024 rs6000_cost = &ppc604e_cost;
4025 break;
4027 case PROCESSOR_PPC620:
4028 rs6000_cost = &ppc620_cost;
4029 break;
4031 case PROCESSOR_PPC630:
4032 rs6000_cost = &ppc630_cost;
4033 break;
4035 case PROCESSOR_CELL:
4036 rs6000_cost = &ppccell_cost;
4037 break;
4039 case PROCESSOR_PPC750:
4040 case PROCESSOR_PPC7400:
4041 rs6000_cost = &ppc750_cost;
4042 break;
4044 case PROCESSOR_PPC7450:
4045 rs6000_cost = &ppc7450_cost;
4046 break;
4048 case PROCESSOR_PPC8540:
4049 case PROCESSOR_PPC8548:
4050 rs6000_cost = &ppc8540_cost;
4051 break;
4053 case PROCESSOR_PPCE300C2:
4054 case PROCESSOR_PPCE300C3:
4055 rs6000_cost = &ppce300c2c3_cost;
4056 break;
4058 case PROCESSOR_PPCE500MC:
4059 rs6000_cost = &ppce500mc_cost;
4060 break;
4062 case PROCESSOR_PPCE500MC64:
4063 rs6000_cost = &ppce500mc64_cost;
4064 break;
4066 case PROCESSOR_PPCE5500:
4067 rs6000_cost = &ppce5500_cost;
4068 break;
4070 case PROCESSOR_PPCE6500:
4071 rs6000_cost = &ppce6500_cost;
4072 break;
4074 case PROCESSOR_TITAN:
4075 rs6000_cost = &titan_cost;
4076 break;
4078 case PROCESSOR_POWER4:
4079 case PROCESSOR_POWER5:
4080 rs6000_cost = &power4_cost;
4081 break;
4083 case PROCESSOR_POWER6:
4084 rs6000_cost = &power6_cost;
4085 break;
4087 case PROCESSOR_POWER7:
4088 rs6000_cost = &power7_cost;
4089 break;
4091 case PROCESSOR_POWER8:
4092 rs6000_cost = &power8_cost;
4093 break;
4095 case PROCESSOR_PPCA2:
4096 rs6000_cost = &ppca2_cost;
4097 break;
4099 default:
4100 gcc_unreachable ();
4103 if (global_init_p)
4105 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4106 rs6000_cost->simultaneous_prefetches,
4107 global_options.x_param_values,
4108 global_options_set.x_param_values);
4109 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4110 global_options.x_param_values,
4111 global_options_set.x_param_values);
4112 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4113 rs6000_cost->cache_line_size,
4114 global_options.x_param_values,
4115 global_options_set.x_param_values);
4116 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4117 global_options.x_param_values,
4118 global_options_set.x_param_values);
4120 /* Increase loop peeling limits based on performance analysis. */
4121 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4122 global_options.x_param_values,
4123 global_options_set.x_param_values);
4124 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4125 global_options.x_param_values,
4126 global_options_set.x_param_values);
4128 /* If using typedef char *va_list, signal that
4129 __builtin_va_start (&ap, 0) can be optimized to
4130 ap = __builtin_next_arg (0). */
4131 if (DEFAULT_ABI != ABI_V4)
4132 targetm.expand_builtin_va_start = NULL;
4135 /* Set up single/double float flags.
4136 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4137 then set both flags. */
4138 if (TARGET_HARD_FLOAT && TARGET_FPRS
4139 && rs6000_single_float == 0 && rs6000_double_float == 0)
4140 rs6000_single_float = rs6000_double_float = 1;
4142 /* If not explicitly specified via option, decide whether to generate indexed
4143 load/store instructions. */
4144 if (TARGET_AVOID_XFORM == -1)
4145 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4146 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4147 need indexed accesses and the type used is the scalar type of the element
4148 being loaded or stored. */
4149 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4150 && !TARGET_ALTIVEC);
4152 /* Set the -mrecip options. */
4153 if (rs6000_recip_name)
4155 char *p = ASTRDUP (rs6000_recip_name);
4156 char *q;
4157 unsigned int mask, i;
4158 bool invert;
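/* Sketch of the syntax the loop below accepts: a comma-separated list whose
items are keys of recip_options, each optionally prefixed with '!' to clear
the bits instead of setting them, e.g. -mrecip=default,!rsqrtd (assuming
recip_options contains an "rsqrtd" entry). */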
4160 while ((q = strtok (p, ",")) != NULL)
4162 p = NULL;
4163 if (*q == '!')
4165 invert = true;
4166 q++;
4168 else
4169 invert = false;
4171 if (!strcmp (q, "default"))
4172 mask = ((TARGET_RECIP_PRECISION)
4173 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4174 else
4176 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4177 if (!strcmp (q, recip_options[i].string))
4179 mask = recip_options[i].mask;
4180 break;
4183 if (i == ARRAY_SIZE (recip_options))
4185 error ("unknown option for -mrecip=%s", q);
4186 invert = false;
4187 mask = 0;
4188 ret = false;
4192 if (invert)
4193 rs6000_recip_control &= ~mask;
4194 else
4195 rs6000_recip_control |= mask;
4199 /* Set the builtin mask of the various options that could affect which
4200 builtins are available. In the past we used target_flags, but we've run out
4201 of bits, and some options like SPE and PAIRED are no longer in
4202 target_flags. */
4203 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4204 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4206 fprintf (stderr,
4207 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
4208 rs6000_builtin_mask);
4209 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
4212 /* Initialize all of the registers. */
4213 rs6000_init_hard_regno_mode_ok (global_init_p);
4215 /* Save the initial options in case the user uses function-specific options. */
4216 if (global_init_p)
4217 target_option_default_node = target_option_current_node
4218 = build_target_option_node (&global_options);
4220 /* If not explicitly specified via option, decide whether to generate the
4221 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4222 if (TARGET_LINK_STACK == -1)
4223 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4225 return ret;
4228 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4229 define the target cpu type. */
4231 static void
4232 rs6000_option_override (void)
4234 (void) rs6000_option_override_internal (true);
4236 /* Register machine-specific passes. This needs to be done at start-up.
4237 It's convenient to do it here (like i386 does). */
4238 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4240 struct register_pass_info analyze_swaps_info
4241 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4243 register_pass (&analyze_swaps_info);
4247 /* Implement targetm.vectorize.builtin_mask_for_load. */
4248 static tree
4249 rs6000_builtin_mask_for_load (void)
4251 if (TARGET_ALTIVEC || TARGET_VSX)
4252 return altivec_builtin_mask_for_load;
4253 else
4254 return 0;
4257 /* Implement LOOP_ALIGN. */
4259 rs6000_loop_align (rtx label)
4261 basic_block bb;
4262 int ninsns;
4264 /* Don't override loop alignment if -falign-loops was specified. */
4265 if (!can_override_loop_align)
4266 return align_loops_log;
4268 bb = BLOCK_FOR_INSN (label);
4269 ninsns = num_loop_insns (bb->loop_father);
4271 /* Align small loops to 32 bytes (2^5) to fit in an icache sector; otherwise return the default alignment. */
4272 if (ninsns > 4 && ninsns <= 8
4273 && (rs6000_cpu == PROCESSOR_POWER4
4274 || rs6000_cpu == PROCESSOR_POWER5
4275 || rs6000_cpu == PROCESSOR_POWER6
4276 || rs6000_cpu == PROCESSOR_POWER7
4277 || rs6000_cpu == PROCESSOR_POWER8))
4278 return 5;
4279 else
4280 return align_loops_log;
4283 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4284 static int
4285 rs6000_loop_align_max_skip (rtx_insn *label)
4287 return (1 << rs6000_loop_align (label)) - 1;
4290 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4291 after applying N iterations. This routine does not determine how many
4292 iterations are required to reach the desired alignment. */
4294 static bool
4295 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4297 if (is_packed)
4298 return false;
4300 if (TARGET_32BIT)
4302 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4303 return true;
4305 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4306 return true;
4308 return false;
4310 else
4312 if (TARGET_MACHO)
4313 return false;
4315 /* Assuming that all other types are naturally aligned. CHECKME! */
4316 return true;
4320 /* Return true if the vector misalignment factor is supported by the
4321 target. */
4322 static bool
4323 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4324 const_tree type,
4325 int misalignment,
4326 bool is_packed)
4328 if (TARGET_VSX)
4330 /* Return false if the movmisalign pattern is not supported for this mode. */
4331 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4332 return false;
4334 if (misalignment == -1)
4336 /* Misalignment factor is unknown at compile time but we know
4337 it's word aligned. */
4338 if (rs6000_vector_alignment_reachable (type, is_packed))
4340 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4342 if (element_size == 64 || element_size == 32)
4343 return true;
4346 return false;
4349 /* VSX supports word-aligned vectors. */
4350 if (misalignment % 4 == 0)
4351 return true;
4353 return false;
4356 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4357 static int
4358 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4359 tree vectype, int misalign)
4361 unsigned elements;
4362 tree elem_type;
4364 switch (type_of_cost)
4366 case scalar_stmt:
4367 case scalar_load:
4368 case scalar_store:
4369 case vector_stmt:
4370 case vector_load:
4371 case vector_store:
4372 case vec_to_scalar:
4373 case scalar_to_vec:
4374 case cond_branch_not_taken:
4375 return 1;
4377 case vec_perm:
4378 if (TARGET_VSX)
4379 return 3;
4380 else
4381 return 1;
4383 case vec_promote_demote:
4384 if (TARGET_VSX)
4385 return 4;
4386 else
4387 return 1;
4389 case cond_branch_taken:
4390 return 3;
4392 case unaligned_load:
4393 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4395 elements = TYPE_VECTOR_SUBPARTS (vectype);
4396 if (elements == 2)
4397 /* Double word aligned. */
4398 return 2;
4400 if (elements == 4)
4402 switch (misalign)
4404 case 8:
4405 /* Double word aligned. */
4406 return 2;
4408 case -1:
4409 /* Unknown misalignment. */
4410 case 4:
4411 case 12:
4412 /* Word aligned. */
4413 return 22;
4415 default:
4416 gcc_unreachable ();
4421 if (TARGET_ALTIVEC)
4422 /* Misaligned loads are not supported. */
4423 gcc_unreachable ();
4425 return 2;
4427 case unaligned_store:
4428 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4430 elements = TYPE_VECTOR_SUBPARTS (vectype);
4431 if (elements == 2)
4432 /* Double word aligned. */
4433 return 2;
4435 if (elements == 4)
4437 switch (misalign)
4439 case 8:
4440 /* Double word aligned. */
4441 return 2;
4443 case -1:
4444 /* Unknown misalignment. */
4445 case 4:
4446 case 12:
4447 /* Word aligned. */
4448 return 23;
4450 default:
4451 gcc_unreachable ();
4456 if (TARGET_ALTIVEC)
4457 /* Misaligned stores are not supported. */
4458 gcc_unreachable ();
4460 return 2;
4462 case vec_construct:
4463 elements = TYPE_VECTOR_SUBPARTS (vectype);
4464 elem_type = TREE_TYPE (vectype);
4465 /* 32-bit vectors loaded into registers are stored as double
4466 precision, so we need n/2 converts in addition to the usual
4467 n/2 merges to construct a vector of short floats from them. */
4468 if (SCALAR_FLOAT_TYPE_P (elem_type)
4469 && TYPE_PRECISION (elem_type) == 32)
4470 return elements + 1;
4471 else
4472 return elements / 2 + 1;
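/* Worked example of the formula above: building a V4SF vector costs
4 + 1 = 5 (two merges plus two conversions, plus one), while a V2DF
vector costs 2 / 2 + 1 = 2. */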
4474 default:
4475 gcc_unreachable ();
4479 /* Implement targetm.vectorize.preferred_simd_mode. */
4481 static machine_mode
4482 rs6000_preferred_simd_mode (machine_mode mode)
4484 if (TARGET_VSX)
4485 switch (mode)
4487 case DFmode:
4488 return V2DFmode;
4489 default:;
4491 if (TARGET_ALTIVEC || TARGET_VSX)
4492 switch (mode)
4494 case SFmode:
4495 return V4SFmode;
4496 case TImode:
4497 return V1TImode;
4498 case DImode:
4499 return V2DImode;
4500 case SImode:
4501 return V4SImode;
4502 case HImode:
4503 return V8HImode;
4504 case QImode:
4505 return V16QImode;
4506 default:;
4508 if (TARGET_SPE)
4509 switch (mode)
4511 case SFmode:
4512 return V2SFmode;
4513 case SImode:
4514 return V2SImode;
4515 default:;
4517 if (TARGET_PAIRED_FLOAT
4518 && mode == SFmode)
4519 return V2SFmode;
4520 return word_mode;
4523 typedef struct _rs6000_cost_data
4525 struct loop *loop_info;
4526 unsigned cost[3];
4527 } rs6000_cost_data;
4529 /* Test for likely overcommitment of vector hardware resources. If a
4530 loop iteration is relatively large, and too large a percentage of
4531 instructions in the loop are vectorized, the cost model may not
4532 adequately reflect delays from unavailable vector resources.
4533 Penalize the loop body cost for this case. */
4535 static void
4536 rs6000_density_test (rs6000_cost_data *data)
4538 const int DENSITY_PCT_THRESHOLD = 85;
4539 const int DENSITY_SIZE_THRESHOLD = 70;
4540 const int DENSITY_PENALTY = 10;
4541 struct loop *loop = data->loop_info;
4542 basic_block *bbs = get_loop_body (loop);
4543 int nbbs = loop->num_nodes;
4544 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4545 int i, density_pct;
4547 for (i = 0; i < nbbs; i++)
4549 basic_block bb = bbs[i];
4550 gimple_stmt_iterator gsi;
4552 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4554 gimple stmt = gsi_stmt (gsi);
4555 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4557 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4558 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4559 not_vec_cost++;
4563 free (bbs);
4564 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4566 if (density_pct > DENSITY_PCT_THRESHOLD
4567 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4569 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4570 if (dump_enabled_p ())
4571 dump_printf_loc (MSG_NOTE, vect_location,
4572 "density %d%%, cost %d exceeds threshold, penalizing "
4573 "loop body cost by %d%%", density_pct,
4574 vec_cost + not_vec_cost, DENSITY_PENALTY);
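/* Worked example: vec_cost == 90 and not_vec_cost == 10 give a density of
90% and a body size of 100; both thresholds (85%, 70) are exceeded, so
the body cost is scaled to 90 * 110 / 100 == 99. */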
4578 /* Implement targetm.vectorize.init_cost. */
4580 static void *
4581 rs6000_init_cost (struct loop *loop_info)
4583 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4584 data->loop_info = loop_info;
4585 data->cost[vect_prologue] = 0;
4586 data->cost[vect_body] = 0;
4587 data->cost[vect_epilogue] = 0;
4588 return data;
4591 /* Implement targetm.vectorize.add_stmt_cost. */
4593 static unsigned
4594 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4595 struct _stmt_vec_info *stmt_info, int misalign,
4596 enum vect_cost_model_location where)
4598 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4599 unsigned retval = 0;
4601 if (flag_vect_cost_model)
4603 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4604 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4605 misalign);
4606 /* Statements in an inner loop relative to the loop being
4607 vectorized are weighted more heavily. The value here is
4608 arbitrary and could potentially be improved with analysis. */
4609 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4610 count *= 50; /* FIXME. */
4612 retval = (unsigned) (count * stmt_cost);
4613 cost_data->cost[where] += retval;
4616 return retval;
4619 /* Implement targetm.vectorize.finish_cost. */
4621 static void
4622 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4623 unsigned *body_cost, unsigned *epilogue_cost)
4625 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4627 if (cost_data->loop_info)
4628 rs6000_density_test (cost_data);
4630 *prologue_cost = cost_data->cost[vect_prologue];
4631 *body_cost = cost_data->cost[vect_body];
4632 *epilogue_cost = cost_data->cost[vect_epilogue];
4635 /* Implement targetm.vectorize.destroy_cost_data. */
4637 static void
4638 rs6000_destroy_cost_data (void *data)
4640 free (data);
4643 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4644 library with vectorized intrinsics. */
4646 static tree
4647 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4649 char name[32];
4650 const char *suffix = NULL;
4651 tree fntype, new_fndecl, bdecl = NULL_TREE;
4652 int n_args = 1;
4653 const char *bname;
4654 machine_mode el_mode, in_mode;
4655 int n, in_n;
4657 /* Libmass is suitable only for unsafe math, as it does not correctly support
4658 parts of IEEE (such as denormals) with the required precision. Only support
4659 it if we have VSX to use the simd d2 or f4 functions.
4660 XXX: Add variable length support. */
4661 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4662 return NULL_TREE;
4664 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4665 n = TYPE_VECTOR_SUBPARTS (type_out);
4666 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4667 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4668 if (el_mode != in_mode
4669 || n != in_n)
4670 return NULL_TREE;
4672 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4674 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4675 switch (fn)
4677 case BUILT_IN_ATAN2:
4678 case BUILT_IN_HYPOT:
4679 case BUILT_IN_POW:
4680 n_args = 2;
4681 /* fall through */
4683 case BUILT_IN_ACOS:
4684 case BUILT_IN_ACOSH:
4685 case BUILT_IN_ASIN:
4686 case BUILT_IN_ASINH:
4687 case BUILT_IN_ATAN:
4688 case BUILT_IN_ATANH:
4689 case BUILT_IN_CBRT:
4690 case BUILT_IN_COS:
4691 case BUILT_IN_COSH:
4692 case BUILT_IN_ERF:
4693 case BUILT_IN_ERFC:
4694 case BUILT_IN_EXP2:
4695 case BUILT_IN_EXP:
4696 case BUILT_IN_EXPM1:
4697 case BUILT_IN_LGAMMA:
4698 case BUILT_IN_LOG10:
4699 case BUILT_IN_LOG1P:
4700 case BUILT_IN_LOG2:
4701 case BUILT_IN_LOG:
4702 case BUILT_IN_SIN:
4703 case BUILT_IN_SINH:
4704 case BUILT_IN_SQRT:
4705 case BUILT_IN_TAN:
4706 case BUILT_IN_TANH:
4707 bdecl = builtin_decl_implicit (fn);
4708 suffix = "d2"; /* pow -> powd2 */
4709 if (el_mode != DFmode
4710 || n != 2
4711 || !bdecl)
4712 return NULL_TREE;
4713 break;
4715 case BUILT_IN_ATAN2F:
4716 case BUILT_IN_HYPOTF:
4717 case BUILT_IN_POWF:
4718 n_args = 2;
4719 /* fall through */
4721 case BUILT_IN_ACOSF:
4722 case BUILT_IN_ACOSHF:
4723 case BUILT_IN_ASINF:
4724 case BUILT_IN_ASINHF:
4725 case BUILT_IN_ATANF:
4726 case BUILT_IN_ATANHF:
4727 case BUILT_IN_CBRTF:
4728 case BUILT_IN_COSF:
4729 case BUILT_IN_COSHF:
4730 case BUILT_IN_ERFF:
4731 case BUILT_IN_ERFCF:
4732 case BUILT_IN_EXP2F:
4733 case BUILT_IN_EXPF:
4734 case BUILT_IN_EXPM1F:
4735 case BUILT_IN_LGAMMAF:
4736 case BUILT_IN_LOG10F:
4737 case BUILT_IN_LOG1PF:
4738 case BUILT_IN_LOG2F:
4739 case BUILT_IN_LOGF:
4740 case BUILT_IN_SINF:
4741 case BUILT_IN_SINHF:
4742 case BUILT_IN_SQRTF:
4743 case BUILT_IN_TANF:
4744 case BUILT_IN_TANHF:
4745 bdecl = builtin_decl_implicit (fn);
4746 suffix = "4"; /* powf -> powf4 */
4747 if (el_mode != SFmode
4748 || n != 4
4749 || !bdecl)
4750 return NULL_TREE;
4751 break;
4753 default:
4754 return NULL_TREE;
4757 else
4758 return NULL_TREE;
4760 gcc_assert (suffix != NULL);
4761 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4762 if (!bname)
4763 return NULL_TREE;
4765 strcpy (name, bname + sizeof ("__builtin_") - 1);
4766 strcat (name, suffix);
4768 if (n_args == 1)
4769 fntype = build_function_type_list (type_out, type_in, NULL);
4770 else if (n_args == 2)
4771 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4772 else
4773 gcc_unreachable ();
4775 /* Build a function declaration for the vectorized function. */
4776 new_fndecl = build_decl (BUILTINS_LOCATION,
4777 FUNCTION_DECL, get_identifier (name), fntype);
4778 TREE_PUBLIC (new_fndecl) = 1;
4779 DECL_EXTERNAL (new_fndecl) = 1;
4780 DECL_IS_NOVOPS (new_fndecl) = 1;
4781 TREE_READONLY (new_fndecl) = 1;
4783 return new_fndecl;
4786 /* Returns a function decl for a vectorized version of the builtin function
4787 with builtin function code FN and the result vector type TYPE, or NULL_TREE
4788 if it is not available. */
4790 static tree
4791 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4792 tree type_in)
4794 machine_mode in_mode, out_mode;
4795 int in_n, out_n;
4797 if (TARGET_DEBUG_BUILTIN)
4798 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4799 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4800 GET_MODE_NAME (TYPE_MODE (type_out)),
4801 GET_MODE_NAME (TYPE_MODE (type_in)));
4803 if (TREE_CODE (type_out) != VECTOR_TYPE
4804 || TREE_CODE (type_in) != VECTOR_TYPE
4805 || !TARGET_VECTORIZE_BUILTINS)
4806 return NULL_TREE;
4808 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4809 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4810 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4811 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4813 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4815 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4816 switch (fn)
4818 case BUILT_IN_CLZIMAX:
4819 case BUILT_IN_CLZLL:
4820 case BUILT_IN_CLZL:
4821 case BUILT_IN_CLZ:
4822 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4824 if (out_mode == QImode && out_n == 16)
4825 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4826 else if (out_mode == HImode && out_n == 8)
4827 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4828 else if (out_mode == SImode && out_n == 4)
4829 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4830 else if (out_mode == DImode && out_n == 2)
4831 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4833 break;
4834 case BUILT_IN_COPYSIGN:
4835 if (VECTOR_UNIT_VSX_P (V2DFmode)
4836 && out_mode == DFmode && out_n == 2
4837 && in_mode == DFmode && in_n == 2)
4838 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4839 break;
4840 case BUILT_IN_COPYSIGNF:
4841 if (out_mode != SFmode || out_n != 4
4842 || in_mode != SFmode || in_n != 4)
4843 break;
4844 if (VECTOR_UNIT_VSX_P (V4SFmode))
4845 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4846 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4847 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4848 break;
4849 case BUILT_IN_POPCOUNTIMAX:
4850 case BUILT_IN_POPCOUNTLL:
4851 case BUILT_IN_POPCOUNTL:
4852 case BUILT_IN_POPCOUNT:
4853 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4855 if (out_mode == QImode && out_n == 16)
4856 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4857 else if (out_mode == HImode && out_n == 8)
4858 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4859 else if (out_mode == SImode && out_n == 4)
4860 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4861 else if (out_mode == DImode && out_n == 2)
4862 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4864 break;
4865 case BUILT_IN_SQRT:
4866 if (VECTOR_UNIT_VSX_P (V2DFmode)
4867 && out_mode == DFmode && out_n == 2
4868 && in_mode == DFmode && in_n == 2)
4869 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4870 break;
4871 case BUILT_IN_SQRTF:
4872 if (VECTOR_UNIT_VSX_P (V4SFmode)
4873 && out_mode == SFmode && out_n == 4
4874 && in_mode == SFmode && in_n == 4)
4875 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4876 break;
4877 case BUILT_IN_CEIL:
4878 if (VECTOR_UNIT_VSX_P (V2DFmode)
4879 && out_mode == DFmode && out_n == 2
4880 && in_mode == DFmode && in_n == 2)
4881 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4882 break;
4883 case BUILT_IN_CEILF:
4884 if (out_mode != SFmode || out_n != 4
4885 || in_mode != SFmode || in_n != 4)
4886 break;
4887 if (VECTOR_UNIT_VSX_P (V4SFmode))
4888 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4889 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4890 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4891 break;
4892 case BUILT_IN_FLOOR:
4893 if (VECTOR_UNIT_VSX_P (V2DFmode)
4894 && out_mode == DFmode && out_n == 2
4895 && in_mode == DFmode && in_n == 2)
4896 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4897 break;
4898 case BUILT_IN_FLOORF:
4899 if (out_mode != SFmode || out_n != 4
4900 || in_mode != SFmode || in_n != 4)
4901 break;
4902 if (VECTOR_UNIT_VSX_P (V4SFmode))
4903 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4904 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4905 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4906 break;
4907 case BUILT_IN_FMA:
4908 if (VECTOR_UNIT_VSX_P (V2DFmode)
4909 && out_mode == DFmode && out_n == 2
4910 && in_mode == DFmode && in_n == 2)
4911 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4912 break;
4913 case BUILT_IN_FMAF:
4914 if (VECTOR_UNIT_VSX_P (V4SFmode)
4915 && out_mode == SFmode && out_n == 4
4916 && in_mode == SFmode && in_n == 4)
4917 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4918 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4919 && out_mode == SFmode && out_n == 4
4920 && in_mode == SFmode && in_n == 4)
4921 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4922 break;
4923 case BUILT_IN_TRUNC:
4924 if (VECTOR_UNIT_VSX_P (V2DFmode)
4925 && out_mode == DFmode && out_n == 2
4926 && in_mode == DFmode && in_n == 2)
4927 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4928 break;
4929 case BUILT_IN_TRUNCF:
4930 if (out_mode != SFmode || out_n != 4
4931 || in_mode != SFmode || in_n != 4)
4932 break;
4933 if (VECTOR_UNIT_VSX_P (V4SFmode))
4934 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4935 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4936 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4937 break;
4938 case BUILT_IN_NEARBYINT:
4939 if (VECTOR_UNIT_VSX_P (V2DFmode)
4940 && flag_unsafe_math_optimizations
4941 && out_mode == DFmode && out_n == 2
4942 && in_mode == DFmode && in_n == 2)
4943 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4944 break;
4945 case BUILT_IN_NEARBYINTF:
4946 if (VECTOR_UNIT_VSX_P (V4SFmode)
4947 && flag_unsafe_math_optimizations
4948 && out_mode == SFmode && out_n == 4
4949 && in_mode == SFmode && in_n == 4)
4950 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4951 break;
4952 case BUILT_IN_RINT:
4953 if (VECTOR_UNIT_VSX_P (V2DFmode)
4954 && !flag_trapping_math
4955 && out_mode == DFmode && out_n == 2
4956 && in_mode == DFmode && in_n == 2)
4957 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4958 break;
4959 case BUILT_IN_RINTF:
4960 if (VECTOR_UNIT_VSX_P (V4SFmode)
4961 && !flag_trapping_math
4962 && out_mode == SFmode && out_n == 4
4963 && in_mode == SFmode && in_n == 4)
4964 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4965 break;
4966 default:
4967 break;
4971 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4973 enum rs6000_builtins fn
4974 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4975 switch (fn)
4977 case RS6000_BUILTIN_RSQRTF:
4978 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4979 && out_mode == SFmode && out_n == 4
4980 && in_mode == SFmode && in_n == 4)
4981 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4982 break;
4983 case RS6000_BUILTIN_RSQRT:
4984 if (VECTOR_UNIT_VSX_P (V2DFmode)
4985 && out_mode == DFmode && out_n == 2
4986 && in_mode == DFmode && in_n == 2)
4987 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
4988 break;
4989 case RS6000_BUILTIN_RECIPF:
4990 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4991 && out_mode == SFmode && out_n == 4
4992 && in_mode == SFmode && in_n == 4)
4993 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
4994 break;
4995 case RS6000_BUILTIN_RECIP:
4996 if (VECTOR_UNIT_VSX_P (V2DFmode)
4997 && out_mode == DFmode && out_n == 2
4998 && in_mode == DFmode && in_n == 2)
4999 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5000 break;
5001 default:
5002 break;
5006 /* Generate calls to libmass if appropriate. */
5007 if (rs6000_veclib_handler)
5008 return rs6000_veclib_handler (fndecl, type_out, type_in);
5010 return NULL_TREE;
5013 /* Default CPU string for rs6000*_file_start functions. */
5014 static const char *rs6000_default_cpu;
5016 /* Do anything needed at the start of the asm file. */
5018 static void
5019 rs6000_file_start (void)
5021 char buffer[80];
5022 const char *start = buffer;
5023 FILE *file = asm_out_file;
5025 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5027 default_file_start ();
5029 if (flag_verbose_asm)
5031 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5033 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5035 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5036 start = "";
5039 if (global_options_set.x_rs6000_cpu_index)
5041 fprintf (file, "%s -mcpu=%s", start,
5042 processor_target_table[rs6000_cpu_index].name);
5043 start = "";
5046 if (global_options_set.x_rs6000_tune_index)
5048 fprintf (file, "%s -mtune=%s", start,
5049 processor_target_table[rs6000_tune_index].name);
5050 start = "";
5053 if (PPC405_ERRATUM77)
5055 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5056 start = "";
5059 #ifdef USING_ELFOS_H
5060 switch (rs6000_sdata)
5062 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5063 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5064 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5065 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5068 if (rs6000_sdata && g_switch_value)
5070 fprintf (file, "%s -G %d", start,
5071 g_switch_value);
5072 start = "";
5074 #endif
5076 if (*start == '\0')
5077 putc ('\n', file);
5080 #ifdef USING_ELFOS_H
5081 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5082 || !global_options_set.x_rs6000_cpu_index)
5084 fputs ("\t.machine ", asm_out_file);
5085 if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5086 fputs ("power8\n", asm_out_file);
5087 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5088 fputs ("power7\n", asm_out_file);
5089 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5090 fputs ("power6\n", asm_out_file);
5091 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5092 fputs ("power5\n", asm_out_file);
5093 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5094 fputs ("power4\n", asm_out_file);
5095 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5096 fputs ("ppc64\n", asm_out_file);
5097 else
5098 fputs ("ppc\n", asm_out_file);
5100 #endif
5102 if (DEFAULT_ABI == ABI_ELFv2)
5103 fprintf (file, "\t.abiversion 2\n");
5105 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5106 || (TARGET_ELF && flag_pic == 2))
5108 switch_to_section (toc_section);
5109 switch_to_section (text_section);
5114 /* Return nonzero if this function is known to have a null epilogue. */
5117 direct_return (void)
5119 if (reload_completed)
5121 rs6000_stack_t *info = rs6000_stack_info ();
5123 if (info->first_gp_reg_save == 32
5124 && info->first_fp_reg_save == 64
5125 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5126 && ! info->lr_save_p
5127 && ! info->cr_save_p
5128 && info->vrsave_mask == 0
5129 && ! info->push_p)
5130 return 1;
5133 return 0;
5136 /* Return the number of instructions it takes to form a constant in an
5137 integer register. */
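/* Worked examples (illustrative): 0x7fff loads with one addi;
0x12345678 takes lis + ori (2 insns); and on 64-bit,
0x100000000 also counts as 2 via the recursion below
(form 1 in the high word, then shift left 32). */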
5140 num_insns_constant_wide (HOST_WIDE_INT value)
5142 /* signed constant loadable with addi */
5143 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5144 return 1;
5146 /* constant loadable with addis */
5147 else if ((value & 0xffff) == 0
5148 && (value >> 31 == -1 || value >> 31 == 0))
5149 return 1;
5151 else if (TARGET_POWERPC64)
5153 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5154 HOST_WIDE_INT high = value >> 31;
5156 if (high == 0 || high == -1)
5157 return 2;
5159 high >>= 1;
5161 if (low == 0)
5162 return num_insns_constant_wide (high) + 1;
5163 else if (high == 0)
5164 return num_insns_constant_wide (low) + 1;
5165 else
5166 return (num_insns_constant_wide (high)
5167 + num_insns_constant_wide (low) + 1);
5170 else
5171 return 2;
5175 num_insns_constant (rtx op, machine_mode mode)
5177 HOST_WIDE_INT low, high;
5179 switch (GET_CODE (op))
5181 case CONST_INT:
5182 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5183 && mask64_operand (op, mode))
5184 return 2;
5185 else
5186 return num_insns_constant_wide (INTVAL (op));
5188 case CONST_WIDE_INT:
5190 int i;
5191 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5192 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5193 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5194 return ins;
5197 case CONST_DOUBLE:
5198 if (mode == SFmode || mode == SDmode)
5200 long l;
5201 REAL_VALUE_TYPE rv;
5203 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5204 if (DECIMAL_FLOAT_MODE_P (mode))
5205 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5206 else
5207 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5208 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5211 long l[2];
5212 REAL_VALUE_TYPE rv;
5214 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5215 if (DECIMAL_FLOAT_MODE_P (mode))
5216 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5217 else
5218 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5219 high = l[WORDS_BIG_ENDIAN == 0];
5220 low = l[WORDS_BIG_ENDIAN != 0];
5222 if (TARGET_32BIT)
5223 return (num_insns_constant_wide (low)
5224 + num_insns_constant_wide (high));
5225 else
5227 if ((high == 0 && low >= 0)
5228 || (high == -1 && low < 0))
5229 return num_insns_constant_wide (low);
5231 else if (mask64_operand (op, mode))
5232 return 2;
5234 else if (low == 0)
5235 return num_insns_constant_wide (high) + 1;
5237 else
5238 return (num_insns_constant_wide (high)
5239 + num_insns_constant_wide (low) + 1);
5242 default:
5243 gcc_unreachable ();
5247 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5248 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5249 corresponding element of the vector, but for V4SFmode and V2SFmode,
5250 the corresponding "float" is interpreted as an SImode integer. */
5252 HOST_WIDE_INT
5253 const_vector_elt_as_int (rtx op, unsigned int elt)
5255 rtx tmp;
5257 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5258 gcc_assert (GET_MODE (op) != V2DImode
5259 && GET_MODE (op) != V2DFmode);
5261 tmp = CONST_VECTOR_ELT (op, elt);
5262 if (GET_MODE (op) == V4SFmode
5263 || GET_MODE (op) == V2SFmode)
5264 tmp = gen_lowpart (SImode, tmp);
5265 return INTVAL (tmp);
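/* A standalone sketch (plain C, not part of GCC) of what the
   gen_lowpart (SImode, tmp) above means for a float element: the bit
   pattern is reinterpreted as a 32-bit integer, with no numeric
   conversion.  "float_bits_as_int" is a name invented for this
   illustration.  */
#include <stdint.h>
#include <string.h>

static int32_t
float_bits_as_int (float f)
{
  int32_t i;
  memcpy (&i, &f, sizeof i);   /* bit copy, not (int32_t) f */
  return i;                    /* e.g. 1.0f yields 0x3f800000 */
}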
5268 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5269 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5270 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5271 all items are set to the same value and contain COPIES replicas of the
5272 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5273 operand and the others are set to the value of the operand's msb. */
5275 static bool
5276 vspltis_constant (rtx op, unsigned step, unsigned copies)
5278 machine_mode mode = GET_MODE (op);
5279 machine_mode inner = GET_MODE_INNER (mode);
5281 unsigned i;
5282 unsigned nunits;
5283 unsigned bitsize;
5284 unsigned mask;
5286 HOST_WIDE_INT val;
5287 HOST_WIDE_INT splat_val;
5288 HOST_WIDE_INT msb_val;
5290 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5291 return false;
5293 nunits = GET_MODE_NUNITS (mode);
5294 bitsize = GET_MODE_BITSIZE (inner);
5295 mask = GET_MODE_MASK (inner);
5297 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5298 splat_val = val;
5299 msb_val = val >= 0 ? 0 : -1;
5301 /* Construct the value to be splatted, if possible. If not, return false. */
5302 for (i = 2; i <= copies; i *= 2)
5304 HOST_WIDE_INT small_val;
5305 bitsize /= 2;
5306 small_val = splat_val >> bitsize;
5307 mask >>= bitsize;
5308 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5309 return false;
5310 splat_val = small_val;
5313 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5314 if (EASY_VECTOR_15 (splat_val))
5317 /* Also check if we can splat, and then add the result to itself. Do so if
5318 the value is positive, or if the splat instruction is using OP's mode;
5319 for splat_val < 0, the splat and the add should use the same mode. */
5320 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5321 && (splat_val >= 0 || (step == 1 && copies == 1)))
5324 /* Also check if we are loading up the most significant bit, which can be done by
5325 loading up -1 and shifting the value left by -1. */
5326 else if (EASY_VECTOR_MSB (splat_val, inner))
5329 else
5330 return false;
5332 /* Check if VAL is present in every STEP-th element, and the
5333 other elements are filled with its most significant bit. */
5334 for (i = 1; i < nunits; ++i)
5336 HOST_WIDE_INT desired_val;
5337 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5338 if ((i & (step - 1)) == 0)
5339 desired_val = val;
5340 else
5341 desired_val = msb_val;
5343 if (desired_val != const_vector_elt_as_int (op, elt))
5344 return false;
5347 return true;
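/* A standalone sketch (plain C, not part of GCC) of the COPIES loop
   above: VAL can be splatted with COPIES replicas per element only if
   each halving of the element yields two identical halves.  The name
   "splats_with_copies" is invented for this illustration.  */
#include <stdint.h>
#include <stdbool.h>

static bool
splats_with_copies (int64_t splat_val, unsigned bitsize, unsigned copies)
{
  uint64_t mask = (bitsize >= 64 ? ~(uint64_t) 0
                   : ((uint64_t) 1 << bitsize) - 1);
  unsigned i;
  for (i = 2; i <= copies; i *= 2)
    {
      bitsize /= 2;
      mask >>= bitsize;
      int64_t small_val = splat_val >> bitsize;
      if ((uint64_t) splat_val
          != (((uint64_t) small_val << bitsize)
              | ((uint64_t) small_val & mask)))
        return false;
      splat_val = small_val;
    }
  return true;          /* e.g. 0x01010101 splats as 4 copies of 0x01 */
}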
5351 /* Return true if OP is of the given MODE and can be synthesized
5352 with a vspltisb, vspltish or vspltisw. */
5354 bool
5355 easy_altivec_constant (rtx op, machine_mode mode)
5357 unsigned step, copies;
5359 if (mode == VOIDmode)
5360 mode = GET_MODE (op);
5361 else if (mode != GET_MODE (op))
5362 return false;
5364 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5365 constants. */
5366 if (mode == V2DFmode)
5367 return zero_constant (op, mode);
5369 else if (mode == V2DImode)
5371 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5372 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5373 return false;
5375 if (zero_constant (op, mode))
5376 return true;
5378 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5379 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5380 return true;
5382 return false;
5385 /* V1TImode is a special container for TImode. Ignore for now. */
5386 else if (mode == V1TImode)
5387 return false;
5389 /* Start with a vspltisw. */
5390 step = GET_MODE_NUNITS (mode) / 4;
5391 copies = 1;
5393 if (vspltis_constant (op, step, copies))
5394 return true;
5396 /* Then try with a vspltish. */
5397 if (step == 1)
5398 copies <<= 1;
5399 else
5400 step >>= 1;
5402 if (vspltis_constant (op, step, copies))
5403 return true;
5405 /* And finally a vspltisb. */
5406 if (step == 1)
5407 copies <<= 1;
5408 else
5409 step >>= 1;
5411 if (vspltis_constant (op, step, copies))
5412 return true;
5414 return false;
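/* A standalone sketch (plain C, not part of GCC) of the STEP/COPIES walk
   above.  STEP halves until it reaches 1; after that COPIES doubles.
   For V16QImode the three attempts are (4,1), (2,1), (1,1); for V4SImode
   they are (1,1), (1,2), (1,4).  "show_splat_walk" is a name invented
   for this illustration.  */
#include <stdio.h>

static void
show_splat_walk (unsigned nunits)
{
  unsigned step = nunits / 4, copies = 1;
  int attempt;
  for (attempt = 0; attempt < 3; attempt++)
    {
      printf ("step=%u copies=%u\n", step, copies);
      if (step == 1)
        copies <<= 1;
      else
        step >>= 1;
    }
}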
5417 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5418 result is OP. Abort if it is not possible. */
5420 rtx
5421 gen_easy_altivec_constant (rtx op)
5423 machine_mode mode = GET_MODE (op);
5424 int nunits = GET_MODE_NUNITS (mode);
5425 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5426 unsigned step = nunits / 4;
5427 unsigned copies = 1;
5429 /* Start with a vspltisw. */
5430 if (vspltis_constant (op, step, copies))
5431 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5433 /* Then try with a vspltish. */
5434 if (step == 1)
5435 copies <<= 1;
5436 else
5437 step >>= 1;
5439 if (vspltis_constant (op, step, copies))
5440 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5442 /* And finally a vspltisb. */
5443 if (step == 1)
5444 copies <<= 1;
5445 else
5446 step >>= 1;
5448 if (vspltis_constant (op, step, copies))
5449 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5451 gcc_unreachable ();
5454 const char *
5455 output_vec_const_move (rtx *operands)
5457 int cst, cst2;
5458 machine_mode mode;
5459 rtx dest, vec;
5461 dest = operands[0];
5462 vec = operands[1];
5463 mode = GET_MODE (dest);
5465 if (TARGET_VSX)
5467 if (zero_constant (vec, mode))
5468 return "xxlxor %x0,%x0,%x0";
5470 if ((mode == V2DImode || mode == V1TImode)
5471 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5472 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5473 return "vspltisw %0,-1";
5476 if (TARGET_ALTIVEC)
5478 rtx splat_vec;
5479 if (zero_constant (vec, mode))
5480 return "vxor %0,%0,%0";
5482 splat_vec = gen_easy_altivec_constant (vec);
5483 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5484 operands[1] = XEXP (splat_vec, 0);
5485 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5486 return "#";
5488 switch (GET_MODE (splat_vec))
5490 case V4SImode:
5491 return "vspltisw %0,%1";
5493 case V8HImode:
5494 return "vspltish %0,%1";
5496 case V16QImode:
5497 return "vspltisb %0,%1";
5499 default:
5500 gcc_unreachable ();
5504 gcc_assert (TARGET_SPE);
5506 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5507 pattern of V1DI, V4HI, and V2SF.
5509 FIXME: We should probably return # and add post reload
5510 splitters for these, but this way is so easy ;-). */
5511 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5512 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5513 operands[1] = CONST_VECTOR_ELT (vec, 0);
5514 operands[2] = CONST_VECTOR_ELT (vec, 1);
5515 if (cst == cst2)
5516 return "li %0,%1\n\tevmergelo %0,%0,%0";
5517 else if (WORDS_BIG_ENDIAN)
5518 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5519 else
5520 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
5523 /* Initialize TARGET of vector PAIRED to VALS. */
5525 void
5526 paired_expand_vector_init (rtx target, rtx vals)
5528 machine_mode mode = GET_MODE (target);
5529 int n_elts = GET_MODE_NUNITS (mode);
5530 int n_var = 0;
5531 rtx x, new_rtx, tmp, constant_op, op1, op2;
5532 int i;
5534 for (i = 0; i < n_elts; ++i)
5536 x = XVECEXP (vals, 0, i);
5537 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5538 ++n_var;
5540 if (n_var == 0)
5542 /* Load from constant pool. */
5543 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5544 return;
5547 if (n_var == 2)
5549 /* The vector is initialized only with non-constants. */
5550 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5551 XVECEXP (vals, 0, 1));
5553 emit_move_insn (target, new_rtx);
5554 return;
5557 /* One field is non-constant and the other one is a constant. Load the
5558 constant from the constant pool and use ps_merge instruction to
5559 construct the whole vector. */
5560 op1 = XVECEXP (vals, 0, 0);
5561 op2 = XVECEXP (vals, 0, 1);
5563 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5565 tmp = gen_reg_rtx (GET_MODE (constant_op));
5566 emit_move_insn (tmp, constant_op);
5568 if (CONSTANT_P (op1))
5569 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5570 else
5571 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5573 emit_move_insn (target, new_rtx);
5576 void
5577 paired_expand_vector_move (rtx operands[])
5579 rtx op0 = operands[0], op1 = operands[1];
5581 emit_move_insn (op0, op1);
5584 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
5585 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5586 operands for the relation operation COND. This is a recursive
5587 function. */
5589 static void
5590 paired_emit_vector_compare (enum rtx_code rcode,
5591 rtx dest, rtx op0, rtx op1,
5592 rtx cc_op0, rtx cc_op1)
5594 rtx tmp = gen_reg_rtx (V2SFmode);
5595 rtx tmp1, max, min;
5597 gcc_assert (TARGET_PAIRED_FLOAT);
5598 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5600 switch (rcode)
5602 case LT:
5603 case LTU:
5604 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5605 return;
5606 case GE:
5607 case GEU:
5608 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5609 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5610 return;
5611 case LE:
5612 case LEU:
5613 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5614 return;
5615 case GT:
5616 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5617 return;
5618 case EQ:
5619 tmp1 = gen_reg_rtx (V2SFmode);
5620 max = gen_reg_rtx (V2SFmode);
5621 min = gen_reg_rtx (V2SFmode);
5624 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5625 emit_insn (gen_selv2sf4
5626 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5627 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5628 emit_insn (gen_selv2sf4
5629 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5630 emit_insn (gen_subv2sf3 (tmp1, min, max));
5631 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5632 return;
5633 case NE:
5634 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5635 return;
5636 case UNLE:
5637 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5638 return;
5639 case UNLT:
5640 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5641 return;
5642 case UNGE:
5643 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5644 return;
5645 case UNGT:
5646 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5647 return;
5648 default:
5649 gcc_unreachable ();
5652 return;
5655 /* Emit vector conditional expression.
5656 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5657 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5659 int
5660 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5661 rtx cond, rtx cc_op0, rtx cc_op1)
5663 enum rtx_code rcode = GET_CODE (cond);
5665 if (!TARGET_PAIRED_FLOAT)
5666 return 0;
5668 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5670 return 1;
5673 /* Initialize vector TARGET to VALS. */
5675 void
5676 rs6000_expand_vector_init (rtx target, rtx vals)
5678 machine_mode mode = GET_MODE (target);
5679 machine_mode inner_mode = GET_MODE_INNER (mode);
5680 int n_elts = GET_MODE_NUNITS (mode);
5681 int n_var = 0, one_var = -1;
5682 bool all_same = true, all_const_zero = true;
5683 rtx x, mem;
5684 int i;
5686 for (i = 0; i < n_elts; ++i)
5688 x = XVECEXP (vals, 0, i);
5689 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5690 ++n_var, one_var = i;
5691 else if (x != CONST0_RTX (inner_mode))
5692 all_const_zero = false;
5694 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5695 all_same = false;
5698 if (n_var == 0)
5700 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5701 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5702 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5704 /* Zero register. */
5705 emit_insn (gen_rtx_SET (VOIDmode, target,
5706 gen_rtx_XOR (mode, target, target)));
5707 return;
5709 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5711 /* Splat immediate. */
5712 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5713 return;
5715 else
5717 /* Load from constant pool. */
5718 emit_move_insn (target, const_vec);
5719 return;
5723 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5724 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5726 rtx op0 = XVECEXP (vals, 0, 0);
5727 rtx op1 = XVECEXP (vals, 0, 1);
5728 if (all_same)
5730 if (!MEM_P (op0) && !REG_P (op0))
5731 op0 = force_reg (inner_mode, op0);
5732 if (mode == V2DFmode)
5733 emit_insn (gen_vsx_splat_v2df (target, op0));
5734 else
5735 emit_insn (gen_vsx_splat_v2di (target, op0));
5737 else
5739 op0 = force_reg (inner_mode, op0);
5740 op1 = force_reg (inner_mode, op1);
5741 if (mode == V2DFmode)
5742 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5743 else
5744 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5746 return;
5749 /* With single precision floating point on VSX, we know that internally
5750 single precision is actually represented as a double.  Either make two
5751 V2DF vectors and convert those to single precision, or do one
5752 conversion and splat the result to the other elements. */
5753 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5755 if (all_same)
5757 rtx freg = gen_reg_rtx (V4SFmode);
5758 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5759 rtx cvt = ((TARGET_XSCVDPSPN)
5760 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5761 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5763 emit_insn (cvt);
5764 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5766 else
5768 rtx dbl_even = gen_reg_rtx (V2DFmode);
5769 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5770 rtx flt_even = gen_reg_rtx (V4SFmode);
5771 rtx flt_odd = gen_reg_rtx (V4SFmode);
5772 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5773 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5774 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5775 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5777 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5778 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5779 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5780 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5781 rs6000_expand_extract_even (target, flt_even, flt_odd);
5783 return;
5786 /* Store value to stack temp. Load vector element. Splat. However, splat
5787 of 64-bit items is not supported on Altivec. */
5788 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5790 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5791 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5792 XVECEXP (vals, 0, 0));
5793 x = gen_rtx_UNSPEC (VOIDmode,
5794 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5795 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5796 gen_rtvec (2,
5797 gen_rtx_SET (VOIDmode,
5798 target, mem),
5799 x)));
5800 x = gen_rtx_VEC_SELECT (inner_mode, target,
5801 gen_rtx_PARALLEL (VOIDmode,
5802 gen_rtvec (1, const0_rtx)));
5803 emit_insn (gen_rtx_SET (VOIDmode, target,
5804 gen_rtx_VEC_DUPLICATE (mode, x)));
5805 return;
5808 /* One field is non-constant. Load constant then overwrite
5809 varying field. */
5810 if (n_var == 1)
5812 rtx copy = copy_rtx (vals);
5814 /* Load constant part of vector, substitute neighboring value for
5815 varying element. */
5816 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5817 rs6000_expand_vector_init (target, copy);
5819 /* Insert variable. */
5820 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5821 return;
5824 /* Construct the vector in memory one field at a time
5825 and load the whole vector. */
5826 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5827 for (i = 0; i < n_elts; i++)
5828 emit_move_insn (adjust_address_nv (mem, inner_mode,
5829 i * GET_MODE_SIZE (inner_mode)),
5830 XVECEXP (vals, 0, i));
5831 emit_move_insn (target, mem);
5834 /* Set field ELT of TARGET to VAL. */
5836 void
5837 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5839 machine_mode mode = GET_MODE (target);
5840 machine_mode inner_mode = GET_MODE_INNER (mode);
5841 rtx reg = gen_reg_rtx (mode);
5842 rtx mask, mem, x;
5843 int width = GET_MODE_SIZE (inner_mode);
5844 int i;
5846 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5848 rtx (*set_func) (rtx, rtx, rtx, rtx)
5849 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5850 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5851 return;
5854 /* Simplify setting single element vectors like V1TImode. */
5855 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5857 emit_move_insn (target, gen_lowpart (mode, val));
5858 return;
5861 /* Load single variable value. */
5862 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5863 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5864 x = gen_rtx_UNSPEC (VOIDmode,
5865 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5866 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5867 gen_rtvec (2,
5868 gen_rtx_SET (VOIDmode,
5869 reg, mem),
5870 x)));
5872 /* Linear sequence. */
5873 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5874 for (i = 0; i < 16; ++i)
5875 XVECEXP (mask, 0, i) = GEN_INT (i);
5877 /* Set permute mask to insert element into target. */
5878 for (i = 0; i < width; ++i)
5879 XVECEXP (mask, 0, elt*width + i)
5880 = GEN_INT (i + 0x10);
5881 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5883 if (BYTES_BIG_ENDIAN)
5884 x = gen_rtx_UNSPEC (mode,
5885 gen_rtvec (3, target, reg,
5886 force_reg (V16QImode, x)),
5887 UNSPEC_VPERM);
5888 else
5890 /* Invert selector. We prefer to generate VNAND on P8 so
5891 that future fusion opportunities can kick in, but must
5892 generate VNOR elsewhere. */
5893 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5894 rtx iorx = (TARGET_P8_VECTOR
5895 ? gen_rtx_IOR (V16QImode, notx, notx)
5896 : gen_rtx_AND (V16QImode, notx, notx));
5897 rtx tmp = gen_reg_rtx (V16QImode);
5898 emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
5900 /* Permute with operands reversed and adjusted selector. */
5901 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5902 UNSPEC_VPERM);
5905 emit_insn (gen_rtx_SET (VOIDmode, target, x));
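/* A standalone sketch (plain C, not part of GCC) of the selector built
   above.  vperm selects bytes from the 32-byte concatenation of its two
   inputs: selector bytes 0x00-0x0f pick from the first input and
   0x10-0x1f from the second.  The mask therefore copies TARGET unchanged
   except for the WIDTH bytes of element ELT, which come from REG.
   "build_insert_selector" is a name invented for this illustration.  */
static void
build_insert_selector (unsigned char sel[16], int elt, int width)
{
  int i;
  for (i = 0; i < 16; i++)
    sel[i] = i;                       /* identity: keep TARGET's bytes */
  for (i = 0; i < width; i++)
    sel[elt * width + i] = 0x10 + i;  /* splice in REG's bytes */
}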
5908 /* Extract field ELT from VEC into TARGET. */
5910 void
5911 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5913 machine_mode mode = GET_MODE (vec);
5914 machine_mode inner_mode = GET_MODE_INNER (mode);
5915 rtx mem;
5917 if (VECTOR_MEM_VSX_P (mode))
5919 switch (mode)
5921 default:
5922 break;
5923 case V1TImode:
5924 gcc_assert (elt == 0 && inner_mode == TImode);
5925 emit_move_insn (target, gen_lowpart (TImode, vec));
5926 return;
5927 case V2DFmode:
5928 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
5929 return;
5930 case V2DImode:
5931 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
5932 return;
5933 case V4SFmode:
5934 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
5935 return;
5939 /* Allocate mode-sized buffer. */
5940 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5942 emit_move_insn (mem, vec);
5944 /* Add offset to field within buffer matching vector element. */
5945 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
5947 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
5950 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
5951 implement ANDing by the mask IN. */
5952 void
5953 build_mask64_2_operands (rtx in, rtx *out)
5955 unsigned HOST_WIDE_INT c, lsb, m1, m2;
5956 int shift;
5958 gcc_assert (GET_CODE (in) == CONST_INT);
5960 c = INTVAL (in);
5961 if (c & 1)
5963 /* Assume c initially something like 0x00fff000000fffff. The idea
5964 is to rotate the word so that the middle ^^^^^^ group of zeros
5965 is at the MS end and can be cleared with an rldicl mask. We then
5966 rotate back and clear off the MS ^^ group of zeros with a
5967 second rldicl. */
5968 c = ~c; /* c == 0xff000ffffff00000 */
5969 lsb = c & -c; /* lsb == 0x0000000000100000 */
5970 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
5971 c = ~c; /* c == 0x00fff000000fffff */
5972 c &= -lsb; /* c == 0x00fff00000000000 */
5973 lsb = c & -c; /* lsb == 0x0000100000000000 */
5974 c = ~c; /* c == 0xff000fffffffffff */
5975 c &= -lsb; /* c == 0xff00000000000000 */
5976 shift = 0;
5977 while ((lsb >>= 1) != 0)
5978 shift++; /* shift == 44 on exit from loop */
5979 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
5980 m1 = ~m1; /* m1 == 0x000000ffffffffff */
5981 m2 = ~c; /* m2 == 0x00ffffffffffffff */
5983 else
5985 /* Assume c initially something like 0xff000f0000000000. The idea
5986 is to rotate the word so that the ^^^ middle group of zeros
5987 is at the LS end and can be cleared with an rldicr mask. We then
5988 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
5989 a second rldicr. */
5990 lsb = c & -c; /* lsb == 0x0000010000000000 */
5991 m2 = -lsb; /* m2 == 0xffffff0000000000 */
5992 c = ~c; /* c == 0x00fff0ffffffffff */
5993 c &= -lsb; /* c == 0x00fff00000000000 */
5994 lsb = c & -c; /* lsb == 0x0000100000000000 */
5995 c = ~c; /* c == 0xff000fffffffffff */
5996 c &= -lsb; /* c == 0xff00000000000000 */
5997 shift = 0;
5998 while ((lsb >>= 1) != 0)
5999 shift++; /* shift == 44 on exit from loop */
6000 m1 = ~c; /* m1 == 0x00ffffffffffffff */
6001 m1 >>= shift; /* m1 == 0x0000000000000fff */
6002 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
6005 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
6006 masks will be all 1's. We are guaranteed more than one transition. */
6007 out[0] = GEN_INT (64 - shift);
6008 out[1] = GEN_INT (m1);
6009 out[2] = GEN_INT (shift);
6010 out[3] = GEN_INT (m2);
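/* A standalone check (plain C, not part of GCC) that the four operands
   computed above implement AND by the original constant as two
   rotate-and-mask steps.  "rotl64" and "two_insn_and_matches" are names
   invented for this illustration.  */
#include <stdint.h>
#include <stdbool.h>

static uint64_t
rotl64 (uint64_t x, unsigned n)
{
  n &= 63;
  return n ? (x << n) | (x >> (64 - n)) : x;
}

static bool
two_insn_and_matches (uint64_t x, uint64_t c,
                      unsigned shift, uint64_t m1, uint64_t m2)
{
  uint64_t t = rotl64 (x, 64 - shift) & m1;   /* first rotate-and-clear */
  t = rotl64 (t, shift) & m2;                 /* rotate back, clear rest */
  return t == (x & c);                        /* same as a single AND by c */
}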
6013 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6015 bool
6016 invalid_e500_subreg (rtx op, machine_mode mode)
6018 if (TARGET_E500_DOUBLE)
6020 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6021 subreg:TI and reg:TF. Decimal float modes are like integer
6022 modes (only low part of each register used) for this
6023 purpose. */
6024 if (GET_CODE (op) == SUBREG
6025 && (mode == SImode || mode == DImode || mode == TImode
6026 || mode == DDmode || mode == TDmode || mode == PTImode)
6027 && REG_P (SUBREG_REG (op))
6028 && (GET_MODE (SUBREG_REG (op)) == DFmode
6029 || GET_MODE (SUBREG_REG (op)) == TFmode))
6030 return true;
6032 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6033 reg:TI. */
6034 if (GET_CODE (op) == SUBREG
6035 && (mode == DFmode || mode == TFmode)
6036 && REG_P (SUBREG_REG (op))
6037 && (GET_MODE (SUBREG_REG (op)) == DImode
6038 || GET_MODE (SUBREG_REG (op)) == TImode
6039 || GET_MODE (SUBREG_REG (op)) == PTImode
6040 || GET_MODE (SUBREG_REG (op)) == DDmode
6041 || GET_MODE (SUBREG_REG (op)) == TDmode))
6042 return true;
6045 if (TARGET_SPE
6046 && GET_CODE (op) == SUBREG
6047 && mode == SImode
6048 && REG_P (SUBREG_REG (op))
6049 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6050 return true;
6052 return false;
6055 /* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
6056 selects whether the alignment is ABI-mandated, optional, or
6057 both ABI-mandated and optional alignment. */
6059 unsigned int
6060 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6062 if (how != align_opt)
6064 if (TREE_CODE (type) == VECTOR_TYPE)
6066 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6067 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6069 if (align < 64)
6070 align = 64;
6072 else if (align < 128)
6073 align = 128;
6075 else if (TARGET_E500_DOUBLE
6076 && TREE_CODE (type) == REAL_TYPE
6077 && TYPE_MODE (type) == DFmode)
6079 if (align < 64)
6080 align = 64;
6084 if (how != align_abi)
6086 if (TREE_CODE (type) == ARRAY_TYPE
6087 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6089 if (align < BITS_PER_WORD)
6090 align = BITS_PER_WORD;
6094 return align;
6097 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6099 bool
6100 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6102 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6104 if (computed != 128)
6106 static bool warned;
6107 if (!warned && warn_psabi)
6109 warned = true;
6110 inform (input_location,
6111 "the layout of aggregates containing vectors with"
6112 " %d-byte alignment has changed in GCC 5",
6113 computed / BITS_PER_UNIT);
6116 /* In current GCC there is no special case. */
6117 return false;
6120 return false;
6123 /* AIX increases natural record alignment to doubleword if the first
6124 field is an FP double while the FP fields remain word aligned. */
6126 unsigned int
6127 rs6000_special_round_type_align (tree type, unsigned int computed,
6128 unsigned int specified)
6130 unsigned int align = MAX (computed, specified);
6131 tree field = TYPE_FIELDS (type);
6133 /* Skip all non-field decls.  */
6134 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6135 field = DECL_CHAIN (field);
6137 if (field != NULL && field != type)
6139 type = TREE_TYPE (field);
6140 while (TREE_CODE (type) == ARRAY_TYPE)
6141 type = TREE_TYPE (type);
6143 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6144 align = MAX (align, 64);
6147 return align;
6150 /* Darwin increases record alignment to the natural alignment of
6151 the first field. */
6153 unsigned int
6154 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6155 unsigned int specified)
6157 unsigned int align = MAX (computed, specified);
6159 if (TYPE_PACKED (type))
6160 return align;
6162 /* Find the first field, looking down into aggregates. */
6163 do {
6164 tree field = TYPE_FIELDS (type);
6165 /* Skip all non-field decls.  */
6166 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6167 field = DECL_CHAIN (field);
6168 if (! field)
6169 break;
6170 /* A packed field does not contribute any extra alignment. */
6171 if (DECL_PACKED (field))
6172 return align;
6173 type = TREE_TYPE (field);
6174 while (TREE_CODE (type) == ARRAY_TYPE)
6175 type = TREE_TYPE (type);
6176 } while (AGGREGATE_TYPE_P (type));
6178 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6179 align = MAX (align, TYPE_ALIGN (type));
6181 return align;
6184 /* Return 1 for an operand in small memory on V.4/eabi. */
6186 int
6187 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6188 machine_mode mode ATTRIBUTE_UNUSED)
6190 #if TARGET_ELF
6191 rtx sym_ref;
6193 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6194 return 0;
6196 if (DEFAULT_ABI != ABI_V4)
6197 return 0;
6199 /* Vector and float memory instructions have a limited offset on the
6200 SPE, so using a vector or float variable directly as an operand is
6201 not useful. */
6202 if (TARGET_SPE
6203 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6204 return 0;
6206 if (GET_CODE (op) == SYMBOL_REF)
6207 sym_ref = op;
6209 else if (GET_CODE (op) != CONST
6210 || GET_CODE (XEXP (op, 0)) != PLUS
6211 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6212 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6213 return 0;
6215 else
6217 rtx sum = XEXP (op, 0);
6218 HOST_WIDE_INT summand;
6220 /* We have to be careful here, because it is the referenced address
6221 that must be 32k from _SDA_BASE_, not just the symbol. */
6222 summand = INTVAL (XEXP (sum, 1));
6223 if (summand < 0 || summand > g_switch_value)
6224 return 0;
6226 sym_ref = XEXP (sum, 0);
6229 return SYMBOL_REF_SMALL_P (sym_ref);
6230 #else
6231 return 0;
6232 #endif
6235 /* Return true if either operand is a general purpose register. */
6237 bool
6238 gpr_or_gpr_p (rtx op0, rtx op1)
6240 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6241 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6244 /* Return true if this is a move direct operation between GPR registers and
6245 floating point/VSX registers. */
6247 bool
6248 direct_move_p (rtx op0, rtx op1)
6250 int regno0, regno1;
6252 if (!REG_P (op0) || !REG_P (op1))
6253 return false;
6255 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6256 return false;
6258 regno0 = REGNO (op0);
6259 regno1 = REGNO (op1);
6260 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6261 return false;
6263 if (INT_REGNO_P (regno0))
6264 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6266 else if (INT_REGNO_P (regno1))
6268 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6269 return true;
6271 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6272 return true;
6275 return false;
6278 /* Return true if this is a load or store quad operation. This function does
6279 not handle the atomic quad memory instructions. */
6281 bool
6282 quad_load_store_p (rtx op0, rtx op1)
6284 bool ret;
6286 if (!TARGET_QUAD_MEMORY)
6287 ret = false;
6289 else if (REG_P (op0) && MEM_P (op1))
6290 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6291 && quad_memory_operand (op1, GET_MODE (op1))
6292 && !reg_overlap_mentioned_p (op0, op1));
6294 else if (MEM_P (op0) && REG_P (op1))
6295 ret = (quad_memory_operand (op0, GET_MODE (op0))
6296 && quad_int_reg_operand (op1, GET_MODE (op1)));
6298 else
6299 ret = false;
6301 if (TARGET_DEBUG_ADDR)
6303 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6304 ret ? "true" : "false");
6305 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
6308 return ret;
6311 /* Given an address, return a constant offset term if one exists. */
6313 static rtx
6314 address_offset (rtx op)
6316 if (GET_CODE (op) == PRE_INC
6317 || GET_CODE (op) == PRE_DEC)
6318 op = XEXP (op, 0);
6319 else if (GET_CODE (op) == PRE_MODIFY
6320 || GET_CODE (op) == LO_SUM)
6321 op = XEXP (op, 1);
6323 if (GET_CODE (op) == CONST)
6324 op = XEXP (op, 0);
6326 if (GET_CODE (op) == PLUS)
6327 op = XEXP (op, 1);
6329 if (CONST_INT_P (op))
6330 return op;
6332 return NULL_RTX;
6335 /* Return true if the MEM operand is a memory operand suitable for use
6336 with a (full width, possibly multiple) gpr load/store. On
6337 powerpc64 this means the offset must be divisible by 4.
6338 Implements 'Y' constraint.
6340 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6341 a constraint function we know the operand has satisfied a suitable
6342 memory predicate. Also accept some odd rtl generated by reload
6343 (see rs6000_legitimize_reload_address for various forms). It is
6344 important that reload rtl be accepted by appropriate constraints
6345 but not by the operand predicate.
6347 Offsetting a lo_sum should not be allowed, except where we know by
6348 alignment that a 32k boundary is not crossed, but see the ???
6349 comment in rs6000_legitimize_reload_address. Note that by
6350 "offsetting" here we mean a further offset to access parts of the
6351 MEM. It's fine to have a lo_sum where the inner address is offset
6352 from a sym, since the same sym+offset will appear in the high part
6353 of the address calculation. */
6355 bool
6356 mem_operand_gpr (rtx op, machine_mode mode)
6358 unsigned HOST_WIDE_INT offset;
6359 int extra;
6360 rtx addr = XEXP (op, 0);
6362 op = address_offset (addr);
6363 if (op == NULL_RTX)
6364 return true;
6366 offset = INTVAL (op);
6367 if (TARGET_POWERPC64 && (offset & 3) != 0)
6368 return false;
6370 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6371 if (extra < 0)
6372 extra = 0;
6374 if (GET_CODE (addr) == LO_SUM)
6375 /* For lo_sum addresses, we must allow any offset except one that
6376 causes a wrap, so test only the low 16 bits. */
6377 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6379 return offset + 0x8000 < 0x10000u - extra;
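/* A standalone sketch (plain C, not part of GCC) of the final range test
   above: unsigned wrap-around makes one comparison check both bounds, so
   the offset is accepted iff -0x8000 <= offset < 0x8000 - extra.
   "gpr_offset_in_range" is a name invented for this illustration.  */
#include <stdint.h>
#include <stdbool.h>

static bool
gpr_offset_in_range (int64_t offset, int extra)
{
  return (uint64_t) offset + 0x8000 < 0x10000u - extra;
}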
6382 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6384 static bool
6385 reg_offset_addressing_ok_p (machine_mode mode)
6387 switch (mode)
6389 case V16QImode:
6390 case V8HImode:
6391 case V4SFmode:
6392 case V4SImode:
6393 case V2DFmode:
6394 case V2DImode:
6395 case V1TImode:
6396 case TImode:
6397 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6398 TImode is not a vector mode, if we want to use the VSX registers to
6399 move it around, we need to restrict ourselves to reg+reg
6400 addressing. */
6401 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6402 return false;
6403 break;
6405 case V4HImode:
6406 case V2SImode:
6407 case V1DImode:
6408 case V2SFmode:
6409 /* Paired vector modes. Only reg+reg addressing is valid. */
6410 if (TARGET_PAIRED_FLOAT)
6411 return false;
6412 break;
6414 case SDmode:
6415 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6416 addressing for the LFIWZX and STFIWX instructions. */
6417 if (TARGET_NO_SDMODE_STACK)
6418 return false;
6419 break;
6421 default:
6422 break;
6425 return true;
6428 static bool
6429 virtual_stack_registers_memory_p (rtx op)
6431 int regnum;
6433 if (GET_CODE (op) == REG)
6434 regnum = REGNO (op);
6436 else if (GET_CODE (op) == PLUS
6437 && GET_CODE (XEXP (op, 0)) == REG
6438 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6439 regnum = REGNO (XEXP (op, 0));
6441 else
6442 return false;
6444 return (regnum >= FIRST_VIRTUAL_REGISTER
6445 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6448 /* Return true if a MODE sized memory accesses to OP plus OFFSET
6449 is known to not straddle a 32k boundary. */
6451 static bool
6452 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6453 machine_mode mode)
6455 tree decl, type;
6456 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6458 if (GET_CODE (op) != SYMBOL_REF)
6459 return false;
6461 dsize = GET_MODE_SIZE (mode);
6462 decl = SYMBOL_REF_DECL (op);
6463 if (!decl)
6465 if (dsize == 0)
6466 return false;
6468 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6469 replacing memory addresses with an anchor plus offset. We
6470 could find the decl by rummaging around in the block->objects
6471 VEC for the given offset but that seems like too much work. */
6472 dalign = BITS_PER_UNIT;
6473 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6474 && SYMBOL_REF_ANCHOR_P (op)
6475 && SYMBOL_REF_BLOCK (op) != NULL)
6477 struct object_block *block = SYMBOL_REF_BLOCK (op);
6479 dalign = block->alignment;
6480 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6482 else if (CONSTANT_POOL_ADDRESS_P (op))
6484 /* It would be nice to have get_pool_align ()...  */
6485 machine_mode cmode = get_pool_mode (op);
6487 dalign = GET_MODE_ALIGNMENT (cmode);
6490 else if (DECL_P (decl))
6492 dalign = DECL_ALIGN (decl);
6494 if (dsize == 0)
6496 /* Allow BLKmode when the entire object is known to not
6497 cross a 32k boundary. */
6498 if (!DECL_SIZE_UNIT (decl))
6499 return false;
6501 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6502 return false;
6504 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6505 if (dsize > 32768)
6506 return false;
6508 return dalign / BITS_PER_UNIT >= dsize;
6511 else
6513 type = TREE_TYPE (decl);
6515 dalign = TYPE_ALIGN (type);
6516 if (CONSTANT_CLASS_P (decl))
6517 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6518 else
6519 dalign = DATA_ALIGNMENT (decl, dalign);
6521 if (dsize == 0)
6523 /* BLKmode, check the entire object. */
6524 if (TREE_CODE (decl) == STRING_CST)
6525 dsize = TREE_STRING_LENGTH (decl);
6526 else if (TYPE_SIZE_UNIT (type)
6527 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
6528 dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6529 else
6530 return false;
6531 if (dsize > 32768)
6532 return false;
6534 return dalign / BITS_PER_UNIT >= dsize;
6538 /* Find how many bits of the alignment we know for this access. */
6539 mask = dalign / BITS_PER_UNIT - 1;
6540 lsb = offset & -offset;
6541 mask &= lsb - 1;
6542 dalign = mask + 1;
6544 return dalign >= dsize;
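/* A standalone sketch (plain C, not part of GCC) of the last computation
   above: the alignment guaranteed at BASE+OFFSET is the declared
   alignment capped by the lowest set bit of OFFSET (an OFFSET of zero
   keeps the full alignment).  "known_alignment_at_offset" is a name
   invented for this illustration.  */
#include <stdint.h>

static uint64_t
known_alignment_at_offset (uint64_t dalign_bytes, uint64_t offset)
{
  uint64_t mask = dalign_bytes - 1;   /* dalign_bytes is a power of two */
  uint64_t lsb = offset & -offset;    /* lowest set bit; 0 if offset == 0 */
  mask &= lsb - 1;                    /* lsb - 1 is all-ones when lsb == 0 */
  return mask + 1;                    /* e.g. dalign 16, offset 4 -> 4 */
}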
6547 static bool
6548 constant_pool_expr_p (rtx op)
6550 rtx base, offset;
6552 split_const (op, &base, &offset);
6553 return (GET_CODE (base) == SYMBOL_REF
6554 && CONSTANT_POOL_ADDRESS_P (base)
6555 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6558 static const_rtx tocrel_base, tocrel_offset;
6560 /* Return true if OP is a toc pointer relative address (the output
6561 of create_TOC_reference). If STRICT, do not match high part or
6562 non-split -mcmodel=large/medium toc pointer relative addresses. */
6564 bool
6565 toc_relative_expr_p (const_rtx op, bool strict)
6567 if (!TARGET_TOC)
6568 return false;
6570 if (TARGET_CMODEL != CMODEL_SMALL)
6572 /* Only match the low part. */
6573 if (GET_CODE (op) == LO_SUM
6574 && REG_P (XEXP (op, 0))
6575 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6576 op = XEXP (op, 1);
6577 else if (strict)
6578 return false;
6581 tocrel_base = op;
6582 tocrel_offset = const0_rtx;
6583 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6585 tocrel_base = XEXP (op, 0);
6586 tocrel_offset = XEXP (op, 1);
6589 return (GET_CODE (tocrel_base) == UNSPEC
6590 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6593 /* Return true if X is a constant pool address, and also for cmodel=medium
6594 if X is a toc-relative address known to be offsettable within MODE. */
6596 bool
6597 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
6598 bool strict)
6600 return (toc_relative_expr_p (x, strict)
6601 && (TARGET_CMODEL != CMODEL_MEDIUM
6602 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6603 || mode == QImode
6604 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6605 INTVAL (tocrel_offset), mode)));
6608 static bool
6609 legitimate_small_data_p (machine_mode mode, rtx x)
6611 return (DEFAULT_ABI == ABI_V4
6612 && !flag_pic && !TARGET_TOC
6613 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6614 && small_data_operand (x, mode));
6617 /* SPE offset addressing is limited to 5 bits' worth of double words. */
6618 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
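/* A standalone sketch (plain C, not part of GCC): the macro above accepts
   exactly the 32 doubleword-aligned offsets 0, 8, ..., 248.
   "list_spe_offsets" is a name invented for this illustration.  */
#include <stdio.h>

static void
list_spe_offsets (void)
{
  int x;
  for (x = 0; x <= 248; x++)
    if ((x & ~0xf8) == 0)
      printf ("%d\n", x);       /* prints 0, 8, 16, ..., 248 */
}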
6620 bool
6621 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
6622 bool strict, bool worst_case)
6624 unsigned HOST_WIDE_INT offset;
6625 unsigned int extra;
6627 if (GET_CODE (x) != PLUS)
6628 return false;
6629 if (!REG_P (XEXP (x, 0)))
6630 return false;
6631 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6632 return false;
6633 if (!reg_offset_addressing_ok_p (mode))
6634 return virtual_stack_registers_memory_p (x);
6635 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6636 return true;
6637 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6638 return false;
6640 offset = INTVAL (XEXP (x, 1));
6641 extra = 0;
6642 switch (mode)
6644 case V4HImode:
6645 case V2SImode:
6646 case V1DImode:
6647 case V2SFmode:
6648 /* SPE vector modes. */
6649 return SPE_CONST_OFFSET_OK (offset);
6651 case DFmode:
6652 case DDmode:
6653 case DImode:
6654 /* On e500v2, we may have:
6656 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
6658 which gets addressed with evldd instructions. */
6659 if (TARGET_E500_DOUBLE)
6660 return SPE_CONST_OFFSET_OK (offset);
6662 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6663 addressing. */
6664 if (VECTOR_MEM_VSX_P (mode))
6665 return false;
6667 if (!worst_case)
6668 break;
6669 if (!TARGET_POWERPC64)
6670 extra = 4;
6671 else if (offset & 3)
6672 return false;
6673 break;
6675 case TFmode:
6676 if (TARGET_E500_DOUBLE)
6677 return (SPE_CONST_OFFSET_OK (offset)
6678 && SPE_CONST_OFFSET_OK (offset + 8));
6679 /* fall through */
6681 case TDmode:
6682 case TImode:
6683 case PTImode:
6684 extra = 8;
6685 if (!worst_case)
6686 break;
6687 if (!TARGET_POWERPC64)
6688 extra = 12;
6689 else if (offset & 3)
6690 return false;
6691 break;
6693 default:
6694 break;
6697 offset += 0x8000;
6698 return offset < 0x10000 - extra;
6701 bool
6702 legitimate_indexed_address_p (rtx x, int strict)
6704 rtx op0, op1;
6706 if (GET_CODE (x) != PLUS)
6707 return false;
6709 op0 = XEXP (x, 0);
6710 op1 = XEXP (x, 1);
6712 /* Recognize the rtl generated by reload which we know will later be
6713 replaced with proper base and index regs. */
6714 if (!strict
6715 && reload_in_progress
6716 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6717 && REG_P (op1))
6718 return true;
6720 return (REG_P (op0) && REG_P (op1)
6721 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6722 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6723 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6724 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6727 bool
6728 avoiding_indexed_address_p (machine_mode mode)
6730 /* Avoid indexed addressing for modes that have non-indexed
6731 load/store instruction forms. */
6732 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6735 bool
6736 legitimate_indirect_address_p (rtx x, int strict)
6738 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6741 bool
6742 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
6744 if (!TARGET_MACHO || !flag_pic
6745 || mode != SImode || GET_CODE (x) != MEM)
6746 return false;
6747 x = XEXP (x, 0);
6749 if (GET_CODE (x) != LO_SUM)
6750 return false;
6751 if (GET_CODE (XEXP (x, 0)) != REG)
6752 return false;
6753 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6754 return false;
6755 x = XEXP (x, 1);
6757 return CONSTANT_P (x);
6760 static bool
6761 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
6763 if (GET_CODE (x) != LO_SUM)
6764 return false;
6765 if (GET_CODE (XEXP (x, 0)) != REG)
6766 return false;
6767 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6768 return false;
6769 /* Restrict addressing for DI because of our SUBREG hackery. */
6770 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6771 return false;
6772 x = XEXP (x, 1);
6774 if (TARGET_ELF || TARGET_MACHO)
6776 bool large_toc_ok;
6778 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6779 return false;
6780 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, since it usually calls
6781 push_reload from the reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
6782 recognizes some LO_SUM addresses as valid although this
6783 function says the opposite.  In most cases LRA can generate
6784 correct code for address reloads through its own transformations;
6785 it cannot manage only some LO_SUM cases.  So we need to add
6786 code here, analogous to that in rs6000_legitimize_reload_address
6787 for LO_SUM, saying that some addresses are still valid. */
6788 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6789 && small_toc_ref (x, VOIDmode));
6790 if (TARGET_TOC && ! large_toc_ok)
6791 return false;
6792 if (GET_MODE_NUNITS (mode) != 1)
6793 return false;
6794 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6795 && !(/* ??? Assume floating point reg based on mode? */
6796 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6797 && (mode == DFmode || mode == DDmode)))
6798 return false;
6800 return CONSTANT_P (x) || large_toc_ok;
6803 return false;
6807 /* Try machine-dependent ways of modifying an illegitimate address
6808 to be legitimate. If we find one, return the new, valid address.
6809 This is used from only one place: `memory_address' in explow.c.
6811 OLDX is the address as it was before break_out_memory_refs was
6812 called. In some cases it is useful to look at this to decide what
6813 needs to be done.
6815 It is always safe for this function to do nothing. It exists to
6816 recognize opportunities to optimize the output.
6818 On RS/6000, first check for the sum of a register with a constant
6819 integer that is out of range. If so, generate code to add the
6820 constant with the low-order 16 bits masked to the register and force
6821 this result into another register (this can be done with `cau').
6822 Then generate an address of REG+(CONST&0xffff), allowing for the
6823 possibility of bit 16 being a one.
6825 Then check for the sum of a register and something not constant, try to
6826 load the other things into a register and return the sum. */
6828 static rtx
6829 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6830 machine_mode mode)
6832 unsigned int extra;
6834 if (!reg_offset_addressing_ok_p (mode))
6836 if (virtual_stack_registers_memory_p (x))
6837 return x;
6839 /* In theory we should not be seeing addresses of the form reg+0,
6840 but just in case it is generated, optimize it away. */
6841 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6842 return force_reg (Pmode, XEXP (x, 0));
6844 /* For TImode with load/store quad, restrict addresses to just a single
6845 pointer, so it works with both GPRs and VSX registers. */
6846 /* Make sure both operands are registers. */
6847 else if (GET_CODE (x) == PLUS
6848 && (mode != TImode || !TARGET_QUAD_MEMORY))
6849 return gen_rtx_PLUS (Pmode,
6850 force_reg (Pmode, XEXP (x, 0)),
6851 force_reg (Pmode, XEXP (x, 1)));
6852 else
6853 return force_reg (Pmode, x);
6855 if (GET_CODE (x) == SYMBOL_REF)
6857 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6858 if (model != 0)
6859 return rs6000_legitimize_tls_address (x, model);
6862 extra = 0;
6863 switch (mode)
6865 case TFmode:
6866 case TDmode:
6867 case TImode:
6868 case PTImode:
6869 /* As in legitimate_offset_address_p we do not assume
6870 worst-case. The mode here is just a hint as to the registers
6871 used. A TImode is usually in gprs, but may actually be in
6872 fprs. Leave worst-case scenario for reload to handle via
6873 insn constraints. PTImode is only GPRs. */
6874 extra = 8;
6875 break;
6876 default:
6877 break;
6880 if (GET_CODE (x) == PLUS
6881 && GET_CODE (XEXP (x, 0)) == REG
6882 && GET_CODE (XEXP (x, 1)) == CONST_INT
6883 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6884 >= 0x10000 - extra)
6885 && !(SPE_VECTOR_MODE (mode)
6886 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6888 HOST_WIDE_INT high_int, low_int;
6889 rtx sum;
6890 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6891 if (low_int >= 0x8000 - extra)
6892 low_int = 0;
6893 high_int = INTVAL (XEXP (x, 1)) - low_int;
6894 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6895 GEN_INT (high_int)), 0);
6896 return plus_constant (Pmode, sum, low_int);
6898 else if (GET_CODE (x) == PLUS
6899 && GET_CODE (XEXP (x, 0)) == REG
6900 && GET_CODE (XEXP (x, 1)) != CONST_INT
6901 && GET_MODE_NUNITS (mode) == 1
6902 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6903 || (/* ??? Assume floating point reg based on mode? */
6904 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6905 && (mode == DFmode || mode == DDmode)))
6906 && !avoiding_indexed_address_p (mode))
6908 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6909 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6911 else if (SPE_VECTOR_MODE (mode)
6912 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6914 if (mode == DImode)
6915 return x;
6916 /* We accept [reg + reg] and [reg + OFFSET]. */
6918 if (GET_CODE (x) == PLUS)
6920 rtx op1 = XEXP (x, 0);
6921 rtx op2 = XEXP (x, 1);
6922 rtx y;
6924 op1 = force_reg (Pmode, op1);
6926 if (GET_CODE (op2) != REG
6927 && (GET_CODE (op2) != CONST_INT
6928 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
6929 || (GET_MODE_SIZE (mode) > 8
6930 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
6931 op2 = force_reg (Pmode, op2);
6933 /* We can't always do [reg + reg] for these, because [reg +
6934 reg + offset] is not a legitimate addressing mode. */
6935 y = gen_rtx_PLUS (Pmode, op1, op2);
6937 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
6938 return force_reg (Pmode, y);
6939 else
6940 return y;
6943 return force_reg (Pmode, x);
6945 else if ((TARGET_ELF
6946 #if TARGET_MACHO
6947 || !MACHO_DYNAMIC_NO_PIC_P
6948 #endif
6950 && TARGET_32BIT
6951 && TARGET_NO_TOC
6952 && ! flag_pic
6953 && GET_CODE (x) != CONST_INT
6954 && GET_CODE (x) != CONST_WIDE_INT
6955 && GET_CODE (x) != CONST_DOUBLE
6956 && CONSTANT_P (x)
6957 && GET_MODE_NUNITS (mode) == 1
6958 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6959 || (/* ??? Assume floating point reg based on mode? */
6960 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6961 && (mode == DFmode || mode == DDmode))))
6963 rtx reg = gen_reg_rtx (Pmode);
6964 if (TARGET_ELF)
6965 emit_insn (gen_elf_high (reg, x));
6966 else
6967 emit_insn (gen_macho_high (reg, x));
6968 return gen_rtx_LO_SUM (Pmode, reg, x);
6970 else if (TARGET_TOC
6971 && GET_CODE (x) == SYMBOL_REF
6972 && constant_pool_expr_p (x)
6973 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
6974 return create_TOC_reference (x, NULL_RTX);
6975 else
6976 return x;
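/* A standalone sketch (plain C, not part of GCC) of the high/low split
   used above for out-of-range displacements: LOW is the sign-extended
   low 16 bits and HIGH the remainder, so HIGH + LOW == VAL and LOW fits
   a 16-bit D-form displacement.  (The code above additionally zeroes LOW
   when it would collide with the mode's EXTRA slack.)
   "split_displacement" is a name invented for this illustration.  */
#include <stdint.h>

static void
split_displacement (int64_t val, int64_t *high, int64_t *low)
{
  *low = ((val & 0xffff) ^ 0x8000) - 0x8000;  /* sign-extend low 16 bits */
  *high = val - *low;                          /* a multiple of 0x10000 */
}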
6979 /* Debug version of rs6000_legitimize_address. */
6980 static rtx
6981 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
6983 rtx ret;
6984 rtx_insn *insns;
6986 start_sequence ();
6987 ret = rs6000_legitimize_address (x, oldx, mode);
6988 insns = get_insns ();
6989 end_sequence ();
6991 if (ret != x)
6993 fprintf (stderr,
6994 "\nrs6000_legitimize_address: mode %s, old code %s, "
6995 "new code %s, modified\n",
6996 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
6997 GET_RTX_NAME (GET_CODE (ret)));
6999 fprintf (stderr, "Original address:\n");
7000 debug_rtx (x);
7002 fprintf (stderr, "oldx:\n");
7003 debug_rtx (oldx);
7005 fprintf (stderr, "New address:\n");
7006 debug_rtx (ret);
7008 if (insns)
7010 fprintf (stderr, "Insns added:\n");
7011 debug_rtx_list (insns, 20);
7014 else
7016 fprintf (stderr,
7017 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7018 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7020 debug_rtx (x);
7023 if (insns)
7024 emit_insn (insns);
7026 return ret;
7029 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7030 We need to emit DTP-relative relocations. */
7032 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7033 static void
7034 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7036 switch (size)
7038 case 4:
7039 fputs ("\t.long\t", file);
7040 break;
7041 case 8:
7042 fputs (DOUBLE_INT_ASM_OP, file);
7043 break;
7044 default:
7045 gcc_unreachable ();
7047 output_addr_const (file, x);
7048 fputs ("@dtprel+0x8000", file);
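/* For illustration (a sample of the shape of the output, not text taken
   from GCC itself): with SIZE == 4 and X printing as "foo", the function
   above emits a line of the form

       .long   foo@dtprel+0x8000

   The 0x8000 addend is the bias applied to DTP-relative TLS offsets on
   PowerPC.  */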
7051 /* Return true if X is a symbol that refers to real (rather than emulated)
7052 TLS. */
7054 static bool
7055 rs6000_real_tls_symbol_ref_p (rtx x)
7057 return (GET_CODE (x) == SYMBOL_REF
7058 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7061 /* In the name of slightly smaller debug output, and to cater to
7062 general assembler lossage, recognize various UNSPEC sequences
7063 and turn them back into a direct symbol reference. */
7065 static rtx
7066 rs6000_delegitimize_address (rtx orig_x)
7068 rtx x, y, offset;
7070 orig_x = delegitimize_mem_from_attrs (orig_x);
7071 x = orig_x;
7072 if (MEM_P (x))
7073 x = XEXP (x, 0);
7075 y = x;
7076 if (TARGET_CMODEL != CMODEL_SMALL
7077 && GET_CODE (y) == LO_SUM)
7078 y = XEXP (y, 1);
7080 offset = NULL_RTX;
7081 if (GET_CODE (y) == PLUS
7082 && GET_MODE (y) == Pmode
7083 && CONST_INT_P (XEXP (y, 1)))
7085 offset = XEXP (y, 1);
7086 y = XEXP (y, 0);
7089 if (GET_CODE (y) == UNSPEC
7090 && XINT (y, 1) == UNSPEC_TOCREL)
7092 y = XVECEXP (y, 0, 0);
7094 #ifdef HAVE_AS_TLS
7095 /* Do not associate thread-local symbols with the original
7096 constant pool symbol. */
7097 if (TARGET_XCOFF
7098 && GET_CODE (y) == SYMBOL_REF
7099 && CONSTANT_POOL_ADDRESS_P (y)
7100 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7101 return orig_x;
7102 #endif
7104 if (offset != NULL_RTX)
7105 y = gen_rtx_PLUS (Pmode, y, offset);
7106 if (!MEM_P (orig_x))
7107 return y;
7108 else
7109 return replace_equiv_address_nv (orig_x, y);
7112 if (TARGET_MACHO
7113 && GET_CODE (orig_x) == LO_SUM
7114 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7116 y = XEXP (XEXP (orig_x, 1), 0);
7117 if (GET_CODE (y) == UNSPEC
7118 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7119 return XVECEXP (y, 0, 0);
7122 return orig_x;
7125 /* Return true if X shouldn't be emitted into the debug info.
7126 The linker doesn't like .toc section references from
7127 .debug_* sections, so reject .toc section symbols. */
7129 static bool
7130 rs6000_const_not_ok_for_debug_p (rtx x)
7132 if (GET_CODE (x) == SYMBOL_REF
7133 && CONSTANT_POOL_ADDRESS_P (x))
7135 rtx c = get_pool_constant (x);
7136 machine_mode cmode = get_pool_mode (x);
7137 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7138 return true;
7141 return false;
7144 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7146 static GTY(()) rtx rs6000_tls_symbol;
7147 static rtx
7148 rs6000_tls_get_addr (void)
7150 if (!rs6000_tls_symbol)
7151 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7153 return rs6000_tls_symbol;
7156 /* Construct the SYMBOL_REF for TLS GOT references. */
7158 static GTY(()) rtx rs6000_got_symbol;
7159 static rtx
7160 rs6000_got_sym (void)
7162 if (!rs6000_got_symbol)
7164 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7165 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7166 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7169 return rs6000_got_symbol;
7172 /* AIX Thread-Local Address support. */
7174 static rtx
7175 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7177 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7178 const char *name;
7179 char *tlsname;
7181 name = XSTR (addr, 0);
7182 /* Append TLS CSECT qualifier, unless the symbol already is qualified
7183 or the symbol will be in TLS private data section. */
7184 if (name[strlen (name) - 1] != ']'
7185 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7186 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7188 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7189 strcpy (tlsname, name);
7190 strcat (tlsname,
7191 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7192 tlsaddr = copy_rtx (addr);
7193 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7195 else
7196 tlsaddr = addr;
7198 /* Place addr into TOC constant pool. */
7199 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7201 /* Output the TOC entry and create the MEM referencing the value. */
7202 if (constant_pool_expr_p (XEXP (sym, 0))
7203 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7205 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7206 mem = gen_const_mem (Pmode, tocref);
7207 set_mem_alias_set (mem, get_TOC_alias_set ());
7209 else
7210 return sym;
7212 /* Use global-dynamic for local-dynamic. */
7213 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7214 || model == TLS_MODEL_LOCAL_DYNAMIC)
7216 /* Create new TOC reference for @m symbol. */
7217 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7218 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7219 strcpy (tlsname, "*LCM");
7220 strcat (tlsname, name + 3);
7221 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7222 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7223 tocref = create_TOC_reference (modaddr, NULL_RTX);
7224 rtx modmem = gen_const_mem (Pmode, tocref);
7225 set_mem_alias_set (modmem, get_TOC_alias_set ());
7227 rtx modreg = gen_reg_rtx (Pmode);
7228 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
7230 tmpreg = gen_reg_rtx (Pmode);
7231 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7233 dest = gen_reg_rtx (Pmode);
7234 if (TARGET_32BIT)
7235 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7236 else
7237 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7238 return dest;
7240 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
7241 else if (TARGET_32BIT)
7243 tlsreg = gen_reg_rtx (SImode);
7244 emit_insn (gen_tls_get_tpointer (tlsreg));
7246 else
7247 tlsreg = gen_rtx_REG (DImode, 13);
7249 /* Load the TOC value into temporary register. */
7250 tmpreg = gen_reg_rtx (Pmode);
7251 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7252 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7253 gen_rtx_MINUS (Pmode, addr, tlsreg));
7255 /* Add TOC symbol value to TLS pointer. */
7256 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7258 return dest;
7261 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7262 this (thread-local) address. */
7264 static rtx
7265 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7267 rtx dest, insn;
7269 if (TARGET_XCOFF)
7270 return rs6000_legitimize_tls_address_aix (addr, model);
7272 dest = gen_reg_rtx (Pmode);
7273 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7275 rtx tlsreg;
7277 if (TARGET_64BIT)
7279 tlsreg = gen_rtx_REG (Pmode, 13);
7280 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7282 else
7284 tlsreg = gen_rtx_REG (Pmode, 2);
7285 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7287 emit_insn (insn);
7289 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7291 rtx tlsreg, tmp;
7293 tmp = gen_reg_rtx (Pmode);
7294 if (TARGET_64BIT)
7296 tlsreg = gen_rtx_REG (Pmode, 13);
7297 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7299 else
7301 tlsreg = gen_rtx_REG (Pmode, 2);
7302 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7304 emit_insn (insn);
7305 if (TARGET_64BIT)
7306 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7307 else
7308 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7309 emit_insn (insn);
7311 else
7313 rtx r3, got, tga, tmp1, tmp2, call_insn;
7315 /* We currently use relocations like @got@tlsgd for TLS, which
7316 means the linker will handle allocation of tls entries, placing
7317 them in the .got section. So use a pointer to the .got section,
7318 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7319 or to secondary GOT sections used by 32-bit -fPIC. */
7320 if (TARGET_64BIT)
7321 got = gen_rtx_REG (Pmode, 2);
7322 else
7324 if (flag_pic == 1)
7325 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7326 else
7328 rtx gsym = rs6000_got_sym ();
7329 got = gen_reg_rtx (Pmode);
7330 if (flag_pic == 0)
7331 rs6000_emit_move (got, gsym, Pmode);
7332 else
7334 rtx mem, lab, last;
7336 tmp1 = gen_reg_rtx (Pmode);
7337 tmp2 = gen_reg_rtx (Pmode);
7338 mem = gen_const_mem (Pmode, tmp1);
7339 lab = gen_label_rtx ();
7340 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7341 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7342 if (TARGET_LINK_STACK)
7343 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7344 emit_move_insn (tmp2, mem);
7345 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7346 set_unique_reg_note (last, REG_EQUAL, gsym);
7351 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7353 tga = rs6000_tls_get_addr ();
7354 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7355 1, const0_rtx, Pmode);
7357 r3 = gen_rtx_REG (Pmode, 3);
7358 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7360 if (TARGET_64BIT)
7361 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7362 else
7363 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7365 else if (DEFAULT_ABI == ABI_V4)
7366 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7367 else
7368 gcc_unreachable ();
7369 call_insn = last_call_insn ();
7370 PATTERN (call_insn) = insn;
7371 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7372 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7373 pic_offset_table_rtx);
7375 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7377 tga = rs6000_tls_get_addr ();
7378 tmp1 = gen_reg_rtx (Pmode);
7379 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7380 1, const0_rtx, Pmode);
7382 r3 = gen_rtx_REG (Pmode, 3);
7383 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7385 if (TARGET_64BIT)
7386 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7387 else
7388 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7390 else if (DEFAULT_ABI == ABI_V4)
7391 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7392 else
7393 gcc_unreachable ();
7394 call_insn = last_call_insn ();
7395 PATTERN (call_insn) = insn;
7396 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7397 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7398 pic_offset_table_rtx);
7400 if (rs6000_tls_size == 16)
7402 if (TARGET_64BIT)
7403 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7404 else
7405 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7407 else if (rs6000_tls_size == 32)
7409 tmp2 = gen_reg_rtx (Pmode);
7410 if (TARGET_64BIT)
7411 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7412 else
7413 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7414 emit_insn (insn);
7415 if (TARGET_64BIT)
7416 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7417 else
7418 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7420 else
7422 tmp2 = gen_reg_rtx (Pmode);
7423 if (TARGET_64BIT)
7424 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7425 else
7426 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7427 emit_insn (insn);
7428 insn = gen_rtx_SET (Pmode, dest,
7429 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7431 emit_insn (insn);
7433 else
7435 /* IE, or 64-bit offset LE. */
7436 tmp2 = gen_reg_rtx (Pmode);
7437 if (TARGET_64BIT)
7438 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7439 else
7440 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7441 emit_insn (insn);
7442 if (TARGET_64BIT)
7443 insn = gen_tls_tls_64 (dest, tmp2, addr);
7444 else
7445 insn = gen_tls_tls_32 (dest, tmp2, addr);
7446 emit_insn (insn);
7450 return dest;
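/* For reference, illustrative sketches of the ELF sequences built
   above (assuming 64-bit and the small code model; the medium model
   splits GOT offsets across @ha/@l, and 32-bit uses different
   mnemonics and the GOT pointer instead of r2):

   local-exec, -mtls-size=16:
       addi  dest, r13, sym@tprel
   local-exec, -mtls-size=32:
       addis tmp, r13, sym@tprel@ha
       addi  dest, tmp, sym@tprel@l
   global-dynamic:
       addi  r3, r2, sym@got@tlsgd
       bl    __tls_get_addr(sym@tlsgd)
   initial-exec:
       ld    tmp2, sym@got@tprel(r2)
       add   dest, tmp2, sym@tls  */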
7453 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7455 static bool
7456 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7458 if (GET_CODE (x) == HIGH
7459 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7460 return true;
7462 /* A TLS symbol in the TOC cannot contain a sum. */
7463 if (GET_CODE (x) == CONST
7464 && GET_CODE (XEXP (x, 0)) == PLUS
7465 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7466 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7467 return true;
7469 /* Do not place an ELF TLS symbol in the constant pool. */
7470 return TARGET_ELF && tls_referenced_p (x);
7473 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7474 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7475 can be addressed relative to the toc pointer. */
7477 static bool
7478 use_toc_relative_ref (rtx sym)
7480 return ((constant_pool_expr_p (sym)
7481 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7482 get_pool_mode (sym)))
7483 || (TARGET_CMODEL == CMODEL_MEDIUM
7484 && SYMBOL_REF_LOCAL_P (sym)));
7487 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7488 replace the input X, or the original X if no replacement is called for.
7489 The output parameter *WIN is 1 if the calling macro should goto WIN,
7490 0 if it should not.
7492 For RS/6000, we wish to handle large displacements off a base
7493 register by splitting the addend across an addi/addis pair and the mem insn.
7494 This cuts the number of extra insns needed from 3 to 1.
7496 On Darwin, we use this to generate code for floating point constants.
7497 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7498 The Darwin code is inside #if TARGET_MACHO because only then are the
7499 machopic_* functions defined. */
7500 static rtx
7501 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7502 int opnum, int type,
7503 int ind_levels ATTRIBUTE_UNUSED, int *win)
7505 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7507 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7508 DFmode/DImode MEM. */
7509 if (reg_offset_p
7510 && opnum == 1
7511 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7512 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7513 reg_offset_p = false;
7515 /* We must recognize output that we have already generated ourselves. */
7516 if (GET_CODE (x) == PLUS
7517 && GET_CODE (XEXP (x, 0)) == PLUS
7518 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7519 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7520 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7522 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7523 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7524 opnum, (enum reload_type) type);
7525 *win = 1;
7526 return x;
7529 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7530 if (GET_CODE (x) == LO_SUM
7531 && GET_CODE (XEXP (x, 0)) == HIGH)
7533 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7534 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7535 opnum, (enum reload_type) type);
7536 *win = 1;
7537 return x;
7540 #if TARGET_MACHO
7541 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7542 && GET_CODE (x) == LO_SUM
7543 && GET_CODE (XEXP (x, 0)) == PLUS
7544 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7545 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7546 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7547 && machopic_operand_p (XEXP (x, 1)))
7549 /* Result of a previous invocation of this function on a Darwin
7550 floating-point constant. */
7551 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7552 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7553 opnum, (enum reload_type) type);
7554 *win = 1;
7555 return x;
7557 #endif
7559 if (TARGET_CMODEL != CMODEL_SMALL
7560 && reg_offset_p
7561 && small_toc_ref (x, VOIDmode))
7563 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7564 x = gen_rtx_LO_SUM (Pmode, hi, x);
7565 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7566 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7567 opnum, (enum reload_type) type);
7568 *win = 1;
7569 return x;
7572 if (GET_CODE (x) == PLUS
7573 && GET_CODE (XEXP (x, 0)) == REG
7574 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7575 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7576 && GET_CODE (XEXP (x, 1)) == CONST_INT
7577 && reg_offset_p
7578 && !SPE_VECTOR_MODE (mode)
7579 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7580 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7582 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7583 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7584 HOST_WIDE_INT high
7585 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
7587 /* Check for 32-bit overflow. */
7588 if (high + low != val)
7590 *win = 0;
7591 return x;
7594 /* Reload the high part into a base reg; leave the low part
7595 in the mem directly. */
7597 x = gen_rtx_PLUS (GET_MODE (x),
7598 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7599 GEN_INT (high)),
7600 GEN_INT (low));
7602 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7603 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7604 opnum, (enum reload_type) type);
7605 *win = 1;
7606 return x;
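/* Worked example of the split above (a sketch): for val = 0x12345,
   low = ((0x2345 ^ 0x8000) - 0x8000) = 0x2345 and high = 0x10000, so
   (reg + 0x12345) becomes ((reg + 0x10000) + 0x2345).  Reload then
   materializes the inner sum with a single addis, and 0x2345 remains
   as the 16-bit displacement of the memory insn.  */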
7609 if (GET_CODE (x) == SYMBOL_REF
7610 && reg_offset_p
7611 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7612 && !SPE_VECTOR_MODE (mode)
7613 #if TARGET_MACHO
7614 && DEFAULT_ABI == ABI_DARWIN
7615 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7616 && machopic_symbol_defined_p (x)
7617 #else
7618 && DEFAULT_ABI == ABI_V4
7619 && !flag_pic
7620 #endif
7621 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7622 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7623 without fprs.
7624 ??? Assume floating point reg based on mode? This assumption is
7625 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7626 where reload ends up doing a DFmode load of a constant from
7627 mem using two gprs. Unfortunately, at this point reload
7628 hasn't yet selected regs so poking around in reload data
7629 won't help and even if we could figure out the regs reliably,
7630 we'd still want to allow this transformation when the mem is
7631 naturally aligned. Since we say the address is good here, we
7632 can't disable offsets from LO_SUMs in mem_operand_gpr.
7633 FIXME: Allow offset from lo_sum for other modes too, when
7634 mem is sufficiently aligned.
7636 Also disallow this if the type can go in VMX/Altivec registers, since
7637 those registers do not have d-form (reg+offset) address modes. */
7638 && !reg_addr[mode].scalar_in_vmx_p
7639 && mode != TFmode
7640 && mode != TDmode
7641 && (mode != TImode || !TARGET_VSX_TIMODE)
7642 && mode != PTImode
7643 && (mode != DImode || TARGET_POWERPC64)
7644 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7645 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7647 #if TARGET_MACHO
7648 if (flag_pic)
7650 rtx offset = machopic_gen_offset (x);
7651 x = gen_rtx_LO_SUM (GET_MODE (x),
7652 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7653 gen_rtx_HIGH (Pmode, offset)), offset);
7655 else
7656 #endif
7657 x = gen_rtx_LO_SUM (GET_MODE (x),
7658 gen_rtx_HIGH (Pmode, x), x);
7660 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7661 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7662 opnum, (enum reload_type) type);
7663 *win = 1;
7664 return x;
7667 /* Reload an offset address wrapped by an AND that represents the
7668 masking of the lower bits. Strip the outer AND and let reload
7669 convert the offset address into an indirect address. For VSX,
7670 force reload to create the address with an AND in a separate
7671 register, because we can't guarantee an altivec register will
7672 be used. */
7673 if (VECTOR_MEM_ALTIVEC_P (mode)
7674 && GET_CODE (x) == AND
7675 && GET_CODE (XEXP (x, 0)) == PLUS
7676 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7677 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7678 && GET_CODE (XEXP (x, 1)) == CONST_INT
7679 && INTVAL (XEXP (x, 1)) == -16)
7681 x = XEXP (x, 0);
7682 *win = 1;
7683 return x;
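/* E.g. (a sketch): an unaligned Altivec address such as
       (and:SI (plus:SI (reg) (const_int 12)) (const_int -16))
   models lvx/stvx ignoring the low four address bits; stripping the
   AND here leaves (plus (reg) (const_int 12)) for reload to handle
   as an ordinary offset address.  */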
7686 if (TARGET_TOC
7687 && reg_offset_p
7688 && GET_CODE (x) == SYMBOL_REF
7689 && use_toc_relative_ref (x))
7691 x = create_TOC_reference (x, NULL_RTX);
7692 if (TARGET_CMODEL != CMODEL_SMALL)
7693 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7694 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7695 opnum, (enum reload_type) type);
7696 *win = 1;
7697 return x;
7699 *win = 0;
7700 return x;
7703 /* Debug version of rs6000_legitimize_reload_address. */
7704 static rtx
7705 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7706 int opnum, int type,
7707 int ind_levels, int *win)
7709 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7710 ind_levels, win);
7711 fprintf (stderr,
7712 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7713 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7714 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7715 debug_rtx (x);
7717 if (x == ret)
7718 fprintf (stderr, "Same address returned\n");
7719 else if (!ret)
7720 fprintf (stderr, "NULL returned\n");
7721 else
7723 fprintf (stderr, "New address:\n");
7724 debug_rtx (ret);
7727 return ret;
7730 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7731 that is a valid memory address for an instruction.
7732 The MODE argument is the machine mode for the MEM expression
7733 that wants to use this address.
7735 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
7736 refers to a constant pool entry of an address (or the sum of it
7737 plus a constant), a short (16-bit signed) constant plus a register,
7738 the sum of two registers, or a register indirect, possibly with an
7739 auto-increment. For DFmode, DDmode and DImode with a constant plus
7740 register, we must ensure that both words are addressable, or on
7741 PowerPC64 that the offset is word aligned.
7743 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7744 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7745 because adjacent memory cells are accessed by adding word-sized offsets
7746 during assembly output. */
7747 static bool
7748 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7750 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7752 /* If this is an unaligned stvx/lvx type address, discard the outer AND. */
7753 if (VECTOR_MEM_ALTIVEC_P (mode)
7754 && GET_CODE (x) == AND
7755 && GET_CODE (XEXP (x, 1)) == CONST_INT
7756 && INTVAL (XEXP (x, 1)) == -16)
7757 x = XEXP (x, 0);
7759 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7760 return 0;
7761 if (legitimate_indirect_address_p (x, reg_ok_strict))
7762 return 1;
7763 if (TARGET_UPDATE
7764 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7765 && mode_supports_pre_incdec_p (mode)
7766 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7767 return 1;
7768 if (virtual_stack_registers_memory_p (x))
7769 return 1;
7770 if (reg_offset_p && legitimate_small_data_p (mode, x))
7771 return 1;
7772 if (reg_offset_p
7773 && legitimate_constant_pool_address_p (x, mode,
7774 reg_ok_strict || lra_in_progress))
7775 return 1;
7776 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7777 allow register indirect addresses. This will allow the values to go in
7778 either GPRs or VSX registers without reloading. The vector types would
7779 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7780 somewhat split, in that some uses are GPR based, and some VSX based. */
7781 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7782 return 0;
7783 /* If not REG_OK_STRICT (before reload), accept any stack offset. */
7784 if (! reg_ok_strict
7785 && reg_offset_p
7786 && GET_CODE (x) == PLUS
7787 && GET_CODE (XEXP (x, 0)) == REG
7788 && (XEXP (x, 0) == virtual_stack_vars_rtx
7789 || XEXP (x, 0) == arg_pointer_rtx)
7790 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7791 return 1;
7792 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7793 return 1;
7794 if (mode != TFmode
7795 && mode != TDmode
7796 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7797 || TARGET_POWERPC64
7798 || (mode != DFmode && mode != DDmode)
7799 || (TARGET_E500_DOUBLE && mode != DDmode))
7800 && (TARGET_POWERPC64 || mode != DImode)
7801 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7802 && mode != PTImode
7803 && !avoiding_indexed_address_p (mode)
7804 && legitimate_indexed_address_p (x, reg_ok_strict))
7805 return 1;
7806 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7807 && mode_supports_pre_modify_p (mode)
7808 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7809 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7810 reg_ok_strict, false)
7811 || (!avoiding_indexed_address_p (mode)
7812 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7813 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7814 return 1;
7815 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7816 return 1;
7817 return 0;
7820 /* Debug version of rs6000_legitimate_address_p. */
7821 static bool
7822 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7823 bool reg_ok_strict)
7825 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7826 fprintf (stderr,
7827 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7828 "strict = %d, reload = %s, code = %s\n",
7829 ret ? "true" : "false",
7830 GET_MODE_NAME (mode),
7831 reg_ok_strict,
7832 (reload_completed
7833 ? "after"
7834 : (reload_in_progress ? "progress" : "before")),
7835 GET_RTX_NAME (GET_CODE (x)));
7836 debug_rtx (x);
7838 return ret;
7841 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7843 static bool
7844 rs6000_mode_dependent_address_p (const_rtx addr,
7845 addr_space_t as ATTRIBUTE_UNUSED)
7847 return rs6000_mode_dependent_address_ptr (addr);
7850 /* Go to LABEL if ADDR (a legitimate address expression)
7851 has an effect that depends on the machine mode it is used for.
7853 On the RS/6000 this is true of all integral offsets (since AltiVec
7854 and VSX modes don't allow them) and of pre-increment or decrement addresses.
7856 ??? Except that due to conceptual problems in offsettable_address_p
7857 we can't really report the problems of integral offsets. So leave
7858 this assuming that the adjustable offset must be valid for the
7859 sub-words of a TFmode operand, which is what we had before. */
7861 static bool
7862 rs6000_mode_dependent_address (const_rtx addr)
7864 switch (GET_CODE (addr))
7866 case PLUS:
7867 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7868 is considered a legitimate address before reload, so there
7869 are no offset restrictions in that case. Note that this
7870 condition is safe in strict mode because any address involving
7871 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7872 been rejected as illegitimate. */
7873 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7874 && XEXP (addr, 0) != arg_pointer_rtx
7875 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7877 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7878 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
7880 break;
7882 case LO_SUM:
7883 /* Anything in the constant pool is sufficiently aligned that
7884 all bytes have the same high part address. */
7885 return !legitimate_constant_pool_address_p (addr, QImode, false);
7887 /* Auto-increment cases are now treated generically in recog.c. */
7888 case PRE_MODIFY:
7889 return TARGET_UPDATE;
7891 /* AND is only allowed in Altivec loads. */
7892 case AND:
7893 return true;
7895 default:
7896 break;
7899 return false;
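/* A worked example of the PLUS bound above: the worst-case mode,
   TFmode, spans 16 bytes, so on 32-bit a (reg + c) access touches
   words at c, c+4, c+8 and c+12.  The offset is mode-dependent
   unless both c and c+12 fit in a signed 16-bit displacement, which
   is what "val + 0x8000 >= 0x10000 - 12" tests; on 64-bit the final
   doubleword sits at c+8, hence the bound of 8.  */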
7902 /* Debug version of rs6000_mode_dependent_address. */
7903 static bool
7904 rs6000_debug_mode_dependent_address (const_rtx addr)
7906 bool ret = rs6000_mode_dependent_address (addr);
7908 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7909 ret ? "true" : "false");
7910 debug_rtx (addr);
7912 return ret;
7915 /* Implement FIND_BASE_TERM. */
7918 rs6000_find_base_term (rtx op)
7920 rtx base;
7922 base = op;
7923 if (GET_CODE (base) == CONST)
7924 base = XEXP (base, 0);
7925 if (GET_CODE (base) == PLUS)
7926 base = XEXP (base, 0);
7927 if (GET_CODE (base) == UNSPEC)
7928 switch (XINT (base, 1))
7930 case UNSPEC_TOCREL:
7931 case UNSPEC_MACHOPIC_OFFSET:
7932 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7933 for aliasing purposes. */
7934 return XVECEXP (base, 0, 0);
7937 return op;
7940 /* More elaborate version of recog's offsettable_memref_p predicate
7941 that works around the ??? note of rs6000_mode_dependent_address.
7942 In particular it accepts
7944 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7946 in 32-bit mode, which the recog predicate rejects. */
7948 static bool
7949 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
7951 bool worst_case;
7953 if (!MEM_P (op))
7954 return false;
7956 /* First mimic offsettable_memref_p. */
7957 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7958 return true;
7960 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7961 the latter predicate knows nothing about the mode of the memory
7962 reference and, therefore, assumes that it is the largest supported
7963 mode (TFmode). As a consequence, legitimate offsettable memory
7964 references are rejected. rs6000_legitimate_offset_address_p contains
7965 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7966 at least with a little bit of help here given that we know the
7967 actual registers used. */
7968 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7969 || GET_MODE_SIZE (reg_mode) == 4);
7970 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7971 true, worst_case);
7974 /* Change register usage conditional on target flags. */
7975 static void
7976 rs6000_conditional_register_usage (void)
7978 int i;
7980 if (TARGET_DEBUG_TARGET)
7981 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7983 /* Set MQ register fixed (already call_used) so that it will not be
7984 allocated. */
7985 fixed_regs[64] = 1;
7987 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
7988 if (TARGET_64BIT)
7989 fixed_regs[13] = call_used_regs[13]
7990 = call_really_used_regs[13] = 1;
7992 /* Conditionally disable FPRs. */
7993 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
7994 for (i = 32; i < 64; i++)
7995 fixed_regs[i] = call_used_regs[i]
7996 = call_really_used_regs[i] = 1;
7998 /* The TOC register is not killed across calls in a way that is
7999 visible to the compiler. */
8000 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8001 call_really_used_regs[2] = 0;
8003 if (DEFAULT_ABI == ABI_V4
8004 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8005 && flag_pic == 2)
8006 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8008 if (DEFAULT_ABI == ABI_V4
8009 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8010 && flag_pic == 1)
8011 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8012 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8013 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8015 if (DEFAULT_ABI == ABI_DARWIN
8016 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
8017 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8018 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8019 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8021 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8022 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8023 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8025 if (TARGET_SPE)
8027 global_regs[SPEFSCR_REGNO] = 1;
8028 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8029 registers in prologues and epilogues. We no longer use r14
8030 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8031 pool for link-compatibility with older versions of GCC. Once
8032 "old" code has died out, we can return r14 to the allocation
8033 pool. */
8034 fixed_regs[14]
8035 = call_used_regs[14]
8036 = call_really_used_regs[14] = 1;
8039 if (!TARGET_ALTIVEC && !TARGET_VSX)
8041 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8042 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8043 call_really_used_regs[VRSAVE_REGNO] = 1;
8046 if (TARGET_ALTIVEC || TARGET_VSX)
8047 global_regs[VSCR_REGNO] = 1;
8049 if (TARGET_ALTIVEC_ABI)
8051 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8052 call_used_regs[i] = call_really_used_regs[i] = 1;
8054 /* AIX reserves VR20:31 in non-extended ABI mode. */
8055 if (TARGET_XCOFF)
8056 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8057 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8062 /* Output insns to set DEST equal to the constant SOURCE as a series of
8063 lis, ori and shift instructions, and return TRUE. */
8065 bool
8066 rs6000_emit_set_const (rtx dest, rtx source)
8068 machine_mode mode = GET_MODE (dest);
8069 rtx temp, set;
8070 rtx_insn *insn;
8071 HOST_WIDE_INT c;
8073 gcc_checking_assert (CONST_INT_P (source));
8074 c = INTVAL (source);
8075 switch (mode)
8077 case QImode:
8078 case HImode:
8079 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
8080 return true;
8082 case SImode:
8083 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8085 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
8086 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8087 emit_insn (gen_rtx_SET (VOIDmode, dest,
8088 gen_rtx_IOR (SImode, copy_rtx (temp),
8089 GEN_INT (c & 0xffff))));
8090 break;
8092 case DImode:
8093 if (!TARGET_POWERPC64)
8095 rtx hi, lo;
8097 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8098 DImode);
8099 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8100 DImode);
8101 emit_move_insn (hi, GEN_INT (c >> 32));
8102 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8103 emit_move_insn (lo, GEN_INT (c));
8105 else
8106 rs6000_emit_set_long_const (dest, c);
8107 break;
8109 default:
8110 gcc_unreachable ();
8113 insn = get_last_insn ();
8114 set = single_set (insn);
8115 if (! CONSTANT_P (SET_SRC (set)))
8116 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8118 return true;
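/* E.g. (illustrative), for SImode and c = 0x12345678 the code above
   emits the equivalent of:
       lis  temp, 0x1234        # temp = 0x12340000
       ori  dest, temp, 0x5678  # dest = 0x12345678  */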
8121 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8122 Output insns to set DEST equal to the constant C as a series of
8123 lis, ori and shift instructions. */
8125 static void
8126 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8128 rtx temp;
8129 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8131 ud1 = c & 0xffff;
8132 c = c >> 16;
8133 ud2 = c & 0xffff;
8134 c = c >> 16;
8135 ud3 = c & 0xffff;
8136 c = c >> 16;
8137 ud4 = c & 0xffff;
8139 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8140 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8141 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8143 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8144 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8146 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8148 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8149 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8150 if (ud1 != 0)
8151 emit_move_insn (dest,
8152 gen_rtx_IOR (DImode, copy_rtx (temp),
8153 GEN_INT (ud1)));
8155 else if (ud3 == 0 && ud4 == 0)
8157 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8159 gcc_assert (ud2 & 0x8000);
8160 emit_move_insn (copy_rtx (temp),
8161 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8162 if (ud1 != 0)
8163 emit_move_insn (copy_rtx (temp),
8164 gen_rtx_IOR (DImode, copy_rtx (temp),
8165 GEN_INT (ud1)));
8166 emit_move_insn (dest,
8167 gen_rtx_ZERO_EXTEND (DImode,
8168 gen_lowpart (SImode,
8169 copy_rtx (temp))));
8171 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8172 || (ud4 == 0 && ! (ud3 & 0x8000)))
8174 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8176 emit_move_insn (copy_rtx (temp),
8177 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8178 if (ud2 != 0)
8179 emit_move_insn (copy_rtx (temp),
8180 gen_rtx_IOR (DImode, copy_rtx (temp),
8181 GEN_INT (ud2)));
8182 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8183 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8184 GEN_INT (16)));
8185 if (ud1 != 0)
8186 emit_move_insn (dest,
8187 gen_rtx_IOR (DImode, copy_rtx (temp),
8188 GEN_INT (ud1)));
8190 else
8192 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8194 emit_move_insn (copy_rtx (temp),
8195 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8196 if (ud3 != 0)
8197 emit_move_insn (copy_rtx (temp),
8198 gen_rtx_IOR (DImode, copy_rtx (temp),
8199 GEN_INT (ud3)));
8201 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8202 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8203 GEN_INT (32)));
8204 if (ud2 != 0)
8205 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8206 gen_rtx_IOR (DImode, copy_rtx (temp),
8207 GEN_INT (ud2 << 16)));
8208 if (ud1 != 0)
8209 emit_move_insn (dest,
8210 gen_rtx_IOR (DImode, copy_rtx (temp),
8211 GEN_INT (ud1)));
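/* An illustrative sketch of the general four-halfword case above,
   for c = 0x123456789abcdef0 (not verified compiler output):
       lis   temp, 0x1234        # ud4 << 16
       ori   temp, temp, 0x5678  # | ud3
       sldi  temp, temp, 32      # shift into the high word
       oris  temp, temp, 0x9abc  # | (ud2 << 16)
       ori   dest, temp, 0xdef0  # | ud1  */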
8215 /* Helper for rs6000_emit_move below. Get rid of [r+r] memory refs
8216 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
8218 static void
8219 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8221 if (reload_in_progress)
8222 return;
8224 if (GET_CODE (operands[0]) == MEM
8225 && GET_CODE (XEXP (operands[0], 0)) != REG
8226 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8227 GET_MODE (operands[0]), false))
8228 operands[0]
8229 = replace_equiv_address (operands[0],
8230 copy_addr_to_reg (XEXP (operands[0], 0)));
8232 if (GET_CODE (operands[1]) == MEM
8233 && GET_CODE (XEXP (operands[1], 0)) != REG
8234 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8235 GET_MODE (operands[1]), false))
8236 operands[1]
8237 = replace_equiv_address (operands[1],
8238 copy_addr_to_reg (XEXP (operands[1], 0)));
8241 /* Generate a vector of constants to permute MODE for a little-endian
8242 storage operation by swapping the two halves of a vector. */
8243 static rtvec
8244 rs6000_const_vec (machine_mode mode)
8246 int i, subparts;
8247 rtvec v;
8249 switch (mode)
8251 case V1TImode:
8252 subparts = 1;
8253 break;
8254 case V2DFmode:
8255 case V2DImode:
8256 subparts = 2;
8257 break;
8258 case V4SFmode:
8259 case V4SImode:
8260 subparts = 4;
8261 break;
8262 case V8HImode:
8263 subparts = 8;
8264 break;
8265 case V16QImode:
8266 subparts = 16;
8267 break;
8268 default:
8269 gcc_unreachable();
8272 v = rtvec_alloc (subparts);
8274 for (i = 0; i < subparts / 2; ++i)
8275 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8276 for (i = subparts / 2; i < subparts; ++i)
8277 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8279 return v;
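/* For example, for V4SImode the vector built above is { 2, 3, 0, 1 }:
   applying it as a VEC_SELECT exchanges the two doubleword halves of
   the vector, which is exactly what pairs with the doubleword-swapping
   lxvd2x/stxvd2x instructions.  */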
8282 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8283 for a VSX load or store operation. */
8285 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8287 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8288 return gen_rtx_VEC_SELECT (mode, source, par);
8291 /* Emit a little-endian load from vector memory location SOURCE to VSX
8292 register DEST in mode MODE. The load is done with two permuting
8293 insns that represent an lxvd2x and an xxpermdi. */
8294 void
8295 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8297 rtx tmp, permute_mem, permute_reg;
8299 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8300 V1TImode). */
8301 if (mode == TImode || mode == V1TImode)
8303 mode = V2DImode;
8304 dest = gen_lowpart (V2DImode, dest);
8305 source = adjust_address (source, V2DImode, 0);
8308 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8309 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8310 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8311 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8312 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
8315 /* Emit a little-endian store to vector memory location DEST from VSX
8316 register SOURCE in mode MODE. The store is done with two permuting
8317 insns that represent an xxpermdi and an stxvd2x. */
8318 void
8319 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8321 rtx tmp, permute_src, permute_tmp;
8323 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8324 V1TImode). */
8325 if (mode == TImode || mode == V1TImode)
8327 mode = V2DImode;
8328 dest = adjust_address (dest, V2DImode, 0);
8329 source = gen_lowpart (V2DImode, source);
8332 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8333 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8334 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8335 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8336 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8339 /* Emit a sequence representing a little-endian VSX load or store,
8340 moving data from SOURCE to DEST in mode MODE. This is done
8341 separately from rs6000_emit_move to ensure it is called only
8342 during expand. LE VSX loads and stores introduced later are
8343 handled with a split. The expand-time RTL generation allows
8344 us to optimize away redundant pairs of register-permutes. */
8345 void
8346 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8348 gcc_assert (!BYTES_BIG_ENDIAN
8349 && VECTOR_MEM_VSX_P (mode)
8350 && !gpr_or_gpr_p (dest, source)
8351 && (MEM_P (source) ^ MEM_P (dest)));
8353 if (MEM_P (source))
8355 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8356 rs6000_emit_le_vsx_load (dest, source, mode);
8358 else
8360 if (!REG_P (source))
8361 source = force_reg (mode, source);
8362 rs6000_emit_le_vsx_store (dest, source, mode);
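/* As an illustrative sketch (not verified compiler output), the load
   path above expands to two permuting operations that later match:

       lxvd2x   vs0, 0, r9         # load with doublewords swapped
       xxpermdi vs0, vs0, vs0, 2   # swap the halves back

   Emitting both permutes at expand time lets the optimizers cancel
   redundant swap pairs, e.g. for a load immediately followed by a
   store of the same value.  */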
8366 /* Emit a move from SOURCE to DEST in mode MODE. */
8367 void
8368 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8370 rtx operands[2];
8371 operands[0] = dest;
8372 operands[1] = source;
8374 if (TARGET_DEBUG_ADDR)
8376 fprintf (stderr,
8377 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8378 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8379 GET_MODE_NAME (mode),
8380 reload_in_progress,
8381 reload_completed,
8382 can_create_pseudo_p ());
8383 debug_rtx (dest);
8384 fprintf (stderr, "source:\n");
8385 debug_rtx (source);
8388 /* Sanity check: we should never see a CONST_WIDE_INT for a mode that fits in a host wide int. */
8389 if (CONST_WIDE_INT_P (operands[1])
8390 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8392 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8393 gcc_unreachable ();
8396 /* Check if GCC is setting up a block move that will end up using FP
8397 registers as temporaries. We must make sure this is acceptable. */
8398 if (GET_CODE (operands[0]) == MEM
8399 && GET_CODE (operands[1]) == MEM
8400 && mode == DImode
8401 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8402 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8403 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8404 ? 32 : MEM_ALIGN (operands[0])))
8405 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8406 ? 32
8407 : MEM_ALIGN (operands[1]))))
8408 && ! MEM_VOLATILE_P (operands [0])
8409 && ! MEM_VOLATILE_P (operands [1]))
8411 emit_move_insn (adjust_address (operands[0], SImode, 0),
8412 adjust_address (operands[1], SImode, 0));
8413 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8414 adjust_address (copy_rtx (operands[1]), SImode, 4));
8415 return;
8418 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8419 && !gpc_reg_operand (operands[1], mode))
8420 operands[1] = force_reg (mode, operands[1]);
8422 /* Recognize the case where operand[1] is a reference to thread-local
8423 data and load its address to a register. */
8424 if (tls_referenced_p (operands[1]))
8426 enum tls_model model;
8427 rtx tmp = operands[1];
8428 rtx addend = NULL;
8430 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8432 addend = XEXP (XEXP (tmp, 0), 1);
8433 tmp = XEXP (XEXP (tmp, 0), 0);
8436 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8437 model = SYMBOL_REF_TLS_MODEL (tmp);
8438 gcc_assert (model != 0);
8440 tmp = rs6000_legitimize_tls_address (tmp, model);
8441 if (addend)
8443 tmp = gen_rtx_PLUS (mode, tmp, addend);
8444 tmp = force_operand (tmp, operands[0]);
8446 operands[1] = tmp;
8449 /* Handle the case where reload calls us with an invalid address. */
8450 if (reload_in_progress && mode == Pmode
8451 && (! general_operand (operands[1], mode)
8452 || ! nonimmediate_operand (operands[0], mode)))
8453 goto emit_set;
8455 /* 128-bit constant floating-point values on Darwin should really be loaded
8456 as two parts. However, this premature splitting is a problem when DFmode
8457 values can go into Altivec registers. */
8458 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8459 && !reg_addr[DFmode].scalar_in_vmx_p
8460 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8462 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8463 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8464 DFmode);
8465 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8466 GET_MODE_SIZE (DFmode)),
8467 simplify_gen_subreg (DFmode, operands[1], mode,
8468 GET_MODE_SIZE (DFmode)),
8469 DFmode);
8470 return;
8473 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8474 cfun->machine->sdmode_stack_slot =
8475 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8478 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8479 p1:SD) if p1 is not of floating-point class and p0 is spilled,
8480 since we have no analogous movsd_store for this. */
8481 if (lra_in_progress && mode == DDmode
8482 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8483 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8484 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8485 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8487 enum reg_class cl;
8488 int regno = REGNO (SUBREG_REG (operands[1]));
8490 if (regno >= FIRST_PSEUDO_REGISTER)
8492 cl = reg_preferred_class (regno);
8493 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8495 if (regno >= 0 && ! FP_REGNO_P (regno))
8497 mode = SDmode;
8498 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8499 operands[1] = SUBREG_REG (operands[1]);
8502 if (lra_in_progress
8503 && mode == SDmode
8504 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8505 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8506 && (REG_P (operands[1])
8507 || (GET_CODE (operands[1]) == SUBREG
8508 && REG_P (SUBREG_REG (operands[1])))))
8510 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8511 ? SUBREG_REG (operands[1]) : operands[1]);
8512 enum reg_class cl;
8514 if (regno >= FIRST_PSEUDO_REGISTER)
8516 cl = reg_preferred_class (regno);
8517 gcc_assert (cl != NO_REGS);
8518 regno = ira_class_hard_regs[cl][0];
8520 if (FP_REGNO_P (regno))
8522 if (GET_MODE (operands[0]) != DDmode)
8523 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8524 emit_insn (gen_movsd_store (operands[0], operands[1]));
8526 else if (INT_REGNO_P (regno))
8527 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8528 else
8529 gcc_unreachable();
8530 return;
8532 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8533 p:DD)) if p0 is not of floating-point class and p1 is spilled,
8534 since we have no analogous movsd_load for this. */
8535 if (lra_in_progress && mode == DDmode
8536 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8537 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8538 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8539 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8541 enum reg_class cl;
8542 int regno = REGNO (SUBREG_REG (operands[0]));
8544 if (regno >= FIRST_PSEUDO_REGISTER)
8546 cl = reg_preferred_class (regno);
8547 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8549 if (regno >= 0 && ! FP_REGNO_P (regno))
8551 mode = SDmode;
8552 operands[0] = SUBREG_REG (operands[0]);
8553 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8556 if (lra_in_progress
8557 && mode == SDmode
8558 && (REG_P (operands[0])
8559 || (GET_CODE (operands[0]) == SUBREG
8560 && REG_P (SUBREG_REG (operands[0]))))
8561 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8562 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8564 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8565 ? SUBREG_REG (operands[0]) : operands[0]);
8566 enum reg_class cl;
8568 if (regno >= FIRST_PSEUDO_REGISTER)
8570 cl = reg_preferred_class (regno);
8571 gcc_assert (cl != NO_REGS);
8572 regno = ira_class_hard_regs[cl][0];
8574 if (FP_REGNO_P (regno))
8576 if (GET_MODE (operands[1]) != DDmode)
8577 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8578 emit_insn (gen_movsd_load (operands[0], operands[1]));
8580 else if (INT_REGNO_P (regno))
8581 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8582 else
8583 gcc_unreachable();
8584 return;
8587 if (reload_in_progress
8588 && mode == SDmode
8589 && cfun->machine->sdmode_stack_slot != NULL_RTX
8590 && MEM_P (operands[0])
8591 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8592 && REG_P (operands[1]))
8594 if (FP_REGNO_P (REGNO (operands[1])))
8596 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8597 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8598 emit_insn (gen_movsd_store (mem, operands[1]));
8600 else if (INT_REGNO_P (REGNO (operands[1])))
8602 rtx mem = operands[0];
8603 if (BYTES_BIG_ENDIAN)
8604 mem = adjust_address_nv (mem, mode, 4);
8605 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8606 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8608 else
8609 gcc_unreachable();
8610 return;
8612 if (reload_in_progress
8613 && mode == SDmode
8614 && REG_P (operands[0])
8615 && MEM_P (operands[1])
8616 && cfun->machine->sdmode_stack_slot != NULL_RTX
8617 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8619 if (FP_REGNO_P (REGNO (operands[0])))
8621 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8622 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8623 emit_insn (gen_movsd_load (operands[0], mem));
8625 else if (INT_REGNO_P (REGNO (operands[0])))
8627 rtx mem = operands[1];
8628 if (BYTES_BIG_ENDIAN)
8629 mem = adjust_address_nv (mem, mode, 4);
8630 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8631 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8633 else
8634 gcc_unreachable();
8635 return;
8638 /* FIXME: In the long term, this switch statement should go away
8639 and be replaced by a sequence of tests based on things like
8640 mode == Pmode. */
8641 switch (mode)
8643 case HImode:
8644 case QImode:
8645 if (CONSTANT_P (operands[1])
8646 && GET_CODE (operands[1]) != CONST_INT)
8647 operands[1] = force_const_mem (mode, operands[1]);
8648 break;
8650 case TFmode:
8651 case TDmode:
8652 rs6000_eliminate_indexed_memrefs (operands);
8653 /* fall through */
8655 case DFmode:
8656 case DDmode:
8657 case SFmode:
8658 case SDmode:
8659 if (CONSTANT_P (operands[1])
8660 && ! easy_fp_constant (operands[1], mode))
8661 operands[1] = force_const_mem (mode, operands[1]);
8662 break;
8664 case V16QImode:
8665 case V8HImode:
8666 case V4SFmode:
8667 case V4SImode:
8668 case V4HImode:
8669 case V2SFmode:
8670 case V2SImode:
8671 case V1DImode:
8672 case V2DFmode:
8673 case V2DImode:
8674 case V1TImode:
8675 if (CONSTANT_P (operands[1])
8676 && !easy_vector_constant (operands[1], mode))
8677 operands[1] = force_const_mem (mode, operands[1]);
8678 break;
8680 case SImode:
8681 case DImode:
8682 /* Use the default pattern for the address of ELF small data. */
8683 if (TARGET_ELF
8684 && mode == Pmode
8685 && DEFAULT_ABI == ABI_V4
8686 && (GET_CODE (operands[1]) == SYMBOL_REF
8687 || GET_CODE (operands[1]) == CONST)
8688 && small_data_operand (operands[1], mode))
8690 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8691 return;
8694 if (DEFAULT_ABI == ABI_V4
8695 && mode == Pmode && mode == SImode
8696 && flag_pic == 1 && got_operand (operands[1], mode))
8698 emit_insn (gen_movsi_got (operands[0], operands[1]));
8699 return;
8702 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8703 && TARGET_NO_TOC
8704 && ! flag_pic
8705 && mode == Pmode
8706 && CONSTANT_P (operands[1])
8707 && GET_CODE (operands[1]) != HIGH
8708 && GET_CODE (operands[1]) != CONST_INT)
8710 rtx target = (!can_create_pseudo_p ()
8711 ? operands[0]
8712 : gen_reg_rtx (mode));
8714 /* If this is a function address on -mcall-aixdesc,
8715 convert it to the address of the descriptor. */
8716 if (DEFAULT_ABI == ABI_AIX
8717 && GET_CODE (operands[1]) == SYMBOL_REF
8718 && XSTR (operands[1], 0)[0] == '.')
8720 const char *name = XSTR (operands[1], 0);
8721 rtx new_ref;
8722 while (*name == '.')
8723 name++;
8724 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8725 CONSTANT_POOL_ADDRESS_P (new_ref)
8726 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8727 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8728 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8729 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8730 operands[1] = new_ref;
8733 if (DEFAULT_ABI == ABI_DARWIN)
8735 #if TARGET_MACHO
8736 if (MACHO_DYNAMIC_NO_PIC_P)
8738 /* Take care of any required data indirection. */
8739 operands[1] = rs6000_machopic_legitimize_pic_address (
8740 operands[1], mode, operands[0]);
8741 if (operands[0] != operands[1])
8742 emit_insn (gen_rtx_SET (VOIDmode,
8743 operands[0], operands[1]));
8744 return;
8746 #endif
8747 emit_insn (gen_macho_high (target, operands[1]));
8748 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8749 return;
8752 emit_insn (gen_elf_high (target, operands[1]));
8753 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8754 return;
8757 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8758 and we have put it in the TOC, we just need to make a TOC-relative
8759 reference to it. */
8760 if (TARGET_TOC
8761 && GET_CODE (operands[1]) == SYMBOL_REF
8762 && use_toc_relative_ref (operands[1]))
8763 operands[1] = create_TOC_reference (operands[1], operands[0]);
8764 else if (mode == Pmode
8765 && CONSTANT_P (operands[1])
8766 && GET_CODE (operands[1]) != HIGH
8767 && ((GET_CODE (operands[1]) != CONST_INT
8768 && ! easy_fp_constant (operands[1], mode))
8769 || (GET_CODE (operands[1]) == CONST_INT
8770 && (num_insns_constant (operands[1], mode)
8771 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8772 || (GET_CODE (operands[0]) == REG
8773 && FP_REGNO_P (REGNO (operands[0]))))
8774 && !toc_relative_expr_p (operands[1], false)
8775 && (TARGET_CMODEL == CMODEL_SMALL
8776 || can_create_pseudo_p ()
8777 || (REG_P (operands[0])
8778 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8781 #if TARGET_MACHO
8782 /* Darwin uses a special PIC legitimizer. */
8783 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8785 operands[1] =
8786 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8787 operands[0]);
8788 if (operands[0] != operands[1])
8789 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8790 return;
8792 #endif
8794 /* If we are to limit the number of things we put in the TOC and
8795 this is a symbol plus a constant we can add in one insn,
8796 just put the symbol in the TOC and add the constant. Don't do
8797 this if reload is in progress. */
8798 if (GET_CODE (operands[1]) == CONST
8799 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8800 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8801 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8802 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8803 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8804 && ! side_effects_p (operands[0]))
8806 rtx sym =
8807 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8808 rtx other = XEXP (XEXP (operands[1], 0), 1);
8810 sym = force_reg (mode, sym);
8811 emit_insn (gen_add3_insn (operands[0], sym, other));
8812 return;
8815 operands[1] = force_const_mem (mode, operands[1]);
8817 if (TARGET_TOC
8818 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8819 && constant_pool_expr_p (XEXP (operands[1], 0))
8820 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8821 get_pool_constant (XEXP (operands[1], 0)),
8822 get_pool_mode (XEXP (operands[1], 0))))
8824 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8825 operands[0]);
8826 operands[1] = gen_const_mem (mode, tocref);
8827 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8830 break;
8832 case TImode:
8833 if (!VECTOR_MEM_VSX_P (TImode))
8834 rs6000_eliminate_indexed_memrefs (operands);
8835 break;
8837 case PTImode:
8838 rs6000_eliminate_indexed_memrefs (operands);
8839 break;
8841 default:
8842 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8845 /* Above, we may have called force_const_mem which may have returned
8846 an invalid address. If we can, fix this up; otherwise, reload will
8847 have to deal with it. */
8848 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8849 operands[1] = validize_mem (operands[1]);
8851 emit_set:
8852 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8855 /* Return true if a structure, union or array containing FIELD should be
8856 accessed using `BLKmode'.
8858 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8859 entire thing in a DI and use subregs to access the internals.
8860 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8861 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8862 best thing to do is set structs to BLKmode and avoid Severe Tire
8863 Damage.
8865 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8866 fit into 1, whereas DI still needs two. */
8868 static bool
8869 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
8871 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8872 || (TARGET_E500_DOUBLE && mode == DFmode));
8875 /* Nonzero if we can use a floating-point register to pass this arg. */
8876 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8877 (SCALAR_FLOAT_MODE_P (MODE) \
8878 && (CUM)->fregno <= FP_ARG_MAX_REG \
8879 && TARGET_HARD_FLOAT && TARGET_FPRS)
8881 /* Nonzero if we can use an AltiVec register to pass this arg. */
8882 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8883 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8884 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8885 && TARGET_ALTIVEC_ABI \
8886 && (NAMED))
8888 /* Walk down the type tree of TYPE counting consecutive base elements.
8889 If *MODEP is VOIDmode, then set it to the first valid floating point
8890 or vector type. If a non-floating point or vector type is found, or
8891 if a floating point or vector type that doesn't match a non-VOIDmode
8892 *MODEP is found, then return -1, otherwise return the count in the
8893 sub-tree. */
8895 static int
8896 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
8898 machine_mode mode;
8899 HOST_WIDE_INT size;
8901 switch (TREE_CODE (type))
8903 case REAL_TYPE:
8904 mode = TYPE_MODE (type);
8905 if (!SCALAR_FLOAT_MODE_P (mode))
8906 return -1;
8908 if (*modep == VOIDmode)
8909 *modep = mode;
8911 if (*modep == mode)
8912 return 1;
8914 break;
8916 case COMPLEX_TYPE:
8917 mode = TYPE_MODE (TREE_TYPE (type));
8918 if (!SCALAR_FLOAT_MODE_P (mode))
8919 return -1;
8921 if (*modep == VOIDmode)
8922 *modep = mode;
8924 if (*modep == mode)
8925 return 2;
8927 break;
8929 case VECTOR_TYPE:
8930 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
8931 return -1;
8933 /* Use V4SImode as representative of all 128-bit vector types. */
8934 size = int_size_in_bytes (type);
8935 switch (size)
8937 case 16:
8938 mode = V4SImode;
8939 break;
8940 default:
8941 return -1;
8944 if (*modep == VOIDmode)
8945 *modep = mode;
8947 /* Vector modes are considered to be opaque: two vectors are
8948 equivalent for the purposes of being homogeneous aggregates
8949 if they are the same size. */
8950 if (*modep == mode)
8951 return 1;
8953 break;
8955 case ARRAY_TYPE:
8957 int count;
8958 tree index = TYPE_DOMAIN (type);
8960 /* Can't handle incomplete types or sizes that are not
8961 fixed. */
8962 if (!COMPLETE_TYPE_P (type)
8963 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8964 return -1;
8966 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
8967 if (count == -1
8968 || !index
8969 || !TYPE_MAX_VALUE (index)
8970 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
8971 || !TYPE_MIN_VALUE (index)
8972 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8973 || count < 0)
8974 return -1;
8976 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8977 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8979 /* There must be no padding. */
8980 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8981 return -1;
8983 return count;
8986 case RECORD_TYPE:
8988 int count = 0;
8989 int sub_count;
8990 tree field;
8992 /* Can't handle incomplete types or sizes that are not
8993 fixed. */
8994 if (!COMPLETE_TYPE_P (type)
8995 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8996 return -1;
8998 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9000 if (TREE_CODE (field) != FIELD_DECL)
9001 continue;
9003 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9004 if (sub_count < 0)
9005 return -1;
9006 count += sub_count;
9009 /* There must be no padding. */
9010 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9011 return -1;
9013 return count;
9016 case UNION_TYPE:
9017 case QUAL_UNION_TYPE:
9019 /* These aren't very interesting except in a degenerate case. */
9020 int count = 0;
9021 int sub_count;
9022 tree field;
9024 /* Can't handle incomplete types or sizes that are not
9025 fixed. */
9026 if (!COMPLETE_TYPE_P (type)
9027 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9028 return -1;
9030 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9032 if (TREE_CODE (field) != FIELD_DECL)
9033 continue;
9035 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9036 if (sub_count < 0)
9037 return -1;
9038 count = count > sub_count ? count : sub_count;
9041 /* There must be no padding. */
9042 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9043 return -1;
9045 return count;
9048 default:
9049 break;
9052 return -1;
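/* Example (a sketch): for
       struct pt { double x; double y; };
   the walk above returns 2 with *modep == DFmode, i.e. a homogeneous
   aggregate of two doubles.  Adding a non-floating member, or any
   padding, makes the function return -1 instead.  */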
9055 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9056 float or vector aggregate that shall be passed in FP/vector registers
9057 according to the ELFv2 ABI, return the homogeneous element mode in
9058 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9060 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
9062 static bool
9063 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9064 machine_mode *elt_mode,
9065 int *n_elts)
9067 /* Note that we do not accept complex types at the top level as
9068 homogeneous aggregates; these types are handled via the
9069 targetm.calls.split_complex_arg mechanism. Complex types
9070 can be elements of homogeneous aggregates, however. */
9071 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9073 machine_mode field_mode = VOIDmode;
9074 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9076 if (field_count > 0)
9078 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)?
9079 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9081 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9082 up to AGGR_ARG_NUM_REG registers. */
9083 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9085 if (elt_mode)
9086 *elt_mode = field_mode;
9087 if (n_elts)
9088 *n_elts = field_count;
9089 return true;
9094 if (elt_mode)
9095 *elt_mode = mode;
9096 if (n_elts)
9097 *n_elts = 1;
9098 return false;
9101 /* Return a nonzero value to say to return the function value in
9102 memory, just as large structures are always returned. TYPE will be
9103 the data type of the value, and FNTYPE will be the type of the
9104 function doing the returning, or @code{NULL} for libcalls.
9106 The AIX ABI for the RS/6000 specifies that all structures are
9107 returned in memory. The Darwin ABI does the same.
9109 For the Darwin 64 Bit ABI, a function result can be returned in
9110 registers or in memory, depending on the size of the return data
9111 type. If it is returned in registers, the value occupies the same
9112 registers as it would if it were the first and only function
9113 argument. Otherwise, the function places its result in memory at
9114 the location pointed to by GPR3.
9116 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9117 but a draft put them in memory, and GCC used to implement the draft
9118 instead of the final standard. Therefore, aix_struct_return
9119 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9120 compatibility can change DRAFT_V4_STRUCT_RET to override the
9121 default, and -m switches get the final word. See
9122 rs6000_option_override_internal for more details.
9124 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9125 long double support is enabled. These values are returned in memory.
9127 int_size_in_bytes returns -1 for variable size objects, which always
9128 go in memory. The cast to unsigned makes -1 > 8. */
9130 static bool
9131 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9133 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9134 if (TARGET_MACHO
9135 && rs6000_darwin64_abi
9136 && TREE_CODE (type) == RECORD_TYPE
9137 && int_size_in_bytes (type) > 0)
9139 CUMULATIVE_ARGS valcum;
9140 rtx valret;
9142 valcum.words = 0;
9143 valcum.fregno = FP_ARG_MIN_REG;
9144 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9145 /* Do a trial code generation as if this were going to be passed
9146 as an argument; if any part goes in memory, we return NULL. */
9147 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9148 if (valret)
9149 return false;
9150 /* Otherwise fall through to more conventional ABI rules. */
9153 /* The ELFv2 ABI returns homogeneous FP/vector aggregates in registers. */
9154 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9155 NULL, NULL))
9156 return false;
9158 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
9159 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9160 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9161 return false;
9163 if (AGGREGATE_TYPE_P (type)
9164 && (aix_struct_return
9165 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9166 return true;
9168 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9169 modes only exist for GCC vector types if -maltivec. */
9170 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9171 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9172 return false;
9174 /* Return synthetic vectors in memory. */
9175 if (TREE_CODE (type) == VECTOR_TYPE
9176 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9178 static bool warned_for_return_big_vectors = false;
9179 if (!warned_for_return_big_vectors)
9181 warning (0, "GCC vector returned by reference: "
9182 "non-standard ABI extension with no compatibility guarantee");
9183 warned_for_return_big_vectors = true;
9185 return true;
9188 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9189 return true;
9191 return false;
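/* For illustration (an added example): under ELFv2 a return type such
   as

       struct pair { long a, b; };

   is 16 bytes and not a homogeneous aggregate, so the 16-byte test
   above returns false and the value comes back in GPRs; the same
   struct under the AIX ABI is returned in memory, since
   aix_struct_return is set there.  */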
9194 /* Specify whether values returned in registers should be at the most
9195 significant end of a register. We want aggregates returned by
9196 value to match the way aggregates are passed to functions. */
9198 static bool
9199 rs6000_return_in_msb (const_tree valtype)
9201 return (DEFAULT_ABI == ABI_ELFv2
9202 && BYTES_BIG_ENDIAN
9203 && AGGREGATE_TYPE_P (valtype)
9204 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9207 #ifdef HAVE_AS_GNU_ATTRIBUTE
9208 /* Return TRUE if a call to function FNDECL may be one that
9209 potentially affects the function calling ABI of the object file. */
9211 static bool
9212 call_ABI_of_interest (tree fndecl)
9214 if (symtab->state == EXPANSION)
9216 struct cgraph_node *c_node;
9218 /* Libcalls are always interesting. */
9219 if (fndecl == NULL_TREE)
9220 return true;
9222 /* Any call to an external function is interesting. */
9223 if (DECL_EXTERNAL (fndecl))
9224 return true;
9226 /* Interesting functions that we are emitting in this object file. */
9227 c_node = cgraph_node::get (fndecl);
9228 c_node = c_node->ultimate_alias_target ();
9229 return !c_node->only_called_directly_p ();
9231 return false;
9233 #endif
9235 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9236 for a call to a function whose data type is FNTYPE.
9237 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
9239 For incoming args we set the number of arguments in the prototype large
9240 so we never return a PARALLEL. */
9242 void
9243 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9244 rtx libname ATTRIBUTE_UNUSED, int incoming,
9245 int libcall, int n_named_args,
9246 tree fndecl ATTRIBUTE_UNUSED,
9247 machine_mode return_mode ATTRIBUTE_UNUSED)
9249 static CUMULATIVE_ARGS zero_cumulative;
9251 *cum = zero_cumulative;
9252 cum->words = 0;
9253 cum->fregno = FP_ARG_MIN_REG;
9254 cum->vregno = ALTIVEC_ARG_MIN_REG;
9255 cum->prototype = (fntype && prototype_p (fntype));
9256 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9257 ? CALL_LIBCALL : CALL_NORMAL);
9258 cum->sysv_gregno = GP_ARG_MIN_REG;
9259 cum->stdarg = stdarg_p (fntype);
9261 cum->nargs_prototype = 0;
9262 if (incoming || cum->prototype)
9263 cum->nargs_prototype = n_named_args;
9265 /* Check for a longcall attribute. */
9266 if ((!fntype && rs6000_default_long_calls)
9267 || (fntype
9268 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9269 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9270 cum->call_cookie |= CALL_LONG;
9272 if (TARGET_DEBUG_ARG)
9274 fprintf (stderr, "\ninit_cumulative_args:");
9275 if (fntype)
9277 tree ret_type = TREE_TYPE (fntype);
9278 fprintf (stderr, " ret code = %s,",
9279 get_tree_code_name (TREE_CODE (ret_type)));
9282 if (cum->call_cookie & CALL_LONG)
9283 fprintf (stderr, " longcall,");
9285 fprintf (stderr, " proto = %d, nargs = %d\n",
9286 cum->prototype, cum->nargs_prototype);
9289 #ifdef HAVE_AS_GNU_ATTRIBUTE
9290 if (DEFAULT_ABI == ABI_V4)
9292 cum->escapes = call_ABI_of_interest (fndecl);
9293 if (cum->escapes)
9295 tree return_type;
9297 if (fntype)
9299 return_type = TREE_TYPE (fntype);
9300 return_mode = TYPE_MODE (return_type);
9302 else
9303 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9305 if (return_type != NULL)
9307 if (TREE_CODE (return_type) == RECORD_TYPE
9308 && TYPE_TRANSPARENT_AGGR (return_type))
9310 return_type = TREE_TYPE (first_field (return_type));
9311 return_mode = TYPE_MODE (return_type);
9313 if (AGGREGATE_TYPE_P (return_type)
9314 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9315 <= 8))
9316 rs6000_returns_struct = true;
9318 if (SCALAR_FLOAT_MODE_P (return_mode))
9319 rs6000_passes_float = true;
9320 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9321 || SPE_VECTOR_MODE (return_mode))
9322 rs6000_passes_vector = true;
9325 #endif
9327 if (fntype
9328 && !TARGET_ALTIVEC
9329 && TARGET_ALTIVEC_ABI
9330 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9332 error ("cannot return value in vector register because"
9333 " altivec instructions are disabled, use -maltivec"
9334 " to enable them");
9338 /* The mode the ABI uses for a word. This is not the same as word_mode
9339 for -m32 -mpowerpc64. This is used to implement various target hooks. */
9341 static machine_mode
9342 rs6000_abi_word_mode (void)
9344 return TARGET_32BIT ? SImode : DImode;
9347 /* On rs6000, function arguments are promoted, as are function return
9348 values. */
9350 static machine_mode
9351 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9352 machine_mode mode,
9353 int *punsignedp ATTRIBUTE_UNUSED,
9354 const_tree, int)
9356 PROMOTE_MODE (mode, *punsignedp, type);
9358 return mode;
9361 /* Return true if TYPE must be passed on the stack and not in registers. */
9363 static bool
9364 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9366 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9367 return must_pass_in_stack_var_size (mode, type);
9368 else
9369 return must_pass_in_stack_var_size_or_pad (mode, type);
9372 /* If defined, a C expression which determines whether, and in which
9373 direction, to pad out an argument with extra space. The value
9374 should be of type `enum direction': either `upward' to pad above
9375 the argument, `downward' to pad below, or `none' to inhibit
9376 padding.
9378 For the AIX ABI structs are always stored left shifted in their
9379 argument slot. */
9381 enum direction
9382 function_arg_padding (machine_mode mode, const_tree type)
9384 #ifndef AGGREGATE_PADDING_FIXED
9385 #define AGGREGATE_PADDING_FIXED 0
9386 #endif
9387 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9388 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9389 #endif
9391 if (!AGGREGATE_PADDING_FIXED)
9393 /* GCC used to pass structures of the same size as integer types as
9394 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9395 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9396 passed padded downward, except that -mstrict-align further
9397 muddied the water in that multi-component structures of 2 and 4
9398 bytes in size were passed padded upward.
9400 The following arranges for best compatibility with previous
9401 versions of gcc, but removes the -mstrict-align dependency. */
9402 if (BYTES_BIG_ENDIAN)
9404 HOST_WIDE_INT size = 0;
9406 if (mode == BLKmode)
9408 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9409 size = int_size_in_bytes (type);
9411 else
9412 size = GET_MODE_SIZE (mode);
9414 if (size == 1 || size == 2 || size == 4)
9415 return downward;
9417 return upward;
9420 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9422 if (type != 0 && AGGREGATE_TYPE_P (type))
9423 return upward;
9426 /* Fall back to the default. */
9427 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9430 /* If defined, a C expression that gives the alignment boundary, in bits,
9431 of an argument with the specified mode and type. If it is not defined,
9432 PARM_BOUNDARY is used for all arguments.
9434 V.4 wants long longs and doubles to be double word aligned. Just
9435 testing the mode size is a boneheaded way to do this as it means
9436 that other types such as complex int are also double word aligned.
9437 However, we're stuck with this because changing the ABI might break
9438 existing library interfaces.
9440 Doubleword align SPE vectors.
9441 Quadword align Altivec/VSX vectors.
9442 Quadword align large synthetic vector types. */
9444 static unsigned int
9445 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9447 machine_mode elt_mode;
9448 int n_elts;
9450 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9452 if (DEFAULT_ABI == ABI_V4
9453 && (GET_MODE_SIZE (mode) == 8
9454 || (TARGET_HARD_FLOAT
9455 && TARGET_FPRS
9456 && (mode == TFmode || mode == TDmode))))
9457 return 64;
9458 else if (SPE_VECTOR_MODE (mode)
9459 || (type && TREE_CODE (type) == VECTOR_TYPE
9460 && int_size_in_bytes (type) >= 8
9461 && int_size_in_bytes (type) < 16))
9462 return 64;
9463 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9464 || (type && TREE_CODE (type) == VECTOR_TYPE
9465 && int_size_in_bytes (type) >= 16))
9466 return 128;
9468 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9469 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9470 -mcompat-align-parm is used. */
9471 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9472 || DEFAULT_ABI == ABI_ELFv2)
9473 && type && TYPE_ALIGN (type) > 64)
9475 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9476 or homogeneous float/vector aggregates here. We already handled
9477 vector aggregates above, but still need to check for float here. */
9478 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9479 && !SCALAR_FLOAT_MODE_P (elt_mode));
9481 /* We used to check for BLKmode instead of the above aggregate type
9482 check. Warn when this results in any difference to the ABI. */
9483 if (aggregate_p != (mode == BLKmode))
9485 static bool warned;
9486 if (!warned && warn_psabi)
9488 warned = true;
9489 inform (input_location,
9490 "the ABI of passing aggregates with %d-byte alignment"
9491 " has changed in GCC 5",
9492 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9496 if (aggregate_p)
9497 return 128;
9500 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9501 implement the "aggregate type" check as a BLKmode check here; this
9502 means certain aggregate types are in fact not aligned. */
9503 if (TARGET_MACHO && rs6000_darwin64_abi
9504 && mode == BLKmode
9505 && type && TYPE_ALIGN (type) > 64)
9506 return 128;
9508 return PARM_BOUNDARY;
9511 /* The offset in words to the start of the parameter save area. */
9513 static unsigned int
9514 rs6000_parm_offset (void)
9516 return (DEFAULT_ABI == ABI_V4 ? 2
9517 : DEFAULT_ABI == ABI_ELFv2 ? 4
9518 : 6);
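/* For illustration: in bytes, these word offsets are where the
   parameter save area starts past the fixed stack header, i.e.
   2 * 4 == 8 bytes for 32-bit SVR4, 4 * 8 == 32 bytes for 64-bit
   ELFv2, and 6 * 8 == 48 (or 6 * 4 == 24 for 32-bit) for AIX.  */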
9521 /* For a function parm of MODE and TYPE, return the starting word in
9522 the parameter area. NWORDS of the parameter area are already used. */
9524 static unsigned int
9525 rs6000_parm_start (machine_mode mode, const_tree type,
9526 unsigned int nwords)
9528 unsigned int align;
9530 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9531 return nwords + (-(rs6000_parm_offset () + nwords) & align);
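/* Worked example (added): on 64-bit ELFv2 a quadword-aligned argument
   has rs6000_function_arg_boundary == 128 and PARM_BOUNDARY == 64, so
   ALIGN == 1.  With NWORDS == 1 already used and a parameter offset
   of 4 words, the result is 1 + (-(4 + 1) & 1) == 2, bumping the
   argument to the next even doubleword.  */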
9534 /* Compute the size (in words) of a function argument. */
9536 static unsigned long
9537 rs6000_arg_size (machine_mode mode, const_tree type)
9539 unsigned long size;
9541 if (mode != BLKmode)
9542 size = GET_MODE_SIZE (mode);
9543 else
9544 size = int_size_in_bytes (type);
9546 if (TARGET_32BIT)
9547 return (size + 3) >> 2;
9548 else
9549 return (size + 7) >> 3;
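/* For example, a 12-byte BLKmode struct occupies (12 + 3) >> 2 == 3
   words on a 32-bit target and (12 + 7) >> 3 == 2 doublewords on a
   64-bit target.  */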
9552 /* Use this to flush pending int fields. */
9554 static void
9555 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9556 HOST_WIDE_INT bitpos, int final)
9558 unsigned int startbit, endbit;
9559 int intregs, intoffset;
9560 machine_mode mode;
9562 /* Handle the situations where a float is taking up the first half
9563 of the GPR, and the other half is empty (typically due to
9564 alignment restrictions). We can detect this by an 8-byte-aligned
9565 int field, or by seeing that this is the final flush for this
9566 argument. Count the word and continue on. */
9567 if (cum->floats_in_gpr == 1
9568 && (cum->intoffset % 64 == 0
9569 || (cum->intoffset == -1 && final)))
9571 cum->words++;
9572 cum->floats_in_gpr = 0;
9575 if (cum->intoffset == -1)
9576 return;
9578 intoffset = cum->intoffset;
9579 cum->intoffset = -1;
9580 cum->floats_in_gpr = 0;
9582 if (intoffset % BITS_PER_WORD != 0)
9584 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9585 MODE_INT, 0);
9586 if (mode == BLKmode)
9588 /* We couldn't find an appropriate mode, which happens,
9589 e.g., in packed structs when there are 3 bytes to load.
9590 Back intoffset back to the beginning of the word in this
9591 case. */
9592 intoffset = intoffset & -BITS_PER_WORD;
9596 startbit = intoffset & -BITS_PER_WORD;
9597 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9598 intregs = (endbit - startbit) / BITS_PER_WORD;
9599 cum->words += intregs;
9600 /* words should be unsigned. */
9601 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9603 int pad = (endbit/BITS_PER_WORD) - cum->words;
9604 cum->words += pad;
9608 /* The darwin64 ABI calls for us to recurse down through structs,
9609 looking for elements passed in registers. Unfortunately, we have
9610 to track int register count here also because of misalignments
9611 in powerpc alignment mode. */
9613 static void
9614 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9615 const_tree type,
9616 HOST_WIDE_INT startbitpos)
9618 tree f;
9620 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9621 if (TREE_CODE (f) == FIELD_DECL)
9623 HOST_WIDE_INT bitpos = startbitpos;
9624 tree ftype = TREE_TYPE (f);
9625 machine_mode mode;
9626 if (ftype == error_mark_node)
9627 continue;
9628 mode = TYPE_MODE (ftype);
9630 if (DECL_SIZE (f) != 0
9631 && tree_fits_uhwi_p (bit_position (f)))
9632 bitpos += int_bit_position (f);
9634 /* ??? FIXME: else assume zero offset. */
9636 if (TREE_CODE (ftype) == RECORD_TYPE)
9637 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9638 else if (USE_FP_FOR_ARG_P (cum, mode))
9640 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9641 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9642 cum->fregno += n_fpregs;
9643 /* Single-precision floats present a special problem for
9644 us, because they are smaller than an 8-byte GPR, and so
9645 the structure-packing rules combined with the standard
9646 varargs behavior mean that we want to pack float/float
9647 and float/int combinations into a single register's
9648 space. This is complicated by the arg advance flushing,
9649 which works on arbitrarily large groups of int-type
9650 fields. */
9651 if (mode == SFmode)
9653 if (cum->floats_in_gpr == 1)
9655 /* Two floats in a word; count the word and reset
9656 the float count. */
9657 cum->words++;
9658 cum->floats_in_gpr = 0;
9660 else if (bitpos % 64 == 0)
9662 /* A float at the beginning of an 8-byte word;
9663 count it and put off adjusting cum->words until
9664 we see if an arg advance flush is going to do it
9665 for us. */
9666 cum->floats_in_gpr++;
9668 else
9670 /* The float is at the end of a word, preceded
9671 by integer fields, so the arg advance flush
9672 just above has already set cum->words and
9673 everything is taken care of. */
9676 else
9677 cum->words += n_fpregs;
9679 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9681 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9682 cum->vregno++;
9683 cum->words += 2;
9685 else if (cum->intoffset == -1)
9686 cum->intoffset = bitpos;
9690 /* Check for an item that needs to be considered specially under the darwin 64
9691 bit ABI. These are record types where the mode is BLK or the structure is
9692 8 bytes in size. */
9693 static int
9694 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9696 return rs6000_darwin64_abi
9697 && ((mode == BLKmode
9698 && TREE_CODE (type) == RECORD_TYPE
9699 && int_size_in_bytes (type) > 0)
9700 || (type && TREE_CODE (type) == RECORD_TYPE
9701 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9704 /* Update the data in CUM to advance over an argument
9705 of mode MODE and data type TYPE.
9706 (TYPE is null for libcalls where that information may not be available.)
9708 Note that for args passed by reference, function_arg will be called
9709 with MODE and TYPE set to that of the pointer to the arg, not the arg
9710 itself. */
9712 static void
9713 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9714 const_tree type, bool named, int depth)
9716 machine_mode elt_mode;
9717 int n_elts;
9719 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9721 /* Only tick off an argument if we're not recursing. */
9722 if (depth == 0)
9723 cum->nargs_prototype--;
9725 #ifdef HAVE_AS_GNU_ATTRIBUTE
9726 if (DEFAULT_ABI == ABI_V4
9727 && cum->escapes)
9729 if (SCALAR_FLOAT_MODE_P (mode))
9730 rs6000_passes_float = true;
9731 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9732 rs6000_passes_vector = true;
9733 else if (SPE_VECTOR_MODE (mode)
9734 && !cum->stdarg
9735 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9736 rs6000_passes_vector = true;
9738 #endif
9740 if (TARGET_ALTIVEC_ABI
9741 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9742 || (type && TREE_CODE (type) == VECTOR_TYPE
9743 && int_size_in_bytes (type) == 16)))
9745 bool stack = false;
9747 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9749 cum->vregno += n_elts;
9751 if (!TARGET_ALTIVEC)
9752 error ("cannot pass argument in vector register because"
9753 " altivec instructions are disabled, use -maltivec"
9754 " to enable them");
9756 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9757 even if it is going to be passed in a vector register.
9758 Darwin does the same for variable-argument functions. */
9759 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9760 && TARGET_64BIT)
9761 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9762 stack = true;
9764 else
9765 stack = true;
9767 if (stack)
9769 int align;
9771 /* Vector parameters must be 16-byte aligned. In 32-bit
9772 mode this means we need to take into account the offset
9773 to the parameter save area. In 64-bit mode, they just
9774 have to start on an even word, since the parameter save
9775 area is 16-byte aligned. */
9776 if (TARGET_32BIT)
9777 align = -(rs6000_parm_offset () + cum->words) & 3;
9778 else
9779 align = cum->words & 1;
9780 cum->words += align + rs6000_arg_size (mode, type);
9782 if (TARGET_DEBUG_ARG)
9784 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9785 cum->words, align);
9786 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9787 cum->nargs_prototype, cum->prototype,
9788 GET_MODE_NAME (mode));
9792 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9793 && !cum->stdarg
9794 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9795 cum->sysv_gregno++;
9797 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9799 int size = int_size_in_bytes (type);
9800 /* Variable sized types have size == -1 and are
9801 treated as if consisting entirely of ints.
9802 Pad to 16 byte boundary if needed. */
9803 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9804 && (cum->words % 2) != 0)
9805 cum->words++;
9806 /* For varargs, we can just go up by the size of the struct. */
9807 if (!named)
9808 cum->words += (size + 7) / 8;
9809 else
9811 /* It is tempting to say int register count just goes up by
9812 sizeof(type)/8, but this is wrong in a case such as
9813 { int; double; int; } [powerpc alignment]. We have to
9814 grovel through the fields for these too. */
9815 cum->intoffset = 0;
9816 cum->floats_in_gpr = 0;
9817 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9818 rs6000_darwin64_record_arg_advance_flush (cum,
9819 size * BITS_PER_UNIT, 1);
9821 if (TARGET_DEBUG_ARG)
9823 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9824 cum->words, TYPE_ALIGN (type), size);
9825 fprintf (stderr,
9826 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9827 cum->nargs_prototype, cum->prototype,
9828 GET_MODE_NAME (mode));
9831 else if (DEFAULT_ABI == ABI_V4)
9833 if (TARGET_HARD_FLOAT && TARGET_FPRS
9834 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9835 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9836 || (mode == TFmode && !TARGET_IEEEQUAD)
9837 || mode == SDmode || mode == DDmode || mode == TDmode))
9839 /* _Decimal128 must use an even/odd register pair. This assumes
9840 that the register number is odd when fregno is odd. */
9841 if (mode == TDmode && (cum->fregno % 2) == 1)
9842 cum->fregno++;
9844 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9845 <= FP_ARG_V4_MAX_REG)
9846 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9847 else
9849 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9850 if (mode == DFmode || mode == TFmode
9851 || mode == DDmode || mode == TDmode)
9852 cum->words += cum->words & 1;
9853 cum->words += rs6000_arg_size (mode, type);
9856 else
9858 int n_words = rs6000_arg_size (mode, type);
9859 int gregno = cum->sysv_gregno;
9861 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9862 (r7,r8) or (r9,r10). So does any other 2-word item such
9863 as complex int, due to a historical mistake. */
9864 if (n_words == 2)
9865 gregno += (1 - gregno) & 1;
9867 /* Multi-reg args are not split between registers and stack. */
9868 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9870 /* Long long and SPE vectors are aligned on the stack.
9871 So are other 2-word items such as complex int due to
9872 a historical mistake. */
9873 if (n_words == 2)
9874 cum->words += cum->words & 1;
9875 cum->words += n_words;
9878 /* Note: gregno continues to accumulate even after we've started
9879 spilling to the stack; expand_builtin_saveregs relies on this
9880 to tell that spilling has started. */
9881 cum->sysv_gregno = gregno + n_words;
9884 if (TARGET_DEBUG_ARG)
9886 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9887 cum->words, cum->fregno);
9888 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9889 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9890 fprintf (stderr, "mode = %4s, named = %d\n",
9891 GET_MODE_NAME (mode), named);
9894 else
9896 int n_words = rs6000_arg_size (mode, type);
9897 int start_words = cum->words;
9898 int align_words = rs6000_parm_start (mode, type, start_words);
9900 cum->words = align_words + n_words;
9902 if (SCALAR_FLOAT_MODE_P (elt_mode)
9903 && TARGET_HARD_FLOAT && TARGET_FPRS)
9905 /* _Decimal128 must be passed in an even/odd float register pair.
9906 This assumes that the register number is odd when fregno is
9907 odd. */
9908 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9909 cum->fregno++;
9910 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9913 if (TARGET_DEBUG_ARG)
9915 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9916 cum->words, cum->fregno);
9917 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9918 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9919 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9920 named, align_words - start_words, depth);
9925 static void
9926 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
9927 const_tree type, bool named)
9929 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9930 0);
9933 static rtx
9934 spe_build_register_parallel (machine_mode mode, int gregno)
9936 rtx r1, r3, r5, r7;
9938 switch (mode)
9940 case DFmode:
9941 r1 = gen_rtx_REG (DImode, gregno);
9942 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9943 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9945 case DCmode:
9946 case TFmode:
9947 r1 = gen_rtx_REG (DImode, gregno);
9948 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9949 r3 = gen_rtx_REG (DImode, gregno + 2);
9950 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9951 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9953 case TCmode:
9954 r1 = gen_rtx_REG (DImode, gregno);
9955 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9956 r3 = gen_rtx_REG (DImode, gregno + 2);
9957 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9958 r5 = gen_rtx_REG (DImode, gregno + 4);
9959 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9960 r7 = gen_rtx_REG (DImode, gregno + 6);
9961 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9962 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9964 default:
9965 gcc_unreachable ();
9969 /* Determine where to put a SIMD argument on the SPE. */
9970 static rtx
9971 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
9972 const_tree type)
9974 int gregno = cum->sysv_gregno;
9976 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9977 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9978 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9979 || mode == DCmode || mode == TCmode))
9981 int n_words = rs6000_arg_size (mode, type);
9983 /* Doubles go in an odd/even register pair (r5/r6, etc). */
9984 if (mode == DFmode)
9985 gregno += (1 - gregno) & 1;
9987 /* Multi-reg args are not split between registers and stack. */
9988 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9989 return NULL_RTX;
9991 return spe_build_register_parallel (mode, gregno);
9993 if (cum->stdarg)
9995 int n_words = rs6000_arg_size (mode, type);
9997 /* SPE vectors are put in odd registers. */
9998 if (n_words == 2 && (gregno & 1) == 0)
9999 gregno += 1;
10001 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10003 rtx r1, r2;
10004 machine_mode m = SImode;
10006 r1 = gen_rtx_REG (m, gregno);
10007 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10008 r2 = gen_rtx_REG (m, gregno + 1);
10009 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10010 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10012 else
10013 return NULL_RTX;
10015 else
10017 if (gregno <= GP_ARG_MAX_REG)
10018 return gen_rtx_REG (mode, gregno);
10019 else
10020 return NULL_RTX;
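/* For illustration (an added example): a named DFmode argument with
   cum->sysv_gregno == 4 is bumped to the odd register 5 above, and
   spe_build_register_parallel then yields

       (parallel:DF [(expr_list (reg:DI 5) (const_int 0))])

   i.e. the whole 64-bit value lives in the single 64-bit GPR r5.  */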
10024 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10025 structure between cum->intoffset and bitpos to integer registers. */
10027 static void
10028 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10029 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10031 machine_mode mode;
10032 unsigned int regno;
10033 unsigned int startbit, endbit;
10034 int this_regno, intregs, intoffset;
10035 rtx reg;
10037 if (cum->intoffset == -1)
10038 return;
10040 intoffset = cum->intoffset;
10041 cum->intoffset = -1;
10043 /* If this is the trailing part of a word, try to only load that
10044 much into the register. Otherwise load the whole register. Note
10045 that in the latter case we may pick up unwanted bits. It's not a
10046 problem at the moment, but we may wish to revisit this. */
10048 if (intoffset % BITS_PER_WORD != 0)
10050 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10051 MODE_INT, 0);
10052 if (mode == BLKmode)
10054 /* We couldn't find an appropriate mode, which happens,
10055 e.g., in packed structs when there are 3 bytes to load.
10056 Back intoffset back to the beginning of the word in this
10057 case. */
10058 intoffset = intoffset & -BITS_PER_WORD;
10059 mode = word_mode;
10062 else
10063 mode = word_mode;
10065 startbit = intoffset & -BITS_PER_WORD;
10066 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
10067 intregs = (endbit - startbit) / BITS_PER_WORD;
10068 this_regno = cum->words + intoffset / BITS_PER_WORD;
10070 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10071 cum->use_stack = 1;
10073 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10074 if (intregs <= 0)
10075 return;
10077 intoffset /= BITS_PER_UNIT;
10078 do
10079 {
10080 regno = GP_ARG_MIN_REG + this_regno;
10081 reg = gen_rtx_REG (mode, regno);
10082 rvec[(*k)++] =
10083 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10085 this_regno += 1;
10086 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10087 mode = word_mode;
10088 intregs -= 1;
10089 }
10090 while (intregs > 0);
10093 /* Recursive workhorse for the following. */
10095 static void
10096 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10097 HOST_WIDE_INT startbitpos, rtx rvec[],
10098 int *k)
10100 tree f;
10102 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10103 if (TREE_CODE (f) == FIELD_DECL)
10105 HOST_WIDE_INT bitpos = startbitpos;
10106 tree ftype = TREE_TYPE (f);
10107 machine_mode mode;
10108 if (ftype == error_mark_node)
10109 continue;
10110 mode = TYPE_MODE (ftype);
10112 if (DECL_SIZE (f) != 0
10113 && tree_fits_uhwi_p (bit_position (f)))
10114 bitpos += int_bit_position (f);
10116 /* ??? FIXME: else assume zero offset. */
10118 if (TREE_CODE (ftype) == RECORD_TYPE)
10119 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10120 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10122 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10123 #if 0
10124 switch (mode)
10126 case SCmode: mode = SFmode; break;
10127 case DCmode: mode = DFmode; break;
10128 case TCmode: mode = TFmode; break;
10129 default: break;
10131 #endif
10132 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10133 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10135 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10136 && (mode == TFmode || mode == TDmode));
10137 /* Long double or _Decimal128 split over regs and memory. */
10138 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10139 cum->use_stack = 1;
10141 rvec[(*k)++]
10142 = gen_rtx_EXPR_LIST (VOIDmode,
10143 gen_rtx_REG (mode, cum->fregno++),
10144 GEN_INT (bitpos / BITS_PER_UNIT));
10145 if (mode == TFmode || mode == TDmode)
10146 cum->fregno++;
10148 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10150 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10151 rvec[(*k)++]
10152 = gen_rtx_EXPR_LIST (VOIDmode,
10153 gen_rtx_REG (mode, cum->vregno++),
10154 GEN_INT (bitpos / BITS_PER_UNIT));
10156 else if (cum->intoffset == -1)
10157 cum->intoffset = bitpos;
10161 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10162 the register(s) to be used for each field and subfield of a struct
10163 being passed by value, along with the offset of where the
10164 register's value may be found in the block. FP fields go in FP
10165 register, vector fields go in vector registers, and everything
10166 else goes in int registers, packed as in memory.
10168 This code is also used for function return values. RETVAL indicates
10169 whether this is the case.
10171 Much of this is taken from the SPARC V9 port, which has a similar
10172 calling convention. */
10174 static rtx
10175 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10176 bool named, bool retval)
10178 rtx rvec[FIRST_PSEUDO_REGISTER];
10179 int k = 1, kbase = 1;
10180 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10181 /* This is a copy; modifications are not visible to our caller. */
10182 CUMULATIVE_ARGS copy_cum = *orig_cum;
10183 CUMULATIVE_ARGS *cum = &copy_cum;
10185 /* Pad to 16 byte boundary if needed. */
10186 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10187 && (cum->words % 2) != 0)
10188 cum->words++;
10190 cum->intoffset = 0;
10191 cum->use_stack = 0;
10192 cum->named = named;
10194 /* Put entries into rvec[] for individual FP and vector fields, and
10195 for the chunks of memory that go in int regs. Note we start at
10196 element 1; 0 is reserved for an indication of using memory, and
10197 may or may not be filled in below. */
10198 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10199 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10201 /* If any part of the struct went on the stack put all of it there.
10202 This hack is because the generic code for
10203 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10204 parts of the struct are not at the beginning. */
10205 if (cum->use_stack)
10207 if (retval)
10208 return NULL_RTX; /* doesn't go in registers at all */
10209 kbase = 0;
10210 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10212 if (k > 1 || cum->use_stack)
10213 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10214 else
10215 return NULL_RTX;
10218 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10220 static rtx
10221 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10222 int align_words)
10224 int n_units;
10225 int i, k;
10226 rtx rvec[GP_ARG_NUM_REG + 1];
10228 if (align_words >= GP_ARG_NUM_REG)
10229 return NULL_RTX;
10231 n_units = rs6000_arg_size (mode, type);
10233 /* Optimize the simple case where the arg fits in one gpr, except in
10234 the case of BLKmode due to assign_parms assuming that registers are
10235 BITS_PER_WORD wide. */
10236 if (n_units == 0
10237 || (n_units == 1 && mode != BLKmode))
10238 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10240 k = 0;
10241 if (align_words + n_units > GP_ARG_NUM_REG)
10242 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10243 using a magic NULL_RTX component.
10244 This is not strictly correct. Only some of the arg belongs in
10245 memory, not all of it. However, the normal scheme using
10246 function_arg_partial_nregs can result in unusual subregs, eg.
10247 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10248 store the whole arg to memory is often more efficient than code
10249 to store pieces, and we know that space is available in the right
10250 place for the whole arg. */
10251 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10253 i = 0;
10254 do
10255 {
10256 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10257 rtx off = GEN_INT (i++ * 4);
10258 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10259 }
10260 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10262 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
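/* For illustration (an added example): an 8-byte DImode argument
   arriving at ALIGN_WORDS == 7 has n_units == 2 but only one GPR
   left, so the loop above produces

       (parallel:DI [(expr_list (nil) (const_int 0))
                     (expr_list (reg:SI 10) (const_int 0))])

   where the (nil) element flags that part of the argument lives in
   memory and r10 carries the first four bytes.  */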
10265 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10266 but must also be copied into the parameter save area starting at
10267 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10268 to the GPRs and/or memory. Return the number of elements used. */
10270 static int
10271 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10272 int align_words, rtx *rvec)
10274 int k = 0;
10276 if (align_words < GP_ARG_NUM_REG)
10278 int n_words = rs6000_arg_size (mode, type);
10280 if (align_words + n_words > GP_ARG_NUM_REG
10281 || mode == BLKmode
10282 || (TARGET_32BIT && TARGET_POWERPC64))
10284 /* If this is partially on the stack, then we only
10285 include the portion actually in registers here. */
10286 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10287 int i = 0;
10289 if (align_words + n_words > GP_ARG_NUM_REG)
10291 /* Not all of the arg fits in gprs. Say that it goes in memory
10292 too, using a magic NULL_RTX component. Also see comment in
10293 rs6000_mixed_function_arg for why the normal
10294 function_arg_partial_nregs scheme doesn't work in this case. */
10295 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10296 }
10298 do
10299 {
10300 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10301 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10302 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10303 }
10304 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10306 else
10308 /* The whole arg fits in gprs. */
10309 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10310 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10313 else
10315 /* It's entirely in memory. */
10316 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10319 return k;
10322 /* RVEC is a vector of K components of an argument of mode MODE.
10323 Construct the final function_arg return value from it. */
10325 static rtx
10326 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10328 gcc_assert (k >= 1);
10330 /* Avoid returning a PARALLEL in the trivial cases. */
10331 if (k == 1)
10333 if (XEXP (rvec[0], 0) == NULL_RTX)
10334 return NULL_RTX;
10336 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10337 return XEXP (rvec[0], 0);
10340 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10343 /* Determine where to put an argument to a function.
10344 Value is zero to push the argument on the stack,
10345 or a hard register in which to store the argument.
10347 MODE is the argument's machine mode.
10348 TYPE is the data type of the argument (as a tree).
10349 This is null for libcalls where that information may
10350 not be available.
10351 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10352 the preceding args and about the function being called. It is
10353 not modified in this routine.
10354 NAMED is nonzero if this argument is a named parameter
10355 (otherwise it is an extra parameter matching an ellipsis).
10357 On RS/6000 the first eight words of non-FP are normally in registers
10358 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10359 Under V.4, the first 8 FP args are in registers.
10361 If this is floating-point and no prototype is specified, we use
10362 both an FP and integer register (or possibly FP reg and stack). Library
10363 functions (when CALL_LIBCALL is set) always have the proper types for args,
10364 so we can pass the FP value just in one register. emit_library_function
10365 doesn't support PARALLEL anyway.
10367 Note that for args passed by reference, function_arg will be called
10368 with MODE and TYPE set to that of the pointer to the arg, not the arg
10369 itself. */
10371 static rtx
10372 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10373 const_tree type, bool named)
10375 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10376 enum rs6000_abi abi = DEFAULT_ABI;
10377 machine_mode elt_mode;
10378 int n_elts;
10380 /* Return a marker to indicate whether CR1 needs to set or clear the
10381 bit that V.4 uses to say fp args were passed in registers.
10382 Assume that we don't need the marker for software floating point,
10383 or compiler generated library calls. */
10384 if (mode == VOIDmode)
10386 if (abi == ABI_V4
10387 && (cum->call_cookie & CALL_LIBCALL) == 0
10388 && (cum->stdarg
10389 || (cum->nargs_prototype < 0
10390 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10392 /* For the SPE, we need to crxor CR6 always. */
10393 if (TARGET_SPE_ABI)
10394 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10395 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10396 return GEN_INT (cum->call_cookie
10397 | ((cum->fregno == FP_ARG_MIN_REG)
10398 ? CALL_V4_SET_FP_ARGS
10399 : CALL_V4_CLEAR_FP_ARGS));
10402 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10405 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10407 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10409 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10410 if (rslt != NULL_RTX)
10411 return rslt;
10412 /* Else fall through to usual handling. */
10415 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10417 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10418 rtx r, off;
10419 int i, k = 0;
10421 /* Do we also need to pass this argument in the parameter
10422 save area? */
10423 if (TARGET_64BIT && ! cum->prototype)
10425 int align_words = (cum->words + 1) & ~1;
10426 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10429 /* Describe where this argument goes in the vector registers. */
10430 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10432 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10433 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10434 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10437 return rs6000_finish_function_arg (mode, rvec, k);
10439 else if (TARGET_ALTIVEC_ABI
10440 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10441 || (type && TREE_CODE (type) == VECTOR_TYPE
10442 && int_size_in_bytes (type) == 16)))
10444 if (named || abi == ABI_V4)
10445 return NULL_RTX;
10446 else
10448 /* Vector parameters to varargs functions under AIX or Darwin
10449 get passed in memory and possibly also in GPRs. */
10450 int align, align_words, n_words;
10451 machine_mode part_mode;
10453 /* Vector parameters must be 16-byte aligned. In 32-bit
10454 mode this means we need to take into account the offset
10455 to the parameter save area. In 64-bit mode, they just
10456 have to start on an even word, since the parameter save
10457 area is 16-byte aligned. */
10458 if (TARGET_32BIT)
10459 align = -(rs6000_parm_offset () + cum->words) & 3;
10460 else
10461 align = cum->words & 1;
10462 align_words = cum->words + align;
10464 /* Out of registers? Memory, then. */
10465 if (align_words >= GP_ARG_NUM_REG)
10466 return NULL_RTX;
10468 if (TARGET_32BIT && TARGET_POWERPC64)
10469 return rs6000_mixed_function_arg (mode, type, align_words);
10471 /* The vector value goes in GPRs. Only the part of the
10472 value in GPRs is reported here. */
10473 part_mode = mode;
10474 n_words = rs6000_arg_size (mode, type);
10475 if (align_words + n_words > GP_ARG_NUM_REG)
10476 /* Fortunately, there are only two possibilities: the value
10477 is either wholly in GPRs or half in GPRs and half not. */
10478 part_mode = DImode;
10480 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10483 else if (TARGET_SPE_ABI && TARGET_SPE
10484 && (SPE_VECTOR_MODE (mode)
10485 || (TARGET_E500_DOUBLE && (mode == DFmode
10486 || mode == DCmode
10487 || mode == TFmode
10488 || mode == TCmode))))
10489 return rs6000_spe_function_arg (cum, mode, type);
10491 else if (abi == ABI_V4)
10493 if (TARGET_HARD_FLOAT && TARGET_FPRS
10494 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10495 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10496 || (mode == TFmode && !TARGET_IEEEQUAD)
10497 || mode == SDmode || mode == DDmode || mode == TDmode))
10499 /* _Decimal128 must use an even/odd register pair. This assumes
10500 that the register number is odd when fregno is odd. */
10501 if (mode == TDmode && (cum->fregno % 2) == 1)
10502 cum->fregno++;
10504 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10505 <= FP_ARG_V4_MAX_REG)
10506 return gen_rtx_REG (mode, cum->fregno);
10507 else
10508 return NULL_RTX;
10510 else
10512 int n_words = rs6000_arg_size (mode, type);
10513 int gregno = cum->sysv_gregno;
10515 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10516 (r7,r8) or (r9,r10). So does any other 2-word item such
10517 as complex int, due to a historical mistake. */
10518 if (n_words == 2)
10519 gregno += (1 - gregno) & 1;
10521 /* Multi-reg args are not split between registers and stack. */
10522 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10523 return NULL_RTX;
10525 if (TARGET_32BIT && TARGET_POWERPC64)
10526 return rs6000_mixed_function_arg (mode, type,
10527 gregno - GP_ARG_MIN_REG);
10528 return gen_rtx_REG (mode, gregno);
10531 else
10533 int align_words = rs6000_parm_start (mode, type, cum->words);
10535 /* _Decimal128 must be passed in an even/odd float register pair.
10536 This assumes that the register number is odd when fregno is odd. */
10537 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10538 cum->fregno++;
10540 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10542 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10543 rtx r, off;
10544 int i, k = 0;
10545 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10546 int fpr_words;
10548 /* Do we also need to pass this argument in the parameter
10549 save area? */
10550 if (type && (cum->nargs_prototype <= 0
10551 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10552 && TARGET_XL_COMPAT
10553 && align_words >= GP_ARG_NUM_REG)))
10554 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10556 /* Describe where this argument goes in the fprs. */
10557 for (i = 0; i < n_elts
10558 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10560 /* Check if the argument is split over registers and memory.
10561 This can only ever happen for long double or _Decimal128;
10562 complex types are handled via split_complex_arg. */
10563 machine_mode fmode = elt_mode;
10564 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10566 gcc_assert (fmode == TFmode || fmode == TDmode);
10567 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10570 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10571 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10572 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10575 /* If there were not enough FPRs to hold the argument, the rest
10576 usually goes into memory. However, if the current position
10577 is still within the register parameter area, a portion may
10578 actually have to go into GPRs.
10580 Note that it may happen that the portion of the argument
10581 passed in the first "half" of the first GPR was already
10582 passed in the last FPR as well.
10584 For unnamed arguments, we already set up GPRs to cover the
10585 whole argument in rs6000_psave_function_arg, so there is
10586 nothing further to do at this point. */
10587 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10588 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10589 && cum->nargs_prototype > 0)
10591 static bool warned;
10593 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10594 int n_words = rs6000_arg_size (mode, type);
10596 align_words += fpr_words;
10597 n_words -= fpr_words;
10599 do
10600 {
10601 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10602 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10603 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10604 }
10605 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10607 if (!warned && warn_psabi)
10609 warned = true;
10610 inform (input_location,
10611 "the ABI of passing homogeneous float aggregates"
10612 " has changed in GCC 5");
10616 return rs6000_finish_function_arg (mode, rvec, k);
10618 else if (align_words < GP_ARG_NUM_REG)
10620 if (TARGET_32BIT && TARGET_POWERPC64)
10621 return rs6000_mixed_function_arg (mode, type, align_words);
10623 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10625 else
10626 return NULL_RTX;
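/* For illustration (an added example): under 64-bit ELFv2 a named,
   prototyped argument of type struct { double x, y; } is homogeneous
   (ELT_MODE == DFmode, N_ELTS == 2), and if it is the first FP
   argument the USE_FP_FOR_ARG_P branch above returns

       (parallel:BLK [(expr_list (reg:DF 33) (const_int 0))
                      (expr_list (reg:DF 34) (const_int 8))])

   where 33 and 34 are the internal register numbers of FPR1/FPR2.  */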
10630 /* For an arg passed partly in registers and partly in memory, this is
10631 the number of bytes passed in registers. For args passed entirely in
10632 registers or entirely in memory, zero. When an arg is described by a
10633 PARALLEL, perhaps using more than one register type, this function
10634 returns the number of bytes used by the first element of the PARALLEL. */
10636 static int
10637 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10638 tree type, bool named)
10640 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10641 bool passed_in_gprs = true;
10642 int ret = 0;
10643 int align_words;
10644 machine_mode elt_mode;
10645 int n_elts;
10647 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10649 if (DEFAULT_ABI == ABI_V4)
10650 return 0;
10652 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10654 /* If we are passing this arg in the fixed parameter save area
10655 (gprs or memory) as well as VRs, we do not use the partial
10656 bytes mechanism; instead, rs6000_function_arg will return a
10657 PARALLEL including a memory element as necessary. */
10658 if (TARGET_64BIT && ! cum->prototype)
10659 return 0;
10661 /* Otherwise, we pass in VRs only. Check for partial copies. */
10662 passed_in_gprs = false;
10663 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10664 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10667 /* In this complicated case we just disable the partial_nregs code. */
10668 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10669 return 0;
10671 align_words = rs6000_parm_start (mode, type, cum->words);
10673 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10675 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10677 /* If we are passing this arg in the fixed parameter save area
10678 (gprs or memory) as well as FPRs, we do not use the partial
10679 bytes mechanism; instead, rs6000_function_arg will return a
10680 PARALLEL including a memory element as necessary. */
10681 if (type
10682 && (cum->nargs_prototype <= 0
10683 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10684 && TARGET_XL_COMPAT
10685 && align_words >= GP_ARG_NUM_REG)))
10686 return 0;
10688 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10689 passed_in_gprs = false;
10690 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10692 /* Compute number of bytes / words passed in FPRs. If there
10693 is still space available in the register parameter area
10694 *after* that amount, a part of the argument will be passed
10695 in GPRs. In that case, the total amount passed in any
10696 registers is equal to the amount that would have been passed
10697 in GPRs if everything were passed there, so we fall back to
10698 the GPR code below to compute the appropriate value. */
10699 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10700 * MIN (8, GET_MODE_SIZE (elt_mode)));
10701 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10703 if (align_words + fpr_words < GP_ARG_NUM_REG)
10704 passed_in_gprs = true;
10705 else
10706 ret = fpr;
10710 if (passed_in_gprs
10711 && align_words < GP_ARG_NUM_REG
10712 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10713 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10715 if (ret != 0 && TARGET_DEBUG_ARG)
10716 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10718 return ret;
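/* For example, a 16-byte BLKmode struct whose first doubleword lands
   in the last 64-bit AIX GPR (ALIGN_WORDS == 7, GP_ARG_NUM_REG == 8)
   gives ret == (8 - 7) * 8 == 8: eight bytes travel in r10 and the
   remainder goes to the stack.  */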
10721 /* A C expression that indicates when an argument must be passed by
10722 reference. If nonzero for an argument, a copy of that argument is
10723 made in memory and a pointer to the argument is passed instead of
10724 the argument itself. The pointer is passed in whatever way is
10725 appropriate for passing a pointer to that type.
10727 Under V.4, aggregates and long double are passed by reference.
10729 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10730 reference unless the AltiVec vector extension ABI is in force.
10732 As an extension to all ABIs, variable sized types are passed by
10733 reference. */
10735 static bool
10736 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10737 machine_mode mode, const_tree type,
10738 bool named ATTRIBUTE_UNUSED)
10740 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10742 if (TARGET_DEBUG_ARG)
10743 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10744 return 1;
10747 if (!type)
10748 return 0;
10750 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10752 if (TARGET_DEBUG_ARG)
10753 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10754 return 1;
10757 if (int_size_in_bytes (type) < 0)
10759 if (TARGET_DEBUG_ARG)
10760 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10761 return 1;
10764 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10765 modes only exist for GCC vector types if -maltivec. */
10766 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10768 if (TARGET_DEBUG_ARG)
10769 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10770 return 1;
10773 /* Pass synthetic vectors in memory. */
10774 if (TREE_CODE (type) == VECTOR_TYPE
10775 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10777 static bool warned_for_pass_big_vectors = false;
10778 if (TARGET_DEBUG_ARG)
10779 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10780 if (!warned_for_pass_big_vectors)
10782 warning (0, "GCC vector passed by reference: "
10783 "non-standard ABI extension with no compatibility guarantee");
10784 warned_for_pass_big_vectors = true;
10786 return 1;
10789 return 0;
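/* For illustration: under the V.4 ABI even a small struct such as
   struct { int a, b; } takes the AGGREGATE_TYPE_P path above and is
   passed by invisible reference, whereas the AIX and ELFv2 ABIs pass
   it by value in GPRs.  */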
10792 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10793 already processed. Return true if the parameter must be passed
10794 (fully or partially) on the stack. */
10796 static bool
10797 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10799 machine_mode mode;
10800 int unsignedp;
10801 rtx entry_parm;
10803 /* Catch errors. */
10804 if (type == NULL || type == error_mark_node)
10805 return true;
10807 /* Handle types with no storage requirement. */
10808 if (TYPE_MODE (type) == VOIDmode)
10809 return false;
10811 /* Handle complex types: split and passed as two copies of the element type. */
10812 if (TREE_CODE (type) == COMPLEX_TYPE)
10813 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10814 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10816 /* Handle transparent aggregates. */
10817 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10818 && TYPE_TRANSPARENT_AGGR (type))
10819 type = TREE_TYPE (first_field (type));
10821 /* See if this arg was passed by invisible reference. */
10822 if (pass_by_reference (get_cumulative_args (args_so_far),
10823 TYPE_MODE (type), type, true))
10824 type = build_pointer_type (type);
10826 /* Find mode as it is passed by the ABI. */
10827 unsignedp = TYPE_UNSIGNED (type);
10828 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10830 /* If we must pass in stack, we need a stack. */
10831 if (rs6000_must_pass_in_stack (mode, type))
10832 return true;
10834 /* If there is no incoming register, we need a stack. */
10835 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10836 if (entry_parm == NULL)
10837 return true;
10839 /* Likewise if we need to pass both in registers and on the stack. */
10840 if (GET_CODE (entry_parm) == PARALLEL
10841 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10842 return true;
10844 /* Also true if we're partially in registers and partially not. */
10845 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10846 return true;
10848 /* Update info on where next arg arrives in registers. */
10849 rs6000_function_arg_advance (args_so_far, mode, type, true);
10850 return false;
10853 /* Return true if FUN has no prototype, has a variable argument
10854 list, or passes any parameter in memory. */
10856 static bool
10857 rs6000_function_parms_need_stack (tree fun, bool incoming)
10859 tree fntype, result;
10860 CUMULATIVE_ARGS args_so_far_v;
10861 cumulative_args_t args_so_far;
10863 if (!fun)
10864 /* Must be a libcall; libcalls only use reg parms. */
10865 return false;
10867 fntype = fun;
10868 if (!TYPE_P (fun))
10869 fntype = TREE_TYPE (fun);
10871 /* Varargs functions need the parameter save area. */
10872 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10873 return true;
10875 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10876 args_so_far = pack_cumulative_args (&args_so_far_v);
10878 /* When incoming, we will have been passed the function decl.
10879 It is necessary to use the decl to handle K&R style functions,
10880 where TYPE_ARG_TYPES may not be available. */
10881 if (incoming)
10883 gcc_assert (DECL_P (fun));
10884 result = DECL_RESULT (fun);
10886 else
10887 result = TREE_TYPE (fntype);
10889 if (result && aggregate_value_p (result, fntype))
10891 if (!TYPE_P (result))
10892 result = TREE_TYPE (result);
10893 result = build_pointer_type (result);
10894 rs6000_parm_needs_stack (args_so_far, result);
10897 if (incoming)
10899 tree parm;
10901 for (parm = DECL_ARGUMENTS (fun);
10902 parm && parm != void_list_node;
10903 parm = TREE_CHAIN (parm))
10904 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10905 return true;
10907 else
10909 function_args_iterator args_iter;
10910 tree arg_type;
10912 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10913 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10914 return true;
10917 return false;
10920 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
10921 usually a constant depending on the ABI. However, in the ELFv2 ABI
10922 the register parameter area is optional when calling a function that
10923 has a prototype in scope, has no variable argument list, and passes
10924 all parameters in registers. */
10926 int
10927 rs6000_reg_parm_stack_space (tree fun, bool incoming)
10929 int reg_parm_stack_space;
10931 switch (DEFAULT_ABI)
10933 default:
10934 reg_parm_stack_space = 0;
10935 break;
10937 case ABI_AIX:
10938 case ABI_DARWIN:
10939 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10940 break;
10942 case ABI_ELFv2:
10943 /* ??? Recomputing this every time is a bit expensive. Is there
10944 a place to cache this information? */
10945 if (rs6000_function_parms_need_stack (fun, incoming))
10946 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10947 else
10948 reg_parm_stack_space = 0;
10949 break;
10952 return reg_parm_stack_space;
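/* Worked example for the ELFv2 case above (illustrative, not part of
   the original source):

       extern int dot3 (int a, int b, int c);
       int caller (void) { return dot3 (1, 2, 3); }

   dot3 has a prototype in scope, is not varargs, and all parameters
   fit in registers, so rs6000_function_parms_need_stack returns false
   and caller allocates no register parameter save area; calling a
   varargs or unprototyped function instead reserves the full 64 (or
   32) bytes.  */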
10955 static void
10956 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
10958 int i;
10959 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
10961 if (nregs == 0)
10962 return;
10964 for (i = 0; i < nregs; i++)
10966 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
10967 if (reload_completed)
10969 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
10970 tem = NULL_RTX;
10971 else
10972 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
10973 i * GET_MODE_SIZE (reg_mode));
10975 else
10976 tem = replace_equiv_address (tem, XEXP (tem, 0));
10978 gcc_assert (tem);
10980 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
10984 /* Perform any actions needed for a function that is receiving a
10985 variable number of arguments.
10987 CUM is as above.
10989 MODE and TYPE are the mode and type of the current parameter.
10991 PRETEND_SIZE is a variable that should be set to the amount of stack
10992 that must be pushed by the prolog to pretend that our caller pushed
10993 it.
10995 Normally, this macro will push all remaining incoming registers on the
10996 stack and set PRETEND_SIZE to the length of the registers pushed. */
10998 static void
10999 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11000 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11001 int no_rtl)
11003 CUMULATIVE_ARGS next_cum;
11004 int reg_size = TARGET_32BIT ? 4 : 8;
11005 rtx save_area = NULL_RTX, mem;
11006 int first_reg_offset;
11007 alias_set_type set;
11009 /* Skip the last named argument. */
11010 next_cum = *get_cumulative_args (cum);
11011 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11013 if (DEFAULT_ABI == ABI_V4)
11015 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11017 if (! no_rtl)
11019 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11020 HOST_WIDE_INT offset = 0;
11022 /* Try to optimize the size of the varargs save area.
11023 The ABI requires that ap.reg_save_area is doubleword
11024 aligned, but we don't need to allocate space for all
11025 the bytes, only those to which we actually will save
11026 anything. */
11027 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11028 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11029 if (TARGET_HARD_FLOAT && TARGET_FPRS
11030 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11031 && cfun->va_list_fpr_size)
11033 if (gpr_reg_num)
11034 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11035 * UNITS_PER_FP_WORD;
11036 if (cfun->va_list_fpr_size
11037 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11038 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11039 else
11040 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11041 * UNITS_PER_FP_WORD;
11043 if (gpr_reg_num)
11045 offset = -((first_reg_offset * reg_size) & ~7);
11046 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11048 gpr_reg_num = cfun->va_list_gpr_size;
11049 if (reg_size == 4 && (first_reg_offset & 1))
11050 gpr_reg_num++;
11052 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11054 else if (fpr_size)
11055 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11056 * UNITS_PER_FP_WORD
11057 - (int) (GP_ARG_NUM_REG * reg_size);
11059 if (gpr_size + fpr_size)
11061 rtx reg_save_area
11062 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11063 gcc_assert (GET_CODE (reg_save_area) == MEM);
11064 reg_save_area = XEXP (reg_save_area, 0);
11065 if (GET_CODE (reg_save_area) == PLUS)
11067 gcc_assert (XEXP (reg_save_area, 0)
11068 == virtual_stack_vars_rtx);
11069 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11070 offset += INTVAL (XEXP (reg_save_area, 1));
11072 else
11073 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11076 cfun->machine->varargs_save_offset = offset;
11077 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11080 else
11082 first_reg_offset = next_cum.words;
11083 save_area = virtual_incoming_args_rtx;
11085 if (targetm.calls.must_pass_in_stack (mode, type))
11086 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11089 set = get_varargs_alias_set ();
11090 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11091 && cfun->va_list_gpr_size)
11093 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11095 if (va_list_gpr_counter_field)
11096 /* V4 va_list_gpr_size counts number of registers needed. */
11097 n_gpr = cfun->va_list_gpr_size;
11098 else
11099 /* char * va_list instead counts number of bytes needed. */
11100 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11102 if (nregs > n_gpr)
11103 nregs = n_gpr;
11105 mem = gen_rtx_MEM (BLKmode,
11106 plus_constant (Pmode, save_area,
11107 first_reg_offset * reg_size));
11108 MEM_NOTRAP_P (mem) = 1;
11109 set_mem_alias_set (mem, set);
11110 set_mem_align (mem, BITS_PER_WORD);
11112 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11113 nregs);
11116 /* Save FP registers if needed. */
11117 if (DEFAULT_ABI == ABI_V4
11118 && TARGET_HARD_FLOAT && TARGET_FPRS
11119 && ! no_rtl
11120 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11121 && cfun->va_list_fpr_size)
11123 int fregno = next_cum.fregno, nregs;
11124 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11125 rtx lab = gen_label_rtx ();
11126 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11127 * UNITS_PER_FP_WORD);
11129 emit_jump_insn
11130 (gen_rtx_SET (VOIDmode,
11131 pc_rtx,
11132 gen_rtx_IF_THEN_ELSE (VOIDmode,
11133 gen_rtx_NE (VOIDmode, cr1,
11134 const0_rtx),
11135 gen_rtx_LABEL_REF (VOIDmode, lab),
11136 pc_rtx)));
11138 for (nregs = 0;
11139 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11140 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11142 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11143 ? DFmode : SFmode,
11144 plus_constant (Pmode, save_area, off));
11145 MEM_NOTRAP_P (mem) = 1;
11146 set_mem_alias_set (mem, set);
11147 set_mem_align (mem, GET_MODE_ALIGNMENT (
11148 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11149 ? DFmode : SFmode));
11150 emit_move_insn (mem, gen_rtx_REG (
11151 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11152 ? DFmode : SFmode, fregno));
11155 emit_label (lab);
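/* Sketch of the ABI_V4 register save area laid out above
   (illustrative; 32-bit hard float assumed, so reg_size == 4 and
   UNITS_PER_FP_WORD == 8):

       reg_save_area +  0 .. +31   r3..r10 (GP_ARG_NUM_REG slots)
       reg_save_area + 32 .. +95   f1..f8  (OFF starts past the GPRs)

   The FPR stores emitted above sit behind the CR1 test, so they are
   skipped at run time when the caller cleared the SVR4 ABI's "FP
   arguments present" bit.  */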
11159 /* Create the va_list data type. */
11161 static tree
11162 rs6000_build_builtin_va_list (void)
11164 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11166 /* For AIX, prefer 'char *' because that's what the system
11167 header files like. */
11168 if (DEFAULT_ABI != ABI_V4)
11169 return build_pointer_type (char_type_node);
11171 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11172 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11173 get_identifier ("__va_list_tag"), record);
11175 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11176 unsigned_char_type_node);
11177 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11178 unsigned_char_type_node);
11179 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11180 every user file. */
11181 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11182 get_identifier ("reserved"), short_unsigned_type_node);
11183 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11184 get_identifier ("overflow_arg_area"),
11185 ptr_type_node);
11186 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11187 get_identifier ("reg_save_area"),
11188 ptr_type_node);
11190 va_list_gpr_counter_field = f_gpr;
11191 va_list_fpr_counter_field = f_fpr;
11193 DECL_FIELD_CONTEXT (f_gpr) = record;
11194 DECL_FIELD_CONTEXT (f_fpr) = record;
11195 DECL_FIELD_CONTEXT (f_res) = record;
11196 DECL_FIELD_CONTEXT (f_ovf) = record;
11197 DECL_FIELD_CONTEXT (f_sav) = record;
11199 TYPE_STUB_DECL (record) = type_decl;
11200 TYPE_NAME (record) = type_decl;
11201 TYPE_FIELDS (record) = f_gpr;
11202 DECL_CHAIN (f_gpr) = f_fpr;
11203 DECL_CHAIN (f_fpr) = f_res;
11204 DECL_CHAIN (f_res) = f_ovf;
11205 DECL_CHAIN (f_ovf) = f_sav;
11207 layout_type (record);
11209 /* The correct type is an array type of one element. */
11210 return build_array_type (record, build_index_type (size_zero_node));
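/* User-level sketch of the record built above (illustrative; this is
   what SVR4 va_list code effectively sees):

       typedef struct __va_list_tag {
         unsigned char gpr;            (next GPR slot consumed)
         unsigned char fpr;            (next FPR slot consumed)
         unsigned short reserved;      (the named padding)
         void *overflow_arg_area;      (arguments passed on the stack)
         void *reg_save_area;          (dumped r3..r10 / f1..f8)
       } va_list[1];

   The array-of-one-element return is what makes "va_list" decay to a
   pointer when passed between functions.  */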
11213 /* Implement va_start. */
11215 static void
11216 rs6000_va_start (tree valist, rtx nextarg)
11218 HOST_WIDE_INT words, n_gpr, n_fpr;
11219 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11220 tree gpr, fpr, ovf, sav, t;
11222 /* Only SVR4 needs something special. */
11223 if (DEFAULT_ABI != ABI_V4)
11225 std_expand_builtin_va_start (valist, nextarg);
11226 return;
11229 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11230 f_fpr = DECL_CHAIN (f_gpr);
11231 f_res = DECL_CHAIN (f_fpr);
11232 f_ovf = DECL_CHAIN (f_res);
11233 f_sav = DECL_CHAIN (f_ovf);
11235 valist = build_simple_mem_ref (valist);
11236 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11237 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11238 f_fpr, NULL_TREE);
11239 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11240 f_ovf, NULL_TREE);
11241 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11242 f_sav, NULL_TREE);
11244 /* Count number of gp and fp argument registers used. */
11245 words = crtl->args.info.words;
11246 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11247 GP_ARG_NUM_REG);
11248 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11249 FP_ARG_NUM_REG);
11251 if (TARGET_DEBUG_ARG)
11252 fprintf (stderr, "va_start: words = "HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11253 HOST_WIDE_INT_PRINT_DEC", n_fpr = "HOST_WIDE_INT_PRINT_DEC"\n",
11254 words, n_gpr, n_fpr);
11256 if (cfun->va_list_gpr_size)
11258 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11259 build_int_cst (NULL_TREE, n_gpr));
11260 TREE_SIDE_EFFECTS (t) = 1;
11261 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11264 if (cfun->va_list_fpr_size)
11266 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11267 build_int_cst (NULL_TREE, n_fpr));
11268 TREE_SIDE_EFFECTS (t) = 1;
11269 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11271 #ifdef HAVE_AS_GNU_ATTRIBUTE
11272 if (call_ABI_of_interest (cfun->decl))
11273 rs6000_passes_float = true;
11274 #endif
11277 /* Find the overflow area. */
11278 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11279 if (words != 0)
11280 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11281 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11282 TREE_SIDE_EFFECTS (t) = 1;
11283 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11285 /* If there were no va_arg invocations, don't set up the register
11286 save area. */
11287 if (!cfun->va_list_gpr_size
11288 && !cfun->va_list_fpr_size
11289 && n_gpr < GP_ARG_NUM_REG
11290 && n_fpr < FP_ARG_V4_MAX_REG)
11291 return;
11293 /* Find the register save area. */
11294 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11295 if (cfun->machine->varargs_save_offset)
11296 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11297 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11298 TREE_SIDE_EFFECTS (t) = 1;
11299 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
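/* Example of the values stored above (illustrative): for

       void f (int a, double b, ...)

   compiled for ABI_V4 with hard float, A consumes r3 and B consumes
   f1, so va_start records gpr = 1 and fpr = 1; overflow_arg_area
   starts WORDS words past the incoming arguments, and reg_save_area
   points at the block prepared by setup_incoming_varargs, adjusted by
   varargs_save_offset.  */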
11302 /* Implement va_arg. */
11304 static tree
11305 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11306 gimple_seq *post_p)
11308 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11309 tree gpr, fpr, ovf, sav, reg, t, u;
11310 int size, rsize, n_reg, sav_ofs, sav_scale;
11311 tree lab_false, lab_over, addr;
11312 int align;
11313 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11314 int regalign = 0;
11315 gimple stmt;
11317 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11319 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11320 return build_va_arg_indirect_ref (t);
11323 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11324 earlier version of gcc, with the property that it always applied alignment
11325 adjustments to the va-args (even for zero-sized types). The cheapest way
11326 to deal with this is to replicate the effect of the part of
11327 std_gimplify_va_arg_expr that carries out the align adjust, for the
11328 relevant case.
11329 We don't need to check for pass-by-reference because of the test above.
11330 We can return a simplified answer, since we know there's no offset to add. */
11332 if (((TARGET_MACHO
11333 && rs6000_darwin64_abi)
11334 || DEFAULT_ABI == ABI_ELFv2
11335 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11336 && integer_zerop (TYPE_SIZE (type)))
11338 unsigned HOST_WIDE_INT align, boundary;
11339 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11340 align = PARM_BOUNDARY / BITS_PER_UNIT;
11341 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11342 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11343 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11344 boundary /= BITS_PER_UNIT;
11345 if (boundary > align)
11347 tree t;
11348 /* This updates arg ptr by the amount that would be necessary
11349 to align the zero-sized (but not zero-alignment) item. */
11350 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11351 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11352 gimplify_and_add (t, pre_p);
11354 t = fold_convert (sizetype, valist_tmp);
11355 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11356 fold_convert (TREE_TYPE (valist),
11357 fold_build2 (BIT_AND_EXPR, sizetype, t,
11358 size_int (-boundary))));
11359 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11360 gimplify_and_add (t, pre_p);
11362 /* Since it is zero-sized there's no increment for the item itself. */
11363 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11364 return build_va_arg_indirect_ref (valist_tmp);
11367 if (DEFAULT_ABI != ABI_V4)
11369 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11371 tree elem_type = TREE_TYPE (type);
11372 machine_mode elem_mode = TYPE_MODE (elem_type);
11373 int elem_size = GET_MODE_SIZE (elem_mode);
11375 if (elem_size < UNITS_PER_WORD)
11377 tree real_part, imag_part;
11378 gimple_seq post = NULL;
11380 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11381 &post);
11382 /* Copy the value into a temporary, lest the formal temporary
11383 be reused out from under us. */
11384 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11385 gimple_seq_add_seq (pre_p, post);
11387 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11388 post_p);
11390 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11394 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11397 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11398 f_fpr = DECL_CHAIN (f_gpr);
11399 f_res = DECL_CHAIN (f_fpr);
11400 f_ovf = DECL_CHAIN (f_res);
11401 f_sav = DECL_CHAIN (f_ovf);
11403 valist = build_va_arg_indirect_ref (valist);
11404 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11405 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11406 f_fpr, NULL_TREE);
11407 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11408 f_ovf, NULL_TREE);
11409 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11410 f_sav, NULL_TREE);
11412 size = int_size_in_bytes (type);
11413 rsize = (size + 3) / 4;
11414 align = 1;
11416 if (TARGET_HARD_FLOAT && TARGET_FPRS
11417 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11418 || (TARGET_DOUBLE_FLOAT
11419 && (TYPE_MODE (type) == DFmode
11420 || TYPE_MODE (type) == TFmode
11421 || TYPE_MODE (type) == SDmode
11422 || TYPE_MODE (type) == DDmode
11423 || TYPE_MODE (type) == TDmode))))
11425 /* FP args go in FP registers, if present. */
11426 reg = fpr;
11427 n_reg = (size + 7) / 8;
11428 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11429 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11430 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11431 align = 8;
11433 else
11435 /* Otherwise into GP registers. */
11436 reg = gpr;
11437 n_reg = rsize;
11438 sav_ofs = 0;
11439 sav_scale = 4;
11440 if (n_reg == 2)
11441 align = 8;
11444 /* Pull the value out of the saved registers.... */
11446 lab_over = NULL;
11447 addr = create_tmp_var (ptr_type_node, "addr");
11449 /* AltiVec vectors never go in registers when -mabi=altivec. */
11450 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11451 align = 16;
11452 else
11454 lab_false = create_artificial_label (input_location);
11455 lab_over = create_artificial_label (input_location);
11457 /* Long long and SPE vectors are aligned in the registers.
11458 As are any other 2 gpr item such as complex int due to a
11459 historical mistake. */
11460 u = reg;
11461 if (n_reg == 2 && reg == gpr)
11463 regalign = 1;
11464 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11465 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11466 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11467 unshare_expr (reg), u);
11469 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11470 reg number is 0 for f1, so we want to make it odd. */
11471 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11473 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11474 build_int_cst (TREE_TYPE (reg), 1));
11475 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11478 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11479 t = build2 (GE_EXPR, boolean_type_node, u, t);
11480 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11481 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11482 gimplify_and_add (t, pre_p);
11484 t = sav;
11485 if (sav_ofs)
11486 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11488 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11489 build_int_cst (TREE_TYPE (reg), n_reg));
11490 u = fold_convert (sizetype, u);
11491 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11492 t = fold_build_pointer_plus (t, u);
11494 /* _Decimal32 varargs are located in the second word of the 64-bit
11495 FP register for 32-bit binaries. */
11496 if (TARGET_32BIT
11497 && TARGET_HARD_FLOAT && TARGET_FPRS
11498 && TYPE_MODE (type) == SDmode)
11499 t = fold_build_pointer_plus_hwi (t, size);
11501 gimplify_assign (addr, t, pre_p);
11503 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11505 stmt = gimple_build_label (lab_false);
11506 gimple_seq_add_stmt (pre_p, stmt);
11508 if ((n_reg == 2 && !regalign) || n_reg > 2)
11510 /* Ensure that we don't find any more args in regs.
11511 Alignment has been taken care of for special cases. */
11512 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11516 /* ... otherwise out of the overflow area. */
11518 /* Care for on-stack alignment if needed. */
11519 t = ovf;
11520 if (align != 1)
11522 t = fold_build_pointer_plus_hwi (t, align - 1);
11523 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11524 build_int_cst (TREE_TYPE (t), -align));
11526 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11528 gimplify_assign (unshare_expr (addr), t, pre_p);
11530 t = fold_build_pointer_plus_hwi (t, size);
11531 gimplify_assign (unshare_expr (ovf), t, pre_p);
11533 if (lab_over)
11535 stmt = gimple_build_label (lab_over);
11536 gimple_seq_add_stmt (pre_p, stmt);
11539 if (STRICT_ALIGNMENT
11540 && (TYPE_ALIGN (type)
11541 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11543 /* The value (of type complex double, for example) may not be
11544 aligned in memory in the saved registers, so copy via a
11545 temporary. (This is the same code as used for SPARC.) */
11546 tree tmp = create_tmp_var (type, "va_arg_tmp");
11547 tree dest_addr = build_fold_addr_expr (tmp);
11549 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11550 3, dest_addr, addr, size_int (rsize * 4));
11552 gimplify_and_add (copy, pre_p);
11553 addr = dest_addr;
11556 addr = fold_convert (ptrtype, addr);
11557 return build_va_arg_indirect_ref (addr);
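/* Worked example for the ABI_V4 path above (illustrative): for
   va_arg (ap, double) with hard float, REG is the fpr counter,
   N_REG = 1, SAV_OFS = 32 (past the eight 4-byte GPR slots) and
   SAV_SCALE = 8, so while fpr < 8 the value is fetched from

       reg_save_area + 32 + fpr++ * 8

   and otherwise from the overflow area after rounding it up to the
   8-byte ALIGN computed for DFmode.  */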
11560 /* Builtins. */
11562 static void
11563 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11565 tree t;
11566 unsigned classify = rs6000_builtin_info[(int)code].attr;
11567 const char *attr_string = "";
11569 gcc_assert (name != NULL);
11570 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
11572 if (rs6000_builtin_decls[(int)code])
11573 fatal_error (input_location,
11574 "internal error: builtin function %s already processed", name);
11576 rs6000_builtin_decls[(int)code] = t =
11577 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11579 /* Set any special attributes. */
11580 if ((classify & RS6000_BTC_CONST) != 0)
11582 /* const function, function only depends on the inputs. */
11583 TREE_READONLY (t) = 1;
11584 TREE_NOTHROW (t) = 1;
11585 attr_string = ", pure";
11587 else if ((classify & RS6000_BTC_PURE) != 0)
11589 /* pure function, function can read global memory, but does not set any
11590 external state. */
11591 DECL_PURE_P (t) = 1;
11592 TREE_NOTHROW (t) = 1;
11593 attr_string = ", const";
11595 else if ((classify & RS6000_BTC_FP) != 0)
11597 /* Function is a math function. If rounding mode is on, then treat the
11598 function as not reading global memory, but it can have arbitrary side
11599 effects. If it is off, then assume the function is a const function.
11600 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11601 builtin-attribute.def that is used for the math functions. */
11602 TREE_NOTHROW (t) = 1;
11603 if (flag_rounding_math)
11605 DECL_PURE_P (t) = 1;
11606 DECL_IS_NOVOPS (t) = 1;
11607 attr_string = ", fp, pure";
11609 else
11611 TREE_READONLY (t) = 1;
11612 attr_string = ", fp, const";
11615 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11616 gcc_unreachable ();
11618 if (TARGET_DEBUG_BUILTIN)
11619 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11620 (int)code, name, attr_string);
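/* Typical use of def_builtin (illustrative; the type-node name below
   is hypothetical, and the real calls are made by the table-driven
   init routines later in this file):

       def_builtin ("__builtin_altivec_vmaddfp",
                    v4sf_ftype_v4sf_v4sf_v4sf,
                    ALTIVEC_BUILTIN_VMADDFP);

   This registers the function with the middle end and caches its decl
   in rs6000_builtin_decls for use at expansion time.  */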
11623 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11625 #undef RS6000_BUILTIN_1
11626 #undef RS6000_BUILTIN_2
11627 #undef RS6000_BUILTIN_3
11628 #undef RS6000_BUILTIN_A
11629 #undef RS6000_BUILTIN_D
11630 #undef RS6000_BUILTIN_E
11631 #undef RS6000_BUILTIN_H
11632 #undef RS6000_BUILTIN_P
11633 #undef RS6000_BUILTIN_Q
11634 #undef RS6000_BUILTIN_S
11635 #undef RS6000_BUILTIN_X
11637 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11638 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11639 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11640 { MASK, ICODE, NAME, ENUM },
11642 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11643 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11644 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11645 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11646 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11647 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11648 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11649 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11651 static const struct builtin_description bdesc_3arg[] =
11653 #include "rs6000-builtin.def"
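/* How the X-macro scheme works (illustrative): with only
   RS6000_BUILTIN_3 defined to emit an initializer, a ternary entry in
   rs6000-builtin.def along the lines of

       RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
                         RS6000_BTM_ALTIVEC, RS6000_BTC_FP,
                         CODE_FOR_altivec_vmaddfp)

   expands to

       { RS6000_BTM_ALTIVEC, CODE_FOR_altivec_vmaddfp,
         "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP },

   while every other class expands to nothing; each bdesc_* table in
   this file re-includes the same .def with a different class's macro
   turned on.  */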
11656 /* DST operations: void foo (void *, const int, const char). */
11658 #undef RS6000_BUILTIN_1
11659 #undef RS6000_BUILTIN_2
11660 #undef RS6000_BUILTIN_3
11661 #undef RS6000_BUILTIN_A
11662 #undef RS6000_BUILTIN_D
11663 #undef RS6000_BUILTIN_E
11664 #undef RS6000_BUILTIN_H
11665 #undef RS6000_BUILTIN_P
11666 #undef RS6000_BUILTIN_Q
11667 #undef RS6000_BUILTIN_S
11668 #undef RS6000_BUILTIN_X
11670 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11671 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11672 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11673 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11674 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11675 { MASK, ICODE, NAME, ENUM },
11677 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11678 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11679 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11680 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11681 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11682 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11684 static const struct builtin_description bdesc_dst[] =
11686 #include "rs6000-builtin.def"
11689 /* Simple binary operations: VECc = foo (VECa, VECb). */
11691 #undef RS6000_BUILTIN_1
11692 #undef RS6000_BUILTIN_2
11693 #undef RS6000_BUILTIN_3
11694 #undef RS6000_BUILTIN_A
11695 #undef RS6000_BUILTIN_D
11696 #undef RS6000_BUILTIN_E
11697 #undef RS6000_BUILTIN_H
11698 #undef RS6000_BUILTIN_P
11699 #undef RS6000_BUILTIN_Q
11700 #undef RS6000_BUILTIN_S
11701 #undef RS6000_BUILTIN_X
11703 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11704 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11705 { MASK, ICODE, NAME, ENUM },
11707 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11708 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11709 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11710 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11711 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11712 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11713 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11714 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11715 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11717 static const struct builtin_description bdesc_2arg[] =
11719 #include "rs6000-builtin.def"
11722 #undef RS6000_BUILTIN_1
11723 #undef RS6000_BUILTIN_2
11724 #undef RS6000_BUILTIN_3
11725 #undef RS6000_BUILTIN_A
11726 #undef RS6000_BUILTIN_D
11727 #undef RS6000_BUILTIN_E
11728 #undef RS6000_BUILTIN_H
11729 #undef RS6000_BUILTIN_P
11730 #undef RS6000_BUILTIN_Q
11731 #undef RS6000_BUILTIN_S
11732 #undef RS6000_BUILTIN_X
11734 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11735 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11736 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11737 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11738 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11739 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11740 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11741 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11742 { MASK, ICODE, NAME, ENUM },
11744 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11745 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11746 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11748 /* AltiVec predicates. */
11750 static const struct builtin_description bdesc_altivec_preds[] =
11752 #include "rs6000-builtin.def"
11755 /* SPE predicates. */
11756 #undef RS6000_BUILTIN_1
11757 #undef RS6000_BUILTIN_2
11758 #undef RS6000_BUILTIN_3
11759 #undef RS6000_BUILTIN_A
11760 #undef RS6000_BUILTIN_D
11761 #undef RS6000_BUILTIN_E
11762 #undef RS6000_BUILTIN_H
11763 #undef RS6000_BUILTIN_P
11764 #undef RS6000_BUILTIN_Q
11765 #undef RS6000_BUILTIN_S
11766 #undef RS6000_BUILTIN_X
11768 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11769 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11770 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11771 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11772 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11773 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11774 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11775 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11776 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11777 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11778 { MASK, ICODE, NAME, ENUM },
11780 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11782 static const struct builtin_description bdesc_spe_predicates[] =
11784 #include "rs6000-builtin.def"
11787 /* SPE evsel predicates. */
11788 #undef RS6000_BUILTIN_1
11789 #undef RS6000_BUILTIN_2
11790 #undef RS6000_BUILTIN_3
11791 #undef RS6000_BUILTIN_A
11792 #undef RS6000_BUILTIN_D
11793 #undef RS6000_BUILTIN_E
11794 #undef RS6000_BUILTIN_H
11795 #undef RS6000_BUILTIN_P
11796 #undef RS6000_BUILTIN_Q
11797 #undef RS6000_BUILTIN_S
11798 #undef RS6000_BUILTIN_X
11800 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11801 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11802 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11803 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11804 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11805 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11806 { MASK, ICODE, NAME, ENUM },
11808 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11809 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11810 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11811 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11812 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11814 static const struct builtin_description bdesc_spe_evsel[] =
11816 #include "rs6000-builtin.def"
11819 /* PAIRED predicates. */
11820 #undef RS6000_BUILTIN_1
11821 #undef RS6000_BUILTIN_2
11822 #undef RS6000_BUILTIN_3
11823 #undef RS6000_BUILTIN_A
11824 #undef RS6000_BUILTIN_D
11825 #undef RS6000_BUILTIN_E
11826 #undef RS6000_BUILTIN_H
11827 #undef RS6000_BUILTIN_P
11828 #undef RS6000_BUILTIN_Q
11829 #undef RS6000_BUILTIN_S
11830 #undef RS6000_BUILTIN_X
11832 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11833 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11834 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11835 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11836 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11837 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11838 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11839 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11840 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11841 { MASK, ICODE, NAME, ENUM },
11843 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11844 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11846 static const struct builtin_description bdesc_paired_preds[] =
11848 #include "rs6000-builtin.def"
11851 /* ABS* operations. */
11853 #undef RS6000_BUILTIN_1
11854 #undef RS6000_BUILTIN_2
11855 #undef RS6000_BUILTIN_3
11856 #undef RS6000_BUILTIN_A
11857 #undef RS6000_BUILTIN_D
11858 #undef RS6000_BUILTIN_E
11859 #undef RS6000_BUILTIN_H
11860 #undef RS6000_BUILTIN_P
11861 #undef RS6000_BUILTIN_Q
11862 #undef RS6000_BUILTIN_S
11863 #undef RS6000_BUILTIN_X
11865 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11866 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11867 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11868 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11869 { MASK, ICODE, NAME, ENUM },
11871 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11872 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11873 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11874 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11875 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11876 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11877 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11879 static const struct builtin_description bdesc_abs[] =
11881 #include "rs6000-builtin.def"
11884 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11885 foo (VECa). */
11887 #undef RS6000_BUILTIN_1
11888 #undef RS6000_BUILTIN_2
11889 #undef RS6000_BUILTIN_3
11890 #undef RS6000_BUILTIN_A
11891 #undef RS6000_BUILTIN_D
11892 #undef RS6000_BUILTIN_E
11893 #undef RS6000_BUILTIN_H
11894 #undef RS6000_BUILTIN_P
11895 #undef RS6000_BUILTIN_Q
11896 #undef RS6000_BUILTIN_S
11897 #undef RS6000_BUILTIN_X
11899 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11900 { MASK, ICODE, NAME, ENUM },
11902 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11903 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11904 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11905 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11906 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11907 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11908 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11909 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11910 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11911 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11913 static const struct builtin_description bdesc_1arg[] =
11915 #include "rs6000-builtin.def"
11918 /* HTM builtins. */
11919 #undef RS6000_BUILTIN_1
11920 #undef RS6000_BUILTIN_2
11921 #undef RS6000_BUILTIN_3
11922 #undef RS6000_BUILTIN_A
11923 #undef RS6000_BUILTIN_D
11924 #undef RS6000_BUILTIN_E
11925 #undef RS6000_BUILTIN_H
11926 #undef RS6000_BUILTIN_P
11927 #undef RS6000_BUILTIN_Q
11928 #undef RS6000_BUILTIN_S
11929 #undef RS6000_BUILTIN_X
11931 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11932 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11933 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11934 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11935 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11936 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11937 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
11938 { MASK, ICODE, NAME, ENUM },
11940 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11941 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11942 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11943 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11945 static const struct builtin_description bdesc_htm[] =
11947 #include "rs6000-builtin.def"
11950 #undef RS6000_BUILTIN_1
11951 #undef RS6000_BUILTIN_2
11952 #undef RS6000_BUILTIN_3
11953 #undef RS6000_BUILTIN_A
11954 #undef RS6000_BUILTIN_D
11955 #undef RS6000_BUILTIN_E
11956 #undef RS6000_BUILTIN_H
11957 #undef RS6000_BUILTIN_P
11958 #undef RS6000_BUILTIN_Q
11959 #undef RS6000_BUILTIN_S
11961 /* Return true if a builtin function is overloaded. */
11962 bool
11963 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
11965 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
11968 /* Expand an expression EXP that calls a builtin without arguments. */
11969 static rtx
11970 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
11972 rtx pat;
11973 machine_mode tmode = insn_data[icode].operand[0].mode;
11975 if (icode == CODE_FOR_nothing)
11976 /* Builtin not supported on this processor. */
11977 return 0;
11979 if (target == 0
11980 || GET_MODE (target) != tmode
11981 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11982 target = gen_reg_rtx (tmode);
11984 pat = GEN_FCN (icode) (target);
11985 if (! pat)
11986 return 0;
11987 emit_insn (pat);
11989 return target;
11993 static rtx
11994 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
11996 rtx pat;
11997 tree arg0 = CALL_EXPR_ARG (exp, 0);
11998 tree arg1 = CALL_EXPR_ARG (exp, 1);
11999 rtx op0 = expand_normal (arg0);
12000 rtx op1 = expand_normal (arg1);
12001 machine_mode mode0 = insn_data[icode].operand[0].mode;
12002 machine_mode mode1 = insn_data[icode].operand[1].mode;
12004 if (icode == CODE_FOR_nothing)
12005 /* Builtin not supported on this processor. */
12006 return 0;
12008 /* If we got invalid arguments bail out before generating bad rtl. */
12009 if (arg0 == error_mark_node || arg1 == error_mark_node)
12010 return const0_rtx;
12012 if (GET_CODE (op0) != CONST_INT
12013 || INTVAL (op0) > 255
12014 || INTVAL (op0) < 0)
12016 error ("argument 1 must be an 8-bit field value");
12017 return const0_rtx;
12020 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12021 op0 = copy_to_mode_reg (mode0, op0);
12023 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12024 op1 = copy_to_mode_reg (mode1, op1);
12026 pat = GEN_FCN (icode) (op0, op1);
12027 if (! pat)
12028 return const0_rtx;
12029 emit_insn (pat);
12031 return NULL_RTX;
12035 static rtx
12036 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12038 rtx pat;
12039 tree arg0 = CALL_EXPR_ARG (exp, 0);
12040 rtx op0 = expand_normal (arg0);
12041 machine_mode tmode = insn_data[icode].operand[0].mode;
12042 machine_mode mode0 = insn_data[icode].operand[1].mode;
12044 if (icode == CODE_FOR_nothing)
12045 /* Builtin not supported on this processor. */
12046 return 0;
12048 /* If we got invalid arguments bail out before generating bad rtl. */
12049 if (arg0 == error_mark_node)
12050 return const0_rtx;
12052 if (icode == CODE_FOR_altivec_vspltisb
12053 || icode == CODE_FOR_altivec_vspltish
12054 || icode == CODE_FOR_altivec_vspltisw
12055 || icode == CODE_FOR_spe_evsplatfi
12056 || icode == CODE_FOR_spe_evsplati)
12058 /* Only allow 5-bit *signed* literals. */
12059 if (GET_CODE (op0) != CONST_INT
12060 || INTVAL (op0) > 15
12061 || INTVAL (op0) < -16)
12063 error ("argument 1 must be a 5-bit signed literal");
12064 return const0_rtx;
12068 if (target == 0
12069 || GET_MODE (target) != tmode
12070 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12071 target = gen_reg_rtx (tmode);
12073 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12074 op0 = copy_to_mode_reg (mode0, op0);
12076 pat = GEN_FCN (icode) (target, op0);
12077 if (! pat)
12078 return 0;
12079 emit_insn (pat);
12081 return target;
12084 static rtx
12085 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12087 rtx pat, scratch1, scratch2;
12088 tree arg0 = CALL_EXPR_ARG (exp, 0);
12089 rtx op0 = expand_normal (arg0);
12090 machine_mode tmode = insn_data[icode].operand[0].mode;
12091 machine_mode mode0 = insn_data[icode].operand[1].mode;
12093 /* If we have invalid arguments, bail out before generating bad rtl. */
12094 if (arg0 == error_mark_node)
12095 return const0_rtx;
12097 if (target == 0
12098 || GET_MODE (target) != tmode
12099 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12100 target = gen_reg_rtx (tmode);
12102 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12103 op0 = copy_to_mode_reg (mode0, op0);
12105 scratch1 = gen_reg_rtx (mode0);
12106 scratch2 = gen_reg_rtx (mode0);
12108 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12109 if (! pat)
12110 return 0;
12111 emit_insn (pat);
12113 return target;
12116 static rtx
12117 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12119 rtx pat;
12120 tree arg0 = CALL_EXPR_ARG (exp, 0);
12121 tree arg1 = CALL_EXPR_ARG (exp, 1);
12122 rtx op0 = expand_normal (arg0);
12123 rtx op1 = expand_normal (arg1);
12124 machine_mode tmode = insn_data[icode].operand[0].mode;
12125 machine_mode mode0 = insn_data[icode].operand[1].mode;
12126 machine_mode mode1 = insn_data[icode].operand[2].mode;
12128 if (icode == CODE_FOR_nothing)
12129 /* Builtin not supported on this processor. */
12130 return 0;
12132 /* If we got invalid arguments bail out before generating bad rtl. */
12133 if (arg0 == error_mark_node || arg1 == error_mark_node)
12134 return const0_rtx;
12136 if (icode == CODE_FOR_altivec_vcfux
12137 || icode == CODE_FOR_altivec_vcfsx
12138 || icode == CODE_FOR_altivec_vctsxs
12139 || icode == CODE_FOR_altivec_vctuxs
12140 || icode == CODE_FOR_altivec_vspltb
12141 || icode == CODE_FOR_altivec_vsplth
12142 || icode == CODE_FOR_altivec_vspltw
12143 || icode == CODE_FOR_spe_evaddiw
12144 || icode == CODE_FOR_spe_evldd
12145 || icode == CODE_FOR_spe_evldh
12146 || icode == CODE_FOR_spe_evldw
12147 || icode == CODE_FOR_spe_evlhhesplat
12148 || icode == CODE_FOR_spe_evlhhossplat
12149 || icode == CODE_FOR_spe_evlhhousplat
12150 || icode == CODE_FOR_spe_evlwhe
12151 || icode == CODE_FOR_spe_evlwhos
12152 || icode == CODE_FOR_spe_evlwhou
12153 || icode == CODE_FOR_spe_evlwhsplat
12154 || icode == CODE_FOR_spe_evlwwsplat
12155 || icode == CODE_FOR_spe_evrlwi
12156 || icode == CODE_FOR_spe_evslwi
12157 || icode == CODE_FOR_spe_evsrwis
12158 || icode == CODE_FOR_spe_evsubifw
12159 || icode == CODE_FOR_spe_evsrwiu)
12161 /* Only allow 5-bit unsigned literals. */
12162 STRIP_NOPS (arg1);
12163 if (TREE_CODE (arg1) != INTEGER_CST
12164 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12166 error ("argument 2 must be a 5-bit unsigned literal");
12167 return const0_rtx;
12171 if (target == 0
12172 || GET_MODE (target) != tmode
12173 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12174 target = gen_reg_rtx (tmode);
12176 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12177 op0 = copy_to_mode_reg (mode0, op0);
12178 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12179 op1 = copy_to_mode_reg (mode1, op1);
12181 pat = GEN_FCN (icode) (target, op0, op1);
12182 if (! pat)
12183 return 0;
12184 emit_insn (pat);
12186 return target;
12189 static rtx
12190 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12192 rtx pat, scratch;
12193 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12194 tree arg0 = CALL_EXPR_ARG (exp, 1);
12195 tree arg1 = CALL_EXPR_ARG (exp, 2);
12196 rtx op0 = expand_normal (arg0);
12197 rtx op1 = expand_normal (arg1);
12198 machine_mode tmode = SImode;
12199 machine_mode mode0 = insn_data[icode].operand[1].mode;
12200 machine_mode mode1 = insn_data[icode].operand[2].mode;
12201 int cr6_form_int;
12203 if (TREE_CODE (cr6_form) != INTEGER_CST)
12205 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12206 return const0_rtx;
12208 else
12209 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12211 gcc_assert (mode0 == mode1);
12213 /* If we have invalid arguments, bail out before generating bad rtl. */
12214 if (arg0 == error_mark_node || arg1 == error_mark_node)
12215 return const0_rtx;
12217 if (target == 0
12218 || GET_MODE (target) != tmode
12219 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12220 target = gen_reg_rtx (tmode);
12222 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12223 op0 = copy_to_mode_reg (mode0, op0);
12224 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12225 op1 = copy_to_mode_reg (mode1, op1);
12227 scratch = gen_reg_rtx (mode0);
12229 pat = GEN_FCN (icode) (scratch, op0, op1);
12230 if (! pat)
12231 return 0;
12232 emit_insn (pat);
12234 /* The vec_any* and vec_all* predicates use the same opcodes for two
12235 different operations, but the bits in CR6 will be different
12236 depending on what information we want. So we have to play tricks
12237 with CR6 to get the right bits out.
12239 If you think this is disgusting, look at the specs for the
12240 AltiVec predicates. */
12242 switch (cr6_form_int)
12244 case 0:
12245 emit_insn (gen_cr6_test_for_zero (target));
12246 break;
12247 case 1:
12248 emit_insn (gen_cr6_test_for_zero_reverse (target));
12249 break;
12250 case 2:
12251 emit_insn (gen_cr6_test_for_lt (target));
12252 break;
12253 case 3:
12254 emit_insn (gen_cr6_test_for_lt_reverse (target));
12255 break;
12256 default:
12257 error ("argument 1 of __builtin_altivec_predicate is out of range");
12258 break;
12261 return target;
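/* Example of the CR6 trick above (illustrative): vec_all_eq and
   vec_any_eq expand to the same vcmpequ* comparison; altivec.h passes
   cr6_form 2 (__CR6_LT) for vec_all_eq, testing the CR6 "all elements
   compared true" bit, and cr6_form 1 (__CR6_EQ_REV) for vec_any_eq,
   testing the complement of the "all elements compared false" bit.  */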
12264 static rtx
12265 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12267 rtx pat, addr;
12268 tree arg0 = CALL_EXPR_ARG (exp, 0);
12269 tree arg1 = CALL_EXPR_ARG (exp, 1);
12270 machine_mode tmode = insn_data[icode].operand[0].mode;
12271 machine_mode mode0 = Pmode;
12272 machine_mode mode1 = Pmode;
12273 rtx op0 = expand_normal (arg0);
12274 rtx op1 = expand_normal (arg1);
12276 if (icode == CODE_FOR_nothing)
12277 /* Builtin not supported on this processor. */
12278 return 0;
12280 /* If we got invalid arguments bail out before generating bad rtl. */
12281 if (arg0 == error_mark_node || arg1 == error_mark_node)
12282 return const0_rtx;
12284 if (target == 0
12285 || GET_MODE (target) != tmode
12286 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12287 target = gen_reg_rtx (tmode);
12289 op1 = copy_to_mode_reg (mode1, op1);
12291 if (op0 == const0_rtx)
12293 addr = gen_rtx_MEM (tmode, op1);
12295 else
12297 op0 = copy_to_mode_reg (mode0, op0);
12298 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12301 pat = GEN_FCN (icode) (target, addr);
12303 if (! pat)
12304 return 0;
12305 emit_insn (pat);
12307 return target;
12310 /* Return a constant vector for use as a little-endian permute control vector
12311 to reverse the order of elements of the given vector mode. */
12312 static rtx
12313 swap_selector_for_mode (machine_mode mode)
12315 /* These are little endian vectors, so their elements are reversed
12316 from what you would normally expect for a permute control vector. */
12317 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12318 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12319 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12320 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12321 unsigned int *swaparray, i;
12322 rtx perm[16];
12324 switch (mode)
12326 case V2DFmode:
12327 case V2DImode:
12328 swaparray = swap2;
12329 break;
12330 case V4SFmode:
12331 case V4SImode:
12332 swaparray = swap4;
12333 break;
12334 case V8HImode:
12335 swaparray = swap8;
12336 break;
12337 case V16QImode:
12338 swaparray = swap16;
12339 break;
12340 default:
12341 gcc_unreachable ();
12344 for (i = 0; i < 16; ++i)
12345 perm[i] = GEN_INT (swaparray[i]);
12347 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
12350 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12351 with -maltivec=be specified. Issue the load followed by an element-reversing
12352 permute. */
12353 void
12354 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12356 rtx tmp = gen_reg_rtx (mode);
12357 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12358 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12359 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12360 rtx sel = swap_selector_for_mode (mode);
12361 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12363 gcc_assert (REG_P (op0));
12364 emit_insn (par);
12365 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
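/* Sketch of the sequence emitted above for a vec_ld-style load on a
   little-endian target with -maltivec=be (illustrative):

       lvx   vTMP, rA, rB            # the UNSPEC-tagged load
       vperm vDST, vTMP, vTMP, vSEL  # element-reversing permute

   where vSEL is the constant vector from swap_selector_for_mode.  */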
12368 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12369 with -maltivec=be specified. Issue the store preceded by an element-reversing
12370 permute. */
12371 void
12372 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12374 rtx tmp = gen_reg_rtx (mode);
12375 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12376 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12377 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12378 rtx sel = swap_selector_for_mode (mode);
12379 rtx vperm;
12381 gcc_assert (REG_P (op1));
12382 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12383 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12384 emit_insn (par);
12387 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12388 specified. Issue the store preceded by an element-reversing permute. */
12389 void
12390 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12392 machine_mode inner_mode = GET_MODE_INNER (mode);
12393 rtx tmp = gen_reg_rtx (mode);
12394 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12395 rtx sel = swap_selector_for_mode (mode);
12396 rtx vperm;
12398 gcc_assert (REG_P (op1));
12399 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12400 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12401 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
12404 static rtx
12405 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12407 rtx pat, addr;
12408 tree arg0 = CALL_EXPR_ARG (exp, 0);
12409 tree arg1 = CALL_EXPR_ARG (exp, 1);
12410 machine_mode tmode = insn_data[icode].operand[0].mode;
12411 machine_mode mode0 = Pmode;
12412 machine_mode mode1 = Pmode;
12413 rtx op0 = expand_normal (arg0);
12414 rtx op1 = expand_normal (arg1);
12416 if (icode == CODE_FOR_nothing)
12417 /* Builtin not supported on this processor. */
12418 return 0;
12420 /* If we got invalid arguments bail out before generating bad rtl. */
12421 if (arg0 == error_mark_node || arg1 == error_mark_node)
12422 return const0_rtx;
12424 if (target == 0
12425 || GET_MODE (target) != tmode
12426 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12427 target = gen_reg_rtx (tmode);
12429 op1 = copy_to_mode_reg (mode1, op1);
12431 if (op0 == const0_rtx)
12433 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12435 else
12437 op0 = copy_to_mode_reg (mode0, op0);
12438 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12441 pat = GEN_FCN (icode) (target, addr);
12443 if (! pat)
12444 return 0;
12445 emit_insn (pat);
12447 return target;
12450 static rtx
12451 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12453 tree arg0 = CALL_EXPR_ARG (exp, 0);
12454 tree arg1 = CALL_EXPR_ARG (exp, 1);
12455 tree arg2 = CALL_EXPR_ARG (exp, 2);
12456 rtx op0 = expand_normal (arg0);
12457 rtx op1 = expand_normal (arg1);
12458 rtx op2 = expand_normal (arg2);
12459 rtx pat;
12460 machine_mode mode0 = insn_data[icode].operand[0].mode;
12461 machine_mode mode1 = insn_data[icode].operand[1].mode;
12462 machine_mode mode2 = insn_data[icode].operand[2].mode;
12464 /* Invalid arguments. Bail before doing anything stoopid! */
12465 if (arg0 == error_mark_node
12466 || arg1 == error_mark_node
12467 || arg2 == error_mark_node)
12468 return const0_rtx;
12470 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12471 op0 = copy_to_mode_reg (mode2, op0);
12472 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12473 op1 = copy_to_mode_reg (mode0, op1);
12474 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12475 op2 = copy_to_mode_reg (mode1, op2);
12477 pat = GEN_FCN (icode) (op1, op2, op0);
12478 if (pat)
12479 emit_insn (pat);
12480 return NULL_RTX;
12483 static rtx
12484 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12486 tree arg0 = CALL_EXPR_ARG (exp, 0);
12487 tree arg1 = CALL_EXPR_ARG (exp, 1);
12488 tree arg2 = CALL_EXPR_ARG (exp, 2);
12489 rtx op0 = expand_normal (arg0);
12490 rtx op1 = expand_normal (arg1);
12491 rtx op2 = expand_normal (arg2);
12492 rtx pat, addr;
12493 machine_mode tmode = insn_data[icode].operand[0].mode;
12494 machine_mode mode1 = Pmode;
12495 machine_mode mode2 = Pmode;
12497 /* Invalid arguments. Bail before doing anything stoopid! */
12498 if (arg0 == error_mark_node
12499 || arg1 == error_mark_node
12500 || arg2 == error_mark_node)
12501 return const0_rtx;
12503 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12504 op0 = copy_to_mode_reg (tmode, op0);
12506 op2 = copy_to_mode_reg (mode2, op2);
12508 if (op1 == const0_rtx)
12510 addr = gen_rtx_MEM (tmode, op2);
12512 else
12514 op1 = copy_to_mode_reg (mode1, op1);
12515 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12518 pat = GEN_FCN (icode) (addr, op0);
12519 if (pat)
12520 emit_insn (pat);
12521 return NULL_RTX;
12524 static rtx
12525 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12527 tree arg0 = CALL_EXPR_ARG (exp, 0);
12528 tree arg1 = CALL_EXPR_ARG (exp, 1);
12529 tree arg2 = CALL_EXPR_ARG (exp, 2);
12530 rtx op0 = expand_normal (arg0);
12531 rtx op1 = expand_normal (arg1);
12532 rtx op2 = expand_normal (arg2);
12533 rtx pat, addr;
12534 machine_mode tmode = insn_data[icode].operand[0].mode;
12535 machine_mode smode = insn_data[icode].operand[1].mode;
12536 machine_mode mode1 = Pmode;
12537 machine_mode mode2 = Pmode;
12539 /* Invalid arguments. Bail before doing anything stoopid! */
12540 if (arg0 == error_mark_node
12541 || arg1 == error_mark_node
12542 || arg2 == error_mark_node)
12543 return const0_rtx;
12545 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12546 op0 = copy_to_mode_reg (smode, op0);
12548 op2 = copy_to_mode_reg (mode2, op2);
12550 if (op1 == const0_rtx)
12552 addr = gen_rtx_MEM (tmode, op2);
12554 else
12556 op1 = copy_to_mode_reg (mode1, op1);
12557 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12560 pat = GEN_FCN (icode) (addr, op0);
12561 if (pat)
12562 emit_insn (pat);
12563 return NULL_RTX;
12566 /* Return the appropriate SPR number associated with the given builtin. */
12567 static inline HOST_WIDE_INT
12568 htm_spr_num (enum rs6000_builtins code)
12570 if (code == HTM_BUILTIN_GET_TFHAR
12571 || code == HTM_BUILTIN_SET_TFHAR)
12572 return TFHAR_SPR;
12573 else if (code == HTM_BUILTIN_GET_TFIAR
12574 || code == HTM_BUILTIN_SET_TFIAR)
12575 return TFIAR_SPR;
12576 else if (code == HTM_BUILTIN_GET_TEXASR
12577 || code == HTM_BUILTIN_SET_TEXASR)
12578 return TEXASR_SPR;
12579 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12580 || code == HTM_BUILTIN_SET_TEXASRU);
12581 return TEXASRU_SPR;
12584 /* Return the appropriate SPR regno associated with the given builtin. */
12585 static inline HOST_WIDE_INT
12586 htm_spr_regno (enum rs6000_builtins code)
12588 if (code == HTM_BUILTIN_GET_TFHAR
12589 || code == HTM_BUILTIN_SET_TFHAR)
12590 return TFHAR_REGNO;
12591 else if (code == HTM_BUILTIN_GET_TFIAR
12592 || code == HTM_BUILTIN_SET_TFIAR)
12593 return TFIAR_REGNO;
12594 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12595 || code == HTM_BUILTIN_SET_TEXASR
12596 || code == HTM_BUILTIN_GET_TEXASRU
12597 || code == HTM_BUILTIN_SET_TEXASRU);
12598 return TEXASR_REGNO;
12601 /* Return the correct ICODE value depending on whether we are
12602 setting or reading the HTM SPRs. */
12603 static inline enum insn_code
12604 rs6000_htm_spr_icode (bool nonvoid)
12606 if (nonvoid)
12607 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12608 else
12609 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12612 /* Expand the HTM builtin in EXP and store the result in TARGET.
12613 Store true in *EXPANDEDP if we found a builtin to expand. */
12614 static rtx
12615 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12617 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12618 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12619 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12620 const struct builtin_description *d;
12621 size_t i;
12623 *expandedp = false;
12625 /* Expand the HTM builtins. */
12626 d = bdesc_htm;
12627 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12628 if (d->code == fcode)
12630 rtx op[MAX_HTM_OPERANDS], pat;
12631 int nopnds = 0;
12632 tree arg;
12633 call_expr_arg_iterator iter;
12634 unsigned attr = rs6000_builtin_info[fcode].attr;
12635 enum insn_code icode = d->icode;
12637 if (attr & RS6000_BTC_SPR)
12638 icode = rs6000_htm_spr_icode (nonvoid);
12640 if (nonvoid)
12641 {
12642 machine_mode tmode = insn_data[icode].operand[0].mode;
12643 if (!target
12644 || GET_MODE (target) != tmode
12645 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12646 target = gen_reg_rtx (tmode);
12647 op[nopnds++] = target;
12648 }
12650 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12652 const struct insn_operand_data *insn_op;
12654 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12655 return NULL_RTX;
12657 insn_op = &insn_data[icode].operand[nopnds];
12659 op[nopnds] = expand_normal (arg);
12661 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12662 {
12663 if (!strcmp (insn_op->constraint, "n"))
12664 {
12665 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12666 if (!CONST_INT_P (op[nopnds]))
12667 error ("argument %d must be an unsigned literal", arg_num);
12668 else
12669 error ("argument %d is an unsigned literal that is "
12670 "out of range", arg_num);
12671 return const0_rtx;
12672 }
12673 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12674 }
12676 nopnds++;
12679 /* Handle the builtins for extended mnemonics. These accept
12680 no arguments, but map to builtins that take arguments. */
12681 switch (fcode)
12683 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12684 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12685 op[nopnds++] = GEN_INT (1);
12686 #ifdef ENABLE_CHECKING
12687 attr |= RS6000_BTC_UNARY;
12688 #endif
12689 break;
12690 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12691 op[nopnds++] = GEN_INT (0);
12692 #ifdef ENABLE_CHECKING
12693 attr |= RS6000_BTC_UNARY;
12694 #endif
12695 break;
12696 default:
12697 break;
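/* Illustration: __builtin_tendall () takes no source-level arguments,
   but the underlying tend. pattern expects its A-bit operand, so the
   constant 1 appended above makes the alias behave exactly like
   __builtin_tend (1).  */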
12700 /* If this builtin accesses SPRs, then pass in the appropriate
12701 SPR number and SPR regno as the last two operands. */
12702 if (attr & RS6000_BTC_SPR)
12703 {
12704 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12705 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12706 }
12708 #ifdef ENABLE_CHECKING
12709 int expected_nopnds = 0;
12710 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12711 expected_nopnds = 1;
12712 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12713 expected_nopnds = 2;
12714 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12715 expected_nopnds = 3;
12716 if (!(attr & RS6000_BTC_VOID))
12717 expected_nopnds += 1;
12718 if (attr & RS6000_BTC_SPR)
12719 expected_nopnds += 2;
12721 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12722 #endif
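/* Worked instance of the count above (illustrative): the void SPR
   builtin __builtin_set_texasr (x) is unary (1 operand), contributes
   no target, and accesses an SPR (+2), so nopnds must be 3:
   { x, SPR number, SPR regno }.  */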
12724 switch (nopnds)
12726 case 1:
12727 pat = GEN_FCN (icode) (op[0]);
12728 break;
12729 case 2:
12730 pat = GEN_FCN (icode) (op[0], op[1]);
12731 break;
12732 case 3:
12733 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12734 break;
12735 case 4:
12736 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12737 break;
12738 default:
12739 gcc_unreachable ();
12741 if (!pat)
12742 return NULL_RTX;
12743 emit_insn (pat);
12745 *expandedp = true;
12746 if (nonvoid)
12747 return target;
12748 return const0_rtx;
12751 return NULL_RTX;
12754 static rtx
12755 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12757 rtx pat;
12758 tree arg0 = CALL_EXPR_ARG (exp, 0);
12759 tree arg1 = CALL_EXPR_ARG (exp, 1);
12760 tree arg2 = CALL_EXPR_ARG (exp, 2);
12761 rtx op0 = expand_normal (arg0);
12762 rtx op1 = expand_normal (arg1);
12763 rtx op2 = expand_normal (arg2);
12764 machine_mode tmode = insn_data[icode].operand[0].mode;
12765 machine_mode mode0 = insn_data[icode].operand[1].mode;
12766 machine_mode mode1 = insn_data[icode].operand[2].mode;
12767 machine_mode mode2 = insn_data[icode].operand[3].mode;
12769 if (icode == CODE_FOR_nothing)
12770 /* Builtin not supported on this processor. */
12771 return 0;
12773 /* If we got invalid arguments bail out before generating bad rtl. */
12774 if (arg0 == error_mark_node
12775 || arg1 == error_mark_node
12776 || arg2 == error_mark_node)
12777 return const0_rtx;
12779 /* Check and prepare argument depending on the instruction code.
12781 Note that a switch statement instead of the sequence of tests
12782 would be incorrect as many of the CODE_FOR values could be
12783 CODE_FOR_nothing and that would yield multiple alternatives
12784 with identical values. We'd never reach here at runtime in
12785 this case. */
12786 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12787 || icode == CODE_FOR_altivec_vsldoi_v4si
12788 || icode == CODE_FOR_altivec_vsldoi_v8hi
12789 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12791 /* Only allow 4-bit unsigned literals. */
12792 STRIP_NOPS (arg2);
12793 if (TREE_CODE (arg2) != INTEGER_CST
12794 || TREE_INT_CST_LOW (arg2) & ~0xf)
12795 {
12796 error ("argument 3 must be a 4-bit unsigned literal");
12797 return const0_rtx;
12798 }
12800 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12801 || icode == CODE_FOR_vsx_xxpermdi_v2di
12802 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12803 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12804 || icode == CODE_FOR_vsx_xxsldwi_v4si
12805 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12806 || icode == CODE_FOR_vsx_xxsldwi_v2di
12807 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12809 /* Only allow 2-bit unsigned literals. */
12810 STRIP_NOPS (arg2);
12811 if (TREE_CODE (arg2) != INTEGER_CST
12812 || TREE_INT_CST_LOW (arg2) & ~0x3)
12813 {
12814 error ("argument 3 must be a 2-bit unsigned literal");
12815 return const0_rtx;
12816 }
12818 else if (icode == CODE_FOR_vsx_set_v2df
12819 || icode == CODE_FOR_vsx_set_v2di
12820 || icode == CODE_FOR_bcdadd
12821 || icode == CODE_FOR_bcdadd_lt
12822 || icode == CODE_FOR_bcdadd_eq
12823 || icode == CODE_FOR_bcdadd_gt
12824 || icode == CODE_FOR_bcdsub
12825 || icode == CODE_FOR_bcdsub_lt
12826 || icode == CODE_FOR_bcdsub_eq
12827 || icode == CODE_FOR_bcdsub_gt)
12829 /* Only allow 1-bit unsigned literals. */
12830 STRIP_NOPS (arg2);
12831 if (TREE_CODE (arg2) != INTEGER_CST
12832 || TREE_INT_CST_LOW (arg2) & ~0x1)
12833 {
12834 error ("argument 3 must be a 1-bit unsigned literal");
12835 return const0_rtx;
12836 }
12838 else if (icode == CODE_FOR_dfp_ddedpd_dd
12839 || icode == CODE_FOR_dfp_ddedpd_td)
12841 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12842 STRIP_NOPS (arg0);
12843 if (TREE_CODE (arg0) != INTEGER_CST
12844 || TREE_INT_CST_LOW (arg0) & ~0x3)
12845 {
12846 error ("argument 1 must be 0 or 2");
12847 return const0_rtx;
12848 }
12850 else if (icode == CODE_FOR_dfp_denbcd_dd
12851 || icode == CODE_FOR_dfp_denbcd_td)
12853 /* Only allow 1-bit unsigned literals. */
12854 STRIP_NOPS (arg0);
12855 if (TREE_CODE (arg0) != INTEGER_CST
12856 || TREE_INT_CST_LOW (arg0) & ~0x1)
12857 {
12858 error ("argument 1 must be a 1-bit unsigned literal");
12859 return const0_rtx;
12860 }
12862 else if (icode == CODE_FOR_dfp_dscli_dd
12863 || icode == CODE_FOR_dfp_dscli_td
12864 || icode == CODE_FOR_dfp_dscri_dd
12865 || icode == CODE_FOR_dfp_dscri_td)
12867 /* Only allow 6-bit unsigned literals. */
12868 STRIP_NOPS (arg1);
12869 if (TREE_CODE (arg1) != INTEGER_CST
12870 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12871 {
12872 error ("argument 2 must be a 6-bit unsigned literal");
12873 return const0_rtx;
12874 }
12876 else if (icode == CODE_FOR_crypto_vshasigmaw
12877 || icode == CODE_FOR_crypto_vshasigmad)
12879 /* Check whether the 2nd and 3rd arguments are integer constants and in
12880 range and prepare arguments. */
12881 STRIP_NOPS (arg1);
12882 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
12883 {
12884 error ("argument 2 must be 0 or 1");
12885 return const0_rtx;
12886 }
12888 STRIP_NOPS (arg2);
12889 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
12890 {
12891 error ("argument 3 must be in the range 0..15");
12892 return const0_rtx;
12893 }
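/* So, illustratively, __builtin_crypto_vshasigmad (x, 1, 0xf) passes
   both checks, while any third argument of 16 or more is rejected
   here before any rtl is generated.  */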
12896 if (target == 0
12897 || GET_MODE (target) != tmode
12898 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12899 target = gen_reg_rtx (tmode);
12901 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12902 op0 = copy_to_mode_reg (mode0, op0);
12903 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12904 op1 = copy_to_mode_reg (mode1, op1);
12905 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12906 op2 = copy_to_mode_reg (mode2, op2);
12908 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
12909 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
12910 else
12911 pat = GEN_FCN (icode) (target, op0, op1, op2);
12912 if (! pat)
12913 return 0;
12914 emit_insn (pat);
12916 return target;
12919 /* Expand the lvx builtins. */
12920 static rtx
12921 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
12923 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12924 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12925 tree arg0;
12926 machine_mode tmode, mode0;
12927 rtx pat, op0;
12928 enum insn_code icode;
12930 switch (fcode)
12932 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
12933 icode = CODE_FOR_vector_altivec_load_v16qi;
12934 break;
12935 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
12936 icode = CODE_FOR_vector_altivec_load_v8hi;
12937 break;
12938 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
12939 icode = CODE_FOR_vector_altivec_load_v4si;
12940 break;
12941 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
12942 icode = CODE_FOR_vector_altivec_load_v4sf;
12943 break;
12944 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
12945 icode = CODE_FOR_vector_altivec_load_v2df;
12946 break;
12947 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12948 icode = CODE_FOR_vector_altivec_load_v2di; break;
12949 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
12950 icode = CODE_FOR_vector_altivec_load_v1ti;
12951 break;
12952 default:
12953 *expandedp = false;
12954 return NULL_RTX;
12957 *expandedp = true;
12959 arg0 = CALL_EXPR_ARG (exp, 0);
12960 op0 = expand_normal (arg0);
12961 tmode = insn_data[icode].operand[0].mode;
12962 mode0 = insn_data[icode].operand[1].mode;
12964 if (target == 0
12965 || GET_MODE (target) != tmode
12966 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12967 target = gen_reg_rtx (tmode);
12969 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12970 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12972 pat = GEN_FCN (icode) (target, op0);
12973 if (! pat)
12974 return 0;
12975 emit_insn (pat);
12976 return target;
12979 /* Expand the stvx builtins. */
12980 static rtx
12981 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12982 bool *expandedp)
12984 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12985 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12986 tree arg0, arg1;
12987 machine_mode mode0, mode1;
12988 rtx pat, op0, op1;
12989 enum insn_code icode;
12991 switch (fcode)
12993 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
12994 icode = CODE_FOR_vector_altivec_store_v16qi;
12995 break;
12996 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
12997 icode = CODE_FOR_vector_altivec_store_v8hi;
12998 break;
12999 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13000 icode = CODE_FOR_vector_altivec_store_v4si;
13001 break;
13002 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13003 icode = CODE_FOR_vector_altivec_store_v4sf;
13004 break;
13005 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13006 icode = CODE_FOR_vector_altivec_store_v2df;
13007 break;
13008 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
13009 icode = CODE_FOR_vector_altivec_store_v2di; break;
13010 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13011 icode = CODE_FOR_vector_altivec_store_v1ti;
13012 break;
13013 default:
13014 *expandedp = false;
13015 return NULL_RTX;
13018 arg0 = CALL_EXPR_ARG (exp, 0);
13019 arg1 = CALL_EXPR_ARG (exp, 1);
13020 op0 = expand_normal (arg0);
13021 op1 = expand_normal (arg1);
13022 mode0 = insn_data[icode].operand[0].mode;
13023 mode1 = insn_data[icode].operand[1].mode;
13025 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13026 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13027 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13028 op1 = copy_to_mode_reg (mode1, op1);
13030 pat = GEN_FCN (icode) (op0, op1);
13031 if (pat)
13032 emit_insn (pat);
13034 *expandedp = true;
13035 return NULL_RTX;
13038 /* Expand the dst builtins. */
13039 static rtx
13040 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13041 bool *expandedp)
13043 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13044 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13045 tree arg0, arg1, arg2;
13046 machine_mode mode0, mode1;
13047 rtx pat, op0, op1, op2;
13048 const struct builtin_description *d;
13049 size_t i;
13051 *expandedp = false;
13053 /* Handle DST variants. */
13054 d = bdesc_dst;
13055 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13056 if (d->code == fcode)
13058 arg0 = CALL_EXPR_ARG (exp, 0);
13059 arg1 = CALL_EXPR_ARG (exp, 1);
13060 arg2 = CALL_EXPR_ARG (exp, 2);
13061 op0 = expand_normal (arg0);
13062 op1 = expand_normal (arg1);
13063 op2 = expand_normal (arg2);
13064 mode0 = insn_data[d->icode].operand[0].mode;
13065 mode1 = insn_data[d->icode].operand[1].mode;
13067 /* Invalid arguments, bail out before generating bad rtl. */
13068 if (arg0 == error_mark_node
13069 || arg1 == error_mark_node
13070 || arg2 == error_mark_node)
13071 return const0_rtx;
13073 *expandedp = true;
13074 STRIP_NOPS (arg2);
13075 if (TREE_CODE (arg2) != INTEGER_CST
13076 || TREE_INT_CST_LOW (arg2) & ~0x3)
13077 {
13078 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13079 return const0_rtx;
13080 }
13082 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13083 op0 = copy_to_mode_reg (Pmode, op0);
13084 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13085 op1 = copy_to_mode_reg (mode1, op1);
13087 pat = GEN_FCN (d->icode) (op0, op1, op2);
13088 if (pat != 0)
13089 emit_insn (pat);
13091 return NULL_RTX;
13094 return NULL_RTX;
13097 /* Expand vec_init builtin. */
13098 static rtx
13099 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13101 machine_mode tmode = TYPE_MODE (type);
13102 machine_mode inner_mode = GET_MODE_INNER (tmode);
13103 int i, n_elt = GET_MODE_NUNITS (tmode);
13105 gcc_assert (VECTOR_MODE_P (tmode));
13106 gcc_assert (n_elt == call_expr_nargs (exp));
13108 if (!target || !register_operand (target, tmode))
13109 target = gen_reg_rtx (tmode);
13111 /* If we have a vector comprised of a single element, such as V1TImode, do
13112 the initialization directly. */
13113 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13114 {
13115 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13116 emit_move_insn (target, gen_lowpart (tmode, x));
13117 }
13118 else
13119 {
13120 rtvec v = rtvec_alloc (n_elt);
13122 for (i = 0; i < n_elt; ++i)
13123 {
13124 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13125 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13126 }
13128 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13129 }
13131 return target;
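/* E.g. a call such as __builtin_vec_init_v4si (a, b, c, d) takes the
   else arm above: the four SImode values are collected into a
   PARALLEL and handed to rs6000_expand_vector_init (illustrative).  */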
13134 /* Return the integer constant in ARG. Constrain it to be in the range
13135 of the subparts of VEC_TYPE; issue an error if not. */
13137 static int
13138 get_element_number (tree vec_type, tree arg)
13140 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13142 if (!tree_fits_uhwi_p (arg)
13143 || (elt = tree_to_uhwi (arg), elt > max))
13144 {
13145 error ("selector must be an integer constant in the range 0..%wi", max);
13146 return 0;
13147 }
13149 return elt;
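/* E.g. for the V4SI case TYPE_VECTOR_SUBPARTS is 4, so MAX is 3 and
   a call such as __builtin_vec_ext_v4si (v, 5) is diagnosed here
   (illustrative example).  */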
13152 /* Expand vec_set builtin. */
13153 static rtx
13154 altivec_expand_vec_set_builtin (tree exp)
13156 machine_mode tmode, mode1;
13157 tree arg0, arg1, arg2;
13158 int elt;
13159 rtx op0, op1;
13161 arg0 = CALL_EXPR_ARG (exp, 0);
13162 arg1 = CALL_EXPR_ARG (exp, 1);
13163 arg2 = CALL_EXPR_ARG (exp, 2);
13165 tmode = TYPE_MODE (TREE_TYPE (arg0));
13166 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13167 gcc_assert (VECTOR_MODE_P (tmode));
13169 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13170 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13171 elt = get_element_number (TREE_TYPE (arg0), arg2);
13173 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13174 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13176 op0 = force_reg (tmode, op0);
13177 op1 = force_reg (mode1, op1);
13179 rs6000_expand_vector_set (op0, op1, elt);
13181 return op0;
13184 /* Expand vec_ext builtin. */
13185 static rtx
13186 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13188 machine_mode tmode, mode0;
13189 tree arg0, arg1;
13190 int elt;
13191 rtx op0;
13193 arg0 = CALL_EXPR_ARG (exp, 0);
13194 arg1 = CALL_EXPR_ARG (exp, 1);
13196 op0 = expand_normal (arg0);
13197 elt = get_element_number (TREE_TYPE (arg0), arg1);
13199 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13200 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13201 gcc_assert (VECTOR_MODE_P (mode0));
13203 op0 = force_reg (mode0, op0);
13205 if (optimize || !target || !register_operand (target, tmode))
13206 target = gen_reg_rtx (tmode);
13208 rs6000_expand_vector_extract (target, op0, elt);
13210 return target;
13213 /* Expand the builtin in EXP and store the result in TARGET. Store
13214 true in *EXPANDEDP if we found a builtin to expand. */
13215 static rtx
13216 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13218 const struct builtin_description *d;
13219 size_t i;
13220 enum insn_code icode;
13221 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13222 tree arg0;
13223 rtx op0, pat;
13224 machine_mode tmode, mode0;
13225 enum rs6000_builtins fcode
13226 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13228 if (rs6000_overloaded_builtin_p (fcode))
13229 {
13230 *expandedp = true;
13231 error ("unresolved overload for Altivec builtin %qF", fndecl);
13233 /* Given it is invalid, just generate a normal call. */
13234 return expand_call (exp, target, false);
13235 }
13237 target = altivec_expand_ld_builtin (exp, target, expandedp);
13238 if (*expandedp)
13239 return target;
13241 target = altivec_expand_st_builtin (exp, target, expandedp);
13242 if (*expandedp)
13243 return target;
13245 target = altivec_expand_dst_builtin (exp, target, expandedp);
13246 if (*expandedp)
13247 return target;
13249 *expandedp = true;
13251 switch (fcode)
13253 case ALTIVEC_BUILTIN_STVX_V2DF:
13254 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13255 case ALTIVEC_BUILTIN_STVX_V2DI:
13256 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13257 case ALTIVEC_BUILTIN_STVX_V4SF:
13258 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13259 case ALTIVEC_BUILTIN_STVX:
13260 case ALTIVEC_BUILTIN_STVX_V4SI:
13261 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13262 case ALTIVEC_BUILTIN_STVX_V8HI:
13263 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13264 case ALTIVEC_BUILTIN_STVX_V16QI:
13265 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13266 case ALTIVEC_BUILTIN_STVEBX:
13267 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13268 case ALTIVEC_BUILTIN_STVEHX:
13269 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13270 case ALTIVEC_BUILTIN_STVEWX:
13271 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13272 case ALTIVEC_BUILTIN_STVXL_V2DF:
13273 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13274 case ALTIVEC_BUILTIN_STVXL_V2DI:
13275 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13276 case ALTIVEC_BUILTIN_STVXL_V4SF:
13277 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13278 case ALTIVEC_BUILTIN_STVXL:
13279 case ALTIVEC_BUILTIN_STVXL_V4SI:
13280 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13281 case ALTIVEC_BUILTIN_STVXL_V8HI:
13282 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13283 case ALTIVEC_BUILTIN_STVXL_V16QI:
13284 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13286 case ALTIVEC_BUILTIN_STVLX:
13287 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13288 case ALTIVEC_BUILTIN_STVLXL:
13289 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13290 case ALTIVEC_BUILTIN_STVRX:
13291 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13292 case ALTIVEC_BUILTIN_STVRXL:
13293 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13295 case VSX_BUILTIN_STXVD2X_V1TI:
13296 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13297 case VSX_BUILTIN_STXVD2X_V2DF:
13298 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13299 case VSX_BUILTIN_STXVD2X_V2DI:
13300 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13301 case VSX_BUILTIN_STXVW4X_V4SF:
13302 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13303 case VSX_BUILTIN_STXVW4X_V4SI:
13304 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13305 case VSX_BUILTIN_STXVW4X_V8HI:
13306 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13307 case VSX_BUILTIN_STXVW4X_V16QI:
13308 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13310 case ALTIVEC_BUILTIN_MFVSCR:
13311 icode = CODE_FOR_altivec_mfvscr;
13312 tmode = insn_data[icode].operand[0].mode;
13314 if (target == 0
13315 || GET_MODE (target) != tmode
13316 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13317 target = gen_reg_rtx (tmode);
13319 pat = GEN_FCN (icode) (target);
13320 if (! pat)
13321 return 0;
13322 emit_insn (pat);
13323 return target;
13325 case ALTIVEC_BUILTIN_MTVSCR:
13326 icode = CODE_FOR_altivec_mtvscr;
13327 arg0 = CALL_EXPR_ARG (exp, 0);
13328 op0 = expand_normal (arg0);
13329 mode0 = insn_data[icode].operand[0].mode;
13331 /* If we got invalid arguments bail out before generating bad rtl. */
13332 if (arg0 == error_mark_node)
13333 return const0_rtx;
13335 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13336 op0 = copy_to_mode_reg (mode0, op0);
13338 pat = GEN_FCN (icode) (op0);
13339 if (pat)
13340 emit_insn (pat);
13341 return NULL_RTX;
13343 case ALTIVEC_BUILTIN_DSSALL:
13344 emit_insn (gen_altivec_dssall ());
13345 return NULL_RTX;
13347 case ALTIVEC_BUILTIN_DSS:
13348 icode = CODE_FOR_altivec_dss;
13349 arg0 = CALL_EXPR_ARG (exp, 0);
13350 STRIP_NOPS (arg0);
13351 op0 = expand_normal (arg0);
13352 mode0 = insn_data[icode].operand[0].mode;
13354 /* If we got invalid arguments bail out before generating bad rtl. */
13355 if (arg0 == error_mark_node)
13356 return const0_rtx;
13358 if (TREE_CODE (arg0) != INTEGER_CST
13359 || TREE_INT_CST_LOW (arg0) & ~0x3)
13360 {
13361 error ("argument to dss must be a 2-bit unsigned literal");
13362 return const0_rtx;
13363 }
13365 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13366 op0 = copy_to_mode_reg (mode0, op0);
13368 emit_insn (gen_altivec_dss (op0));
13369 return NULL_RTX;
13371 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13372 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13373 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13374 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13375 case VSX_BUILTIN_VEC_INIT_V2DF:
13376 case VSX_BUILTIN_VEC_INIT_V2DI:
13377 case VSX_BUILTIN_VEC_INIT_V1TI:
13378 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13380 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13381 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13382 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13383 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13384 case VSX_BUILTIN_VEC_SET_V2DF:
13385 case VSX_BUILTIN_VEC_SET_V2DI:
13386 case VSX_BUILTIN_VEC_SET_V1TI:
13387 return altivec_expand_vec_set_builtin (exp);
13389 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13390 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13391 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13392 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13393 case VSX_BUILTIN_VEC_EXT_V2DF:
13394 case VSX_BUILTIN_VEC_EXT_V2DI:
13395 case VSX_BUILTIN_VEC_EXT_V1TI:
13396 return altivec_expand_vec_ext_builtin (exp, target);
13398 default:
13399 break;
13403 /* Expand abs* operations. */
13404 d = bdesc_abs;
13405 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13406 if (d->code == fcode)
13407 return altivec_expand_abs_builtin (d->icode, exp, target);
13409 /* Expand the AltiVec predicates. */
13410 d = bdesc_altivec_preds;
13411 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13412 if (d->code == fcode)
13413 return altivec_expand_predicate_builtin (d->icode, exp, target);
13415 /* LV* are funky. We initialized them differently. */
13416 switch (fcode)
13418 case ALTIVEC_BUILTIN_LVSL:
13419 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13420 exp, target, false);
13421 case ALTIVEC_BUILTIN_LVSR:
13422 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13423 exp, target, false);
13424 case ALTIVEC_BUILTIN_LVEBX:
13425 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13426 exp, target, false);
13427 case ALTIVEC_BUILTIN_LVEHX:
13428 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13429 exp, target, false);
13430 case ALTIVEC_BUILTIN_LVEWX:
13431 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13432 exp, target, false);
13433 case ALTIVEC_BUILTIN_LVXL_V2DF:
13434 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13435 exp, target, false);
13436 case ALTIVEC_BUILTIN_LVXL_V2DI:
13437 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13438 exp, target, false);
13439 case ALTIVEC_BUILTIN_LVXL_V4SF:
13440 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13441 exp, target, false);
13442 case ALTIVEC_BUILTIN_LVXL:
13443 case ALTIVEC_BUILTIN_LVXL_V4SI:
13444 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13445 exp, target, false);
13446 case ALTIVEC_BUILTIN_LVXL_V8HI:
13447 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13448 exp, target, false);
13449 case ALTIVEC_BUILTIN_LVXL_V16QI:
13450 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13451 exp, target, false);
13452 case ALTIVEC_BUILTIN_LVX_V2DF:
13453 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13454 exp, target, false);
13455 case ALTIVEC_BUILTIN_LVX_V2DI:
13456 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13457 exp, target, false);
13458 case ALTIVEC_BUILTIN_LVX_V4SF:
13459 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13460 exp, target, false);
13461 case ALTIVEC_BUILTIN_LVX:
13462 case ALTIVEC_BUILTIN_LVX_V4SI:
13463 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13464 exp, target, false);
13465 case ALTIVEC_BUILTIN_LVX_V8HI:
13466 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13467 exp, target, false);
13468 case ALTIVEC_BUILTIN_LVX_V16QI:
13469 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13470 exp, target, false);
13471 case ALTIVEC_BUILTIN_LVLX:
13472 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13473 exp, target, true);
13474 case ALTIVEC_BUILTIN_LVLXL:
13475 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13476 exp, target, true);
13477 case ALTIVEC_BUILTIN_LVRX:
13478 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13479 exp, target, true);
13480 case ALTIVEC_BUILTIN_LVRXL:
13481 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13482 exp, target, true);
13483 case VSX_BUILTIN_LXVD2X_V1TI:
13484 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13485 exp, target, false);
13486 case VSX_BUILTIN_LXVD2X_V2DF:
13487 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13488 exp, target, false);
13489 case VSX_BUILTIN_LXVD2X_V2DI:
13490 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13491 exp, target, false);
13492 case VSX_BUILTIN_LXVW4X_V4SF:
13493 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13494 exp, target, false);
13495 case VSX_BUILTIN_LXVW4X_V4SI:
13496 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13497 exp, target, false);
13498 case VSX_BUILTIN_LXVW4X_V8HI:
13499 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13500 exp, target, false);
13501 case VSX_BUILTIN_LXVW4X_V16QI:
13502 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13503 exp, target, false);
13504 break;
13505 default:
13506 break;
13510 *expandedp = false;
13511 return NULL_RTX;
13514 /* Expand the builtin in EXP and store the result in TARGET. Store
13515 true in *EXPANDEDP if we found a builtin to expand. */
13516 static rtx
13517 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13519 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13520 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13521 const struct builtin_description *d;
13522 size_t i;
13524 *expandedp = true;
13526 switch (fcode)
13528 case PAIRED_BUILTIN_STX:
13529 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13530 case PAIRED_BUILTIN_LX:
13531 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13532 default:
13533 break;
13537 /* Expand the paired predicates. */
13538 d = bdesc_paired_preds;
13539 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13540 if (d->code == fcode)
13541 return paired_expand_predicate_builtin (d->icode, exp, target);
13543 *expandedp = false;
13544 return NULL_RTX;
13547 /* Binops that need to be initialized manually, but can be expanded
13548 automagically by rs6000_expand_binop_builtin. */
13549 static const struct builtin_description bdesc_2arg_spe[] =
13551 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13552 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13553 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13554 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13555 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13556 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13557 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13558 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13559 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13560 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13561 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13562 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13563 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13564 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13565 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13566 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13567 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13568 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13569 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13570 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13571 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13572 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13575 /* Expand the builtin in EXP and store the result in TARGET. Store
13576 true in *EXPANDEDP if we found a builtin to expand.
13578 This expands the SPE builtins that are not simple unary and binary
13579 operations. */
13580 static rtx
13581 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13583 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13584 tree arg1, arg0;
13585 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13586 enum insn_code icode;
13587 machine_mode tmode, mode0;
13588 rtx pat, op0;
13589 const struct builtin_description *d;
13590 size_t i;
13592 *expandedp = true;
13594 /* Syntax check for a 5-bit unsigned immediate. */
13595 switch (fcode)
13597 case SPE_BUILTIN_EVSTDD:
13598 case SPE_BUILTIN_EVSTDH:
13599 case SPE_BUILTIN_EVSTDW:
13600 case SPE_BUILTIN_EVSTWHE:
13601 case SPE_BUILTIN_EVSTWHO:
13602 case SPE_BUILTIN_EVSTWWE:
13603 case SPE_BUILTIN_EVSTWWO:
13604 arg1 = CALL_EXPR_ARG (exp, 2);
13605 if (TREE_CODE (arg1) != INTEGER_CST
13606 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13607 {
13608 error ("argument 2 must be a 5-bit unsigned literal");
13609 return const0_rtx;
13610 }
13611 break;
13612 default:
13613 break;
13616 /* The evsplat*i instructions are not quite generic. */
13617 switch (fcode)
13619 case SPE_BUILTIN_EVSPLATFI:
13620 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13621 exp, target);
13622 case SPE_BUILTIN_EVSPLATI:
13623 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13624 exp, target);
13625 default:
13626 break;
13629 d = bdesc_2arg_spe;
13630 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13631 if (d->code == fcode)
13632 return rs6000_expand_binop_builtin (d->icode, exp, target);
13634 d = bdesc_spe_predicates;
13635 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13636 if (d->code == fcode)
13637 return spe_expand_predicate_builtin (d->icode, exp, target);
13639 d = bdesc_spe_evsel;
13640 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13641 if (d->code == fcode)
13642 return spe_expand_evsel_builtin (d->icode, exp, target);
13644 switch (fcode)
13646 case SPE_BUILTIN_EVSTDDX:
13647 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13648 case SPE_BUILTIN_EVSTDHX:
13649 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13650 case SPE_BUILTIN_EVSTDWX:
13651 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13652 case SPE_BUILTIN_EVSTWHEX:
13653 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13654 case SPE_BUILTIN_EVSTWHOX:
13655 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13656 case SPE_BUILTIN_EVSTWWEX:
13657 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13658 case SPE_BUILTIN_EVSTWWOX:
13659 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13660 case SPE_BUILTIN_EVSTDD:
13661 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13662 case SPE_BUILTIN_EVSTDH:
13663 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13664 case SPE_BUILTIN_EVSTDW:
13665 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13666 case SPE_BUILTIN_EVSTWHE:
13667 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13668 case SPE_BUILTIN_EVSTWHO:
13669 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13670 case SPE_BUILTIN_EVSTWWE:
13671 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13672 case SPE_BUILTIN_EVSTWWO:
13673 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13674 case SPE_BUILTIN_MFSPEFSCR:
13675 icode = CODE_FOR_spe_mfspefscr;
13676 tmode = insn_data[icode].operand[0].mode;
13678 if (target == 0
13679 || GET_MODE (target) != tmode
13680 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13681 target = gen_reg_rtx (tmode);
13683 pat = GEN_FCN (icode) (target);
13684 if (! pat)
13685 return 0;
13686 emit_insn (pat);
13687 return target;
13688 case SPE_BUILTIN_MTSPEFSCR:
13689 icode = CODE_FOR_spe_mtspefscr;
13690 arg0 = CALL_EXPR_ARG (exp, 0);
13691 op0 = expand_normal (arg0);
13692 mode0 = insn_data[icode].operand[0].mode;
13694 if (arg0 == error_mark_node)
13695 return const0_rtx;
13697 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13698 op0 = copy_to_mode_reg (mode0, op0);
13700 pat = GEN_FCN (icode) (op0);
13701 if (pat)
13702 emit_insn (pat);
13703 return NULL_RTX;
13704 default:
13705 break;
13708 *expandedp = false;
13709 return NULL_RTX;
13712 static rtx
13713 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13715 rtx pat, scratch, tmp;
13716 tree form = CALL_EXPR_ARG (exp, 0);
13717 tree arg0 = CALL_EXPR_ARG (exp, 1);
13718 tree arg1 = CALL_EXPR_ARG (exp, 2);
13719 rtx op0 = expand_normal (arg0);
13720 rtx op1 = expand_normal (arg1);
13721 machine_mode mode0 = insn_data[icode].operand[1].mode;
13722 machine_mode mode1 = insn_data[icode].operand[2].mode;
13723 int form_int;
13724 enum rtx_code code;
13726 if (TREE_CODE (form) != INTEGER_CST)
13727 {
13728 error ("argument 1 of __builtin_paired_predicate must be a constant");
13729 return const0_rtx;
13730 }
13731 else
13732 form_int = TREE_INT_CST_LOW (form);
13734 gcc_assert (mode0 == mode1);
13736 if (arg0 == error_mark_node || arg1 == error_mark_node)
13737 return const0_rtx;
13739 if (target == 0
13740 || GET_MODE (target) != SImode
13741 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13742 target = gen_reg_rtx (SImode);
13743 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13744 op0 = copy_to_mode_reg (mode0, op0);
13745 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13746 op1 = copy_to_mode_reg (mode1, op1);
13748 scratch = gen_reg_rtx (CCFPmode);
13750 pat = GEN_FCN (icode) (scratch, op0, op1);
13751 if (!pat)
13752 return const0_rtx;
13754 emit_insn (pat);
13756 switch (form_int)
13758 /* LT bit. */
13759 case 0:
13760 code = LT;
13761 break;
13762 /* GT bit. */
13763 case 1:
13764 code = GT;
13765 break;
13766 /* EQ bit. */
13767 case 2:
13768 code = EQ;
13769 break;
13770 /* UN bit. */
13771 case 3:
13772 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13773 return target;
13774 default:
13775 error ("argument 1 of __builtin_paired_predicate is out of range");
13776 return const0_rtx;
13779 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13780 emit_move_insn (target, tmp);
13781 return target;
13784 static rtx
13785 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13787 rtx pat, scratch, tmp;
13788 tree form = CALL_EXPR_ARG (exp, 0);
13789 tree arg0 = CALL_EXPR_ARG (exp, 1);
13790 tree arg1 = CALL_EXPR_ARG (exp, 2);
13791 rtx op0 = expand_normal (arg0);
13792 rtx op1 = expand_normal (arg1);
13793 machine_mode mode0 = insn_data[icode].operand[1].mode;
13794 machine_mode mode1 = insn_data[icode].operand[2].mode;
13795 int form_int;
13796 enum rtx_code code;
13798 if (TREE_CODE (form) != INTEGER_CST)
13799 {
13800 error ("argument 1 of __builtin_spe_predicate must be a constant");
13801 return const0_rtx;
13802 }
13803 else
13804 form_int = TREE_INT_CST_LOW (form);
13806 gcc_assert (mode0 == mode1);
13808 if (arg0 == error_mark_node || arg1 == error_mark_node)
13809 return const0_rtx;
13811 if (target == 0
13812 || GET_MODE (target) != SImode
13813 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13814 target = gen_reg_rtx (SImode);
13816 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13817 op0 = copy_to_mode_reg (mode0, op0);
13818 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13819 op1 = copy_to_mode_reg (mode1, op1);
13821 scratch = gen_reg_rtx (CCmode);
13823 pat = GEN_FCN (icode) (scratch, op0, op1);
13824 if (! pat)
13825 return const0_rtx;
13826 emit_insn (pat);
13828 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13829 _lower_. We use one compare, but look in different bits of the
13830 CR for each variant.
13832 There are 2 elements in each SPE simd type (upper/lower). The CR
13833 bits are set as follows:
13835 BIT0 | BIT 1 | BIT 2 | BIT 3
13836 U | L | (U | L) | (U & L)
13838 So, for an "all" relationship, BIT 3 would be set.
13839 For an "any" relationship, BIT 2 would be set. Etc.
13841 Following traditional nomenclature, these bits map to:
13843 BIT0 | BIT 1 | BIT 2 | BIT 3
13844 LT | GT | EQ | OV
13846 Later, we will generate rtl to look in the OV, EQ, LT or GT bit for the "all", "any", "upper" or "lower" variant, respectively.
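/* Worked instance: the "all" variant (form 0) needs U & L, i.e. BIT 3,
   which is the OV bit, while the "upper" variant (form 2) needs U
   alone, i.e. BIT 0, the LT bit.  */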
13849 switch (form_int)
13851 /* All variant. OV bit. */
13852 case 0:
13853 /* We need to get to the OV bit, which is the ORDERED bit. We
13854 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13855 that's ugly and will make validate_condition_mode die.
13856 So let's just use another pattern. */
13857 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13858 return target;
13859 /* Any variant. EQ bit. */
13860 case 1:
13861 code = EQ;
13862 break;
13863 /* Upper variant. LT bit. */
13864 case 2:
13865 code = LT;
13866 break;
13867 /* Lower variant. GT bit. */
13868 case 3:
13869 code = GT;
13870 break;
13871 default:
13872 error ("argument 1 of __builtin_spe_predicate is out of range");
13873 return const0_rtx;
13876 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13877 emit_move_insn (target, tmp);
13879 return target;
13882 /* The evsel builtins look like this:
13884 e = __builtin_spe_evsel_OP (a, b, c, d);
13886 and work like this:
13888 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13889 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
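/* For instance, with OP = gts (illustrative):
   e = __builtin_spe_evsel_gts (a, b, c, d) yields
   e[upper] = a[upper] > b[upper] ? c[upper] : d[upper], plus the
   analogous selection for the lower element.  */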
13892 static rtx
13893 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13895 rtx pat, scratch;
13896 tree arg0 = CALL_EXPR_ARG (exp, 0);
13897 tree arg1 = CALL_EXPR_ARG (exp, 1);
13898 tree arg2 = CALL_EXPR_ARG (exp, 2);
13899 tree arg3 = CALL_EXPR_ARG (exp, 3);
13900 rtx op0 = expand_normal (arg0);
13901 rtx op1 = expand_normal (arg1);
13902 rtx op2 = expand_normal (arg2);
13903 rtx op3 = expand_normal (arg3);
13904 machine_mode mode0 = insn_data[icode].operand[1].mode;
13905 machine_mode mode1 = insn_data[icode].operand[2].mode;
13907 gcc_assert (mode0 == mode1);
13909 if (arg0 == error_mark_node || arg1 == error_mark_node
13910 || arg2 == error_mark_node || arg3 == error_mark_node)
13911 return const0_rtx;
13913 if (target == 0
13914 || GET_MODE (target) != mode0
13915 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
13916 target = gen_reg_rtx (mode0);
13918 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13919 op0 = copy_to_mode_reg (mode0, op0);
13920 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13921 op1 = copy_to_mode_reg (mode0, op1);
13922 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13923 op2 = copy_to_mode_reg (mode0, op2);
13924 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
13925 op3 = copy_to_mode_reg (mode0, op3);
13927 /* Generate the compare. */
13928 scratch = gen_reg_rtx (CCmode);
13929 pat = GEN_FCN (icode) (scratch, op0, op1);
13930 if (! pat)
13931 return const0_rtx;
13932 emit_insn (pat);
13934 if (mode0 == V2SImode)
13935 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
13936 else
13937 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
13939 return target;
13942 /* Raise an error message for a builtin function that is called without the
13943 appropriate target options being set. */
13945 static void
13946 rs6000_invalid_builtin (enum rs6000_builtins fncode)
13948 size_t uns_fncode = (size_t)fncode;
13949 const char *name = rs6000_builtin_info[uns_fncode].name;
13950 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
13952 gcc_assert (name != NULL);
13953 if ((fnmask & RS6000_BTM_CELL) != 0)
13954 error ("Builtin function %s is only valid for the cell processor", name);
13955 else if ((fnmask & RS6000_BTM_VSX) != 0)
13956 error ("Builtin function %s requires the -mvsx option", name);
13957 else if ((fnmask & RS6000_BTM_HTM) != 0)
13958 error ("Builtin function %s requires the -mhtm option", name);
13959 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
13960 error ("Builtin function %s requires the -maltivec option", name);
13961 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
13962 error ("Builtin function %s requires the -mpaired option", name);
13963 else if ((fnmask & RS6000_BTM_SPE) != 0)
13964 error ("Builtin function %s requires the -mspe option", name);
13965 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13966 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13967 error ("Builtin function %s requires the -mhard-dfp and"
13968 " -mpower8-vector options", name);
13969 else if ((fnmask & RS6000_BTM_DFP) != 0)
13970 error ("Builtin function %s requires the -mhard-dfp option", name);
13971 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
13972 error ("Builtin function %s requires the -mpower8-vector option", name);
13973 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13974 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13975 error ("Builtin function %s requires the -mhard-float and"
13976 " -mlong-double-128 options", name);
13977 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
13978 error ("Builtin function %s requires the -mhard-float option", name);
13979 else
13980 error ("Builtin function %s is not supported with the current options",
13981 name);
13984 /* Expand an expression EXP that calls a built-in function,
13985 with result going to TARGET if that's convenient
13986 (and in mode MODE if that's convenient).
13987 SUBTARGET may be used as the target for computing one of EXP's operands.
13988 IGNORE is nonzero if the value is to be ignored. */
13990 static rtx
13991 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13992 machine_mode mode ATTRIBUTE_UNUSED,
13993 int ignore ATTRIBUTE_UNUSED)
13995 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13996 enum rs6000_builtins fcode
13997 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
13998 size_t uns_fcode = (size_t)fcode;
13999 const struct builtin_description *d;
14000 size_t i;
14001 rtx ret;
14002 bool success;
14003 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14004 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
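/* E.g. a builtin carrying RS6000_BTM_HTM in its mask is valid only if
   -mhtm has put that bit into rs6000_builtin_mask; a builtin needing
   several features is valid only when all of its mask bits are set.  */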
14006 if (TARGET_DEBUG_BUILTIN)
14008 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14009 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14010 const char *name2 = ((icode != CODE_FOR_nothing)
14011 ? get_insn_name ((int)icode)
14012 : "nothing");
14013 const char *name3;
14015 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
14017 default: name3 = "unknown"; break;
14018 case RS6000_BTC_SPECIAL: name3 = "special"; break;
14019 case RS6000_BTC_UNARY: name3 = "unary"; break;
14020 case RS6000_BTC_BINARY: name3 = "binary"; break;
14021 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
14022 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
14023 case RS6000_BTC_ABS: name3 = "abs"; break;
14024 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
14025 case RS6000_BTC_DST: name3 = "dst"; break;
14029 fprintf (stderr,
14030 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
14031 (name1) ? name1 : "---", fcode,
14032 (name2) ? name2 : "---", (int)icode,
14033 name3,
14034 func_valid_p ? "" : ", not valid");
14037 if (!func_valid_p)
14038 {
14039 rs6000_invalid_builtin (fcode);
14041 /* Given it is invalid, just generate a normal call. */
14042 return expand_call (exp, target, ignore);
14043 }
14045 switch (fcode)
14047 case RS6000_BUILTIN_RECIP:
14048 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
14050 case RS6000_BUILTIN_RECIPF:
14051 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
14053 case RS6000_BUILTIN_RSQRTF:
14054 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
14056 case RS6000_BUILTIN_RSQRT:
14057 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
14059 case POWER7_BUILTIN_BPERMD:
14060 return rs6000_expand_binop_builtin (((TARGET_64BIT)
14061 ? CODE_FOR_bpermd_di
14062 : CODE_FOR_bpermd_si), exp, target);
14064 case RS6000_BUILTIN_GET_TB:
14065 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
14066 target);
14068 case RS6000_BUILTIN_MFTB:
14069 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14070 ? CODE_FOR_rs6000_mftb_di
14071 : CODE_FOR_rs6000_mftb_si),
14072 target);
14074 case RS6000_BUILTIN_MFFS:
14075 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14077 case RS6000_BUILTIN_MTFSF:
14078 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14080 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14081 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14083 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14084 : (int) CODE_FOR_altivec_lvsl_direct);
14085 machine_mode tmode = insn_data[icode].operand[0].mode;
14086 machine_mode mode = insn_data[icode].operand[1].mode;
14087 tree arg;
14088 rtx op, addr, pat;
14090 gcc_assert (TARGET_ALTIVEC);
14092 arg = CALL_EXPR_ARG (exp, 0);
14093 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14094 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14095 addr = memory_address (mode, op);
14096 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14097 op = addr;
14098 else
14099 {
14100 /* For the load case we need to negate the address. */
14101 op = gen_reg_rtx (GET_MODE (addr));
14102 emit_insn (gen_rtx_SET (VOIDmode, op,
14103 gen_rtx_NEG (GET_MODE (addr), addr)));
14104 }
14105 op = gen_rtx_MEM (mode, op);
14107 if (target == 0
14108 || GET_MODE (target) != tmode
14109 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14110 target = gen_reg_rtx (tmode);
14112 pat = GEN_FCN (icode) (target, op);
14113 if (!pat)
14114 return 0;
14115 emit_insn (pat);
14117 return target;
14120 case ALTIVEC_BUILTIN_VCFUX:
14121 case ALTIVEC_BUILTIN_VCFSX:
14122 case ALTIVEC_BUILTIN_VCTUXS:
14123 case ALTIVEC_BUILTIN_VCTSXS:
14124 /* FIXME: There's got to be a nicer way to handle this case than
14125 constructing a new CALL_EXPR. */
14126 if (call_expr_nargs (exp) == 1)
14128 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14129 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14131 break;
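/* That is, a one-argument call such as __builtin_altivec_vcfux (v) is
   rewritten as __builtin_altivec_vcfux (v, 0), supplying the default
   scale factor of zero before normal expansion resumes (illustrative).  */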
14133 default:
14134 break;
14137 if (TARGET_ALTIVEC)
14138 {
14139 ret = altivec_expand_builtin (exp, target, &success);
14141 if (success)
14142 return ret;
14143 }
14144 if (TARGET_SPE)
14145 {
14146 ret = spe_expand_builtin (exp, target, &success);
14148 if (success)
14149 return ret;
14150 }
14151 if (TARGET_PAIRED_FLOAT)
14152 {
14153 ret = paired_expand_builtin (exp, target, &success);
14155 if (success)
14156 return ret;
14157 }
14158 if (TARGET_HTM)
14159 {
14160 ret = htm_expand_builtin (exp, target, &success);
14162 if (success)
14163 return ret;
14164 }
14166 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14167 gcc_assert (attr == RS6000_BTC_UNARY
14168 || attr == RS6000_BTC_BINARY
14169 || attr == RS6000_BTC_TERNARY);
14171 /* Handle simple unary operations. */
14172 d = bdesc_1arg;
14173 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14174 if (d->code == fcode)
14175 return rs6000_expand_unop_builtin (d->icode, exp, target);
14177 /* Handle simple binary operations. */
14178 d = bdesc_2arg;
14179 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14180 if (d->code == fcode)
14181 return rs6000_expand_binop_builtin (d->icode, exp, target);
14183 /* Handle simple ternary operations. */
14184 d = bdesc_3arg;
14185 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14186 if (d->code == fcode)
14187 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14189 gcc_unreachable ();
14192 static void
14193 rs6000_init_builtins (void)
14195 tree tdecl;
14196 tree ftype;
14197 machine_mode mode;
14199 if (TARGET_DEBUG_BUILTIN)
14200 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14201 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14202 (TARGET_SPE) ? ", spe" : "",
14203 (TARGET_ALTIVEC) ? ", altivec" : "",
14204 (TARGET_VSX) ? ", vsx" : "");
14206 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14207 V2SF_type_node = build_vector_type (float_type_node, 2);
14208 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14209 V2DF_type_node = build_vector_type (double_type_node, 2);
14210 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14211 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14212 V4SF_type_node = build_vector_type (float_type_node, 4);
14213 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14214 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14216 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14217 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14218 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14219 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14221 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14222 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14223 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14224 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14226 /* We use V1TI mode as a special container to hold __int128_t items that
14227 must live in VSX registers. */
14228 if (intTI_type_node)
14229 {
14230 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14231 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
14232 }
14234 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14235 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14236 'vector unsigned short'. */
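/* For instance, "vector bool char" and "vector unsigned char" share
   the same layout (16 unsigned QImode elements), but they are kept as
   separate types so that C++ overloading and name mangling can tell
   them apart (illustrative note).  */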
14238 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14239 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14240 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14241 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14242 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14244 long_integer_type_internal_node = long_integer_type_node;
14245 long_unsigned_type_internal_node = long_unsigned_type_node;
14246 long_long_integer_type_internal_node = long_long_integer_type_node;
14247 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14248 intQI_type_internal_node = intQI_type_node;
14249 uintQI_type_internal_node = unsigned_intQI_type_node;
14250 intHI_type_internal_node = intHI_type_node;
14251 uintHI_type_internal_node = unsigned_intHI_type_node;
14252 intSI_type_internal_node = intSI_type_node;
14253 uintSI_type_internal_node = unsigned_intSI_type_node;
14254 intDI_type_internal_node = intDI_type_node;
14255 uintDI_type_internal_node = unsigned_intDI_type_node;
14256 intTI_type_internal_node = intTI_type_node;
14257 uintTI_type_internal_node = unsigned_intTI_type_node;
14258 float_type_internal_node = float_type_node;
14259 double_type_internal_node = double_type_node;
14260 long_double_type_internal_node = long_double_type_node;
14261 dfloat64_type_internal_node = dfloat64_type_node;
14262 dfloat128_type_internal_node = dfloat128_type_node;
14263 void_type_internal_node = void_type_node;
14265 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14266 tree type node. */
14267 builtin_mode_to_type[QImode][0] = integer_type_node;
14268 builtin_mode_to_type[HImode][0] = integer_type_node;
14269 builtin_mode_to_type[SImode][0] = intSI_type_node;
14270 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14271 builtin_mode_to_type[DImode][0] = intDI_type_node;
14272 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14273 builtin_mode_to_type[TImode][0] = intTI_type_node;
14274 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14275 builtin_mode_to_type[SFmode][0] = float_type_node;
14276 builtin_mode_to_type[DFmode][0] = double_type_node;
14277 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14278 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14279 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14280 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14281 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14282 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14283 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14284 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14285 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14286 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14287 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14288 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14289 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14290 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14291 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14292 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14293 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14294 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
14296 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14297 TYPE_NAME (bool_char_type_node) = tdecl;
14299 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14300 TYPE_NAME (bool_short_type_node) = tdecl;
14302 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14303 TYPE_NAME (bool_int_type_node) = tdecl;
14305 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14306 TYPE_NAME (pixel_type_node) = tdecl;
14308 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14309 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14310 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14311 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14312 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14314 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14315 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14317 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14318 TYPE_NAME (V16QI_type_node) = tdecl;
14320 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14321 TYPE_NAME (bool_V16QI_type_node) = tdecl;
14323 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14324 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14326 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14327 TYPE_NAME (V8HI_type_node) = tdecl;
14329 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14330 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14332 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14333 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14335 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14336 TYPE_NAME (V4SI_type_node) = tdecl;
14338 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14339 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14341 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14342 TYPE_NAME (V4SF_type_node) = tdecl;
14344 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14345 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14347 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14348 TYPE_NAME (V2DF_type_node) = tdecl;
14350 if (TARGET_POWERPC64)
14352 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14353 TYPE_NAME (V2DI_type_node) = tdecl;
14355 tdecl = add_builtin_type ("__vector unsigned long",
14356 unsigned_V2DI_type_node);
14357 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14359 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14360 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14362 else
14364 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14365 TYPE_NAME (V2DI_type_node) = tdecl;
14367 tdecl = add_builtin_type ("__vector unsigned long long",
14368 unsigned_V2DI_type_node);
14369 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14371 tdecl = add_builtin_type ("__vector __bool long long",
14372 bool_V2DI_type_node);
14373 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14376 if (V1TI_type_node)
14378 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14379 TYPE_NAME (V1TI_type_node) = tdecl;
14381 tdecl = add_builtin_type ("__vector unsigned __int128",
14382 unsigned_V1TI_type_node);
14383 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14386 /* Paired and SPE builtins are only available if you build a compiler with
14387 the appropriate options, so only create those builtins with the
14388 appropriate compiler option. Create Altivec and VSX builtins on machines
14389 with at least the general purpose extensions (970 and newer) to allow the
14390 use of the target attribute. */
14391 if (TARGET_PAIRED_FLOAT)
14392 paired_init_builtins ();
14393 if (TARGET_SPE)
14394 spe_init_builtins ();
14395 if (TARGET_EXTRA_BUILTINS)
14396 altivec_init_builtins ();
14397 if (TARGET_HTM)
14398 htm_init_builtins ();
14400 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14401 rs6000_common_init_builtins ();
14403 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14404 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14405 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14407 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14408 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14409 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14411 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14412 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14413 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14415 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14416 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14417 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
14419 mode = (TARGET_64BIT) ? DImode : SImode;
14420 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14421 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14422 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14424 ftype = build_function_type_list (unsigned_intDI_type_node,
14425 NULL_TREE);
14426 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14428 if (TARGET_64BIT)
14429 ftype = build_function_type_list (unsigned_intDI_type_node,
14430 NULL_TREE);
14431 else
14432 ftype = build_function_type_list (unsigned_intSI_type_node,
14433 NULL_TREE);
14434 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
14436 ftype = build_function_type_list (double_type_node, NULL_TREE);
14437 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14439 ftype = build_function_type_list (void_type_node,
14440 intSI_type_node, double_type_node,
14441 NULL_TREE);
14442 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
14444 #if TARGET_XCOFF
14445 /* AIX libm provides clog as __clog. */
14446 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14447 set_user_assembler_name (tdecl, "__clog");
14448 #endif
14450 #ifdef SUBTARGET_INIT_BUILTINS
14451 SUBTARGET_INIT_BUILTINS;
14452 #endif
14455 /* Returns the rs6000 builtin decl for CODE. */
14457 static tree
14458 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14460 HOST_WIDE_INT fnmask;
14462 if (code >= RS6000_BUILTIN_COUNT)
14463 return error_mark_node;
14465 fnmask = rs6000_builtin_info[code].mask;
14466 if ((fnmask & rs6000_builtin_mask) != fnmask)
14468 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14469 return error_mark_node;
14472 return rs6000_builtin_decls[code];
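/* Example of the mask test above: a builtin whose info record has
     mask = RS6000_BTM_ALTIVEC | RS6000_BTM_VSX
   is returned only when both bits are also set in rs6000_builtin_mask;
   otherwise rs6000_invalid_builtin reports the missing feature and
   error_mark_node is returned.  */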
14475 static void
14476 spe_init_builtins (void)
14478 tree puint_type_node = build_pointer_type (unsigned_type_node);
14479 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14480 const struct builtin_description *d;
14481 size_t i;
14483 tree v2si_ftype_4_v2si
14484 = build_function_type_list (opaque_V2SI_type_node,
14485 opaque_V2SI_type_node,
14486 opaque_V2SI_type_node,
14487 opaque_V2SI_type_node,
14488 opaque_V2SI_type_node,
14489 NULL_TREE);
14491 tree v2sf_ftype_4_v2sf
14492 = build_function_type_list (opaque_V2SF_type_node,
14493 opaque_V2SF_type_node,
14494 opaque_V2SF_type_node,
14495 opaque_V2SF_type_node,
14496 opaque_V2SF_type_node,
14497 NULL_TREE);
14499 tree int_ftype_int_v2si_v2si
14500 = build_function_type_list (integer_type_node,
14501 integer_type_node,
14502 opaque_V2SI_type_node,
14503 opaque_V2SI_type_node,
14504 NULL_TREE);
14506 tree int_ftype_int_v2sf_v2sf
14507 = build_function_type_list (integer_type_node,
14508 integer_type_node,
14509 opaque_V2SF_type_node,
14510 opaque_V2SF_type_node,
14511 NULL_TREE);
14513 tree void_ftype_v2si_puint_int
14514 = build_function_type_list (void_type_node,
14515 opaque_V2SI_type_node,
14516 puint_type_node,
14517 integer_type_node,
14518 NULL_TREE);
14520 tree void_ftype_v2si_puint_char
14521 = build_function_type_list (void_type_node,
14522 opaque_V2SI_type_node,
14523 puint_type_node,
14524 char_type_node,
14525 NULL_TREE);
14527 tree void_ftype_v2si_pv2si_int
14528 = build_function_type_list (void_type_node,
14529 opaque_V2SI_type_node,
14530 opaque_p_V2SI_type_node,
14531 integer_type_node,
14532 NULL_TREE);
14534 tree void_ftype_v2si_pv2si_char
14535 = build_function_type_list (void_type_node,
14536 opaque_V2SI_type_node,
14537 opaque_p_V2SI_type_node,
14538 char_type_node,
14539 NULL_TREE);
14541 tree void_ftype_int
14542 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14544 tree int_ftype_void
14545 = build_function_type_list (integer_type_node, NULL_TREE);
14547 tree v2si_ftype_pv2si_int
14548 = build_function_type_list (opaque_V2SI_type_node,
14549 opaque_p_V2SI_type_node,
14550 integer_type_node,
14551 NULL_TREE);
14553 tree v2si_ftype_puint_int
14554 = build_function_type_list (opaque_V2SI_type_node,
14555 puint_type_node,
14556 integer_type_node,
14557 NULL_TREE);
14559 tree v2si_ftype_pushort_int
14560 = build_function_type_list (opaque_V2SI_type_node,
14561 pushort_type_node,
14562 integer_type_node,
14563 NULL_TREE);
14565 tree v2si_ftype_signed_char
14566 = build_function_type_list (opaque_V2SI_type_node,
14567 signed_char_type_node,
14568 NULL_TREE);
14570 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14572 /* Initialize irregular SPE builtins. */
14574 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14575 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14576 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14577 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14578 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14579 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14580 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14581 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14582 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14583 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14584 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14585 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14586 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14587 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14588 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14589 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14590 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14591 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14593 /* Loads. */
14594 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14595 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14596 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14597 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14598 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14599 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14600 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14601 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14602 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14603 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14604 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14605 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14606 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14607 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14608 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14609 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14610 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14611 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14612 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14613 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14614 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14615 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
14617 /* Predicates. */
14618 d = bdesc_spe_predicates;
14619 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14621 tree type;
14623 switch (insn_data[d->icode].operand[1].mode)
14625 case V2SImode:
14626 type = int_ftype_int_v2si_v2si;
14627 break;
14628 case V2SFmode:
14629 type = int_ftype_int_v2sf_v2sf;
14630 break;
14631 default:
14632 gcc_unreachable ();
14635 def_builtin (d->name, type, d->code);
14638 /* Evsel predicates. */
14639 d = bdesc_spe_evsel;
14640 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14642 tree type;
14644 switch (insn_data[d->icode].operand[1].mode)
14646 case V2SImode:
14647 type = v2si_ftype_4_v2si;
14648 break;
14649 case V2SFmode:
14650 type = v2sf_ftype_4_v2sf;
14651 break;
14652 default:
14653 gcc_unreachable ();
14656 def_builtin (d->name, type, d->code);
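/* Usage sketch for the SPE load builtins above (hypothetical user code;
   requires a compiler configured for SPE):

     __ev64_opaque__ load_doubleword (__ev64_opaque__ *p)
     {
       return __builtin_spe_evldd (p, 0);   // 64-bit load from *p
     }
*/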
14660 static void
14661 paired_init_builtins (void)
14663 const struct builtin_description *d;
14664 size_t i;
14666 tree int_ftype_int_v2sf_v2sf
14667 = build_function_type_list (integer_type_node,
14668 integer_type_node,
14669 V2SF_type_node,
14670 V2SF_type_node,
14671 NULL_TREE);
14672 tree pcfloat_type_node =
14673 build_pointer_type (build_qualified_type
14674 (float_type_node, TYPE_QUAL_CONST));
14676 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14677 long_integer_type_node,
14678 pcfloat_type_node,
14679 NULL_TREE);
14680 tree void_ftype_v2sf_long_pcfloat =
14681 build_function_type_list (void_type_node,
14682 V2SF_type_node,
14683 long_integer_type_node,
14684 pcfloat_type_node,
14685 NULL_TREE);
14688 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14689 PAIRED_BUILTIN_LX);
14692 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14693 PAIRED_BUILTIN_STX);
14695 /* Predicates. */
14696 d = bdesc_paired_preds;
14697 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14699 tree type;
14701 if (TARGET_DEBUG_BUILTIN)
14702 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14703 (int)i, get_insn_name (d->icode), (int)d->icode,
14704 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14706 switch (insn_data[d->icode].operand[1].mode)
14708 case V2SFmode:
14709 type = int_ftype_int_v2sf_v2sf;
14710 break;
14711 default:
14712 gcc_unreachable ();
14715 def_builtin (d->name, type, d->code);
14719 static void
14720 altivec_init_builtins (void)
14722 const struct builtin_description *d;
14723 size_t i;
14724 tree ftype;
14725 tree decl;
14727 tree pvoid_type_node = build_pointer_type (void_type_node);
14729 tree pcvoid_type_node
14730 = build_pointer_type (build_qualified_type (void_type_node,
14731 TYPE_QUAL_CONST));
14733 tree int_ftype_opaque
14734 = build_function_type_list (integer_type_node,
14735 opaque_V4SI_type_node, NULL_TREE);
14736 tree opaque_ftype_opaque
14737 = build_function_type_list (integer_type_node, NULL_TREE);
14738 tree opaque_ftype_opaque_int
14739 = build_function_type_list (opaque_V4SI_type_node,
14740 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14741 tree opaque_ftype_opaque_opaque_int
14742 = build_function_type_list (opaque_V4SI_type_node,
14743 opaque_V4SI_type_node, opaque_V4SI_type_node,
14744 integer_type_node, NULL_TREE);
14745 tree int_ftype_int_opaque_opaque
14746 = build_function_type_list (integer_type_node,
14747 integer_type_node, opaque_V4SI_type_node,
14748 opaque_V4SI_type_node, NULL_TREE);
14749 tree int_ftype_int_v4si_v4si
14750 = build_function_type_list (integer_type_node,
14751 integer_type_node, V4SI_type_node,
14752 V4SI_type_node, NULL_TREE);
14753 tree int_ftype_int_v2di_v2di
14754 = build_function_type_list (integer_type_node,
14755 integer_type_node, V2DI_type_node,
14756 V2DI_type_node, NULL_TREE);
14757 tree void_ftype_v4si
14758 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14759 tree v8hi_ftype_void
14760 = build_function_type_list (V8HI_type_node, NULL_TREE);
14761 tree void_ftype_void
14762 = build_function_type_list (void_type_node, NULL_TREE);
14763 tree void_ftype_int
14764 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14766 tree opaque_ftype_long_pcvoid
14767 = build_function_type_list (opaque_V4SI_type_node,
14768 long_integer_type_node, pcvoid_type_node,
14769 NULL_TREE);
14770 tree v16qi_ftype_long_pcvoid
14771 = build_function_type_list (V16QI_type_node,
14772 long_integer_type_node, pcvoid_type_node,
14773 NULL_TREE);
14774 tree v8hi_ftype_long_pcvoid
14775 = build_function_type_list (V8HI_type_node,
14776 long_integer_type_node, pcvoid_type_node,
14777 NULL_TREE);
14778 tree v4si_ftype_long_pcvoid
14779 = build_function_type_list (V4SI_type_node,
14780 long_integer_type_node, pcvoid_type_node,
14781 NULL_TREE);
14782 tree v4sf_ftype_long_pcvoid
14783 = build_function_type_list (V4SF_type_node,
14784 long_integer_type_node, pcvoid_type_node,
14785 NULL_TREE);
14786 tree v2df_ftype_long_pcvoid
14787 = build_function_type_list (V2DF_type_node,
14788 long_integer_type_node, pcvoid_type_node,
14789 NULL_TREE);
14790 tree v2di_ftype_long_pcvoid
14791 = build_function_type_list (V2DI_type_node,
14792 long_integer_type_node, pcvoid_type_node,
14793 NULL_TREE);
14795 tree void_ftype_opaque_long_pvoid
14796 = build_function_type_list (void_type_node,
14797 opaque_V4SI_type_node, long_integer_type_node,
14798 pvoid_type_node, NULL_TREE);
14799 tree void_ftype_v4si_long_pvoid
14800 = build_function_type_list (void_type_node,
14801 V4SI_type_node, long_integer_type_node,
14802 pvoid_type_node, NULL_TREE);
14803 tree void_ftype_v16qi_long_pvoid
14804 = build_function_type_list (void_type_node,
14805 V16QI_type_node, long_integer_type_node,
14806 pvoid_type_node, NULL_TREE);
14807 tree void_ftype_v8hi_long_pvoid
14808 = build_function_type_list (void_type_node,
14809 V8HI_type_node, long_integer_type_node,
14810 pvoid_type_node, NULL_TREE);
14811 tree void_ftype_v4sf_long_pvoid
14812 = build_function_type_list (void_type_node,
14813 V4SF_type_node, long_integer_type_node,
14814 pvoid_type_node, NULL_TREE);
14815 tree void_ftype_v2df_long_pvoid
14816 = build_function_type_list (void_type_node,
14817 V2DF_type_node, long_integer_type_node,
14818 pvoid_type_node, NULL_TREE);
14819 tree void_ftype_v2di_long_pvoid
14820 = build_function_type_list (void_type_node,
14821 V2DI_type_node, long_integer_type_node,
14822 pvoid_type_node, NULL_TREE);
14823 tree int_ftype_int_v8hi_v8hi
14824 = build_function_type_list (integer_type_node,
14825 integer_type_node, V8HI_type_node,
14826 V8HI_type_node, NULL_TREE);
14827 tree int_ftype_int_v16qi_v16qi
14828 = build_function_type_list (integer_type_node,
14829 integer_type_node, V16QI_type_node,
14830 V16QI_type_node, NULL_TREE);
14831 tree int_ftype_int_v4sf_v4sf
14832 = build_function_type_list (integer_type_node,
14833 integer_type_node, V4SF_type_node,
14834 V4SF_type_node, NULL_TREE);
14835 tree int_ftype_int_v2df_v2df
14836 = build_function_type_list (integer_type_node,
14837 integer_type_node, V2DF_type_node,
14838 V2DF_type_node, NULL_TREE);
14839 tree v2di_ftype_v2di
14840 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14841 tree v4si_ftype_v4si
14842 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14843 tree v8hi_ftype_v8hi
14844 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14845 tree v16qi_ftype_v16qi
14846 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14847 tree v4sf_ftype_v4sf
14848 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14849 tree v2df_ftype_v2df
14850 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14851 tree void_ftype_pcvoid_int_int
14852 = build_function_type_list (void_type_node,
14853 pcvoid_type_node, integer_type_node,
14854 integer_type_node, NULL_TREE);
14856 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14857 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14858 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14859 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14860 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14861 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14862 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14863 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14864 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14865 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14866 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14867 ALTIVEC_BUILTIN_LVXL_V2DF);
14868 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14869 ALTIVEC_BUILTIN_LVXL_V2DI);
14870 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14871 ALTIVEC_BUILTIN_LVXL_V4SF);
14872 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14873 ALTIVEC_BUILTIN_LVXL_V4SI);
14874 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
14875 ALTIVEC_BUILTIN_LVXL_V8HI);
14876 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
14877 ALTIVEC_BUILTIN_LVXL_V16QI);
14878 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
14879 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
14880 ALTIVEC_BUILTIN_LVX_V2DF);
14881 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
14882 ALTIVEC_BUILTIN_LVX_V2DI);
14883 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
14884 ALTIVEC_BUILTIN_LVX_V4SF);
14885 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
14886 ALTIVEC_BUILTIN_LVX_V4SI);
14887 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
14888 ALTIVEC_BUILTIN_LVX_V8HI);
14889 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
14890 ALTIVEC_BUILTIN_LVX_V16QI);
14891 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
14892 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
14893 ALTIVEC_BUILTIN_STVX_V2DF);
14894 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
14895 ALTIVEC_BUILTIN_STVX_V2DI);
14896 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
14897 ALTIVEC_BUILTIN_STVX_V4SF);
14898 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
14899 ALTIVEC_BUILTIN_STVX_V4SI);
14900 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
14901 ALTIVEC_BUILTIN_STVX_V8HI);
14902 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
14903 ALTIVEC_BUILTIN_STVX_V16QI);
14904 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
14905 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
14906 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
14907 ALTIVEC_BUILTIN_STVXL_V2DF);
14908 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
14909 ALTIVEC_BUILTIN_STVXL_V2DI);
14910 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
14911 ALTIVEC_BUILTIN_STVXL_V4SF);
14912 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
14913 ALTIVEC_BUILTIN_STVXL_V4SI);
14914 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
14915 ALTIVEC_BUILTIN_STVXL_V8HI);
14916 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
14917 ALTIVEC_BUILTIN_STVXL_V16QI);
14918 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
14919 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
14920 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
14921 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
14922 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
14923 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
14924 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
14925 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
14926 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
14927 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
14928 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
14929 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
14930 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
14931 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
14932 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
14933 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
14935 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
14936 VSX_BUILTIN_LXVD2X_V2DF);
14937 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
14938 VSX_BUILTIN_LXVD2X_V2DI);
14939 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
14940 VSX_BUILTIN_LXVW4X_V4SF);
14941 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
14942 VSX_BUILTIN_LXVW4X_V4SI);
14943 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
14944 VSX_BUILTIN_LXVW4X_V8HI);
14945 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
14946 VSX_BUILTIN_LXVW4X_V16QI);
14947 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
14948 VSX_BUILTIN_STXVD2X_V2DF);
14949 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
14950 VSX_BUILTIN_STXVD2X_V2DI);
14951 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
14952 VSX_BUILTIN_STXVW4X_V4SF);
14953 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
14954 VSX_BUILTIN_STXVW4X_V4SI);
14955 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
14956 VSX_BUILTIN_STXVW4X_V8HI);
14957 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
14958 VSX_BUILTIN_STXVW4X_V16QI);
14959 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
14960 VSX_BUILTIN_VEC_LD);
14961 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
14962 VSX_BUILTIN_VEC_ST);
14964 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
14965 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
14966 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
14968 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
14969 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
14970 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
14971 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
14972 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
14973 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
14974 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
14975 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
14976 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
14977 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
14978 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
14979 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
14981 /* Cell builtins. */
14982 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
14983 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
14984 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
14985 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
14987 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
14988 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
14989 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
14990 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
14992 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
14993 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
14994 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
14995 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
14997 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
14998 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
14999 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
15000 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
15002 /* Add the DST variants. */
15003 d = bdesc_dst;
15004 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15005 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
15007 /* Initialize the predicates. */
15008 d = bdesc_altivec_preds;
15009 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15011 machine_mode mode1;
15012 tree type;
15014 if (rs6000_overloaded_builtin_p (d->code))
15015 mode1 = VOIDmode;
15016 else
15017 mode1 = insn_data[d->icode].operand[1].mode;
15019 switch (mode1)
15021 case VOIDmode:
15022 type = int_ftype_int_opaque_opaque;
15023 break;
15024 case V2DImode:
15025 type = int_ftype_int_v2di_v2di;
15026 break;
15027 case V4SImode:
15028 type = int_ftype_int_v4si_v4si;
15029 break;
15030 case V8HImode:
15031 type = int_ftype_int_v8hi_v8hi;
15032 break;
15033 case V16QImode:
15034 type = int_ftype_int_v16qi_v16qi;
15035 break;
15036 case V4SFmode:
15037 type = int_ftype_int_v4sf_v4sf;
15038 break;
15039 case V2DFmode:
15040 type = int_ftype_int_v2df_v2df;
15041 break;
15042 default:
15043 gcc_unreachable ();
15046 def_builtin (d->name, type, d->code);
15049 /* Initialize the abs* operators. */
15050 d = bdesc_abs;
15051 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15053 machine_mode mode0;
15054 tree type;
15056 mode0 = insn_data[d->icode].operand[0].mode;
15058 switch (mode0)
15060 case V2DImode:
15061 type = v2di_ftype_v2di;
15062 break;
15063 case V4SImode:
15064 type = v4si_ftype_v4si;
15065 break;
15066 case V8HImode:
15067 type = v8hi_ftype_v8hi;
15068 break;
15069 case V16QImode:
15070 type = v16qi_ftype_v16qi;
15071 break;
15072 case V4SFmode:
15073 type = v4sf_ftype_v4sf;
15074 break;
15075 case V2DFmode:
15076 type = v2df_ftype_v2df;
15077 break;
15078 default:
15079 gcc_unreachable ();
15082 def_builtin (d->name, type, d->code);
15085 /* Initialize the target builtin that implements
15086 targetm.vectorize.builtin_mask_for_load. */
15088 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15089 v16qi_ftype_long_pcvoid,
15090 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15091 BUILT_IN_MD, NULL, NULL_TREE);
15092 TREE_READONLY (decl) = 1;
15093 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15094 altivec_builtin_mask_for_load = decl;
15096 /* Access to the vec_init patterns. */
15097 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15098 integer_type_node, integer_type_node,
15099 integer_type_node, NULL_TREE);
15100 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15102 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15103 short_integer_type_node,
15104 short_integer_type_node,
15105 short_integer_type_node,
15106 short_integer_type_node,
15107 short_integer_type_node,
15108 short_integer_type_node,
15109 short_integer_type_node, NULL_TREE);
15110 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15112 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15113 char_type_node, char_type_node,
15114 char_type_node, char_type_node,
15115 char_type_node, char_type_node,
15116 char_type_node, char_type_node,
15117 char_type_node, char_type_node,
15118 char_type_node, char_type_node,
15119 char_type_node, char_type_node,
15120 char_type_node, NULL_TREE);
15121 def_builtin ("__builtin_vec_init_v16qi", ftype,
15122 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15124 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15125 float_type_node, float_type_node,
15126 float_type_node, NULL_TREE);
15127 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15129 /* VSX builtins. */
15130 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15131 double_type_node, NULL_TREE);
15132 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15134 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15135 intDI_type_node, NULL_TREE);
15136 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15138 /* Access to the vec_set patterns. */
15139 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15140 intSI_type_node,
15141 integer_type_node, NULL_TREE);
15142 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15144 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15145 intHI_type_node,
15146 integer_type_node, NULL_TREE);
15147 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15149 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15150 intQI_type_node,
15151 integer_type_node, NULL_TREE);
15152 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15154 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15155 float_type_node,
15156 integer_type_node, NULL_TREE);
15157 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15159 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15160 double_type_node,
15161 integer_type_node, NULL_TREE);
15162 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15164 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15165 intDI_type_node,
15166 integer_type_node, NULL_TREE);
15167 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15169 /* Access to the vec_extract patterns. */
15170 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15171 integer_type_node, NULL_TREE);
15172 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15174 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15175 integer_type_node, NULL_TREE);
15176 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15178 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15179 integer_type_node, NULL_TREE);
15180 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15182 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15183 integer_type_node, NULL_TREE);
15184 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15186 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15187 integer_type_node, NULL_TREE);
15188 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15190 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15191 integer_type_node, NULL_TREE);
15192 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
15195 if (V1TI_type_node)
15197 tree v1ti_ftype_long_pcvoid
15198 = build_function_type_list (V1TI_type_node,
15199 long_integer_type_node, pcvoid_type_node,
15200 NULL_TREE);
15201 tree void_ftype_v1ti_long_pvoid
15202 = build_function_type_list (void_type_node,
15203 V1TI_type_node, long_integer_type_node,
15204 pvoid_type_node, NULL_TREE);
15205 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15206 VSX_BUILTIN_LXVD2X_V1TI);
15207 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15208 VSX_BUILTIN_STXVD2X_V1TI);
15209 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15210 NULL_TREE, NULL_TREE);
15211 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15212 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15213 intTI_type_node,
15214 integer_type_node, NULL_TREE);
15215 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15216 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15217 integer_type_node, NULL_TREE);
15218 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15223 static void
15224 htm_init_builtins (void)
15226 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15227 const struct builtin_description *d;
15228 size_t i;
15230 d = bdesc_htm;
15231 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15233 tree op[MAX_HTM_OPERANDS], type;
15234 HOST_WIDE_INT mask = d->mask;
15235 unsigned attr = rs6000_builtin_info[d->code].attr;
15236 bool void_func = (attr & RS6000_BTC_VOID);
15237 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15238 int nopnds = 0;
15239 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
15240 : unsigned_type_node;
15242 if ((mask & builtin_mask) != mask)
15244 if (TARGET_DEBUG_BUILTIN)
15245 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
15246 continue;
15249 if (d->name == 0)
15251 if (TARGET_DEBUG_BUILTIN)
15252 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
15253 (long unsigned) i);
15254 continue;
15257 op[nopnds++] = (void_func) ? void_type_node : argtype;
15259 if (attr_args == RS6000_BTC_UNARY)
15260 op[nopnds++] = argtype;
15261 else if (attr_args == RS6000_BTC_BINARY)
15263 op[nopnds++] = argtype;
15264 op[nopnds++] = argtype;
15266 else if (attr_args == RS6000_BTC_TERNARY)
15268 op[nopnds++] = argtype;
15269 op[nopnds++] = argtype;
15270 op[nopnds++] = argtype;
15273 switch (nopnds)
15275 case 1:
15276 type = build_function_type_list (op[0], NULL_TREE);
15277 break;
15278 case 2:
15279 type = build_function_type_list (op[0], op[1], NULL_TREE);
15280 break;
15281 case 3:
15282 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15283 break;
15284 case 4:
15285 type = build_function_type_list (op[0], op[1], op[2], op[3],
15286 NULL_TREE);
15287 break;
15288 default:
15289 gcc_unreachable ();
15292 def_builtin (d->name, type, d->code);
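/* For example, under the scheme above an SPR builtin flagged
   RS6000_BTC_BINARY is given the signature

     unsigned long f (unsigned long, unsigned long);

   while a void RS6000_BTC_UNARY builtin becomes

     void f (unsigned int);

   (f stands in for the builtin's real name).  */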
15296 /* Hash function for builtin functions with up to 3 arguments and a return
15297 type. */
15298 hashval_t
15299 builtin_hasher::hash (builtin_hash_struct *bh)
15301 unsigned ret = 0;
15302 int i;
15304 for (i = 0; i < 4; i++)
15306 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15307 ret = (ret * 2) + bh->uns_p[i];
15310 return ret;
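/* A worked view of the hash above: starting from ret == 0, each of the
   four (mode, uns_p) pairs is folded in as

     ret = (ret * MAX_MACHINE_MODE + mode[i]) * 2 + uns_p[i];

   so a difference in any mode or signedness flag changes the result.  */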
15313 /* Compare builtin hash entries H1 and H2 for equivalence. */
15314 bool
15315 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15317 return ((p1->mode[0] == p2->mode[0])
15318 && (p1->mode[1] == p2->mode[1])
15319 && (p1->mode[2] == p2->mode[2])
15320 && (p1->mode[3] == p2->mode[3])
15321 && (p1->uns_p[0] == p2->uns_p[0])
15322 && (p1->uns_p[1] == p2->uns_p[1])
15323 && (p1->uns_p[2] == p2->uns_p[2])
15324 && (p1->uns_p[3] == p2->uns_p[3]));
15327 /* Map types for builtin functions with an explicit return type and up to 3
15328 arguments. Functions with fewer than 3 arguments pass VOIDmode as the mode
15329 of the unused arguments. */
15330 static tree
15331 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
15332 machine_mode mode_arg1, machine_mode mode_arg2,
15333 enum rs6000_builtins builtin, const char *name)
15335 struct builtin_hash_struct h;
15336 struct builtin_hash_struct *h2;
15337 int num_args = 3;
15338 int i;
15339 tree ret_type = NULL_TREE;
15340 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15342 /* Create builtin_hash_table. */
15343 if (builtin_hash_table == NULL)
15344 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15346 h.type = NULL_TREE;
15347 h.mode[0] = mode_ret;
15348 h.mode[1] = mode_arg0;
15349 h.mode[2] = mode_arg1;
15350 h.mode[3] = mode_arg2;
15351 h.uns_p[0] = 0;
15352 h.uns_p[1] = 0;
15353 h.uns_p[2] = 0;
15354 h.uns_p[3] = 0;
15356 /* If the builtin produces unsigned results or takes unsigned arguments,
15357 and it is returned as a decl for the vectorizer (such as widening
15358 multiplies or permute), make sure the arguments and the return value
15359 have the correct types. */
15360 switch (builtin)
15362 /* unsigned 1 argument functions. */
15363 case CRYPTO_BUILTIN_VSBOX:
15364 case P8V_BUILTIN_VGBBD:
15365 case MISC_BUILTIN_CDTBCD:
15366 case MISC_BUILTIN_CBCDTD:
15367 h.uns_p[0] = 1;
15368 h.uns_p[1] = 1;
15369 break;
15371 /* unsigned 2 argument functions. */
15372 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15373 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15374 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15375 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15376 case CRYPTO_BUILTIN_VCIPHER:
15377 case CRYPTO_BUILTIN_VCIPHERLAST:
15378 case CRYPTO_BUILTIN_VNCIPHER:
15379 case CRYPTO_BUILTIN_VNCIPHERLAST:
15380 case CRYPTO_BUILTIN_VPMSUMB:
15381 case CRYPTO_BUILTIN_VPMSUMH:
15382 case CRYPTO_BUILTIN_VPMSUMW:
15383 case CRYPTO_BUILTIN_VPMSUMD:
15384 case CRYPTO_BUILTIN_VPMSUM:
15385 case MISC_BUILTIN_ADDG6S:
15386 case MISC_BUILTIN_DIVWEU:
15387 case MISC_BUILTIN_DIVWEUO:
15388 case MISC_BUILTIN_DIVDEU:
15389 case MISC_BUILTIN_DIVDEUO:
15390 h.uns_p[0] = 1;
15391 h.uns_p[1] = 1;
15392 h.uns_p[2] = 1;
15393 break;
15395 /* unsigned 3 argument functions. */
15396 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15397 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15398 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15399 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15400 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15401 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15402 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15403 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15404 case VSX_BUILTIN_VPERM_16QI_UNS:
15405 case VSX_BUILTIN_VPERM_8HI_UNS:
15406 case VSX_BUILTIN_VPERM_4SI_UNS:
15407 case VSX_BUILTIN_VPERM_2DI_UNS:
15408 case VSX_BUILTIN_XXSEL_16QI_UNS:
15409 case VSX_BUILTIN_XXSEL_8HI_UNS:
15410 case VSX_BUILTIN_XXSEL_4SI_UNS:
15411 case VSX_BUILTIN_XXSEL_2DI_UNS:
15412 case CRYPTO_BUILTIN_VPERMXOR:
15413 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15414 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15415 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15416 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15417 case CRYPTO_BUILTIN_VSHASIGMAW:
15418 case CRYPTO_BUILTIN_VSHASIGMAD:
15419 case CRYPTO_BUILTIN_VSHASIGMA:
15420 h.uns_p[0] = 1;
15421 h.uns_p[1] = 1;
15422 h.uns_p[2] = 1;
15423 h.uns_p[3] = 1;
15424 break;
15426 /* signed permute functions with unsigned char mask. */
15427 case ALTIVEC_BUILTIN_VPERM_16QI:
15428 case ALTIVEC_BUILTIN_VPERM_8HI:
15429 case ALTIVEC_BUILTIN_VPERM_4SI:
15430 case ALTIVEC_BUILTIN_VPERM_4SF:
15431 case ALTIVEC_BUILTIN_VPERM_2DI:
15432 case ALTIVEC_BUILTIN_VPERM_2DF:
15433 case VSX_BUILTIN_VPERM_16QI:
15434 case VSX_BUILTIN_VPERM_8HI:
15435 case VSX_BUILTIN_VPERM_4SI:
15436 case VSX_BUILTIN_VPERM_4SF:
15437 case VSX_BUILTIN_VPERM_2DI:
15438 case VSX_BUILTIN_VPERM_2DF:
15439 h.uns_p[3] = 1;
15440 break;
15442 /* unsigned args, signed return. */
15443 case VSX_BUILTIN_XVCVUXDDP_UNS:
15444 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15445 h.uns_p[1] = 1;
15446 break;
15448 /* signed args, unsigned return. */
15449 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15450 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15451 case MISC_BUILTIN_UNPACK_TD:
15452 case MISC_BUILTIN_UNPACK_V1TI:
15453 h.uns_p[0] = 1;
15454 break;
15456 /* unsigned arguments for 128-bit pack instructions. */
15457 case MISC_BUILTIN_PACK_TD:
15458 case MISC_BUILTIN_PACK_V1TI:
15459 h.uns_p[1] = 1;
15460 h.uns_p[2] = 1;
15461 break;
15463 default:
15464 break;
15467 /* Figure out how many args are present. */
15468 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15469 num_args--;
15471 if (num_args == 0)
15472 fatal_error (input_location,
15473 "internal error: builtin function %s had no type", name);
15475 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15476 if (!ret_type && h.uns_p[0])
15477 ret_type = builtin_mode_to_type[h.mode[0]][0];
15479 if (!ret_type)
15480 fatal_error (input_location,
15481 "internal error: builtin function %s had an unexpected "
15482 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15484 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15485 arg_type[i] = NULL_TREE;
15487 for (i = 0; i < num_args; i++)
15489 int m = (int) h.mode[i+1];
15490 int uns_p = h.uns_p[i+1];
15492 arg_type[i] = builtin_mode_to_type[m][uns_p];
15493 if (!arg_type[i] && uns_p)
15494 arg_type[i] = builtin_mode_to_type[m][0];
15496 if (!arg_type[i])
15497 fatal_error (input_location,
15498 "internal error: builtin function %s, argument %d "
15499 "had unexpected argument type %s", name, i,
15500 GET_MODE_NAME (m));
15503 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15504 if (*found == NULL)
15506 h2 = ggc_alloc<builtin_hash_struct> ();
15507 *h2 = h;
15508 *found = h2;
15510 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15511 arg_type[2], NULL_TREE);
15514 return (*found)->type;
15517 static void
15518 rs6000_common_init_builtins (void)
15520 const struct builtin_description *d;
15521 size_t i;
15523 tree opaque_ftype_opaque = NULL_TREE;
15524 tree opaque_ftype_opaque_opaque = NULL_TREE;
15525 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15526 tree v2si_ftype_qi = NULL_TREE;
15527 tree v2si_ftype_v2si_qi = NULL_TREE;
15528 tree v2si_ftype_int_qi = NULL_TREE;
15529 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15531 if (!TARGET_PAIRED_FLOAT)
15533 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15534 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15537 /* Paired and SPE builtins are only available if you build a compiler with
15538 the appropriate options, so only create those builtins with the
15539 appropriate compiler option. Create Altivec and VSX builtins on machines
15540 with at least the general purpose extensions (970 and newer) to allow the
15541 use of the target attribute. */
15543 if (TARGET_EXTRA_BUILTINS)
15544 builtin_mask |= RS6000_BTM_COMMON;
15546 /* Add the ternary operators. */
15547 d = bdesc_3arg;
15548 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15550 tree type;
15551 HOST_WIDE_INT mask = d->mask;
15553 if ((mask & builtin_mask) != mask)
15555 if (TARGET_DEBUG_BUILTIN)
15556 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15557 continue;
15560 if (rs6000_overloaded_builtin_p (d->code))
15562 if (! (type = opaque_ftype_opaque_opaque_opaque))
15563 type = opaque_ftype_opaque_opaque_opaque
15564 = build_function_type_list (opaque_V4SI_type_node,
15565 opaque_V4SI_type_node,
15566 opaque_V4SI_type_node,
15567 opaque_V4SI_type_node,
15568 NULL_TREE);
15570 else
15572 enum insn_code icode = d->icode;
15573 if (d->name == 0)
15575 if (TARGET_DEBUG_BUILTIN)
15576 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
15577 (long unsigned)i);
15579 continue;
15582 if (icode == CODE_FOR_nothing)
15584 if (TARGET_DEBUG_BUILTIN)
15585 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15586 d->name);
15588 continue;
15591 type = builtin_function_type (insn_data[icode].operand[0].mode,
15592 insn_data[icode].operand[1].mode,
15593 insn_data[icode].operand[2].mode,
15594 insn_data[icode].operand[3].mode,
15595 d->code, d->name);
15598 def_builtin (d->name, type, d->code);
15601 /* Add the binary operators. */
15602 d = bdesc_2arg;
15603 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15605 machine_mode mode0, mode1, mode2;
15606 tree type;
15607 HOST_WIDE_INT mask = d->mask;
15609 if ((mask & builtin_mask) != mask)
15611 if (TARGET_DEBUG_BUILTIN)
15612 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15613 continue;
15616 if (rs6000_overloaded_builtin_p (d->code))
15618 if (! (type = opaque_ftype_opaque_opaque))
15619 type = opaque_ftype_opaque_opaque
15620 = build_function_type_list (opaque_V4SI_type_node,
15621 opaque_V4SI_type_node,
15622 opaque_V4SI_type_node,
15623 NULL_TREE);
15625 else
15627 enum insn_code icode = d->icode;
15628 if (d->name == 0)
15630 if (TARGET_DEBUG_BUILTIN)
15631 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
15632 (long unsigned)i);
15634 continue;
15637 if (icode == CODE_FOR_nothing)
15639 if (TARGET_DEBUG_BUILTIN)
15640 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15641 d->name);
15643 continue;
15646 mode0 = insn_data[icode].operand[0].mode;
15647 mode1 = insn_data[icode].operand[1].mode;
15648 mode2 = insn_data[icode].operand[2].mode;
15650 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15652 if (! (type = v2si_ftype_v2si_qi))
15653 type = v2si_ftype_v2si_qi
15654 = build_function_type_list (opaque_V2SI_type_node,
15655 opaque_V2SI_type_node,
15656 char_type_node,
15657 NULL_TREE);
15660 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15661 && mode2 == QImode)
15663 if (! (type = v2si_ftype_int_qi))
15664 type = v2si_ftype_int_qi
15665 = build_function_type_list (opaque_V2SI_type_node,
15666 integer_type_node,
15667 char_type_node,
15668 NULL_TREE);
15671 else
15672 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15673 d->code, d->name);
15676 def_builtin (d->name, type, d->code);
15679 /* Add the simple unary operators. */
15680 d = bdesc_1arg;
15681 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15683 machine_mode mode0, mode1;
15684 tree type;
15685 HOST_WIDE_INT mask = d->mask;
15687 if ((mask & builtin_mask) != mask)
15689 if (TARGET_DEBUG_BUILTIN)
15690 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15691 continue;
15694 if (rs6000_overloaded_builtin_p (d->code))
15696 if (! (type = opaque_ftype_opaque))
15697 type = opaque_ftype_opaque
15698 = build_function_type_list (opaque_V4SI_type_node,
15699 opaque_V4SI_type_node,
15700 NULL_TREE);
15702 else
15704 enum insn_code icode = d->icode;
15705 if (d->name == 0)
15707 if (TARGET_DEBUG_BUILTIN)
15708 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
15709 (long unsigned)i);
15711 continue;
15714 if (icode == CODE_FOR_nothing)
15716 if (TARGET_DEBUG_BUILTIN)
15717 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15718 d->name);
15720 continue;
15723 mode0 = insn_data[icode].operand[0].mode;
15724 mode1 = insn_data[icode].operand[1].mode;
15726 if (mode0 == V2SImode && mode1 == QImode)
15728 if (! (type = v2si_ftype_qi))
15729 type = v2si_ftype_qi
15730 = build_function_type_list (opaque_V2SI_type_node,
15731 char_type_node,
15732 NULL_TREE);
15735 else
15736 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15737 d->code, d->name);
15740 def_builtin (d->name, type, d->code);
15744 static void
15745 rs6000_init_libfuncs (void)
15747 if (!TARGET_IEEEQUAD)
15748 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15749 if (!TARGET_XL_COMPAT)
15751 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15752 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15753 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15754 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
15756 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15758 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15759 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15760 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15761 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15762 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15763 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15764 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15766 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15767 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15768 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15769 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15770 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15771 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15772 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15773 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15776 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15777 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15779 else
15781 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15782 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15783 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15784 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15786 else
15788 /* 32-bit SVR4 quad floating point routines. */
15790 set_optab_libfunc (add_optab, TFmode, "_q_add");
15791 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15792 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15793 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15794 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15795 if (TARGET_PPC_GPOPT)
15796 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15798 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15799 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15800 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15801 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15802 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15803 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15805 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15806 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15807 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15808 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15809 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15810 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15811 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15812 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
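/* The net effect of the set_optab_libfunc calls above is that plain
   long double arithmetic is lowered to library calls; e.g. on 32-bit
   SVR4 the addition in

     long double f (long double a, long double b) { return a + b; }

   is emitted as a call to _q_add instead of inline code.  */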
15817 /* Expand a block clear operation, and return 1 if successful. Return 0
15818 if we should let the compiler generate normal code.
15820 operands[0] is the destination
15821 operands[1] is the length
15822 operands[3] is the alignment */
15824 int
15825 expand_block_clear (rtx operands[])
15827 rtx orig_dest = operands[0];
15828 rtx bytes_rtx = operands[1];
15829 rtx align_rtx = operands[3];
15830 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15831 HOST_WIDE_INT align;
15832 HOST_WIDE_INT bytes;
15833 int offset;
15834 int clear_bytes;
15835 int clear_step;
15837 /* If this is not a fixed-size clear, just call memset. */
15838 if (! constp)
15839 return 0;
15841 /* This must be a fixed-size alignment. */
15842 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15843 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15845 /* Anything to clear? */
15846 bytes = INTVAL (bytes_rtx);
15847 if (bytes <= 0)
15848 return 1;
15850 /* Use the builtin memset after a point, to avoid huge code bloat.
15851 When optimize_size, avoid any significant code bloat; calling
15852 memset is about 4 instructions, so allow for one instruction to
15853 load zero and three to do clearing. */
15854 if (TARGET_ALTIVEC && align >= 128)
15855 clear_step = 16;
15856 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
15857 clear_step = 8;
15858 else if (TARGET_SPE && align >= 64)
15859 clear_step = 8;
15860 else
15861 clear_step = 4;
15863 if (optimize_size && bytes > 3 * clear_step)
15864 return 0;
15865 if (! optimize_size && bytes > 8 * clear_step)
15866 return 0;
15868 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15870 machine_mode mode = BLKmode;
15871 rtx dest;
15873 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15875 clear_bytes = 16;
15876 mode = V4SImode;
15878 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15880 clear_bytes = 8;
15881 mode = V2SImode;
15883 else if (bytes >= 8 && TARGET_POWERPC64
15884 && (align >= 64 || !STRICT_ALIGNMENT))
15886 clear_bytes = 8;
15887 mode = DImode;
15888 if (offset == 0 && align < 64)
15890 rtx addr;
15892 /* If the address form is reg+offset with offset not a
15893 multiple of four, reload into reg indirect form here
15894 rather than waiting for reload. This way we get one
15895 reload, not one per store. */
15896 addr = XEXP (orig_dest, 0);
15897 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15898 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15899 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15901 addr = copy_addr_to_reg (addr);
15902 orig_dest = replace_equiv_address (orig_dest, addr);
15906 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15907 { /* clear 4 bytes */
15908 clear_bytes = 4;
15909 mode = SImode;
15911 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15912 { /* clear 2 bytes */
15913 clear_bytes = 2;
15914 mode = HImode;
15916 else /* clear 1 byte at a time */
15918 clear_bytes = 1;
15919 mode = QImode;
15922 dest = adjust_address (orig_dest, mode, offset);
15924 emit_move_insn (dest, CONST0_RTX (mode));
15927 return 1;
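/* Worked example (editorial addition): clearing a 22-byte block with
   32-bit alignment on a 32-bit target gives clear_step = 4, so the size
   caps above allow up to 32 bytes when not optimizing for size.  The
   loop then tiles the block as five SImode stores (20 bytes) followed
   by one HImode store (2 bytes), each storing CONST0_RTX of its mode.  */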
15931 /* Expand a block move operation, and return 1 if successful. Return 0
15932 if we should let the compiler generate normal code.
15934 operands[0] is the destination
15935 operands[1] is the source
15936 operands[2] is the length
15937 operands[3] is the alignment */
15939 #define MAX_MOVE_REG 4
15942 expand_block_move (rtx operands[])
15944 rtx orig_dest = operands[0];
15945 rtx orig_src = operands[1];
15946 rtx bytes_rtx = operands[2];
15947 rtx align_rtx = operands[3];
15948 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
15949 int align;
15950 int bytes;
15951 int offset;
15952 int move_bytes;
15953 rtx stores[MAX_MOVE_REG];
15954 int num_reg = 0;
15956 /* If this is not a fixed size move, just call memcpy */
15957 if (! constp)
15958 return 0;
15960 /* This must be a fixed size alignment */
15961 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15962 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15964 /* Anything to move? */
15965 bytes = INTVAL (bytes_rtx);
15966 if (bytes <= 0)
15967 return 1;
15969 if (bytes > rs6000_block_move_inline_limit)
15970 return 0;
15972 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
15974 union {
15975 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
15976 rtx (*mov) (rtx, rtx);
15977 } gen_func;
15978 machine_mode mode = BLKmode;
15979 rtx src, dest;
15981 /* Altivec first, since it will be faster than a string move
15982 when it applies, and usually not significantly larger. */
15983 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
15985 move_bytes = 16;
15986 mode = V4SImode;
15987 gen_func.mov = gen_movv4si;
15989 else if (TARGET_SPE && bytes >= 8 && align >= 64)
15991 move_bytes = 8;
15992 mode = V2SImode;
15993 gen_func.mov = gen_movv2si;
15995 else if (TARGET_STRING
15996 && bytes > 24 /* move up to 32 bytes at a time */
15997 && ! fixed_regs[5]
15998 && ! fixed_regs[6]
15999 && ! fixed_regs[7]
16000 && ! fixed_regs[8]
16001 && ! fixed_regs[9]
16002 && ! fixed_regs[10]
16003 && ! fixed_regs[11]
16004 && ! fixed_regs[12])
16006 move_bytes = (bytes > 32) ? 32 : bytes;
16007 gen_func.movmemsi = gen_movmemsi_8reg;
16009 else if (TARGET_STRING
16010 && bytes > 16 /* move up to 24 bytes at a time */
16011 && ! fixed_regs[5]
16012 && ! fixed_regs[6]
16013 && ! fixed_regs[7]
16014 && ! fixed_regs[8]
16015 && ! fixed_regs[9]
16016 && ! fixed_regs[10])
16018 move_bytes = (bytes > 24) ? 24 : bytes;
16019 gen_func.movmemsi = gen_movmemsi_6reg;
16021 else if (TARGET_STRING
16022 && bytes > 8 /* move up to 16 bytes at a time */
16023 && ! fixed_regs[5]
16024 && ! fixed_regs[6]
16025 && ! fixed_regs[7]
16026 && ! fixed_regs[8])
16028 move_bytes = (bytes > 16) ? 16 : bytes;
16029 gen_func.movmemsi = gen_movmemsi_4reg;
16031 else if (bytes >= 8 && TARGET_POWERPC64
16032 && (align >= 64 || !STRICT_ALIGNMENT))
16034 move_bytes = 8;
16035 mode = DImode;
16036 gen_func.mov = gen_movdi;
16037 if (offset == 0 && align < 64)
16039 rtx addr;
16041 /* If the address form is reg+offset with offset not a
16042 multiple of four, reload into reg indirect form here
16043 rather than waiting for reload. This way we get one
16044 reload, not one per load and/or store. */
16045 addr = XEXP (orig_dest, 0);
16046 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16047 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16048 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16050 addr = copy_addr_to_reg (addr);
16051 orig_dest = replace_equiv_address (orig_dest, addr);
16053 addr = XEXP (orig_src, 0);
16054 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16055 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16056 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16058 addr = copy_addr_to_reg (addr);
16059 orig_src = replace_equiv_address (orig_src, addr);
16063 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
16064 { /* move up to 8 bytes at a time */
16065 move_bytes = (bytes > 8) ? 8 : bytes;
16066 gen_func.movmemsi = gen_movmemsi_2reg;
16068 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16069 { /* move 4 bytes */
16070 move_bytes = 4;
16071 mode = SImode;
16072 gen_func.mov = gen_movsi;
16074 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16075 { /* move 2 bytes */
16076 move_bytes = 2;
16077 mode = HImode;
16078 gen_func.mov = gen_movhi;
16080 else if (TARGET_STRING && bytes > 1)
16081 { /* move up to 4 bytes at a time */
16082 move_bytes = (bytes > 4) ? 4 : bytes;
16083 gen_func.movmemsi = gen_movmemsi_1reg;
16085 else /* move 1 byte at a time */
16087 move_bytes = 1;
16088 mode = QImode;
16089 gen_func.mov = gen_movqi;
16092 src = adjust_address (orig_src, mode, offset);
16093 dest = adjust_address (orig_dest, mode, offset);
16095 if (mode != BLKmode)
16097 rtx tmp_reg = gen_reg_rtx (mode);
16099 emit_insn ((*gen_func.mov) (tmp_reg, src));
16100 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16103 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16105 int i;
16106 for (i = 0; i < num_reg; i++)
16107 emit_insn (stores[i]);
16108 num_reg = 0;
16111 if (mode == BLKmode)
16113 /* Move the address into scratch registers. The movmemsi
16114 patterns require zero offset. */
16115 if (!REG_P (XEXP (src, 0)))
16117 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16118 src = replace_equiv_address (src, src_reg);
16120 set_mem_size (src, move_bytes);
16122 if (!REG_P (XEXP (dest, 0)))
16124 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16125 dest = replace_equiv_address (dest, dest_reg);
16127 set_mem_size (dest, move_bytes);
16129 emit_insn ((*gen_func.movmemsi) (dest, src,
16130 GEN_INT (move_bytes & 31),
16131 align_rtx));
16135 return 1;
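/* Worked example (editorial addition): a 10-byte copy with 32-bit
   alignment, no TARGET_STRING, and an inline limit of at least 10
   tiles as two SImode moves plus one HImode move.  Each iteration
   loads into a fresh pseudo and queues the store in STORES[]; the
   queued stores are flushed once MAX_MOVE_REG is reached or the last
   piece has been loaded, so the loads and stores can be scheduled
   apart.  */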
16139 /* Return a string to perform a load_multiple operation.
16140 operands[0] is the vector.
16141 operands[1] is the source address.
16142 operands[2] is the first destination register. */
16144 const char *
16145 rs6000_output_load_multiple (rtx operands[3])
16147 /* We have to handle the case where the pseudo used to contain the address
16148 is assigned to one of the output registers. */
16149 int i, j;
16150 int words = XVECLEN (operands[0], 0);
16151 rtx xop[10];
16153 if (XVECLEN (operands[0], 0) == 1)
16154 return "lwz %2,0(%1)";
16156 for (i = 0; i < words; i++)
16157 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
16159 if (i == words-1)
16161 xop[0] = GEN_INT (4 * (words-1));
16162 xop[1] = operands[1];
16163 xop[2] = operands[2];
16164 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16165 return "";
16167 else if (i == 0)
16169 xop[0] = GEN_INT (4 * (words-1));
16170 xop[1] = operands[1];
16171 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16172 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16173 return "";
16175 else
16177 for (j = 0; j < words; j++)
16178 if (j != i)
16180 xop[0] = GEN_INT (j * 4);
16181 xop[1] = operands[1];
16182 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16183 output_asm_insn ("lwz %2,%0(%1)", xop);
16185 xop[0] = GEN_INT (i * 4);
16186 xop[1] = operands[1];
16187 output_asm_insn ("lwz %1,%0(%1)", xop);
16188 return "";
16192 return "lswi %2,%1,%N0";
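/* Example (editorial addition): a three-word load into r5..r7 where the
   address lives in r6, i.e. the base register is clobbered mid-vector
   (i == 1).  The fallback loop above loads the other words first and
   loads word 1 into r6 itself last:

       lwz r5,0(r6)
       lwz r7,8(r6)
       lwz r6,4(r6)  */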
16196 /* A validation routine: say whether CODE, a condition code, and MODE
16197 match. The other alternatives either don't make sense or should
16198 never be generated. */
16200 void
16201 validate_condition_mode (enum rtx_code code, machine_mode mode)
16203 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16204 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16205 && GET_MODE_CLASS (mode) == MODE_CC);
16207 /* These don't make sense. */
16208 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16209 || mode != CCUNSmode);
16211 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16212 || mode == CCUNSmode);
16214 gcc_assert (mode == CCFPmode
16215 || (code != ORDERED && code != UNORDERED
16216 && code != UNEQ && code != LTGT
16217 && code != UNGT && code != UNLT
16218 && code != UNGE && code != UNLE));
16220 /* These should never be generated except for
16221 flag_finite_math_only. */
16222 gcc_assert (mode != CCFPmode
16223 || flag_finite_math_only
16224 || (code != LE && code != GE
16225 && code != UNEQ && code != LTGT
16226 && code != UNGT && code != UNLT));
16228 /* These are invalid; the information is not there. */
16229 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
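/* For instance (editorial addition): an unsigned comparison such as GTU
   must have been generated with CCUNSmode and a signed GT must not, while
   CCEQmode carries only equality information, so nothing but EQ or NE may
   consume it.  Any other pairing trips one of the asserts above.  */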
16233 /* Return 1 if ANDOP is a mask that has no set bits that are not in the
16234 mask required to convert the result of a rotate insn into a shift
16235 left insn of SHIFTOP bits. Both are known to be SImode CONST_INT. */
16238 includes_lshift_p (rtx shiftop, rtx andop)
16240 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16242 shift_mask <<= INTVAL (shiftop);
16244 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16247 /* Similar, but for right shift. */
16250 includes_rshift_p (rtx shiftop, rtx andop)
16252 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16254 shift_mask >>= INTVAL (shiftop);
16256 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
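/* Worked example (editorial addition): with SHIFTOP = 8, shift_mask in
   includes_lshift_p covers bits 8 and up, so ANDOP = 0xffffff00
   qualifies; rotating left by 8 and masking is then exactly a shift
   left by 8.  ANDOP = 0xffffff01 fails, since bit 0 survives the mask
   but a true shift left would have cleared it.  */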
16259 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16260 to perform a left shift. It must have exactly SHIFTOP least
16261 significant 0's, then one or more 1's, then zero or more 0's. */
16264 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16266 if (GET_CODE (andop) == CONST_INT)
16268 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16270 c = INTVAL (andop);
16271 if (c == 0 || c == HOST_WIDE_INT_M1U)
16272 return 0;
16274 shift_mask = HOST_WIDE_INT_M1U;
16275 shift_mask <<= INTVAL (shiftop);
16277 /* Find the least significant one bit. */
16278 lsb = c & -c;
16280 /* It must coincide with the LSB of the shift mask. */
16281 if (-lsb != shift_mask)
16282 return 0;
16284 /* Invert to look for the next transition (if any). */
16285 c = ~c;
16287 /* Remove the low group of ones (originally low group of zeros). */
16288 c &= -lsb;
16290 /* Again find the lsb, and check we have all 1's above. */
16291 lsb = c & -c;
16292 return c == -lsb;
16294 else
16295 return 0;
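/* Editorial sketch (not part of the upstream sources): the test above
   restated over plain unsigned long long, so it can be compiled and
   run standalone to explore which masks qualify.  */
#if 0
#include <stdio.h>

static int
rldic_mask_ok (unsigned long long c, int shift)
{
  unsigned long long shift_mask = ~0ULL << shift;
  unsigned long long lsb;

  if (c == 0 || c == ~0ULL)
    return 0;

  lsb = c & -c;			/* Least significant 1 bit.  */
  if (-lsb != shift_mask)	/* It must sit exactly at bit SHIFT.  */
    return 0;

  c = ~c;			/* Look for the next transition.  */
  c &= -lsb;			/* Drop the low run of 1's.  */
  lsb = c & -c;
  return c == -lsb;		/* Nothing but 1's above it.  */
}

int
main (void)
{
  printf ("%d\n", rldic_mask_ok (0xffff0000ULL, 16));	/* 1 */
  printf ("%d\n", rldic_mask_ok (0xffff8000ULL, 16));	/* 0: only 15 low zeros */
  return 0;
}
#endif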
16298 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16299 to perform a left shift. It must have SHIFTOP or more least
16300 significant 0's, with the remainder of the word 1's. */
16303 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16305 if (GET_CODE (andop) == CONST_INT)
16307 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16309 shift_mask = HOST_WIDE_INT_M1U;
16310 shift_mask <<= INTVAL (shiftop);
16311 c = INTVAL (andop);
16313 /* Find the least significant one bit. */
16314 lsb = c & -c;
16316 /* It must be covered by the shift mask.
16317 This test also rejects c == 0. */
16318 if ((lsb & shift_mask) == 0)
16319 return 0;
16321 /* Check we have all 1's above the transition, and reject all 1's. */
16322 return c == -lsb && lsb != 1;
16324 else
16325 return 0;
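/* Worked example (editorial addition): for SHIFTOP = 4 an ANDOP of
   0xfffffffffffffff0 qualifies, since its lowest 1 bit (0x10) lies
   inside the shift mask ~0 << 4 and everything above that bit is 1's.
   0xffffffffffffffe0 also qualifies (more than SHIFTOP low zeros is
   allowed), while 0xfffffffffffffff8 fails because bit 3 falls outside
   the shift mask.  */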
16328 /* Return 1 if the operands will generate valid arguments to the rlwimi
16329 instruction for an insert with right shift in 64-bit mode. The mask may
16330 not start on the first bit or stop on the last bit because the wrap-around
16331 effects of the instruction do not correspond to the semantics of the RTL insn. */
16334 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16336 if (INTVAL (startop) > 32
16337 && INTVAL (startop) < 64
16338 && INTVAL (sizeop) > 1
16339 && INTVAL (sizeop) + INTVAL (startop) < 64
16340 && INTVAL (shiftop) > 0
16341 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16342 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16343 return 1;
16345 return 0;
16348 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
16349 for lfq and stfq insns iff the registers are hard registers. */
16352 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16354 /* We might have been passed a SUBREG. */
16355 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16356 return 0;
16358 /* We might have been passed non-floating-point registers. */
16359 if (!FP_REGNO_P (REGNO (reg1))
16360 || !FP_REGNO_P (REGNO (reg2)))
16361 return 0;
16363 return (REGNO (reg1) == REGNO (reg2) - 1);
16366 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16367 addr1 and addr2 must be in consecutive memory locations
16368 (addr2 == addr1 + 8). */
16371 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16373 rtx addr1, addr2;
16374 unsigned int reg1, reg2;
16375 int offset1, offset2;
16377 /* The mems cannot be volatile. */
16378 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16379 return 0;
16381 addr1 = XEXP (mem1, 0);
16382 addr2 = XEXP (mem2, 0);
16384 /* Extract an offset (if used) from the first addr. */
16385 if (GET_CODE (addr1) == PLUS)
16387 /* If not a REG, return zero. */
16388 if (GET_CODE (XEXP (addr1, 0)) != REG)
16389 return 0;
16390 else
16392 reg1 = REGNO (XEXP (addr1, 0));
16393 /* The offset must be constant! */
16394 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16395 return 0;
16396 offset1 = INTVAL (XEXP (addr1, 1));
16399 else if (GET_CODE (addr1) != REG)
16400 return 0;
16401 else
16403 reg1 = REGNO (addr1);
16404 /* This was a simple (mem (reg)) expression. Offset is 0. */
16405 offset1 = 0;
16408 /* And now for the second addr. */
16409 if (GET_CODE (addr2) == PLUS)
16411 /* If not a REG, return zero. */
16412 if (GET_CODE (XEXP (addr2, 0)) != REG)
16413 return 0;
16414 else
16416 reg2 = REGNO (XEXP (addr2, 0));
16417 /* The offset must be constant. */
16418 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16419 return 0;
16420 offset2 = INTVAL (XEXP (addr2, 1));
16423 else if (GET_CODE (addr2) != REG)
16424 return 0;
16425 else
16427 reg2 = REGNO (addr2);
16428 /* This was a simple (mem (reg)) expression. Offset is 0. */
16429 offset2 = 0;
16432 /* Both of these must have the same base register. */
16433 if (reg1 != reg2)
16434 return 0;
16436 /* The offset for the second addr must be 8 more than the first addr. */
16437 if (offset2 != offset1 + 8)
16438 return 0;
16440 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16441 instructions. */
16442 return 1;
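/* Example (editorial addition): the pair (mem:DF (reg 9)) and
   (mem:DF (plus (reg 9) (const_int 8))) passes these tests: same base
   register, offsets 0 and 8.  A peephole can then fuse the two loads
   into a single lfq on targets that provide it.  */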
16447 rs6000_secondary_memory_needed_rtx (machine_mode mode)
16449 static bool eliminated = false;
16450 rtx ret;
16452 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16453 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16454 else
16456 rtx mem = cfun->machine->sdmode_stack_slot;
16457 gcc_assert (mem != NULL_RTX);
16459 if (!eliminated)
16461 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16462 cfun->machine->sdmode_stack_slot = mem;
16463 eliminated = true;
16465 ret = mem;
16468 if (TARGET_DEBUG_ADDR)
16470 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16471 GET_MODE_NAME (mode));
16472 if (!ret)
16473 fprintf (stderr, "\tNULL_RTX\n");
16474 else
16475 debug_rtx (ret);
16478 return ret;
16481 /* Return the mode to be used for memory when a secondary memory
16482 location is needed. For SDmode values we need to use DDmode, in
16483 all other cases we can use the same mode. */
16484 machine_mode
16485 rs6000_secondary_memory_needed_mode (machine_mode mode)
16487 if (lra_in_progress && mode == SDmode)
16488 return DDmode;
16489 return mode;
16492 static tree
16493 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16495 /* Don't walk into types. */
16496 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16498 *walk_subtrees = 0;
16499 return NULL_TREE;
16502 switch (TREE_CODE (*tp))
16504 case VAR_DECL:
16505 case PARM_DECL:
16506 case FIELD_DECL:
16507 case RESULT_DECL:
16508 case SSA_NAME:
16509 case REAL_CST:
16510 case MEM_REF:
16511 case VIEW_CONVERT_EXPR:
16512 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16513 return *tp;
16514 break;
16515 default:
16516 break;
16519 return NULL_TREE;
16522 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16523 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16524 only work on the traditional altivec registers, note if an altivec register
16525 was chosen. */
16527 static enum rs6000_reg_type
16528 register_to_reg_type (rtx reg, bool *is_altivec)
16530 HOST_WIDE_INT regno;
16531 enum reg_class rclass;
16533 if (GET_CODE (reg) == SUBREG)
16534 reg = SUBREG_REG (reg);
16536 if (!REG_P (reg))
16537 return NO_REG_TYPE;
16539 regno = REGNO (reg);
16540 if (regno >= FIRST_PSEUDO_REGISTER)
16542 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16543 return PSEUDO_REG_TYPE;
16545 regno = true_regnum (reg);
16546 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16547 return PSEUDO_REG_TYPE;
16550 gcc_assert (regno >= 0);
16552 if (is_altivec && ALTIVEC_REGNO_P (regno))
16553 *is_altivec = true;
16555 rclass = rs6000_regno_regclass[regno];
16556 return reg_class_to_reg_type[(int)rclass];
16559 /* Helper function to return the cost of adding a TOC entry address. */
16561 static inline int
16562 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
16564 int ret;
16566 if (TARGET_CMODEL != CMODEL_SMALL)
16567 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
16569 else
16570 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
16572 return ret;
16575 /* Helper function for rs6000_secondary_reload to determine whether the memory
16576 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
16577 needs reloading. Return negative if the memory is not handled by the memory
16578 helper functions (so that a different reload method should be tried), 0 if no
16579 additional instructions are needed, and positive to give the extra cost for
16580 the memory. */
16582 static int
16583 rs6000_secondary_reload_memory (rtx addr,
16584 enum reg_class rclass,
16585 enum machine_mode mode)
16587 int extra_cost = 0;
16588 rtx reg, and_arg, plus_arg0, plus_arg1;
16589 addr_mask_type addr_mask;
16590 const char *type = NULL;
16591 const char *fail_msg = NULL;
16593 if (GPR_REG_CLASS_P (rclass))
16594 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
16596 else if (rclass == FLOAT_REGS)
16597 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
16599 else if (rclass == ALTIVEC_REGS)
16600 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
16602 /* For the combined VSX_REGS, turn off Altivec AND -16. */
16603 else if (rclass == VSX_REGS)
16604 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
16605 & ~RELOAD_REG_AND_M16);
16607 else
16609 if (TARGET_DEBUG_ADDR)
16610 fprintf (stderr,
16611 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16612 "class is not GPR, FPR, VMX\n",
16613 GET_MODE_NAME (mode), reg_class_names[rclass]);
16615 return -1;
16618 /* If the mode isn't valid in this register class, just return now. */
16619 if ((addr_mask & RELOAD_REG_VALID) == 0)
16621 if (TARGET_DEBUG_ADDR)
16622 fprintf (stderr,
16623 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16624 "not valid in class\n",
16625 GET_MODE_NAME (mode), reg_class_names[rclass]);
16627 return -1;
16630 switch (GET_CODE (addr))
16632 /* Does the register class support auto update forms for this mode? We
16633 don't need a scratch register, since the powerpc only supports
16634 PRE_INC, PRE_DEC, and PRE_MODIFY. */
16635 case PRE_INC:
16636 case PRE_DEC:
16637 reg = XEXP (addr, 0);
16638 if (!base_reg_operand (addr, GET_MODE (reg)))
16640 fail_msg = "no base register #1";
16641 extra_cost = -1;
16644 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
16646 extra_cost = 1;
16647 type = "update";
16649 break;
16651 case PRE_MODIFY:
16652 reg = XEXP (addr, 0);
16653 plus_arg1 = XEXP (addr, 1);
16654 if (!base_reg_operand (reg, GET_MODE (reg))
16655 || GET_CODE (plus_arg1) != PLUS
16656 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
16658 fail_msg = "bad PRE_MODIFY";
16659 extra_cost = -1;
16662 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
16664 extra_cost = 1;
16665 type = "update";
16667 break;
16669 /* Do we need to simulate AND -16 to clear the bottom address bits used
16670 in VMX load/stores? Only allow the AND for vector sizes. */
16671 case AND:
16672 and_arg = XEXP (addr, 0);
16673 if (GET_MODE_SIZE (mode) != 16
16674 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16675 || INTVAL (XEXP (addr, 1)) != -16)
16677 fail_msg = "bad Altivec AND #1";
16678 extra_cost = -1;
16681 if (rclass != ALTIVEC_REGS)
16683 if (legitimate_indirect_address_p (and_arg, false))
16684 extra_cost = 1;
16686 else if (legitimate_indexed_address_p (and_arg, false))
16687 extra_cost = 2;
16689 else
16691 fail_msg = "bad Altivec AND #2";
16692 extra_cost = -1;
16695 type = "and";
16697 break;
16699 /* If this is an indirect address, make sure it is a base register. */
16700 case REG:
16701 case SUBREG:
16702 if (!legitimate_indirect_address_p (addr, false))
16704 extra_cost = 1;
16705 type = "move";
16707 break;
16709 /* If this is an indexed address, make sure the register class can handle
16710 indexed addresses for this mode. */
16711 case PLUS:
16712 plus_arg0 = XEXP (addr, 0);
16713 plus_arg1 = XEXP (addr, 1);
16715 /* (plus (plus (reg) (constant)) (constant)) is generated during
16716 push_reload processing, so handle it now. */
16717 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
16719 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16721 extra_cost = 1;
16722 type = "offset";
16726 /* (plus (plus (reg) (constant)) (reg)) is also generated during
16727 push_reload processing, so handle it now. */
16728 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
16730 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
16732 extra_cost = 1;
16733 type = "indexed #2";
16737 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
16739 fail_msg = "no base register #2";
16740 extra_cost = -1;
16743 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
16745 if ((addr_mask & RELOAD_REG_INDEXED) == 0
16746 || !legitimate_indexed_address_p (addr, false))
16748 extra_cost = 1;
16749 type = "indexed";
16753 /* Make sure the register class can handle offset addresses. */
16754 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
16756 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16758 extra_cost = 1;
16759 type = "offset";
16763 else
16765 fail_msg = "bad PLUS";
16766 extra_cost = -1;
16769 break;
16771 case LO_SUM:
16772 if (!legitimate_lo_sum_address_p (mode, addr, false))
16774 fail_msg = "bad LO_SUM";
16775 extra_cost = -1;
16778 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16780 extra_cost = 1;
16781 type = "lo_sum";
16783 break;
16785 /* Static addresses need to create a TOC entry. */
16786 case CONST:
16787 case SYMBOL_REF:
16788 case LABEL_REF:
16789 type = "address";
16790 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
16791 break;
16793 /* TOC references look like offsettable memory. */
16794 case UNSPEC:
16795 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
16797 fail_msg = "bad UNSPEC";
16798 extra_cost = -1;
16801 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16803 extra_cost = 1;
16804 type = "toc reference";
16806 break;
16808 default:
16810 fail_msg = "bad address";
16811 extra_cost = -1;
16815 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
16817 if (extra_cost < 0)
16818 fprintf (stderr,
16819 "rs6000_secondary_reload_memory error: mode = %s, "
16820 "class = %s, addr_mask = '%s', %s\n",
16821 GET_MODE_NAME (mode),
16822 reg_class_names[rclass],
16823 rs6000_debug_addr_mask (addr_mask, false),
16824 (fail_msg != NULL) ? fail_msg : "<bad address>");
16826 else
16827 fprintf (stderr,
16828 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16829 "addr_mask = '%s', extra cost = %d, %s\n",
16830 GET_MODE_NAME (mode),
16831 reg_class_names[rclass],
16832 rs6000_debug_addr_mask (addr_mask, false),
16833 extra_cost,
16834 (type) ? type : "<none>");
16836 debug_rtx (addr);
16839 return extra_cost;
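/* Example (editorial addition): reloading a 16-byte vector through a
   non-Altivec register class from an address like
   (and (reg) (const_int -16)) lands in the AND case above.  The AND
   has to be simulated with an explicit mask instruction, so the
   routine reports an extra cost of 1 for a plain base register inside
   the AND, or 2 for an indexed (reg+reg) form, instead of failing.  */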
16842 /* Helper function for rs6000_secondary_reload to return true if a move to a
16843 different register class is really a simple move. */
16845 static bool
16846 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16847 enum rs6000_reg_type from_type,
16848 machine_mode mode)
16850 int size;
16852 /* Add support for various direct moves available. In this function, we only
16853 look at cases where we don't need any extra registers, and one or more
16854 simple move insns are issued. At present, 32-bit integers are not allowed
16855 in FPR/VSX registers. Single precision binary floating point is not a simple
16856 move because we need to convert to the single precision memory layout.
16857 The 4-byte SDmode can be moved. */
16858 size = GET_MODE_SIZE (mode);
16859 if (TARGET_DIRECT_MOVE
16860 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16861 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16862 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16863 return true;
16865 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16866 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16867 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16868 return true;
16870 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16871 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16872 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16873 return true;
16875 return false;
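/* Example (editorial addition): on a 64-bit power8 (TARGET_DIRECT_MOVE),
   a DImode move from a GPR to a VSX register satisfies the first test
   above (size 8, GPR_REG_TYPE to VSX_REG_TYPE) and is "simple" because
   a single mtvsrd does the job with no scratch register or memory.  */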
16878 /* Power8 helper function for rs6000_secondary_reload; handle all of the
16879 special direct moves that involve allocating an extra register. Return
16880 true if such a direct move exists, recording its insn code and extra
16881 cost in SRI, and false if not. */
16883 static bool
16884 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16885 enum rs6000_reg_type from_type,
16886 machine_mode mode,
16887 secondary_reload_info *sri,
16888 bool altivec_p)
16890 bool ret = false;
16891 enum insn_code icode = CODE_FOR_nothing;
16892 int cost = 0;
16893 int size = GET_MODE_SIZE (mode);
16895 if (TARGET_POWERPC64)
16897 if (size == 16)
16899 /* Handle moving 128-bit values from GPRs to VSX registers on
16900 power8 when running in 64-bit mode using XXPERMDI to glue the two
16901 64-bit values back together. */
16902 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16904 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16905 icode = reg_addr[mode].reload_vsx_gpr;
16908 /* Handle moving 128-bit values from VSX registers to GPRs on
16909 power8 when running in 64-bit mode using XXPERMDI to get access to the
16910 bottom 64-bit value. */
16911 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16913 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16914 icode = reg_addr[mode].reload_gpr_vsx;
16918 else if (mode == SFmode)
16920 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16922 cost = 3; /* xscvdpspn, mfvsrd, and. */
16923 icode = reg_addr[mode].reload_gpr_vsx;
16926 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16928 cost = 2; /* mtvsrz, xscvspdpn. */
16929 icode = reg_addr[mode].reload_vsx_gpr;
16934 if (TARGET_POWERPC64 && size == 16)
16936 /* Handle moving 128-bit values from GPRs to VSX registers on
16937 power8 when running in 64-bit mode using XXPERMDI to glue the two
16938 64-bit values back together. */
16939 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16941 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16942 icode = reg_addr[mode].reload_vsx_gpr;
16945 /* Handle moving 128-bit values from VSX registers to GPRs on
16946 power8 when running in 64-bit mode using XXPERMDI to get access to the
16947 bottom 64-bit value. */
16948 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16950 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16951 icode = reg_addr[mode].reload_gpr_vsx;
16955 else if (!TARGET_POWERPC64 && size == 8)
16957 /* Handle moving 64-bit values from GPRs to floating point registers on
16958 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16959 values back together. Altivec register classes must be handled
16960 specially since a different instruction is used, and the secondary
16961 reload support requires a single instruction class in the scratch
16962 register constraint. However, right now TFmode is not allowed in
16963 Altivec registers, so the pattern will never match. */
16964 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16966 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16967 icode = reg_addr[mode].reload_fpr_gpr;
16971 if (icode != CODE_FOR_nothing)
16973 ret = true;
16974 if (sri)
16976 sri->icode = icode;
16977 sri->extra_cost = cost;
16981 return ret;
16984 /* Return whether a move between two register classes can be done either
16985 directly (simple move) or via a pattern that uses a single extra temporary
16986 (using power8's direct move in this case). */
16988 static bool
16989 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
16990 enum rs6000_reg_type from_type,
16991 machine_mode mode,
16992 secondary_reload_info *sri,
16993 bool altivec_p)
16995 /* Fall back to load/store reloads if either type is not a register. */
16996 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
16997 return false;
16999 /* If we haven't allocated registers yet, assume the move can be done for the
17000 standard register types. */
17001 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
17002 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
17003 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
17004 return true;
17006 /* A move within the same set of registers is a simple move for non-specialized
17007 registers. */
17008 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
17009 return true;
17011 /* Check whether a simple move can be done directly. */
17012 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
17014 if (sri)
17016 sri->icode = CODE_FOR_nothing;
17017 sri->extra_cost = 0;
17019 return true;
17022 /* Now check if we can do it in a few steps. */
17023 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
17024 altivec_p);
17027 /* Inform reload about cases where moving X with a mode MODE to a register in
17028 RCLASS requires an extra scratch or immediate register. Return the class
17029 needed for the immediate register.
17031 For VSX and Altivec, we may need a register to convert sp+offset into
17032 reg+reg addressing.
17034 For misaligned 64-bit gpr loads and stores we need a register to
17035 convert an offset address to indirect. */
17037 static reg_class_t
17038 rs6000_secondary_reload (bool in_p,
17039 rtx x,
17040 reg_class_t rclass_i,
17041 machine_mode mode,
17042 secondary_reload_info *sri)
17044 enum reg_class rclass = (enum reg_class) rclass_i;
17045 reg_class_t ret = ALL_REGS;
17046 enum insn_code icode;
17047 bool default_p = false;
17048 bool done_p = false;
17050 /* Allow subreg of memory before/during reload. */
17051 bool memory_p = (MEM_P (x)
17052 || (!reload_completed && GET_CODE (x) == SUBREG
17053 && MEM_P (SUBREG_REG (x))));
17055 sri->icode = CODE_FOR_nothing;
17056 sri->extra_cost = 0;
17057 icode = ((in_p)
17058 ? reg_addr[mode].reload_load
17059 : reg_addr[mode].reload_store);
17061 if (REG_P (x) || register_operand (x, mode))
17063 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
17064 bool altivec_p = (rclass == ALTIVEC_REGS);
17065 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
17067 if (!in_p)
17069 enum rs6000_reg_type exchange = to_type;
17070 to_type = from_type;
17071 from_type = exchange;
17074 /* Can we do a direct move of some sort? */
17075 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
17076 altivec_p))
17078 icode = (enum insn_code)sri->icode;
17079 default_p = false;
17080 done_p = true;
17081 ret = NO_REGS;
17085 /* Make sure 0.0 is not reloaded or forced into memory. */
17086 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17088 ret = NO_REGS;
17089 default_p = false;
17090 done_p = true;
17093 /* If this is a scalar floating point value and we want to load it into the
17094 traditional Altivec registers, move it through a traditional floating
17095 point register. Also make sure that non-zero constants use a FPR. */
17096 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17097 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17098 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17100 ret = FLOAT_REGS;
17101 default_p = false;
17102 done_p = true;
17105 /* Handle reload of load/stores if we have reload helper functions. */
17106 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17108 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17109 mode);
17111 if (extra_cost >= 0)
17113 done_p = true;
17114 ret = NO_REGS;
17115 if (extra_cost > 0)
17117 sri->extra_cost = extra_cost;
17118 sri->icode = icode;
17123 /* Handle unaligned loads and stores of integer registers. */
17124 if (!done_p && TARGET_POWERPC64
17125 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17126 && memory_p
17127 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17129 rtx addr = XEXP (x, 0);
17130 rtx off = address_offset (addr);
17132 if (off != NULL_RTX)
17134 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17135 unsigned HOST_WIDE_INT offset = INTVAL (off);
17137 /* We need a secondary reload when our legitimate_address_p
17138 says the address is good (as otherwise the entire address
17139 will be reloaded), and the offset is not a multiple of
17140 four or we have an address wrap. Address wrap will only
17141 occur for LO_SUMs since legitimate_offset_address_p
17142 rejects addresses for 16-byte mems that will wrap. */
17143 if (GET_CODE (addr) == LO_SUM
17144 ? (1 /* legitimate_address_p allows any offset for lo_sum */
17145 && ((offset & 3) != 0
17146 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
17147 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17148 && (offset & 3) != 0))
17150 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
17151 if (in_p)
17152 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
17153 : CODE_FOR_reload_di_load);
17154 else
17155 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
17156 : CODE_FOR_reload_di_store);
17157 sri->extra_cost = 2;
17158 ret = NO_REGS;
17159 done_p = true;
17161 else
17162 default_p = true;
17164 else
17165 default_p = true;
17168 if (!done_p && !TARGET_POWERPC64
17169 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17170 && memory_p
17171 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17173 rtx addr = XEXP (x, 0);
17174 rtx off = address_offset (addr);
17176 if (off != NULL_RTX)
17178 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17179 unsigned HOST_WIDE_INT offset = INTVAL (off);
17181 /* We need a secondary reload when our legitimate_address_p
17182 says the address is good (as otherwise the entire address
17183 will be reloaded), and we have a wrap.
17185 legitimate_lo_sum_address_p allows LO_SUM addresses to
17186 have any offset so test for wrap in the low 16 bits.
17188 legitimate_offset_address_p checks for the range
17189 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17190 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17191 [0x7ff4,0x7fff] respectively, so test for the
17192 intersection of these ranges, [0x7ffc,0x7fff] and
17193 [0x7ff4,0x7ff7] respectively.
17195 Note that the address we see here may have been
17196 manipulated by legitimize_reload_address. */
17197 if (GET_CODE (addr) == LO_SUM
17198 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17199 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17201 if (in_p)
17202 sri->icode = CODE_FOR_reload_si_load;
17203 else
17204 sri->icode = CODE_FOR_reload_si_store;
17205 sri->extra_cost = 2;
17206 ret = NO_REGS;
17207 done_p = true;
17209 else
17210 default_p = true;
17212 else
17213 default_p = true;
17216 if (!done_p)
17217 default_p = true;
17219 if (default_p)
17220 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17222 gcc_assert (ret != ALL_REGS);
17224 if (TARGET_DEBUG_ADDR)
17226 fprintf (stderr,
17227 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17228 "mode = %s",
17229 reg_class_names[ret],
17230 in_p ? "true" : "false",
17231 reg_class_names[rclass],
17232 GET_MODE_NAME (mode));
17234 if (reload_completed)
17235 fputs (", after reload", stderr);
17237 if (!done_p)
17238 fputs (", done_p not set", stderr);
17240 if (default_p)
17241 fputs (", default secondary reload", stderr);
17243 if (sri->icode != CODE_FOR_nothing)
17244 fprintf (stderr, ", reload func = %s, extra cost = %d",
17245 insn_data[sri->icode].name, sri->extra_cost);
17247 fputs ("\n", stderr);
17248 debug_rtx (x);
17251 return ret;
17254 /* Better tracing for rs6000_secondary_reload_inner. */
17256 static void
17257 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17258 bool store_p)
17260 rtx set, clobber;
17262 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17264 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17265 store_p ? "store" : "load");
17267 if (store_p)
17268 set = gen_rtx_SET (VOIDmode, mem, reg);
17269 else
17270 set = gen_rtx_SET (VOIDmode, reg, mem);
17272 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17273 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17276 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17277 ATTRIBUTE_NORETURN;
17279 static void
17280 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17281 bool store_p)
17283 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17284 gcc_unreachable ();
17287 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17288 reload helper functions. These were identified in
17289 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17290 reload, it calls the insns:
17291 reload_<RELOAD:mode>_<P:mptrsize>_store
17292 reload_<RELOAD:mode>_<P:mptrsize>_load
17294 which in turn call this function, to do whatever is necessary to create
17295 valid addresses. */
17297 void
17298 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17300 int regno = true_regnum (reg);
17301 machine_mode mode = GET_MODE (reg);
17302 addr_mask_type addr_mask;
17303 rtx addr;
17304 rtx new_addr;
17305 rtx op_reg, op0, op1;
17306 rtx and_op;
17307 rtx cc_clobber;
17308 rtvec rv;
17310 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17311 || !base_reg_operand (scratch, GET_MODE (scratch)))
17312 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17314 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17315 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17317 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17318 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17320 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17321 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17323 else
17324 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17326 /* Make sure the mode is valid in this register class. */
17327 if ((addr_mask & RELOAD_REG_VALID) == 0)
17328 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17330 if (TARGET_DEBUG_ADDR)
17331 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
17333 new_addr = addr = XEXP (mem, 0);
17334 switch (GET_CODE (addr))
17336 /* Does the register class support auto update forms for this mode? If
17337 not, do the update now. We don't need a scratch register, since the
17338 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
17339 case PRE_INC:
17340 case PRE_DEC:
17341 op_reg = XEXP (addr, 0);
17342 if (!base_reg_operand (op_reg, Pmode))
17343 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17345 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17347 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
17348 new_addr = op_reg;
17350 break;
17352 case PRE_MODIFY:
17353 op0 = XEXP (addr, 0);
17354 op1 = XEXP (addr, 1);
17355 if (!base_reg_operand (op0, Pmode)
17356 || GET_CODE (op1) != PLUS
17357 || !rtx_equal_p (op0, XEXP (op1, 0)))
17358 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17360 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17362 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17363 new_addr = reg;
17365 break;
17367 /* Do we need to simulate AND -16 to clear the bottom address bits used
17368 in VMX load/stores? */
17369 case AND:
17370 op0 = XEXP (addr, 0);
17371 op1 = XEXP (addr, 1);
17372 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
17374 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
17375 op_reg = op0;
17377 else if (GET_CODE (op0) == PLUS)
17379 emit_insn (gen_rtx_SET (VOIDmode, scratch, op0));
17380 op_reg = scratch;
17383 else
17384 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17386 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
17387 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
17388 rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
17389 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
17390 new_addr = scratch;
17392 break;
17394 /* If this is an indirect address, make sure it is a base register. */
17395 case REG:
17396 case SUBREG:
17397 if (!base_reg_operand (addr, GET_MODE (addr)))
17399 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17400 new_addr = scratch;
17402 break;
17404 /* If this is an indexed address, make sure the register class can handle
17405 indexed addresses for this mode. */
17406 case PLUS:
17407 op0 = XEXP (addr, 0);
17408 op1 = XEXP (addr, 1);
17409 if (!base_reg_operand (op0, Pmode))
17410 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17412 else if (int_reg_operand (op1, Pmode))
17414 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17416 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17417 new_addr = scratch;
17421 /* Make sure the register class can handle offset addresses. */
17422 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17424 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17426 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17427 new_addr = scratch;
17431 else
17432 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17434 break;
17436 case LO_SUM:
17437 op0 = XEXP (addr, 0);
17438 op1 = XEXP (addr, 1);
17439 if (!base_reg_operand (op0, Pmode))
17440 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17442 else if (int_reg_operand (op1, Pmode))
17444 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17446 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17447 new_addr = scratch;
17451 /* Make sure the register class can handle offset addresses. */
17452 else if (legitimate_lo_sum_address_p (mode, addr, false))
17454 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17456 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17457 new_addr = scratch;
17461 else
17462 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17464 break;
17466 case SYMBOL_REF:
17467 case CONST:
17468 case LABEL_REF:
17469 rs6000_emit_move (scratch, addr, Pmode);
17470 new_addr = scratch;
17471 break;
17473 default:
17474 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17477 /* Adjust the address if it changed. */
17478 if (addr != new_addr)
17480 mem = replace_equiv_address_nv (mem, new_addr);
17481 if (TARGET_DEBUG_ADDR)
17482 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17485 /* Now create the move. */
17486 if (store_p)
17487 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17488 else
17489 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17491 return;
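/* Example (editorial addition): for an Altivec-style address
   (and (plus r9 r10) (const_int -16)) in a class without
   RELOAD_REG_AND_M16 support, the AND case above first copies the PLUS
   into the scratch register, then emits scratch = scratch & -16 as a
   PARALLEL with a condition-register clobber, and finally rewrites the
   mem to address through scratch.  */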
17494 /* Convert reloads involving 64-bit gprs and misaligned offset
17495 addressing, or multiple 32-bit gprs and offsets that are too large,
17496 to use indirect addressing. */
17498 void
17499 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17501 int regno = true_regnum (reg);
17502 enum reg_class rclass;
17503 rtx addr;
17504 rtx scratch_or_premodify = scratch;
17506 if (TARGET_DEBUG_ADDR)
17508 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17509 store_p ? "store" : "load");
17510 fprintf (stderr, "reg:\n");
17511 debug_rtx (reg);
17512 fprintf (stderr, "mem:\n");
17513 debug_rtx (mem);
17514 fprintf (stderr, "scratch:\n");
17515 debug_rtx (scratch);
17518 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17519 gcc_assert (GET_CODE (mem) == MEM);
17520 rclass = REGNO_REG_CLASS (regno);
17521 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17522 addr = XEXP (mem, 0);
17524 if (GET_CODE (addr) == PRE_MODIFY)
17526 scratch_or_premodify = XEXP (addr, 0);
17527 gcc_assert (REG_P (scratch_or_premodify));
17528 addr = XEXP (addr, 1);
17530 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17532 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17534 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17536 /* Now create the move. */
17537 if (store_p)
17538 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17539 else
17540 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17542 return;
17545 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17546 this function has any SDmode references. If we are on a power7 or later, we
17547 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17548 can load/store the value. */
17550 static void
17551 rs6000_alloc_sdmode_stack_slot (void)
17553 tree t;
17554 basic_block bb;
17555 gimple_stmt_iterator gsi;
17557 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17558 /* We use a different approach for dealing with the secondary
17559 memory in LRA. */
17560 if (ira_use_lra_p)
17561 return;
17563 if (TARGET_NO_SDMODE_STACK)
17564 return;
17566 FOR_EACH_BB_FN (bb, cfun)
17567 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17569 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17570 if (ret)
17572 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17573 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17574 SDmode, 0);
17575 return;
17579 /* Check for any SDmode parameters of the function. */
17580 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17582 if (TREE_TYPE (t) == error_mark_node)
17583 continue;
17585 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17586 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17588 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17589 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17590 SDmode, 0);
17591 return;
17596 static void
17597 rs6000_instantiate_decls (void)
17599 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17600 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17603 /* Given an rtx X being reloaded into a reg required to be
17604 in class CLASS, return the class of reg to actually use.
17605 In general this is just CLASS; but on some machines
17606 in some cases it is preferable to use a more restrictive class.
17608 On the RS/6000, we have to return NO_REGS when we want to reload a
17609 floating-point CONST_DOUBLE to force it to be copied to memory.
17611 We also don't want to reload integer values into floating-point
17612 registers if we can at all help it. In fact, this can
17613 cause reload to die, if it tries to generate a reload of CTR
17614 into a FP register and discovers it doesn't have the memory location
17615 required.
17617 ??? Would it be a good idea to have reload do the converse, that is
17618 try to reload floating modes into FP registers if possible?
17621 static enum reg_class
17622 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17624 machine_mode mode = GET_MODE (x);
17625 bool is_constant = CONSTANT_P (x);
17627 /* Do VSX tests before handling traditional floating point registers. */
17628 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17630 if (is_constant)
17632 /* Zero is always allowed in all VSX registers. */
17633 if (x == CONST0_RTX (mode))
17634 return rclass;
17636 /* If this is a vector constant that can be formed with a few Altivec
17637 instructions, we want altivec registers. */
17638 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
17639 return ALTIVEC_REGS;
17641 /* Force constant to memory. */
17642 return NO_REGS;
17645 /* If this is a scalar floating point value, prefer the traditional
17646 floating point registers so that we can use D-form (register+offset)
17647 addressing. */
17648 if (GET_MODE_SIZE (mode) < 16)
17649 return FLOAT_REGS;
17651 /* Prefer the Altivec registers if Altivec is handling the vector
17652 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
17653 loads. */
17654 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17655 || mode == V1TImode)
17656 return ALTIVEC_REGS;
17658 return rclass;
17661 if (is_constant || GET_CODE (x) == PLUS)
17663 if (reg_class_subset_p (GENERAL_REGS, rclass))
17664 return GENERAL_REGS;
17665 if (reg_class_subset_p (BASE_REGS, rclass))
17666 return BASE_REGS;
17667 return NO_REGS;
17670 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17671 return GENERAL_REGS;
17673 return rclass;
17676 /* Debug version of rs6000_preferred_reload_class. */
17677 static enum reg_class
17678 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17680 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17682 fprintf (stderr,
17683 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17684 "mode = %s, x:\n",
17685 reg_class_names[ret], reg_class_names[rclass],
17686 GET_MODE_NAME (GET_MODE (x)));
17687 debug_rtx (x);
17689 return ret;
17692 /* If we are copying between FP or AltiVec registers and anything else, we need
17693 a memory location. The exception is when we are targeting ppc64 and the
17694 direct moves between FPRs and GPRs are available. Also, under VSX, you
17695 can copy vector registers from the FP register set to the Altivec register
17696 set and vice versa. */
17698 static bool
17699 rs6000_secondary_memory_needed (enum reg_class from_class,
17700 enum reg_class to_class,
17701 machine_mode mode)
17703 enum rs6000_reg_type from_type, to_type;
17704 bool altivec_p = ((from_class == ALTIVEC_REGS)
17705 || (to_class == ALTIVEC_REGS));
17707 /* If a simple/direct move is available, we don't need secondary memory */
17708 from_type = reg_class_to_reg_type[(int)from_class];
17709 to_type = reg_class_to_reg_type[(int)to_class];
17711 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17712 (secondary_reload_info *)0, altivec_p))
17713 return false;
17715 /* If we have a floating point or vector register class, we need to use
17716 memory to transfer the data. */
17717 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17718 return true;
17720 return false;
17723 /* Debug version of rs6000_secondary_memory_needed. */
17724 static bool
17725 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17726 enum reg_class to_class,
17727 machine_mode mode)
17729 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17731 fprintf (stderr,
17732 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17733 "to_class = %s, mode = %s\n",
17734 ret ? "true" : "false",
17735 reg_class_names[from_class],
17736 reg_class_names[to_class],
17737 GET_MODE_NAME (mode));
17739 return ret;
17742 /* Return the register class of a scratch register needed to copy IN into
17743 or out of a register in RCLASS in MODE. If it can be done directly,
17744 NO_REGS is returned. */
17746 static enum reg_class
17747 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
17748 rtx in)
17750 int regno;
17752 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17753 #if TARGET_MACHO
17754 && MACHOPIC_INDIRECT
17755 #endif
17758 /* We cannot copy a symbolic operand directly into anything
17759 other than BASE_REGS for TARGET_ELF. So indicate that a
17760 register from BASE_REGS is needed as an intermediate
17761 register.
17763 On Darwin, pic addresses require a load from memory, which
17764 needs a base register. */
17765 if (rclass != BASE_REGS
17766 && (GET_CODE (in) == SYMBOL_REF
17767 || GET_CODE (in) == HIGH
17768 || GET_CODE (in) == LABEL_REF
17769 || GET_CODE (in) == CONST))
17770 return BASE_REGS;
17773 if (GET_CODE (in) == REG)
17775 regno = REGNO (in);
17776 if (regno >= FIRST_PSEUDO_REGISTER)
17778 regno = true_regnum (in);
17779 if (regno >= FIRST_PSEUDO_REGISTER)
17780 regno = -1;
17783 else if (GET_CODE (in) == SUBREG)
17785 regno = true_regnum (in);
17786 if (regno >= FIRST_PSEUDO_REGISTER)
17787 regno = -1;
17789 else
17790 regno = -1;
17792 /* If we have VSX register moves, prefer moving scalar values between
17793 Altivec registers and GPRs by going via an FPR (and then via memory)
17794 instead of reloading the secondary memory address for Altivec moves. */
17795 if (TARGET_VSX
17796 && GET_MODE_SIZE (mode) < 16
17797 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
17798 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
17799 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17800 && (regno >= 0 && INT_REGNO_P (regno)))))
17801 return FLOAT_REGS;
17803 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17804 into anything. */
17805 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17806 || (regno >= 0 && INT_REGNO_P (regno)))
17807 return NO_REGS;
17809 /* Constants, memory, and VSX registers can go into VSX registers (both the
17810 traditional floating point and the altivec registers). */
17811 if (rclass == VSX_REGS
17812 && (regno == -1 || VSX_REGNO_P (regno)))
17813 return NO_REGS;
17815 /* Constants, memory, and FP registers can go into FP registers. */
17816 if ((regno == -1 || FP_REGNO_P (regno))
17817 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17818 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17820 /* Memory and AltiVec registers can go into AltiVec registers. */
17821 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17822 && rclass == ALTIVEC_REGS)
17823 return NO_REGS;
17825 /* We can copy among the CR registers. */
17826 if ((rclass == CR_REGS || rclass == CR0_REGS)
17827 && regno >= 0 && CR_REGNO_P (regno))
17828 return NO_REGS;
17830 /* Otherwise, we need GENERAL_REGS. */
17831 return GENERAL_REGS;
17834 /* Debug version of rs6000_secondary_reload_class. */
17835 static enum reg_class
17836 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17837 machine_mode mode, rtx in)
17839 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17840 fprintf (stderr,
17841 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17842 "mode = %s, input rtx:\n",
17843 reg_class_names[ret], reg_class_names[rclass],
17844 GET_MODE_NAME (mode));
17845 debug_rtx (in);
17847 return ret;
17850 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
17852 static bool
17853 rs6000_cannot_change_mode_class (machine_mode from,
17854 machine_mode to,
17855 enum reg_class rclass)
17857 unsigned from_size = GET_MODE_SIZE (from);
17858 unsigned to_size = GET_MODE_SIZE (to);
17860 if (from_size != to_size)
17862 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17864 if (reg_classes_intersect_p (xclass, rclass))
17866 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17867 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17869 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17870 single register under VSX because the scalar part of the register
17871 is in the upper 64-bits, and not the lower 64-bits. Types like
17872 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
17873 IEEE floating point can't overlap, and neither can small
17874 values. */
17876 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17877 return true;
17879 /* TDmode in floating-mode registers must always go into a register
17880 pair with the most significant word in the even-numbered register
17881 to match ISA requirements. In little-endian mode, this does not
17882 match subreg numbering, so we cannot allow subregs. */
17883 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17884 return true;
17886 if (from_size < 8 || to_size < 8)
17887 return true;
17889 if (from_size == 8 && (8 * to_nregs) != to_size)
17890 return true;
17892 if (to_size == 8 && (8 * from_nregs) != from_size)
17893 return true;
17895 return false;
17897 else
17898 return false;
17901 if (TARGET_E500_DOUBLE
17902 && ((((to) == DFmode) + ((from) == DFmode)) == 1
17903 || (((to) == TFmode) + ((from) == TFmode)) == 1
17904 || (((to) == DDmode) + ((from) == DDmode)) == 1
17905 || (((to) == TDmode) + ((from) == TDmode)) == 1
17906 || (((to) == DImode) + ((from) == DImode)) == 1))
17907 return true;
17909 /* Since the VSX register set includes traditional floating point registers
17910 and altivec registers, just check for the size being different instead of
17911 trying to check whether the modes are vector modes. Otherwise it won't
17912 allow, say, DF and DI to change classes. For types like TFmode and TDmode
17913 that take 2 64-bit registers, rather than a single 128-bit register, don't
17914 allow subregs of those types to other 128-bit types.
17915 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17917 unsigned num_regs = (from_size + 15) / 16;
17918 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17919 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17920 return true;
17922 return (from_size != 8 && from_size != 16);
17925 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17926 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17927 return true;
17929 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
17930 && reg_classes_intersect_p (GENERAL_REGS, rclass))
17931 return true;
17933 return false;
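/* Editor's note (worked example, not in the original source): for a VSX
   class, a DImode subreg of a V2DFmode register is rejected by the size
   checks above, since V2DFmode occupies a single 128-bit register whose
   scalar part is the upper 64 bits; TFmode/TDmode, which span two
   registers of 8 bytes each, may still overlap 64-bit modes.  */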
17936 /* Debug version of rs6000_cannot_change_mode_class. */
17937 static bool
17938 rs6000_debug_cannot_change_mode_class (machine_mode from,
17939 machine_mode to,
17940 enum reg_class rclass)
17942 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
17944 fprintf (stderr,
17945 "rs6000_cannot_change_mode_class, return %s, from = %s, "
17946 "to = %s, rclass = %s\n",
17947 ret ? "true" : "false",
17948 GET_MODE_NAME (from), GET_MODE_NAME (to),
17949 reg_class_names[rclass]);
17951 return ret;
17954 /* Return a string to do a move operation of 128 bits of data. */
17956 const char *
17957 rs6000_output_move_128bit (rtx operands[])
17959 rtx dest = operands[0];
17960 rtx src = operands[1];
17961 machine_mode mode = GET_MODE (dest);
17962 int dest_regno;
17963 int src_regno;
17964 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17965 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17967 if (REG_P (dest))
17969 dest_regno = REGNO (dest);
17970 dest_gpr_p = INT_REGNO_P (dest_regno);
17971 dest_fp_p = FP_REGNO_P (dest_regno);
17972 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
17973 dest_vsx_p = dest_fp_p | dest_vmx_p;
17975 else
17977 dest_regno = -1;
17978 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
17981 if (REG_P (src))
17983 src_regno = REGNO (src);
17984 src_gpr_p = INT_REGNO_P (src_regno);
17985 src_fp_p = FP_REGNO_P (src_regno);
17986 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
17987 src_vsx_p = src_fp_p | src_vmx_p;
17989 else
17991 src_regno = -1;
17992 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
17995 /* Register moves. */
17996 if (dest_regno >= 0 && src_regno >= 0)
17998 if (dest_gpr_p)
18000 if (src_gpr_p)
18001 return "#";
18003 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
18004 return "#";
18007 else if (TARGET_VSX && dest_vsx_p)
18009 if (src_vsx_p)
18010 return "xxlor %x0,%x1,%x1";
18012 else if (TARGET_DIRECT_MOVE && src_gpr_p)
18013 return "#";
18016 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
18017 return "vor %0,%1,%1";
18019 else if (dest_fp_p && src_fp_p)
18020 return "#";
18023 /* Loads. */
18024 else if (dest_regno >= 0 && MEM_P (src))
18026 if (dest_gpr_p)
18028 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18029 return "lq %0,%1";
18030 else
18031 return "#";
18034 else if (TARGET_ALTIVEC && dest_vmx_p
18035 && altivec_indexed_or_indirect_operand (src, mode))
18036 return "lvx %0,%y1";
18038 else if (TARGET_VSX && dest_vsx_p)
18040 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18041 return "lxvw4x %x0,%y1";
18042 else
18043 return "lxvd2x %x0,%y1";
18046 else if (TARGET_ALTIVEC && dest_vmx_p)
18047 return "lvx %0,%y1";
18049 else if (dest_fp_p)
18050 return "#";
18053 /* Stores. */
18054 else if (src_regno >= 0 && MEM_P (dest))
18056 if (src_gpr_p)
18058 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18059 return "stq %1,%0";
18060 else
18061 return "#";
18064 else if (TARGET_ALTIVEC && src_vmx_p
18065 && altivec_indexed_or_indirect_operand (dest, mode))
18066 return "stvx %1,%y0";
18068 else if (TARGET_VSX && src_vsx_p)
18070 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18071 return "stxvw4x %x1,%y0";
18072 else
18073 return "stxvd2x %x1,%y0";
18076 else if (TARGET_ALTIVEC && src_vmx_p)
18077 return "stvx %1,%y0";
18079 else if (src_fp_p)
18080 return "#";
18083 /* Constants. */
18084 else if (dest_regno >= 0
18085 && (GET_CODE (src) == CONST_INT
18086 || GET_CODE (src) == CONST_WIDE_INT
18087 || GET_CODE (src) == CONST_DOUBLE
18088 || GET_CODE (src) == CONST_VECTOR))
18090 if (dest_gpr_p)
18091 return "#";
18093 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18094 return "xxlxor %x0,%x0,%x0";
18096 else if (TARGET_ALTIVEC && dest_vmx_p)
18097 return output_vec_const_move (operands);
18100 if (TARGET_DEBUG_ADDR)
18102 fprintf (stderr, "\n===== Bad 128 bit move:\n");
18103 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
18106 gcc_unreachable ();
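/* Editor's note (summary, not in the original source): the cases above
   map 128-bit moves roughly as follows, where "#" means the move is
   split after reload: VSX-to-VSX copies use xxlor, Altivec-to-Altivec
   copies use vor, GPR-pair and FPR-pair moves split, GPR pairs use
   lq/stq when quad-memory applies, and vector memory accesses use
   lxvw4x/lxvd2x, stxvw4x/stxvd2x, or lvx/stvx.  On the store side the
   indexed-or-indirect predicate must be applied to DEST, the memory
   operand (SRC is a register there and could never match).  */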
18109 /* Validate a 128-bit move. */
18110 bool
18111 rs6000_move_128bit_ok_p (rtx operands[])
18113 machine_mode mode = GET_MODE (operands[0]);
18114 return (gpc_reg_operand (operands[0], mode)
18115 || gpc_reg_operand (operands[1], mode));
18118 /* Return true if a 128-bit move needs to be split. */
18119 bool
18120 rs6000_split_128bit_ok_p (rtx operands[])
18122 if (!reload_completed)
18123 return false;
18125 if (!gpr_or_gpr_p (operands[0], operands[1]))
18126 return false;
18128 if (quad_load_store_p (operands[0], operands[1]))
18129 return false;
18131 return true;
18135 /* Given a comparison operation, return the bit number in CCR to test. We
18136 know this is a valid comparison.
18138 SCC_P is 1 if this is for an scc. That means that %D will have been
18139 used instead of %C, so the bits will be in different places.
18141 Return -1 if OP isn't a valid comparison for some reason. */
18143 int
18144 ccr_bit (rtx op, int scc_p)
18146 enum rtx_code code = GET_CODE (op);
18147 machine_mode cc_mode;
18148 int cc_regnum;
18149 int base_bit;
18150 rtx reg;
18152 if (!COMPARISON_P (op))
18153 return -1;
18155 reg = XEXP (op, 0);
18157 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18159 cc_mode = GET_MODE (reg);
18160 cc_regnum = REGNO (reg);
18161 base_bit = 4 * (cc_regnum - CR0_REGNO);
18163 validate_condition_mode (code, cc_mode);
18165 /* When generating a sCOND operation, only positive conditions are
18166 allowed. */
18167 gcc_assert (!scc_p
18168 || code == EQ || code == GT || code == LT || code == UNORDERED
18169 || code == GTU || code == LTU);
18171 switch (code)
18173 case NE:
18174 return scc_p ? base_bit + 3 : base_bit + 2;
18175 case EQ:
18176 return base_bit + 2;
18177 case GT: case GTU: case UNLE:
18178 return base_bit + 1;
18179 case LT: case LTU: case UNGE:
18180 return base_bit;
18181 case ORDERED: case UNORDERED:
18182 return base_bit + 3;
18184 case GE: case GEU:
18185 /* If scc, we will have done a cror to put the bit in the
18186 unordered position. So test that bit. For integer, this is ! LT
18187 unless this is an scc insn. */
18188 return scc_p ? base_bit + 3 : base_bit;
18190 case LE: case LEU:
18191 return scc_p ? base_bit + 3 : base_bit + 1;
18193 default:
18194 gcc_unreachable ();
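/* Editor's note (worked example, not in the original source): each CR
   field holds four bits (LT, GT, EQ, SO/UN), so base_bit is four times
   the field number.  A GT test of CR field 2 yields 4*2 + 1 = 9.  A GE
   test returns the LT bit (base_bit) for the caller to negate, unless
   scc_p is set, in which case a cror has parked the result in the SO/UN
   slot and base_bit + 3 is returned instead.  */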
18198 /* Return the GOT register. */
18200 rtx
18201 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18203 /* The second flow pass currently (June 1999) can't update
18204 regs_ever_live without disturbing other parts of the compiler, so
18205 update it here to make the prolog/epilogue code happy. */
18206 if (!can_create_pseudo_p ()
18207 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18208 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18210 crtl->uses_pic_offset_table = 1;
18212 return pic_offset_table_rtx;
18215 static rs6000_stack_t stack_info;
18217 /* Function to init struct machine_function.
18218 This will be called, via a pointer variable,
18219 from push_function_context. */
18221 static struct machine_function *
18222 rs6000_init_machine_status (void)
18224 stack_info.reload_completed = 0;
18225 return ggc_cleared_alloc<machine_function> ();
18228 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18230 int
18231 extract_MB (rtx op)
18233 int i;
18234 unsigned long val = INTVAL (op);
18236 /* If the high bit is zero, the value is the first 1 bit we find
18237 from the left. */
18238 if ((val & 0x80000000) == 0)
18240 gcc_assert (val & 0xffffffff);
18242 i = 1;
18243 while (((val <<= 1) & 0x80000000) == 0)
18244 ++i;
18245 return i;
18248 /* If the high bit is set and the low bit is not, or the mask is all
18249 1's, the value is zero. */
18250 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
18251 return 0;
18253 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18254 from the right. */
18255 i = 31;
18256 while (((val >>= 1) & 1) != 0)
18257 --i;
18259 return i;
18262 int
18263 extract_ME (rtx op)
18265 int i;
18266 unsigned long val = INTVAL (op);
18268 /* If the low bit is zero, the value is the first 1 bit we find from
18269 the right. */
18270 if ((val & 1) == 0)
18272 gcc_assert (val & 0xffffffff);
18274 i = 30;
18275 while (((val >>= 1) & 1) == 0)
18276 --i;
18278 return i;
18281 /* If the low bit is set and the high bit is not, or the mask is all
18282 1's, the value is 31. */
18283 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
18284 return 31;
18286 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18287 from the left. */
18288 i = 0;
18289 while (((val <<= 1) & 0x80000000) != 0)
18290 ++i;
18292 return i;
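/* Editor's note (worked example, not in the original source): for the
   contiguous rlwinm mask 0x0FFFFFF0 (1-bits in IBM bit positions 4..27,
   with bit 0 the most significant), extract_MB returns 4 and extract_ME
   returns 27.  For the wrap-around mask 0xFF0000FF (1-bits at 0..7 and
   24..31), extract_MB returns 24 and extract_ME returns 7.  */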
18295 /* Write out a function code label. */
18297 void
18298 rs6000_output_function_entry (FILE *file, const char *fname)
18300 if (fname[0] != '.')
18302 switch (DEFAULT_ABI)
18304 default:
18305 gcc_unreachable ();
18307 case ABI_AIX:
18308 if (DOT_SYMBOLS)
18309 putc ('.', file);
18310 else
18311 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18312 break;
18314 case ABI_ELFv2:
18315 case ABI_V4:
18316 case ABI_DARWIN:
18317 break;
18321 RS6000_OUTPUT_BASENAME (file, fname);
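/* Editor's note (illustrative, not in the original source): given
   fname = "foo" on AIX with dot-symbols, the code above emits ".foo",
   the code entry point, leaving the undotted "foo" for the function
   descriptor; without dot-symbols an internal "L."-prefixed label is
   used, and the ELFv2, V4 and Darwin ABIs print the name unchanged.  */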
18324 /* Print an operand. Recognize special options, documented below. */
18326 #if TARGET_ELF
18327 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18328 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18329 #else
18330 #define SMALL_DATA_RELOC "sda21"
18331 #define SMALL_DATA_REG 0
18332 #endif
18334 void
18335 print_operand (FILE *file, rtx x, int code)
18337 int i;
18338 unsigned HOST_WIDE_INT uval;
18340 switch (code)
18342 /* %a is output_address. */
18344 case 'b':
18345 /* If constant, low-order 16 bits of constant, unsigned.
18346 Otherwise, write normally. */
18347 if (INT_P (x))
18348 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18349 else
18350 print_operand (file, x, 0);
18351 return;
18353 case 'B':
18354 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18355 for 64-bit mask direction. */
18356 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18357 return;
18359 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18360 output_operand. */
18362 case 'D':
18363 /* Like 'J' but get to the GT bit only. */
18364 gcc_assert (REG_P (x));
18366 /* Bit 1 is GT bit. */
18367 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18369 /* Add one for shift count in rlinm for scc. */
18370 fprintf (file, "%d", i + 1);
18371 return;
18373 case 'e':
18374 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18375 if (! INT_P (x))
18377 output_operand_lossage ("invalid %%e value");
18378 return;
18381 uval = INTVAL (x);
18382 if ((uval & 0xffff) == 0 && uval != 0)
18383 putc ('s', file);
18384 return;
18386 case 'E':
18387 /* X is a CR register. Print the number of the EQ bit of the CR. */
18388 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18389 output_operand_lossage ("invalid %%E value");
18390 else
18391 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18392 return;
18394 case 'f':
18395 /* X is a CR register. Print the shift count needed to move it
18396 to the high-order four bits. */
18397 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18398 output_operand_lossage ("invalid %%f value");
18399 else
18400 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18401 return;
18403 case 'F':
18404 /* Similar, but print the count for the rotate in the opposite
18405 direction. */
18406 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18407 output_operand_lossage ("invalid %%F value");
18408 else
18409 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18410 return;
18412 case 'G':
18413 /* X is a constant integer. If it is negative, print "m",
18414 otherwise print "z". This is to make an aze or ame insn. */
18415 if (GET_CODE (x) != CONST_INT)
18416 output_operand_lossage ("invalid %%G value");
18417 else if (INTVAL (x) >= 0)
18418 putc ('z', file);
18419 else
18420 putc ('m', file);
18421 return;
18423 case 'h':
18424 /* If constant, output low-order five bits. Otherwise, write
18425 normally. */
18426 if (INT_P (x))
18427 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18428 else
18429 print_operand (file, x, 0);
18430 return;
18432 case 'H':
18433 /* If constant, output low-order six bits. Otherwise, write
18434 normally. */
18435 if (INT_P (x))
18436 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18437 else
18438 print_operand (file, x, 0);
18439 return;
18441 case 'I':
18442 /* Print `i' if this is a constant, else nothing. */
18443 if (INT_P (x))
18444 putc ('i', file);
18445 return;
18447 case 'j':
18448 /* Write the bit number in CCR for jump. */
18449 i = ccr_bit (x, 0);
18450 if (i == -1)
18451 output_operand_lossage ("invalid %%j code");
18452 else
18453 fprintf (file, "%d", i);
18454 return;
18456 case 'J':
18457 /* Similar, but add one for shift count in rlinm for scc and pass
18458 scc flag to `ccr_bit'. */
18459 i = ccr_bit (x, 1);
18460 if (i == -1)
18461 output_operand_lossage ("invalid %%J code");
18462 else
18463 /* If we want bit 31, write a shift count of zero, not 32. */
18464 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18465 return;
18467 case 'k':
18468 /* X must be a constant. Write the 1's complement of the
18469 constant. */
18470 if (! INT_P (x))
18471 output_operand_lossage ("invalid %%k value");
18472 else
18473 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18474 return;
18476 case 'K':
18477 /* X must be a symbolic constant on ELF. Write an
18478 expression suitable for an 'addi' that adds in the low 16
18479 bits of the MEM. */
18480 if (GET_CODE (x) == CONST)
18482 if (GET_CODE (XEXP (x, 0)) != PLUS
18483 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18484 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18485 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18486 output_operand_lossage ("invalid %%K value");
18488 print_operand_address (file, x);
18489 fputs ("@l", file);
18490 return;
18492 /* %l is output_asm_label. */
18494 case 'L':
18495 /* Write second word of DImode or DFmode reference. Works on register
18496 or non-indexed memory only. */
18497 if (REG_P (x))
18498 fputs (reg_names[REGNO (x) + 1], file);
18499 else if (MEM_P (x))
18501 /* Handle possible auto-increment. Since it is pre-increment and
18502 we have already done it, we can just use an offset of one word. */
18503 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18504 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18505 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18506 UNITS_PER_WORD));
18507 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18508 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18509 UNITS_PER_WORD));
18510 else
18511 output_address (XEXP (adjust_address_nv (x, SImode,
18512 UNITS_PER_WORD),
18513 0));
18515 if (small_data_operand (x, GET_MODE (x)))
18516 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18517 reg_names[SMALL_DATA_REG]);
18519 return;
18521 case 'm':
18522 /* MB value for a mask operand. */
18523 if (! mask_operand (x, SImode))
18524 output_operand_lossage ("invalid %%m value");
18526 fprintf (file, "%d", extract_MB (x));
18527 return;
18529 case 'M':
18530 /* ME value for a mask operand. */
18531 if (! mask_operand (x, SImode))
18532 output_operand_lossage ("invalid %%M value");
18534 fprintf (file, "%d", extract_ME (x));
18535 return;
18537 /* %n outputs the negative of its operand. */
18539 case 'N':
18540 /* Write the number of elements in the vector times 4. */
18541 if (GET_CODE (x) != PARALLEL)
18542 output_operand_lossage ("invalid %%N value");
18543 else
18544 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18545 return;
18547 case 'O':
18548 /* Similar, but subtract 1 first. */
18549 if (GET_CODE (x) != PARALLEL)
18550 output_operand_lossage ("invalid %%O value");
18551 else
18552 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18553 return;
18555 case 'p':
18556 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18557 if (! INT_P (x)
18558 || INTVAL (x) < 0
18559 || (i = exact_log2 (INTVAL (x))) < 0)
18560 output_operand_lossage ("invalid %%p value");
18561 else
18562 fprintf (file, "%d", i);
18563 return;
18565 case 'P':
18566 /* The operand must be an indirect memory reference. The result
18567 is the register name. */
18568 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18569 || REGNO (XEXP (x, 0)) >= 32)
18570 output_operand_lossage ("invalid %%P value");
18571 else
18572 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18573 return;
18575 case 'q':
18576 /* This outputs the logical code corresponding to a boolean
18577 expression. The expression may have one or both operands
18578 negated (if one, only the first one). For condition register
18579 logical operations, it will also treat the negated
18580 CR codes as NOTs, but not handle NOTs of them. */
18582 const char *const *t = 0;
18583 const char *s;
18584 enum rtx_code code = GET_CODE (x);
18585 static const char * const tbl[3][3] = {
18586 { "and", "andc", "nor" },
18587 { "or", "orc", "nand" },
18588 { "xor", "eqv", "xor" } };
18590 if (code == AND)
18591 t = tbl[0];
18592 else if (code == IOR)
18593 t = tbl[1];
18594 else if (code == XOR)
18595 t = tbl[2];
18596 else
18597 output_operand_lossage ("invalid %%q value");
18599 if (GET_CODE (XEXP (x, 0)) != NOT)
18600 s = t[0];
18601 else
18603 if (GET_CODE (XEXP (x, 1)) == NOT)
18604 s = t[2];
18605 else
18606 s = t[1];
18609 fputs (s, file);
18611 return;
18613 case 'Q':
18614 if (! TARGET_MFCRF)
18615 return;
18616 fputc (',', file);
18617 /* FALLTHRU */
18619 case 'R':
18620 /* X is a CR register. Print the mask for `mtcrf'. */
18621 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18622 output_operand_lossage ("invalid %%R value");
18623 else
18624 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18625 return;
18627 case 's':
18628 /* Low 5 bits of 32 - value */
18629 if (! INT_P (x))
18630 output_operand_lossage ("invalid %%s value");
18631 else
18632 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18633 return;
18635 case 'S':
18636 /* PowerPC64 mask position. An all-zeros mask is excluded.
18637 CONST_INT 32-bit mask is considered sign-extended so any
18638 transition must occur within the CONST_INT, not on the boundary. */
18639 if (! mask64_operand (x, DImode))
18640 output_operand_lossage ("invalid %%S value");
18642 uval = INTVAL (x);
18644 if (uval & 1) /* Clear Left */
18646 #if HOST_BITS_PER_WIDE_INT > 64
18647 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18648 #endif
18649 i = 64;
18651 else /* Clear Right */
18653 uval = ~uval;
18654 #if HOST_BITS_PER_WIDE_INT > 64
18655 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18656 #endif
18657 i = 63;
18659 while (uval != 0)
18660 --i, uval >>= 1;
18661 gcc_assert (i >= 0);
18662 fprintf (file, "%d", i);
18663 return;
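/* Editor's note (worked example, not in the original source): for %S, a
   clear-left mask such as 0x00000000ffffffff has its low bit set, so the
   count starts at i = 64 and 32 is printed, the MB value for rldicl.
   A clear-right mask such as 0xffffffff00000000 is complemented first,
   starts at i = 63, and prints 31, the ME value for rldicr.  */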
18665 case 't':
18666 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18667 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18669 /* Bit 3 is OV bit. */
18670 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18672 /* If we want bit 31, write a shift count of zero, not 32. */
18673 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18674 return;
18676 case 'T':
18677 /* Print the symbolic name of a branch target register. */
18678 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18679 && REGNO (x) != CTR_REGNO))
18680 output_operand_lossage ("invalid %%T value");
18681 else if (REGNO (x) == LR_REGNO)
18682 fputs ("lr", file);
18683 else
18684 fputs ("ctr", file);
18685 return;
18687 case 'u':
18688 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18689 for use in unsigned operand. */
18690 if (! INT_P (x))
18692 output_operand_lossage ("invalid %%u value");
18693 return;
18696 uval = INTVAL (x);
18697 if ((uval & 0xffff) == 0)
18698 uval >>= 16;
18700 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18701 return;
18703 case 'v':
18704 /* High-order 16 bits of constant for use in signed operand. */
18705 if (! INT_P (x))
18706 output_operand_lossage ("invalid %%v value");
18707 else
18708 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18709 (INTVAL (x) >> 16) & 0xffff);
18710 return;
18712 case 'U':
18713 /* Print `u' if this has an auto-increment or auto-decrement. */
18714 if (MEM_P (x)
18715 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18716 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18717 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18718 putc ('u', file);
18719 return;
18721 case 'V':
18722 /* Print the trap code for this operand. */
18723 switch (GET_CODE (x))
18725 case EQ:
18726 fputs ("eq", file); /* 4 */
18727 break;
18728 case NE:
18729 fputs ("ne", file); /* 24 */
18730 break;
18731 case LT:
18732 fputs ("lt", file); /* 16 */
18733 break;
18734 case LE:
18735 fputs ("le", file); /* 20 */
18736 break;
18737 case GT:
18738 fputs ("gt", file); /* 8 */
18739 break;
18740 case GE:
18741 fputs ("ge", file); /* 12 */
18742 break;
18743 case LTU:
18744 fputs ("llt", file); /* 2 */
18745 break;
18746 case LEU:
18747 fputs ("lle", file); /* 6 */
18748 break;
18749 case GTU:
18750 fputs ("lgt", file); /* 1 */
18751 break;
18752 case GEU:
18753 fputs ("lge", file); /* 5 */
18754 break;
18755 default:
18756 gcc_unreachable ();
18758 break;
18760 case 'w':
18761 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18762 normally. */
18763 if (INT_P (x))
18764 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18765 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18766 else
18767 print_operand (file, x, 0);
18768 return;
18770 case 'W':
18771 /* MB value for a PowerPC64 rldic operand. */
18772 i = clz_hwi (INTVAL (x));
18774 fprintf (file, "%d", i);
18775 return;
18777 case 'x':
18778 /* X is a FPR or Altivec register used in a VSX context. */
18779 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18780 output_operand_lossage ("invalid %%x value");
18781 else
18783 int reg = REGNO (x);
18784 int vsx_reg = (FP_REGNO_P (reg)
18785 ? reg - 32
18786 : reg - FIRST_ALTIVEC_REGNO + 32);
18788 #ifdef TARGET_REGNAMES
18789 if (TARGET_REGNAMES)
18790 fprintf (file, "%%vs%d", vsx_reg);
18791 else
18792 #endif
18793 fprintf (file, "%d", vsx_reg);
18795 return;
18797 case 'X':
18798 if (MEM_P (x)
18799 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18800 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18801 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18802 putc ('x', file);
18803 return;
18805 case 'Y':
18806 /* Like 'L', for third word of TImode/PTImode */
18807 if (REG_P (x))
18808 fputs (reg_names[REGNO (x) + 2], file);
18809 else if (MEM_P (x))
18811 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18812 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18813 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18814 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18815 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18816 else
18817 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18818 if (small_data_operand (x, GET_MODE (x)))
18819 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18820 reg_names[SMALL_DATA_REG]);
18822 return;
18824 case 'z':
18825 /* X is a SYMBOL_REF. Write out the name preceded by a
18826 period and without any trailing data in brackets. Used for function
18827 names. If we are configured for System V (or the embedded ABI) on
18828 the PowerPC, do not emit the period, since those systems do not use
18829 TOCs and the like. */
18830 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18832 /* For macho, check to see if we need a stub. */
18833 if (TARGET_MACHO)
18835 const char *name = XSTR (x, 0);
18836 #if TARGET_MACHO
18837 if (darwin_emit_branch_islands
18838 && MACHOPIC_INDIRECT
18839 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18840 name = machopic_indirection_name (x, /*stub_p=*/true);
18841 #endif
18842 assemble_name (file, name);
18844 else if (!DOT_SYMBOLS)
18845 assemble_name (file, XSTR (x, 0));
18846 else
18847 rs6000_output_function_entry (file, XSTR (x, 0));
18848 return;
18850 case 'Z':
18851 /* Like 'L', for last word of TImode/PTImode. */
18852 if (REG_P (x))
18853 fputs (reg_names[REGNO (x) + 3], file);
18854 else if (MEM_P (x))
18856 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18857 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18858 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18859 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18860 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18861 else
18862 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18863 if (small_data_operand (x, GET_MODE (x)))
18864 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18865 reg_names[SMALL_DATA_REG]);
18867 return;
18869 /* Print AltiVec or SPE memory operand. */
18870 case 'y':
18872 rtx tmp;
18874 gcc_assert (MEM_P (x));
18876 tmp = XEXP (x, 0);
18878 /* Ugly hack because %y is overloaded. */
18879 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18880 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18881 || GET_MODE (x) == TFmode
18882 || GET_MODE (x) == TImode
18883 || GET_MODE (x) == PTImode))
18885 /* Handle [reg]. */
18886 if (REG_P (tmp))
18888 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18889 break;
18891 /* Handle [reg+UIMM]. */
18892 else if (GET_CODE (tmp) == PLUS &&
18893 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18895 int x;
18897 gcc_assert (REG_P (XEXP (tmp, 0)));
18899 x = INTVAL (XEXP (tmp, 1));
18900 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
18901 break;
18904 /* Fall through. Must be [reg+reg]. */
18906 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
18907 && GET_CODE (tmp) == AND
18908 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
18909 && INTVAL (XEXP (tmp, 1)) == -16)
18910 tmp = XEXP (tmp, 0);
18911 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
18912 && GET_CODE (tmp) == PRE_MODIFY)
18913 tmp = XEXP (tmp, 1);
18914 if (REG_P (tmp))
18915 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
18916 else
18918 if (GET_CODE (tmp) != PLUS
18919 || !REG_P (XEXP (tmp, 0))
18920 || !REG_P (XEXP (tmp, 1)))
18922 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
18923 break;
18926 if (REGNO (XEXP (tmp, 0)) == 0)
18927 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
18928 reg_names[ REGNO (XEXP (tmp, 0)) ]);
18929 else
18930 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
18931 reg_names[ REGNO (XEXP (tmp, 1)) ]);
18933 break;
18936 case 0:
18937 if (REG_P (x))
18938 fprintf (file, "%s", reg_names[REGNO (x)]);
18939 else if (MEM_P (x))
18941 /* We need to handle PRE_INC and PRE_DEC here, since we need to
18942 know the width from the mode. */
18943 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
18944 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
18945 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18946 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
18947 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
18948 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18949 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18950 output_address (XEXP (XEXP (x, 0), 1));
18951 else
18952 output_address (XEXP (x, 0));
18954 else
18956 if (toc_relative_expr_p (x, false))
18957 /* This hack along with a corresponding hack in
18958 rs6000_output_addr_const_extra arranges to output addends
18959 where the assembler expects to find them. eg.
18960 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
18961 without this hack would be output as "x@toc+4". We
18962 want "x+4@toc". */
18963 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18964 else
18965 output_addr_const (file, x);
18967 return;
18969 case '&':
18970 if (const char *name = get_some_local_dynamic_name ())
18971 assemble_name (file, name);
18972 else
18973 output_operand_lossage ("'%%&' used without any "
18974 "local dynamic TLS references");
18975 return;
18977 default:
18978 output_operand_lossage ("invalid %%xn code");
18982 /* Print the address of an operand. */
18984 void
18985 print_operand_address (FILE *file, rtx x)
18987 if (REG_P (x))
18988 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
18989 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
18990 || GET_CODE (x) == LABEL_REF)
18992 output_addr_const (file, x);
18993 if (small_data_operand (x, GET_MODE (x)))
18994 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18995 reg_names[SMALL_DATA_REG]);
18996 else
18997 gcc_assert (!TARGET_TOC);
18999 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19000 && REG_P (XEXP (x, 1)))
19002 if (REGNO (XEXP (x, 0)) == 0)
19003 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
19004 reg_names[ REGNO (XEXP (x, 0)) ]);
19005 else
19006 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
19007 reg_names[ REGNO (XEXP (x, 1)) ]);
19009 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19010 && GET_CODE (XEXP (x, 1)) == CONST_INT)
19011 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
19012 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
19013 #if TARGET_MACHO
19014 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19015 && CONSTANT_P (XEXP (x, 1)))
19017 fprintf (file, "lo16(");
19018 output_addr_const (file, XEXP (x, 1));
19019 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19021 #endif
19022 #if TARGET_ELF
19023 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19024 && CONSTANT_P (XEXP (x, 1)))
19026 output_addr_const (file, XEXP (x, 1));
19027 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19029 #endif
19030 else if (toc_relative_expr_p (x, false))
19032 /* This hack along with a corresponding hack in
19033 rs6000_output_addr_const_extra arranges to output addends
19034 where the assembler expects to find them. eg.
19035 (lo_sum (reg 9)
19036 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
19037 without this hack would be output as "x@toc+8@l(9)". We
19038 want "x+8@toc@l(9)". */
19039 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
19040 if (GET_CODE (x) == LO_SUM)
19041 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
19042 else
19043 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
19045 else
19046 gcc_unreachable ();
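/* Editor's note (illustrative, not in the original source; register
   names shown as emitted by the default reg_names table): a bare
   register address prints as "0(9)", an indexed address as "9,10"
   (r0 is forced into the second slot because r0 reads as zero in the
   RA field), and a reg+constant address as "8(9)".  */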
19049 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
19051 static bool
19052 rs6000_output_addr_const_extra (FILE *file, rtx x)
19054 if (GET_CODE (x) == UNSPEC)
19055 switch (XINT (x, 1))
19057 case UNSPEC_TOCREL:
19058 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
19059 && REG_P (XVECEXP (x, 0, 1))
19060 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
19061 output_addr_const (file, XVECEXP (x, 0, 0));
19062 if (x == tocrel_base && tocrel_offset != const0_rtx)
19064 if (INTVAL (tocrel_offset) >= 0)
19065 fprintf (file, "+");
19066 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
19068 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
19070 putc ('-', file);
19071 assemble_name (file, toc_label_name);
19073 else if (TARGET_ELF)
19074 fputs ("@toc", file);
19075 return true;
19077 #if TARGET_MACHO
19078 case UNSPEC_MACHOPIC_OFFSET:
19079 output_addr_const (file, XVECEXP (x, 0, 0));
19080 putc ('-', file);
19081 machopic_output_function_base_name (file);
19082 return true;
19083 #endif
19085 return false;
19088 /* Target hook for assembling integer objects. The PowerPC version has
19089 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19090 is defined. It also needs to handle DI-mode objects on 64-bit
19091 targets. */
19093 static bool
19094 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19096 #ifdef RELOCATABLE_NEEDS_FIXUP
19097 /* Special handling for SI values. */
19098 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19100 static int recurse = 0;
19102 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19103 the .fixup section. Since the TOC section is already relocated, we
19104 don't need to mark it here. We used to skip the text section, but it
19105 should never be valid for relocated addresses to be placed in the text
19106 section. */
19107 if (TARGET_RELOCATABLE
19108 && in_section != toc_section
19109 && !recurse
19110 && !CONST_SCALAR_INT_P (x)
19111 && CONSTANT_P (x))
19113 char buf[256];
19115 recurse = 1;
19116 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19117 fixuplabelno++;
19118 ASM_OUTPUT_LABEL (asm_out_file, buf);
19119 fprintf (asm_out_file, "\t.long\t(");
19120 output_addr_const (asm_out_file, x);
19121 fprintf (asm_out_file, ")@fixup\n");
19122 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19123 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19124 fprintf (asm_out_file, "\t.long\t");
19125 assemble_name (asm_out_file, buf);
19126 fprintf (asm_out_file, "\n\t.previous\n");
19127 recurse = 0;
19128 return true;
19130 /* Remove initial .'s to turn a -mcall-aixdesc function
19131 address into the address of the descriptor, not the function
19132 itself. */
19133 else if (GET_CODE (x) == SYMBOL_REF
19134 && XSTR (x, 0)[0] == '.'
19135 && DEFAULT_ABI == ABI_AIX)
19137 const char *name = XSTR (x, 0);
19138 while (*name == '.')
19139 name++;
19141 fprintf (asm_out_file, "\t.long\t%s\n", name);
19142 return true;
19145 #endif /* RELOCATABLE_NEEDS_FIXUP */
19146 return default_assemble_integer (x, size, aligned_p);
19149 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19150 /* Emit an assembler directive to set symbol visibility for DECL to
19151 VISIBILITY_TYPE. */
19153 static void
19154 rs6000_assemble_visibility (tree decl, int vis)
19156 if (TARGET_XCOFF)
19157 return;
19159 /* Functions need to have their entry point symbol visibility set as
19160 well as their descriptor symbol visibility. */
19161 if (DEFAULT_ABI == ABI_AIX
19162 && DOT_SYMBOLS
19163 && TREE_CODE (decl) == FUNCTION_DECL)
19165 static const char * const visibility_types[] = {
19166 NULL, "internal", "hidden", "protected"
19169 const char *name, *type;
19171 name = ((* targetm.strip_name_encoding)
19172 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19173 type = visibility_types[vis];
19175 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19176 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19178 else
19179 default_assemble_visibility (decl, vis);
19181 #endif
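/* Editor's note (illustrative, not in the original source): for a hidden
   function "foo" under the AIX ABI with dot-symbols, the hook above
   emits both ".hidden foo" (the descriptor) and ".hidden .foo" (the
   entry point); all other configurations defer to
   default_assemble_visibility.  */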
19183 enum rtx_code
19184 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19186 /* Reversal of FP compares takes care -- an ordered compare
19187 becomes an unordered compare and vice versa. */
19188 if (mode == CCFPmode
19189 && (!flag_finite_math_only
19190 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19191 || code == UNEQ || code == LTGT))
19192 return reverse_condition_maybe_unordered (code);
19193 else
19194 return reverse_condition (code);
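/* Editor's note (example, not in the original source): reversing LT in
   CCFPmode yields UNGE rather than GE, so a NaN operand still sends the
   reversed test the right way; the plain reversal is used only when
   -ffinite-math-only promises no unordered results and the code is not
   itself one of the unordered codes.  */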
19197 /* Generate a compare for CODE. Return a brand-new rtx that
19198 represents the result of the compare. */
19200 static rtx
19201 rs6000_generate_compare (rtx cmp, machine_mode mode)
19203 machine_mode comp_mode;
19204 rtx compare_result;
19205 enum rtx_code code = GET_CODE (cmp);
19206 rtx op0 = XEXP (cmp, 0);
19207 rtx op1 = XEXP (cmp, 1);
19209 if (FLOAT_MODE_P (mode))
19210 comp_mode = CCFPmode;
19211 else if (code == GTU || code == LTU
19212 || code == GEU || code == LEU)
19213 comp_mode = CCUNSmode;
19214 else if ((code == EQ || code == NE)
19215 && unsigned_reg_p (op0)
19216 && (unsigned_reg_p (op1)
19217 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19218 /* These are unsigned values, perhaps there will be a later
19219 ordering compare that can be shared with this one. */
19220 comp_mode = CCUNSmode;
19221 else
19222 comp_mode = CCmode;
19224 /* If we have an unsigned compare, make sure we don't have a signed value as
19225 an immediate. */
19226 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19227 && INTVAL (op1) < 0)
19229 op0 = copy_rtx_if_shared (op0);
19230 op1 = force_reg (GET_MODE (op0), op1);
19231 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19234 /* First, the compare. */
19235 compare_result = gen_reg_rtx (comp_mode);
19237 /* E500 FP compare instructions on the GPRs. Yuck! */
19238 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19239 && FLOAT_MODE_P (mode))
19241 rtx cmp, or_result, compare_result2;
19242 machine_mode op_mode = GET_MODE (op0);
19243 bool reverse_p;
19245 if (op_mode == VOIDmode)
19246 op_mode = GET_MODE (op1);
19248 /* First reverse the condition codes that aren't directly supported. */
19249 switch (code)
19251 case NE:
19252 case UNLT:
19253 case UNLE:
19254 case UNGT:
19255 case UNGE:
19256 code = reverse_condition_maybe_unordered (code);
19257 reverse_p = true;
19258 break;
19260 case EQ:
19261 case LT:
19262 case LE:
19263 case GT:
19264 case GE:
19265 reverse_p = false;
19266 break;
19268 default:
19269 gcc_unreachable ();
19272 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19273 This explains the following mess. */
19275 switch (code)
19277 case EQ:
19278 switch (op_mode)
19280 case SFmode:
19281 cmp = (flag_finite_math_only && !flag_trapping_math)
19282 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19283 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19284 break;
19286 case DFmode:
19287 cmp = (flag_finite_math_only && !flag_trapping_math)
19288 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19289 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19290 break;
19292 case TFmode:
19293 cmp = (flag_finite_math_only && !flag_trapping_math)
19294 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19295 : gen_cmptfeq_gpr (compare_result, op0, op1);
19296 break;
19298 default:
19299 gcc_unreachable ();
19301 break;
19303 case GT:
19304 case GE:
19305 switch (op_mode)
19307 case SFmode:
19308 cmp = (flag_finite_math_only && !flag_trapping_math)
19309 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19310 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19311 break;
19313 case DFmode:
19314 cmp = (flag_finite_math_only && !flag_trapping_math)
19315 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19316 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19317 break;
19319 case TFmode:
19320 cmp = (flag_finite_math_only && !flag_trapping_math)
19321 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19322 : gen_cmptfgt_gpr (compare_result, op0, op1);
19323 break;
19325 default:
19326 gcc_unreachable ();
19328 break;
19330 case LT:
19331 case LE:
19332 switch (op_mode)
19334 case SFmode:
19335 cmp = (flag_finite_math_only && !flag_trapping_math)
19336 ? gen_tstsflt_gpr (compare_result, op0, op1)
19337 : gen_cmpsflt_gpr (compare_result, op0, op1);
19338 break;
19340 case DFmode:
19341 cmp = (flag_finite_math_only && !flag_trapping_math)
19342 ? gen_tstdflt_gpr (compare_result, op0, op1)
19343 : gen_cmpdflt_gpr (compare_result, op0, op1);
19344 break;
19346 case TFmode:
19347 cmp = (flag_finite_math_only && !flag_trapping_math)
19348 ? gen_tsttflt_gpr (compare_result, op0, op1)
19349 : gen_cmptflt_gpr (compare_result, op0, op1);
19350 break;
19352 default:
19353 gcc_unreachable ();
19355 break;
19357 default:
19358 gcc_unreachable ();
19361 /* Synthesize LE and GE from LT/GT || EQ. */
19362 if (code == LE || code == GE)
19364 emit_insn (cmp);
19366 compare_result2 = gen_reg_rtx (CCFPmode);
19368 /* Do the EQ. */
19369 switch (op_mode)
19371 case SFmode:
19372 cmp = (flag_finite_math_only && !flag_trapping_math)
19373 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19374 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19375 break;
19377 case DFmode:
19378 cmp = (flag_finite_math_only && !flag_trapping_math)
19379 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19380 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19381 break;
19383 case TFmode:
19384 cmp = (flag_finite_math_only && !flag_trapping_math)
19385 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19386 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19387 break;
19389 default:
19390 gcc_unreachable ();
19393 emit_insn (cmp);
19395 /* OR them together. */
19396 or_result = gen_reg_rtx (CCFPmode);
19397 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19398 compare_result2);
19399 compare_result = or_result;
19402 code = reverse_p ? NE : EQ;
19404 emit_insn (cmp);
19406 else
19408 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19409 CLOBBERs to match cmptf_internal2 pattern. */
19410 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19411 && GET_MODE (op0) == TFmode
19412 && !TARGET_IEEEQUAD
19413 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19414 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19415 gen_rtvec (10,
19416 gen_rtx_SET (VOIDmode,
19417 compare_result,
19418 gen_rtx_COMPARE (comp_mode, op0, op1)),
19419 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19420 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19421 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19422 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19423 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19424 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19425 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19426 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19427 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19428 else if (GET_CODE (op1) == UNSPEC
19429 && XINT (op1, 1) == UNSPEC_SP_TEST)
19431 rtx op1b = XVECEXP (op1, 0, 0);
19432 comp_mode = CCEQmode;
19433 compare_result = gen_reg_rtx (CCEQmode);
19434 if (TARGET_64BIT)
19435 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19436 else
19437 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19439 else
19440 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19441 gen_rtx_COMPARE (comp_mode, op0, op1)));
19444 /* Some kinds of FP comparisons need an OR operation;
19445 under flag_finite_math_only we don't bother. */
19446 if (FLOAT_MODE_P (mode)
19447 && !flag_finite_math_only
19448 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19449 && (code == LE || code == GE
19450 || code == UNEQ || code == LTGT
19451 || code == UNGT || code == UNLT))
19453 enum rtx_code or1, or2;
19454 rtx or1_rtx, or2_rtx, compare2_rtx;
19455 rtx or_result = gen_reg_rtx (CCEQmode);
19457 switch (code)
19459 case LE: or1 = LT; or2 = EQ; break;
19460 case GE: or1 = GT; or2 = EQ; break;
19461 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19462 case LTGT: or1 = LT; or2 = GT; break;
19463 case UNGT: or1 = UNORDERED; or2 = GT; break;
19464 case UNLT: or1 = UNORDERED; or2 = LT; break;
19465 default: gcc_unreachable ();
19467 validate_condition_mode (or1, comp_mode);
19468 validate_condition_mode (or2, comp_mode);
19469 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19470 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19471 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19472 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19473 const_true_rtx);
19474 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19476 compare_result = or_result;
19477 code = EQ;
19480 validate_condition_mode (code, GET_MODE (compare_result));
19482 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
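/* Editor's note (sketch, not in the original source): the CCEQ block
   above composes, e.g., LE as (LT | EQ): two bits of the CR result are
   IORed into a fresh CCEQ register (a cror at the assembly level), and
   the caller is handed a plain EQ test against that register.  */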
19486 /* Emit the RTL for an sISEL pattern. */
19488 void
19489 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19491 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19494 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
19495 can be used as that dest register. Return the dest register. */
19497 rtx
19498 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
19500 if (op2 == const0_rtx)
19501 return op1;
19503 if (GET_CODE (scratch) == SCRATCH)
19504 scratch = gen_reg_rtx (mode);
19506 if (logical_operand (op2, mode))
19507 emit_insn (gen_rtx_SET (VOIDmode, scratch, gen_rtx_XOR (mode, op1, op2)));
19508 else
19509 emit_insn (gen_rtx_SET (VOIDmode, scratch,
19510 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
19512 return scratch;
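/* Editor's note (sketch, not in the original source): rs6000_emit_eqne
   reduces "op1 == op2" to a test against zero: scratch = op1 ^ op2 when
   op2 is a logical immediate, otherwise scratch = op1 + (-op2); either
   way scratch is zero exactly when the operands are equal.  */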
19515 void
19516 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
19518 rtx condition_rtx;
19519 machine_mode op_mode;
19520 enum rtx_code cond_code;
19521 rtx result = operands[0];
19523 condition_rtx = rs6000_generate_compare (operands[1], mode);
19524 cond_code = GET_CODE (condition_rtx);
19526 if (FLOAT_MODE_P (mode)
19527 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19529 rtx t;
19531 PUT_MODE (condition_rtx, SImode);
19532 t = XEXP (condition_rtx, 0);
19534 gcc_assert (cond_code == NE || cond_code == EQ);
19536 if (cond_code == NE)
19537 emit_insn (gen_e500_flip_gt_bit (t, t));
19539 emit_insn (gen_move_from_CR_gt_bit (result, t));
19540 return;
19543 if (cond_code == NE
19544 || cond_code == GE || cond_code == LE
19545 || cond_code == GEU || cond_code == LEU
19546 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19548 rtx not_result = gen_reg_rtx (CCEQmode);
19549 rtx not_op, rev_cond_rtx;
19550 machine_mode cc_mode;
19552 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19554 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19555 SImode, XEXP (condition_rtx, 0), const0_rtx);
19556 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19557 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19558 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19561 op_mode = GET_MODE (XEXP (operands[1], 0));
19562 if (op_mode == VOIDmode)
19563 op_mode = GET_MODE (XEXP (operands[1], 1));
19565 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19567 PUT_MODE (condition_rtx, DImode);
19568 convert_move (result, condition_rtx, 0);
19570 else
19572 PUT_MODE (condition_rtx, SImode);
19573 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19577 /* Emit a branch of kind CODE to location LOC. */
19579 void
19580 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
19582 rtx condition_rtx, loc_ref;
19584 condition_rtx = rs6000_generate_compare (operands[0], mode);
19585 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19586 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19587 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19588 loc_ref, pc_rtx)));
19591 /* Return the string to output a conditional branch to LABEL, which is
19592 the operand template of the label, or NULL if the branch is really a
19593 conditional return.
19595 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19596 condition code register and its mode specifies what kind of
19597 comparison we made.
19599 REVERSED is nonzero if we should reverse the sense of the comparison.
19601 INSN is the insn. */
19603 char *
19604 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19606 static char string[64];
19607 enum rtx_code code = GET_CODE (op);
19608 rtx cc_reg = XEXP (op, 0);
19609 machine_mode mode = GET_MODE (cc_reg);
19610 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19611 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19612 int really_reversed = reversed ^ need_longbranch;
19613 char *s = string;
19614 const char *ccode;
19615 const char *pred;
19616 rtx note;
19618 validate_condition_mode (code, mode);
19620 /* Work out which way this really branches. We could use
19621 reverse_condition_maybe_unordered here always but this
19622 makes the resulting assembler clearer. */
19623 if (really_reversed)
19625 /* Reversal of FP compares takes care -- an ordered compare
19626 becomes an unordered compare and vice versa. */
19627 if (mode == CCFPmode)
19628 code = reverse_condition_maybe_unordered (code);
19629 else
19630 code = reverse_condition (code);
19633 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19635 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19636 to the GT bit. */
19637 switch (code)
19639 case EQ:
19640 /* Opposite of GT. */
19641 code = GT;
19642 break;
19644 case NE:
19645 code = UNLE;
19646 break;
19648 default:
19649 gcc_unreachable ();
19653 switch (code)
19655 /* Not all of these are actually distinct opcodes, but
19656 we distinguish them for clarity of the resulting assembler. */
19657 case NE: case LTGT:
19658 ccode = "ne"; break;
19659 case EQ: case UNEQ:
19660 ccode = "eq"; break;
19661 case GE: case GEU:
19662 ccode = "ge"; break;
19663 case GT: case GTU: case UNGT:
19664 ccode = "gt"; break;
19665 case LE: case LEU:
19666 ccode = "le"; break;
19667 case LT: case LTU: case UNLT:
19668 ccode = "lt"; break;
19669 case UNORDERED: ccode = "un"; break;
19670 case ORDERED: ccode = "nu"; break;
19671 case UNGE: ccode = "nl"; break;
19672 case UNLE: ccode = "ng"; break;
19673 default:
19674 gcc_unreachable ();
19677 /* Maybe we have a guess as to how likely the branch is. */
19678 pred = "";
19679 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19680 if (note != NULL_RTX)
19682 /* PROB is the difference from 50%. */
19683 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19685 /* Only hint for highly probable/improbable branches on newer
19686 cpus as static prediction overrides processor dynamic
19687 prediction. For older cpus we may as well always hint, but
19688 assume not taken for branches that are very close to 50% as a
19689 mispredicted taken branch is more expensive than a
19690 mispredicted not-taken branch. */
19691 if (rs6000_always_hint
19692 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19693 && br_prob_note_reliable_p (note)))
19695 if (abs (prob) > REG_BR_PROB_BASE / 20
19696 && ((prob > 0) ^ need_longbranch))
19697 pred = "+";
19698 else
19699 pred = "-";
19703 if (label == NULL)
19704 s += sprintf (s, "b%slr%s ", ccode, pred);
19705 else
19706 s += sprintf (s, "b%s%s ", ccode, pred);
19708 /* We need to escape any '%' characters in the reg_names string.
19709 Assume they'd only be the first character.... */
19710 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19711 *s++ = '%';
19712 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19714 if (label != NULL)
19716 /* If the branch distance was too far, we may have to use an
19717 unconditional branch to go the distance. */
19718 if (need_longbranch)
19719 s += sprintf (s, ",$+8\n\tb %s", label);
19720 else
19721 s += sprintf (s, ",%s", label);
19724 return string;
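/* Editor's note (illustrative, not in the original source): typical
   strings include "beq+ 1,.L42" for a likely-taken EQ branch on CR
   field 1 (the field prints as a bare number with the default register
   names), and, when the label is out of conditional-branch range, the
   inverted pair "bne 1,$+8" followed by "b .L42" on the next line.  */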
19727 /* Return the string to flip the GT bit on a CR. */
19728 char *
19729 output_e500_flip_gt_bit (rtx dst, rtx src)
19731 static char string[64];
19732 int a, b;
19734 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19735 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19737 /* GT bit. */
19738 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19739 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19741 sprintf (string, "crnot %d,%d", a, b);
19742 return string;
19745 /* Return insn for VSX or Altivec comparisons. */
19747 static rtx
19748 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19750 rtx mask;
19751 machine_mode mode = GET_MODE (op0);
19753 switch (code)
19755 default:
19756 break;
19758 case GE:
19759 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19760 return NULL_RTX;
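/* FALLTHRU.  Editor's note (not in the original source): falling into
   the EQ/GT arm is intentional; vector GE is handled directly for
   floating-point modes, and only the integer case bailed out above.  */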
19762 case EQ:
19763 case GT:
19764 case GTU:
19765 case ORDERED:
19766 case UNORDERED:
19767 case UNEQ:
19768 case LTGT:
19769 mask = gen_reg_rtx (mode);
19770 emit_insn (gen_rtx_SET (VOIDmode,
19771 mask,
19772 gen_rtx_fmt_ee (code, mode, op0, op1)));
19773 return mask;
19776 return NULL_RTX;
19779 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19780 DMODE is expected destination mode. This is a recursive function. */
19782 static rtx
19783 rs6000_emit_vector_compare (enum rtx_code rcode,
19784 rtx op0, rtx op1,
19785 machine_mode dmode)
19787 rtx mask;
19788 bool swap_operands = false;
19789 bool try_again = false;
19791 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19792 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19794 /* See if the comparison works as is. */
19795 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19796 if (mask)
19797 return mask;
19799 switch (rcode)
19801 case LT:
19802 rcode = GT;
19803 swap_operands = true;
19804 try_again = true;
19805 break;
19806 case LTU:
19807 rcode = GTU;
19808 swap_operands = true;
19809 try_again = true;
19810 break;
19811 case NE:
19812 case UNLE:
19813 case UNLT:
19814 case UNGE:
19815 case UNGT:
19816 /* Invert condition and try again.
19817 e.g., A != B becomes ~(A==B). */
19819 enum rtx_code rev_code;
19820 enum insn_code nor_code;
19821 rtx mask2;
19823 rev_code = reverse_condition_maybe_unordered (rcode);
19824 if (rev_code == UNKNOWN)
19825 return NULL_RTX;
19827 nor_code = optab_handler (one_cmpl_optab, dmode);
19828 if (nor_code == CODE_FOR_nothing)
19829 return NULL_RTX;
19831 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19832 if (!mask2)
19833 return NULL_RTX;
19835 mask = gen_reg_rtx (dmode);
19836 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19837 return mask;
19839 break;
19840 case GE:
19841 case GEU:
19842 case LE:
19843 case LEU:
19844 /* Try GT/GTU/LT/LTU OR EQ */
19846 rtx c_rtx, eq_rtx;
19847 enum insn_code ior_code;
19848 enum rtx_code new_code;
19850 switch (rcode)
19852 case GE:
19853 new_code = GT;
19854 break;
19856 case GEU:
19857 new_code = GTU;
19858 break;
19860 case LE:
19861 new_code = LT;
19862 break;
19864 case LEU:
19865 new_code = LTU;
19866 break;
19868 default:
19869 gcc_unreachable ();
19872 ior_code = optab_handler (ior_optab, dmode);
19873 if (ior_code == CODE_FOR_nothing)
19874 return NULL_RTX;
19876 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
19877 if (!c_rtx)
19878 return NULL_RTX;
19880 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
19881 if (!eq_rtx)
19882 return NULL_RTX;
19884 mask = gen_reg_rtx (dmode);
19885 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
19886 return mask;
19888 break;
19889 default:
19890 return NULL_RTX;
19893 if (try_again)
19895 if (swap_operands)
19896 std::swap (op0, op1);
19898 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19899 if (mask)
19900 return mask;
19903 /* You only get two chances. */
19904 return NULL_RTX;
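/* Editor's note (summary, not in the original source): the recursion
   above normalizes codes the vector units lack: LT/LTU swap operands
   and become GT/GTU; NE and the UN* codes emit the reversed comparison
   and complement the mask (one_cmpl_optab); GE/GEU/LE/LEU are composed
   as the IOR of a GT-style comparison with EQ.  */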
19907 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
19908 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
19909 operands for the relation operation COND. */
19911 int
19912 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
19913 rtx cond, rtx cc_op0, rtx cc_op1)
19915 machine_mode dest_mode = GET_MODE (dest);
19916 machine_mode mask_mode = GET_MODE (cc_op0);
19917 enum rtx_code rcode = GET_CODE (cond);
19918 machine_mode cc_mode = CCmode;
19919 rtx mask;
19920 rtx cond2;
19921 rtx tmp;
19922 bool invert_move = false;
19924 if (VECTOR_UNIT_NONE_P (dest_mode))
19925 return 0;
19927 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
19928 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
19930 switch (rcode)
19932 /* Swap operands if we can, and fall back to doing the operation as
19933 specified, and doing a NOR to invert the test. */
19934 case NE:
19935 case UNLE:
19936 case UNLT:
19937 case UNGE:
19938 case UNGT:
19939 /* Invert condition and try again.
19940 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
19941 invert_move = true;
19942 rcode = reverse_condition_maybe_unordered (rcode);
19943 if (rcode == UNKNOWN)
19944 return 0;
19945 break;
19947 /* Mark unsigned tests with CCUNSmode. */
19948 case GTU:
19949 case GEU:
19950 case LTU:
19951 case LEU:
19952 cc_mode = CCUNSmode;
19953 break;
19955 default:
19956 break;
19959 /* Get the vector mask for the given relational operations. */
19960 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
19962 if (!mask)
19963 return 0;
19965 if (invert_move)
19967 tmp = op_true;
19968 op_true = op_false;
19969 op_false = tmp;
19972 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
19973 CONST0_RTX (dest_mode));
19974 emit_insn (gen_rtx_SET (VOIDmode,
19975 dest,
19976 gen_rtx_IF_THEN_ELSE (dest_mode,
19977 cond2,
19978 op_true,
19979 op_false)));
19980 return 1;
19983 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
19984 operands of the last comparison is nonzero/true, FALSE_COND if it
19985 is zero/false. Return 0 if the hardware has no such operation. */
19988 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
19990 enum rtx_code code = GET_CODE (op);
19991 rtx op0 = XEXP (op, 0);
19992 rtx op1 = XEXP (op, 1);
19993 REAL_VALUE_TYPE c1;
19994 machine_mode compare_mode = GET_MODE (op0);
19995 machine_mode result_mode = GET_MODE (dest);
19996 rtx temp;
19997 bool is_against_zero;
19999 /* These modes should always match. */
20000 if (GET_MODE (op1) != compare_mode
20001 /* In the isel case, however, we can use a compare immediate, so
20002 op1 may be a small constant. */
20003 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
20004 return 0;
20005 if (GET_MODE (true_cond) != result_mode)
20006 return 0;
20007 if (GET_MODE (false_cond) != result_mode)
20008 return 0;
20010 /* Don't allow using floating point comparisons for integer results for
20011 now. */
20012 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
20013 return 0;
20015 /* First, work out if the hardware can do this at all, or
20016 if it's too slow.... */
20017 if (!FLOAT_MODE_P (compare_mode))
20019 if (TARGET_ISEL)
20020 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
20021 return 0;
20023 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
20024 && SCALAR_FLOAT_MODE_P (compare_mode))
20025 return 0;
20027 is_against_zero = op1 == CONST0_RTX (compare_mode);
20029 /* A floating-point subtract might overflow, underflow, or produce
20030 an inexact result, thus changing the floating-point flags, so it
20031 can't be generated if we care about that. It's safe if one side
20032 of the construct is zero, since then no subtract will be
20033 generated. */
20034 if (SCALAR_FLOAT_MODE_P (compare_mode)
20035 && flag_trapping_math && ! is_against_zero)
20036 return 0;
20038 /* Eliminate half of the comparisons by switching operands, this
20039 makes the remaining code simpler. */
20040 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
20041 || code == LTGT || code == LT || code == UNLE)
20043 code = reverse_condition_maybe_unordered (code);
20044 temp = true_cond;
20045 true_cond = false_cond;
20046 false_cond = temp;
20049 /* UNEQ and LTGT take four instructions for a comparison with zero,
20050 so it'll probably be faster to use a branch here too. */
20051 if (code == UNEQ && HONOR_NANS (compare_mode))
20052 return 0;
20054 if (GET_CODE (op1) == CONST_DOUBLE)
20055 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
20057 /* We're going to try to implement comparisons by performing
20058 a subtract, then comparing against zero. Unfortunately,
20059 Inf - Inf is NaN which is not zero, and so if we don't
20060 know that the operand is finite and the comparison
20061 would treat EQ differently from UNORDERED, we can't do it. */
20062 if (HONOR_INFINITIES (compare_mode)
20063 && code != GT && code != UNGE
20064 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
20065 /* Constructs of the form (a OP b ? a : b) are safe. */
20066 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
20067 || (! rtx_equal_p (op0, true_cond)
20068 && ! rtx_equal_p (op1, true_cond))))
20069 return 0;
20071 /* At this point we know we can use fsel. */
20073 /* Reduce the comparison to a comparison against zero. */
20074 if (! is_against_zero)
20076 temp = gen_reg_rtx (compare_mode);
20077 emit_insn (gen_rtx_SET (VOIDmode, temp,
20078 gen_rtx_MINUS (compare_mode, op0, op1)));
20079 op0 = temp;
20080 op1 = CONST0_RTX (compare_mode);
20083 /* If we don't care about NaNs we can reduce some of the comparisons
20084 down to faster ones. */
20085 if (! HONOR_NANS (compare_mode))
20086 switch (code)
20088 case GT:
20089 code = LE;
20090 temp = true_cond;
20091 true_cond = false_cond;
20092 false_cond = temp;
20093 break;
20094 case UNGE:
20095 code = GE;
20096 break;
20097 case UNEQ:
20098 code = EQ;
20099 break;
20100 default:
20101 break;
20104 /* Now, reduce everything down to a GE. */
20105 switch (code)
20107 case GE:
20108 break;
20110 case LE:
20111 temp = gen_reg_rtx (compare_mode);
20112 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20113 op0 = temp;
20114 break;
20116 case ORDERED:
20117 temp = gen_reg_rtx (compare_mode);
20118 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
20119 op0 = temp;
20120 break;
20122 case EQ:
20123 temp = gen_reg_rtx (compare_mode);
20124 emit_insn (gen_rtx_SET (VOIDmode, temp,
20125 gen_rtx_NEG (compare_mode,
20126 gen_rtx_ABS (compare_mode, op0))));
20127 op0 = temp;
20128 break;
20130 case UNGE:
20131 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
20132 temp = gen_reg_rtx (result_mode);
20133 emit_insn (gen_rtx_SET (VOIDmode, temp,
20134 gen_rtx_IF_THEN_ELSE (result_mode,
20135 gen_rtx_GE (VOIDmode,
20136 op0, op1),
20137 true_cond, false_cond)));
20138 false_cond = true_cond;
20139 true_cond = temp;
20141 temp = gen_reg_rtx (compare_mode);
20142 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20143 op0 = temp;
20144 break;
20146 case GT:
20147 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
20148 temp = gen_reg_rtx (result_mode);
20149 emit_insn (gen_rtx_SET (VOIDmode, temp,
20150 gen_rtx_IF_THEN_ELSE (result_mode,
20151 gen_rtx_GE (VOIDmode,
20152 op0, op1),
20153 true_cond, false_cond)));
20154 true_cond = false_cond;
20155 false_cond = temp;
20157 temp = gen_reg_rtx (compare_mode);
20158 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20159 op0 = temp;
20160 break;
20162 default:
20163 gcc_unreachable ();
20166 emit_insn (gen_rtx_SET (VOIDmode, dest,
20167 gen_rtx_IF_THEN_ELSE (result_mode,
20168 gen_rtx_GE (VOIDmode,
20169 op0, op1),
20170 true_cond, false_cond)));
20171 return 1;
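/* [Editor's note: the reductions above target the PowerPC fsel
   instruction, fsel FRT,FRA,FRC,FRB, which sets FRT = FRC when
   FRA >= 0.0 and FRT = FRB otherwise; a NaN in FRA is "not >= 0"
   and selects FRB.  Hence every comparison is first rewritten as a
   GE-against-zero test, using negation (LE), absolute value
   (ORDERED), negated absolute value (EQ), or a pair of chained
   fsels (UNGE, GT) applied to op0.] */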
20174 /* Same as above, but for ints (isel). */
20176 static int
20177 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20179 rtx condition_rtx, cr;
20180 machine_mode mode = GET_MODE (dest);
20181 enum rtx_code cond_code;
20182 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
20183 bool signedp;
20185 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
20186 return 0;
20188 /* We still have to do the compare, because isel doesn't do a
20189 compare; it just looks at the CRx bits set by a previous compare
20190 instruction. */
20191 condition_rtx = rs6000_generate_compare (op, mode);
20192 cond_code = GET_CODE (condition_rtx);
20193 cr = XEXP (condition_rtx, 0);
20194 signedp = GET_MODE (cr) == CCmode;
20196 isel_func = (mode == SImode
20197 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
20198 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
20200 switch (cond_code)
20202 case LT: case GT: case LTU: case GTU: case EQ:
20203 /* isel handles these directly. */
20204 break;
20206 default:
20207 /* We need to swap the sense of the comparison. */
20209 std::swap (false_cond, true_cond);
20210 PUT_CODE (condition_rtx, reverse_condition (cond_code));
20212 break;
20215 false_cond = force_reg (mode, false_cond);
20216 if (true_cond != const0_rtx)
20217 true_cond = force_reg (mode, true_cond);
20219 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
20221 return 1;
20224 const char *
20225 output_isel (rtx *operands)
20227 enum rtx_code code;
20229 code = GET_CODE (operands[1]);
20231 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
20233 gcc_assert (GET_CODE (operands[2]) == REG
20234 && GET_CODE (operands[3]) == REG);
20235 PUT_CODE (operands[1], reverse_condition (code));
20236 return "isel %0,%3,%2,%j1";
20239 return "isel %0,%2,%3,%j1";
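/* [Editor's note: isel RT,RA,RB,BC copies RA into RT when condition
   register bit BC is set and RB otherwise, so only LT/GT/LTU/GTU/EQ
   have a directly testable CR bit; GE/GEU/LE/LEU/NE are handled
   above by reversing the condition and swapping the two source
   operands.  The %j output modifier prints the CR bit number for
   the comparison in operands[1].] */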
20242 void
20243 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20245 machine_mode mode = GET_MODE (op0);
20246 enum rtx_code c;
20247 rtx target;
20249 /* VSX/altivec have direct min/max insns. */
20250 if ((code == SMAX || code == SMIN)
20251 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
20252 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
20254 emit_insn (gen_rtx_SET (VOIDmode,
20255 dest,
20256 gen_rtx_fmt_ee (code, mode, op0, op1)));
20257 return;
20260 if (code == SMAX || code == SMIN)
20261 c = GE;
20262 else
20263 c = GEU;
20265 if (code == SMAX || code == UMAX)
20266 target = emit_conditional_move (dest, c, op0, op1, mode,
20267 op0, op1, mode, 0);
20268 else
20269 target = emit_conditional_move (dest, c, op0, op1, mode,
20270 op1, op0, mode, 0);
20271 gcc_assert (target);
20272 if (target != dest)
20273 emit_move_insn (dest, target);
20276 /* A subroutine of the atomic operation splitters. Jump to LABEL if
20277 COND is true. Mark the jump as unlikely to be taken. */
20279 static void
20280 emit_unlikely_jump (rtx cond, rtx label)
20282 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
20283 rtx x;
20285 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
20286 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
20287 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
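/* [Editor's note: REG_BR_PROB_BASE is 10000, so VERY_UNLIKELY above
   evaluates to 99, i.e. the jump to LABEL is annotated as taken
   roughly 1% of the time.  This biases block layout so that the
   store-conditional retry loops emitted below fall through on the
   common (success) path.] */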
20290 /* A subroutine of the atomic operation splitters. Emit a load-locked
20291 instruction in MODE. For QI/HImode, possibly use a pattern that includes
20292 the zero_extend operation. */
20294 static void
20295 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
20297 rtx (*fn) (rtx, rtx) = NULL;
20299 switch (mode)
20301 case QImode:
20302 fn = gen_load_lockedqi;
20303 break;
20304 case HImode:
20305 fn = gen_load_lockedhi;
20306 break;
20307 case SImode:
20308 if (GET_MODE (mem) == QImode)
20309 fn = gen_load_lockedqi_si;
20310 else if (GET_MODE (mem) == HImode)
20311 fn = gen_load_lockedhi_si;
20312 else
20313 fn = gen_load_lockedsi;
20314 break;
20315 case DImode:
20316 fn = gen_load_lockeddi;
20317 break;
20318 case TImode:
20319 fn = gen_load_lockedti;
20320 break;
20321 default:
20322 gcc_unreachable ();
20324 emit_insn (fn (reg, mem));
20327 /* A subroutine of the atomic operation splitters. Emit a store-conditional
20328 instruction in MODE. */
20330 static void
20331 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
20333 rtx (*fn) (rtx, rtx, rtx) = NULL;
20335 switch (mode)
20337 case QImode:
20338 fn = gen_store_conditionalqi;
20339 break;
20340 case HImode:
20341 fn = gen_store_conditionalhi;
20342 break;
20343 case SImode:
20344 fn = gen_store_conditionalsi;
20345 break;
20346 case DImode:
20347 fn = gen_store_conditionaldi;
20348 break;
20349 case TImode:
20350 fn = gen_store_conditionalti;
20351 break;
20352 default:
20353 gcc_unreachable ();
20356 /* Emit sync before stwcx. to address PPC405 erratum 77. */
20357 if (PPC405_ERRATUM77)
20358 emit_insn (gen_hwsync ());
20360 emit_insn (fn (res, mem, val));
20363 /* Expand barriers before and after a load_locked/store_cond sequence. */
20365 static rtx
20366 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
20368 rtx addr = XEXP (mem, 0);
20369 int strict_p = (reload_in_progress || reload_completed);
20371 if (!legitimate_indirect_address_p (addr, strict_p)
20372 && !legitimate_indexed_address_p (addr, strict_p))
20374 addr = force_reg (Pmode, addr);
20375 mem = replace_equiv_address_nv (mem, addr);
20378 switch (model)
20380 case MEMMODEL_RELAXED:
20381 case MEMMODEL_CONSUME:
20382 case MEMMODEL_ACQUIRE:
20383 break;
20384 case MEMMODEL_RELEASE:
20385 case MEMMODEL_ACQ_REL:
20386 emit_insn (gen_lwsync ());
20387 break;
20388 case MEMMODEL_SEQ_CST:
20389 emit_insn (gen_hwsync ());
20390 break;
20391 default:
20392 gcc_unreachable ();
20394 return mem;
20397 static void
20398 rs6000_post_atomic_barrier (enum memmodel model)
20400 switch (model)
20402 case MEMMODEL_RELAXED:
20403 case MEMMODEL_CONSUME:
20404 case MEMMODEL_RELEASE:
20405 break;
20406 case MEMMODEL_ACQUIRE:
20407 case MEMMODEL_ACQ_REL:
20408 case MEMMODEL_SEQ_CST:
20409 emit_insn (gen_isync ());
20410 break;
20411 default:
20412 gcc_unreachable ();
20416 /* A subroutine of the various atomic expanders. For sub-word operations,
20417 we must adjust things to operate on SImode. Given the original MEM,
20418 return a new aligned memory. Also build and return the quantities by
20419 which to shift and mask. */
20421 static rtx
20422 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
20424 rtx addr, align, shift, mask, mem;
20425 HOST_WIDE_INT shift_mask;
20426 machine_mode mode = GET_MODE (orig_mem);
20428 /* For smaller modes, we have to implement this via SImode. */
20429 shift_mask = (mode == QImode ? 0x18 : 0x10);
20431 addr = XEXP (orig_mem, 0);
20432 addr = force_reg (GET_MODE (addr), addr);
20434 /* Aligned memory containing subword. Generate a new memory. We
20435 do not want any of the existing MEM_ATTR data, as we're now
20436 accessing memory outside the original object. */
20437 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
20438 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20439 mem = gen_rtx_MEM (SImode, align);
20440 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
20441 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
20442 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20444 /* Shift amount for subword relative to aligned word. */
20445 shift = gen_reg_rtx (SImode);
20446 addr = gen_lowpart (SImode, addr);
20447 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
20448 if (BYTES_BIG_ENDIAN)
20449 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20450 shift, 1, OPTAB_LIB_WIDEN);
20451 *pshift = shift;
20453 /* Mask for insertion. */
20454 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20455 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20456 *pmask = mask;
20458 return mem;
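/* [Editor's note: a worked example of the computation above, with
   values assumed purely for illustration.  For a HImode access at
   address 0x1002:

     align = 0x1002 & -4 = 0x1000  (containing SImode word)
     shift = (0x1002 rotated left 3) & 0x10 = 16  (little-endian)
     shift = 16 ^ 0x10 = 0  (after the big-endian correction)
     mask  = 0xffff << shift

   so the halfword occupies bits 16..31 of the aligned word on
   little-endian targets and bits 0..15 on big-endian ones.] */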
20461 /* A subroutine of the various atomic expanders. For sub-word operands,
20462 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
20464 static rtx
20465 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20467 rtx x;
20469 x = gen_reg_rtx (SImode);
20470 emit_insn (gen_rtx_SET (VOIDmode, x,
20471 gen_rtx_AND (SImode,
20472 gen_rtx_NOT (SImode, mask),
20473 oldval)));
20475 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20477 return x;
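/* [Editor's note: in C terms the function above computes
   (oldval & ~mask) | newval, where NEWVAL has already been shifted
   into position, so all bits outside the sub-word field retain
   their old contents.] */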
20480 /* A subroutine of the various atomic expanders. For sub-word operands,
20481 extract WIDE to NARROW via SHIFT. */
20483 static void
20484 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20486 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20487 wide, 1, OPTAB_LIB_WIDEN);
20488 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
20491 /* Expand an atomic compare and swap operation. */
20493 void
20494 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20496 rtx boolval, retval, mem, oldval, newval, cond;
20497 rtx label1, label2, x, mask, shift;
20498 machine_mode mode, orig_mode;
20499 enum memmodel mod_s, mod_f;
20500 bool is_weak;
20502 boolval = operands[0];
20503 retval = operands[1];
20504 mem = operands[2];
20505 oldval = operands[3];
20506 newval = operands[4];
20507 is_weak = (INTVAL (operands[5]) != 0);
20508 mod_s = (enum memmodel) INTVAL (operands[6]);
20509 mod_f = (enum memmodel) INTVAL (operands[7]);
20510 orig_mode = mode = GET_MODE (mem);
20512 mask = shift = NULL_RTX;
20513 if (mode == QImode || mode == HImode)
20515 /* Before power8, we didn't have access to lbarx/lharx, so generate a
20516 lwarx and use shift/mask operations. With power8, we need to do the
20517 comparison in SImode, but the store is still done in QI/HImode. */
20518 oldval = convert_modes (SImode, mode, oldval, 1);
20520 if (!TARGET_SYNC_HI_QI)
20522 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20524 /* Shift and mask OLDVAL into position within the word. */
20525 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20526 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20528 /* Shift and mask NEWVAL into position within the word. */
20529 newval = convert_modes (SImode, mode, newval, 1);
20530 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20531 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20534 /* Prepare to adjust the return value. */
20535 retval = gen_reg_rtx (SImode);
20536 mode = SImode;
20538 else if (reg_overlap_mentioned_p (retval, oldval))
20539 oldval = copy_to_reg (oldval);
20541 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20543 label1 = NULL_RTX;
20544 if (!is_weak)
20546 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20547 emit_label (XEXP (label1, 0));
20549 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20551 emit_load_locked (mode, retval, mem);
20553 x = retval;
20554 if (mask)
20556 x = expand_simple_binop (SImode, AND, retval, mask,
20557 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20560 cond = gen_reg_rtx (CCmode);
20561 /* If we have TImode, synthesize a comparison. */
20562 if (mode != TImode)
20563 x = gen_rtx_COMPARE (CCmode, x, oldval);
20564 else
20566 rtx xor1_result = gen_reg_rtx (DImode);
20567 rtx xor2_result = gen_reg_rtx (DImode);
20568 rtx or_result = gen_reg_rtx (DImode);
20569 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20570 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20571 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20572 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20574 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20575 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20576 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20577 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20580 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
20582 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20583 emit_unlikely_jump (x, label2);
20585 x = newval;
20586 if (mask)
20587 x = rs6000_mask_atomic_subword (retval, newval, mask);
20589 emit_store_conditional (orig_mode, cond, mem, x);
20591 if (!is_weak)
20593 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20594 emit_unlikely_jump (x, label1);
20597 if (mod_f != MEMMODEL_RELAXED)
20598 emit_label (XEXP (label2, 0));
20600 rs6000_post_atomic_barrier (mod_s);
20602 if (mod_f == MEMMODEL_RELAXED)
20603 emit_label (XEXP (label2, 0));
20605 if (shift)
20606 rs6000_finish_atomic_subword (operands[1], retval, shift);
20607 else if (mode != GET_MODE (operands[1]))
20608 convert_move (operands[1], retval, 1);
20610 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20611 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20612 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
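/* [Editor's note: schematically, the strong (!is_weak) word-sized
   variant expands to a loop of roughly this shape (mnemonics shown
   for illustration only):

     loop: lwarx ret,0,mem        ; load-locked
           cmpw cr0,ret,oldval
           bne- cr0,fail          ; value differs, give up
           stwcx. newval,0,mem    ; store-conditional
           bne- cr0,loop          ; lost reservation, retry
     fail:

   The weak variant omits the backward branch, and CR0.EQ supplies
   the boolean success result set just above.] */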
20615 /* Expand an atomic exchange operation. */
20617 void
20618 rs6000_expand_atomic_exchange (rtx operands[])
20620 rtx retval, mem, val, cond;
20621 machine_mode mode;
20622 enum memmodel model;
20623 rtx label, x, mask, shift;
20625 retval = operands[0];
20626 mem = operands[1];
20627 val = operands[2];
20628 model = (enum memmodel) INTVAL (operands[3]);
20629 mode = GET_MODE (mem);
20631 mask = shift = NULL_RTX;
20632 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20634 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20636 /* Shift and mask VAL into position within the word. */
20637 val = convert_modes (SImode, mode, val, 1);
20638 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20641 /* Prepare to adjust the return value. */
20642 retval = gen_reg_rtx (SImode);
20643 mode = SImode;
20646 mem = rs6000_pre_atomic_barrier (mem, model);
20648 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20649 emit_label (XEXP (label, 0));
20651 emit_load_locked (mode, retval, mem);
20653 x = val;
20654 if (mask)
20655 x = rs6000_mask_atomic_subword (retval, val, mask);
20657 cond = gen_reg_rtx (CCmode);
20658 emit_store_conditional (mode, cond, mem, x);
20660 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20661 emit_unlikely_jump (x, label);
20663 rs6000_post_atomic_barrier (model);
20665 if (shift)
20666 rs6000_finish_atomic_subword (operands[0], retval, shift);
20669 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20670 to perform. MEM is the memory on which to operate. VAL is the second
20671 operand of the binary operator. BEFORE and AFTER are optional locations to
20672 return the value of MEM either before or after the operation. MODEL_RTX
20673 is a CONST_INT containing the memory model to use. */
20675 void
20676 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20677 rtx orig_before, rtx orig_after, rtx model_rtx)
20679 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20680 machine_mode mode = GET_MODE (mem);
20681 machine_mode store_mode = mode;
20682 rtx label, x, cond, mask, shift;
20683 rtx before = orig_before, after = orig_after;
20685 mask = shift = NULL_RTX;
20686 /* On power8, we want to use SImode for the operation. On previous systems,
20687 do the operation on the containing SImode word and shift/mask to get the
20688 proper byte or halfword. */
20689 if (mode == QImode || mode == HImode)
20691 if (TARGET_SYNC_HI_QI)
20693 val = convert_modes (SImode, mode, val, 1);
20695 /* Prepare to adjust the return value. */
20696 before = gen_reg_rtx (SImode);
20697 if (after)
20698 after = gen_reg_rtx (SImode);
20699 mode = SImode;
20701 else
20703 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20705 /* Shift and mask VAL into position within the word. */
20706 val = convert_modes (SImode, mode, val, 1);
20707 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20708 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20710 switch (code)
20712 case IOR:
20713 case XOR:
20714 /* We've already zero-extended VAL. That is sufficient to
20715 make certain that it does not affect other bits. */
20716 mask = NULL;
20717 break;
20719 case AND:
20720 /* If we make certain that all of the other bits in VAL are
20721 set, that will be sufficient to not affect other bits. */
20722 x = gen_rtx_NOT (SImode, mask);
20723 x = gen_rtx_IOR (SImode, x, val);
20724 emit_insn (gen_rtx_SET (VOIDmode, val, x));
20725 mask = NULL;
20726 break;
20728 case NOT:
20729 case PLUS:
20730 case MINUS:
20731 /* These will all affect bits outside the field and need
20732 adjustment via MASK within the loop. */
20733 break;
20735 default:
20736 gcc_unreachable ();
20739 /* Prepare to adjust the return value. */
20740 before = gen_reg_rtx (SImode);
20741 if (after)
20742 after = gen_reg_rtx (SImode);
20743 store_mode = mode = SImode;
20747 mem = rs6000_pre_atomic_barrier (mem, model);
20749 label = gen_label_rtx ();
20750 emit_label (label);
20751 label = gen_rtx_LABEL_REF (VOIDmode, label);
20753 if (before == NULL_RTX)
20754 before = gen_reg_rtx (mode);
20756 emit_load_locked (mode, before, mem);
20758 if (code == NOT)
20760 x = expand_simple_binop (mode, AND, before, val,
20761 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20762 after = expand_simple_unop (mode, NOT, x, after, 1);
20764 else
20766 after = expand_simple_binop (mode, code, before, val,
20767 after, 1, OPTAB_LIB_WIDEN);
20770 x = after;
20771 if (mask)
20773 x = expand_simple_binop (SImode, AND, after, mask,
20774 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20775 x = rs6000_mask_atomic_subword (before, x, mask);
20777 else if (store_mode != mode)
20778 x = convert_modes (store_mode, mode, x, 1);
20780 cond = gen_reg_rtx (CCmode);
20781 emit_store_conditional (store_mode, cond, mem, x);
20783 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20784 emit_unlikely_jump (x, label);
20786 rs6000_post_atomic_barrier (model);
20788 if (shift)
20790 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20791 then do the calculations in an SImode register. */
20792 if (orig_before)
20793 rs6000_finish_atomic_subword (orig_before, before, shift);
20794 if (orig_after)
20795 rs6000_finish_atomic_subword (orig_after, after, shift);
20797 else if (store_mode != mode)
20799 /* QImode/HImode on machines with lbarx/lharx where we do the native
20800 operation and then do the calculations in an SImode register. */
20801 if (orig_before)
20802 convert_move (orig_before, before, 1);
20803 if (orig_after)
20804 convert_move (orig_after, after, 1);
20806 else if (orig_after && after != orig_after)
20807 emit_move_insn (orig_after, after);
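/* [Editor's note: for example, a full-word atomic fetch-and-add
   expands to roughly (mnemonics for illustration only):

     loop: lwarx before,0,mem
           add after,before,val
           stwcx. after,0,mem
           bne- cr0,loop

   The NOT case above computes ~(before & val), i.e. a NAND, which
   is what __atomic_fetch_nand requires.] */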
20810 /* Emit instructions to move SRC to DST. Called by splitters for
20811 multi-register moves. It will emit at most one instruction for
20812 each register that is accessed; that is, it won't emit li/lis pairs
20813 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20814 register. */
20816 void
20817 rs6000_split_multireg_move (rtx dst, rtx src)
20819 /* The register number of the first register being moved. */
20820 int reg;
20821 /* The mode that is to be moved. */
20822 machine_mode mode;
20823 /* The mode that the move is being done in, and its size. */
20824 machine_mode reg_mode;
20825 int reg_mode_size;
20826 /* The number of registers that will be moved. */
20827 int nregs;
20829 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20830 mode = GET_MODE (dst);
20831 nregs = hard_regno_nregs[reg][mode];
20832 if (FP_REGNO_P (reg))
20833 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20834 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20835 else if (ALTIVEC_REGNO_P (reg))
20836 reg_mode = V16QImode;
20837 else if (TARGET_E500_DOUBLE && mode == TFmode)
20838 reg_mode = DFmode;
20839 else
20840 reg_mode = word_mode;
20841 reg_mode_size = GET_MODE_SIZE (reg_mode);
20843 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20845 /* TDmode residing in FP registers is special, since the ISA requires that
20846 the lower-numbered word of a register pair is always the most significant
20847 word, even in little-endian mode. This does not match the usual subreg
20848 semantics, so we cannot use simplify_gen_subreg in those cases. Access
20849 the appropriate constituent registers "by hand" in little-endian mode.
20851 Note we do not need to check for destructive overlap here since TDmode
20852 can only reside in even/odd register pairs. */
20853 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
20855 rtx p_src, p_dst;
20856 int i;
20858 for (i = 0; i < nregs; i++)
20860 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
20861 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
20862 else
20863 p_src = simplify_gen_subreg (reg_mode, src, mode,
20864 i * reg_mode_size);
20866 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
20867 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
20868 else
20869 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
20870 i * reg_mode_size);
20872 emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
20875 return;
20878 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
20880 /* Move register range backwards, if we might have destructive
20881 overlap. */
20882 int i;
20883 for (i = nregs - 1; i >= 0; i--)
20884 emit_insn (gen_rtx_SET (VOIDmode,
20885 simplify_gen_subreg (reg_mode, dst, mode,
20886 i * reg_mode_size),
20887 simplify_gen_subreg (reg_mode, src, mode,
20888 i * reg_mode_size)));
20890 else
20892 int i;
20893 int j = -1;
20894 bool used_update = false;
20895 rtx restore_basereg = NULL_RTX;
20897 if (MEM_P (src) && INT_REGNO_P (reg))
20899 rtx breg;
20901 if (GET_CODE (XEXP (src, 0)) == PRE_INC
20902 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
20904 rtx delta_rtx;
20905 breg = XEXP (XEXP (src, 0), 0);
20906 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
20907 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
20908 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
20909 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20910 src = replace_equiv_address (src, breg);
20912 else if (! rs6000_offsettable_memref_p (src, reg_mode))
20914 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
20916 rtx basereg = XEXP (XEXP (src, 0), 0);
20917 if (TARGET_UPDATE)
20919 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
20920 emit_insn (gen_rtx_SET (VOIDmode, ndst,
20921 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
20922 used_update = true;
20924 else
20925 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20926 XEXP (XEXP (src, 0), 1)));
20927 src = replace_equiv_address (src, basereg);
20929 else
20931 rtx basereg = gen_rtx_REG (Pmode, reg);
20932 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
20933 src = replace_equiv_address (src, basereg);
20937 breg = XEXP (src, 0);
20938 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
20939 breg = XEXP (breg, 0);
20941 /* If the base register we are using to address memory is
20942 also a destination reg, then change that register last. */
20943 if (REG_P (breg)
20944 && REGNO (breg) >= REGNO (dst)
20945 && REGNO (breg) < REGNO (dst) + nregs)
20946 j = REGNO (breg) - REGNO (dst);
20948 else if (MEM_P (dst) && INT_REGNO_P (reg))
20950 rtx breg;
20952 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
20953 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
20955 rtx delta_rtx;
20956 breg = XEXP (XEXP (dst, 0), 0);
20957 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
20958 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
20959 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
20961 /* We have to update the breg before doing the store.
20962 Use store with update, if available. */
20964 if (TARGET_UPDATE)
20966 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20967 emit_insn (TARGET_32BIT
20968 ? (TARGET_POWERPC64
20969 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
20970 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
20971 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
20972 used_update = true;
20974 else
20975 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20976 dst = replace_equiv_address (dst, breg);
20978 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
20979 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
20981 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
20983 rtx basereg = XEXP (XEXP (dst, 0), 0);
20984 if (TARGET_UPDATE)
20986 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20987 emit_insn (gen_rtx_SET (VOIDmode,
20988 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
20989 used_update = true;
20991 else
20992 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20993 XEXP (XEXP (dst, 0), 1)));
20994 dst = replace_equiv_address (dst, basereg);
20996 else
20998 rtx basereg = XEXP (XEXP (dst, 0), 0);
20999 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
21000 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
21001 && REG_P (basereg)
21002 && REG_P (offsetreg)
21003 && REGNO (basereg) != REGNO (offsetreg));
21004 if (REGNO (basereg) == 0)
21006 rtx tmp = offsetreg;
21007 offsetreg = basereg;
21008 basereg = tmp;
21010 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
21011 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
21012 dst = replace_equiv_address (dst, basereg);
21015 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
21016 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
21019 for (i = 0; i < nregs; i++)
21021 /* Calculate index to next subword. */
21022 ++j;
21023 if (j == nregs)
21024 j = 0;
21026 /* If the compiler already emitted the move of the first word
21027 via store with update, there is no need to do anything. */
21028 if (j == 0 && used_update)
21029 continue;
21031 emit_insn (gen_rtx_SET (VOIDmode,
21032 simplify_gen_subreg (reg_mode, dst, mode,
21033 j * reg_mode_size),
21034 simplify_gen_subreg (reg_mode, src, mode,
21035 j * reg_mode_size)));
21037 if (restore_basereg != NULL_RTX)
21038 emit_insn (restore_basereg);
21043 /* This page contains routines that are used to determine what the
21044 function prologue and epilogue code will do and write them out. */
21046 static inline bool
21047 save_reg_p (int r)
21049 return !call_used_regs[r] && df_regs_ever_live_p (r);
21052 /* Return the first fixed-point register that is required to be
21053 saved. 32 if none. */
21056 first_reg_to_save (void)
21058 int first_reg;
21060 /* Find lowest numbered live register. */
21061 for (first_reg = 13; first_reg <= 31; first_reg++)
21062 if (save_reg_p (first_reg))
21063 break;
21065 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
21066 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
21067 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
21068 || (TARGET_TOC && TARGET_MINIMAL_TOC))
21069 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21070 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
21072 #if TARGET_MACHO
21073 if (flag_pic
21074 && crtl->uses_pic_offset_table
21075 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
21076 return RS6000_PIC_OFFSET_TABLE_REGNUM;
21077 #endif
21079 return first_reg;
21082 /* Similar, for FP regs. */
21085 first_fp_reg_to_save (void)
21087 int first_reg;
21089 /* Find lowest numbered live register. */
21090 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
21091 if (save_reg_p (first_reg))
21092 break;
21094 return first_reg;
21097 /* Similar, for AltiVec regs. */
21099 static int
21100 first_altivec_reg_to_save (void)
21102 int i;
21104 /* Stack frame remains as is unless we are in AltiVec ABI. */
21105 if (! TARGET_ALTIVEC_ABI)
21106 return LAST_ALTIVEC_REGNO + 1;
21108 /* On Darwin, the unwind routines are compiled without
21109 TARGET_ALTIVEC, and use save_world to save/restore the
21110 altivec registers when necessary. */
21111 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21112 && ! TARGET_ALTIVEC)
21113 return FIRST_ALTIVEC_REGNO + 20;
21115 /* Find lowest numbered live register. */
21116 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
21117 if (save_reg_p (i))
21118 break;
21120 return i;
21123 /* Return a 32-bit mask of the AltiVec registers we need to set in
21124 VRSAVE. Bit n of the return value is 1 if Vn is live, with bit 0
21125 being the most significant bit of the 32-bit word. */
21127 static unsigned int
21128 compute_vrsave_mask (void)
21130 unsigned int i, mask = 0;
21132 /* On Darwin, the unwind routines are compiled without
21133 TARGET_ALTIVEC, and use save_world to save/restore the
21134 call-saved altivec registers when necessary. */
21135 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21136 && ! TARGET_ALTIVEC)
21137 mask |= 0xFFF;
21139 /* First, find out if we use _any_ altivec registers. */
21140 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21141 if (df_regs_ever_live_p (i))
21142 mask |= ALTIVEC_REG_BIT (i);
21144 if (mask == 0)
21145 return mask;
21147 /* Next, remove the argument registers from the set. These must
21148 be in the VRSAVE mask set by the caller, so we don't need to add
21149 them in again. More importantly, the mask we compute here is
21150 used to generate CLOBBERs in the set_vrsave insn, and we do not
21151 wish the argument registers to die. */
21152 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
21153 mask &= ~ALTIVEC_REG_BIT (i);
21155 /* Similarly, remove the return value from the set. */
21157 bool yes = false;
21158 diddle_return_value (is_altivec_return_reg, &yes);
21159 if (yes)
21160 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
21163 return mask;
21166 /* For a very restricted set of circumstances, we can cut down the
21167 size of prologues/epilogues by calling our own save/restore-the-world
21168 routines. */
21170 static void
21171 compute_save_world_info (rs6000_stack_t *info_ptr)
21173 info_ptr->world_save_p = 1;
21174 info_ptr->world_save_p
21175 = (WORLD_SAVE_P (info_ptr)
21176 && DEFAULT_ABI == ABI_DARWIN
21177 && !cfun->has_nonlocal_label
21178 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
21179 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
21180 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
21181 && info_ptr->cr_save_p);
21183 /* This will not work in conjunction with sibcalls. Make sure there
21184 are none. (This check is expensive, but seldom executed.) */
21185 if (WORLD_SAVE_P (info_ptr))
21187 rtx_insn *insn;
21188 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
21189 if (CALL_P (insn) && SIBLING_CALL_P (insn))
21191 info_ptr->world_save_p = 0;
21192 break;
21196 if (WORLD_SAVE_P (info_ptr))
21198 /* Even if we're not touching VRsave, make sure there's room on the
21199 stack for it, if it looks like we're calling SAVE_WORLD, which
21200 will attempt to save it. */
21201 info_ptr->vrsave_size = 4;
21203 /* If we are going to save the world, we need to save the link register too. */
21204 info_ptr->lr_save_p = 1;
21206 /* "Save" the VRsave register too if we're saving the world. */
21207 if (info_ptr->vrsave_mask == 0)
21208 info_ptr->vrsave_mask = compute_vrsave_mask ();
21210 /* Because the Darwin register save/restore routines only handle
21211 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
21212 check. */
21213 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
21214 && (info_ptr->first_altivec_reg_save
21215 >= FIRST_SAVED_ALTIVEC_REGNO));
21217 return;
21221 static void
21222 is_altivec_return_reg (rtx reg, void *xyes)
21224 bool *yes = (bool *) xyes;
21225 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
21226 *yes = true;
21230 /* Look for user-defined global regs in the range FIRST to LAST-1.
21231 We should not restore these, and so cannot use lmw or out-of-line
21232 restore functions if there are any. We also can't save them
21233 (well, emit frame notes for them), because frame unwinding during
21234 exception handling will restore saved registers. */
21236 static bool
21237 global_regs_p (unsigned first, unsigned last)
21239 while (first < last)
21240 if (global_regs[first++])
21241 return true;
21242 return false;
21245 /* Determine the strategy for saving/restoring registers. */
21247 enum {
21248 SAVRES_MULTIPLE = 0x1,
21249 SAVE_INLINE_FPRS = 0x2,
21250 SAVE_INLINE_GPRS = 0x4,
21251 REST_INLINE_FPRS = 0x8,
21252 REST_INLINE_GPRS = 0x10,
21253 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
21254 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
21255 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
21256 SAVE_INLINE_VRS = 0x100,
21257 REST_INLINE_VRS = 0x200
21260 static int
21261 rs6000_savres_strategy (rs6000_stack_t *info,
21262 bool using_static_chain_p)
21264 int strategy = 0;
21265 bool lr_save_p;
21267 if (TARGET_MULTIPLE
21268 && !TARGET_POWERPC64
21269 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
21270 && info->first_gp_reg_save < 31
21271 && !global_regs_p (info->first_gp_reg_save, 32))
21272 strategy |= SAVRES_MULTIPLE;
21274 if (crtl->calls_eh_return
21275 || cfun->machine->ra_need_lr)
21276 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
21277 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
21278 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21280 if (info->first_fp_reg_save == 64
21281 /* The out-of-line FP routines use double-precision stores;
21282 we can't use those routines if we don't have such stores. */
21283 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
21284 || global_regs_p (info->first_fp_reg_save, 64))
21285 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21287 if (info->first_gp_reg_save == 32
21288 || (!(strategy & SAVRES_MULTIPLE)
21289 && global_regs_p (info->first_gp_reg_save, 32)))
21290 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21292 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
21293 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
21294 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21296 /* Define cutoff for using out-of-line functions to save registers. */
21297 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
21299 if (!optimize_size)
21301 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21302 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21303 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21305 else
21307 /* Prefer out-of-line restore if it will exit. */
21308 if (info->first_fp_reg_save > 61)
21309 strategy |= SAVE_INLINE_FPRS;
21310 if (info->first_gp_reg_save > 29)
21312 if (info->first_fp_reg_save == 64)
21313 strategy |= SAVE_INLINE_GPRS;
21314 else
21315 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21317 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
21318 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21321 else if (DEFAULT_ABI == ABI_DARWIN)
21323 if (info->first_fp_reg_save > 60)
21324 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21325 if (info->first_gp_reg_save > 29)
21326 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21327 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21329 else
21331 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21332 if (info->first_fp_reg_save > 61)
21333 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21334 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21335 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21338 /* Don't bother to try to save things out-of-line if r11 is occupied
21339 by the static chain. It would require too much fiddling and the
21340 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
21341 pointer on Darwin, and AIX uses r1 or r12. */
21342 if (using_static_chain_p
21343 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
21344 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
21345 | SAVE_INLINE_GPRS
21346 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21348 /* We can only use the out-of-line routines to restore if we've
21349 saved all the registers from first_fp_reg_save in the prologue.
21350 Otherwise, we risk loading garbage. */
21351 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
21353 int i;
21355 for (i = info->first_fp_reg_save; i < 64; i++)
21356 if (!save_reg_p (i))
21358 strategy |= REST_INLINE_FPRS;
21359 break;
21363 /* If we are going to use store multiple, then don't even bother
21364 with the out-of-line routines, since the store-multiple
21365 instruction will always be smaller. */
21366 if ((strategy & SAVRES_MULTIPLE))
21367 strategy |= SAVE_INLINE_GPRS;
21369 /* info->lr_save_p isn't yet set if the only reason lr needs to be
21370 saved is an out-of-line save or restore. Set up the value for
21371 the next test (excluding out-of-line gpr restore). */
21372 lr_save_p = (info->lr_save_p
21373 || !(strategy & SAVE_INLINE_GPRS)
21374 || !(strategy & SAVE_INLINE_FPRS)
21375 || !(strategy & SAVE_INLINE_VRS)
21376 || !(strategy & REST_INLINE_FPRS)
21377 || !(strategy & REST_INLINE_VRS));
21379 /* The situation is more complicated with load multiple. We'd
21380 prefer to use the out-of-line routines for restores, since the
21381 "exit" out-of-line routines can handle the restore of LR and the
21382 frame teardown. However, it doesn't make sense to use the
21383 out-of-line routine if that is the only reason we'd need to save
21384 LR, and we can't use the "exit" out-of-line gpr restore if we
21385 have saved some fprs; in those cases it is advantageous to use
21386 load multiple when available. */
21387 if ((strategy & SAVRES_MULTIPLE)
21388 && (!lr_save_p
21389 || info->first_fp_reg_save != 64))
21390 strategy |= REST_INLINE_GPRS;
21392 /* Saving CR interferes with the exit routines used on the SPE, so
21393 just punt here. */
21394 if (TARGET_SPE_ABI
21395 && info->spe_64bit_regs_used
21396 && info->cr_save_p)
21397 strategy |= REST_INLINE_GPRS;
21399 /* We can only use load multiple or the out-of-line routines to
21400 restore if we've used store multiple or out-of-line routines
21401 in the prologue, i.e. if we've saved all the registers from
21402 first_gp_reg_save. Otherwise, we risk loading garbage. */
21403 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
21404 == SAVE_INLINE_GPRS)
21406 int i;
21408 for (i = info->first_gp_reg_save; i < 32; i++)
21409 if (!save_reg_p (i))
21411 strategy |= REST_INLINE_GPRS;
21412 break;
21416 if (TARGET_ELF && TARGET_64BIT)
21418 if (!(strategy & SAVE_INLINE_FPRS))
21419 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21420 else if (!(strategy & SAVE_INLINE_GPRS)
21421 && info->first_fp_reg_save == 64)
21422 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
21424 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
21425 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
21427 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
21428 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21430 return strategy;
21433 /* Calculate the stack information for the current function. This is
21434 complicated by having two separate calling sequences, the AIX calling
21435 sequence and the V.4 calling sequence.
21437 AIX (and Darwin/Mac OS X) stack frames look like:
21438 32-bit 64-bit
21439 SP----> +---------------------------------------+
21440 | back chain to caller | 0 0
21441 +---------------------------------------+
21442 | saved CR | 4 8 (8-11)
21443 +---------------------------------------+
21444 | saved LR | 8 16
21445 +---------------------------------------+
21446 | reserved for compilers | 12 24
21447 +---------------------------------------+
21448 | reserved for binders | 16 32
21449 +---------------------------------------+
21450 | saved TOC pointer | 20 40
21451 +---------------------------------------+
21452 | Parameter save area (P) | 24 48
21453 +---------------------------------------+
21454 | Alloca space (A) | 24+P etc.
21455 +---------------------------------------+
21456 | Local variable space (L) | 24+P+A
21457 +---------------------------------------+
21458 | Float/int conversion temporary (X) | 24+P+A+L
21459 +---------------------------------------+
21460 | Save area for AltiVec registers (W) | 24+P+A+L+X
21461 +---------------------------------------+
21462 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21463 +---------------------------------------+
21464 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21465 +---------------------------------------+
21466 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
21467 +---------------------------------------+
21468 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
21469 +---------------------------------------+
21470 old SP->| back chain to caller's caller |
21471 +---------------------------------------+
21473 The required alignment for AIX configurations is two words (i.e., 8
21474 or 16 bytes).
21476 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21478 SP----> +---------------------------------------+
21479 | Back chain to caller | 0
21480 +---------------------------------------+
21481 | Save area for CR | 8
21482 +---------------------------------------+
21483 | Saved LR | 16
21484 +---------------------------------------+
21485 | Saved TOC pointer | 24
21486 +---------------------------------------+
21487 | Parameter save area (P) | 32
21488 +---------------------------------------+
21489 | Alloca space (A) | 32+P
21490 +---------------------------------------+
21491 | Local variable space (L) | 32+P+A
21492 +---------------------------------------+
21493 | Save area for AltiVec registers (W) | 32+P+A+L
21494 +---------------------------------------+
21495 | AltiVec alignment padding (Y) | 32+P+A+L+W
21496 +---------------------------------------+
21497 | Save area for GP registers (G) | 32+P+A+L+W+Y
21498 +---------------------------------------+
21499 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21500 +---------------------------------------+
21501 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21502 +---------------------------------------+
21505 V.4 stack frames look like:
21507 SP----> +---------------------------------------+
21508 | back chain to caller | 0
21509 +---------------------------------------+
21510 | caller's saved LR | 4
21511 +---------------------------------------+
21512 | Parameter save area (P) | 8
21513 +---------------------------------------+
21514 | Alloca space (A) | 8+P
21515 +---------------------------------------+
21516 | Varargs save area (V) | 8+P+A
21517 +---------------------------------------+
21518 | Local variable space (L) | 8+P+A+V
21519 +---------------------------------------+
21520 | Float/int conversion temporary (X) | 8+P+A+V+L
21521 +---------------------------------------+
21522 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21523 +---------------------------------------+
21524 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21525 +---------------------------------------+
21526 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21527 +---------------------------------------+
21528 | SPE: area for 64-bit GP registers |
21529 +---------------------------------------+
21530 | SPE alignment padding |
21531 +---------------------------------------+
21532 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21533 +---------------------------------------+
21534 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21535 +---------------------------------------+
21536 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21537 +---------------------------------------+
21538 old SP->| back chain to caller's caller |
21539 +---------------------------------------+
21541 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21542 given. (But note below and in sysv4.h that we require only 8 and
21543 may round up the size of our stack frame anyway. The historical
21544 reason is early versions of powerpc-linux which didn't properly
21545 align the stack at program startup. A happy side-effect is that
21546 -mno-eabi libraries can be used with -meabi programs.)
21548 The EABI configuration defaults to the V.4 layout. However,
21549 the stack alignment requirements may differ. If -mno-eabi is not
21550 given, the required stack alignment is 8 bytes; if -mno-eabi is
21551 given, the required alignment is 16 bytes. (But see V.4 comment
21552 above.) */
21554 #ifndef ABI_STACK_BOUNDARY
21555 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21556 #endif
21558 static rs6000_stack_t *
21559 rs6000_stack_info (void)
21561 /* We should never be called for thunks; we are not set up for that. */
21562 gcc_assert (!cfun->is_thunk);
21564 rs6000_stack_t *info_ptr = &stack_info;
21565 int reg_size = TARGET_32BIT ? 4 : 8;
21566 int ehrd_size;
21567 int ehcr_size;
21568 int save_align;
21569 int first_gp;
21570 HOST_WIDE_INT non_fixed_size;
21571 bool using_static_chain_p;
21573 if (reload_completed && info_ptr->reload_completed)
21574 return info_ptr;
21576 memset (info_ptr, 0, sizeof (*info_ptr));
21577 info_ptr->reload_completed = reload_completed;
21579 if (TARGET_SPE)
21581 /* Cache value so we don't rescan instruction chain over and over. */
21582 if (cfun->machine->insn_chain_scanned_p == 0)
21583 cfun->machine->insn_chain_scanned_p
21584 = spe_func_has_64bit_regs_p () + 1;
21585 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21588 /* Select which calling sequence. */
21589 info_ptr->abi = DEFAULT_ABI;
21591 /* Calculate which registers need to be saved & save area size. */
21592 info_ptr->first_gp_reg_save = first_reg_to_save ();
21593 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21594 even if it currently looks like we won't. Reload may need it to
21595 get at a constant; if so, it will have already created a constant
21596 pool entry for it. */
21597 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21598 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21599 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21600 && crtl->uses_const_pool
21601 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21602 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21603 else
21604 first_gp = info_ptr->first_gp_reg_save;
21606 info_ptr->gp_size = reg_size * (32 - first_gp);
21608 /* For the SPE, we have an additional upper 32-bits on each GPR.
21609 Ideally we should save the entire 64-bits only when the upper
21610 half is used in SIMD instructions. Since we only record
21611 registers live (not the size they are used in), this proves
21612 difficult because we'd have to traverse the instruction chain at
21613 the right time, taking reload into account. This is a real pain,
21614 so we opt to save the GPRs in 64-bits whenever even one register
21615 gets used in 64-bits. Otherwise, all the registers in the frame
21616 get saved in 32-bits.
21618 So, when we save all GPRs (except the SP) in 64-bits, the
21619 traditional GP save area will be empty.
21620 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21621 info_ptr->gp_size = 0;
21623 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21624 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21626 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21627 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21628 - info_ptr->first_altivec_reg_save);
21630 /* Does this function call anything? */
21631 info_ptr->calls_p = (! crtl->is_leaf
21632 || cfun->machine->ra_needs_full_frame);
21634 /* Determine if we need to save the condition code registers. */
21635 if (df_regs_ever_live_p (CR2_REGNO)
21636 || df_regs_ever_live_p (CR3_REGNO)
21637 || df_regs_ever_live_p (CR4_REGNO))
21639 info_ptr->cr_save_p = 1;
21640 if (DEFAULT_ABI == ABI_V4)
21641 info_ptr->cr_size = reg_size;
21644 /* If the current function calls __builtin_eh_return, then we need
21645 to allocate stack space for registers that will hold data for
21646 the exception handler. */
21647 if (crtl->calls_eh_return)
21649 unsigned int i;
21650 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21651 continue;
21653 /* SPE saves EH registers in 64-bits. */
21654 ehrd_size = i * (TARGET_SPE_ABI
21655 && info_ptr->spe_64bit_regs_used != 0
21656 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21658 else
21659 ehrd_size = 0;
21661 /* In the ELFv2 ABI, we also need to allocate space for separate
21662 CR field save areas if the function calls __builtin_eh_return. */
21663 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21665 /* This hard-codes that we have three call-saved CR fields. */
21666 ehcr_size = 3 * reg_size;
21667 /* We do *not* use the regular CR save mechanism. */
21668 info_ptr->cr_save_p = 0;
21670 else
21671 ehcr_size = 0;
21673 /* Determine various sizes. */
21674 info_ptr->reg_size = reg_size;
21675 info_ptr->fixed_size = RS6000_SAVE_AREA;
21676 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21677 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21678 TARGET_ALTIVEC ? 16 : 8);
21679 if (FRAME_GROWS_DOWNWARD)
21680 info_ptr->vars_size
21681 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21682 + info_ptr->parm_size,
21683 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21684 - (info_ptr->fixed_size + info_ptr->vars_size
21685 + info_ptr->parm_size);
21687 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21688 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21689 else
21690 info_ptr->spe_gp_size = 0;
21692 if (TARGET_ALTIVEC_ABI)
21693 info_ptr->vrsave_mask = compute_vrsave_mask ();
21694 else
21695 info_ptr->vrsave_mask = 0;
21697 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21698 info_ptr->vrsave_size = 4;
21699 else
21700 info_ptr->vrsave_size = 0;
21702 compute_save_world_info (info_ptr);
21704 /* Calculate the offsets. */
21705 switch (DEFAULT_ABI)
21707 case ABI_NONE:
21708 default:
21709 gcc_unreachable ();
21711 case ABI_AIX:
21712 case ABI_ELFv2:
21713 case ABI_DARWIN:
21714 info_ptr->fp_save_offset = - info_ptr->fp_size;
21715 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21717 if (TARGET_ALTIVEC_ABI)
21719 info_ptr->vrsave_save_offset
21720 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21722 /* Align stack so vector save area is on a quadword boundary.
21723 The padding goes above the vectors. */
21724 if (info_ptr->altivec_size != 0)
21725 info_ptr->altivec_padding_size
21726 = info_ptr->vrsave_save_offset & 0xF;
21727 else
21728 info_ptr->altivec_padding_size = 0;
21730 info_ptr->altivec_save_offset
21731 = info_ptr->vrsave_save_offset
21732 - info_ptr->altivec_padding_size
21733 - info_ptr->altivec_size;
21734 gcc_assert (info_ptr->altivec_size == 0
21735 || info_ptr->altivec_save_offset % 16 == 0);
21737 /* Adjust for AltiVec case. */
21738 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21740 else
21741 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21743 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21744 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21745 info_ptr->lr_save_offset = 2*reg_size;
21746 break;
21748 case ABI_V4:
21749 info_ptr->fp_save_offset = - info_ptr->fp_size;
21750 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21751 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21753 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21755 /* Align stack so SPE GPR save area is aligned on a
21756 double-word boundary. */
21757 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21758 info_ptr->spe_padding_size
21759 = 8 - (-info_ptr->cr_save_offset % 8);
21760 else
21761 info_ptr->spe_padding_size = 0;
21763 info_ptr->spe_gp_save_offset
21764 = info_ptr->cr_save_offset
21765 - info_ptr->spe_padding_size
21766 - info_ptr->spe_gp_size;
21768 /* Adjust for SPE case. */
21769 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21771 else if (TARGET_ALTIVEC_ABI)
21773 info_ptr->vrsave_save_offset
21774 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21776 /* Align stack so vector save area is on a quadword boundary. */
21777 if (info_ptr->altivec_size != 0)
21778 info_ptr->altivec_padding_size
21779 = 16 - (-info_ptr->vrsave_save_offset % 16);
21780 else
21781 info_ptr->altivec_padding_size = 0;
21783 info_ptr->altivec_save_offset
21784 = info_ptr->vrsave_save_offset
21785 - info_ptr->altivec_padding_size
21786 - info_ptr->altivec_size;
21788 /* Adjust for AltiVec case. */
21789 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21791 else
21792 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21793 info_ptr->ehrd_offset -= ehrd_size;
21794 info_ptr->lr_save_offset = reg_size;
21795 break;
21798 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21799 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21800 + info_ptr->gp_size
21801 + info_ptr->altivec_size
21802 + info_ptr->altivec_padding_size
21803 + info_ptr->spe_gp_size
21804 + info_ptr->spe_padding_size
21805 + ehrd_size
21806 + ehcr_size
21807 + info_ptr->cr_size
21808 + info_ptr->vrsave_size,
21809 save_align);
21811 non_fixed_size = (info_ptr->vars_size
21812 + info_ptr->parm_size
21813 + info_ptr->save_size);
21815 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21816 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
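/* Worked example (hypothetical numbers): vars_size = 40, parm_size = 64
   and save_size = 152 give non_fixed_size = 256; adding a 112-byte fixed
   area and rounding to a 16-byte ABI_STACK_BOUNDARY yields
   total_size = 368. */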
21818 /* Determine if we need to save the link register. */
21819 if (info_ptr->calls_p
21820 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21821 && crtl->profile
21822 && !TARGET_PROFILE_KERNEL)
21823 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21824 #ifdef TARGET_RELOCATABLE
21825 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21826 #endif
21827 || rs6000_ra_ever_killed ())
21828 info_ptr->lr_save_p = 1;
21830 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21831 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21832 && call_used_regs[STATIC_CHAIN_REGNUM]);
21833 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21834 using_static_chain_p);
21836 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21837 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21838 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21839 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21840 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21841 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21842 info_ptr->lr_save_p = 1;
21844 if (info_ptr->lr_save_p)
21845 df_set_regs_ever_live (LR_REGNO, true);
21847 /* Determine if we need to allocate any stack frame:
21849 For AIX we need to push the stack if a frame pointer is needed
21850 (because the stack might be dynamically adjusted), if we are
21851 debugging, if we make calls, or if the sum of fp_save, gp_save,
21852 and local variables is more than the space needed to save all
21853 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21854 + 18*8 = 288 (GPR13 reserved).
21856 For V.4 we don't have the stack cushion that AIX uses, but assume
21857 that the debugger can handle stackless frames. */
21859 if (info_ptr->calls_p)
21860 info_ptr->push_p = 1;
21862 else if (DEFAULT_ABI == ABI_V4)
21863 info_ptr->push_p = non_fixed_size != 0;
21865 else if (frame_pointer_needed)
21866 info_ptr->push_p = 1;
21868 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21869 info_ptr->push_p = 1;
21871 else
21872 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
21874 /* Zero offsets if we're not saving those registers. */
21875 if (info_ptr->fp_size == 0)
21876 info_ptr->fp_save_offset = 0;
21878 if (info_ptr->gp_size == 0)
21879 info_ptr->gp_save_offset = 0;
21881 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21882 info_ptr->altivec_save_offset = 0;
21884 /* Zero VRSAVE offset if not saved and restored. */
21885 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21886 info_ptr->vrsave_save_offset = 0;
21888 if (! TARGET_SPE_ABI
21889 || info_ptr->spe_64bit_regs_used == 0
21890 || info_ptr->spe_gp_size == 0)
21891 info_ptr->spe_gp_save_offset = 0;
21893 if (! info_ptr->lr_save_p)
21894 info_ptr->lr_save_offset = 0;
21896 if (! info_ptr->cr_save_p)
21897 info_ptr->cr_save_offset = 0;
21899 return info_ptr;
21902 /* Return true if the current function uses any GPRs in 64-bit SIMD
21903 mode. */
21905 static bool
21906 spe_func_has_64bit_regs_p (void)
21908 rtx_insn *insns, *insn;
21910 /* Functions that save and restore all the call-saved registers will
21911 need to save/restore the registers as 64-bit values. */
21912 if (crtl->calls_eh_return
21913 || cfun->calls_setjmp
21914 || crtl->has_nonlocal_goto)
21915 return true;
21917 insns = get_insns ();
21919 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
21921 if (INSN_P (insn))
21923 rtx i;
21925 /* FIXME: This should be implemented with attributes, e.g.
21927 (set_attr "spe64" "true") ... then
21928 if (get_spe64 (insn)) return true;
21930 That is the only reliable way to do the checks below. */
21932 i = PATTERN (insn);
21933 if (GET_CODE (i) == SET)
21935 machine_mode mode = GET_MODE (SET_SRC (i));
21937 if (SPE_VECTOR_MODE (mode))
21938 return true;
21939 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
21940 return true;
21945 return false;
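/* Dump the stack frame layout in INFO (or, if INFO is null, the layout
   computed by rs6000_stack_info) to stderr. */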
21948 static void
21949 debug_stack_info (rs6000_stack_t *info)
21951 const char *abi_string;
21953 if (! info)
21954 info = rs6000_stack_info ();
21956 fprintf (stderr, "\nStack information for function %s:\n",
21957 ((current_function_decl && DECL_NAME (current_function_decl))
21958 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
21959 : "<unknown>"));
21961 switch (info->abi)
21963 default: abi_string = "Unknown"; break;
21964 case ABI_NONE: abi_string = "NONE"; break;
21965 case ABI_AIX: abi_string = "AIX"; break;
21966 case ABI_ELFv2: abi_string = "ELFv2"; break;
21967 case ABI_DARWIN: abi_string = "Darwin"; break;
21968 case ABI_V4: abi_string = "V.4"; break;
21971 fprintf (stderr, "\tABI = %5s\n", abi_string);
21973 if (TARGET_ALTIVEC_ABI)
21974 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
21976 if (TARGET_SPE_ABI)
21977 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
21979 if (info->first_gp_reg_save != 32)
21980 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
21982 if (info->first_fp_reg_save != 64)
21983 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
21985 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
21986 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
21987 info->first_altivec_reg_save);
21989 if (info->lr_save_p)
21990 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
21992 if (info->cr_save_p)
21993 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
21995 if (info->vrsave_mask)
21996 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
21998 if (info->push_p)
21999 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
22001 if (info->calls_p)
22002 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
22004 if (info->gp_save_offset)
22005 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
22007 if (info->fp_save_offset)
22008 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
22010 if (info->altivec_save_offset)
22011 fprintf (stderr, "\taltivec_save_offset = %5d\n",
22012 info->altivec_save_offset);
22014 if (info->spe_gp_save_offset)
22015 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
22016 info->spe_gp_save_offset);
22018 if (info->vrsave_save_offset)
22019 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
22020 info->vrsave_save_offset);
22022 if (info->lr_save_offset)
22023 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
22025 if (info->cr_save_offset)
22026 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
22028 if (info->varargs_save_offset)
22029 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
22031 if (info->total_size)
22032 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC "\n",
22033 info->total_size);
22035 if (info->vars_size)
22036 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC "\n",
22037 info->vars_size);
22039 if (info->parm_size)
22040 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
22042 if (info->fixed_size)
22043 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
22045 if (info->gp_size)
22046 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
22048 if (info->spe_gp_size)
22049 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
22051 if (info->fp_size)
22052 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
22054 if (info->altivec_size)
22055 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
22057 if (info->vrsave_size)
22058 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
22060 if (info->altivec_padding_size)
22061 fprintf (stderr, "\taltivec_padding_size= %5d\n",
22062 info->altivec_padding_size);
22064 if (info->spe_padding_size)
22065 fprintf (stderr, "\tspe_padding_size = %5d\n",
22066 info->spe_padding_size);
22068 if (info->cr_size)
22069 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
22071 if (info->save_size)
22072 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
22074 if (info->reg_size != 4)
22075 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
22077 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
22079 fprintf (stderr, "\n");
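/* Implement RETURN_ADDR_RTX: return the RTL for the return address of the
   frame COUNT levels up from the current one, loading it from the stack
   via FRAME when a full frame must be laid out. */
22082 rtx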
22083 rs6000_return_addr (int count, rtx frame)
22085 /* Currently we don't optimize very well between prologue and body
22086 code, and for PIC code the generated code can actually be quite
22087 bad, so don't try to be too clever here. */
22088 if (count != 0
22089 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
22091 cfun->machine->ra_needs_full_frame = 1;
22093 return
22094 gen_rtx_MEM
22095 (Pmode,
22096 memory_address
22097 (Pmode,
22098 plus_constant (Pmode,
22099 copy_to_reg
22100 (gen_rtx_MEM (Pmode,
22101 memory_address (Pmode, frame))),
22102 RETURN_ADDRESS_OFFSET)));
22105 cfun->machine->ra_need_lr = 1;
22106 return get_hard_reg_initial_val (Pmode, LR_REGNO);
22109 /* Say whether a function is a candidate for sibcall handling or not. */
22111 static bool
22112 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22114 tree fntype;
22116 if (decl)
22117 fntype = TREE_TYPE (decl);
22118 else
22119 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22121 /* We can't do it if the called function has more vector parameters
22122 than the current function; there's nowhere to put the VRsave code. */
22123 if (TARGET_ALTIVEC_ABI
22124 && TARGET_ALTIVEC_VRSAVE
22125 && !(decl && decl == current_function_decl))
22127 function_args_iterator args_iter;
22128 tree type;
22129 int nvreg = 0;
22131 /* Functions with vector parameters are required to have a
22132 prototype, so the argument type info must be available
22133 here. */
22134 FOREACH_FUNCTION_ARGS (fntype, type, args_iter)
22135 if (TREE_CODE (type) == VECTOR_TYPE
22136 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22137 nvreg++;
22139 FOREACH_FUNCTION_ARGS (TREE_TYPE (current_function_decl), type, args_iter)
22140 if (TREE_CODE (type) == VECTOR_TYPE
22141 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22142 nvreg--;
22144 if (nvreg > 0)
22145 return false;
22148 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22149 functions, because the callee may have a different TOC pointer from
22150 the caller's and there's no way to ensure we restore the TOC when
22151 we return. With the secure-plt SYSV ABI we can't make non-local
22152 calls when -fpic/PIC because the PLT call stubs use r30. */
22153 if (DEFAULT_ABI == ABI_DARWIN
22154 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22155 && decl
22156 && !DECL_EXTERNAL (decl)
22157 && (*targetm.binds_local_p) (decl))
22158 || (DEFAULT_ABI == ABI_V4
22159 && (!TARGET_SECURE_PLT
22160 || !flag_pic
22161 || (decl
22162 && (*targetm.binds_local_p) (decl)))))
22164 tree attr_list = TYPE_ATTRIBUTES (fntype);
22166 if (!lookup_attribute ("longcall", attr_list)
22167 || lookup_attribute ("shortcall", attr_list))
22168 return true;
22171 return false;
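/* Return nonzero if the link register may be clobbered before the
   epilogue runs (by a real call, a REG_INC of LR, or a SET of LR outside
   the prologue/epilogue), i.e. whether LR needs to be saved. The answer
   is cached in cfun->machine->lr_save_state once frozen. */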
22174 static int
22175 rs6000_ra_ever_killed (void)
22177 rtx_insn *top;
22178 rtx reg;
22179 rtx_insn *insn;
22181 if (cfun->is_thunk)
22182 return 0;
22184 if (cfun->machine->lr_save_state)
22185 return cfun->machine->lr_save_state - 1;
22187 /* regs_ever_live has LR marked as used if any sibcalls are present,
22188 but this should not force saving and restoring in the
22189 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22190 clobbers LR, so that is inappropriate. */
22192 /* Also, the prologue can generate a store into LR that
22193 doesn't really count, like this:
22195 move LR->R0
22196 bcl to set PIC register
22197 move LR->R31
22198 move R0->LR
22200 When we're called from the epilogue, we need to avoid counting
22201 this as a store. */
22203 push_topmost_sequence ();
22204 top = get_insns ();
22205 pop_topmost_sequence ();
22206 reg = gen_rtx_REG (Pmode, LR_REGNO);
22208 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22210 if (INSN_P (insn))
22212 if (CALL_P (insn))
22214 if (!SIBLING_CALL_P (insn))
22215 return 1;
22217 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22218 return 1;
22219 else if (set_of (reg, insn) != NULL_RTX
22220 && !prologue_epilogue_contains (insn))
22221 return 1;
22224 return 0;
22227 /* Emit instructions needed to load the TOC register.
22228 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set and
22229 there is a constant pool; or for SVR4 -fpic. */
22231 void
22232 rs6000_emit_load_toc_table (int fromprolog)
22234 rtx dest;
22235 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22237 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22239 char buf[30];
22240 rtx lab, tmp1, tmp2, got;
22242 lab = gen_label_rtx ();
22243 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22244 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22245 if (flag_pic == 2)
22246 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22247 else
22248 got = rs6000_got_sym ();
22249 tmp1 = tmp2 = dest;
22250 if (!fromprolog)
22252 tmp1 = gen_reg_rtx (Pmode);
22253 tmp2 = gen_reg_rtx (Pmode);
22255 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22256 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22257 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22258 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22260 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22262 emit_insn (gen_load_toc_v4_pic_si ());
22263 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22265 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22267 char buf[30];
22268 rtx temp0 = (fromprolog
22269 ? gen_rtx_REG (Pmode, 0)
22270 : gen_reg_rtx (Pmode));
22272 if (fromprolog)
22274 rtx symF, symL;
22276 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22277 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22279 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22280 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22282 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22283 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22284 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22286 else
22288 rtx tocsym, lab;
22290 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22291 lab = gen_label_rtx ();
22292 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22293 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22294 if (TARGET_LINK_STACK)
22295 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22296 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22298 emit_insn (gen_addsi3 (dest, temp0, dest));
22300 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22302 /* This is for AIX code running in non-PIC ELF32. */
22303 char buf[30];
22304 rtx realsym;
22305 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22306 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22308 emit_insn (gen_elf_high (dest, realsym));
22309 emit_insn (gen_elf_low (dest, dest, realsym));
22311 else
22313 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22315 if (TARGET_32BIT)
22316 emit_insn (gen_load_toc_aix_si (dest));
22317 else
22318 emit_insn (gen_load_toc_aix_di (dest));
22322 /* Emit instructions to restore the link register after determining where
22323 its value has been stored. */
22325 void
22326 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
22328 rs6000_stack_t *info = rs6000_stack_info ();
22329 rtx operands[2];
22331 operands[0] = source;
22332 operands[1] = scratch;
22334 if (info->lr_save_p)
22336 rtx frame_rtx = stack_pointer_rtx;
22337 HOST_WIDE_INT sp_offset = 0;
22338 rtx tmp;
22340 if (frame_pointer_needed
22341 || cfun->calls_alloca
22342 || info->total_size > 32767)
22344 tmp = gen_frame_mem (Pmode, frame_rtx);
22345 emit_move_insn (operands[1], tmp);
22346 frame_rtx = operands[1];
22348 else if (info->push_p)
22349 sp_offset = info->total_size;
22351 tmp = plus_constant (Pmode, frame_rtx,
22352 info->lr_save_offset + sp_offset);
22353 tmp = gen_frame_mem (Pmode, tmp);
22354 emit_move_insn (tmp, operands[0]);
22356 else
22357 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22359 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22360 state of lr_save_p so any change from here on would be a bug. In
22361 particular, stop rs6000_ra_ever_killed from considering the SET
22362 of lr we may have added just above. */
22363 cfun->machine->lr_save_state = info->lr_save_p + 1;
22366 static GTY(()) alias_set_type set = -1;
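/* Return the alias set used for TOC references, creating it on first use. */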
22368 alias_set_type
22369 get_TOC_alias_set (void)
22371 if (set == -1)
22372 set = new_alias_set ();
22373 return set;
22376 /* This returns nonzero if the current function uses the TOC. This is
22377 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22378 is generated by the ABI_V4 load_toc_* patterns. */
22379 #if TARGET_ELF
22380 static int
22381 uses_TOC (void)
22383 rtx_insn *insn;
22385 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22386 if (INSN_P (insn))
22388 rtx pat = PATTERN (insn);
22389 int i;
22391 if (GET_CODE (pat) == PARALLEL)
22392 for (i = 0; i < XVECLEN (pat, 0); i++)
22394 rtx sub = XVECEXP (pat, 0, i);
22395 if (GET_CODE (sub) == USE)
22397 sub = XEXP (sub, 0);
22398 if (GET_CODE (sub) == UNSPEC
22399 && XINT (sub, 1) == UNSPEC_TOC)
22400 return 1;
22404 return 0;
22406 #endif
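/* Build a TOC-relative reference (UNSPEC_TOCREL) for SYMBOL. If
   LARGETOC_REG is non-null it may be used to hold the HIGH part when the
   reference cannot be expressed as a single small-model offset. */
22408 rtx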
22409 create_TOC_reference (rtx symbol, rtx largetoc_reg)
22411 rtx tocrel, tocreg, hi;
22413 if (TARGET_DEBUG_ADDR)
22415 if (GET_CODE (symbol) == SYMBOL_REF)
22416 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22417 XSTR (symbol, 0));
22418 else
22420 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22421 GET_RTX_NAME (GET_CODE (symbol)));
22422 debug_rtx (symbol);
22426 if (!can_create_pseudo_p ())
22427 df_set_regs_ever_live (TOC_REGISTER, true);
22429 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22430 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22431 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22432 return tocrel;
22434 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22435 if (largetoc_reg != NULL)
22437 emit_move_insn (largetoc_reg, hi);
22438 hi = largetoc_reg;
22440 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
22443 /* Issue assembly directives that create a reference to the given DWARF
22444 FRAME_TABLE_LABEL from the current function section. */
22445 void
22446 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22448 fprintf (asm_out_file, "\t.ref %s\n",
22449 (* targetm.strip_name_encoding) (frame_table_label));
22452 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22453 and the change to the stack pointer. */
22455 static void
22456 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22458 rtvec p;
22459 int i;
22460 rtx regs[3];
22462 i = 0;
22463 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22464 if (hard_frame_needed)
22465 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22466 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22467 || (hard_frame_needed
22468 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22469 regs[i++] = fp;
22471 p = rtvec_alloc (i);
22472 while (--i >= 0)
22474 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22475 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22478 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
22481 /* Emit the correct code for allocating stack space, as insns.
22482 If COPY_REG, leave a copy of the old stack pointer in it, adjusted by COPY_OFF.
22483 The generated code may use hard register 0 as a temporary. */
22485 static void
22486 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22488 rtx_insn *insn;
22489 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22490 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22491 rtx todec = gen_int_mode (-size, Pmode);
22492 rtx par, set, mem;
22494 if (INTVAL (todec) != -size)
22496 warning (0, "stack frame too large");
22497 emit_insn (gen_trap ());
22498 return;
22501 if (crtl->limit_stack)
22503 if (REG_P (stack_limit_rtx)
22504 && REGNO (stack_limit_rtx) > 1
22505 && REGNO (stack_limit_rtx) <= 31)
22507 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22508 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22509 const0_rtx));
22511 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22512 && TARGET_32BIT
22513 && DEFAULT_ABI == ABI_V4)
22515 rtx toload = gen_rtx_CONST (VOIDmode,
22516 gen_rtx_PLUS (Pmode,
22517 stack_limit_rtx,
22518 GEN_INT (size)));
22520 emit_insn (gen_elf_high (tmp_reg, toload));
22521 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22522 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22523 const0_rtx));
22525 else
22526 warning (0, "stack limit expression is not supported");
22529 if (copy_reg)
22531 if (copy_off != 0)
22532 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22533 else
22534 emit_move_insn (copy_reg, stack_reg);
22537 if (size > 32767)
22539 /* Need a note here so that try_split doesn't get confused. */
22540 if (get_last_insn () == NULL_RTX)
22541 emit_note (NOTE_INSN_DELETED);
22542 insn = emit_move_insn (tmp_reg, todec);
22543 try_split (PATTERN (insn), insn, 0);
22544 todec = tmp_reg;
22547 insn = emit_insn (TARGET_32BIT
22548 ? gen_movsi_update_stack (stack_reg, stack_reg,
22549 todec, stack_reg)
22550 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22551 todec, stack_reg));
22552 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22553 it now and set the alias set/attributes. The above gen_*_update
22554 calls will generate a PARALLEL with the MEM set being the first
22555 operation. */
22556 par = PATTERN (insn);
22557 gcc_assert (GET_CODE (par) == PARALLEL);
22558 set = XVECEXP (par, 0, 0);
22559 gcc_assert (GET_CODE (set) == SET);
22560 mem = SET_DEST (set);
22561 gcc_assert (MEM_P (mem));
22562 MEM_NOTRAP_P (mem) = 1;
22563 set_mem_alias_set (mem, get_frame_alias_set ());
22565 RTX_FRAME_RELATED_P (insn) = 1;
22566 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22567 gen_rtx_SET (VOIDmode, stack_reg,
22568 gen_rtx_PLUS (Pmode, stack_reg,
22569 GEN_INT (-size))));
22572 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22574 #if PROBE_INTERVAL > 32768
22575 #error Cannot use indexed addressing mode for stack probing
22576 #endif
22578 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22579 inclusive. These are offsets from the current stack pointer. */
22581 static void
22582 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22584 /* See if we have a constant small number of probes to generate. If so,
22585 that's the easy case. */
22586 if (first + size <= 32768)
22588 HOST_WIDE_INT i;
22590 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22591 it exceeds SIZE. If only one probe is needed, this will not
22592 generate any code. Then probe at FIRST + SIZE. */
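/* For instance (hypothetical values), with the default 4K PROBE_INTERVAL,
   FIRST = 16 and SIZE = 10000 this emits probes at sp - 4112, sp - 8208
   and finally sp - 10016. */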
22593 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22594 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22595 -(first + i)));
22597 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22598 -(first + size)));
22601 /* Otherwise, do the same as above, but in a loop. Note that we must be
22602 extra careful with variables wrapping around because we might be at
22603 the very top (or the very bottom) of the address space and we have
22604 to be able to handle this case properly; in particular, we use an
22605 equality test for the loop condition. */
22606 else
22608 HOST_WIDE_INT rounded_size;
22609 rtx r12 = gen_rtx_REG (Pmode, 12);
22610 rtx r0 = gen_rtx_REG (Pmode, 0);
22612 /* Sanity check for the addressing mode we're going to use. */
22613 gcc_assert (first <= 32768);
22615 /* Step 1: round SIZE to the previous multiple of the interval. */
22617 rounded_size = size & -PROBE_INTERVAL;
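/* E.g. (hypothetical) SIZE = 10000 with a 4K interval gives
   ROUNDED_SIZE = 8192; the remaining 1808 bytes are probed in step 4. */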
22620 /* Step 2: compute initial and final value of the loop counter. */
22622 /* TEST_ADDR = SP + FIRST. */
22623 emit_insn (gen_rtx_SET (VOIDmode, r12,
22624 plus_constant (Pmode, stack_pointer_rtx,
22625 -first)));
22627 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22628 if (rounded_size > 32768)
22630 emit_move_insn (r0, GEN_INT (-rounded_size));
22631 emit_insn (gen_rtx_SET (VOIDmode, r0,
22632 gen_rtx_PLUS (Pmode, r12, r0)));
22634 else
22635 emit_insn (gen_rtx_SET (VOIDmode, r0,
22636 plus_constant (Pmode, r12, -rounded_size)));
22639 /* Step 3: the loop
22641 while (TEST_ADDR != LAST_ADDR)
22643 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22644 probe at TEST_ADDR
22647 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22648 until it is equal to ROUNDED_SIZE. */
22650 if (TARGET_64BIT)
22651 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22652 else
22653 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22656 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22657 that SIZE is equal to ROUNDED_SIZE. */
22659 if (size != rounded_size)
22660 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
22664 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22665 absolute addresses. */
22667 const char *
22668 output_probe_stack_range (rtx reg1, rtx reg2)
22670 static int labelno = 0;
22671 char loop_lab[32], end_lab[32];
22672 rtx xops[2];
22674 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22675 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22677 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22679 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22680 xops[0] = reg1;
22681 xops[1] = reg2;
22682 if (TARGET_64BIT)
22683 output_asm_insn ("cmpd 0,%0,%1", xops);
22684 else
22685 output_asm_insn ("cmpw 0,%0,%1", xops);
22687 fputs ("\tbeq 0,", asm_out_file);
22688 assemble_name_raw (asm_out_file, end_lab);
22689 fputc ('\n', asm_out_file);
22691 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22692 xops[1] = GEN_INT (-PROBE_INTERVAL);
22693 output_asm_insn ("addi %0,%0,%1", xops);
22695 /* Probe at TEST_ADDR and branch. */
22696 xops[1] = gen_rtx_REG (Pmode, 0);
22697 output_asm_insn ("stw %1,0(%0)", xops);
22698 fprintf (asm_out_file, "\tb ");
22699 assemble_name_raw (asm_out_file, loop_lab);
22700 fputc ('\n', asm_out_file);
22702 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22704 return "";
22707 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22708 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22709 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22710 deduce these equivalences by itself so that it wasn't necessary to
22711 hold its hand so much. Don't be tempted to always supply d2_f_d_e with
22712 the actual cfa register, i.e. r31 when we are using a hard frame
22713 pointer. That fails when saving regs off r1, and sched moves the
22714 r31 setup past the reg saves. */
22716 static rtx
22717 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22718 rtx reg2, rtx rreg, rtx split_reg)
22720 rtx real, temp;
22722 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22724 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22725 int i;
22727 gcc_checking_assert (val == 0);
22728 real = PATTERN (insn);
22729 if (GET_CODE (real) == PARALLEL)
22730 for (i = 0; i < XVECLEN (real, 0); i++)
22731 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22733 rtx set = XVECEXP (real, 0, i);
22735 RTX_FRAME_RELATED_P (set) = 1;
22737 RTX_FRAME_RELATED_P (insn) = 1;
22738 return insn;
22741 /* copy_rtx will not make unique copies of registers, so we need to
22742 ensure we don't have unwanted sharing here. */
22743 if (reg == reg2)
22744 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22746 if (reg == rreg)
22747 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22749 real = copy_rtx (PATTERN (insn));
22751 if (reg2 != NULL_RTX)
22752 real = replace_rtx (real, reg2, rreg);
22754 if (REGNO (reg) == STACK_POINTER_REGNUM)
22755 gcc_checking_assert (val == 0);
22756 else
22757 real = replace_rtx (real, reg,
22758 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22759 STACK_POINTER_REGNUM),
22760 GEN_INT (val)));
22762 /* We expect that 'real' is either a SET or a PARALLEL containing
22763 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22764 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22766 if (GET_CODE (real) == SET)
22768 rtx set = real;
22770 temp = simplify_rtx (SET_SRC (set));
22771 if (temp)
22772 SET_SRC (set) = temp;
22773 temp = simplify_rtx (SET_DEST (set));
22774 if (temp)
22775 SET_DEST (set) = temp;
22776 if (GET_CODE (SET_DEST (set)) == MEM)
22778 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22779 if (temp)
22780 XEXP (SET_DEST (set), 0) = temp;
22783 else
22785 int i;
22787 gcc_assert (GET_CODE (real) == PARALLEL);
22788 for (i = 0; i < XVECLEN (real, 0); i++)
22789 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22791 rtx set = XVECEXP (real, 0, i);
22793 temp = simplify_rtx (SET_SRC (set));
22794 if (temp)
22795 SET_SRC (set) = temp;
22796 temp = simplify_rtx (SET_DEST (set));
22797 if (temp)
22798 SET_DEST (set) = temp;
22799 if (GET_CODE (SET_DEST (set)) == MEM)
22801 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22802 if (temp)
22803 XEXP (SET_DEST (set), 0) = temp;
22805 RTX_FRAME_RELATED_P (set) = 1;
22809 /* If a store insn has been split into multiple insns, the
22810 true source register is given by split_reg. */
22811 if (split_reg != NULL_RTX)
22812 real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22814 RTX_FRAME_RELATED_P (insn) = 1;
22815 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22817 return insn;
22820 /* Returns an insn that has a vrsave set operation with the
22821 appropriate CLOBBERs. */
22823 static rtx
22824 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22826 int nclobs, i;
22827 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22828 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22830 clobs[0]
22831 = gen_rtx_SET (VOIDmode,
22832 vrsave,
22833 gen_rtx_UNSPEC_VOLATILE (SImode,
22834 gen_rtvec (2, reg, vrsave),
22835 UNSPECV_SET_VRSAVE));
22837 nclobs = 1;
22839 /* We need to clobber the registers in the mask so the scheduler
22840 does not move sets to VRSAVE before sets of AltiVec registers.
22842 However, if the function receives nonlocal gotos, reload will set
22843 all call saved registers live. We will end up with:
22845 (set (reg 999) (mem))
22846 (parallel [ (set (reg vrsave) (unspec blah))
22847 (clobber (reg 999))])
22849 The clobber will cause the store into reg 999 to be dead, and
22850 flow will attempt to delete an epilogue insn. In this case, we
22851 need an unspec use/set of the register. */
22853 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22854 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22856 if (!epiloguep || call_used_regs [i])
22857 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22858 gen_rtx_REG (V4SImode, i));
22859 else
22861 rtx reg = gen_rtx_REG (V4SImode, i);
22863 clobs[nclobs++]
22864 = gen_rtx_SET (VOIDmode,
22865 reg,
22866 gen_rtx_UNSPEC (V4SImode,
22867 gen_rtvec (1, reg), 27));
22871 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22873 for (i = 0; i < nclobs; ++i)
22874 XVECEXP (insn, 0, i) = clobs[i];
22876 return insn;
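/* Build a single (set (mem) (reg)) or (set (reg) (mem)) frame access at
   FRAME_REG + OFFSET; STORE selects a store rather than a load. */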
22879 static rtx
22880 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22882 rtx addr, mem;
22884 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22885 mem = gen_frame_mem (GET_MODE (reg), addr);
22886 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22889 static rtx
22890 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22892 return gen_frame_set (reg, frame_reg, offset, false);
22895 static rtx
22896 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22898 return gen_frame_set (reg, frame_reg, offset, true);
22901 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22902 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
22904 static rtx
22905 emit_frame_save (rtx frame_reg, machine_mode mode,
22906 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
22908 rtx reg, insn;
22910 /* Check that none of the cases needing register indexed addressing arise. */
22911 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
22912 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22913 || (TARGET_E500_DOUBLE && mode == DFmode)
22914 || (TARGET_SPE_ABI
22915 && SPE_VECTOR_MODE (mode)
22916 && !SPE_CONST_OFFSET_OK (offset))));
22918 reg = gen_rtx_REG (mode, regno);
22919 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22920 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22921 NULL_RTX, NULL_RTX, NULL_RTX);
22924 /* Emit an offset memory reference suitable for a frame store, while
22925 converting to a valid addressing mode. */
22927 static rtx
22928 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
22930 rtx int_rtx, offset_rtx;
22932 int_rtx = GEN_INT (offset);
22934 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
22935 || (TARGET_E500_DOUBLE && mode == DFmode))
22937 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
22938 emit_move_insn (offset_rtx, int_rtx);
22940 else
22941 offset_rtx = int_rtx;
22943 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
22946 #ifndef TARGET_FIX_AND_CONTINUE
22947 #define TARGET_FIX_AND_CONTINUE 0
22948 #endif
22950 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
22951 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
22952 #define LAST_SAVRES_REGISTER 31
22953 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
22955 enum {
22956 SAVRES_LR = 0x1,
22957 SAVRES_SAVE = 0x2,
22958 SAVRES_REG = 0x0c,
22959 SAVRES_GPR = 0,
22960 SAVRES_FPR = 4,
22961 SAVRES_VR = 8
22964 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
22966 /* Temporary holding space for an out-of-line register save/restore
22967 routine name. */
22968 static char savres_routine_name[30];
22970 /* Return the name for an out-of-line register save/restore routine.
22971 SEL selects the register class and whether we save or restore (SAVRES_*). */
22973 static char *
22974 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
22976 const char *prefix = "";
22977 const char *suffix = "";
22979 /* Different targets are supposed to define
22980 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
22981 routine name could be defined with:
22983 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
22985 This is a nice idea in theory, but in reality things are
22986 complicated in several ways:
22988 - ELF targets have save/restore routines for GPRs.
22990 - SPE targets use different prefixes for 32/64-bit registers, and
22991 neither of them fits neatly in the FOO_{PREFIX,SUFFIX} regimen.
22993 - PPC64 ELF targets have routines for save/restore of GPRs that
22994 differ in what they do with the link register, so having a set
22995 prefix doesn't work. (We only use one of the save routines at
22996 the moment, though.)
22998 - PPC32 ELF targets have "exit" versions of the restore routines
22999 that restore the link register and can save some extra space.
23000 These require an extra suffix. (There are also "tail" versions
23001 of the restore routines and "GOT" versions of the save routines,
23002 but we don't generate those at present. Same problems apply,
23003 though.)
23005 We deal with all this by synthesizing our own prefix/suffix and
23006 using that for the simple sprintf call shown above. */
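/* For example (hypothetical case): saving GPRs from r28 upward on 32-bit
   SVR4 with the link-register variant yields the name "_savegpr_28_x". */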
23007 if (TARGET_SPE)
23009 /* No floating point saves on the SPE. */
23010 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
23012 if ((sel & SAVRES_SAVE))
23013 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
23014 else
23015 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
23017 if ((sel & SAVRES_LR))
23018 suffix = "_x";
23020 else if (DEFAULT_ABI == ABI_V4)
23022 if (TARGET_64BIT)
23023 goto aix_names;
23025 if ((sel & SAVRES_REG) == SAVRES_GPR)
23026 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
23027 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23028 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
23029 else if ((sel & SAVRES_REG) == SAVRES_VR)
23030 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23031 else
23032 abort ();
23034 if ((sel & SAVRES_LR))
23035 suffix = "_x";
23037 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23039 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
23040 /* No out-of-line save/restore routines for GPRs on AIX. */
23041 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
23042 #endif
23044 aix_names:
23045 if ((sel & SAVRES_REG) == SAVRES_GPR)
23046 prefix = ((sel & SAVRES_SAVE)
23047 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
23048 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
23049 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23051 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
23052 if ((sel & SAVRES_LR))
23053 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
23054 else
23055 #endif
23057 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
23058 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
23061 else if ((sel & SAVRES_REG) == SAVRES_VR)
23062 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23063 else
23064 abort ();
23067 if (DEFAULT_ABI == ABI_DARWIN)
23069 /* The Darwin approach is (slightly) different, in order to be
23070 compatible with code generated by the system toolchain. There is a
23071 single symbol for the start of the save sequence, and the code here
23072 embeds an offset into that code on the basis of the first register
23073 to be saved. */
23074 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
23075 if ((sel & SAVRES_REG) == SAVRES_GPR)
23076 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
23077 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
23078 (regno - 13) * 4, prefix, regno);
23079 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23080 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
23081 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
23082 else if ((sel & SAVRES_REG) == SAVRES_VR)
23083 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
23084 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
23085 else
23086 abort ();
23088 else
23089 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
23091 return savres_routine_name;
23094 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
23095 SEL selects the register class and whether we save or restore (SAVRES_*). */
23097 static rtx
23098 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
23100 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
23101 ? info->first_gp_reg_save
23102 : (sel & SAVRES_REG) == SAVRES_FPR
23103 ? info->first_fp_reg_save - 32
23104 : (sel & SAVRES_REG) == SAVRES_VR
23105 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23106 : -1);
23107 rtx sym;
23108 int select = sel;
23110 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23111 versions of the gpr routines. */
23112 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23113 && info->spe_64bit_regs_used)
23114 select ^= SAVRES_FPR ^ SAVRES_GPR;
23116 /* Don't generate bogus routine names. */
23117 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23118 && regno <= LAST_SAVRES_REGISTER
23119 && select >= 0 && select <= 12);
23121 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23123 if (sym == NULL)
23125 char *name;
23127 name = rs6000_savres_routine_name (info, regno, sel);
23129 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23130 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23131 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23134 return sym;
23137 /* Emit a sequence of insns, including a stack tie if needed, for
23138 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23139 reset the stack pointer, but move the base of the frame into
23140 reg UPDT_REGNO for use by out-of-line register restore routines. */
23142 static rtx
23143 rs6000_emit_stack_reset (rs6000_stack_t *info,
23144 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23145 unsigned updt_regno)
23147 rtx updt_reg_rtx;
23149 /* This blockage is needed so that sched doesn't decide to move
23150 the sp change before the register restores. */
23151 if (DEFAULT_ABI == ABI_V4
23152 || (TARGET_SPE_ABI
23153 && info->spe_64bit_regs_used != 0
23154 && info->first_gp_reg_save != 32))
23155 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23157 /* If we are restoring registers out-of-line, we will be using the
23158 "exit" variants of the restore routines, which will reset the
23159 stack for us. But we do need to point updt_reg into the
23160 right place for those routines. */
23161 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23163 if (frame_off != 0)
23164 return emit_insn (gen_add3_insn (updt_reg_rtx,
23165 frame_reg_rtx, GEN_INT (frame_off)));
23166 else if (REGNO (frame_reg_rtx) != updt_regno)
23167 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23169 return NULL_RTX;
23172 /* Return the register number used as a pointer by out-of-line
23173 save/restore functions. */
23175 static inline unsigned
23176 ptr_regno_for_savres (int sel)
23178 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23179 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23180 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
23183 /* Construct a parallel rtx describing the effect of a call to an
23184 out-of-line register save/restore routine, and emit the insn
23185 or jump_insn as appropriate. */
23187 static rtx
23188 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23189 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23190 machine_mode reg_mode, int sel)
23192 int i;
23193 int offset, start_reg, end_reg, n_regs, use_reg;
23194 int reg_size = GET_MODE_SIZE (reg_mode);
23195 rtx sym;
23196 rtvec p;
23197 rtx par, insn;
23199 offset = 0;
23200 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23201 ? info->first_gp_reg_save
23202 : (sel & SAVRES_REG) == SAVRES_FPR
23203 ? info->first_fp_reg_save
23204 : (sel & SAVRES_REG) == SAVRES_VR
23205 ? info->first_altivec_reg_save
23206 : -1);
23207 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23208 ? 32
23209 : (sel & SAVRES_REG) == SAVRES_FPR
23210 ? 64
23211 : (sel & SAVRES_REG) == SAVRES_VR
23212 ? LAST_ALTIVEC_REGNO + 1
23213 : -1);
23214 n_regs = end_reg - start_reg;
23215 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23216 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23217 + n_regs);
23219 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23220 RTVEC_ELT (p, offset++) = ret_rtx;
23222 RTVEC_ELT (p, offset++)
23223 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23225 sym = rs6000_savres_routine_sym (info, sel);
23226 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23228 use_reg = ptr_regno_for_savres (sel);
23229 if ((sel & SAVRES_REG) == SAVRES_VR)
23231 /* Vector regs are saved/restored using [reg+reg] addressing. */
23232 RTVEC_ELT (p, offset++)
23233 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23234 RTVEC_ELT (p, offset++)
23235 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23237 else
23238 RTVEC_ELT (p, offset++)
23239 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23241 for (i = 0; i < end_reg - start_reg; i++)
23242 RTVEC_ELT (p, i + offset)
23243 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23244 frame_reg_rtx, save_area_offset + reg_size * i,
23245 (sel & SAVRES_SAVE) != 0);
23247 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23248 RTVEC_ELT (p, i + offset)
23249 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23251 par = gen_rtx_PARALLEL (VOIDmode, p);
23253 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23255 insn = emit_jump_insn (par);
23256 JUMP_LABEL (insn) = ret_rtx;
23258 else
23259 insn = emit_insn (par);
23260 return insn;
23263 /* Emit code to store CR fields that need to be saved into REG. */
23265 static void
23266 rs6000_emit_move_from_cr (rtx reg)
23268 /* Only the ELFv2 ABI permits saving just the selected CR fields. */
23269 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23271 int i, cr_reg[8], count = 0;
23273 /* Collect CR fields that must be saved. */
23274 for (i = 0; i < 8; i++)
23275 if (save_reg_p (CR0_REGNO + i))
23276 cr_reg[count++] = i;
23278 /* If it's just a single one, use mfcrf. */
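/* E.g. if only CR2 must be saved this emits "mfcrf reg,0x20",
   i.e. a mask of 1 << (7 - 2). */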
23279 if (count == 1)
23281 rtvec p = rtvec_alloc (1);
23282 rtvec r = rtvec_alloc (2);
23283 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23284 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23285 RTVEC_ELT (p, 0)
23286 = gen_rtx_SET (VOIDmode, reg,
23287 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23289 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23290 return;
23293 /* ??? It might be better to handle the count == 2 or 3 cases here
23294 as well, using logical operations to combine the values. */
23297 emit_insn (gen_movesi_from_cr (reg));
23300 /* Determine whether the gp REG is really used. */
23302 static bool
23303 rs6000_reg_live_or_pic_offset_p (int reg)
23305 /* If the function calls eh_return, treat as used all the registers that
23306 would otherwise be checked for liveness. This is required for the PIC offset
23307 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
23308 register allocation purposes in this case. */
23310 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23311 && (!call_used_regs[reg]
23312 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23313 && !TARGET_SINGLE_PIC_BASE
23314 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23315 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23316 && !TARGET_SINGLE_PIC_BASE
23317 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23318 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
23321 /* Emit function prologue as insns. */
23323 void
23324 rs6000_emit_prologue (void)
23326 rs6000_stack_t *info = rs6000_stack_info ();
23327 machine_mode reg_mode = Pmode;
23328 int reg_size = TARGET_32BIT ? 4 : 8;
23329 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23330 rtx frame_reg_rtx = sp_reg_rtx;
23331 unsigned int cr_save_regno;
23332 rtx cr_save_rtx = NULL_RTX;
23333 rtx insn;
23334 int strategy;
23335 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23336 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23337 && call_used_regs[STATIC_CHAIN_REGNUM]);
23338 /* Offset to top of frame for frame_reg and sp respectively. */
23339 HOST_WIDE_INT frame_off = 0;
23340 HOST_WIDE_INT sp_off = 0;
23342 #ifdef ENABLE_CHECKING
23343 /* Track and check usage of r0, r11, r12. */
23344 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
23345 #define START_USE(R) do \
23347 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23348 reg_inuse |= 1 << (R); \
23349 } while (0)
23350 #define END_USE(R) do \
23352 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
23353 reg_inuse &= ~(1 << (R)); \
23354 } while (0)
23355 #define NOT_INUSE(R) do \
23357 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23358 } while (0)
23359 #else
23360 #define START_USE(R) do {} while (0)
23361 #define END_USE(R) do {} while (0)
23362 #define NOT_INUSE(R) do {} while (0)
23363 #endif
23365 if (DEFAULT_ABI == ABI_ELFv2)
23367 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23369 /* With -mminimal-toc we may generate an extra use of r2 below. */
23370 if (!TARGET_SINGLE_PIC_BASE
23371 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23372 cfun->machine->r2_setup_needed = true;
23376 if (flag_stack_usage_info)
23377 current_function_static_stack_size = info->total_size;
23379 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23381 HOST_WIDE_INT size = info->total_size;
23383 if (crtl->is_leaf && !cfun->calls_alloca)
23385 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23386 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23387 size - STACK_CHECK_PROTECT);
23389 else if (size > 0)
23390 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23393 if (TARGET_FIX_AND_CONTINUE)
23395 /* gdb on darwin arranges to forward a function from the old
23396 address by modifying the first 5 instructions of the function
23397 to branch to the overriding function. This is necessary to
23398 permit function pointers that point to the old function to
23399 actually forward to the new function. */
23400 emit_insn (gen_nop ());
23401 emit_insn (gen_nop ());
23402 emit_insn (gen_nop ());
23403 emit_insn (gen_nop ());
23404 emit_insn (gen_nop ());
23407 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23409 reg_mode = V2SImode;
23410 reg_size = 8;
23413 /* Handle world saves specially here. */
23414 if (WORLD_SAVE_P (info))
23416 int i, j, sz;
23417 rtx treg;
23418 rtvec p;
23419 rtx reg0;
23421 /* save_world expects lr in r0. */
23422 reg0 = gen_rtx_REG (Pmode, 0);
23423 if (info->lr_save_p)
23425 insn = emit_move_insn (reg0,
23426 gen_rtx_REG (Pmode, LR_REGNO));
23427 RTX_FRAME_RELATED_P (insn) = 1;
23430 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23431 assumptions about the offsets of various bits of the stack
23432 frame. */
23433 gcc_assert (info->gp_save_offset == -220
23434 && info->fp_save_offset == -144
23435 && info->lr_save_offset == 8
23436 && info->cr_save_offset == 4
23437 && info->push_p
23438 && info->lr_save_p
23439 && (!crtl->calls_eh_return
23440 || info->ehrd_offset == -432)
23441 && info->vrsave_save_offset == -224
23442 && info->altivec_save_offset == -416);
23444 treg = gen_rtx_REG (SImode, 11);
23445 emit_move_insn (treg, GEN_INT (-info->total_size));
23447 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23448 in R11. It also clobbers R12, so beware! */
23450 /* Preserve CR2 for save_world prologues. */
23451 sz = 5;
23452 sz += 32 - info->first_gp_reg_save;
23453 sz += 64 - info->first_fp_reg_save;
23454 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23455 p = rtvec_alloc (sz);
23456 j = 0;
23457 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23458 gen_rtx_REG (SImode,
23459 LR_REGNO));
23460 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23461 gen_rtx_SYMBOL_REF (Pmode,
23462 "*save_world"));
23463 /* We do floats first so that the instruction pattern matches
23464 properly. */
23465 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23466 RTVEC_ELT (p, j++)
23467 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23468 ? DFmode : SFmode,
23469 info->first_fp_reg_save + i),
23470 frame_reg_rtx,
23471 info->fp_save_offset + frame_off + 8 * i);
23472 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23473 RTVEC_ELT (p, j++)
23474 = gen_frame_store (gen_rtx_REG (V4SImode,
23475 info->first_altivec_reg_save + i),
23476 frame_reg_rtx,
23477 info->altivec_save_offset + frame_off + 16 * i);
23478 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23479 RTVEC_ELT (p, j++)
23480 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23481 frame_reg_rtx,
23482 info->gp_save_offset + frame_off + reg_size * i);
23484 /* CR register traditionally saved as CR2. */
23485 RTVEC_ELT (p, j++)
23486 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23487 frame_reg_rtx, info->cr_save_offset + frame_off);
23488 /* Explain about use of R0. */
23489 if (info->lr_save_p)
23490 RTVEC_ELT (p, j++)
23491 = gen_frame_store (reg0,
23492 frame_reg_rtx, info->lr_save_offset + frame_off);
23493 /* Explain what happens to the stack pointer. */
23495 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23496 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23499 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23500 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23501 treg, GEN_INT (-info->total_size), NULL_RTX);
23502 sp_off = frame_off = info->total_size;
23505 strategy = info->savres_strategy;
23507 /* For V.4, update stack before we do any saving and set back pointer. */
23508 if (! WORLD_SAVE_P (info)
23509 && info->push_p
23510 && (DEFAULT_ABI == ABI_V4
23511 || crtl->calls_eh_return))
23513 bool need_r11 = (TARGET_SPE
23514 ? (!(strategy & SAVE_INLINE_GPRS)
23515 && info->spe_64bit_regs_used == 0)
23516 : (!(strategy & SAVE_INLINE_FPRS)
23517 || !(strategy & SAVE_INLINE_GPRS)
23518 || !(strategy & SAVE_INLINE_VRS)));
23519 int ptr_regno = -1;
23520 rtx ptr_reg = NULL_RTX;
23521 int ptr_off = 0;
23523 if (info->total_size < 32767)
23524 frame_off = info->total_size;
23525 else if (need_r11)
23526 ptr_regno = 11;
23527 else if (info->cr_save_p
23528 || info->lr_save_p
23529 || info->first_fp_reg_save < 64
23530 || info->first_gp_reg_save < 32
23531 || info->altivec_size != 0
23532 || info->vrsave_mask != 0
23533 || crtl->calls_eh_return)
23534 ptr_regno = 12;
23535 else
23537 /* The prologue won't be saving any regs so there is no need
23538 to set up a frame register to access any frame save area.
23539 We also won't be using frame_off anywhere below, but set
23540 the correct value anyway to protect against future
23541 changes to this function. */
23542 frame_off = info->total_size;
23544 if (ptr_regno != -1)
23546 /* Set up the frame offset to that needed by the first
23547 out-of-line save function. */
23548 START_USE (ptr_regno);
23549 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23550 frame_reg_rtx = ptr_reg;
23551 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23552 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23553 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23554 ptr_off = info->gp_save_offset + info->gp_size;
23555 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23556 ptr_off = info->altivec_save_offset + info->altivec_size;
23557 frame_off = -ptr_off;
23559 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23560 sp_off = info->total_size;
23561 if (frame_reg_rtx != sp_reg_rtx)
23562 rs6000_emit_stack_tie (frame_reg_rtx, false);
23565 /* If we use the link register, get it into r0. */
23566 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23568 rtx addr, reg, mem;
23570 reg = gen_rtx_REG (Pmode, 0);
23571 START_USE (0);
23572 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23573 RTX_FRAME_RELATED_P (insn) = 1;
23575 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23576 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23578 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23579 GEN_INT (info->lr_save_offset + frame_off));
23580 mem = gen_rtx_MEM (Pmode, addr);
23581 /* This should not be of rs6000_sr_alias_set, because of
23582 __builtin_return_address. */
23584 insn = emit_move_insn (mem, reg);
23585 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23586 NULL_RTX, NULL_RTX, NULL_RTX);
23587 END_USE (0);
23591 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23592 r12 will be needed by out-of-line gpr restore. */
23593 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23594 && !(strategy & (SAVE_INLINE_GPRS
23595 | SAVE_NOINLINE_GPRS_SAVES_LR))
23596 ? 11 : 12);
23597 if (!WORLD_SAVE_P (info)
23598 && info->cr_save_p
23599 && REGNO (frame_reg_rtx) != cr_save_regno
23600 && !(using_static_chain_p && cr_save_regno == 11))
23602 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23603 START_USE (cr_save_regno);
23604 rs6000_emit_move_from_cr (cr_save_rtx);
23607 /* Do any required saving of fpr's.  If only one or two need saving,
23608 do it ourselves.  Otherwise, call an out-of-line save function. */
23609 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23611 int i;
23612 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23613 if (save_reg_p (info->first_fp_reg_save + i))
23614 emit_frame_save (frame_reg_rtx,
23615 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23616 ? DFmode : SFmode),
23617 info->first_fp_reg_save + i,
23618 info->fp_save_offset + frame_off + 8 * i,
23619 sp_off - frame_off);
23621 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23623 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23624 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23625 unsigned ptr_regno = ptr_regno_for_savres (sel);
23626 rtx ptr_reg = frame_reg_rtx;
23628 if (REGNO (frame_reg_rtx) == ptr_regno)
23629 gcc_checking_assert (frame_off == 0);
23630 else
23632 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23633 NOT_INUSE (ptr_regno);
23634 emit_insn (gen_add3_insn (ptr_reg,
23635 frame_reg_rtx, GEN_INT (frame_off)));
23637 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23638 info->fp_save_offset,
23639 info->lr_save_offset,
23640 DFmode, sel);
23641 rs6000_frame_related (insn, ptr_reg, sp_off,
23642 NULL_RTX, NULL_RTX, NULL_RTX);
23643 if (lr)
23644 END_USE (0);
23647 /* Save GPRs. This is done as a PARALLEL if we are using
23648 the store-multiple instructions. */
23649 if (!WORLD_SAVE_P (info)
23650 && TARGET_SPE_ABI
23651 && info->spe_64bit_regs_used != 0
23652 && info->first_gp_reg_save != 32)
23654 int i;
23655 rtx spe_save_area_ptr;
23656 HOST_WIDE_INT save_off;
23657 int ool_adjust = 0;
23659 /* Determine whether we can address all of the registers that need
23660 to be saved with an offset from frame_reg_rtx that fits in
23661 the small const field for SPE memory instructions. */
23662 int spe_regs_addressable
23663 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23664 + reg_size * (32 - info->first_gp_reg_save - 1))
23665 && (strategy & SAVE_INLINE_GPRS));
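/* For example (a sketch, not from the original source): evstdd is
   assumed to encode a 5-bit unsigned offset scaled by 8, so only
   offsets 0..248 (multiples of 8) from the base register are directly
   addressable; SPE_CONST_OFFSET_OK tests whether every save slot
   falls inside that window.  */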
23667 if (spe_regs_addressable)
23669 spe_save_area_ptr = frame_reg_rtx;
23670 save_off = frame_off;
23672 else
23674 /* Make r11 point to the start of the SPE save area. We need
23675 to be careful here if r11 is holding the static chain. If
23676 it is, then temporarily save it in r0. */
23677 HOST_WIDE_INT offset;
23679 if (!(strategy & SAVE_INLINE_GPRS))
23680 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23681 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23682 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23683 save_off = frame_off - offset;
23685 if (using_static_chain_p)
23687 rtx r0 = gen_rtx_REG (Pmode, 0);
23689 START_USE (0);
23690 gcc_assert (info->first_gp_reg_save > 11);
23692 emit_move_insn (r0, spe_save_area_ptr);
23694 else if (REGNO (frame_reg_rtx) != 11)
23695 START_USE (11);
23697 emit_insn (gen_addsi3 (spe_save_area_ptr,
23698 frame_reg_rtx, GEN_INT (offset)));
23699 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23700 frame_off = -info->spe_gp_save_offset + ool_adjust;
23703 if ((strategy & SAVE_INLINE_GPRS))
23705 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23706 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23707 emit_frame_save (spe_save_area_ptr, reg_mode,
23708 info->first_gp_reg_save + i,
23709 (info->spe_gp_save_offset + save_off
23710 + reg_size * i),
23711 sp_off - save_off);
23713 else
23715 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23716 info->spe_gp_save_offset + save_off,
23717 0, reg_mode,
23718 SAVRES_SAVE | SAVRES_GPR);
23720 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23721 NULL_RTX, NULL_RTX, NULL_RTX);
23724 /* Move the static chain pointer back. */
23725 if (!spe_regs_addressable)
23727 if (using_static_chain_p)
23729 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23730 END_USE (0);
23732 else if (REGNO (frame_reg_rtx) != 11)
23733 END_USE (11);
23736 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23738 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23739 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23740 unsigned ptr_regno = ptr_regno_for_savres (sel);
23741 rtx ptr_reg = frame_reg_rtx;
23742 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23743 int end_save = info->gp_save_offset + info->gp_size;
23744 int ptr_off;
23746 if (!ptr_set_up)
23747 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23749 /* Need to adjust r11 (r12) if we saved any FPRs. */
23750 if (end_save + frame_off != 0)
23752 rtx offset = GEN_INT (end_save + frame_off);
23754 if (ptr_set_up)
23755 frame_off = -end_save;
23756 else
23757 NOT_INUSE (ptr_regno);
23758 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23760 else if (!ptr_set_up)
23762 NOT_INUSE (ptr_regno);
23763 emit_move_insn (ptr_reg, frame_reg_rtx);
23765 ptr_off = -end_save;
23766 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23767 info->gp_save_offset + ptr_off,
23768 info->lr_save_offset + ptr_off,
23769 reg_mode, sel);
23770 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23771 NULL_RTX, NULL_RTX, NULL_RTX);
23772 if (lr)
23773 END_USE (0);
23775 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23777 rtvec p;
23778 int i;
23779 p = rtvec_alloc (32 - info->first_gp_reg_save);
23780 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23781 RTVEC_ELT (p, i)
23782 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23783 frame_reg_rtx,
23784 info->gp_save_offset + frame_off + reg_size * i);
23785 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23786 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23787 NULL_RTX, NULL_RTX, NULL_RTX);
23789 else if (!WORLD_SAVE_P (info))
23791 int i;
23792 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23793 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23794 emit_frame_save (frame_reg_rtx, reg_mode,
23795 info->first_gp_reg_save + i,
23796 info->gp_save_offset + frame_off + reg_size * i,
23797 sp_off - frame_off);
23800 if (crtl->calls_eh_return)
23802 unsigned int i;
23803 rtvec p;
23805 for (i = 0; ; ++i)
23807 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23808 if (regno == INVALID_REGNUM)
23809 break;
23812 p = rtvec_alloc (i);
23814 for (i = 0; ; ++i)
23816 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23817 if (regno == INVALID_REGNUM)
23818 break;
23820 insn
23821 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23822 sp_reg_rtx,
23823 info->ehrd_offset + sp_off + reg_size * (int) i);
23824 RTVEC_ELT (p, i) = insn;
23825 RTX_FRAME_RELATED_P (insn) = 1;
23828 insn = emit_insn (gen_blockage ());
23829 RTX_FRAME_RELATED_P (insn) = 1;
23830 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23833 /* In the AIX ABI we need to make sure r2 is really saved. */
23834 if (TARGET_AIX && crtl->calls_eh_return)
23836 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23837 rtx save_insn, join_insn, note;
23838 long toc_restore_insn;
23840 tmp_reg = gen_rtx_REG (Pmode, 11);
23841 tmp_reg_si = gen_rtx_REG (SImode, 11);
23842 if (using_static_chain_p)
23844 START_USE (0);
23845 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23847 else
23848 START_USE (11);
23849 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23850 /* Peek at the instruction to which this function returns.  If it's
23851 restoring r2, then we know we've already saved r2. We can't
23852 unconditionally save r2 because the value we have will already
23853 be updated if we arrived at this function via a plt call or
23854 TOC-adjusting stub. */
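/* A sketch of the instruction being matched, for illustration only:
   assuming RS6000_TOC_SAVE_SLOT is 20 on 32-bit and 40 on 64-bit
   ELFv1, toc_restore_insn encodes
       lwz r2,20(r1)   = 0x80410014   (32-bit)
       ld  r2,40(r1)   = 0xe8410028   (64-bit)
   i.e. the "restore r2" insn that follows a bl through a stub.  */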
23855 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23856 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23857 + RS6000_TOC_SAVE_SLOT);
23858 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23859 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23860 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23861 validate_condition_mode (EQ, CCUNSmode);
23862 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23863 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23864 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23865 toc_save_done = gen_label_rtx ();
23866 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23867 gen_rtx_EQ (VOIDmode, compare_result,
23868 const0_rtx),
23869 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23870 pc_rtx);
23871 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23872 JUMP_LABEL (jump) = toc_save_done;
23873 LABEL_NUSES (toc_save_done) += 1;
23875 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23876 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23877 sp_off - frame_off);
23879 emit_label (toc_save_done);
23881 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
23882 have a CFG that has different saves along different paths.
23883 Move the note to a dummy blockage insn, which describes that
23884 R2 is unconditionally saved after the label. */
23885 /* ??? An alternate representation might be a special insn pattern
23886 containing both the branch and the store.  That might give the
23887 code that minimizes the number of DW_CFA_advance opcodes more
23888 freedom in placing the annotations. */
23889 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23890 if (note)
23891 remove_note (save_insn, note);
23892 else
23893 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23894 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23895 RTX_FRAME_RELATED_P (save_insn) = 0;
23897 join_insn = emit_insn (gen_blockage ());
23898 REG_NOTES (join_insn) = note;
23899 RTX_FRAME_RELATED_P (join_insn) = 1;
23901 if (using_static_chain_p)
23903 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
23904 END_USE (0);
23906 else
23907 END_USE (11);
23910 /* Save CR if we use any that must be preserved. */
23911 if (!WORLD_SAVE_P (info) && info->cr_save_p)
23913 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23914 GEN_INT (info->cr_save_offset + frame_off));
23915 rtx mem = gen_frame_mem (SImode, addr);
23917 /* If we didn't copy cr before, do so now using r0. */
23918 if (cr_save_rtx == NULL_RTX)
23920 START_USE (0);
23921 cr_save_rtx = gen_rtx_REG (SImode, 0);
23922 rs6000_emit_move_from_cr (cr_save_rtx);
23925 /* Saving CR requires a two-instruction sequence: one instruction
23926 to move the CR to a general-purpose register, and a second
23927 instruction that stores the GPR to memory.
23929 We do not emit any DWARF CFI records for the first of these,
23930 because we cannot properly represent the fact that CR is saved in
23931 a register. One reason is that we cannot express that multiple
23932 CR fields are saved; another reason is that on 64-bit, the size
23933 of the CR register in DWARF (4 bytes) differs from the size of
23934 a general-purpose register.
23936 This means if any intervening instruction were to clobber one of
23937 the call-saved CR fields, we'd have incorrect CFI. To prevent
23938 this from happening, we mark the store to memory as a use of
23939 those CR fields, which prevents any such instruction from being
23940 scheduled in between the two instructions. */
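/* An illustrative sketch (the register choice varies); the sequence
   being protected is:
       mfcr r12                ; rs6000_emit_move_from_cr
       stw  r12,off(r1)        ; the store below, whose PARALLEL also
                               ; USEs each saved CR field  */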
23941 rtx crsave_v[9];
23942 int n_crsave = 0;
23943 int i;
23945 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
23946 for (i = 0; i < 8; i++)
23947 if (save_reg_p (CR0_REGNO + i))
23948 crsave_v[n_crsave++]
23949 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23951 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
23952 gen_rtvec_v (n_crsave, crsave_v)));
23953 END_USE (REGNO (cr_save_rtx));
23955 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
23956 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
23957 so we need to construct a frame expression manually. */
23958 RTX_FRAME_RELATED_P (insn) = 1;
23960 /* Update address to be stack-pointer relative, like
23961 rs6000_frame_related would do. */
23962 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
23963 GEN_INT (info->cr_save_offset + sp_off));
23964 mem = gen_frame_mem (SImode, addr);
23966 if (DEFAULT_ABI == ABI_ELFv2)
23968 /* In the ELFv2 ABI we generate separate CFI records for each
23969 CR field that was actually saved. They all point to the
23970 same 32-bit stack slot. */
23971 rtx crframe[8];
23972 int n_crframe = 0;
23974 for (i = 0; i < 8; i++)
23975 if (save_reg_p (CR0_REGNO + i))
23977 crframe[n_crframe]
23978 = gen_rtx_SET (VOIDmode, mem,
23979 gen_rtx_REG (SImode, CR0_REGNO + i));
23981 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
23982 n_crframe++;
23985 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23986 gen_rtx_PARALLEL (VOIDmode,
23987 gen_rtvec_v (n_crframe, crframe)));
23989 else
23991 /* In other ABIs, by convention, we use a single CR regnum to
23992 represent the fact that all call-saved CR fields are saved.
23993 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
23994 rtx set = gen_rtx_SET (VOIDmode, mem,
23995 gen_rtx_REG (SImode, CR2_REGNO));
23996 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
24000 /* In the ELFv2 ABI we need to save all call-saved CR fields into
24001 *separate* slots if the routine calls __builtin_eh_return, so
24002 that they can be independently restored by the unwinder. */
24003 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24005 int i, cr_off = info->ehcr_offset;
24006 rtx crsave;
24008 /* ??? We might get better performance by using multiple mfocrf
24009 instructions. */
24010 crsave = gen_rtx_REG (SImode, 0);
24011 emit_insn (gen_movesi_from_cr (crsave));
24013 for (i = 0; i < 8; i++)
24014 if (!call_used_regs[CR0_REGNO + i])
24016 rtvec p = rtvec_alloc (2);
24017 RTVEC_ELT (p, 0)
24018 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
24019 RTVEC_ELT (p, 1)
24020 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
24022 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24024 RTX_FRAME_RELATED_P (insn) = 1;
24025 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24026 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
24027 sp_reg_rtx, cr_off + sp_off));
24029 cr_off += reg_size;
24033 /* Update stack and set back pointer unless this is V.4,
24034 for which it was done previously. */
24035 if (!WORLD_SAVE_P (info) && info->push_p
24036 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
24038 rtx ptr_reg = NULL;
24039 int ptr_off = 0;
24041 /* If saving altivec regs we need to be able to address all save
24042 locations using a 16-bit offset. */
24043 if ((strategy & SAVE_INLINE_VRS) == 0
24044 || (info->altivec_size != 0
24045 && (info->altivec_save_offset + info->altivec_size - 16
24046 + info->total_size - frame_off) > 32767)
24047 || (info->vrsave_size != 0
24048 && (info->vrsave_save_offset
24049 + info->total_size - frame_off) > 32767))
24051 int sel = SAVRES_SAVE | SAVRES_VR;
24052 unsigned ptr_regno = ptr_regno_for_savres (sel);
24054 if (using_static_chain_p
24055 && ptr_regno == STATIC_CHAIN_REGNUM)
24056 ptr_regno = 12;
24057 if (REGNO (frame_reg_rtx) != ptr_regno)
24058 START_USE (ptr_regno);
24059 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24060 frame_reg_rtx = ptr_reg;
24061 ptr_off = info->altivec_save_offset + info->altivec_size;
24062 frame_off = -ptr_off;
24064 else if (REGNO (frame_reg_rtx) == 1)
24065 frame_off = info->total_size;
24066 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
24067 sp_off = info->total_size;
24068 if (frame_reg_rtx != sp_reg_rtx)
24069 rs6000_emit_stack_tie (frame_reg_rtx, false);
24072 /* Set frame pointer, if needed. */
24073 if (frame_pointer_needed)
24075 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
24076 sp_reg_rtx);
24077 RTX_FRAME_RELATED_P (insn) = 1;
24080 /* Save AltiVec registers if needed. Save here because the red zone does
24081 not always include AltiVec registers. */
24082 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24083 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
24085 int end_save = info->altivec_save_offset + info->altivec_size;
24086 int ptr_off;
24087 /* Oddly, the vector save/restore functions point r0 at the end
24088 of the save area, then use r11 or r12 to load offsets for
24089 [reg+reg] addressing. */
24090 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24091 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
24092 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24094 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24095 NOT_INUSE (0);
24096 if (end_save + frame_off != 0)
24098 rtx offset = GEN_INT (end_save + frame_off);
24100 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24102 else
24103 emit_move_insn (ptr_reg, frame_reg_rtx);
24105 ptr_off = -end_save;
24106 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24107 info->altivec_save_offset + ptr_off,
24108 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24109 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24110 NULL_RTX, NULL_RTX, NULL_RTX);
24111 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24113 /* The oddity mentioned above clobbered our frame reg. */
24114 emit_move_insn (frame_reg_rtx, ptr_reg);
24115 frame_off = ptr_off;
24118 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24119 && info->altivec_size != 0)
24121 int i;
24123 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24124 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24126 rtx areg, savereg, mem, split_reg;
24127 int offset;
24129 offset = (info->altivec_save_offset + frame_off
24130 + 16 * (i - info->first_altivec_reg_save));
24132 savereg = gen_rtx_REG (V4SImode, i);
24134 NOT_INUSE (0);
24135 areg = gen_rtx_REG (Pmode, 0);
24136 emit_move_insn (areg, GEN_INT (offset));
24138 /* AltiVec addressing mode is [reg+reg]. */
24139 mem = gen_frame_mem (V4SImode,
24140 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
24142 insn = emit_move_insn (mem, savereg);
24144 /* When we split a VSX store into two insns, we need to make
24145 sure the DWARF info knows which register we are storing.
24146 Pass it in to be used on the appropriate note. */
24147 if (!BYTES_BIG_ENDIAN
24148 && GET_CODE (PATTERN (insn)) == SET
24149 && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
24150 split_reg = savereg;
24151 else
24152 split_reg = NULL_RTX;
24154 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24155 areg, GEN_INT (offset), split_reg);
24159 /* VRSAVE is a bit vector representing which AltiVec registers
24160 are used. The OS uses this to determine which vector
24161 registers to save on a context switch. We need to save
24162 VRSAVE on the stack frame, add whatever AltiVec registers we
24163 used in this function, and do the corresponding magic in the
24164 epilogue. */
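/* A sketch of the code emitted below, assuming r12 is the register
   chosen:
       mfspr r12,256           ; read VRSAVE (SPR 256)
       stw   r12,off(r1)       ; save the old mask in the frame
       oris/ori r12,r12,mask   ; or in the AltiVec regs we use
       mtspr 256,r12           ; update VRSAVE  */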
24166 if (!WORLD_SAVE_P (info)
24167 && TARGET_ALTIVEC
24168 && TARGET_ALTIVEC_VRSAVE
24169 && info->vrsave_mask != 0)
24171 rtx reg, vrsave;
24172 int offset;
24173 int save_regno;
24175 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
24176 be using r12 as frame_reg_rtx and r11 as the static chain
24177 pointer for nested functions. */
24178 save_regno = 12;
24179 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24180 && !using_static_chain_p)
24181 save_regno = 11;
24182 else if (REGNO (frame_reg_rtx) == 12)
24184 save_regno = 11;
24185 if (using_static_chain_p)
24186 save_regno = 0;
24189 NOT_INUSE (save_regno);
24190 reg = gen_rtx_REG (SImode, save_regno);
24191 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24192 if (TARGET_MACHO)
24193 emit_insn (gen_get_vrsave_internal (reg));
24194 else
24195 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
24197 /* Save VRSAVE. */
24198 offset = info->vrsave_save_offset + frame_off;
24199 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24201 /* Include the registers in the mask. */
24202 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24204 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24207 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24208 if (!TARGET_SINGLE_PIC_BASE
24209 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24210 || (DEFAULT_ABI == ABI_V4
24211 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24212 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24214 /* If emit_load_toc_table will use the link register, we need to save
24215 it. We use R12 for this purpose because emit_load_toc_table
24216 can use register 0. This allows us to use a plain 'blr' to return
24217 from the procedure more often. */
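/* As a sketch, the code emitted when save_LR_around_toc_setup holds:
       mflr 12
       ...                     ; rs6000_emit_load_toc_table, may use LR
       mtlr 12                 ; LR restored, plain blr still works  */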
24218 int save_LR_around_toc_setup = (TARGET_ELF
24219 && DEFAULT_ABI == ABI_V4
24220 && flag_pic
24221 && ! info->lr_save_p
24222 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24223 if (save_LR_around_toc_setup)
24225 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24226 rtx tmp = gen_rtx_REG (Pmode, 12);
24228 insn = emit_move_insn (tmp, lr);
24229 RTX_FRAME_RELATED_P (insn) = 1;
24231 rs6000_emit_load_toc_table (TRUE);
24233 insn = emit_move_insn (lr, tmp);
24234 add_reg_note (insn, REG_CFA_RESTORE, lr);
24235 RTX_FRAME_RELATED_P (insn) = 1;
24237 else
24238 rs6000_emit_load_toc_table (TRUE);
24241 #if TARGET_MACHO
24242 if (!TARGET_SINGLE_PIC_BASE
24243 && DEFAULT_ABI == ABI_DARWIN
24244 && flag_pic && crtl->uses_pic_offset_table)
24246 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24247 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24249 /* Save and restore LR locally around this call (in R0). */
24250 if (!info->lr_save_p)
24251 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24253 emit_insn (gen_load_macho_picbase (src));
24255 emit_move_insn (gen_rtx_REG (Pmode,
24256 RS6000_PIC_OFFSET_TABLE_REGNUM),
24257 lr);
24259 if (!info->lr_save_p)
24260 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
24262 #endif
24264 /* If we need to, save the TOC register after doing the stack setup.
24265 Do not emit eh frame info for this save. The unwinder wants info,
24266 conceptually attached to instructions in this function, about
24267 register values in the caller of this function. This R2 may have
24268 already been changed from the value in the caller.
24269 We don't attempt to write accurate DWARF EH frame info for R2
24270 because code emitted by gcc for a (non-pointer) function call
24271 doesn't save and restore R2. Instead, R2 is managed out-of-line
24272 by a linker generated plt call stub when the function resides in
24273 a shared library. This behaviour is costly to describe in DWARF,
24274 both in terms of the size of DWARF info and the time taken in the
24275 unwinder to interpret it. R2 changes, apart from the
24276 calls_eh_return case earlier in this function, are handled by
24277 linux-unwind.h frob_update_context. */
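/* For reference, an ELFv1 linker call stub does roughly (a sketch;
   the exact code depends on the linker):
       std r2,40(r1)           ; save caller's TOC pointer
       ...                     ; load target entry point and new TOC
       mtctr rN
       bctr
   so r2 is saved by the stub and restored by the insn following
   the bl, without compiler-emitted save/restore code.  */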
24278 if (rs6000_save_toc_in_prologue_p ())
24280 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
24281 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
24285 /* Output .extern statements for the save/restore routines we use. */
24287 static void
24288 rs6000_output_savres_externs (FILE *file)
24290 rs6000_stack_t *info = rs6000_stack_info ();
24292 if (TARGET_DEBUG_STACK)
24293 debug_stack_info (info);
24295 /* Write .extern for any function we will call to save and restore
24296 fp values. */
24297 if (info->first_fp_reg_save < 64
24298 && !TARGET_MACHO
24299 && !TARGET_ELF)
24301 char *name;
24302 int regno = info->first_fp_reg_save - 32;
24304 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
24306 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24307 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24308 name = rs6000_savres_routine_name (info, regno, sel);
24309 fprintf (file, "\t.extern %s\n", name);
24311 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
24313 bool lr = (info->savres_strategy
24314 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24315 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24316 name = rs6000_savres_routine_name (info, regno, sel);
24317 fprintf (file, "\t.extern %s\n", name);
24322 /* Write function prologue. */
24324 static void
24325 rs6000_output_function_prologue (FILE *file,
24326 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24328 if (!cfun->is_thunk)
24329 rs6000_output_savres_externs (file);
24331 /* ELFv2 ABI r2 setup code and local entry point. This must follow
24332 immediately after the global entry point label. */
24333 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
24335 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24337 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
24338 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
24340 fputs ("\t.localentry\t", file);
24341 assemble_name (file, name);
24342 fputs (",.-", file);
24343 assemble_name (file, name);
24344 fputs ("\n", file);
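/* For a function named "foo" (a hypothetical name), the fprintf and
   fputs calls above produce:
       0:	addis 2,12,.TOC.-0b@ha
       	addi 2,2,.TOC.-0b@l
       	.localentry	foo,.-foo  */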
24347 /* Output -mprofile-kernel code. This needs to be done here instead of
24348 in output_function_profile since it must go after the ELFv2 ABI
24349 local entry point. */
24350 if (TARGET_PROFILE_KERNEL && crtl->profile)
24352 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24353 gcc_assert (!TARGET_32BIT);
24355 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
24356 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
24358 /* In the ELFv2 ABI we have no compiler stack word. It must be
24359 the responsibility of _mcount to preserve the static chain
24360 register if required. */
24361 if (DEFAULT_ABI != ABI_ELFv2
24362 && cfun->static_chain_decl != NULL)
24364 asm_fprintf (file, "\tstd %s,24(%s)\n",
24365 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24366 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24367 asm_fprintf (file, "\tld %s,24(%s)\n",
24368 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24370 else
24371 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
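/* Putting the asm_fprintf calls together, the usual output is
   (assuming RS6000_MCOUNT expands to "_mcount" on this target):
       mflr 0
       std 0,16(1)
       bl _mcount
   with r11, the static chain, saved to and reloaded from 24(r1)
   around the call when required.  */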
24374 rs6000_pic_labelno++;
24377 /* Non-zero if vmx regs are restored before the frame pop, zero if
24378 we restore after the pop when possible. */
24379 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24381 /* Restoring cr is a two-step process: loading a reg from the frame
24382 save, then moving the reg to cr. For ABI_V4 we must let the
24383 unwinder know that the stack location is no longer valid at or
24384 before the stack deallocation, but we can't emit a cfa_restore for
24385 cr at the stack deallocation like we do for other registers.
24386 The trouble is that it is possible for the move to cr to be
24387 scheduled after the stack deallocation. So say exactly where cr
24388 is located on each of the two insns. */
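/* The two steps, as a sketch:
       lwz  rN,off(r1)         ; load_cr_save below
       mtcrf mask,rN           ; restore_saved_cr below
   the notes attached to these insns tell the unwinder where cr lives
   at each point even if the scheduler moves the mtcrf past the stack
   deallocation.  */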
24390 static rtx
24391 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24393 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24394 rtx reg = gen_rtx_REG (SImode, regno);
24395 rtx_insn *insn = emit_move_insn (reg, mem);
24397 if (!exit_func && DEFAULT_ABI == ABI_V4)
24399 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24400 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
24402 add_reg_note (insn, REG_CFA_REGISTER, set);
24403 RTX_FRAME_RELATED_P (insn) = 1;
24405 return reg;
24408 /* Reload CR from REG. */
24410 static void
24411 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24413 int count = 0;
24414 int i;
24416 if (using_mfcr_multiple)
24418 for (i = 0; i < 8; i++)
24419 if (save_reg_p (CR0_REGNO + i))
24420 count++;
24421 gcc_assert (count);
24424 if (using_mfcr_multiple && count > 1)
24426 rtx_insn *insn;
24427 rtvec p;
24428 int ndx;
24430 p = rtvec_alloc (count);
24432 ndx = 0;
24433 for (i = 0; i < 8; i++)
24434 if (save_reg_p (CR0_REGNO + i))
24436 rtvec r = rtvec_alloc (2);
24437 RTVEC_ELT (r, 0) = reg;
24438 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
24439 RTVEC_ELT (p, ndx) =
24440 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
24441 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24442 ndx++;
24444 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24445 gcc_assert (ndx == count);
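/* The mask bit for CR field i is 1 << (7 - i), so restoring, say,
   cr2, cr3 and cr4 corresponds to a single "mtcrf 56,rN"
   (32 + 16 + 8).  */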
24447 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24448 CR field separately. */
24449 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24451 for (i = 0; i < 8; i++)
24452 if (save_reg_p (CR0_REGNO + i))
24453 add_reg_note (insn, REG_CFA_RESTORE,
24454 gen_rtx_REG (SImode, CR0_REGNO + i));
24456 RTX_FRAME_RELATED_P (insn) = 1;
24459 else
24460 for (i = 0; i < 8; i++)
24461 if (save_reg_p (CR0_REGNO + i))
24463 rtx insn = emit_insn (gen_movsi_to_cr_one
24464 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24466 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24467 CR field separately, attached to the insn that in fact
24468 restores this particular CR field. */
24469 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24471 add_reg_note (insn, REG_CFA_RESTORE,
24472 gen_rtx_REG (SImode, CR0_REGNO + i));
24474 RTX_FRAME_RELATED_P (insn) = 1;
24478 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24479 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24480 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24482 rtx_insn *insn = get_last_insn ();
24483 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24485 add_reg_note (insn, REG_CFA_RESTORE, cr);
24486 RTX_FRAME_RELATED_P (insn) = 1;
24490 /* Like cr, the move to lr instruction can be scheduled after the
24491 stack deallocation, but unlike cr, its stack frame save is still
24492 valid. So we only need to emit the cfa_restore on the correct
24493 instruction. */
24495 static void
24496 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24498 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24499 rtx reg = gen_rtx_REG (Pmode, regno);
24501 emit_move_insn (reg, mem);
24504 static void
24505 restore_saved_lr (int regno, bool exit_func)
24507 rtx reg = gen_rtx_REG (Pmode, regno);
24508 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24509 rtx_insn *insn = emit_move_insn (lr, reg);
24511 if (!exit_func && flag_shrink_wrap)
24513 add_reg_note (insn, REG_CFA_RESTORE, lr);
24514 RTX_FRAME_RELATED_P (insn) = 1;
24518 static rtx
24519 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24521 if (DEFAULT_ABI == ABI_ELFv2)
24523 int i;
24524 for (i = 0; i < 8; i++)
24525 if (save_reg_p (CR0_REGNO + i))
24527 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24528 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24529 cfa_restores);
24532 else if (info->cr_save_p)
24533 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24534 gen_rtx_REG (SImode, CR2_REGNO),
24535 cfa_restores);
24537 if (info->lr_save_p)
24538 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24539 gen_rtx_REG (Pmode, LR_REGNO),
24540 cfa_restores);
24541 return cfa_restores;
24544 /* Return true if OFFSET from the stack pointer can be clobbered by
24545 signals.  V.4 doesn't have any stack cushion; the AIX ABIs have 220
24546 or 288 bytes below the stack pointer that signals do not clobber. */
24548 static inline bool
24549 offset_below_red_zone_p (HOST_WIDE_INT offset)
24551 return offset < (DEFAULT_ABI == ABI_V4
24552 ? 0
24553 : TARGET_32BIT ? -220 : -288);
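/* For example, on 64-bit AIX/ELF an offset of -288 is still inside
   the red zone (protected), while -296 is below it and may be
   clobbered; under V.4 any offset below the stack pointer is
   unsafe.  */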
24556 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24558 static void
24559 emit_cfa_restores (rtx cfa_restores)
24561 rtx_insn *insn = get_last_insn ();
24562 rtx *loc = &REG_NOTES (insn);
24564 while (*loc)
24565 loc = &XEXP (*loc, 1);
24566 *loc = cfa_restores;
24567 RTX_FRAME_RELATED_P (insn) = 1;
24570 /* Emit function epilogue as insns. */
24572 void
24573 rs6000_emit_epilogue (int sibcall)
24575 rs6000_stack_t *info;
24576 int restoring_GPRs_inline;
24577 int restoring_FPRs_inline;
24578 int using_load_multiple;
24579 int using_mtcr_multiple;
24580 int use_backchain_to_restore_sp;
24581 int restore_lr;
24582 int strategy;
24583 HOST_WIDE_INT frame_off = 0;
24584 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24585 rtx frame_reg_rtx = sp_reg_rtx;
24586 rtx cfa_restores = NULL_RTX;
24587 rtx insn;
24588 rtx cr_save_reg = NULL_RTX;
24589 machine_mode reg_mode = Pmode;
24590 int reg_size = TARGET_32BIT ? 4 : 8;
24591 int i;
24592 bool exit_func;
24593 unsigned ptr_regno;
24595 info = rs6000_stack_info ();
24597 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24599 reg_mode = V2SImode;
24600 reg_size = 8;
24603 strategy = info->savres_strategy;
24604 using_load_multiple = strategy & SAVRES_MULTIPLE;
24605 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24606 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24607 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24608 || rs6000_cpu == PROCESSOR_PPC603
24609 || rs6000_cpu == PROCESSOR_PPC750
24610 || optimize_size);
24611 /* Restore via the backchain when we have a large frame, since this
24612 is more efficient than an addis, addi pair. The second condition
24613 here will not trigger at the moment; we don't actually need a
24614 frame pointer for alloca, but the generic parts of the compiler
24615 give us one anyway. */
24616 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24617 || (cfun->calls_alloca
24618 && !frame_pointer_needed));
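/* The trade-off, as a sketch for a frame larger than 32767 bytes:
       ld r11,0(r1)            ; one load through the backchain
   versus
       addis r11,r1,hi(size)
       addi  r11,r11,lo(size)  ; two-insn offset calculation
   (lwz rather than ld on 32-bit targets).  */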
24619 restore_lr = (info->lr_save_p
24620 && (restoring_FPRs_inline
24621 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24622 && (restoring_GPRs_inline
24623 || info->first_fp_reg_save < 64));
24625 if (WORLD_SAVE_P (info))
24627 int i, j;
24628 char rname[30];
24629 const char *alloc_rname;
24630 rtvec p;
24632 /* eh_rest_world_r10 will return to the location saved in the LR
24633 stack slot (which is not likely to be our caller).
24634 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24635 rest_world is similar, except any R10 parameter is ignored.
24636 The exception-handling stuff that was here in 2.95 is no
24637 longer necessary. */
24639 p = rtvec_alloc (9
24641 + 32 - info->first_gp_reg_save
24642 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24643 + 63 + 1 - info->first_fp_reg_save);
24645 strcpy (rname, ((crtl->calls_eh_return) ?
24646 "*eh_rest_world_r10" : "*rest_world"));
24647 alloc_rname = ggc_strdup (rname);
24649 j = 0;
24650 RTVEC_ELT (p, j++) = ret_rtx;
24651 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24652 gen_rtx_REG (Pmode,
24653 LR_REGNO));
24654 RTVEC_ELT (p, j++)
24655 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24656 /* The instruction pattern requires a clobber here;
24657 it is shared with the restVEC helper. */
24658 RTVEC_ELT (p, j++)
24659 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24662 /* CR register traditionally saved as CR2. */
24663 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24664 RTVEC_ELT (p, j++)
24665 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24666 if (flag_shrink_wrap)
24668 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24669 gen_rtx_REG (Pmode, LR_REGNO),
24670 cfa_restores);
24671 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24675 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24677 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24678 RTVEC_ELT (p, j++)
24679 = gen_frame_load (reg,
24680 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24681 if (flag_shrink_wrap)
24682 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24684 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24686 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24687 RTVEC_ELT (p, j++)
24688 = gen_frame_load (reg,
24689 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24690 if (flag_shrink_wrap)
24691 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24693 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24695 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24696 ? DFmode : SFmode),
24697 info->first_fp_reg_save + i);
24698 RTVEC_ELT (p, j++)
24699 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24700 if (flag_shrink_wrap)
24701 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24703 RTVEC_ELT (p, j++)
24704 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24705 RTVEC_ELT (p, j++)
24706 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24707 RTVEC_ELT (p, j++)
24708 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24709 RTVEC_ELT (p, j++)
24710 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24711 RTVEC_ELT (p, j++)
24712 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24713 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24715 if (flag_shrink_wrap)
24717 REG_NOTES (insn) = cfa_restores;
24718 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24719 RTX_FRAME_RELATED_P (insn) = 1;
24721 return;
24724 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24725 if (info->push_p)
24726 frame_off = info->total_size;
24728 /* Restore AltiVec registers if we must do so before adjusting the
24729 stack. */
24730 if (TARGET_ALTIVEC_ABI
24731 && info->altivec_size != 0
24732 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24733 || (DEFAULT_ABI != ABI_V4
24734 && offset_below_red_zone_p (info->altivec_save_offset))))
24736 int i;
24737 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24739 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24740 if (use_backchain_to_restore_sp)
24742 int frame_regno = 11;
24744 if ((strategy & REST_INLINE_VRS) == 0)
24746 /* Of r11 and r12, select the one not clobbered by an
24747 out-of-line restore function for the frame register. */
24748 frame_regno = 11 + 12 - scratch_regno;
24750 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24751 emit_move_insn (frame_reg_rtx,
24752 gen_rtx_MEM (Pmode, sp_reg_rtx));
24753 frame_off = 0;
24755 else if (frame_pointer_needed)
24756 frame_reg_rtx = hard_frame_pointer_rtx;
24758 if ((strategy & REST_INLINE_VRS) == 0)
24760 int end_save = info->altivec_save_offset + info->altivec_size;
24761 int ptr_off;
24762 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24763 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24765 if (end_save + frame_off != 0)
24767 rtx offset = GEN_INT (end_save + frame_off);
24769 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24771 else
24772 emit_move_insn (ptr_reg, frame_reg_rtx);
24774 ptr_off = -end_save;
24775 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24776 info->altivec_save_offset + ptr_off,
24777 0, V4SImode, SAVRES_VR);
24779 else
24781 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24782 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24784 rtx addr, areg, mem, reg;
24786 areg = gen_rtx_REG (Pmode, 0);
24787 emit_move_insn
24788 (areg, GEN_INT (info->altivec_save_offset
24789 + frame_off
24790 + 16 * (i - info->first_altivec_reg_save)));
24792 /* AltiVec addressing mode is [reg+reg]. */
24793 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24794 mem = gen_frame_mem (V4SImode, addr);
24796 reg = gen_rtx_REG (V4SImode, i);
24797 emit_move_insn (reg, mem);
24801 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24802 if (((strategy & REST_INLINE_VRS) == 0
24803 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24804 && (flag_shrink_wrap
24805 || (offset_below_red_zone_p
24806 (info->altivec_save_offset
24807 + 16 * (i - info->first_altivec_reg_save)))))
24809 rtx reg = gen_rtx_REG (V4SImode, i);
24810 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24814 /* Restore VRSAVE if we must do so before adjusting the stack. */
24815 if (TARGET_ALTIVEC
24816 && TARGET_ALTIVEC_VRSAVE
24817 && info->vrsave_mask != 0
24818 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24819 || (DEFAULT_ABI != ABI_V4
24820 && offset_below_red_zone_p (info->vrsave_save_offset))))
24822 rtx reg;
24824 if (frame_reg_rtx == sp_reg_rtx)
24826 if (use_backchain_to_restore_sp)
24828 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24829 emit_move_insn (frame_reg_rtx,
24830 gen_rtx_MEM (Pmode, sp_reg_rtx));
24831 frame_off = 0;
24833 else if (frame_pointer_needed)
24834 frame_reg_rtx = hard_frame_pointer_rtx;
24837 reg = gen_rtx_REG (SImode, 12);
24838 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24839 info->vrsave_save_offset + frame_off));
24841 emit_insn (generate_set_vrsave (reg, info, 1));
24844 insn = NULL_RTX;
24845 /* If we have a large stack frame, restore the old stack pointer
24846 using the backchain. */
24847 if (use_backchain_to_restore_sp)
24849 if (frame_reg_rtx == sp_reg_rtx)
24851 /* Under V.4, don't reset the stack pointer until after we're done
24852 loading the saved registers. */
24853 if (DEFAULT_ABI == ABI_V4)
24854 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24856 insn = emit_move_insn (frame_reg_rtx,
24857 gen_rtx_MEM (Pmode, sp_reg_rtx));
24858 frame_off = 0;
24860 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24861 && DEFAULT_ABI == ABI_V4)
24862 /* frame_reg_rtx has been set up by the altivec restore. */
24864 else
24866 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24867 frame_reg_rtx = sp_reg_rtx;
24870 /* If we have a frame pointer, we can restore the old stack pointer
24871 from it. */
24872 else if (frame_pointer_needed)
24874 frame_reg_rtx = sp_reg_rtx;
24875 if (DEFAULT_ABI == ABI_V4)
24876 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24877 /* Prevent reordering memory accesses against stack pointer restore. */
24878 else if (cfun->calls_alloca
24879 || offset_below_red_zone_p (-info->total_size))
24880 rs6000_emit_stack_tie (frame_reg_rtx, true);
24882 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24883 GEN_INT (info->total_size)));
24884 frame_off = 0;
24886 else if (info->push_p
24887 && DEFAULT_ABI != ABI_V4
24888 && !crtl->calls_eh_return)
24890 /* Prevent reordering memory accesses against stack pointer restore. */
24891 if (cfun->calls_alloca
24892 || offset_below_red_zone_p (-info->total_size))
24893 rs6000_emit_stack_tie (frame_reg_rtx, false);
24894 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24895 GEN_INT (info->total_size)));
24896 frame_off = 0;
24898 if (insn && frame_reg_rtx == sp_reg_rtx)
24900 if (cfa_restores)
24902 REG_NOTES (insn) = cfa_restores;
24903 cfa_restores = NULL_RTX;
24905 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24906 RTX_FRAME_RELATED_P (insn) = 1;
24909 /* Restore AltiVec registers if we have not done so already. */
24910 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24911 && TARGET_ALTIVEC_ABI
24912 && info->altivec_size != 0
24913 && (DEFAULT_ABI == ABI_V4
24914 || !offset_below_red_zone_p (info->altivec_save_offset)))
24916 int i;
24918 if ((strategy & REST_INLINE_VRS) == 0)
24920 int end_save = info->altivec_save_offset + info->altivec_size;
24921 int ptr_off;
24922 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24923 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24924 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24926 if (end_save + frame_off != 0)
24928 rtx offset = GEN_INT (end_save + frame_off);
24930 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24932 else
24933 emit_move_insn (ptr_reg, frame_reg_rtx);
24935 ptr_off = -end_save;
24936 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24937 info->altivec_save_offset + ptr_off,
24938 0, V4SImode, SAVRES_VR);
24939 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24941 /* Frame reg was clobbered by out-of-line save. Restore it
24942 from ptr_reg, and if we are calling out-of-line gpr or
24943 fpr restore set up the correct pointer and offset. */
24944 unsigned newptr_regno = 1;
24945 if (!restoring_GPRs_inline)
24947 bool lr = info->gp_save_offset + info->gp_size == 0;
24948 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24949 newptr_regno = ptr_regno_for_savres (sel);
24950 end_save = info->gp_save_offset + info->gp_size;
24952 else if (!restoring_FPRs_inline)
24954 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
24955 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24956 newptr_regno = ptr_regno_for_savres (sel);
24957 end_save = info->fp_save_offset + info->fp_size;
24960 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
24961 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
24963 if (end_save + ptr_off != 0)
24965 rtx offset = GEN_INT (end_save + ptr_off);
24967 frame_off = -end_save;
24968 emit_insn (gen_add3_insn (frame_reg_rtx, ptr_reg, offset));
24970 else
24972 frame_off = ptr_off;
24973 emit_move_insn (frame_reg_rtx, ptr_reg);
24977 else
24979 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24980 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24982 rtx addr, areg, mem, reg;
24984 areg = gen_rtx_REG (Pmode, 0);
24985 emit_move_insn
24986 (areg, GEN_INT (info->altivec_save_offset
24987 + frame_off
24988 + 16 * (i - info->first_altivec_reg_save)));
24990 /* AltiVec addressing mode is [reg+reg]. */
24991 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24992 mem = gen_frame_mem (V4SImode, addr);
24994 reg = gen_rtx_REG (V4SImode, i);
24995 emit_move_insn (reg, mem);
24999 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25000 if (((strategy & REST_INLINE_VRS) == 0
25001 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
25002 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
25004 rtx reg = gen_rtx_REG (V4SImode, i);
25005 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25009 /* Restore VRSAVE if we have not done so already. */
25010 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25011 && TARGET_ALTIVEC
25012 && TARGET_ALTIVEC_VRSAVE
25013 && info->vrsave_mask != 0
25014 && (DEFAULT_ABI == ABI_V4
25015 || !offset_below_red_zone_p (info->vrsave_save_offset)))
25017 rtx reg;
25019 reg = gen_rtx_REG (SImode, 12);
25020 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25021 info->vrsave_save_offset + frame_off));
25023 emit_insn (generate_set_vrsave (reg, info, 1));
25026 /* If we exit by an out-of-line restore function on ABI_V4 then that
25027 function will deallocate the stack, so we don't need to worry
25028 about the unwinder restoring cr from an invalid stack frame
25029 location. */
25030 exit_func = (!restoring_FPRs_inline
25031 || (!restoring_GPRs_inline
25032 && info->first_fp_reg_save == 64));
25034 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
25035 *separate* slots if the routine calls __builtin_eh_return, so
25036 that they can be independently restored by the unwinder. */
25037 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25039 int i, cr_off = info->ehcr_offset;
25041 for (i = 0; i < 8; i++)
25042 if (!call_used_regs[CR0_REGNO + i])
25044 rtx reg = gen_rtx_REG (SImode, 0);
25045 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25046 cr_off + frame_off));
25048 insn = emit_insn (gen_movsi_to_cr_one
25049 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
25051 if (!exit_func && flag_shrink_wrap)
25053 add_reg_note (insn, REG_CFA_RESTORE,
25054 gen_rtx_REG (SImode, CR0_REGNO + i));
25056 RTX_FRAME_RELATED_P (insn) = 1;
25059 cr_off += reg_size;
25063 /* Get the old lr if we saved it. If we are restoring registers
25064 out-of-line, then the out-of-line routines can do this for us. */
25065 if (restore_lr && restoring_GPRs_inline)
25066 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25068 /* Get the old cr if we saved it. */
25069 if (info->cr_save_p)
25071 unsigned cr_save_regno = 12;
25073 if (!restoring_GPRs_inline)
25075 /* Ensure we don't use the register used by the out-of-line
25076 gpr register restore below. */
25077 bool lr = info->gp_save_offset + info->gp_size == 0;
25078 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25079 int gpr_ptr_regno = ptr_regno_for_savres (sel);
25081 if (gpr_ptr_regno == 12)
25082 cr_save_regno = 11;
25083 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
25085 else if (REGNO (frame_reg_rtx) == 12)
25086 cr_save_regno = 11;
25088 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
25089 info->cr_save_offset + frame_off,
25090 exit_func);
25093 /* Set LR here to try to overlap restores below. */
25094 if (restore_lr && restoring_GPRs_inline)
25095 restore_saved_lr (0, exit_func);
25097 /* Load exception handler data registers, if needed. */
25098 if (crtl->calls_eh_return)
25100 unsigned int i, regno;
25102 if (TARGET_AIX)
25104 rtx reg = gen_rtx_REG (reg_mode, 2);
25105 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25106 frame_off + RS6000_TOC_SAVE_SLOT));
25109 for (i = 0; ; ++i)
25111 rtx mem;
25113 regno = EH_RETURN_DATA_REGNO (i);
25114 if (regno == INVALID_REGNUM)
25115 break;
25117 /* Note: possible use of r0 here to address SPE regs. */
25118 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25119 info->ehrd_offset + frame_off
25120 + reg_size * (int) i);
25122 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25126 /* Restore GPRs. This is done as a PARALLEL if we are using
25127 the load-multiple instructions. */
25128 if (TARGET_SPE_ABI
25129 && info->spe_64bit_regs_used
25130 && info->first_gp_reg_save != 32)
25132 /* Determine whether we can address all of the registers that need
25133 to be saved with an offset from frame_reg_rtx that fits in
25134 the small const field for SPE memory instructions. */
25135 int spe_regs_addressable
25136 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25137 + reg_size * (32 - info->first_gp_reg_save - 1))
25138 && restoring_GPRs_inline);
25140 if (!spe_regs_addressable)
25142 int ool_adjust = 0;
25143 rtx old_frame_reg_rtx = frame_reg_rtx;
25144 /* Make r11 point to the start of the SPE save area. We worried about
25145 not clobbering it when we were saving registers in the prologue.
25146 There's no need to worry here because the static chain is passed
25147 anew to every function. */
25149 if (!restoring_GPRs_inline)
25150 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25151 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25152 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25153 GEN_INT (info->spe_gp_save_offset
25154 + frame_off
25155 - ool_adjust)));
25156 /* Keep the invariant that frame_reg_rtx + frame_off points
25157 at the top of the stack frame. */
25158 frame_off = -info->spe_gp_save_offset + ool_adjust;
25161 if (restoring_GPRs_inline)
25163 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25165 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25166 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25168 rtx offset, addr, mem, reg;
25170 /* We're doing all this to ensure that the immediate offset
25171 fits into the immediate field of 'evldd'. */
25172 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25174 offset = GEN_INT (spe_offset + reg_size * i);
25175 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25176 mem = gen_rtx_MEM (V2SImode, addr);
25177 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25179 emit_move_insn (reg, mem);
25182 else
25183 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25184 info->spe_gp_save_offset + frame_off,
25185 info->lr_save_offset + frame_off,
25186 reg_mode,
25187 SAVRES_GPR | SAVRES_LR);
25189 else if (!restoring_GPRs_inline)
25191 /* We are jumping to an out-of-line function. */
25192 rtx ptr_reg;
25193 int end_save = info->gp_save_offset + info->gp_size;
25194 bool can_use_exit = end_save == 0;
25195 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25196 int ptr_off;
25198 /* Emit stack reset code if we need it. */
25199 ptr_regno = ptr_regno_for_savres (sel);
25200 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25201 if (can_use_exit)
25202 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25203 else if (end_save + frame_off != 0)
25204 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25205 GEN_INT (end_save + frame_off)));
25206 else if (REGNO (frame_reg_rtx) != ptr_regno)
25207 emit_move_insn (ptr_reg, frame_reg_rtx);
25208 if (REGNO (frame_reg_rtx) == ptr_regno)
25209 frame_off = -end_save;
25211 if (can_use_exit && info->cr_save_p)
25212 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25214 ptr_off = -end_save;
25215 rs6000_emit_savres_rtx (info, ptr_reg,
25216 info->gp_save_offset + ptr_off,
25217 info->lr_save_offset + ptr_off,
25218 reg_mode, sel);
25220 else if (using_load_multiple)
25222 rtvec p;
25223 p = rtvec_alloc (32 - info->first_gp_reg_save);
25224 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25225 RTVEC_ELT (p, i)
25226 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25227 frame_reg_rtx,
25228 info->gp_save_offset + frame_off + reg_size * i);
25229 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25231 else
25233 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25234 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25235 emit_insn (gen_frame_load
25236 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25237 frame_reg_rtx,
25238 info->gp_save_offset + frame_off + reg_size * i));
25241 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25243 /* If the frame pointer was used then we can't delay emitting
25244 a REG_CFA_DEF_CFA note. This must happen on the insn that
25245 restores the frame pointer, r31. We may have already emitted
25246 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
25247 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
25248 be harmless if emitted. */
25249 if (frame_pointer_needed)
25251 insn = get_last_insn ();
25252 add_reg_note (insn, REG_CFA_DEF_CFA,
25253 plus_constant (Pmode, frame_reg_rtx, frame_off));
25254 RTX_FRAME_RELATED_P (insn) = 1;
25257 /* Set up cfa_restores. We always need these when
25258 shrink-wrapping. If not shrink-wrapping then we only need
25259 the cfa_restore when the stack location is no longer valid.
25260 The cfa_restores must be emitted on or before the insn that
25261 invalidates the stack, and of course must not be emitted
25262 before the insn that actually does the restore. The latter
25263 is why it is a bad idea to emit the cfa_restores as a group
25264 on the last instruction here that actually does a restore:
25265 That insn may be reordered with respect to others doing
25266 restores. */
25267 if (flag_shrink_wrap
25268 && !restoring_GPRs_inline
25269 && info->first_fp_reg_save == 64)
25270 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25272 for (i = info->first_gp_reg_save; i < 32; i++)
25273 if (!restoring_GPRs_inline
25274 || using_load_multiple
25275 || rs6000_reg_live_or_pic_offset_p (i))
25277 rtx reg = gen_rtx_REG (reg_mode, i);
25279 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25283 if (!restoring_GPRs_inline
25284 && info->first_fp_reg_save == 64)
25286 /* We are jumping to an out-of-line function. */
25287 if (cfa_restores)
25288 emit_cfa_restores (cfa_restores);
25289 return;
25292 if (restore_lr && !restoring_GPRs_inline)
25294 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25295 restore_saved_lr (0, exit_func);
25298 /* Restore fpr's if we need to do it without calling a function. */
25299 if (restoring_FPRs_inline)
25300 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25301 if (save_reg_p (info->first_fp_reg_save + i))
25303 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25304 ? DFmode : SFmode),
25305 info->first_fp_reg_save + i);
25306 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25307 info->fp_save_offset + frame_off + 8 * i));
25308 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25309 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25312 /* If we saved cr, restore it here. Just those that were used. */
25313 if (info->cr_save_p)
25314 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
25316 /* If this is V.4, unwind the stack pointer after all of the loads
25317 have been done, or set up r11 if we are restoring fp out of line. */
25318 ptr_regno = 1;
25319 if (!restoring_FPRs_inline)
25321 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25322 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25323 ptr_regno = ptr_regno_for_savres (sel);
25326 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25327 if (REGNO (frame_reg_rtx) == ptr_regno)
25328 frame_off = 0;
25330 if (insn && restoring_FPRs_inline)
25332 if (cfa_restores)
25334 REG_NOTES (insn) = cfa_restores;
25335 cfa_restores = NULL_RTX;
25337 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25338 RTX_FRAME_RELATED_P (insn) = 1;
25341 if (crtl->calls_eh_return)
25343 rtx sa = EH_RETURN_STACKADJ_RTX;
25344 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
25347 if (!sibcall)
25349 rtvec p;
25350 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25351 if (! restoring_FPRs_inline)
25353 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
25354 RTVEC_ELT (p, 0) = ret_rtx;
25356 else
25358 if (cfa_restores)
25360 /* We can't hang the cfa_restores off a simple return,
25361 since the shrink-wrap code sometimes uses an existing
25362 return. This means there might be a path from
25363 pre-prologue code to this return, and dwarf2cfi code
25364 wants the eh_frame unwinder state to be the same on
25365 all paths to any point. So we need to emit the
25366 cfa_restores before the return. For -m64 we really
25367 don't need epilogue cfa_restores at all, except for
25368 this irritating dwarf2cfi-with-shrink-wrap
25369 requirement; the stack red-zone means eh_frame info
25370 from the prologue telling the unwinder to restore
25371 from the stack is perfectly good right to the end of
25372 the function. */
25373 emit_insn (gen_blockage ());
25374 emit_cfa_restores (cfa_restores);
25375 cfa_restores = NULL_RTX;
25377 p = rtvec_alloc (2);
25378 RTVEC_ELT (p, 0) = simple_return_rtx;
25381 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25382 ? gen_rtx_USE (VOIDmode,
25383 gen_rtx_REG (Pmode, LR_REGNO))
25384 : gen_rtx_CLOBBER (VOIDmode,
25385 gen_rtx_REG (Pmode, LR_REGNO)));
25387 /* If we have to restore more than two FP registers, branch to the
25388 restore function. It will return to our caller. */
25389 if (! restoring_FPRs_inline)
25391 int i;
25392 int reg;
25393 rtx sym;
25395 if (flag_shrink_wrap)
25396 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25398 sym = rs6000_savres_routine_sym (info,
25399 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25400 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25401 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
25402 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25404 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25406 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25408 RTVEC_ELT (p, i + 4)
25409 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25410 if (flag_shrink_wrap)
25411 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25412 cfa_restores);
25416 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25419 if (cfa_restores)
25421 if (sibcall)
25422 /* Ensure the cfa_restores are hung off an insn that won't
25423 be reordered above other restores. */
25424 emit_insn (gen_blockage ());
25426 emit_cfa_restores (cfa_restores);
25430 /* Write function epilogue. */
25432 static void
25433 rs6000_output_function_epilogue (FILE *file,
25434 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25436 #if TARGET_MACHO
25437 macho_branch_islands ();
25438 /* Mach-O doesn't support labels at the end of objects, so if
25439 it looks like we might want one, insert a NOP. */
25441 rtx_insn *insn = get_last_insn ();
25442 rtx_insn *deleted_debug_label = NULL;
25443 while (insn
25444 && NOTE_P (insn)
25445 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25447 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
25448 notes; instead set their CODE_LABEL_NUMBER to -1, since
25449 otherwise there would be code generation differences
25450 between -g and -g0. */
25451 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25452 deleted_debug_label = insn;
25453 insn = PREV_INSN (insn);
25455 if (insn
25456 && (LABEL_P (insn)
25457 || (NOTE_P (insn)
25458 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25459 fputs ("\tnop\n", file);
25460 else if (deleted_debug_label)
25461 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25462 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25463 CODE_LABEL_NUMBER (insn) = -1;
25465 #endif
25467 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25468 on its format.
25470 We don't output a traceback table if -finhibit-size-directive was
25471 used. The documentation for -finhibit-size-directive reads
25472 ``don't output a @code{.size} assembler directive, or anything
25473 else that would cause trouble if the function is split in the
25474 middle, and the two halves are placed at locations far apart in
25475 memory.'' The traceback table has this property, since it
25476 includes the offset from the start of the function to the
25477 traceback table itself.
25479 System V.4 PowerPC (and the embedded ABI derived from it) uses a
25480 different traceback table. */
25481 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25482 && ! flag_inhibit_size_directive
25483 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25485 const char *fname = NULL;
25486 const char *language_string = lang_hooks.name;
25487 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25488 int i;
25489 int optional_tbtab;
25490 rs6000_stack_t *info = rs6000_stack_info ();
25492 if (rs6000_traceback == traceback_full)
25493 optional_tbtab = 1;
25494 else if (rs6000_traceback == traceback_part)
25495 optional_tbtab = 0;
25496 else
25497 optional_tbtab = !optimize_size && !TARGET_ELF;
25499 if (optional_tbtab)
25501 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25502 while (*fname == '.') /* V.4 encodes . in the name */
25503 fname++;
25505 /* Need label immediately before tbtab, so we can compute
25506 its offset from the function start. */
25507 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25508 ASM_OUTPUT_LABEL (file, fname);
25511 /* The .tbtab pseudo-op can only be used for the first eight
25512 expressions, since it can't handle the possibly variable
25513 length fields that follow. However, if you omit the optional
25514 fields, the assembler outputs zeros for all optional fields
25515 anyway, giving each variable-length field its minimum length
25516 (as defined in sys/debug.h). Thus we cannot use the .tbtab
25517 pseudo-op at all. */
25519 /* An all-zero word flags the start of the tbtab, for debuggers
25520 that have to find it by searching forward from the entry
25521 point or from the current pc. */
25522 fputs ("\t.long 0\n", file);
25524 /* Tbtab format type. Use format type 0. */
25525 fputs ("\t.byte 0,", file);
25527 /* Language type. Unfortunately, there does not seem to be any
25528 official way to discover the language being compiled, so we
25529 use language_string.
25530 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25531 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25532 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
25533 either, so for now use 0. */
25534 if (lang_GNU_C ()
25535 || ! strcmp (language_string, "GNU GIMPLE")
25536 || ! strcmp (language_string, "GNU Go")
25537 || ! strcmp (language_string, "libgccjit"))
25538 i = 0;
25539 else if (! strcmp (language_string, "GNU F77")
25540 || lang_GNU_Fortran ())
25541 i = 1;
25542 else if (! strcmp (language_string, "GNU Pascal"))
25543 i = 2;
25544 else if (! strcmp (language_string, "GNU Ada"))
25545 i = 3;
25546 else if (lang_GNU_CXX ()
25547 || ! strcmp (language_string, "GNU Objective-C++"))
25548 i = 9;
25549 else if (! strcmp (language_string, "GNU Java"))
25550 i = 13;
25551 else if (! strcmp (language_string, "GNU Objective-C"))
25552 i = 14;
25553 else
25554 gcc_unreachable ();
25555 fprintf (file, "%d,", i);
25557 /* 8 single bit fields: global linkage (not set for C extern linkage,
25558 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25559 from start of procedure stored in tbtab, internal function, function
25560 has controlled storage, function has no toc, function uses fp,
25561 function logs/aborts fp operations. */
25562 /* Assume that fp operations are used if any fp reg must be saved. */
25563 fprintf (file, "%d,",
25564 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25566 /* 6 bitfields: function is interrupt handler, name present in
25567 proc table, function calls alloca, on condition directives
25568 (controls stack walks, 3 bits), saves condition reg, saves
25569 link reg. */
25570 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25571 set up as a frame pointer, even when there is no alloca call. */
25572 fprintf (file, "%d,",
25573 ((optional_tbtab << 6)
25574 | ((optional_tbtab & frame_pointer_needed) << 5)
25575 | (info->cr_save_p << 1)
25576 | (info->lr_save_p)));
25578 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25579 (6 bits). */
25580 fprintf (file, "%d,",
25581 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25583 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25584 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25586 if (optional_tbtab)
25588 /* Compute the parameter info from the function decl argument
25589 list. */
25590 tree decl;
25591 int next_parm_info_bit = 31;
25593 for (decl = DECL_ARGUMENTS (current_function_decl);
25594 decl; decl = DECL_CHAIN (decl))
25596 rtx parameter = DECL_INCOMING_RTL (decl);
25597 machine_mode mode = GET_MODE (parameter);
25599 if (GET_CODE (parameter) == REG)
25601 if (SCALAR_FLOAT_MODE_P (mode))
25603 int bits;
25605 float_parms++;
25607 switch (mode)
25609 case SFmode:
25610 case SDmode:
25611 bits = 0x2;
25612 break;
25614 case DFmode:
25615 case DDmode:
25616 case TFmode:
25617 case TDmode:
25618 bits = 0x3;
25619 break;
25621 default:
25622 gcc_unreachable ();
25625 /* If only one bit will fit, don't OR in this entry. */
25626 if (next_parm_info_bit > 0)
25627 parm_info |= (bits << (next_parm_info_bit - 1));
25628 next_parm_info_bit -= 2;
25630 else
25632 fixed_parms += ((GET_MODE_SIZE (mode)
25633 + (UNITS_PER_WORD - 1))
25634 / UNITS_PER_WORD);
25635 next_parm_info_bit -= 1;
25641 /* Number of fixed point parameters. */
25642 /* This is actually the number of words of fixed-point parameters; thus
25643 an 8-byte struct counts as 2, and the maximum value is 8. */
25644 fprintf (file, "%d,", fixed_parms);
25646 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25647 all on stack. */
25648 /* This is actually the number of fp registers that hold parameters;
25649 and thus the maximum value is 13. */
25650 /* Set parameters on stack bit if parameters are not in their original
25651 registers, regardless of whether they are on the stack? Xlc
25652 seems to set the bit when not optimizing. */
25653 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25655 if (! optional_tbtab)
25656 return;
25658 /* Optional fields follow. Some are variable length. */
25660 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25661 11 double float. */
25662 /* There is an entry for each parameter in a register, in the order that
25663 they occur in the parameter list. Any intervening arguments on the
25664 stack are ignored. If the list overflows a long (max possible length
25665 34 bits) then completely leave off all elements that don't fit. */
25666 /* Only emit this long if there was at least one parameter. */
25667 if (fixed_parms || float_parms)
25668 fprintf (file, "\t.long %d\n", parm_info);
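/* Editorial worked example (not from the original source): for a
   parameter list of one fixed-point word, one double, and one
   single, the packing loop above builds parm_info as follows,
   starting with next_parm_info_bit = 31:
      fixed word  -> no bits set,                next bit = 30
      double      -> 0b11 at bits 30..29,        next bit = 28
      single      -> 0b10 at bits 28..27,        next bit = 26
   giving parm_info = 0x70000000.  */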
25670 /* Offset from start of code to tb table. */
25671 fputs ("\t.long ", file);
25672 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25673 RS6000_OUTPUT_BASENAME (file, fname);
25674 putc ('-', file);
25675 rs6000_output_function_entry (file, fname);
25676 putc ('\n', file);
25678 /* Interrupt handler mask. */
25679 /* Omit this long, since we never set the interrupt handler bit
25680 above. */
25682 /* Number of CTL (controlled storage) anchors. */
25683 /* Omit this long, since the has_ctl bit is never set above. */
25685 /* Displacement into stack of each CTL anchor. */
25686 /* Omit this list of longs, because there are no CTL anchors. */
25688 /* Length of function name. */
25689 if (*fname == '*')
25690 ++fname;
25691 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25693 /* Function name. */
25694 assemble_string (fname, strlen (fname));
25696 /* Register for alloca automatic storage; this is always reg 31.
25697 Only emit this if the alloca bit was set above. */
25698 if (frame_pointer_needed)
25699 fputs ("\t.byte 31\n", file);
25701 fputs ("\t.align 2\n", file);
25705 /* A C compound statement that outputs the assembler code for a thunk
25706 function, used to implement C++ virtual function calls with
25707 multiple inheritance. The thunk acts as a wrapper around a virtual
25708 function, adjusting the implicit object parameter before handing
25709 control off to the real function.
25711 First, emit code to add the integer DELTA to the location that
25712 contains the incoming first argument. Assume that this argument
25713 contains a pointer, and is the one used to pass the `this' pointer
25714 in C++. This is the incoming argument *before* the function
25715 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25716 values of all other incoming arguments.
25718 After the addition, emit code to jump to FUNCTION, which is a
25719 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25720 not touch the return address. Hence returning from FUNCTION will
25721 return to whoever called the current `thunk'.
25723 The effect must be as if FUNCTION had been called directly with the
25724 adjusted first argument. This macro is responsible for emitting
25725 all of the code for a thunk function; output_function_prologue()
25726 and output_function_epilogue() are not invoked.
25728 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25729 been extracted from it.) It might possibly be useful on some
25730 targets, but probably not.
25732 If you do not define this macro, the target-independent code in the
25733 C++ frontend will generate a less efficient heavyweight thunk that
25734 calls FUNCTION instead of jumping to it. The generic approach does
25735 not support varargs. */
25737 static void
25738 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25739 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25740 tree function)
25742 rtx this_rtx, funexp;
25743 rtx_insn *insn;
25745 reload_completed = 1;
25746 epilogue_completed = 1;
25748 /* Mark the end of the (empty) prologue. */
25749 emit_note (NOTE_INSN_PROLOGUE_END);
25751 /* Find the "this" pointer. If the function returns a structure,
25752 the structure return pointer is in r3. */
25753 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25754 this_rtx = gen_rtx_REG (Pmode, 4);
25755 else
25756 this_rtx = gen_rtx_REG (Pmode, 3);
25758 /* Apply the constant offset, if required. */
25759 if (delta)
25760 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25762 /* Apply the offset from the vtable, if required. */
25763 if (vcall_offset)
25765 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25766 rtx tmp = gen_rtx_REG (Pmode, 12);
25768 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25769 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25771 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25772 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25774 else
25776 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25778 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25780 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25783 /* Generate a tail call to the target function. */
25784 if (!TREE_USED (function))
25786 assemble_external (function);
25787 TREE_USED (function) = 1;
25789 funexp = XEXP (DECL_RTL (function), 0);
25790 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25792 #if TARGET_MACHO
25793 if (MACHOPIC_INDIRECT)
25794 funexp = machopic_indirect_call_target (funexp);
25795 #endif
25797 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25798 generate sibcall RTL explicitly. */
25799 insn = emit_call_insn (
25800 gen_rtx_PARALLEL (VOIDmode,
25801 gen_rtvec (4,
25802 gen_rtx_CALL (VOIDmode,
25803 funexp, const0_rtx),
25804 gen_rtx_USE (VOIDmode, const0_rtx),
25805 gen_rtx_USE (VOIDmode,
25806 gen_rtx_REG (SImode,
25807 LR_REGNO)),
25808 simple_return_rtx)));
25809 SIBLING_CALL_P (insn) = 1;
25810 emit_barrier ();
25812 /* Ensure we have a global entry point for the thunk. ??? We could
25813 avoid that if the target routine doesn't need a global entry point,
25814 but we do not know whether this is the case at this point. */
25815 if (DEFAULT_ABI == ABI_ELFv2)
25816 cfun->machine->r2_setup_needed = true;
25818 /* Run just enough of rest_of_compilation to get the insns emitted.
25819 There's not really enough bulk here to make other passes such as
25820 instruction scheduling worth while. Note that use_thunk calls
25821 assemble_start_function and assemble_end_function. */
25822 insn = get_insns ();
25823 shorten_branches (insn);
25824 final_start_function (insn, file, 1);
25825 final (insn, file, 1);
25826 final_end_function ();
25828 reload_completed = 0;
25829 epilogue_completed = 0;
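/* Editorial sketch (not part of the original source): the thunk
   emitted above is equivalent to the following C-level pointer
   arithmetic before the tail call.  `target_fn' and
   `thunk_equivalent' are hypothetical names used only for
   illustration.  */

#include <stdint.h>

typedef void (*target_fn) (void *);

static void
thunk_equivalent (void *this_ptr, intptr_t delta, intptr_t vcall_offset,
                  target_fn function)
{
  char *p = (char *) this_ptr + delta;	/* apply the constant DELTA */

  if (vcall_offset != 0)
    {
      /* The first word of the object points at the vtable; the extra
         adjustment is loaded from VCALL_OFFSET within the vtable.  */
      char *vtable = *(char **) p;
      p += *(intptr_t *) (vtable + vcall_offset);
    }

  /* The real thunk jumps (a sibcall) rather than calls, so FUNCTION
     returns directly to the thunk's caller.  */
  function (p);
}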
25832 /* A quick summary of the various types of 'constant-pool tables'
25833 under PowerPC:
25835 Target       Flags                 Name             One table per
25836 AIX          (none)                AIX TOC          object file
25837 AIX          -mfull-toc            AIX TOC          object file
25838 AIX          -mminimal-toc         AIX minimal TOC  translation unit
25839 SVR4/EABI    (none)                SVR4 SDATA       object file
25840 SVR4/EABI    -fpic                 SVR4 pic         object file
25841 SVR4/EABI    -fPIC                 SVR4 PIC         translation unit
25842 SVR4/EABI    -mrelocatable         EABI TOC         function
25843 SVR4/EABI    -maix                 AIX TOC          object file
25844 SVR4/EABI    -maix -mminimal-toc
25845                                    AIX minimal TOC  translation unit
25847 Name             Reg.  Reg.     Entries   Entry contains:
25848                        set by   made by   addrs?   fp?      sum?
25850 AIX TOC          2     crt0     as        Y        option   option
25851 AIX minimal TOC  30    prolog   gcc       Y        Y        option
25852 SVR4 SDATA       13    crt0     gcc       N        Y        N
25853 SVR4 pic         30    prolog   ld        Y        not yet  N
25854 SVR4 PIC         30    prolog   gcc       Y        option   option
25855 EABI TOC         30    prolog   gcc       Y        option   option
25859 /* Hash functions for the hash table. */
25861 static unsigned
25862 rs6000_hash_constant (rtx k)
25864 enum rtx_code code = GET_CODE (k);
25865 machine_mode mode = GET_MODE (k);
25866 unsigned result = (code << 3) ^ mode;
25867 const char *format;
25868 int flen, fidx;
25870 format = GET_RTX_FORMAT (code);
25871 flen = strlen (format);
25872 fidx = 0;
25874 switch (code)
25876 case LABEL_REF:
25877 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25879 case CONST_WIDE_INT:
25881 int i;
25882 flen = CONST_WIDE_INT_NUNITS (k);
25883 for (i = 0; i < flen; i++)
25884 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
25885 return result;
25888 case CONST_DOUBLE:
25889 if (mode != VOIDmode)
25890 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25891 flen = 2;
25892 break;
25894 case CODE_LABEL:
25895 fidx = 3;
25896 break;
25898 default:
25899 break;
25902 for (; fidx < flen; fidx++)
25903 switch (format[fidx])
25905 case 's':
25907 unsigned i, len;
25908 const char *str = XSTR (k, fidx);
25909 len = strlen (str);
25910 result = result * 613 + len;
25911 for (i = 0; i < len; i++)
25912 result = result * 613 + (unsigned) str[i];
25913 break;
25915 case 'u':
25916 case 'e':
25917 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
25918 break;
25919 case 'i':
25920 case 'n':
25921 result = result * 613 + (unsigned) XINT (k, fidx);
25922 break;
25923 case 'w':
25924 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
25925 result = result * 613 + (unsigned) XWINT (k, fidx);
25926 else
25928 size_t i;
25929 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
25930 result = result * 613 + (unsigned) (XWINT (k, fidx)
25931 >> CHAR_BIT * i);
25933 break;
25934 case '0':
25935 break;
25936 default:
25937 gcc_unreachable ();
25940 return result;
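/* Editorial sketch (not part of the original source): the same
   multiplicative scheme as rs6000_hash_constant, reduced to a byte
   string.  Scalar data is folded in with the odd multiplier 613;
   nested structure (the 'e'/'u' cases above) uses 1231.  */

static unsigned
hash_bytes_613 (const unsigned char *p, unsigned len, unsigned result)
{
  unsigned i;

  result = result * 613 + len;		/* fold in the length first */
  for (i = 0; i < len; i++)
    result = result * 613 + p[i];	/* then each byte */
  return result;
}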
25943 hashval_t
25944 toc_hasher::hash (toc_hash_struct *thc)
25946 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
25949 /* Compare H1 and H2 for equivalence. */
25951 bool
25952 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
25954 rtx r1 = h1->key;
25955 rtx r2 = h2->key;
25957 if (h1->key_mode != h2->key_mode)
25958 return 0;
25960 return rtx_equal_p (r1, r2);
25963 /* These are the names given by the C++ front-end to vtables, and
25964 vtable-like objects. Ideally, this logic should not be here;
25965 instead, there should be some programmatic way of inquiring as
25966 to whether or not an object is a vtable. */
25968 #define VTABLE_NAME_P(NAME) \
25969 (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0 \
25970 || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0 \
25971 || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0 \
25972 || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0 \
25973 || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
25975 #ifdef NO_DOLLAR_IN_LABEL
25976 /* Return a GGC-allocated character string translating dollar signs in
25977 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
25979 const char *
25980 rs6000_xcoff_strip_dollar (const char *name)
25982 char *strip, *p;
25983 const char *q;
25984 size_t len;
25986 q = (const char *) strchr (name, '$');
25988 if (q == 0 || q == name)
25989 return name;
25991 len = strlen (name);
25992 strip = XALLOCAVEC (char, len + 1);
25993 strcpy (strip, name);
25994 p = strip + (q - name);
25995 while (p)
25997 *p = '_';
25998 p = strchr (p + 1, '$');
26001 return ggc_alloc_string (strip, len);
26003 #endif
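/* Editorial sketch (not part of the original source): the same
   transformation as rs6000_xcoff_strip_dollar without the GGC
   allocation, e.g. "__tf2$x" becomes "__tf2_x".  Note the real
   routine additionally returns the name unchanged when the first
   '$' is the leading character; this sketch skips only that one.  */

#include <string.h>
#include <stdlib.h>

static char *
strip_dollar_example (const char *name)
{
  char *copy = strdup (name);	/* POSIX strdup; caller frees */
  char *p;

  for (p = strchr (copy, '$'); p; p = strchr (p + 1, '$'))
    if (p != copy)
      *p = '_';			/* '$' becomes '_' */
  return copy;
}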
26005 void
26006 rs6000_output_symbol_ref (FILE *file, rtx x)
26008 /* Currently C++ toc references to vtables can be emitted before it
26009 is decided whether the vtable is public or private. If this is
26010 the case, then the linker will eventually complain that there is
26011 a reference to an unknown section. Thus, for vtables only,
26012 we emit the TOC reference to reference the symbol and not the
26013 section. */
26014 const char *name = XSTR (x, 0);
26016 if (VTABLE_NAME_P (name))
26018 RS6000_OUTPUT_BASENAME (file, name);
26020 else
26021 assemble_name (file, name);
26024 /* Output a TOC entry. We derive the entry name from what is being
26025 written. */
26027 void
26028 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
26030 char buf[256];
26031 const char *name = buf;
26032 rtx base = x;
26033 HOST_WIDE_INT offset = 0;
26035 gcc_assert (!TARGET_NO_TOC);
26037 /* When the linker won't eliminate them, don't output duplicate
26038 TOC entries (this happens on AIX if there is any kind of TOC,
26039 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
26040 CODE_LABELs. */
26041 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
26043 struct toc_hash_struct *h;
26045 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
26046 time because GGC is not initialized at that point. */
26047 if (toc_hash_table == NULL)
26048 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
26050 h = ggc_alloc<toc_hash_struct> ();
26051 h->key = x;
26052 h->key_mode = mode;
26053 h->labelno = labelno;
26055 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
26056 if (*found == NULL)
26057 *found = h;
26058 else /* This is indeed a duplicate.
26059 Set this label equal to that label. */
26061 fputs ("\t.set ", file);
26062 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26063 fprintf (file, "%d,", labelno);
26064 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26065 fprintf (file, "%d\n", ((*found)->labelno));
26067 #ifdef HAVE_AS_TLS
26068 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
26069 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
26070 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
26072 fputs ("\t.set ", file);
26073 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26074 fprintf (file, "%d,", labelno);
26075 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26076 fprintf (file, "%d\n", ((*found)->labelno));
26078 #endif
26079 return;
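/* Editorial note (not part of the original source): for a duplicate
   constant the code above emits an alias rather than a second entry,
   e.g. something like

	.set LC..5,LC..2

   (exact label syntax is target-dependent), so both TOC references
   resolve to the first entry.  */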
26083 /* If we're going to put a double constant in the TOC, make sure it's
26084 aligned properly when strict alignment is on. */
26085 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
26086 && STRICT_ALIGNMENT
26087 && GET_MODE_BITSIZE (mode) >= 64
26088 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
26089 ASM_OUTPUT_ALIGN (file, 3);
26092 (*targetm.asm_out.internal_label) (file, "LC", labelno);
26094 /* Handle FP constants specially. Note that if we have a minimal
26095 TOC, things we put here aren't actually in the TOC, so we can allow
26096 FP constants. */
26097 if (GET_CODE (x) == CONST_DOUBLE &&
26098 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
26100 REAL_VALUE_TYPE rv;
26101 long k[4];
26103 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26104 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26105 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
26106 else
26107 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
26109 if (TARGET_64BIT)
26111 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26112 fputs (DOUBLE_INT_ASM_OP, file);
26113 else
26114 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26115 k[0] & 0xffffffff, k[1] & 0xffffffff,
26116 k[2] & 0xffffffff, k[3] & 0xffffffff);
26117 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
26118 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26119 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
26120 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
26121 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
26122 return;
26124 else
26126 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26127 fputs ("\t.long ", file);
26128 else
26129 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26130 k[0] & 0xffffffff, k[1] & 0xffffffff,
26131 k[2] & 0xffffffff, k[3] & 0xffffffff);
26132 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
26133 k[0] & 0xffffffff, k[1] & 0xffffffff,
26134 k[2] & 0xffffffff, k[3] & 0xffffffff);
26135 return;
26138 else if (GET_CODE (x) == CONST_DOUBLE &&
26139 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
26141 REAL_VALUE_TYPE rv;
26142 long k[2];
26144 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26146 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26147 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
26148 else
26149 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
26151 if (TARGET_64BIT)
26153 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26154 fputs (DOUBLE_INT_ASM_OP, file);
26155 else
26156 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26157 k[0] & 0xffffffff, k[1] & 0xffffffff);
26158 fprintf (file, "0x%lx%08lx\n",
26159 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26160 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
26161 return;
26163 else
26165 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26166 fputs ("\t.long ", file);
26167 else
26168 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26169 k[0] & 0xffffffff, k[1] & 0xffffffff);
26170 fprintf (file, "0x%lx,0x%lx\n",
26171 k[0] & 0xffffffff, k[1] & 0xffffffff);
26172 return;
26175 else if (GET_CODE (x) == CONST_DOUBLE &&
26176 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
26178 REAL_VALUE_TYPE rv;
26179 long l;
26181 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26182 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26183 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
26184 else
26185 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26187 if (TARGET_64BIT)
26189 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26190 fputs (DOUBLE_INT_ASM_OP, file);
26191 else
26192 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26193 if (WORDS_BIG_ENDIAN)
26194 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
26195 else
26196 fprintf (file, "0x%lx\n", l & 0xffffffff);
26197 return;
26199 else
26201 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26202 fputs ("\t.long ", file);
26203 else
26204 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26205 fprintf (file, "0x%lx\n", l & 0xffffffff);
26206 return;
26209 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
26211 unsigned HOST_WIDE_INT low;
26212 HOST_WIDE_INT high;
26214 low = INTVAL (x) & 0xffffffff;
26215 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
26217 /* TOC entries are always Pmode-sized, so when big-endian
26218 smaller integer constants in the TOC need to be padded.
26219 (This is still a win over putting the constants in
26220 a separate constant pool, because then we'd have
26221 to have both a TOC entry _and_ the actual constant.)
26223 For a 32-bit target, CONST_INT values are loaded and shifted
26224 entirely within `low' and can be stored in one TOC entry. */
26226 /* It would be easy to make this work, but it doesn't now. */
26227 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
26229 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
26231 low |= high << 32;
26232 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
26233 high = (HOST_WIDE_INT) low >> 32;
26234 low &= 0xffffffff;
26237 if (TARGET_64BIT)
26239 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26240 fputs (DOUBLE_INT_ASM_OP, file);
26241 else
26242 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26243 (long) high & 0xffffffff, (long) low & 0xffffffff);
26244 fprintf (file, "0x%lx%08lx\n",
26245 (long) high & 0xffffffff, (long) low & 0xffffffff);
26246 return;
26248 else
26250 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
26252 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26253 fputs ("\t.long ", file);
26254 else
26255 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26256 (long) high & 0xffffffff, (long) low & 0xffffffff);
26257 fprintf (file, "0x%lx,0x%lx\n",
26258 (long) high & 0xffffffff, (long) low & 0xffffffff);
26260 else
26262 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26263 fputs ("\t.long ", file);
26264 else
26265 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
26266 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
26268 return;
26272 if (GET_CODE (x) == CONST)
26274 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
26275 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
26277 base = XEXP (XEXP (x, 0), 0);
26278 offset = INTVAL (XEXP (XEXP (x, 0), 1));
26281 switch (GET_CODE (base))
26283 case SYMBOL_REF:
26284 name = XSTR (base, 0);
26285 break;
26287 case LABEL_REF:
26288 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
26289 CODE_LABEL_NUMBER (XEXP (base, 0)));
26290 break;
26292 case CODE_LABEL:
26293 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
26294 break;
26296 default:
26297 gcc_unreachable ();
26300 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26301 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
26302 else
26304 fputs ("\t.tc ", file);
26305 RS6000_OUTPUT_BASENAME (file, name);
26307 if (offset < 0)
26308 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
26309 else if (offset)
26310 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
26312 /* Mark large TOC symbols on AIX with [TE] so they are mapped
26313 after other TOC symbols, reducing overflow of small TOC access
26314 to [TC] symbols. */
26315 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
26316 ? "[TE]," : "[TC],", file);
26319 /* Currently C++ toc references to vtables can be emitted before it
26320 is decided whether the vtable is public or private. If this is
26321 the case, then the linker will eventually complain that there is
26322 a TOC reference to an unknown section. Thus, for vtables only,
26323 we emit the TOC reference to reference the symbol and not the
26324 section. */
26325 if (VTABLE_NAME_P (name))
26327 RS6000_OUTPUT_BASENAME (file, name);
26328 if (offset < 0)
26329 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
26330 else if (offset > 0)
26331 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
26333 else
26334 output_addr_const (file, x);
26336 #if HAVE_AS_TLS
26337 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
26338 && SYMBOL_REF_TLS_MODEL (base) != 0)
26340 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
26341 fputs ("@le", file);
26342 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
26343 fputs ("@ie", file);
26344 /* Use global-dynamic for local-dynamic. */
26345 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
26346 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
26348 putc ('\n', file);
26349 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
26350 fputs ("\t.tc .", file);
26351 RS6000_OUTPUT_BASENAME (file, name);
26352 fputs ("[TC],", file);
26353 output_addr_const (file, x);
26354 fputs ("@m", file);
26357 #endif
26359 putc ('\n', file);
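/* Editorial sketch (not part of the original source): the arithmetic
   used by the CONST_INT branch above to left-justify a narrow
   constant in a 64-bit big-endian TOC word.  pad_toc_word is a
   hypothetical name used only for illustration.  */

#include <stdio.h>
#include <stdint.h>

static void
pad_toc_word (int64_t value, int mode_bits)
{
  uint64_t low = (uint64_t) value & 0xffffffff;
  int64_t high = value >> 32;

  if (mode_bits < 64)
    {
      /* Shift the constant into the most-significant end, then split
         it back into the two halves that get printed.  */
      uint64_t word = (low | ((uint64_t) high << 32)) << (64 - mode_bits);
      high = (int64_t) word >> 32;
      low = word & 0xffffffff;
    }
  printf ("0x%llx%08llx\n",
	  (unsigned long long) (high & 0xffffffff),
	  (unsigned long long) low);
}

/* pad_toc_word (0x1234, 16) prints 0x1234000000000000: the 16-bit
   constant occupies the high halfword of the TOC entry.  */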
26362 /* Output an assembler pseudo-op to write an ASCII string of N characters
26363 starting at P to FILE.
26365 On the RS/6000, we have to do this using the .byte operation and
26366 write out special characters outside the quoted string.
26367 Also, the assembler is broken; very long strings are truncated,
26368 so we must artificially break them up early. */
26370 void
26371 output_ascii (FILE *file, const char *p, int n)
26373 char c;
26374 int i, count_string;
26375 const char *for_string = "\t.byte \"";
26376 const char *for_decimal = "\t.byte ";
26377 const char *to_close = NULL;
26379 count_string = 0;
26380 for (i = 0; i < n; i++)
26382 c = *p++;
26383 if (c >= ' ' && c < 0177)
26385 if (for_string)
26386 fputs (for_string, file);
26387 putc (c, file);
26389 /* Write two quotes to get one. */
26390 if (c == '"')
26392 putc (c, file);
26393 ++count_string;
26396 for_string = NULL;
26397 for_decimal = "\"\n\t.byte ";
26398 to_close = "\"\n";
26399 ++count_string;
26401 if (count_string >= 512)
26403 fputs (to_close, file);
26405 for_string = "\t.byte \"";
26406 for_decimal = "\t.byte ";
26407 to_close = NULL;
26408 count_string = 0;
26411 else
26413 if (for_decimal)
26414 fputs (for_decimal, file);
26415 fprintf (file, "%d", c);
26417 for_string = "\n\t.byte \"";
26418 for_decimal = ", ";
26419 to_close = "\n";
26420 count_string = 0;
26424 /* Now close the string if we have written one. Then end the line. */
26425 if (to_close)
26426 fputs (to_close, file);
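/* Editorial example (not part of the original source): for the four
   input bytes  a  b  "  \n  the routine above emits

	.byte "ab"""
	.byte 10

   -- the printable run is quoted with the embedded quote doubled,
   and the newline falls back to a decimal .byte entry.  */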
26429 /* Generate a unique section name for FILENAME for a section type
26430 represented by SECTION_DESC. Output goes into BUF.
26432 SECTION_DESC can be any string, as long as it is different for each
26433 possible section type.
26435 We name the section in the same manner as xlc. The name begins with an
26436 underscore followed by the filename (after stripping any leading directory
26437 names) with the last period replaced by the string SECTION_DESC. If
26438 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26439 the name. */
26441 void
26442 rs6000_gen_section_name (char **buf, const char *filename,
26443 const char *section_desc)
26445 const char *q, *after_last_slash, *last_period = 0;
26446 char *p;
26447 int len;
26449 after_last_slash = filename;
26450 for (q = filename; *q; q++)
26452 if (*q == '/')
26453 after_last_slash = q + 1;
26454 else if (*q == '.')
26455 last_period = q;
26458 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26459 *buf = (char *) xmalloc (len);
26461 p = *buf;
26462 *p++ = '_';
26464 for (q = after_last_slash; *q; q++)
26466 if (q == last_period)
26468 strcpy (p, section_desc);
26469 p += strlen (section_desc);
26470 break;
26473 else if (ISALNUM (*q))
26474 *p++ = *q;
26477 if (last_period == 0)
26478 strcpy (p, section_desc);
26479 else
26480 *p = '\0';
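/* Editorial usage example (not part of the original source):

	char *name;
	rs6000_gen_section_name (&name, "src/foo.c", "data");

   leaves name pointing at "_foodata": a leading '_', the basename
   with non-alphanumeric characters dropped, and everything from the
   last period onward replaced by SECTION_DESC.  The buffer is
   xmalloc'd, so the caller is responsible for freeing it.  */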
26483 /* Emit profile function. */
26485 void
26486 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26488 /* Non-standard profiling for kernels, which just saves LR then calls
26489 _mcount without worrying about arg saves. The idea is to change
26490 the function prologue as little as possible as it isn't easy to
26491 account for arg save/restore code added just for _mcount. */
26492 if (TARGET_PROFILE_KERNEL)
26493 return;
26495 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26497 #ifndef NO_PROFILE_COUNTERS
26498 # define NO_PROFILE_COUNTERS 0
26499 #endif
26500 if (NO_PROFILE_COUNTERS)
26501 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26502 LCT_NORMAL, VOIDmode, 0);
26503 else
26505 char buf[30];
26506 const char *label_name;
26507 rtx fun;
26509 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26510 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26511 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26513 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26514 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26517 else if (DEFAULT_ABI == ABI_DARWIN)
26519 const char *mcount_name = RS6000_MCOUNT;
26520 int caller_addr_regno = LR_REGNO;
26522 /* Be conservative and always set this, at least for now. */
26523 crtl->uses_pic_offset_table = 1;
26525 #if TARGET_MACHO
26526 /* For PIC code, set up a stub and collect the caller's address
26527 from r0, which is where the prologue puts it. */
26528 if (MACHOPIC_INDIRECT
26529 && crtl->uses_pic_offset_table)
26530 caller_addr_regno = 0;
26531 #endif
26532 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26533 LCT_NORMAL, VOIDmode, 1,
26534 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26538 /* Write function profiler code. */
26540 void
26541 output_function_profiler (FILE *file, int labelno)
26543 char buf[100];
26545 switch (DEFAULT_ABI)
26547 default:
26548 gcc_unreachable ();
26550 case ABI_V4:
26551 if (!TARGET_32BIT)
26553 warning (0, "no profiling of 64-bit code for this ABI");
26554 return;
26556 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26557 fprintf (file, "\tmflr %s\n", reg_names[0]);
26558 if (NO_PROFILE_COUNTERS)
26560 asm_fprintf (file, "\tstw %s,4(%s)\n",
26561 reg_names[0], reg_names[1]);
26563 else if (TARGET_SECURE_PLT && flag_pic)
26565 if (TARGET_LINK_STACK)
26567 char name[32];
26568 get_ppc476_thunk_name (name);
26569 asm_fprintf (file, "\tbl %s\n", name);
26571 else
26572 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26573 asm_fprintf (file, "\tstw %s,4(%s)\n",
26574 reg_names[0], reg_names[1]);
26575 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26576 asm_fprintf (file, "\taddis %s,%s,",
26577 reg_names[12], reg_names[12]);
26578 assemble_name (file, buf);
26579 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26580 assemble_name (file, buf);
26581 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26583 else if (flag_pic == 1)
26585 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26586 asm_fprintf (file, "\tstw %s,4(%s)\n",
26587 reg_names[0], reg_names[1]);
26588 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26589 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26590 assemble_name (file, buf);
26591 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26593 else if (flag_pic > 1)
26595 asm_fprintf (file, "\tstw %s,4(%s)\n",
26596 reg_names[0], reg_names[1]);
26597 /* Now, we need to get the address of the label. */
26598 if (TARGET_LINK_STACK)
26600 char name[32];
26601 get_ppc476_thunk_name (name);
26602 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26603 assemble_name (file, buf);
26604 fputs ("-.\n1:", file);
26605 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26606 asm_fprintf (file, "\taddi %s,%s,4\n",
26607 reg_names[11], reg_names[11]);
26609 else
26611 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26612 assemble_name (file, buf);
26613 fputs ("-.\n1:", file);
26614 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26616 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26617 reg_names[0], reg_names[11]);
26618 asm_fprintf (file, "\tadd %s,%s,%s\n",
26619 reg_names[0], reg_names[0], reg_names[11]);
26621 else
26623 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26624 assemble_name (file, buf);
26625 fputs ("@ha\n", file);
26626 asm_fprintf (file, "\tstw %s,4(%s)\n",
26627 reg_names[0], reg_names[1]);
26628 asm_fprintf (file, "\tla %s,", reg_names[0]);
26629 assemble_name (file, buf);
26630 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26633 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26634 fprintf (file, "\tbl %s%s\n",
26635 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26636 break;
26638 case ABI_AIX:
26639 case ABI_ELFv2:
26640 case ABI_DARWIN:
26641 /* Don't do anything, done in output_profile_hook (). */
26642 break;
26648 /* The following variable holds the last issued insn. */
26650 static rtx last_scheduled_insn;
26652 /* The following variable helps to balance issuing of load and
26653 store instructions. */
26655 static int load_store_pendulum;
26657 /* Power4 load update and store update instructions are cracked into a
26658 load or store and an integer insn which are executed in the same cycle.
26659 Branches have their own dispatch slot which does not count against the
26660 GCC issue rate, but it changes the program flow so there are no other
26661 instructions to issue in this cycle. */
26663 static int
26664 rs6000_variable_issue_1 (rtx_insn *insn, int more)
26666 last_scheduled_insn = insn;
26667 if (GET_CODE (PATTERN (insn)) == USE
26668 || GET_CODE (PATTERN (insn)) == CLOBBER)
26670 cached_can_issue_more = more;
26671 return cached_can_issue_more;
26674 if (insn_terminates_group_p (insn, current_group))
26676 cached_can_issue_more = 0;
26677 return cached_can_issue_more;
26680 /* If the insn was not recognized it has no reservation and consumes no issue slot. */
26681 if (recog_memoized (insn) < 0)
26682 return more;
26684 if (rs6000_sched_groups)
26686 if (is_microcoded_insn (insn))
26687 cached_can_issue_more = 0;
26688 else if (is_cracked_insn (insn))
26689 cached_can_issue_more = more > 2 ? more - 2 : 0;
26690 else
26691 cached_can_issue_more = more - 1;
26693 return cached_can_issue_more;
26696 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26697 return 0;
26699 cached_can_issue_more = more - 1;
26700 return cached_can_issue_more;
26703 static int
26704 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
26706 int r = rs6000_variable_issue_1 (insn, more);
26707 if (verbose)
26708 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26709 return r;
26712 /* Adjust the cost of a scheduling dependency. Return the new cost of
26713 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
26715 static int
26716 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26718 enum attr_type attr_type;
26720 if (! recog_memoized (insn))
26721 return 0;
26723 switch (REG_NOTE_KIND (link))
26725 case REG_DEP_TRUE:
26727 /* Data dependency; DEP_INSN writes a register that INSN reads
26728 some cycles later. */
26730 /* Separate a load from a narrower, dependent store. */
26731 if (rs6000_sched_groups
26732 && GET_CODE (PATTERN (insn)) == SET
26733 && GET_CODE (PATTERN (dep_insn)) == SET
26734 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26735 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26736 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26737 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26738 return cost + 14;
26740 attr_type = get_attr_type (insn);
26742 switch (attr_type)
26744 case TYPE_JMPREG:
26745 /* Tell the first scheduling pass about the latency between
26746 a mtctr and bctr (and mtlr and br/blr). The first
26747 scheduling pass will not know about this latency since
26748 the mtctr instruction, which has the latency associated
26749 to it, will be generated by reload. */
26750 return 4;
26751 case TYPE_BRANCH:
26752 /* Leave some extra cycles between a compare and its
26753 dependent branch, to inhibit expensive mispredicts. */
26754 if ((rs6000_cpu_attr == CPU_PPC603
26755 || rs6000_cpu_attr == CPU_PPC604
26756 || rs6000_cpu_attr == CPU_PPC604E
26757 || rs6000_cpu_attr == CPU_PPC620
26758 || rs6000_cpu_attr == CPU_PPC630
26759 || rs6000_cpu_attr == CPU_PPC750
26760 || rs6000_cpu_attr == CPU_PPC7400
26761 || rs6000_cpu_attr == CPU_PPC7450
26762 || rs6000_cpu_attr == CPU_PPCE5500
26763 || rs6000_cpu_attr == CPU_PPCE6500
26764 || rs6000_cpu_attr == CPU_POWER4
26765 || rs6000_cpu_attr == CPU_POWER5
26766 || rs6000_cpu_attr == CPU_POWER7
26767 || rs6000_cpu_attr == CPU_POWER8
26768 || rs6000_cpu_attr == CPU_CELL)
26769 && recog_memoized (dep_insn)
26770 && (INSN_CODE (dep_insn) >= 0))
26772 switch (get_attr_type (dep_insn))
26774 case TYPE_CMP:
26775 case TYPE_FPCOMPARE:
26776 case TYPE_CR_LOGICAL:
26777 case TYPE_DELAYED_CR:
26778 return cost + 2;
26779 case TYPE_EXTS:
26780 case TYPE_MUL:
26781 if (get_attr_dot (dep_insn) == DOT_YES)
26782 return cost + 2;
26783 else
26784 break;
26785 case TYPE_SHIFT:
26786 if (get_attr_dot (dep_insn) == DOT_YES
26787 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
26788 return cost + 2;
26789 else
26790 break;
26791 default:
26792 break;
26794 break;
26796 case TYPE_STORE:
26797 case TYPE_FPSTORE:
26798 if ((rs6000_cpu == PROCESSOR_POWER6)
26799 && recog_memoized (dep_insn)
26800 && (INSN_CODE (dep_insn) >= 0))
26803 if (GET_CODE (PATTERN (insn)) != SET)
26804 /* If this happens, we have to extend this to schedule
26805 optimally. Return default for now. */
26806 return cost;
26808 /* Adjust the cost for the case where the value written
26809 by a fixed point operation is used as the address
26810 gen value on a store. */
26811 switch (get_attr_type (dep_insn))
26813 case TYPE_LOAD:
26814 case TYPE_CNTLZ:
26816 if (! store_data_bypass_p (dep_insn, insn))
26817 return get_attr_sign_extend (dep_insn)
26818 == SIGN_EXTEND_YES ? 6 : 4;
26819 break;
26821 case TYPE_SHIFT:
26823 if (! store_data_bypass_p (dep_insn, insn))
26824 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26825 6 : 3;
26826 break;
26828 case TYPE_INTEGER:
26829 case TYPE_ADD:
26830 case TYPE_LOGICAL:
26831 case TYPE_EXTS:
26832 case TYPE_INSERT:
26834 if (! store_data_bypass_p (dep_insn, insn))
26835 return 3;
26836 break;
26838 case TYPE_STORE:
26839 case TYPE_FPLOAD:
26840 case TYPE_FPSTORE:
26842 if (get_attr_update (dep_insn) == UPDATE_YES
26843 && ! store_data_bypass_p (dep_insn, insn))
26844 return 3;
26845 break;
26847 case TYPE_MUL:
26849 if (! store_data_bypass_p (dep_insn, insn))
26850 return 17;
26851 break;
26853 case TYPE_DIV:
26855 if (! store_data_bypass_p (dep_insn, insn))
26856 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26857 break;
26859 default:
26860 break;
26863 break;
26865 case TYPE_LOAD:
26866 if ((rs6000_cpu == PROCESSOR_POWER6)
26867 && recog_memoized (dep_insn)
26868 && (INSN_CODE (dep_insn) >= 0))
26871 /* Adjust the cost for the case where the value written
26872 by a fixed point instruction is used within the address
26873 gen portion of a subsequent load(u)(x). */
26874 switch (get_attr_type (dep_insn))
26876 case TYPE_LOAD:
26877 case TYPE_CNTLZ:
26879 if (set_to_load_agen (dep_insn, insn))
26880 return get_attr_sign_extend (dep_insn)
26881 == SIGN_EXTEND_YES ? 6 : 4;
26882 break;
26884 case TYPE_SHIFT:
26886 if (set_to_load_agen (dep_insn, insn))
26887 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26888 6 : 3;
26889 break;
26891 case TYPE_INTEGER:
26892 case TYPE_ADD:
26893 case TYPE_LOGICAL:
26894 case TYPE_EXTS:
26895 case TYPE_INSERT:
26897 if (set_to_load_agen (dep_insn, insn))
26898 return 3;
26899 break;
26901 case TYPE_STORE:
26902 case TYPE_FPLOAD:
26903 case TYPE_FPSTORE:
26905 if (get_attr_update (dep_insn) == UPDATE_YES
26906 && set_to_load_agen (dep_insn, insn))
26907 return 3;
26908 break;
26910 case TYPE_MUL:
26912 if (set_to_load_agen (dep_insn, insn))
26913 return 17;
26914 break;
26916 case TYPE_DIV:
26918 if (set_to_load_agen (dep_insn, insn))
26919 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26920 break;
26922 default:
26923 break;
26926 break;
26928 case TYPE_FPLOAD:
26929 if ((rs6000_cpu == PROCESSOR_POWER6)
26930 && get_attr_update (insn) == UPDATE_NO
26931 && recog_memoized (dep_insn)
26932 && (INSN_CODE (dep_insn) >= 0)
26933 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
26934 return 2;
26936 default:
26937 break;
26940 /* Fall out to return default cost. */
26942 break;
26944 case REG_DEP_OUTPUT:
26945 /* Output dependency; DEP_INSN writes a register that INSN writes some
26946 cycles later. */
26947 if ((rs6000_cpu == PROCESSOR_POWER6)
26948 && recog_memoized (dep_insn)
26949 && (INSN_CODE (dep_insn) >= 0))
26951 attr_type = get_attr_type (insn);
26953 switch (attr_type)
26955 case TYPE_FP:
26956 if (get_attr_type (dep_insn) == TYPE_FP)
26957 return 1;
26958 break;
26959 case TYPE_FPLOAD:
26960 if (get_attr_update (insn) == UPDATE_NO
26961 && get_attr_type (dep_insn) == TYPE_MFFGPR)
26962 return 2;
26963 break;
26964 default:
26965 break;
26968 case REG_DEP_ANTI:
26969 /* Anti dependency; DEP_INSN reads a register that INSN writes some
26970 cycles later. */
26971 return 0;
26973 default:
26974 gcc_unreachable ();
26977 return cost;
26980 /* Debug version of rs6000_adjust_cost. */
26982 static int
26983 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
26984 int cost)
26986 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
26988 if (ret != cost)
26990 const char *dep;
26992 switch (REG_NOTE_KIND (link))
26994 default: dep = "unknown dependency"; break;
26995 case REG_DEP_TRUE: dep = "data dependency"; break;
26996 case REG_DEP_OUTPUT: dep = "output dependency"; break;
26997 case REG_DEP_ANTI: dep = "anti dependency"; break;
27000 fprintf (stderr,
27001 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
27002 "%s, insn:\n", ret, cost, dep);
27004 debug_rtx (insn);
27007 return ret;
27010 /* Return true if INSN is microcoded,
27011 false otherwise. */
27013 static bool
27014 is_microcoded_insn (rtx_insn *insn)
27016 if (!insn || !NONDEBUG_INSN_P (insn)
27017 || GET_CODE (PATTERN (insn)) == USE
27018 || GET_CODE (PATTERN (insn)) == CLOBBER)
27019 return false;
27021 if (rs6000_cpu_attr == CPU_CELL)
27022 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
27024 if (rs6000_sched_groups
27025 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27027 enum attr_type type = get_attr_type (insn);
27028 if ((type == TYPE_LOAD
27029 && get_attr_update (insn) == UPDATE_YES
27030 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
27031 || ((type == TYPE_LOAD || type == TYPE_STORE)
27032 && get_attr_update (insn) == UPDATE_YES
27033 && get_attr_indexed (insn) == INDEXED_YES)
27034 || type == TYPE_MFCR)
27035 return true;
27038 return false;
27041 /* The function returns true if INSN is cracked into 2 instructions
27042 by the processor (and therefore occupies 2 issue slots). */
27044 static bool
27045 is_cracked_insn (rtx_insn *insn)
27047 if (!insn || !NONDEBUG_INSN_P (insn)
27048 || GET_CODE (PATTERN (insn)) == USE
27049 || GET_CODE (PATTERN (insn)) == CLOBBER)
27050 return false;
27052 if (rs6000_sched_groups
27053 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27055 enum attr_type type = get_attr_type (insn);
27056 if ((type == TYPE_LOAD
27057 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27058 && get_attr_update (insn) == UPDATE_NO)
27059 || (type == TYPE_LOAD
27060 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
27061 && get_attr_update (insn) == UPDATE_YES
27062 && get_attr_indexed (insn) == INDEXED_NO)
27063 || (type == TYPE_STORE
27064 && get_attr_update (insn) == UPDATE_YES
27065 && get_attr_indexed (insn) == INDEXED_NO)
27066 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
27067 && get_attr_update (insn) == UPDATE_YES)
27068 || type == TYPE_DELAYED_CR
27069 || (type == TYPE_EXTS
27070 && get_attr_dot (insn) == DOT_YES)
27071 || (type == TYPE_SHIFT
27072 && get_attr_dot (insn) == DOT_YES
27073 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
27074 || (type == TYPE_MUL
27075 && get_attr_dot (insn) == DOT_YES)
27076 || type == TYPE_DIV
27077 || (type == TYPE_INSERT
27078 && get_attr_size (insn) == SIZE_32))
27079 return true;
27082 return false;
27085 /* The function returns true if INSN can be issued only from
27086 the branch slot. */
27088 static bool
27089 is_branch_slot_insn (rtx_insn *insn)
27091 if (!insn || !NONDEBUG_INSN_P (insn)
27092 || GET_CODE (PATTERN (insn)) == USE
27093 || GET_CODE (PATTERN (insn)) == CLOBBER)
27094 return false;
27096 if (rs6000_sched_groups)
27098 enum attr_type type = get_attr_type (insn);
27099 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
27100 return true;
27101 return false;
27104 return false;
27107 /* Return true if OUT_INSN sets a value that is
27108 used in the address generation computation of IN_INSN. */
27109 static bool
27110 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
27112 rtx out_set, in_set;
27114 /* For performance reasons, only handle the simple case where
27115 both loads are a single_set. */
27116 out_set = single_set (out_insn);
27117 if (out_set)
27119 in_set = single_set (in_insn);
27120 if (in_set)
27121 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
27124 return false;
27127 /* Try to determine the base/offset/size parts of the given MEM.
27128 Return true if successful, false if any of the values couldn't
27129 be determined.
27131 This function only looks for REG or REG+CONST address forms.
27132 The REG+REG address form will return false. */
27134 static bool
27135 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
27136 HOST_WIDE_INT *size)
27138 rtx addr_rtx;
27139 if (MEM_SIZE_KNOWN_P (mem))
27140 *size = MEM_SIZE (mem);
27141 else
27142 return false;
27144 if (GET_CODE (XEXP (mem, 0)) == PRE_MODIFY)
27145 addr_rtx = XEXP (XEXP (mem, 0), 1);
27146 else
27147 addr_rtx = (XEXP (mem, 0));
27149 if (GET_CODE (addr_rtx) == REG)
27151 *base = addr_rtx;
27152 *offset = 0;
27154 else if (GET_CODE (addr_rtx) == PLUS
27155 && CONST_INT_P (XEXP (addr_rtx, 1)))
27157 *base = XEXP (addr_rtx, 0);
27158 *offset = INTVAL (XEXP (addr_rtx, 1));
27160 else
27161 return false;
27163 return true;
27166 /* Return true if the target storage location of MEM1 is
27167 adjacent to the target storage location of MEM2. */
27170 static bool
27171 adjacent_mem_locations (rtx mem1, rtx mem2)
27173 rtx reg1, reg2;
27174 HOST_WIDE_INT off1, size1, off2, size2;
27176 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27177 && get_memref_parts (mem2, &reg2, &off2, &size2))
27178 return ((REGNO (reg1) == REGNO (reg2))
27179 && ((off1 + size1 == off2)
27180 || (off2 + size2 == off1)));
27182 return false;
27185 /* This function returns true if it can be determined that the two MEM
27186 locations overlap by at least 1 byte based on base reg/offset/size. */
27188 static bool
27189 mem_locations_overlap (rtx mem1, rtx mem2)
27191 rtx reg1, reg2;
27192 HOST_WIDE_INT off1, size1, off2, size2;
27194 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27195 && get_memref_parts (mem2, &reg2, &off2, &size2))
27196 return ((REGNO (reg1) == REGNO (reg2))
27197 && (((off1 <= off2) && (off1 + size1 > off2))
27198 || ((off2 <= off1) && (off2 + size2 > off1))));
27200 return false;
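/* Editorial sketch (not part of the original source): the two
   predicates above reduce to these interval tests once both MEMs
   are known to share a base register.  */

#include <stdbool.h>

static bool
intervals_adjacent (long off1, long size1, long off2, long size2)
{
  /* One access ends exactly where the other begins.  */
  return off1 + size1 == off2 || off2 + size2 == off1;
}

static bool
intervals_overlap (long off1, long size1, long off2, long size2)
{
  /* The half-open intervals [off, off+size) intersect.  */
  return (off1 <= off2 && off1 + size1 > off2)
	 || (off2 <= off1 && off2 + size2 > off1);
}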
27203 /* A C statement (sans semicolon) to update the integer scheduling
27204 priority INSN_PRIORITY (INSN). Increase the priority to execute
27205 INSN earlier; reduce the priority to execute INSN later. Do not
27206 define this macro if you do not need to adjust the scheduling
27207 priorities of insns. */
27209 static int
27210 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
27212 rtx load_mem, str_mem;
27213 /* On machines (like the 750) which have asymmetric integer units,
27214 where one integer unit can do multiply and divides and the other
27215 can't, reduce the priority of multiply/divide so it is scheduled
27216 before other integer operations. */
27218 #if 0
27219 if (! INSN_P (insn))
27220 return priority;
27222 if (GET_CODE (PATTERN (insn)) == USE)
27223 return priority;
27225 switch (rs6000_cpu_attr) {
27226 case CPU_PPC750:
27227 switch (get_attr_type (insn))
27229 default:
27230 break;
27232 case TYPE_MUL:
27233 case TYPE_DIV:
27234 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
27235 priority, priority);
27236 if (priority >= 0 && priority < 0x01000000)
27237 priority >>= 3;
27238 break;
27241 #endif
27243 if (insn_must_be_first_in_group (insn)
27244 && reload_completed
27245 && current_sched_info->sched_max_insns_priority
27246 && rs6000_sched_restricted_insns_priority)
27249 /* Prioritize insns that can be dispatched only in the first
27250 dispatch slot. */
27251 if (rs6000_sched_restricted_insns_priority == 1)
27252 /* Attach highest priority to insn. This means that in
27253 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
27254 precede 'priority' (critical path) considerations. */
27255 return current_sched_info->sched_max_insns_priority;
27256 else if (rs6000_sched_restricted_insns_priority == 2)
27257 /* Increase priority of insn by a minimal amount. This means that in
27258 haifa-sched.c:ready_sort(), only 'priority' (critical path)
27259 considerations precede dispatch-slot restriction considerations. */
27260 return (priority + 1);
27263 if (rs6000_cpu == PROCESSOR_POWER6
27264 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
27265 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
27266 /* Attach highest priority to insn if the scheduler has just issued two
27267 stores and this instruction is a load, or two loads and this instruction
27268 is a store. Power6 wants loads and stores scheduled alternately
27269 when possible. */
27270 return current_sched_info->sched_max_insns_priority;
27272 return priority;
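/* Editorial sketch (not part of the original source) of the Power6
   load/store pendulum consulted above: the scheduler drives the
   counter to -2 after two consecutive stores and to +2 after two
   consecutive loads, so an insn of the opposite kind is given
   maximal priority to keep loads and stores alternating.  */

static int
pendulum_priority (int pendulum, int is_load, int is_store,
		   int priority, int max_priority)
{
  if ((pendulum == -2 && is_load) || (pendulum == 2 && is_store))
    return max_priority;	/* the overdue kind: issue it next */
  return priority;
}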
27275 /* Return true if the instruction is nonpipelined on the Cell. */
27276 static bool
27277 is_nonpipeline_insn (rtx_insn *insn)
27279 enum attr_type type;
27280 if (!insn || !NONDEBUG_INSN_P (insn)
27281 || GET_CODE (PATTERN (insn)) == USE
27282 || GET_CODE (PATTERN (insn)) == CLOBBER)
27283 return false;
27285 type = get_attr_type (insn);
27286 if (type == TYPE_MUL
27287 || type == TYPE_DIV
27288 || type == TYPE_SDIV
27289 || type == TYPE_DDIV
27290 || type == TYPE_SSQRT
27291 || type == TYPE_DSQRT
27292 || type == TYPE_MFCR
27293 || type == TYPE_MFCRF
27294 || type == TYPE_MFJMPR)
27296 return true;
27298 return false;
27302 /* Return how many instructions the machine can issue per cycle. */
27304 static int
27305 rs6000_issue_rate (void)
27307 /* Unless scheduling for register pressure, use issue rate of 1 for
27308 first scheduling pass to decrease degradation. */
27309 if (!reload_completed && !flag_sched_pressure)
27310 return 1;
27312 switch (rs6000_cpu_attr) {
27313 case CPU_RS64A:
27314 case CPU_PPC601: /* ? */
27315 case CPU_PPC7450:
27316 return 3;
27317 case CPU_PPC440:
27318 case CPU_PPC603:
27319 case CPU_PPC750:
27320 case CPU_PPC7400:
27321 case CPU_PPC8540:
27322 case CPU_PPC8548:
27323 case CPU_CELL:
27324 case CPU_PPCE300C2:
27325 case CPU_PPCE300C3:
27326 case CPU_PPCE500MC:
27327 case CPU_PPCE500MC64:
27328 case CPU_PPCE5500:
27329 case CPU_PPCE6500:
27330 case CPU_TITAN:
27331 return 2;
27332 case CPU_PPC476:
27333 case CPU_PPC604:
27334 case CPU_PPC604E:
27335 case CPU_PPC620:
27336 case CPU_PPC630:
27337 return 4;
27338 case CPU_POWER4:
27339 case CPU_POWER5:
27340 case CPU_POWER6:
27341 case CPU_POWER7:
27342 return 5;
27343 case CPU_POWER8:
27344 return 7;
27345 default:
27346 return 1;
27350 /* Return how many instructions to look ahead for better insn
27351 scheduling. */
27353 static int
27354 rs6000_use_sched_lookahead (void)
27356 switch (rs6000_cpu_attr)
27358 case CPU_PPC8540:
27359 case CPU_PPC8548:
27360 return 4;
27362 case CPU_CELL:
27363 return (reload_completed ? 8 : 0);
27365 default:
27366 return 0;
27370 /* We are choosing an insn from the ready queue. Return zero if INSN can be
27371 chosen. */
27372 static int
27373 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27375 if (ready_index == 0)
27376 return 0;
27378 if (rs6000_cpu_attr != CPU_CELL)
27379 return 0;
27381 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27383 if (!reload_completed
27384 || is_nonpipeline_insn (insn)
27385 || is_microcoded_insn (insn))
27386 return 1;
27388 return 0;
27391 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27392 and return true. */
27394 static bool
27395 find_mem_ref (rtx pat, rtx *mem_ref)
27397 const char * fmt;
27398 int i, j;
27400 /* stack_tie does not produce any real memory traffic. */
27401 if (tie_operand (pat, VOIDmode))
27402 return false;
27404 if (GET_CODE (pat) == MEM)
27406 *mem_ref = pat;
27407 return true;
27410 /* Recursively process the pattern. */
27411 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27413 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27415 if (fmt[i] == 'e')
27417 if (find_mem_ref (XEXP (pat, i), mem_ref))
27418 return true;
27420 else if (fmt[i] == 'E')
27421 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27423 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27424 return true;
27428 return false;
27431 /* Determine if PAT is a PATTERN of a load insn. */
27433 static bool
27434 is_load_insn1 (rtx pat, rtx *load_mem)
27436 if (!pat)
27437 return false;
27439 if (GET_CODE (pat) == SET)
27440 return find_mem_ref (SET_SRC (pat), load_mem);
27442 if (GET_CODE (pat) == PARALLEL)
27444 int i;
27446 for (i = 0; i < XVECLEN (pat, 0); i++)
27447 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27448 return true;
27451 return false;
27454 /* Determine if INSN loads from memory. */
27456 static bool
27457 is_load_insn (rtx insn, rtx *load_mem)
27459 if (!insn || !INSN_P (insn))
27460 return false;
27462 if (CALL_P (insn))
27463 return false;
27465 return is_load_insn1 (PATTERN (insn), load_mem);
27468 /* Determine if PAT is a PATTERN of a store insn. */
27470 static bool
27471 is_store_insn1 (rtx pat, rtx *str_mem)
27473 if (!pat)
27474 return false;
27476 if (GET_CODE (pat) == SET)
27477 return find_mem_ref (SET_DEST (pat), str_mem);
27479 if (GET_CODE (pat) == PARALLEL)
27481 int i;
27483 for (i = 0; i < XVECLEN (pat, 0); i++)
27484 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27485 return true;
27488 return false;
27491 /* Determine if INSN stores to memory. */
27493 static bool
27494 is_store_insn (rtx insn, rtx *str_mem)
27496 if (!insn || !INSN_P (insn))
27497 return false;
27499 return is_store_insn1 (PATTERN (insn), str_mem);
27502 /* Returns whether the dependence between INSN and NEXT is considered
27503 costly by the given target. */
27505 static bool
27506 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27508 rtx insn;
27509 rtx next;
27510 rtx load_mem, str_mem;
27512 /* If the flag is not enabled - no dependence is considered costly;
27513 allow all dependent insns in the same group.
27514 This is the most aggressive option. */
27515 if (rs6000_sched_costly_dep == no_dep_costly)
27516 return false;
27518 /* If the flag is set to 1 - a dependence is always considered costly;
27519 do not allow dependent instructions in the same group.
27520 This is the most conservative option. */
27521 if (rs6000_sched_costly_dep == all_deps_costly)
27522 return true;
27524 insn = DEP_PRO (dep);
27525 next = DEP_CON (dep);
27527 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27528 && is_load_insn (next, &load_mem)
27529 && is_store_insn (insn, &str_mem))
27530 /* Prevent load after store in the same group. */
27531 return true;
27533 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27534 && is_load_insn (next, &load_mem)
27535 && is_store_insn (insn, &str_mem)
27536 && DEP_TYPE (dep) == REG_DEP_TRUE
27537 && mem_locations_overlap(str_mem, load_mem))
27538 /* Prevent load after store in the same group if it is a true
27539 dependence. */
27540 return true;
27542 /* The flag is set to X; dependences with latency >= X are considered costly,
27543 and will not be scheduled in the same group. */
27544 if (rs6000_sched_costly_dep <= max_dep_latency
27545 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27546 return true;
27548 return false;
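/* Illustrative sketch (not built; hypothetical numbers): the
   max_dep_latency case above flags a dependence as costly when its
   remaining latency at the current distance reaches the threshold
   given by -msched-costly-dep.  */
#if 0
static int
demo_costly_latency (void)
{
  int cost = 4;       /* latency of the dependence                    */
  int distance = 1;   /* insns already separating producer/consumer   */
  int threshold = 2;  /* value of rs6000_sched_costly_dep             */

  return (cost - distance) >= threshold;   /* 3 >= 2: costly */
}
#endif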
27551 /* Return the next insn after INSN that is found before TAIL is reached,
27552 skipping any "non-active" insns - insns that will not actually occupy
27553 an issue slot. Return NULL_RTX if such an insn is not found. */
27555 static rtx_insn *
27556 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27558 if (insn == NULL_RTX || insn == tail)
27559 return NULL;
27561 while (1)
27563 insn = NEXT_INSN (insn);
27564 if (insn == NULL_RTX || insn == tail)
27565 return NULL;
27567 if (CALL_P (insn)
27568 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27569 || (NONJUMP_INSN_P (insn)
27570 && GET_CODE (PATTERN (insn)) != USE
27571 && GET_CODE (PATTERN (insn)) != CLOBBER
27572 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27573 break;
27575 return insn;
27578 /* We are about to begin issuing insns for this clock cycle. */
27580 static int
27581 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27582 rtx_insn **ready ATTRIBUTE_UNUSED,
27583 int *pn_ready ATTRIBUTE_UNUSED,
27584 int clock_var ATTRIBUTE_UNUSED)
27586 int n_ready = *pn_ready;
27588 if (sched_verbose)
27589 fprintf (dump, "// rs6000_sched_reorder :\n");
27591 /* Reorder the ready list, if the second to last ready insn
27592 is a nonpipeline insn. */
27593 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27595 if (is_nonpipeline_insn (ready[n_ready - 1])
27596 && (recog_memoized (ready[n_ready - 2]) > 0))
27597 /* Simply swap first two insns. */
27598 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
27601 if (rs6000_cpu == PROCESSOR_POWER6)
27602 load_store_pendulum = 0;
27604 return rs6000_issue_rate ();
27607 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27609 static int
27610 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
27611 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27613 if (sched_verbose)
27614 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27616 /* For Power6, we need to handle some special cases to try and keep the
27617 store queue from overflowing and triggering expensive flushes.
27619 This code monitors how load and store instructions are being issued
27620 and skews the ready list one way or the other to increase the likelihood
27621 that a desired instruction is issued at the proper time.
27623 A couple of things are done. First, we maintain a "load_store_pendulum"
27624 to track the current state of load/store issue.
27626 - If the pendulum is at zero, then no loads or stores have been
27627 issued in the current cycle so we do nothing.
27629 - If the pendulum is 1, then a single load has been issued in this
27630 cycle and we attempt to locate another load in the ready list to
27631 issue with it.
27633 - If the pendulum is -2, then two stores have already been
27634 issued in this cycle, so we increase the priority of the first load
27635 in the ready list to increase its likelihood of being chosen first
27636 in the next cycle.
27638 - If the pendulum is -1, then a single store has been issued in this
27639 cycle and we attempt to locate another store in the ready list to
27640 issue with it, preferring a store to an adjacent memory location to
27641 facilitate store pairing in the store queue.
27643 - If the pendulum is 2, then two loads have already been
27644 issued in this cycle, so we increase the priority of the first store
27645 in the ready list to increase its likelihood of being chosen first
27646 in the next cycle.
27648 - If the pendulum < -2 or > 2, then do nothing.
27650 Note: This code covers the most common scenarios. There exist
27651 non-load/store instructions which make use of the LSU and which
27652 would need to be accounted for to strictly model the behavior
27653 of the machine. Those instructions are currently unaccounted
27654 for to help minimize compile time overhead of this code.
27656 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27658 int pos;
27659 int i;
27660 rtx_insn *tmp;
27661 rtx load_mem, str_mem;
27663 if (is_store_insn (last_scheduled_insn, &str_mem))
27664 /* Issuing a store, swing the load_store_pendulum to the left */
27665 load_store_pendulum--;
27666 else if (is_load_insn (last_scheduled_insn, &load_mem))
27667 /* Issuing a load, swing the load_store_pendulum to the right */
27668 load_store_pendulum++;
27669 else
27670 return cached_can_issue_more;
27672 /* If the pendulum is balanced, or there is only one instruction on
27673 the ready list, then all is well, so return. */
27674 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27675 return cached_can_issue_more;
27677 if (load_store_pendulum == 1)
27679 /* A load has been issued in this cycle. Scan the ready list
27680 for another load to issue with it */
27681 pos = *pn_ready-1;
27683 while (pos >= 0)
27685 if (is_load_insn (ready[pos], &load_mem))
27687 /* Found a load. Move it to the head of the ready list,
27688 and adjust its priority so that it is more likely to
27689 stay there */
27690 tmp = ready[pos];
27691 for (i=pos; i<*pn_ready-1; i++)
27692 ready[i] = ready[i + 1];
27693 ready[*pn_ready-1] = tmp;
27695 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27696 INSN_PRIORITY (tmp)++;
27697 break;
27699 pos--;
27702 else if (load_store_pendulum == -2)
27704 /* Two stores have been issued in this cycle. Increase the
27705 priority of the first load in the ready list to favor it for
27706 issuing in the next cycle. */
27707 pos = *pn_ready-1;
27709 while (pos >= 0)
27711 if (is_load_insn (ready[pos], &load_mem)
27712 && !sel_sched_p ()
27713 && INSN_PRIORITY_KNOWN (ready[pos]))
27715 INSN_PRIORITY (ready[pos])++;
27717 /* Adjust the pendulum to account for the fact that a load
27718 was found and increased in priority. This is to prevent
27719 increasing the priority of multiple loads */
27720 load_store_pendulum--;
27722 break;
27724 pos--;
27727 else if (load_store_pendulum == -1)
27729 /* A store has been issued in this cycle. Scan the ready list for
27730 another store to issue with it, preferring a store to an adjacent
27731 memory location */
27732 int first_store_pos = -1;
27734 pos = *pn_ready-1;
27736 while (pos >= 0)
27738 if (is_store_insn (ready[pos], &str_mem))
27740 rtx str_mem2;
27741 /* Maintain the index of the first store found on the
27742 list */
27743 if (first_store_pos == -1)
27744 first_store_pos = pos;
27746 if (is_store_insn (last_scheduled_insn, &str_mem2)
27747 && adjacent_mem_locations (str_mem, str_mem2))
27749 /* Found an adjacent store. Move it to the head of the
27750 ready list, and adjust its priority so that it is
27751 more likely to stay there */
27752 tmp = ready[pos];
27753 for (i=pos; i<*pn_ready-1; i++)
27754 ready[i] = ready[i + 1];
27755 ready[*pn_ready-1] = tmp;
27757 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27758 INSN_PRIORITY (tmp)++;
27760 first_store_pos = -1;
27762 break;
27765 pos--;
27768 if (first_store_pos >= 0)
27770 /* An adjacent store wasn't found, but a non-adjacent store was,
27771 so move the non-adjacent store to the front of the ready
27772 list, and adjust its priority so that it is more likely to
27773 stay there. */
27774 tmp = ready[first_store_pos];
27775 for (i=first_store_pos; i<*pn_ready-1; i++)
27776 ready[i] = ready[i + 1];
27777 ready[*pn_ready-1] = tmp;
27778 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27779 INSN_PRIORITY (tmp)++;
27782 else if (load_store_pendulum == 2)
27784 /* Two loads have been issued in this cycle. Increase the priority
27785 of the first store in the ready list to favor it for issuing in
27786 the next cycle. */
27787 pos = *pn_ready-1;
27789 while (pos >= 0)
27791 if (is_store_insn (ready[pos], &str_mem)
27792 && !sel_sched_p ()
27793 && INSN_PRIORITY_KNOWN (ready[pos]))
27795 INSN_PRIORITY (ready[pos])++;
27797 /* Adjust the pendulum to account for the fact that a store
27798 was found and increased in priority. This is to prevent
27799 increasing the priority of multiple stores */
27800 load_store_pendulum++;
27802 break;
27804 pos--;
27809 return cached_can_issue_more;
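/* Illustrative sketch (not built): one possible trace of the Power6
   load/store pendulum driven by rs6000_sched_reorder2 above.  Stores
   swing it negative, loads positive; rs6000_sched_reorder resets it
   to zero at the start of each cycle.  */
#if 0
static void
demo_pendulum (void)
{
  int pendulum = 0; /* reset by rs6000_sched_reorder each cycle */

  pendulum--;       /* -1: a store issued; hunt for a second,
                           preferably adjacent, store */
  pendulum--;       /* -2: two stores issued; bump the priority of
                           the first load on the ready list */

  pendulum = 0;     /* next cycle */
  pendulum++;       /*  1: a load issued; hunt for a second load */
  pendulum++;       /*  2: two loads issued; bump the priority of
                           the first store on the ready list */
}
#endif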
27812 /* Return whether the presence of INSN causes a dispatch group termination
27813 of group WHICH_GROUP.
27815 If WHICH_GROUP == current_group, this function will return true if INSN
27816 causes the termination of the current group (i.e, the dispatch group to
27817 which INSN belongs). This means that INSN will be the last insn in the
27818 group it belongs to.
27820 If WHICH_GROUP == previous_group, this function will return true if INSN
27821 causes the termination of the previous group (i.e, the dispatch group that
27822 precedes the group to which INSN belongs). This means that INSN will be
27823 the first insn in the group it belongs to. */
27825 static bool
27826 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
27828 bool first, last;
27830 if (! insn)
27831 return false;
27833 first = insn_must_be_first_in_group (insn);
27834 last = insn_must_be_last_in_group (insn);
27836 if (first && last)
27837 return true;
27839 if (which_group == current_group)
27840 return last;
27841 else if (which_group == previous_group)
27842 return first;
27844 return false;
27848 static bool
27849 insn_must_be_first_in_group (rtx_insn *insn)
27851 enum attr_type type;
27853 if (!insn
27854 || NOTE_P (insn)
27855 || DEBUG_INSN_P (insn)
27856 || GET_CODE (PATTERN (insn)) == USE
27857 || GET_CODE (PATTERN (insn)) == CLOBBER)
27858 return false;
27860 switch (rs6000_cpu)
27862 case PROCESSOR_POWER5:
27863 if (is_cracked_insn (insn))
27864 return true;
27865 case PROCESSOR_POWER4:
27866 if (is_microcoded_insn (insn))
27867 return true;
27869 if (!rs6000_sched_groups)
27870 return false;
27872 type = get_attr_type (insn);
27874 switch (type)
27876 case TYPE_MFCR:
27877 case TYPE_MFCRF:
27878 case TYPE_MTCR:
27879 case TYPE_DELAYED_CR:
27880 case TYPE_CR_LOGICAL:
27881 case TYPE_MTJMPR:
27882 case TYPE_MFJMPR:
27883 case TYPE_DIV:
27884 case TYPE_LOAD_L:
27885 case TYPE_STORE_C:
27886 case TYPE_ISYNC:
27887 case TYPE_SYNC:
27888 return true;
27889 default:
27890 break;
27892 break;
27893 case PROCESSOR_POWER6:
27894 type = get_attr_type (insn);
27896 switch (type)
27898 case TYPE_EXTS:
27899 case TYPE_CNTLZ:
27900 case TYPE_TRAP:
27901 case TYPE_MUL:
27902 case TYPE_INSERT:
27903 case TYPE_FPCOMPARE:
27904 case TYPE_MFCR:
27905 case TYPE_MTCR:
27906 case TYPE_MFJMPR:
27907 case TYPE_MTJMPR:
27908 case TYPE_ISYNC:
27909 case TYPE_SYNC:
27910 case TYPE_LOAD_L:
27911 case TYPE_STORE_C:
27912 return true;
27913 case TYPE_SHIFT:
27914 if (get_attr_dot (insn) == DOT_NO
27915 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27916 return true;
27917 else
27918 break;
27919 case TYPE_DIV:
27920 if (get_attr_size (insn) == SIZE_32)
27921 return true;
27922 else
27923 break;
27924 case TYPE_LOAD:
27925 case TYPE_STORE:
27926 case TYPE_FPLOAD:
27927 case TYPE_FPSTORE:
27928 if (get_attr_update (insn) == UPDATE_YES)
27929 return true;
27930 else
27931 break;
27932 default:
27933 break;
27935 break;
27936 case PROCESSOR_POWER7:
27937 type = get_attr_type (insn);
27939 switch (type)
27941 case TYPE_CR_LOGICAL:
27942 case TYPE_MFCR:
27943 case TYPE_MFCRF:
27944 case TYPE_MTCR:
27945 case TYPE_DIV:
27946 case TYPE_ISYNC:
27947 case TYPE_LOAD_L:
27948 case TYPE_STORE_C:
27949 case TYPE_MFJMPR:
27950 case TYPE_MTJMPR:
27951 return true;
27952 case TYPE_MUL:
27953 case TYPE_SHIFT:
27954 case TYPE_EXTS:
27955 if (get_attr_dot (insn) == DOT_YES)
27956 return true;
27957 else
27958 break;
27959 case TYPE_LOAD:
27960 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27961 || get_attr_update (insn) == UPDATE_YES)
27962 return true;
27963 else
27964 break;
27965 case TYPE_STORE:
27966 case TYPE_FPLOAD:
27967 case TYPE_FPSTORE:
27968 if (get_attr_update (insn) == UPDATE_YES)
27969 return true;
27970 else
27971 break;
27972 default:
27973 break;
27975 break;
27976 case PROCESSOR_POWER8:
27977 type = get_attr_type (insn);
27979 switch (type)
27981 case TYPE_CR_LOGICAL:
27982 case TYPE_DELAYED_CR:
27983 case TYPE_MFCR:
27984 case TYPE_MFCRF:
27985 case TYPE_MTCR:
27986 case TYPE_SYNC:
27987 case TYPE_ISYNC:
27988 case TYPE_LOAD_L:
27989 case TYPE_STORE_C:
27990 case TYPE_VECSTORE:
27991 case TYPE_MFJMPR:
27992 case TYPE_MTJMPR:
27993 return true;
27994 case TYPE_SHIFT:
27995 case TYPE_EXTS:
27996 case TYPE_MUL:
27997 if (get_attr_dot (insn) == DOT_YES)
27998 return true;
27999 else
28000 break;
28001 case TYPE_LOAD:
28002 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28003 || get_attr_update (insn) == UPDATE_YES)
28004 return true;
28005 else
28006 break;
28007 case TYPE_STORE:
28008 if (get_attr_update (insn) == UPDATE_YES
28009 && get_attr_indexed (insn) == INDEXED_YES)
28010 return true;
28011 else
28012 break;
28013 default:
28014 break;
28016 break;
28017 default:
28018 break;
28021 return false;
28024 static bool
28025 insn_must_be_last_in_group (rtx_insn *insn)
28027 enum attr_type type;
28029 if (!insn
28030 || NOTE_P (insn)
28031 || DEBUG_INSN_P (insn)
28032 || GET_CODE (PATTERN (insn)) == USE
28033 || GET_CODE (PATTERN (insn)) == CLOBBER)
28034 return false;
28036 switch (rs6000_cpu) {
28037 case PROCESSOR_POWER4:
28038 case PROCESSOR_POWER5:
28039 if (is_microcoded_insn (insn))
28040 return true;
28042 if (is_branch_slot_insn (insn))
28043 return true;
28045 break;
28046 case PROCESSOR_POWER6:
28047 type = get_attr_type (insn);
28049 switch (type)
28051 case TYPE_EXTS:
28052 case TYPE_CNTLZ:
28053 case TYPE_TRAP:
28054 case TYPE_MUL:
28055 case TYPE_FPCOMPARE:
28056 case TYPE_MFCR:
28057 case TYPE_MTCR:
28058 case TYPE_MFJMPR:
28059 case TYPE_MTJMPR:
28060 case TYPE_ISYNC:
28061 case TYPE_SYNC:
28062 case TYPE_LOAD_L:
28063 case TYPE_STORE_C:
28064 return true;
28065 case TYPE_SHIFT:
28066 if (get_attr_dot (insn) == DOT_NO
28067 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
28068 return true;
28069 else
28070 break;
28071 case TYPE_DIV:
28072 if (get_attr_size (insn) == SIZE_32)
28073 return true;
28074 else
28075 break;
28076 default:
28077 break;
28079 break;
28080 case PROCESSOR_POWER7:
28081 type = get_attr_type (insn);
28083 switch (type)
28085 case TYPE_ISYNC:
28086 case TYPE_SYNC:
28087 case TYPE_LOAD_L:
28088 case TYPE_STORE_C:
28089 return true;
28090 case TYPE_LOAD:
28091 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28092 && get_attr_update (insn) == UPDATE_YES)
28093 return true;
28094 else
28095 break;
28096 case TYPE_STORE:
28097 if (get_attr_update (insn) == UPDATE_YES
28098 && get_attr_indexed (insn) == INDEXED_YES)
28099 return true;
28100 else
28101 break;
28102 default:
28103 break;
28105 break;
28106 case PROCESSOR_POWER8:
28107 type = get_attr_type (insn);
28109 switch (type)
28111 case TYPE_MFCR:
28112 case TYPE_MTCR:
28113 case TYPE_ISYNC:
28114 case TYPE_SYNC:
28115 case TYPE_LOAD_L:
28116 case TYPE_STORE_C:
28117 return true;
28118 case TYPE_LOAD:
28119 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28120 && get_attr_update (insn) == UPDATE_YES)
28121 return true;
28122 else
28123 break;
28124 case TYPE_STORE:
28125 if (get_attr_update (insn) == UPDATE_YES
28126 && get_attr_indexed (insn) == INDEXED_YES)
28127 return true;
28128 else
28129 break;
28130 default:
28131 break;
28133 break;
28134 default:
28135 break;
28138 return false;
28141 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
28142 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
28144 static bool
28145 is_costly_group (rtx *group_insns, rtx next_insn)
28147 int i;
28148 int issue_rate = rs6000_issue_rate ();
28150 for (i = 0; i < issue_rate; i++)
28152 sd_iterator_def sd_it;
28153 dep_t dep;
28154 rtx insn = group_insns[i];
28156 if (!insn)
28157 continue;
28159 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
28161 rtx next = DEP_CON (dep);
28163 if (next == next_insn
28164 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
28165 return true;
28169 return false;
28172 /* Utility of the function redefine_groups.
28173 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
28174 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
28175 to keep it "far" (in a separate group) from GROUP_INSNS, following
28176 one of the following schemes, depending on the value of the flag
28177 -minsert_sched_nops = X:
28178 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
28179 in order to force NEXT_INSN into a separate group.
28180 (2) X < sched_finish_regroup_exact: insert exactly X nops.
28181 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
28182 insertion (has a group just ended, how many vacant issue slots remain in the
28183 last group, and how many dispatch groups were encountered so far). */
28185 static int
28186 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
28187 rtx_insn *next_insn, bool *group_end, int can_issue_more,
28188 int *group_count)
28190 rtx nop;
28191 bool force;
28192 int issue_rate = rs6000_issue_rate ();
28193 bool end = *group_end;
28194 int i;
28196 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
28197 return can_issue_more;
28199 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
28200 return can_issue_more;
28202 force = is_costly_group (group_insns, next_insn);
28203 if (!force)
28204 return can_issue_more;
28206 if (sched_verbose > 6)
28207 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
28208 *group_count, can_issue_more);
28210 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
28212 if (*group_end)
28213 can_issue_more = 0;
28215 /* Since only a branch can be issued in the last issue_slot, it is
28216 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
28217 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
28218 in this case the last nop will start a new group and the branch
28219 will be forced to the new group. */
28220 if (can_issue_more && !is_branch_slot_insn (next_insn))
28221 can_issue_more--;
28223 /* Do we have a special group ending nop? */
28224 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
28225 || rs6000_cpu_attr == CPU_POWER8)
28227 nop = gen_group_ending_nop ();
28228 emit_insn_before (nop, next_insn);
28229 can_issue_more = 0;
28231 else
28232 while (can_issue_more > 0)
28234 nop = gen_nop ();
28235 emit_insn_before (nop, next_insn);
28236 can_issue_more--;
28239 *group_end = true;
28240 return 0;
28243 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
28245 int n_nops = rs6000_sched_insert_nops;
28247 /* Nops can't be issued from the branch slot, so the effective
28248 issue_rate for nops is 'issue_rate - 1'. */
28249 if (can_issue_more == 0)
28250 can_issue_more = issue_rate;
28251 can_issue_more--;
28252 if (can_issue_more == 0)
28254 can_issue_more = issue_rate - 1;
28255 (*group_count)++;
28256 end = true;
28257 for (i = 0; i < issue_rate; i++)
28259 group_insns[i] = 0;
28263 while (n_nops > 0)
28265 nop = gen_nop ();
28266 emit_insn_before (nop, next_insn);
28267 if (can_issue_more == issue_rate - 1) /* new group begins */
28268 end = false;
28269 can_issue_more--;
28270 if (can_issue_more == 0)
28272 can_issue_more = issue_rate - 1;
28273 (*group_count)++;
28274 end = true;
28275 for (i = 0; i < issue_rate; i++)
28277 group_insns[i] = 0;
28280 n_nops--;
28283 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
28284 can_issue_more++;
28286 /* Is next_insn going to start a new group? */
28287 *group_end
28288 = (end
28289 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28290 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28291 || (can_issue_more < issue_rate &&
28292 insn_terminates_group_p (next_insn, previous_group)));
28293 if (*group_end && end)
28294 (*group_count)--;
28296 if (sched_verbose > 6)
28297 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
28298 *group_count, can_issue_more);
28299 return can_issue_more;
28302 return can_issue_more;
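/* Illustrative sketch (not built; hypothetical numbers): nop accounting
   for the -minsert-sched-nops=N scheme handled above.  The branch slot
   cannot hold a nop, so nops fill dispatch groups at an effective rate
   of 'issue_rate - 1'.  */
#if 0
static int
demo_nop_accounting (void)
{
  int issue_rate = 5;            /* e.g. a POWER4-style group  */
  int n_nops = 6;
  int slots = issue_rate - 1;    /* nop slots in current group */
  int groups_completed = 0;

  while (n_nops > 0)
    {
      slots--;
      if (slots == 0)
        {
          slots = issue_rate - 1;  /* a fresh group begins */
          groups_completed++;
        }
      n_nops--;
    }
  /* 6 nops complete one 4-nop group; 2 spill into the next.  */
  return groups_completed;
}
#endif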
28305 /* This function tries to synch the dispatch groups that the compiler "sees"
28306 with the dispatch groups that the processor dispatcher is expected to
28307 form in practice. It tries to achieve this synchronization by forcing the
28308 estimated processor grouping on the compiler (as opposed to the function
28309 'pad_groups' which tries to force the scheduler's grouping on the processor).
28311 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
28312 examines the (estimated) dispatch groups that will be formed by the processor
28313 dispatcher. It marks these group boundaries to reflect the estimated
28314 processor grouping, overriding the grouping that the scheduler had marked.
28315 Depending on the value of the flag '-minsert-sched-nops' this function can
28316 force certain insns into separate groups or force a certain distance between
28317 them by inserting nops, for example, if there exists a "costly dependence"
28318 between the insns.
28320 The function estimates the group boundaries that the processor will form as
28321 follows: It keeps track of how many vacant issue slots are available after
28322 each insn. A subsequent insn will start a new group if one of the following
28323 4 cases applies:
28324 - no more vacant issue slots remain in the current dispatch group.
28325 - only the last issue slot, which is the branch slot, is vacant, but the next
28326 insn is not a branch.
28327 - at most the last 2 issue slots, including the branch slot, are vacant,
28328 which means that a cracked insn (which occupies two issue slots) can't be
28329 issued in this group.
28330 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
28331 start a new group. */
28333 static int
28334 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28335 rtx_insn *tail)
28337 rtx_insn *insn, *next_insn;
28338 int issue_rate;
28339 int can_issue_more;
28340 int slot, i;
28341 bool group_end;
28342 int group_count = 0;
28343 rtx *group_insns;
28345 /* Initialize. */
28346 issue_rate = rs6000_issue_rate ();
28347 group_insns = XALLOCAVEC (rtx, issue_rate);
28348 for (i = 0; i < issue_rate; i++)
28350 group_insns[i] = 0;
28352 can_issue_more = issue_rate;
28353 slot = 0;
28354 insn = get_next_active_insn (prev_head_insn, tail);
28355 group_end = false;
28357 while (insn != NULL_RTX)
28359 slot = (issue_rate - can_issue_more);
28360 group_insns[slot] = insn;
28361 can_issue_more =
28362 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28363 if (insn_terminates_group_p (insn, current_group))
28364 can_issue_more = 0;
28366 next_insn = get_next_active_insn (insn, tail);
28367 if (next_insn == NULL_RTX)
28368 return group_count + 1;
28370 /* Is next_insn going to start a new group? */
28371 group_end
28372 = (can_issue_more == 0
28373 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28374 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28375 || (can_issue_more < issue_rate &&
28376 insn_terminates_group_p (next_insn, previous_group)));
28378 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28379 next_insn, &group_end, can_issue_more,
28380 &group_count);
28382 if (group_end)
28384 group_count++;
28385 can_issue_more = 0;
28386 for (i = 0; i < issue_rate; i++)
28388 group_insns[i] = 0;
28392 if (GET_MODE (next_insn) == TImode && can_issue_more)
28393 PUT_MODE (next_insn, VOIDmode);
28394 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28395 PUT_MODE (next_insn, TImode);
28397 insn = next_insn;
28398 if (can_issue_more == 0)
28399 can_issue_more = issue_rate;
28400 } /* while */
28402 return group_count;
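/* Illustrative sketch (not built): the four group-ending tests that
   redefine_groups applies after each insn, expressed as a standalone
   predicate over hypothetical slot counts.  */
#if 0
static int
demo_group_end_p (int can_issue_more, int issue_rate,
                  int next_is_branch, int next_is_cracked,
                  int next_must_be_first)
{
  return (can_issue_more == 0                       /* no vacant slots  */
          || (can_issue_more == 1 && !next_is_branch) /* branch slot only */
          || (can_issue_more <= 2 && next_is_cracked) /* needs two slots  */
          || (can_issue_more < issue_rate && next_must_be_first));
}
#endif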
28405 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28406 dispatch group boundaries that the scheduler had marked. Pad with nops
28407 any dispatch groups which have vacant issue slots, in order to force the
28408 scheduler's grouping on the processor dispatcher. The function
28409 returns the number of dispatch groups found. */
28411 static int
28412 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28413 rtx_insn *tail)
28415 rtx_insn *insn, *next_insn;
28416 rtx nop;
28417 int issue_rate;
28418 int can_issue_more;
28419 int group_end;
28420 int group_count = 0;
28422 /* Initialize issue_rate. */
28423 issue_rate = rs6000_issue_rate ();
28424 can_issue_more = issue_rate;
28426 insn = get_next_active_insn (prev_head_insn, tail);
28427 next_insn = get_next_active_insn (insn, tail);
28429 while (insn != NULL_RTX)
28431 can_issue_more =
28432 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28434 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28436 if (next_insn == NULL_RTX)
28437 break;
28439 if (group_end)
28441 /* If the scheduler had marked group termination at this location
28442 (between insn and next_insn), and neither insn nor next_insn will
28443 force group termination, pad the group with nops to force group
28444 termination. */
28445 if (can_issue_more
28446 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28447 && !insn_terminates_group_p (insn, current_group)
28448 && !insn_terminates_group_p (next_insn, previous_group))
28450 if (!is_branch_slot_insn (next_insn))
28451 can_issue_more--;
28453 while (can_issue_more)
28455 nop = gen_nop ();
28456 emit_insn_before (nop, next_insn);
28457 can_issue_more--;
28461 can_issue_more = issue_rate;
28462 group_count++;
28465 insn = next_insn;
28466 next_insn = get_next_active_insn (insn, tail);
28469 return group_count;
28472 /* We're beginning a new block. Initialize data structures as necessary. */
28474 static void
28475 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28476 int sched_verbose ATTRIBUTE_UNUSED,
28477 int max_ready ATTRIBUTE_UNUSED)
28479 last_scheduled_insn = NULL_RTX;
28480 load_store_pendulum = 0;
28483 /* The following function is called at the end of scheduling BB.
28484 After reload, it inserts nops at insn group boundaries to enforce bundling. */
28486 static void
28487 rs6000_sched_finish (FILE *dump, int sched_verbose)
28489 int n_groups;
28491 if (sched_verbose)
28492 fprintf (dump, "=== Finishing schedule.\n");
28494 if (reload_completed && rs6000_sched_groups)
28496 /* Do not run sched_finish hook when selective scheduling enabled. */
28497 if (sel_sched_p ())
28498 return;
28500 if (rs6000_sched_insert_nops == sched_finish_none)
28501 return;
28503 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28504 n_groups = pad_groups (dump, sched_verbose,
28505 current_sched_info->prev_head,
28506 current_sched_info->next_tail);
28507 else
28508 n_groups = redefine_groups (dump, sched_verbose,
28509 current_sched_info->prev_head,
28510 current_sched_info->next_tail);
28512 if (sched_verbose >= 6)
28514 fprintf (dump, "ngroups = %d\n", n_groups);
28515 print_rtl (dump, current_sched_info->prev_head);
28516 fprintf (dump, "Done finish_sched\n");
28521 struct _rs6000_sched_context
28523 short cached_can_issue_more;
28524 rtx last_scheduled_insn;
28525 int load_store_pendulum;
28528 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28529 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28531 /* Allocate store for new scheduling context. */
28532 static void *
28533 rs6000_alloc_sched_context (void)
28535 return xmalloc (sizeof (rs6000_sched_context_def));
28538 /* If CLEAN_P is true then initialize _SC with clean data;
28539 otherwise initialize it from the global context. */
28540 static void
28541 rs6000_init_sched_context (void *_sc, bool clean_p)
28543 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28545 if (clean_p)
28547 sc->cached_can_issue_more = 0;
28548 sc->last_scheduled_insn = NULL_RTX;
28549 sc->load_store_pendulum = 0;
28551 else
28553 sc->cached_can_issue_more = cached_can_issue_more;
28554 sc->last_scheduled_insn = last_scheduled_insn;
28555 sc->load_store_pendulum = load_store_pendulum;
28559 /* Sets the global scheduling context to the one pointed to by _SC. */
28560 static void
28561 rs6000_set_sched_context (void *_sc)
28563 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28565 gcc_assert (sc != NULL);
28567 cached_can_issue_more = sc->cached_can_issue_more;
28568 last_scheduled_insn = sc->last_scheduled_insn;
28569 load_store_pendulum = sc->load_store_pendulum;
28572 /* Free _SC. */
28573 static void
28574 rs6000_free_sched_context (void *_sc)
28576 gcc_assert (_sc != NULL);
28578 free (_sc);
28582 /* Length in units of the trampoline for entering a nested function. */
28585 rs6000_trampoline_size (void)
28587 int ret = 0;
28589 switch (DEFAULT_ABI)
28591 default:
28592 gcc_unreachable ();
28594 case ABI_AIX:
28595 ret = (TARGET_32BIT) ? 12 : 24;
28596 break;
28598 case ABI_ELFv2:
28599 gcc_assert (!TARGET_32BIT);
28600 ret = 32;
28601 break;
28603 case ABI_DARWIN:
28604 case ABI_V4:
28605 ret = (TARGET_32BIT) ? 40 : 48;
28606 break;
28609 return ret;
28612 /* Emit RTL insns to initialize the variable parts of a trampoline.
28613 FNADDR is an RTX for the address of the function's pure code.
28614 CXT is an RTX for the static chain value for the function. */
28616 static void
28617 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28619 int regsize = (TARGET_32BIT) ? 4 : 8;
28620 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28621 rtx ctx_reg = force_reg (Pmode, cxt);
28622 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28624 switch (DEFAULT_ABI)
28626 default:
28627 gcc_unreachable ();
28629 /* Under AIX, just build the 3-word function descriptor. */
28630 case ABI_AIX:
28632 rtx fnmem, fn_reg, toc_reg;
28634 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28635 error ("You cannot take the address of a nested function if you use "
28636 "the -mno-pointers-to-nested-functions option.");
28638 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28639 fn_reg = gen_reg_rtx (Pmode);
28640 toc_reg = gen_reg_rtx (Pmode);
28642 /* Macro to shorten the code expansions below. */
28643 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28645 m_tramp = replace_equiv_address (m_tramp, addr);
28647 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28648 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28649 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28650 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28651 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28653 # undef MEM_PLUS
28655 break;
28657 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28658 case ABI_ELFv2:
28659 case ABI_DARWIN:
28660 case ABI_V4:
28661 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28662 LCT_NORMAL, VOIDmode, 4,
28663 addr, Pmode,
28664 GEN_INT (rs6000_trampoline_size ()), SImode,
28665 fnaddr, Pmode,
28666 ctx_reg, Pmode);
28667 break;
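/* Illustrative sketch (not built; hypothetical struct for exposition):
   the three words the ABI_AIX case above writes into the trampoline,
   at offsets 0, regsize and 2*regsize.  */
#if 0
struct demo_aix_descriptor
{
  void *entry_point;   /* offset 0:          copied from FNADDR's descriptor */
  void *toc_value;     /* offset regsize:    callee's TOC pointer            */
  void *static_chain;  /* offset 2*regsize:  CXT, the nesting context        */
};
#endif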
28672 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28673 identifier as an argument, so the front end shouldn't look it up. */
28675 static bool
28676 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28678 return is_attribute_p ("altivec", attr_id);
28681 /* Handle the "altivec" attribute. The attribute may have
28682 arguments as follows:
28684 __attribute__((altivec(vector__)))
28685 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28686 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28688 and may appear more than once (e.g., 'vector bool char') in a
28689 given declaration. */
28691 static tree
28692 rs6000_handle_altivec_attribute (tree *node,
28693 tree name ATTRIBUTE_UNUSED,
28694 tree args,
28695 int flags ATTRIBUTE_UNUSED,
28696 bool *no_add_attrs)
28698 tree type = *node, result = NULL_TREE;
28699 machine_mode mode;
28700 int unsigned_p;
28701 char altivec_type
28702 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28703 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28704 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28705 : '?');
28707 while (POINTER_TYPE_P (type)
28708 || TREE_CODE (type) == FUNCTION_TYPE
28709 || TREE_CODE (type) == METHOD_TYPE
28710 || TREE_CODE (type) == ARRAY_TYPE)
28711 type = TREE_TYPE (type);
28713 mode = TYPE_MODE (type);
28715 /* Check for invalid AltiVec type qualifiers. */
28716 if (type == long_double_type_node)
28717 error ("use of %<long double%> in AltiVec types is invalid");
28718 else if (type == boolean_type_node)
28719 error ("use of boolean types in AltiVec types is invalid");
28720 else if (TREE_CODE (type) == COMPLEX_TYPE)
28721 error ("use of %<complex%> in AltiVec types is invalid");
28722 else if (DECIMAL_FLOAT_MODE_P (mode))
28723 error ("use of decimal floating point types in AltiVec types is invalid");
28724 else if (!TARGET_VSX)
28726 if (type == long_unsigned_type_node || type == long_integer_type_node)
28728 if (TARGET_64BIT)
28729 error ("use of %<long%> in AltiVec types is invalid for "
28730 "64-bit code without -mvsx");
28731 else if (rs6000_warn_altivec_long)
28732 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28733 "use %<int%>");
28735 else if (type == long_long_unsigned_type_node
28736 || type == long_long_integer_type_node)
28737 error ("use of %<long long%> in AltiVec types is invalid without "
28738 "-mvsx");
28739 else if (type == double_type_node)
28740 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28743 switch (altivec_type)
28745 case 'v':
28746 unsigned_p = TYPE_UNSIGNED (type);
28747 switch (mode)
28749 case TImode:
28750 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28751 break;
28752 case DImode:
28753 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28754 break;
28755 case SImode:
28756 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28757 break;
28758 case HImode:
28759 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28760 break;
28761 case QImode:
28762 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28763 break;
28764 case SFmode: result = V4SF_type_node; break;
28765 case DFmode: result = V2DF_type_node; break;
28766 /* If the user says 'vector int bool', we may be handed the 'bool'
28767 attribute _before_ the 'vector' attribute, and so select the
28768 proper type in the 'b' case below. */
28769 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28770 case V2DImode: case V2DFmode:
28771 result = type;
28772 default: break;
28774 break;
28775 case 'b':
28776 switch (mode)
28778 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28779 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28780 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28781 case QImode: case V16QImode: result = bool_V16QI_type_node;
28782 default: break;
28784 break;
28785 case 'p':
28786 switch (mode)
28788 case V8HImode: result = pixel_V8HI_type_node;
28789 default: break;
28791 default: break;
28794 /* Propagate qualifiers attached to the element type
28795 onto the vector type. */
28796 if (result && result != type && TYPE_QUALS (type))
28797 result = build_qualified_type (result, TYPE_QUALS (type));
28799 *no_add_attrs = true; /* No need to hang on to the attribute. */
28801 if (result)
28802 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28804 return NULL_TREE;
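/* Illustrative sketch (not built): the AltiVec keywords are macros that
   expand to this attribute, so for 'vector signed int', 'vector bool int'
   and 'vector pixel' the handler above effectively sees:  */
#if 0
__attribute__ ((altivec (vector__))) signed int vsi;   /* vector signed int */
__attribute__ ((altivec (bool__))) unsigned int vbi;   /* vector bool int   */
__attribute__ ((altivec (pixel__))) unsigned short vp; /* vector pixel      */
#endif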
28807 /* AltiVec defines four built-in scalar types that serve as vector
28808 elements; we must teach the compiler how to mangle them. */
28810 static const char *
28811 rs6000_mangle_type (const_tree type)
28813 type = TYPE_MAIN_VARIANT (type);
28815 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28816 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28817 return NULL;
28819 if (type == bool_char_type_node) return "U6__boolc";
28820 if (type == bool_short_type_node) return "U6__bools";
28821 if (type == pixel_type_node) return "u7__pixel";
28822 if (type == bool_int_type_node) return "U6__booli";
28823 if (type == bool_long_type_node) return "U6__booll";
28825 /* Mangle IBM extended float long double as `g' (__float128) on
28826 powerpc*-linux where long-double-64 previously was the default. */
28827 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28828 && TARGET_ELF
28829 && TARGET_LONG_DOUBLE_128
28830 && !TARGET_IEEEQUAD)
28831 return "g";
28833 /* For all other types, use normal C++ mangling. */
28834 return NULL;
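/* Illustrative sketch (not built): a hypothetical C++ declaration and
   the mangled name the table above produces for it, following the
   Itanium vendor-extended-type scheme ('u' + length + name).  */
#if 0
void f (__pixel);   /* mangles as _Z1fu7__pixel */
#endif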
28837 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28838 struct attribute_spec.handler. */
28840 static tree
28841 rs6000_handle_longcall_attribute (tree *node, tree name,
28842 tree args ATTRIBUTE_UNUSED,
28843 int flags ATTRIBUTE_UNUSED,
28844 bool *no_add_attrs)
28846 if (TREE_CODE (*node) != FUNCTION_TYPE
28847 && TREE_CODE (*node) != FIELD_DECL
28848 && TREE_CODE (*node) != TYPE_DECL)
28850 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28851 name);
28852 *no_add_attrs = true;
28855 return NULL_TREE;
28858 /* Set longcall attributes on all functions declared when
28859 rs6000_default_long_calls is true. */
28860 static void
28861 rs6000_set_default_type_attributes (tree type)
28863 if (rs6000_default_long_calls
28864 && (TREE_CODE (type) == FUNCTION_TYPE
28865 || TREE_CODE (type) == METHOD_TYPE))
28866 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28867 NULL_TREE,
28868 TYPE_ATTRIBUTES (type));
28870 #if TARGET_MACHO
28871 darwin_set_default_type_attributes (type);
28872 #endif
28875 /* Return a reference suitable for calling a function with the
28876 longcall attribute. */
28879 rs6000_longcall_ref (rtx call_ref)
28881 const char *call_name;
28882 tree node;
28884 if (GET_CODE (call_ref) != SYMBOL_REF)
28885 return call_ref;
28887 /* System V adds '.' to the internal name, so skip any leading dots. */
28888 call_name = XSTR (call_ref, 0);
28889 if (*call_name == '.')
28891 while (*call_name == '.')
28892 call_name++;
28894 node = get_identifier (call_name);
28895 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
28898 return force_reg (Pmode, call_ref);
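/* Illustrative sketch (not built): declarations that exercise the
   longcall path above versus forcing a short call.  */
#if 0
extern void far_func (void) __attribute__ ((longcall));   /* indirect call */
extern void near_func (void) __attribute__ ((shortcall)); /* plain 'bl'    */
#endif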
28901 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
28902 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
28903 #endif
28905 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28906 struct attribute_spec.handler. */
28907 static tree
28908 rs6000_handle_struct_attribute (tree *node, tree name,
28909 tree args ATTRIBUTE_UNUSED,
28910 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28912 tree *type = NULL;
28913 if (DECL_P (*node))
28915 if (TREE_CODE (*node) == TYPE_DECL)
28916 type = &TREE_TYPE (*node);
28918 else
28919 type = node;
28921 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28922 || TREE_CODE (*type) == UNION_TYPE)))
28924 warning (OPT_Wattributes, "%qE attribute ignored", name);
28925 *no_add_attrs = true;
28928 else if ((is_attribute_p ("ms_struct", name)
28929 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28930 || ((is_attribute_p ("gcc_struct", name)
28931 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28933 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28934 name);
28935 *no_add_attrs = true;
28938 return NULL_TREE;
28941 static bool
28942 rs6000_ms_bitfield_layout_p (const_tree record_type)
28944 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
28945 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28946 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
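/* Illustrative sketch (not built): per-type selection between the two
   bitfield layout schemes tested above.  */
#if 0
struct __attribute__ ((ms_struct))  A { int a : 3; char b : 2; };
struct __attribute__ ((gcc_struct)) B { int a : 3; char b : 2; };
#endif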
28949 #ifdef USING_ELFOS_H
28951 /* A get_unnamed_section callback, used for switching to toc_section. */
28953 static void
28954 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28956 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28957 && TARGET_MINIMAL_TOC
28958 && !TARGET_RELOCATABLE)
28960 if (!toc_initialized)
28962 toc_initialized = 1;
28963 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28964 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
28965 fprintf (asm_out_file, "\t.tc ");
28966 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
28967 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28968 fprintf (asm_out_file, "\n");
28970 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28971 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28972 fprintf (asm_out_file, " = .+32768\n");
28974 else
28975 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28977 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28978 && !TARGET_RELOCATABLE)
28979 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28980 else
28982 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28983 if (!toc_initialized)
28985 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28986 fprintf (asm_out_file, " = .+32768\n");
28987 toc_initialized = 1;
28992 /* Implement TARGET_ASM_INIT_SECTIONS. */
28994 static void
28995 rs6000_elf_asm_init_sections (void)
28997 toc_section
28998 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
29000 sdata2_section
29001 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
29002 SDATA2_SECTION_ASM_OP);
29005 /* Implement TARGET_SELECT_RTX_SECTION. */
29007 static section *
29008 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
29009 unsigned HOST_WIDE_INT align)
29011 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29012 return toc_section;
29013 else
29014 return default_elf_select_rtx_section (mode, x, align);
29017 /* For a SYMBOL_REF, set generic flags and then perform some
29018 target-specific processing.
29020 When the AIX ABI is requested on a non-AIX system, replace the
29021 function name with the real name (with a leading .) rather than the
29022 function descriptor name. This saves a lot of overriding code to
29023 read the prefixes. */
29025 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
29026 static void
29027 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
29029 default_encode_section_info (decl, rtl, first);
29031 if (first
29032 && TREE_CODE (decl) == FUNCTION_DECL
29033 && !TARGET_AIX
29034 && DEFAULT_ABI == ABI_AIX)
29036 rtx sym_ref = XEXP (rtl, 0);
29037 size_t len = strlen (XSTR (sym_ref, 0));
29038 char *str = XALLOCAVEC (char, len + 2);
29039 str[0] = '.';
29040 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
29041 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
29045 static inline bool
29046 compare_section_name (const char *section, const char *templ)
29048 int len;
29050 len = strlen (templ);
29051 return (strncmp (section, templ, len) == 0
29052 && (section[len] == 0 || section[len] == '.'));
29055 bool
29056 rs6000_elf_in_small_data_p (const_tree decl)
29058 if (rs6000_sdata == SDATA_NONE)
29059 return false;
29061 /* We want to merge strings, so we never consider them small data. */
29062 if (TREE_CODE (decl) == STRING_CST)
29063 return false;
29065 /* Functions are never in the small data area. */
29066 if (TREE_CODE (decl) == FUNCTION_DECL)
29067 return false;
29069 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
29071 const char *section = DECL_SECTION_NAME (decl);
29072 if (compare_section_name (section, ".sdata")
29073 || compare_section_name (section, ".sdata2")
29074 || compare_section_name (section, ".gnu.linkonce.s")
29075 || compare_section_name (section, ".sbss")
29076 || compare_section_name (section, ".sbss2")
29077 || compare_section_name (section, ".gnu.linkonce.sb")
29078 || strcmp (section, ".PPC.EMB.sdata0") == 0
29079 || strcmp (section, ".PPC.EMB.sbss0") == 0)
29080 return true;
29082 else
29084 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
29086 if (size > 0
29087 && size <= g_switch_value
29088 /* If it's not public, and we're not going to reference it there,
29089 there's no need to put it in the small data section. */
29090 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
29091 return true;
29094 return false;
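/* Illustrative sketch (not built; hypothetical objects): inputs the
   predicate above sends to small data, either via an explicit small
   data section or by size relative to -G (g_switch_value).  */
#if 0
int in_sdata __attribute__ ((section (".sdata")));  /* named section match */
int tiny;   /* small if sizeof (tiny) <= g_switch_value */
#endif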
29097 #endif /* USING_ELFOS_H */
29099 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
29101 static bool
29102 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
29104 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
29107 /* Do not place thread-local symbols refs in the object blocks. */
29109 static bool
29110 rs6000_use_blocks_for_decl_p (const_tree decl)
29112 return !DECL_THREAD_LOCAL_P (decl);
29115 /* Return a REG that occurs in ADDR with coefficient 1.
29116 ADDR can be effectively incremented by incrementing REG.
29118 r0 is special and we must not select it as an address
29119 register by this routine since our caller will try to
29120 increment the returned register via an "la" instruction. */
29123 find_addr_reg (rtx addr)
29125 while (GET_CODE (addr) == PLUS)
29127 if (GET_CODE (XEXP (addr, 0)) == REG
29128 && REGNO (XEXP (addr, 0)) != 0)
29129 addr = XEXP (addr, 0);
29130 else if (GET_CODE (XEXP (addr, 1)) == REG
29131 && REGNO (XEXP (addr, 1)) != 0)
29132 addr = XEXP (addr, 1);
29133 else if (CONSTANT_P (XEXP (addr, 0)))
29134 addr = XEXP (addr, 1);
29135 else if (CONSTANT_P (XEXP (addr, 1)))
29136 addr = XEXP (addr, 0);
29137 else
29138 gcc_unreachable ();
29140 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
29141 return addr;
29144 void
29145 rs6000_fatal_bad_address (rtx op)
29147 fatal_insn ("bad address", op);
29150 #if TARGET_MACHO
29152 typedef struct branch_island_d {
29153 tree function_name;
29154 tree label_name;
29155 int line_number;
29156 } branch_island;
29159 static vec<branch_island, va_gc> *branch_islands;
29161 /* Remember to generate a branch island for far calls to the given
29162 function. */
29164 static void
29165 add_compiler_branch_island (tree label_name, tree function_name,
29166 int line_number)
29168 branch_island bi = {function_name, label_name, line_number};
29169 vec_safe_push (branch_islands, bi);
29172 /* Generate far-jump branch islands for everything recorded in
29173 branch_islands. Invoked immediately after the last instruction of
29174 the epilogue has been emitted; the branch islands must be appended
29175 to, and contiguous with, the function body. Mach-O stubs are
29176 generated in machopic_output_stub(). */
29178 static void
29179 macho_branch_islands (void)
29181 char tmp_buf[512];
29183 while (!vec_safe_is_empty (branch_islands))
29185 branch_island *bi = &branch_islands->last ();
29186 const char *label = IDENTIFIER_POINTER (bi->label_name);
29187 const char *name = IDENTIFIER_POINTER (bi->function_name);
29188 char name_buf[512];
29189 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
29190 if (name[0] == '*' || name[0] == '&')
29191 strcpy (name_buf, name+1);
29192 else
29194 name_buf[0] = '_';
29195 strcpy (name_buf+1, name);
29197 strcpy (tmp_buf, "\n");
29198 strcat (tmp_buf, label);
29199 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29200 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29201 dbxout_stabd (N_SLINE, bi->line_number);
29202 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29203 if (flag_pic)
29205 if (TARGET_LINK_STACK)
29207 char name[32];
29208 get_ppc476_thunk_name (name);
29209 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
29210 strcat (tmp_buf, name);
29211 strcat (tmp_buf, "\n");
29212 strcat (tmp_buf, label);
29213 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29215 else
29217 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
29218 strcat (tmp_buf, label);
29219 strcat (tmp_buf, "_pic\n");
29220 strcat (tmp_buf, label);
29221 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29224 strcat (tmp_buf, "\taddis r11,r11,ha16(");
29225 strcat (tmp_buf, name_buf);
29226 strcat (tmp_buf, " - ");
29227 strcat (tmp_buf, label);
29228 strcat (tmp_buf, "_pic)\n");
29230 strcat (tmp_buf, "\tmtlr r0\n");
29232 strcat (tmp_buf, "\taddi r12,r11,lo16(");
29233 strcat (tmp_buf, name_buf);
29234 strcat (tmp_buf, " - ");
29235 strcat (tmp_buf, label);
29236 strcat (tmp_buf, "_pic)\n");
29238 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
29240 else
29242 strcat (tmp_buf, ":\nlis r12,hi16(");
29243 strcat (tmp_buf, name_buf);
29244 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
29245 strcat (tmp_buf, name_buf);
29246 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
29248 output_asm_insn (tmp_buf, 0);
29249 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29250 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29251 dbxout_stabd (N_SLINE, bi->line_number);
29252 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29253 branch_islands->pop ();
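/* Illustrative sketch (not built): the non-PIC branch island assembled
   in tmp_buf above for a far call to _foo through label L42
   (hypothetical names, reconstructed from the strcat sequence):

     L42:
     lis r12,hi16(_foo)
        ori r12,r12,lo16(_foo)
        mtctr r12
        bctr  */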
29257 /* NO_PREVIOUS_DEF checks whether the function name is already in the
29258 branch island list. */
29260 static int
29261 no_previous_def (tree function_name)
29263 branch_island *bi;
29264 unsigned ix;
29266 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29267 if (function_name == bi->function_name)
29268 return 0;
29269 return 1;
29272 /* GET_PREV_LABEL gets the label name from the previous definition of
29273 the function. */
29275 static tree
29276 get_prev_label (tree function_name)
29278 branch_island *bi;
29279 unsigned ix;
29281 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29282 if (function_name == bi->function_name)
29283 return bi->label_name;
29284 return NULL_TREE;
29287 /* INSN is either a function call or a millicode call. It may have an
29288 unconditional jump in its delay slot.
29290 CALL_DEST is the routine we are calling. */
29292 char *
29293 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
29294 int cookie_operand_number)
29296 static char buf[256];
29297 if (darwin_emit_branch_islands
29298 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
29299 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
29301 tree labelname;
29302 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
29304 if (no_previous_def (funname))
29306 rtx label_rtx = gen_label_rtx ();
29307 char *label_buf, temp_buf[256];
29308 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
29309 CODE_LABEL_NUMBER (label_rtx));
29310 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
29311 labelname = get_identifier (label_buf);
29312 add_compiler_branch_island (labelname, funname, insn_line (insn));
29314 else
29315 labelname = get_prev_label (funname);
29317 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
29318 instruction will reach 'foo', otherwise link as 'bl L42'".
29319 "L42" should be a 'branch island', that will do a far jump to
29320 'foo'. Branch islands are generated in
29321 macho_branch_islands(). */
29322 sprintf (buf, "jbsr %%z%d,%.246s",
29323 dest_operand_number, IDENTIFIER_POINTER (labelname));
29325 else
29326 sprintf (buf, "bl %%z%d", dest_operand_number);
29327 return buf;
29330 /* Generate PIC and indirect symbol stubs. */
29332 void
29333 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29335 unsigned int length;
29336 char *symbol_name, *lazy_ptr_name;
29337 char *local_label_0;
29338 static int label = 0;
29340 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29341 symb = (*targetm.strip_name_encoding) (symb);
29344 length = strlen (symb);
29345 symbol_name = XALLOCAVEC (char, length + 32);
29346 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29348 lazy_ptr_name = XALLOCAVEC (char, length + 32);
29349 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
29351 if (flag_pic == 2)
29352 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
29353 else
29354 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
29356 if (flag_pic == 2)
29358 fprintf (file, "\t.align 5\n");
29360 fprintf (file, "%s:\n", stub);
29361 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29363 label++;
29364 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29365 sprintf (local_label_0, "\"L%011d$spb\"", label);
29367 fprintf (file, "\tmflr r0\n");
29368 if (TARGET_LINK_STACK)
29370 char name[32];
29371 get_ppc476_thunk_name (name);
29372 fprintf (file, "\tbl %s\n", name);
29373 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29375 else
29377 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29378 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29380 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29381 lazy_ptr_name, local_label_0);
29382 fprintf (file, "\tmtlr r0\n");
29383 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29384 (TARGET_64BIT ? "ldu" : "lwzu"),
29385 lazy_ptr_name, local_label_0);
29386 fprintf (file, "\tmtctr r12\n");
29387 fprintf (file, "\tbctr\n");
29389 else
29391 fprintf (file, "\t.align 4\n");
29393 fprintf (file, "%s:\n", stub);
29394 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29396 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29397 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29398 (TARGET_64BIT ? "ldu" : "lwzu"),
29399 lazy_ptr_name);
29400 fprintf (file, "\tmtctr r12\n");
29401 fprintf (file, "\tbctr\n");
29404 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29405 fprintf (file, "%s:\n", lazy_ptr_name);
29406 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29407 fprintf (file, "%sdyld_stub_binding_helper\n",
29408 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
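/* For reference, with flag_pic == 2 and no link stack the code above
   emits a 32-bit stub of roughly this shape for a symbol _foo (the
   label spellings are illustrative; the real ones come from
   GEN_SYMBOL_NAME_FOR_SYMBOL and GEN_LAZY_PTR_NAME_FOR_SYMBOL):

	L_foo$stub:
		.indirect_symbol _foo
		mflr r0
		bcl 20,31,"L00000000001$spb"
	"L00000000001$spb":
		mflr r11
		addis r11,r11,ha16(L_foo$lazy_ptr-"L00000000001$spb")
		mtlr r0
		lwzu r12,lo16(L_foo$lazy_ptr-"L00000000001$spb")(r11)
		mtctr r12
		bctr
	L_foo$lazy_ptr:
		.indirect_symbol _foo
		.long	dyld_stub_binding_helper  */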
29411 /* Legitimize PIC addresses. If the address is already
29412 position-independent, we return ORIG. Newly generated
29413 position-independent addresses go into a reg.  This is REG if
29414 nonzero, otherwise we allocate register(s) as necessary.  */
29416 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
29418 rtx
29419 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
29420 rtx reg)
29422 rtx base, offset;
29424 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29425 reg = gen_reg_rtx (Pmode);
29427 if (GET_CODE (orig) == CONST)
29429 rtx reg_temp;
29431 if (GET_CODE (XEXP (orig, 0)) == PLUS
29432 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29433 return orig;
29435 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29437 /* Use a different reg for the intermediate value, as
29438 it will be marked UNCHANGING. */
29439 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29440 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29441 Pmode, reg_temp);
29442 offset =
29443 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29444 Pmode, reg);
29446 if (GET_CODE (offset) == CONST_INT)
29448 if (SMALL_INT (offset))
29449 return plus_constant (Pmode, base, INTVAL (offset));
29450 else if (! reload_in_progress && ! reload_completed)
29451 offset = force_reg (Pmode, offset);
29452 else
29454 rtx mem = force_const_mem (Pmode, orig);
29455 return machopic_legitimize_pic_address (mem, Pmode, reg);
29458 return gen_rtx_PLUS (Pmode, base, offset);
29461 /* Fall back on generic machopic code. */
29462 return machopic_legitimize_pic_address (orig, mode, reg);
29465 /* Output a .machine directive for the Darwin assembler, and call
29466 the generic start_file routine. */
29468 static void
29469 rs6000_darwin_file_start (void)
29471 static const struct
29473 const char *arg;
29474 const char *name;
29475 HOST_WIDE_INT if_set;
29476 } mapping[] = {
29477 { "ppc64", "ppc64", MASK_64BIT },
29478 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29479 { "power4", "ppc970", 0 },
29480 { "G5", "ppc970", 0 },
29481 { "7450", "ppc7450", 0 },
29482 { "7400", "ppc7400", MASK_ALTIVEC },
29483 { "G4", "ppc7400", 0 },
29484 { "750", "ppc750", 0 },
29485 { "740", "ppc750", 0 },
29486 { "G3", "ppc750", 0 },
29487 { "604e", "ppc604e", 0 },
29488 { "604", "ppc604", 0 },
29489 { "603e", "ppc603", 0 },
29490 { "603", "ppc603", 0 },
29491 { "601", "ppc601", 0 },
29492 { NULL, "ppc", 0 } };
29493 const char *cpu_id = "";
29494 size_t i;
29496 rs6000_file_start ();
29497 darwin_file_start ();
29499 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29501 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29502 cpu_id = rs6000_default_cpu;
29504 if (global_options_set.x_rs6000_cpu_index)
29505 cpu_id = processor_target_table[rs6000_cpu_index].name;
29507 /* Look through the mapping array. Pick the first name that either
29508 matches the argument, has a bit set in IF_SET that is also set
29509 in the target flags, or has a NULL name. */
29511 i = 0;
29512 while (mapping[i].arg != NULL
29513 && strcmp (mapping[i].arg, cpu_id) != 0
29514 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29515 i++;
29517 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
29520 #endif /* TARGET_MACHO */
29522 #if TARGET_ELF
29523 static int
29524 rs6000_elf_reloc_rw_mask (void)
29526 if (flag_pic)
29527 return 3;
29528 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29529 return 2;
29530 else
29531 return 0;
29534 /* Record an element in the table of global constructors. SYMBOL is
29535 a SYMBOL_REF of the function to be called; PRIORITY is a number
29536 between 0 and MAX_INIT_PRIORITY.
29538 This differs from default_named_section_asm_out_constructor in
29539 that we have special handling for -mrelocatable. */
29541 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29542 static void
29543 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29545 const char *section = ".ctors";
29546 char buf[16];
29548 if (priority != DEFAULT_INIT_PRIORITY)
29550 sprintf (buf, ".ctors.%.5u",
29551 /* Invert the numbering so the linker puts us in the proper
29552 order; constructors are run from right to left, and the
29553 linker sorts in increasing order. */
29554 MAX_INIT_PRIORITY - priority);
29555 section = buf;
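/* For example, with MAX_INIT_PRIORITY == 65535 a priority 101
   constructor lands in ".ctors.65434" and a priority 500 one in
   ".ctors.65035"; the former sorts later (further right), so the
   right-to-left execution order runs it first, as the smaller
   priority number requires.  */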
29558 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29559 assemble_align (POINTER_SIZE);
29561 if (TARGET_RELOCATABLE)
29563 fputs ("\t.long (", asm_out_file);
29564 output_addr_const (asm_out_file, symbol);
29565 fputs (")@fixup\n", asm_out_file);
29567 else
29568 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29571 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29572 static void
29573 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29575 const char *section = ".dtors";
29576 char buf[16];
29578 if (priority != DEFAULT_INIT_PRIORITY)
29580 sprintf (buf, ".dtors.%.5u",
29581 /* Invert the numbering so the linker puts us in the proper
29582 order; constructors are run from right to left, and the
29583 linker sorts in increasing order. */
29584 MAX_INIT_PRIORITY - priority);
29585 section = buf;
29588 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29589 assemble_align (POINTER_SIZE);
29591 if (TARGET_RELOCATABLE)
29593 fputs ("\t.long (", asm_out_file);
29594 output_addr_const (asm_out_file, symbol);
29595 fputs (")@fixup\n", asm_out_file);
29597 else
29598 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29601 void
29602 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29604 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29606 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29607 ASM_OUTPUT_LABEL (file, name);
29608 fputs (DOUBLE_INT_ASM_OP, file);
29609 rs6000_output_function_entry (file, name);
29610 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29611 if (DOT_SYMBOLS)
29613 fputs ("\t.size\t", file);
29614 assemble_name (file, name);
29615 fputs (",24\n\t.type\t.", file);
29616 assemble_name (file, name);
29617 fputs (",@function\n", file);
29618 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29620 fputs ("\t.globl\t.", file);
29621 assemble_name (file, name);
29622 putc ('\n', file);
29625 else
29626 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29627 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29628 rs6000_output_function_entry (file, name);
29629 fputs (":\n", file);
29630 return;
29633 if (TARGET_RELOCATABLE
29634 && !TARGET_SECURE_PLT
29635 && (get_pool_size () != 0 || crtl->profile)
29636 && uses_TOC ())
29638 char buf[256];
29640 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29642 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29643 fprintf (file, "\t.long ");
29644 assemble_name (file, buf);
29645 putc ('-', file);
29646 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29647 assemble_name (file, buf);
29648 putc ('\n', file);
29651 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29652 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29654 if (DEFAULT_ABI == ABI_AIX)
29656 const char *desc_name, *orig_name;
29658 orig_name = (*targetm.strip_name_encoding) (name);
29659 desc_name = orig_name;
29660 while (*desc_name == '.')
29661 desc_name++;
29663 if (TREE_PUBLIC (decl))
29664 fprintf (file, "\t.globl %s\n", desc_name);
29666 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29667 fprintf (file, "%s:\n", desc_name);
29668 fprintf (file, "\t.long %s\n", orig_name);
29669 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29670 fputs ("\t.long 0\n", file);
29671 fprintf (file, "\t.previous\n");
29673 ASM_OUTPUT_LABEL (file, name);
29676 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29677 static void
29678 rs6000_elf_file_end (void)
29680 #ifdef HAVE_AS_GNU_ATTRIBUTE
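/* Per the PowerPC GNU object attribute conventions: attribute 4 (FP ABI)
   uses 1 = hard float / double precision, 2 = soft float, 3 = hard
   float / single precision; attribute 8 (vector ABI) uses 1 = generic,
   2 = AltiVec, 3 = SPE; attribute 12 (struct return) uses 1 = r3/r4,
   2 = memory.  */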
29681 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29683 if (rs6000_passes_float)
29684 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29685 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29686 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29687 : 2));
29688 if (rs6000_passes_vector)
29689 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29690 (TARGET_ALTIVEC_ABI ? 2
29691 : TARGET_SPE_ABI ? 3
29692 : 1));
29693 if (rs6000_returns_struct)
29694 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29695 aix_struct_return ? 2 : 1);
29697 #endif
29698 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29699 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29700 file_end_indicate_exec_stack ();
29701 #endif
29703 #endif
29705 #if TARGET_XCOFF
29706 static void
29707 rs6000_xcoff_asm_output_anchor (rtx symbol)
29709 char buffer[100];
29711 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29712 SYMBOL_REF_BLOCK_OFFSET (symbol));
29713 fprintf (asm_out_file, "%s", SET_ASM_OP);
29714 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
29715 fprintf (asm_out_file, ",");
29716 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
29717 fprintf (asm_out_file, "\n");
29720 static void
29721 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29723 fputs (GLOBAL_ASM_OP, stream);
29724 RS6000_OUTPUT_BASENAME (stream, name);
29725 putc ('\n', stream);
29728 /* A get_unnamed_decl callback, used for read-only sections. PTR
29729 points to the section string variable. */
29731 static void
29732 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29734 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29735 *(const char *const *) directive,
29736 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29739 /* Likewise for read-write sections. */
29741 static void
29742 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29744 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29745 *(const char *const *) directive,
29746 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29749 static void
29750 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29752 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29753 *(const char *const *) directive,
29754 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29757 /* A get_unnamed_section callback, used for switching to toc_section. */
29759 static void
29760 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29762 if (TARGET_MINIMAL_TOC)
29764 /* toc_section is always selected at least once from
29765 rs6000_xcoff_file_start, so this is guaranteed to
29766 always be defined once and only once in each file. */
29767 if (!toc_initialized)
29769 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29770 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29771 toc_initialized = 1;
29773 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29774 (TARGET_32BIT ? "" : ",3"));
29776 else
29777 fputs ("\t.toc\n", asm_out_file);
29780 /* Implement TARGET_ASM_INIT_SECTIONS. */
29782 static void
29783 rs6000_xcoff_asm_init_sections (void)
29785 read_only_data_section
29786 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29787 &xcoff_read_only_section_name);
29789 private_data_section
29790 = get_unnamed_section (SECTION_WRITE,
29791 rs6000_xcoff_output_readwrite_section_asm_op,
29792 &xcoff_private_data_section_name);
29794 tls_data_section
29795 = get_unnamed_section (SECTION_TLS,
29796 rs6000_xcoff_output_tls_section_asm_op,
29797 &xcoff_tls_data_section_name);
29799 tls_private_data_section
29800 = get_unnamed_section (SECTION_TLS,
29801 rs6000_xcoff_output_tls_section_asm_op,
29802 &xcoff_private_data_section_name);
29804 read_only_private_data_section
29805 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29806 &xcoff_private_data_section_name);
29808 toc_section
29809 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29811 readonly_data_section = read_only_data_section;
29812 exception_section = data_section;
29815 static int
29816 rs6000_xcoff_reloc_rw_mask (void)
29818 return 3;
29821 static void
29822 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29823 tree decl ATTRIBUTE_UNUSED)
29825 int smclass;
29826 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29828 if (flags & SECTION_CODE)
29829 smclass = 0;
29830 else if (flags & SECTION_TLS)
29831 smclass = 3;
29832 else if (flags & SECTION_WRITE)
29833 smclass = 2;
29834 else
29835 smclass = 1;
29837 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29838 (flags & SECTION_CODE) ? "." : "",
29839 name, suffix[smclass], flags & SECTION_ENTSIZE);
29842 #define IN_NAMED_SECTION(DECL) \
29843 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29844 && DECL_SECTION_NAME (DECL) != NULL)
29846 static section *
29847 rs6000_xcoff_select_section (tree decl, int reloc,
29848 unsigned HOST_WIDE_INT align)
29850 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29851 named section. */
29852 if (align > BIGGEST_ALIGNMENT)
29854 resolve_unique_section (decl, reloc, true);
29855 if (IN_NAMED_SECTION (decl))
29856 return get_named_section (decl, NULL, reloc);
29859 if (decl_readonly_section (decl, reloc))
29861 if (TREE_PUBLIC (decl))
29862 return read_only_data_section;
29863 else
29864 return read_only_private_data_section;
29866 else
29868 #if HAVE_AS_TLS
29869 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29871 if (TREE_PUBLIC (decl))
29872 return tls_data_section;
29873 else if (bss_initializer_p (decl))
29875 /* Convert to COMMON to emit in BSS. */
29876 DECL_COMMON (decl) = 1;
29877 return tls_comm_section;
29879 else
29880 return tls_private_data_section;
29882 else
29883 #endif
29884 if (TREE_PUBLIC (decl))
29885 return data_section;
29886 else
29887 return private_data_section;
29891 static void
29892 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
29894 const char *name;
29896 /* Use select_section for private data and uninitialized data with
29897 alignment <= BIGGEST_ALIGNMENT. */
29898 if (!TREE_PUBLIC (decl)
29899 || DECL_COMMON (decl)
29900 || (DECL_INITIAL (decl) == NULL_TREE
29901 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
29902 || DECL_INITIAL (decl) == error_mark_node
29903 || (flag_zero_initialized_in_bss
29904 && initializer_zerop (DECL_INITIAL (decl))))
29905 return;
29907 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29908 name = (*targetm.strip_name_encoding) (name);
29909 set_decl_section_name (decl, name);
29912 /* Select section for constant in constant pool.
29914 On RS/6000, all constants are in the private read-only data area.
29915 However, if this is being placed in the TOC it must be output as a
29916 toc entry. */
29918 static section *
29919 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
29920 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
29922 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29923 return toc_section;
29924 else
29925 return read_only_private_data_section;
29928 /* Remove any trailing [DS] or the like from the symbol name. */
29930 static const char *
29931 rs6000_xcoff_strip_name_encoding (const char *name)
29933 size_t len;
29934 if (*name == '*')
29935 name++;
29936 len = strlen (name);
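/* XCOFF storage-mapping-class suffixes such as [DS], [RW] or [TC] are
   always two letters inside brackets, i.e. four characters, hence the
   len - 4 below.  */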
29937 if (name[len - 1] == ']')
29938 return ggc_alloc_string (name, len - 4);
29939 else
29940 return name;
29943 /* Section attributes. AIX is always PIC. */
29945 static unsigned int
29946 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
29948 unsigned int align;
29949 unsigned int flags = default_section_type_flags (decl, name, reloc);
29951 /* Align to at least UNIT size. */
29952 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
29953 align = MIN_UNITS_PER_WORD;
29954 else
29955 /* Increase alignment of large objects if not already stricter. */
29956 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
29957 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
29958 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
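/* Smuggle log2 of the alignment into the SECTION_ENTSIZE bits of the
   flags; rs6000_xcoff_asm_named_section prints it back out as the
   alignment operand of the .csect directive.  */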
29960 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
29963 /* Output at beginning of assembler file.
29965 Initialize the section names for the RS/6000 at this point.
29967 Specify filename, including full path, to assembler.
29969 We want to go into the TOC section so at least one .toc will be emitted.
29970 Also, in order to output proper .bs/.es pairs, we need at least one static
29971 [RW] section emitted.
29973 Finally, declare mcount when profiling to make the assembler happy. */
29975 static void
29976 rs6000_xcoff_file_start (void)
29978 rs6000_gen_section_name (&xcoff_bss_section_name,
29979 main_input_filename, ".bss_");
29980 rs6000_gen_section_name (&xcoff_private_data_section_name,
29981 main_input_filename, ".rw_");
29982 rs6000_gen_section_name (&xcoff_read_only_section_name,
29983 main_input_filename, ".ro_");
29984 rs6000_gen_section_name (&xcoff_tls_data_section_name,
29985 main_input_filename, ".tls_");
29986 rs6000_gen_section_name (&xcoff_tbss_section_name,
29987 main_input_filename, ".tbss_[UL]");
29989 fputs ("\t.file\t", asm_out_file);
29990 output_quoted_string (asm_out_file, main_input_filename);
29991 fputc ('\n', asm_out_file);
29992 if (write_symbols != NO_DEBUG)
29993 switch_to_section (private_data_section);
29994 switch_to_section (text_section);
29995 if (profile_flag)
29996 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
29997 rs6000_file_start ();
30000 /* Output at end of assembler file.
30001 On the RS/6000, referencing data should automatically pull in text. */
30003 static void
30004 rs6000_xcoff_file_end (void)
30006 switch_to_section (text_section);
30007 fputs ("_section_.text:\n", asm_out_file);
30008 switch_to_section (data_section);
30009 fputs (TARGET_32BIT
30010 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
30011 asm_out_file);
30014 struct declare_alias_data
30016 FILE *file;
30017 bool function_descriptor;
30020 /* Declare alias N.  A helper callback for symtab_node::call_for_symbol_and_aliases.  */
30022 static bool
30023 rs6000_declare_alias (struct symtab_node *n, void *d)
30025 struct declare_alias_data *data = (struct declare_alias_data *)d;
30026 /* Main symbol is output specially, because varasm machinery does part of
30027 the job for us - we do not need to declare .globl/lglobs and such. */
30028 if (!n->alias || n->weakref)
30029 return false;
30031 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
30032 return false;
30034 /* Prevent assemble_alias from trying to use .set pseudo operation
30035 that does not behave as expected by the middle-end. */
30036 TREE_ASM_WRITTEN (n->decl) = true;
30038 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
30039 char *buffer = (char *) alloca (strlen (name) + 2);
30040 char *p;
30041 int dollar_inside = 0;
30043 strcpy (buffer, name);
30044 p = strchr (buffer, '$');
30045 while (p) {
30046 *p = '_';
30047 dollar_inside++;
30048 p = strchr (p + 1, '$');
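/* '$' is not usable in AIX assembler symbol names, so emit a '$'-free
   name and, below, a .rename directive binding it to the real name in
   quoted form.  */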
30050 if (TREE_PUBLIC (n->decl))
30052 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
30054 if (dollar_inside) {
30055 if (data->function_descriptor)
30056 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30057 else
30058 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30060 if (data->function_descriptor)
30061 fputs ("\t.globl .", data->file);
30062 else
30063 fputs ("\t.globl ", data->file);
30064 RS6000_OUTPUT_BASENAME (data->file, buffer);
30065 putc ('\n', data->file);
30067 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
30068 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
30070 else
30072 if (dollar_inside)
30074 if (data->function_descriptor)
30075 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30076 else
30077 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30079 if (data->function_descriptor)
30080 fputs ("\t.lglobl .", data->file);
30081 else
30082 fputs ("\t.lglobl ", data->file);
30083 RS6000_OUTPUT_BASENAME (data->file, buffer);
30084 putc ('\n', data->file);
30086 if (data->function_descriptor)
30087 fputs (".", data->file);
30088 RS6000_OUTPUT_BASENAME (data->file, buffer);
30089 fputs (":\n", data->file);
30090 return false;
30093 /* This macro produces the initial definition of a function name.
30094 On the RS/6000, we need to place an extra '.' in the function name and
30095 output the function descriptor.
30096 Dollar signs are converted to underscores.
30098 The csect for the function will have already been created when
30099 text_section was selected. We do have to go back to that csect, however.
30101 The third and fourth parameters to the .function pseudo-op (16 and 044)
30102 are placeholders which no longer have any use.
30104 Because AIX assembler's .set command has unexpected semantics, we output
30105 all aliases as alternative labels in front of the definition. */
30107 void
30108 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
30110 char *buffer = (char *) alloca (strlen (name) + 1);
30111 char *p;
30112 int dollar_inside = 0;
30113 struct declare_alias_data data = {file, false};
30115 strcpy (buffer, name);
30116 p = strchr (buffer, '$');
30117 while (p) {
30118 *p = '_';
30119 dollar_inside++;
30120 p = strchr (p + 1, '$');
30122 if (TREE_PUBLIC (decl))
30124 if (!RS6000_WEAK || !DECL_WEAK (decl))
30126 if (dollar_inside) {
30127 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30128 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30130 fputs ("\t.globl .", file);
30131 RS6000_OUTPUT_BASENAME (file, buffer);
30132 putc ('\n', file);
30135 else
30137 if (dollar_inside) {
30138 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30139 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30141 fputs ("\t.lglobl .", file);
30142 RS6000_OUTPUT_BASENAME (file, buffer);
30143 putc ('\n', file);
30145 fputs ("\t.csect ", file);
30146 RS6000_OUTPUT_BASENAME (file, buffer);
30147 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
30148 RS6000_OUTPUT_BASENAME (file, buffer);
30149 fputs (":\n", file);
30150 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30151 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
30152 RS6000_OUTPUT_BASENAME (file, buffer);
30153 fputs (", TOC[tc0], 0\n", file);
30154 in_section = NULL;
30155 switch_to_section (function_section (decl));
30156 putc ('.', file);
30157 RS6000_OUTPUT_BASENAME (file, buffer);
30158 fputs (":\n", file);
30159 data.function_descriptor = true;
30160 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30161 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
30162 xcoffout_declare_function (file, decl, buffer);
30163 return;
30166 /* This macro produces the initial definition of an object (variable) name.
30167 Because AIX assembler's .set command has unexpected semantics, we output
30168 all aliases as alternative labels in front of the definition. */
30170 void
30171 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
30173 struct declare_alias_data data = {file, false};
30174 RS6000_OUTPUT_BASENAME (file, name);
30175 fputs (":\n", file);
30176 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30179 #ifdef HAVE_AS_TLS
30180 static void
30181 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
30183 rtx symbol;
30184 int flags;
30186 default_encode_section_info (decl, rtl, first);
30188 /* Careful not to prod global register variables. */
30189 if (!MEM_P (rtl))
30190 return;
30191 symbol = XEXP (rtl, 0);
30192 if (GET_CODE (symbol) != SYMBOL_REF)
30193 return;
30195 flags = SYMBOL_REF_FLAGS (symbol);
30197 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30198 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
30200 SYMBOL_REF_FLAGS (symbol) = flags;
30202 #endif /* HAVE_AS_TLS */
30203 #endif /* TARGET_XCOFF */
30205 /* Compute a (partial) cost for rtx X. Return true if the complete
30206 cost has been computed, and false if subexpressions should be
30207 scanned. In either case, *TOTAL contains the cost result. */
30209 static bool
30210 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
30211 int *total, bool speed)
30213 machine_mode mode = GET_MODE (x);
30215 switch (code)
30217 /* On the RS/6000, if it is valid in the insn, it is free. */
30218 case CONST_INT:
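/* The test below asks whether X fits the immediate field of the
   enclosing operation (constraints I/K/L/J, mask operands, and so on);
   such a constant folds into the instruction and costs nothing.  */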
30219 if (((outer_code == SET
30220 || outer_code == PLUS
30221 || outer_code == MINUS)
30222 && (satisfies_constraint_I (x)
30223 || satisfies_constraint_L (x)))
30224 || (outer_code == AND
30225 && (satisfies_constraint_K (x)
30226 || (mode == SImode
30227 ? satisfies_constraint_L (x)
30228 : satisfies_constraint_J (x))
30229 || mask_operand (x, mode)
30230 || (mode == DImode
30231 && mask64_operand (x, DImode))))
30232 || ((outer_code == IOR || outer_code == XOR)
30233 && (satisfies_constraint_K (x)
30234 || (mode == SImode
30235 ? satisfies_constraint_L (x)
30236 : satisfies_constraint_J (x))))
30237 || outer_code == ASHIFT
30238 || outer_code == ASHIFTRT
30239 || outer_code == LSHIFTRT
30240 || outer_code == ROTATE
30241 || outer_code == ROTATERT
30242 || outer_code == ZERO_EXTRACT
30243 || (outer_code == MULT
30244 && satisfies_constraint_I (x))
30245 || ((outer_code == DIV || outer_code == UDIV
30246 || outer_code == MOD || outer_code == UMOD)
30247 && exact_log2 (INTVAL (x)) >= 0)
30248 || (outer_code == COMPARE
30249 && (satisfies_constraint_I (x)
30250 || satisfies_constraint_K (x)))
30251 || ((outer_code == EQ || outer_code == NE)
30252 && (satisfies_constraint_I (x)
30253 || satisfies_constraint_K (x)
30254 || (mode == SImode
30255 ? satisfies_constraint_L (x)
30256 : satisfies_constraint_J (x))))
30257 || (outer_code == GTU
30258 && satisfies_constraint_I (x))
30259 || (outer_code == LTU
30260 && satisfies_constraint_P (x)))
30262 *total = 0;
30263 return true;
30265 else if ((outer_code == PLUS
30266 && reg_or_add_cint_operand (x, VOIDmode))
30267 || (outer_code == MINUS
30268 && reg_or_sub_cint_operand (x, VOIDmode))
30269 || ((outer_code == SET
30270 || outer_code == IOR
30271 || outer_code == XOR)
30272 && (INTVAL (x)
30273 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
30275 *total = COSTS_N_INSNS (1);
30276 return true;
30278 /* FALLTHRU */
30280 case CONST_DOUBLE:
30281 case CONST_WIDE_INT:
30282 case CONST:
30283 case HIGH:
30284 case SYMBOL_REF:
30285 case MEM:
30286 /* When optimizing for size, MEM should be slightly more expensive
30287 than generating the address, e.g., (plus (reg) (const)).
30288 L1 cache latency is about two instructions. */
30289 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
30290 return true;
30292 case LABEL_REF:
30293 *total = 0;
30294 return true;
30296 case PLUS:
30297 case MINUS:
30298 if (FLOAT_MODE_P (mode))
30299 *total = rs6000_cost->fp;
30300 else
30301 *total = COSTS_N_INSNS (1);
30302 return false;
30304 case MULT:
30305 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30306 && satisfies_constraint_I (XEXP (x, 1)))
30308 if (INTVAL (XEXP (x, 1)) >= -256
30309 && INTVAL (XEXP (x, 1)) <= 255)
30310 *total = rs6000_cost->mulsi_const9;
30311 else
30312 *total = rs6000_cost->mulsi_const;
30314 else if (mode == SFmode)
30315 *total = rs6000_cost->fp;
30316 else if (FLOAT_MODE_P (mode))
30317 *total = rs6000_cost->dmul;
30318 else if (mode == DImode)
30319 *total = rs6000_cost->muldi;
30320 else
30321 *total = rs6000_cost->mulsi;
30322 return false;
30324 case FMA:
30325 if (mode == SFmode)
30326 *total = rs6000_cost->fp;
30327 else
30328 *total = rs6000_cost->dmul;
30329 break;
30331 case DIV:
30332 case MOD:
30333 if (FLOAT_MODE_P (mode))
30335 *total = mode == DFmode ? rs6000_cost->ddiv
30336 : rs6000_cost->sdiv;
30337 return false;
30339 /* FALLTHRU */
30341 case UDIV:
30342 case UMOD:
30343 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30344 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
30346 if (code == DIV || code == MOD)
30347 /* Shift, addze */
30348 *total = COSTS_N_INSNS (2);
30349 else
30350 /* Shift */
30351 *total = COSTS_N_INSNS (1);
30353 else
30355 if (GET_MODE (XEXP (x, 1)) == DImode)
30356 *total = rs6000_cost->divdi;
30357 else
30358 *total = rs6000_cost->divsi;
30360 /* Add in shift and subtract for MOD. */
30361 if (code == MOD || code == UMOD)
30362 *total += COSTS_N_INSNS (2);
30363 return false;
30365 case CTZ:
30366 case FFS:
30367 *total = COSTS_N_INSNS (4);
30368 return false;
30370 case POPCOUNT:
30371 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30372 return false;
30374 case PARITY:
30375 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30376 return false;
30378 case NOT:
30379 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30381 *total = 0;
30382 return false;
30384 /* FALLTHRU */
30386 case AND:
30387 case CLZ:
30388 case IOR:
30389 case XOR:
30390 case ZERO_EXTRACT:
30391 *total = COSTS_N_INSNS (1);
30392 return false;
30394 case ASHIFT:
30395 case ASHIFTRT:
30396 case LSHIFTRT:
30397 case ROTATE:
30398 case ROTATERT:
30399 /* Handle mul_highpart. */
30400 if (outer_code == TRUNCATE
30401 && GET_CODE (XEXP (x, 0)) == MULT)
30403 if (mode == DImode)
30404 *total = rs6000_cost->muldi;
30405 else
30406 *total = rs6000_cost->mulsi;
30407 return true;
30409 else if (outer_code == AND)
30410 *total = 0;
30411 else
30412 *total = COSTS_N_INSNS (1);
30413 return false;
30415 case SIGN_EXTEND:
30416 case ZERO_EXTEND:
30417 if (GET_CODE (XEXP (x, 0)) == MEM)
30418 *total = 0;
30419 else
30420 *total = COSTS_N_INSNS (1);
30421 return false;
30423 case COMPARE:
30424 case NEG:
30425 case ABS:
30426 if (!FLOAT_MODE_P (mode))
30428 *total = COSTS_N_INSNS (1);
30429 return false;
30431 /* FALLTHRU */
30433 case FLOAT:
30434 case UNSIGNED_FLOAT:
30435 case FIX:
30436 case UNSIGNED_FIX:
30437 case FLOAT_TRUNCATE:
30438 *total = rs6000_cost->fp;
30439 return false;
30441 case FLOAT_EXTEND:
30442 if (mode == DFmode)
30443 *total = 0;
30444 else
30445 *total = rs6000_cost->fp;
30446 return false;
30448 case UNSPEC:
30449 switch (XINT (x, 1))
30451 case UNSPEC_FRSP:
30452 *total = rs6000_cost->fp;
30453 return true;
30455 default:
30456 break;
30458 break;
30460 case CALL:
30461 case IF_THEN_ELSE:
30462 if (!speed)
30464 *total = COSTS_N_INSNS (1);
30465 return true;
30467 else if (FLOAT_MODE_P (mode)
30468 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30470 *total = rs6000_cost->fp;
30471 return false;
30473 break;
30475 case NE:
30476 case EQ:
30477 case GTU:
30478 case LTU:
30479 /* Carry bit requires mode == Pmode.
30480 NEG or PLUS already counted so only add one. */
30481 if (mode == Pmode
30482 && (outer_code == NEG || outer_code == PLUS))
30484 *total = COSTS_N_INSNS (1);
30485 return true;
30487 if (outer_code == SET)
30489 if (XEXP (x, 1) == const0_rtx)
30491 if (TARGET_ISEL && !TARGET_MFCRF)
30492 *total = COSTS_N_INSNS (8);
30493 else
30494 *total = COSTS_N_INSNS (2);
30495 return true;
30497 else if (mode == Pmode)
30499 *total = COSTS_N_INSNS (3);
30500 return false;
30503 /* FALLTHRU */
30505 case GT:
30506 case LT:
30507 case UNORDERED:
30508 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30510 if (TARGET_ISEL && !TARGET_MFCRF)
30511 *total = COSTS_N_INSNS (8);
30512 else
30513 *total = COSTS_N_INSNS (2);
30514 return true;
30516 /* CC COMPARE. */
30517 if (outer_code == COMPARE)
30519 *total = 0;
30520 return true;
30522 break;
30524 default:
30525 break;
30528 return false;
30531 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */
30533 static bool
30534 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30535 bool speed)
30537 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30539 fprintf (stderr,
30540 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30541 "opno = %d, total = %d, speed = %s, x:\n",
30542 ret ? "complete" : "scan inner",
30543 GET_RTX_NAME (code),
30544 GET_RTX_NAME (outer_code),
30545 opno,
30546 *total,
30547 speed ? "true" : "false");
30549 debug_rtx (x);
30551 return ret;
30554 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30556 static int
30557 rs6000_debug_address_cost (rtx x, machine_mode mode,
30558 addr_space_t as, bool speed)
30560 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30562 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30563 ret, speed ? "true" : "false");
30564 debug_rtx (x);
30566 return ret;
30570 /* A C expression returning the cost of moving data from a register of class
30571 FROM to one of class TO.  */
30573 static int
30574 rs6000_register_move_cost (machine_mode mode,
30575 reg_class_t from, reg_class_t to)
30577 int ret;
30579 if (TARGET_DEBUG_COST)
30580 dbg_cost_ctrl++;
30582 /* Moves from/to GENERAL_REGS. */
30583 if (reg_classes_intersect_p (to, GENERAL_REGS)
30584 || reg_classes_intersect_p (from, GENERAL_REGS))
30586 reg_class_t rclass = from;
30588 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30589 rclass = to;
30591 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30592 ret = (rs6000_memory_move_cost (mode, rclass, false)
30593 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30595 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30596 shift. */
30597 else if (rclass == CR_REGS)
30598 ret = 4;
30600 /* For those processors that have slow LR/CTR moves, make them more
30601 expensive than memory in order to bias spills to memory.  */
30602 else if ((rs6000_cpu == PROCESSOR_POWER6
30603 || rs6000_cpu == PROCESSOR_POWER7
30604 || rs6000_cpu == PROCESSOR_POWER8)
30605 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
30606 ret = 6 * hard_regno_nregs[0][mode];
30608 else
30609 /* A move will cost one instruction per GPR moved. */
30610 ret = 2 * hard_regno_nregs[0][mode];
30613 /* If we have VSX, we can easily move between FPR or Altivec registers. */
30614 else if (VECTOR_MEM_VSX_P (mode)
30615 && reg_classes_intersect_p (to, VSX_REGS)
30616 && reg_classes_intersect_p (from, VSX_REGS))
30617 ret = 2 * hard_regno_nregs[32][mode];
30619 /* Moving between two similar registers is just one instruction. */
30620 else if (reg_classes_intersect_p (to, from))
30621 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
30623 /* Everything else has to go through GENERAL_REGS. */
30624 else
30625 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
30626 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
30628 if (TARGET_DEBUG_COST)
30630 if (dbg_cost_ctrl == 1)
30631 fprintf (stderr,
30632 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
30633 ret, GET_MODE_NAME (mode), reg_class_names[from],
30634 reg_class_names[to]);
30635 dbg_cost_ctrl--;
30638 return ret;
30641 /* A C expression returning the cost of moving data of MODE from a register to
30642 or from memory. */
30644 static int
30645 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
30646 bool in ATTRIBUTE_UNUSED)
30648 int ret;
30650 if (TARGET_DEBUG_COST)
30651 dbg_cost_ctrl++;
30653 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30654 ret = 4 * hard_regno_nregs[0][mode];
30655 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30656 || reg_classes_intersect_p (rclass, VSX_REGS)))
30657 ret = 4 * hard_regno_nregs[32][mode];
30658 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30659 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30660 else
30661 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30663 if (TARGET_DEBUG_COST)
30665 if (dbg_cost_ctrl == 1)
30666 fprintf (stderr,
30667 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30668 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30669 dbg_cost_ctrl--;
30672 return ret;
30675 /* Returns a code for a target-specific builtin that implements
30676 reciprocal of the function, or NULL_TREE if not available. */
30678 static tree
30679 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30680 bool sqrt ATTRIBUTE_UNUSED)
30682 if (optimize_insn_for_size_p ())
30683 return NULL_TREE;
30685 if (md_fn)
30686 switch (fn)
30688 case VSX_BUILTIN_XVSQRTDP:
30689 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30690 return NULL_TREE;
30692 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30694 case VSX_BUILTIN_XVSQRTSP:
30695 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30696 return NULL_TREE;
30698 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30700 default:
30701 return NULL_TREE;
30704 else
30705 switch (fn)
30707 case BUILT_IN_SQRT:
30708 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30709 return NULL_TREE;
30711 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30713 case BUILT_IN_SQRTF:
30714 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30715 return NULL_TREE;
30717 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30719 default:
30720 return NULL_TREE;
30724 /* Load up a constant. If the mode is a vector mode, splat the value across
30725 all of the vector elements. */
30727 static rtx
30728 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
30730 rtx reg;
30732 if (mode == SFmode || mode == DFmode)
30734 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30735 reg = force_reg (mode, d);
30737 else if (mode == V4SFmode)
30739 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30740 rtvec v = gen_rtvec (4, d, d, d, d);
30741 reg = gen_reg_rtx (mode);
30742 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30744 else if (mode == V2DFmode)
30746 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30747 rtvec v = gen_rtvec (2, d, d);
30748 reg = gen_reg_rtx (mode);
30749 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30751 else
30752 gcc_unreachable ();
30754 return reg;
30757 /* Generate an FMA instruction. */
30759 static void
30760 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30762 machine_mode mode = GET_MODE (target);
30763 rtx dst;
30765 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30766 gcc_assert (dst != NULL);
30768 if (dst != target)
30769 emit_move_insn (target, dst);
30772 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
30774 static void
30775 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30777 machine_mode mode = GET_MODE (target);
30778 rtx dst;
30780 /* Altivec does not support fms directly;
30781 generate in terms of fma in that case. */
30782 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30783 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30784 else
30786 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30787 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30789 gcc_assert (dst != NULL);
30791 if (dst != target)
30792 emit_move_insn (target, dst);
30795 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
30797 static void
30798 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30800 machine_mode mode = GET_MODE (dst);
30801 rtx r;
30803 /* This is a tad more complicated, since the fnma_optab is for
30804 a different expression: fma(-m1, m2, a), which is the same
30805 thing except in the case of signed zeros.
30807 Fortunately we know that if FMA is supported that FNMSUB is
30808 also supported in the ISA. Just expand it directly. */
30810 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30812 r = gen_rtx_NEG (mode, a);
30813 r = gen_rtx_FMA (mode, m1, m2, r);
30814 r = gen_rtx_NEG (mode, r);
30815 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
30818 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30819 add a reg_note saying that this was a division. Support both scalar and
30820 vector divide. Assumes no trapping math and finite arguments. */
30822 void
30823 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30825 machine_mode mode = GET_MODE (dst);
30826 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30827 int i;
30829 /* Low precision estimates guarantee 5 bits of accuracy. High
30830 precision estimates guarantee 14 bits of accuracy. SFmode
30831 requires 23 bits of accuracy. DFmode requires 52 bits of
30832 accuracy. Each pass at least doubles the accuracy, leading
30833 to the following. */
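/* Concretely: 3 passes turn 5 good bits into at least 40, covering
   SFmode's 23 bits; one extra pass yields 80 for DFmode's 52.  With
   the higher-precision estimate, 1 pass gives 28 bits and 2 passes
   give 56.  */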
30834 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30835 if (mode == DFmode || mode == V2DFmode)
30836 passes++;
30838 enum insn_code code = optab_handler (smul_optab, mode);
30839 insn_gen_fn gen_mul = GEN_FCN (code);
30841 gcc_assert (code != CODE_FOR_nothing);
30843 one = rs6000_load_constant_and_splat (mode, dconst1);
30845 /* x0 = 1./d estimate */
30846 x0 = gen_reg_rtx (mode);
30847 emit_insn (gen_rtx_SET (VOIDmode, x0,
30848 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30849 UNSPEC_FRES)));
30851 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
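/* In terms of the error e_i = 1 - d * x_i this is
   x_(i+1) = x_i + e_i * x_i with e_(i+1) = e_i * e_i, which is exactly
   what the madd/mul pairs below compute.  */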
30852 if (passes > 1) {
30854 /* e0 = 1. - d * x0 */
30855 e0 = gen_reg_rtx (mode);
30856 rs6000_emit_nmsub (e0, d, x0, one);
30858 /* x1 = x0 + e0 * x0 */
30859 x1 = gen_reg_rtx (mode);
30860 rs6000_emit_madd (x1, e0, x0, x0);
30862 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30863 ++i, xprev = xnext, eprev = enext) {
30865 /* enext = eprev * eprev */
30866 enext = gen_reg_rtx (mode);
30867 emit_insn (gen_mul (enext, eprev, eprev));
30869 /* xnext = xprev + enext * xprev */
30870 xnext = gen_reg_rtx (mode);
30871 rs6000_emit_madd (xnext, enext, xprev, xprev);
30874 } else
30875 xprev = x0;
30877 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30879 /* u = n * xprev */
30880 u = gen_reg_rtx (mode);
30881 emit_insn (gen_mul (u, n, xprev));
30883 /* v = n - (d * u) */
30884 v = gen_reg_rtx (mode);
30885 rs6000_emit_nmsub (v, d, u, n);
30887 /* dst = (v * xprev) + u */
30888 rs6000_emit_madd (dst, v, xprev, u);
30890 if (note_p)
30891 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
30894 /* Newton-Raphson approximation of single/double-precision floating point
30895 rsqrt. Assumes no trapping math and finite arguments. */
30897 void
30898 rs6000_emit_swrsqrt (rtx dst, rtx src)
30900 machine_mode mode = GET_MODE (src);
30901 rtx x0 = gen_reg_rtx (mode);
30902 rtx y = gen_reg_rtx (mode);
30904 /* Low precision estimates guarantee 5 bits of accuracy. High
30905 precision estimates guarantee 14 bits of accuracy. SFmode
30906 requires 23 bits of accuracy. DFmode requires 52 bits of
30907 accuracy. Each pass at least doubles the accuracy, leading
30908 to the following. */
30909 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30910 if (mode == DFmode || mode == V2DFmode)
30911 passes++;
30913 REAL_VALUE_TYPE dconst3_2;
30914 int i;
30915 rtx halfthree;
30916 enum insn_code code = optab_handler (smul_optab, mode);
30917 insn_gen_fn gen_mul = GEN_FCN (code);
30919 gcc_assert (code != CODE_FOR_nothing);
30921 /* Load up the constant 1.5 either as a scalar, or as a vector. */
30922 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
30923 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
30925 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
30927 /* x0 = rsqrt estimate */
30928 emit_insn (gen_rtx_SET (VOIDmode, x0,
30929 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
30930 UNSPEC_RSQRT)));
30932 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
30933 rs6000_emit_msub (y, src, halfthree, src);
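/* Newton's iteration for 1/sqrt(src) is
   x_(i+1) = x_i * (1.5 - 0.5 * src * x_i * x_i); with y = 0.5 * src
   this is the x1 = x0 * (1.5 - y * (x0 * x0)) computed in each pass
   below.  */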
30935 for (i = 0; i < passes; i++)
30937 rtx x1 = gen_reg_rtx (mode);
30938 rtx u = gen_reg_rtx (mode);
30939 rtx v = gen_reg_rtx (mode);
30941 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
30942 emit_insn (gen_mul (u, x0, x0));
30943 rs6000_emit_nmsub (v, y, u, halfthree);
30944 emit_insn (gen_mul (x1, x0, v));
30945 x0 = x1;
30948 emit_move_insn (dst, x0);
30949 return;
30952 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
30953 (Power7) targets. DST is the target, and SRC is the argument operand. */
30955 void
30956 rs6000_emit_popcount (rtx dst, rtx src)
30958 machine_mode mode = GET_MODE (dst);
30959 rtx tmp1, tmp2;
30961 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
30962 if (TARGET_POPCNTD)
30964 if (mode == SImode)
30965 emit_insn (gen_popcntdsi2 (dst, src));
30966 else
30967 emit_insn (gen_popcntddi2 (dst, src));
30968 return;
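/* Otherwise fall back on popcntb, which counts the bits of each byte
   in parallel.  Multiplying the per-byte counts by 0x0101...01 sums
   them into the most significant byte, which the final right shift
   (24 for SImode, 56 for DImode) extracts.  */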
30971 tmp1 = gen_reg_rtx (mode);
30973 if (mode == SImode)
30975 emit_insn (gen_popcntbsi2 (tmp1, src));
30976 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
30977 NULL_RTX, 0);
30978 tmp2 = force_reg (SImode, tmp2);
30979 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
30981 else
30983 emit_insn (gen_popcntbdi2 (tmp1, src));
30984 tmp2 = expand_mult (DImode, tmp1,
30985 GEN_INT ((HOST_WIDE_INT)
30986 0x01010101 << 32 | 0x01010101),
30987 NULL_RTX, 0);
30988 tmp2 = force_reg (DImode, tmp2);
30989 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
30994 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
30995 target, and SRC is the argument operand. */
30997 void
30998 rs6000_emit_parity (rtx dst, rtx src)
31000 machine_mode mode = GET_MODE (dst);
31001 rtx tmp;
31003 tmp = gen_reg_rtx (mode);
31005 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
31006 if (TARGET_CMPB)
31008 if (mode == SImode)
31010 emit_insn (gen_popcntbsi2 (tmp, src));
31011 emit_insn (gen_paritysi2_cmpb (dst, tmp));
31013 else
31015 emit_insn (gen_popcntbdi2 (tmp, src));
31016 emit_insn (gen_paritydi2_cmpb (dst, tmp));
31018 return;
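/* No parity instruction: either fold the popcntb result onto itself
   with a shift/xor tree (xor preserves parity) and test the low bit,
   or reuse the multiply-based popcount when the multiply is cheap;
   the cost comparisons below choose between the two.  */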
31021 if (mode == SImode)
31023 /* Is mult+shift >= shift+xor+shift+xor? */
31024 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
31026 rtx tmp1, tmp2, tmp3, tmp4;
31028 tmp1 = gen_reg_rtx (SImode);
31029 emit_insn (gen_popcntbsi2 (tmp1, src));
31031 tmp2 = gen_reg_rtx (SImode);
31032 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
31033 tmp3 = gen_reg_rtx (SImode);
31034 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
31036 tmp4 = gen_reg_rtx (SImode);
31037 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
31038 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
31040 else
31041 rs6000_emit_popcount (tmp, src);
31042 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
31044 else
31046 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
31047 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
31049 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
31051 tmp1 = gen_reg_rtx (DImode);
31052 emit_insn (gen_popcntbdi2 (tmp1, src));
31054 tmp2 = gen_reg_rtx (DImode);
31055 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
31056 tmp3 = gen_reg_rtx (DImode);
31057 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
31059 tmp4 = gen_reg_rtx (DImode);
31060 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
31061 tmp5 = gen_reg_rtx (DImode);
31062 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
31064 tmp6 = gen_reg_rtx (DImode);
31065 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
31066 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
31068 else
31069 rs6000_emit_popcount (tmp, src);
31070 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
31074 /* Expand an Altivec constant permutation for little endian mode.
31075 There are two issues: First, the two input operands must be
31076 swapped so that together they form a double-wide array in LE
31077 order. Second, the vperm instruction has surprising behavior
31078 in LE mode: it interprets the elements of the source vectors
31079 in BE mode ("left to right") and interprets the elements of
31080 the destination vector in LE mode ("right to left"). To
31081 correct for this, we must subtract each element of the permute
31082 control vector from 31.
31084 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
31085 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
31086 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
31087 serve as the permute control vector. Then, in BE mode,
31089 vperm 9,10,11,12
31091 places the desired result in vr9. However, in LE mode the
31092 vector contents will be
31094 vr10 = 00000003 00000002 00000001 00000000
31095 vr11 = 00000007 00000006 00000005 00000004
31097 The result of the vperm using the same permute control vector is
31099 vr9 = 05000000 07000000 01000000 03000000
31101 That is, the leftmost 4 bytes of vr10 are interpreted as the
31102 source for the rightmost 4 bytes of vr9, and so on.
31104 If we change the permute control vector to
31106 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
31108 and issue
31110 vperm 9,11,10,12
31112 we get the desired
31114 vr9 = 00000006 00000004 00000002 00000000. */
31116 void
31117 altivec_expand_vec_perm_const_le (rtx operands[4])
31119 unsigned int i;
31120 rtx perm[16];
31121 rtx constv, unspec;
31122 rtx target = operands[0];
31123 rtx op0 = operands[1];
31124 rtx op1 = operands[2];
31125 rtx sel = operands[3];
31127 /* Unpack and adjust the constant selector. */
31128 for (i = 0; i < 16; ++i)
31130 rtx e = XVECEXP (sel, 0, i);
31131 unsigned int elt = 31 - (INTVAL (e) & 31);
31132 perm[i] = GEN_INT (elt);
31135 /* Expand to a permute, swapping the inputs and using the
31136 adjusted selector. */
31137 if (!REG_P (op0))
31138 op0 = force_reg (V16QImode, op0);
31139 if (!REG_P (op1))
31140 op1 = force_reg (V16QImode, op1);
31142 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
31143 constv = force_reg (V16QImode, constv);
31144 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
31145 UNSPEC_VPERM);
31146 if (!REG_P (target))
31148 rtx tmp = gen_reg_rtx (V16QImode);
31149 emit_move_insn (tmp, unspec);
31150 unspec = tmp;
31153 emit_move_insn (target, unspec);
31156 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
31157 permute control vector. But here it's not a constant, so we must
31158 generate a vector NAND or NOR to do the adjustment. */
31160 void
31161 altivec_expand_vec_perm_le (rtx operands[4])
31163 rtx notx, iorx, unspec;
31164 rtx target = operands[0];
31165 rtx op0 = operands[1];
31166 rtx op1 = operands[2];
31167 rtx sel = operands[3];
31168 rtx tmp = target;
31169 rtx norreg = gen_reg_rtx (V16QImode);
31170 machine_mode mode = GET_MODE (target);
31172 /* Get everything in regs so the pattern matches. */
31173 if (!REG_P (op0))
31174 op0 = force_reg (mode, op0);
31175 if (!REG_P (op1))
31176 op1 = force_reg (mode, op1);
31177 if (!REG_P (sel))
31178 sel = force_reg (V16QImode, sel);
31179 if (!REG_P (target))
31180 tmp = gen_reg_rtx (mode);
31182 /* Invert the selector with a VNAND if available, else a VNOR.
31183 The VNAND is preferred for future fusion opportunities. */
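/* A plain bitwise NOT suffices because vperm only looks at the low
   five bits of each selector byte, and for a 5-bit value the needed
   adjustment 31 - elt is the same as its ones' complement.  */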
31184 notx = gen_rtx_NOT (V16QImode, sel);
31185 iorx = (TARGET_P8_VECTOR
31186 ? gen_rtx_IOR (V16QImode, notx, notx)
31187 : gen_rtx_AND (V16QImode, notx, notx));
31188 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
31190 /* Permute with operands reversed and adjusted selector. */
31191 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
31192 UNSPEC_VPERM);
31194 /* Copy into target, possibly by way of a register. */
31195 if (!REG_P (target))
31197 emit_move_insn (tmp, unspec);
31198 unspec = tmp;
31201 emit_move_insn (target, unspec);
31204 /* Expand an Altivec constant permutation. Return true if we match
31205 an efficient implementation; false to fall back to VPERM. */
31207 bool
31208 altivec_expand_vec_perm_const (rtx operands[4])
31210 struct altivec_perm_insn {
31211 HOST_WIDE_INT mask;
31212 enum insn_code impl;
31213 unsigned char perm[16];
31215 static const struct altivec_perm_insn patterns[] = {
31216 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
31217 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
31218 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
31219 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
31220 { OPTION_MASK_ALTIVEC,
31221 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
31222 : CODE_FOR_altivec_vmrglb_direct),
31223 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
31224 { OPTION_MASK_ALTIVEC,
31225 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
31226 : CODE_FOR_altivec_vmrglh_direct),
31227 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
31228 { OPTION_MASK_ALTIVEC,
31229 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
31230 : CODE_FOR_altivec_vmrglw_direct),
31231 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
31232 { OPTION_MASK_ALTIVEC,
31233 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
31234 : CODE_FOR_altivec_vmrghb_direct),
31235 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
31236 { OPTION_MASK_ALTIVEC,
31237 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
31238 : CODE_FOR_altivec_vmrghh_direct),
31239 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
31240 { OPTION_MASK_ALTIVEC,
31241 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
31242 : CODE_FOR_altivec_vmrghw_direct),
31243 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
31244 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
31245 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
31246 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
31247 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
31250 unsigned int i, j, elt, which;
31251 unsigned char perm[16];
31252 rtx target, op0, op1, sel, x;
31253 bool one_vec;
31255 target = operands[0];
31256 op0 = operands[1];
31257 op1 = operands[2];
31258 sel = operands[3];
31260 /* Unpack the constant selector. */
31261 for (i = which = 0; i < 16; ++i)
31263 rtx e = XVECEXP (sel, 0, i);
31264 elt = INTVAL (e) & 31;
31265 which |= (elt < 16 ? 1 : 2);
31266 perm[i] = elt;
31269 /* Simplify the constant selector based on operands. */
31270 switch (which)
31272 default:
31273 gcc_unreachable ();
31275 case 3:
31276 one_vec = false;
31277 if (!rtx_equal_p (op0, op1))
31278 break;
31279 /* FALLTHRU */
31281 case 2:
31282 for (i = 0; i < 16; ++i)
31283 perm[i] &= 15;
31284 op0 = op1;
31285 one_vec = true;
31286 break;
31288 case 1:
31289 op1 = op0;
31290 one_vec = true;
31291 break;
31294 /* Look for splat patterns. */
31295 if (one_vec)
31297 elt = perm[0];
31299 for (i = 0; i < 16; ++i)
31300 if (perm[i] != elt)
31301 break;
31302 if (i == 16)
31304 if (!BYTES_BIG_ENDIAN)
31305 elt = 15 - elt;
31306 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
31307 return true;
31310 if (elt % 2 == 0)
31312 for (i = 0; i < 16; i += 2)
31313 if (perm[i] != elt || perm[i + 1] != elt + 1)
31314 break;
31315 if (i == 16)
31317 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
31318 x = gen_reg_rtx (V8HImode);
31319 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
31320 GEN_INT (field)));
31321 emit_move_insn (target, gen_lowpart (V16QImode, x));
31322 return true;
31326 if (elt % 4 == 0)
31328 for (i = 0; i < 16; i += 4)
31329 if (perm[i] != elt
31330 || perm[i + 1] != elt + 1
31331 || perm[i + 2] != elt + 2
31332 || perm[i + 3] != elt + 3)
31333 break;
31334 if (i == 16)
31336 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
31337 x = gen_reg_rtx (V4SImode);
31338 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
31339 GEN_INT (field)));
31340 emit_move_insn (target, gen_lowpart (V16QImode, x));
31341 return true;
31346 /* Look for merge and pack patterns. */
31347 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
31349 bool swapped;
31351 if ((patterns[j].mask & rs6000_isa_flags) == 0)
31352 continue;
31354 elt = patterns[j].perm[0];
31355 if (perm[0] == elt)
31356 swapped = false;
31357 else if (perm[0] == elt + 16)
31358 swapped = true;
31359 else
31360 continue;
31361 for (i = 1; i < 16; ++i)
31363 elt = patterns[j].perm[i];
31364 if (swapped)
31365 elt = (elt >= 16 ? elt - 16 : elt + 16);
31366 else if (one_vec && elt >= 16)
31367 elt -= 16;
31368 if (perm[i] != elt)
31369 break;
31371 if (i == 16)
31373 enum insn_code icode = patterns[j].impl;
31374 machine_mode omode = insn_data[icode].operand[0].mode;
31375 machine_mode imode = insn_data[icode].operand[1].mode;
31377 /* For little-endian, don't use vpkuwum and vpkuhum if the
31378 underlying vector type is not V4SI and V8HI, respectively.
31379 For example, using vpkuwum with a V8HI picks up the even
31380 halfwords (BE numbering) when the even halfwords (LE
31381 numbering) are what we need. */
31382 if (!BYTES_BIG_ENDIAN
31383 && icode == CODE_FOR_altivec_vpkuwum_direct
31384 && ((GET_CODE (op0) == REG
31385 && GET_MODE (op0) != V4SImode)
31386 || (GET_CODE (op0) == SUBREG
31387 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31388 continue;
31389 if (!BYTES_BIG_ENDIAN
31390 && icode == CODE_FOR_altivec_vpkuhum_direct
31391 && ((GET_CODE (op0) == REG
31392 && GET_MODE (op0) != V8HImode)
31393 || (GET_CODE (op0) == SUBREG
31394 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31395 continue;
31397 /* For little-endian, the two input operands must be swapped
31398 (or swapped back) to ensure proper right-to-left numbering
31399 from 0 to 2N-1. */
31400 if (swapped ^ !BYTES_BIG_ENDIAN)
31401 std::swap (op0, op1);
31402 if (imode != V16QImode)
31404 op0 = gen_lowpart (imode, op0);
31405 op1 = gen_lowpart (imode, op1);
31407 if (omode == V16QImode)
31408 x = target;
31409 else
31410 x = gen_reg_rtx (omode);
31411 emit_insn (GEN_FCN (icode) (x, op0, op1));
31412 if (omode != V16QImode)
31413 emit_move_insn (target, gen_lowpart (V16QImode, x));
31414 return true;
31418 if (!BYTES_BIG_ENDIAN)
31420 altivec_expand_vec_perm_const_le (operands);
31421 return true;
31424 return false;
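/* For example, a V16QI vec_perm whose constant selector is
     { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }
   matches the third entry in the patterns table above, so it is
   emitted as a single vmrghb on big-endian (vmrglb, with the
   operands swapped, on little-endian) rather than falling back
   to vperm with a loaded control vector.  */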
31427 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31428 Return true if we match an efficient implementation. */
31430 static bool
31431 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31432 unsigned char perm0, unsigned char perm1)
31434 rtx x;
31436 /* If both selectors come from the same operand, fold to single op. */
31437 if ((perm0 & 2) == (perm1 & 2))
31439 if (perm0 & 2)
31440 op0 = op1;
31441 else
31442 op1 = op0;
31444 /* If both operands are equal, fold to simpler permutation. */
31445 if (rtx_equal_p (op0, op1))
31447 perm0 = perm0 & 1;
31448 perm1 = (perm1 & 1) + 2;
31450 /* If the first selector comes from the second operand, swap. */
31451 else if (perm0 & 2)
31453 if (perm1 & 2)
31454 return false;
31455 perm0 -= 2;
31456 perm1 += 2;
31457 std::swap (op0, op1);
31459 /* If the second selector does not come from the second operand, fail. */
31460 else if ((perm1 & 2) == 0)
31461 return false;
31463 /* Success! */
31464 if (target != NULL)
31466 machine_mode vmode, dmode;
31467 rtvec v;
31469 vmode = GET_MODE (target);
31470 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31471 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31472 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31473 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31474 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31475 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31477 return true;
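/* For example, for a V2DF target with perm0 = 1 and perm1 = 3, both
   selectors index element 1 of their operand, so the code above
   builds a VEC_SELECT of elements { 1, 3 } from the V4DF
   concatenation of op0 and op1, i.e. a merge of the odd elements.  */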
31480 bool
31481 rs6000_expand_vec_perm_const (rtx operands[4])
31483 rtx target, op0, op1, sel;
31484 unsigned char perm0, perm1;
31486 target = operands[0];
31487 op0 = operands[1];
31488 op1 = operands[2];
31489 sel = operands[3];
31491 /* Unpack the constant selector. */
31492 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31493 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31495 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31498 /* Test whether a constant permutation is supported. */
31500 static bool
31501 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
31502 const unsigned char *sel)
31504 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31505 if (TARGET_ALTIVEC)
31506 return true;
31508 /* Check for ps_merge* or evmerge* insns. */
31509 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31510 || (TARGET_SPE && vmode == V2SImode))
31512 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31513 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31514 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31517 return false;
31520 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31522 static void
31523 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31524 machine_mode vmode, unsigned nelt, rtx perm[])
31526 machine_mode imode;
31527 rtx x;
31529 imode = vmode;
31530 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31532 imode = GET_MODE_INNER (vmode);
31533 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31534 imode = mode_for_vector (imode, nelt);
31537 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31538 x = expand_vec_perm (vmode, op0, op1, x, target);
31539 if (x != target)
31540 emit_move_insn (target, x);
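/* For example, for a V4SF permutation the selector cannot itself be
   a V4SF constant, so imode above becomes V4SI (the integer vector
   mode with the same 32-bit element width) before the CONST_VECTOR
   of indices is built and handed to expand_vec_perm.  */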
31543 /* Expand an extract even operation. */
31545 void
31546 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31548 machine_mode vmode = GET_MODE (target);
31549 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31550 rtx perm[16];
31552 for (i = 0; i < nelt; i++)
31553 perm[i] = GEN_INT (i * 2);
31555 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
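/* For example, with a V4SI target the loop above builds the selector
   { 0, 2, 4, 6 }, i.e. the even elements of the double-width
   concatenation of op0 and op1.  */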
31558 /* Expand a vector interleave operation. */
31560 void
31561 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31563 machine_mode vmode = GET_MODE (target);
31564 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31565 rtx perm[16];
31567 high = (highp ? 0 : nelt / 2);
31568 for (i = 0; i < nelt / 2; i++)
31570 perm[i * 2] = GEN_INT (i + high);
31571 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31574 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
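/* For example, with a V4SI target and highp, high = 0 and the
   selector is { 0, 4, 1, 5 }; with !highp, high = nelt / 2 = 2 and
   the selector is { 2, 6, 3, 7 }.  */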
31577 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
31578 void
31579 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31581 HOST_WIDE_INT hwi_scale (scale);
31582 REAL_VALUE_TYPE r_pow;
31583 rtvec v = rtvec_alloc (2);
31584 rtx elt;
31585 rtx scale_vec = gen_reg_rtx (V2DFmode);
31586 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31587 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31588 RTVEC_ELT (v, 0) = elt;
31589 RTVEC_ELT (v, 1) = elt;
31590 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31591 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
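/* For example, rs6000_scale_v2df (tgt, src, 3) builds the constant
   vector { 8.0, 8.0 } (2**3) and emits a V2DF multiply of SRC by it
   into TGT.  */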
31594 /* Return an RTX representing where to find the function value of a
31595 function returning MODE. */
31596 static rtx
31597 rs6000_complex_function_value (machine_mode mode)
31599 unsigned int regno;
31600 rtx r1, r2;
31601 machine_mode inner = GET_MODE_INNER (mode);
31602 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31604 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31605 regno = FP_ARG_RETURN;
31606 else
31608 regno = GP_ARG_RETURN;
31610 /* 32-bit is OK since it'll go in r3/r4. */
31611 if (TARGET_32BIT && inner_bytes >= 4)
31612 return gen_rtx_REG (mode, regno);
31615 if (inner_bytes >= 8)
31616 return gen_rtx_REG (mode, regno);
31618 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
31619 const0_rtx);
31620 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
31621 GEN_INT (inner_bytes));
31622 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
31625 /* Return an rtx describing a return value of MODE as a PARALLEL
31626 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
31627 stride REG_STRIDE. */
31629 static rtx
31630 rs6000_parallel_return (machine_mode mode,
31631 int n_elts, machine_mode elt_mode,
31632 unsigned int regno, unsigned int reg_stride)
31634 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
31636 int i;
31637 for (i = 0; i < n_elts; i++)
31639 rtx r = gen_rtx_REG (elt_mode, regno);
31640 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31641 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31642 regno += reg_stride;
31645 return par;
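/* For example, rs6000_parallel_return (DImode, 2, SImode,
   GP_ARG_RETURN, 1) describes a DImode value returned in r3/r4:
     (parallel [(expr_list (reg:SI 3) (const_int 0))
                (expr_list (reg:SI 4) (const_int 4))]).  */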
31648 /* Target hook for TARGET_FUNCTION_VALUE.
31650 On the SPE, both FPs and vectors are returned in r3.
31652 On RS/6000 an integer value is in r3 and a floating-point value is in
31653 fp1, unless -msoft-float. */
31655 static rtx
31656 rs6000_function_value (const_tree valtype,
31657 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
31658 bool outgoing ATTRIBUTE_UNUSED)
31660 machine_mode mode;
31661 unsigned int regno;
31662 machine_mode elt_mode;
31663 int n_elts;
31665 /* Special handling for structs in darwin64. */
31666 if (TARGET_MACHO
31667 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
31669 CUMULATIVE_ARGS valcum;
31670 rtx valret;
31672 valcum.words = 0;
31673 valcum.fregno = FP_ARG_MIN_REG;
31674 valcum.vregno = ALTIVEC_ARG_MIN_REG;
31675 /* Do a trial code generation as if this were going to be passed as
31676 an argument; if any part goes in memory, we return NULL. */
31677 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
31678 if (valret)
31679 return valret;
31680 /* Otherwise fall through to standard ABI rules. */
31683 mode = TYPE_MODE (valtype);
31685 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
31686 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
31688 int first_reg, n_regs;
31690 if (SCALAR_FLOAT_MODE_P (elt_mode))
31692 /* _Decimal128 must use even/odd register pairs. */
31693 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31694 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31696 else
31698 first_reg = ALTIVEC_ARG_RETURN;
31699 n_regs = 1;
31702 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
31705 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
31706 if (TARGET_32BIT && TARGET_POWERPC64)
31707 switch (mode)
31709 default:
31710 break;
31711 case DImode:
31712 case SCmode:
31713 case DCmode:
31714 case TCmode:
31715 int count = GET_MODE_SIZE (mode) / 4;
31716 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
31719 if ((INTEGRAL_TYPE_P (valtype)
31720 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
31721 || POINTER_TYPE_P (valtype))
31722 mode = TARGET_32BIT ? SImode : DImode;
31724 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31725 /* _Decimal128 must use an even/odd register pair. */
31726 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31727 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31728 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31729 regno = FP_ARG_RETURN;
31730 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31731 && targetm.calls.split_complex_arg)
31732 return rs6000_complex_function_value (mode);
31733 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31734 return register is used in both cases, and we won't see V2DImode/V2DFmode
31735 for pure altivec, combine the two cases. */
31736 else if (TREE_CODE (valtype) == VECTOR_TYPE
31737 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31738 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31739 regno = ALTIVEC_ARG_RETURN;
31740 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31741 && (mode == DFmode || mode == DCmode
31742 || mode == TFmode || mode == TCmode))
31743 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31744 else
31745 regno = GP_ARG_RETURN;
31747 return gen_rtx_REG (mode, regno);
31750 /* Define how to find the value returned by a library function
31751 assuming the value has mode MODE. */
31752 rtx
31753 rs6000_libcall_value (machine_mode mode)
31755 unsigned int regno;
31757 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
31758 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31759 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
31761 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31762 /* _Decimal128 must use an even/odd register pair. */
31763 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31764 else if (SCALAR_FLOAT_MODE_P (mode)
31765 && TARGET_HARD_FLOAT && TARGET_FPRS
31766 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31767 regno = FP_ARG_RETURN;
31768 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31769 return register is used in both cases, and we won't see V2DImode/V2DFmode
31770 for pure altivec, combine the two cases. */
31771 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31772 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31773 regno = ALTIVEC_ARG_RETURN;
31774 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31775 return rs6000_complex_function_value (mode);
31776 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31777 && (mode == DFmode || mode == DCmode
31778 || mode == TFmode || mode == TCmode))
31779 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31780 else
31781 regno = GP_ARG_RETURN;
31783 return gen_rtx_REG (mode, regno);
31787 /* Return true if we use LRA instead of the reload pass. */
31788 static bool
31789 rs6000_lra_p (void)
31791 return rs6000_lra_flag;
31794 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31795 Frame pointer elimination is automatically handled.
31797 For the RS/6000, if frame pointer elimination is being done, we would like
31798 to convert ap into fp, not sp.
31800 We need r30 if -mminimal-toc was specified, and there are constant pool
31801 references. */
31803 static bool
31804 rs6000_can_eliminate (const int from, const int to)
31806 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31807 ? ! frame_pointer_needed
31808 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31809 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31810 : true);
31813 /* Define the offset between two registers, FROM to be eliminated and its
31814 replacement TO, at the start of a routine. */
31815 HOST_WIDE_INT
31816 rs6000_initial_elimination_offset (int from, int to)
31818 rs6000_stack_t *info = rs6000_stack_info ();
31819 HOST_WIDE_INT offset;
31821 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31822 offset = info->push_p ? 0 : -info->total_size;
31823 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31825 offset = info->push_p ? 0 : -info->total_size;
31826 if (FRAME_GROWS_DOWNWARD)
31827 offset += info->fixed_size + info->vars_size + info->parm_size;
31829 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31830 offset = FRAME_GROWS_DOWNWARD
31831 ? info->fixed_size + info->vars_size + info->parm_size
31832 : 0;
31833 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31834 offset = info->total_size;
31835 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31836 offset = info->push_p ? info->total_size : 0;
31837 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31838 offset = 0;
31839 else
31840 gcc_unreachable ();
31842 return offset;
31845 static rtx
31846 rs6000_dwarf_register_span (rtx reg)
31848 rtx parts[8];
31849 int i, words;
31850 unsigned regno = REGNO (reg);
31851 machine_mode mode = GET_MODE (reg);
31853 if (TARGET_SPE
31854 && regno < 32
31855 && (SPE_VECTOR_MODE (GET_MODE (reg))
31856 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31857 && mode != SFmode && mode != SDmode && mode != SCmode)))
31858 ;
31859 else
31860 return NULL_RTX;
31862 regno = REGNO (reg);
31864 /* The duality of the SPE register size wreaks all kinds of havoc.
31865 This is a way of distinguishing r0 in 32-bits from r0 in
31866 64-bits. */
31867 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31868 gcc_assert (words <= 4);
31869 for (i = 0; i < words; i++, regno++)
31871 if (BYTES_BIG_ENDIAN)
31873 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31874 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31876 else
31878 parts[2 * i] = gen_rtx_REG (SImode, regno);
31879 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31883 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
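/* For example, an E500 double in r5 (DFmode, a single 64-bit word)
   spans two SImode pieces; on a big-endian target the PARALLEL built
   above lists the SPE high half of r5 first, then r5 itself.  */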
31886 /* Fill in sizes for SPE register high parts in table used by unwinder. */
31888 static void
31889 rs6000_init_dwarf_reg_sizes_extra (tree address)
31891 if (TARGET_SPE)
31893 int i;
31894 machine_mode mode = TYPE_MODE (char_type_node);
31895 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31896 rtx mem = gen_rtx_MEM (BLKmode, addr);
31897 rtx value = gen_int_mode (4, mode);
31899 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
31901 int column = DWARF_REG_TO_UNWIND_COLUMN
31902 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31903 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31905 emit_move_insn (adjust_address (mem, mode, offset), value);
31909 if (TARGET_MACHO && ! TARGET_ALTIVEC)
31911 int i;
31912 machine_mode mode = TYPE_MODE (char_type_node);
31913 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31914 rtx mem = gen_rtx_MEM (BLKmode, addr);
31915 rtx value = gen_int_mode (16, mode);
31917 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
31918 The unwinder still needs to know the size of Altivec registers. */
31920 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
31922 int column = DWARF_REG_TO_UNWIND_COLUMN
31923 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31924 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31926 emit_move_insn (adjust_address (mem, mode, offset), value);
31931 /* Map internal gcc register numbers to debug format register numbers.
31932 FORMAT specifies the type of debug register number to use:
31933 0 -- debug information, except for frame-related sections
31934 1 -- DWARF .debug_frame section
31935 2 -- DWARF .eh_frame section */
31937 unsigned int
31938 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
31940 /* We never use the GCC internal number for SPE high registers.
31941 Those are mapped to the 1200..1231 range for all debug formats. */
31942 if (SPE_HIGH_REGNO_P (regno))
31943 return regno - FIRST_SPE_HIGH_REGNO + 1200;
31945 /* Except for the above, we use the internal number for non-DWARF
31946 debug information, and also for .eh_frame. */
31947 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
31948 return regno;
31950 /* On some platforms, we use the standard DWARF register
31951 numbering for .debug_info and .debug_frame. */
31952 #ifdef RS6000_USE_DWARF_NUMBERING
31953 if (regno <= 63)
31954 return regno;
31955 if (regno == LR_REGNO)
31956 return 108;
31957 if (regno == CTR_REGNO)
31958 return 109;
31959 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
31960 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
31961 The actual code emitted saves the whole of CR, so we map CR2_REGNO
31962 to the DWARF reg for CR. */
31963 if (format == 1 && regno == CR2_REGNO)
31964 return 64;
31965 if (CR_REGNO_P (regno))
31966 return regno - CR0_REGNO + 86;
31967 if (regno == CA_REGNO)
31968 return 101; /* XER */
31969 if (ALTIVEC_REGNO_P (regno))
31970 return regno - FIRST_ALTIVEC_REGNO + 1124;
31971 if (regno == VRSAVE_REGNO)
31972 return 356;
31973 if (regno == VSCR_REGNO)
31974 return 67;
31975 if (regno == SPE_ACC_REGNO)
31976 return 99;
31977 if (regno == SPEFSCR_REGNO)
31978 return 612;
31979 #endif
31980 return regno;
31983 /* target hook eh_return_filter_mode */
31984 static machine_mode
31985 rs6000_eh_return_filter_mode (void)
31987 return TARGET_32BIT ? SImode : word_mode;
31990 /* Target hook for scalar_mode_supported_p. */
31991 static bool
31992 rs6000_scalar_mode_supported_p (machine_mode mode)
31994 /* -m32 does not support TImode. This is the default, from
31995 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
31996 same ABI as for -m32. But default_scalar_mode_supported_p allows
31997 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
31998 for -mpowerpc64. */
31999 if (TARGET_32BIT && mode == TImode)
32000 return false;
32002 if (DECIMAL_FLOAT_MODE_P (mode))
32003 return default_decimal_float_supported_p ();
32004 else
32005 return default_scalar_mode_supported_p (mode);
32008 /* Target hook for vector_mode_supported_p. */
32009 static bool
32010 rs6000_vector_mode_supported_p (machine_mode mode)
32013 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
32014 return true;
32016 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
32017 return true;
32019 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
32020 return true;
32022 else
32023 return false;
32026 /* Target hook for invalid_arg_for_unprototyped_fn. */
32027 static const char *
32028 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
32030 return (!rs6000_darwin64_abi
32031 && typelist == 0
32032 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
32033 && (funcdecl == NULL_TREE
32034 || (TREE_CODE (funcdecl) == FUNCTION_DECL
32035 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
32036 ? N_("AltiVec argument passed to unprototyped function")
32037 : NULL;
32040 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
32041 setup by using __stack_chk_fail_local hidden function instead of
32042 calling __stack_chk_fail directly. Otherwise it is better to call
32043 __stack_chk_fail directly. */
32045 static tree ATTRIBUTE_UNUSED
32046 rs6000_stack_protect_fail (void)
32048 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
32049 ? default_hidden_stack_protect_fail ()
32050 : default_external_stack_protect_fail ();
32053 void
32054 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
32055 int num_operands ATTRIBUTE_UNUSED)
32057 if (rs6000_warn_cell_microcode)
32059 const char *temp;
32060 int insn_code_number = recog_memoized (insn);
32061 location_t location = INSN_LOCATION (insn);
32063 /* Punt on insns we cannot recognize. */
32064 if (insn_code_number < 0)
32065 return;
32067 temp = get_insn_template (insn_code_number, insn);
32069 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
32070 warning_at (location, OPT_mwarn_cell_microcode,
32071 "emitting microcode insn %s\t[%s] #%d",
32072 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32073 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
32074 warning_at (location, OPT_mwarn_cell_microcode,
32075 "emitting conditional microcode insn %s\t[%s] #%d",
32076 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32080 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32082 #if TARGET_ELF
32083 static unsigned HOST_WIDE_INT
32084 rs6000_asan_shadow_offset (void)
32086 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
32088 #endif
32090 /* Mask options that we want to support inside of attribute((target)) and
32091 #pragma GCC target operations. Note, we do not include things like
32092 64/32-bit, endianess, hard/soft floating point, etc. that would have
32093 different calling sequences. */
32095 struct rs6000_opt_mask {
32096 const char *name; /* option name */
32097 HOST_WIDE_INT mask; /* mask to set */
32098 bool invert; /* invert sense of mask */
32099 bool valid_target; /* option is a target option */
32102 static struct rs6000_opt_mask const rs6000_opt_masks[] =
32104 { "altivec", OPTION_MASK_ALTIVEC, false, true },
32105 { "cmpb", OPTION_MASK_CMPB, false, true },
32106 { "crypto", OPTION_MASK_CRYPTO, false, true },
32107 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
32108 { "dlmzb", OPTION_MASK_DLMZB, false, true },
32109 { "fprnd", OPTION_MASK_FPRND, false, true },
32110 { "hard-dfp", OPTION_MASK_DFP, false, true },
32111 { "htm", OPTION_MASK_HTM, false, true },
32112 { "isel", OPTION_MASK_ISEL, false, true },
32113 { "mfcrf", OPTION_MASK_MFCRF, false, true },
32114 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
32115 { "mulhw", OPTION_MASK_MULHW, false, true },
32116 { "multiple", OPTION_MASK_MULTIPLE, false, true },
32117 { "popcntb", OPTION_MASK_POPCNTB, false, true },
32118 { "popcntd", OPTION_MASK_POPCNTD, false, true },
32119 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
32120 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
32121 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
32122 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
32123 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
32124 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
32125 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
32126 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
32127 { "string", OPTION_MASK_STRING, false, true },
32128 { "update", OPTION_MASK_NO_UPDATE, true , true },
32129 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
32130 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
32131 { "vsx", OPTION_MASK_VSX, false, true },
32132 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
32133 #ifdef OPTION_MASK_64BIT
32134 #if TARGET_AIX_OS
32135 { "aix64", OPTION_MASK_64BIT, false, false },
32136 { "aix32", OPTION_MASK_64BIT, true, false },
32137 #else
32138 { "64", OPTION_MASK_64BIT, false, false },
32139 { "32", OPTION_MASK_64BIT, true, false },
32140 #endif
32141 #endif
32142 #ifdef OPTION_MASK_EABI
32143 { "eabi", OPTION_MASK_EABI, false, false },
32144 #endif
32145 #ifdef OPTION_MASK_LITTLE_ENDIAN
32146 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
32147 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
32148 #endif
32149 #ifdef OPTION_MASK_RELOCATABLE
32150 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
32151 #endif
32152 #ifdef OPTION_MASK_STRICT_ALIGN
32153 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
32154 #endif
32155 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
32156 { "string", OPTION_MASK_STRING, false, false },
32159 /* Builtin mask mapping for printing the flags. */
32160 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
32162 { "altivec", RS6000_BTM_ALTIVEC, false, false },
32163 { "vsx", RS6000_BTM_VSX, false, false },
32164 { "spe", RS6000_BTM_SPE, false, false },
32165 { "paired", RS6000_BTM_PAIRED, false, false },
32166 { "fre", RS6000_BTM_FRE, false, false },
32167 { "fres", RS6000_BTM_FRES, false, false },
32168 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
32169 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
32170 { "popcntd", RS6000_BTM_POPCNTD, false, false },
32171 { "cell", RS6000_BTM_CELL, false, false },
32172 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
32173 { "crypto", RS6000_BTM_CRYPTO, false, false },
32174 { "htm", RS6000_BTM_HTM, false, false },
32175 { "hard-dfp", RS6000_BTM_DFP, false, false },
32176 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
32177 { "long-double-128", RS6000_BTM_LDBL128, false, false },
32180 /* Option variables that we want to support inside attribute((target)) and
32181 #pragma GCC target operations. */
32183 struct rs6000_opt_var {
32184 const char *name; /* option name */
32185 size_t global_offset; /* offset of the option in global_options. */
32186 size_t target_offset; /* offset of the option in target options. */
32189 static struct rs6000_opt_var const rs6000_opt_vars[] =
32191 { "friz",
32192 offsetof (struct gcc_options, x_TARGET_FRIZ),
32193 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
32194 { "avoid-indexed-addresses",
32195 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
32196 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
32197 { "paired",
32198 offsetof (struct gcc_options, x_rs6000_paired_float),
32199 offsetof (struct cl_target_option, x_rs6000_paired_float), },
32200 { "longcall",
32201 offsetof (struct gcc_options, x_rs6000_default_long_calls),
32202 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
32205 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
32206 parsing. Return true if there were no errors. */
32208 static bool
32209 rs6000_inner_target_options (tree args, bool attr_p)
32211 bool ret = true;
32213 if (args == NULL_TREE)
32216 else if (TREE_CODE (args) == STRING_CST)
32218 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32219 char *q;
32221 while ((q = strtok (p, ",")) != NULL)
32223 bool error_p = false;
32224 bool not_valid_p = false;
32225 const char *cpu_opt = NULL;
32227 p = NULL;
32228 if (strncmp (q, "cpu=", 4) == 0)
32230 int cpu_index = rs6000_cpu_name_lookup (q+4);
32231 if (cpu_index >= 0)
32232 rs6000_cpu_index = cpu_index;
32233 else
32235 error_p = true;
32236 cpu_opt = q+4;
32239 else if (strncmp (q, "tune=", 5) == 0)
32241 int tune_index = rs6000_cpu_name_lookup (q+5);
32242 if (tune_index >= 0)
32243 rs6000_tune_index = tune_index;
32244 else
32246 error_p = true;
32247 cpu_opt = q+5;
32250 else
32252 size_t i;
32253 bool invert = false;
32254 char *r = q;
32256 error_p = true;
32257 if (strncmp (r, "no-", 3) == 0)
32259 invert = true;
32260 r += 3;
32263 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
32264 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
32266 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
32268 if (!rs6000_opt_masks[i].valid_target)
32269 not_valid_p = true;
32270 else
32272 error_p = false;
32273 rs6000_isa_flags_explicit |= mask;
32275 /* VSX needs altivec, so -mvsx automagically sets
32276 altivec. */
32277 if (mask == OPTION_MASK_VSX && !invert)
32278 mask |= OPTION_MASK_ALTIVEC;
32280 if (rs6000_opt_masks[i].invert)
32281 invert = !invert;
32283 if (invert)
32284 rs6000_isa_flags &= ~mask;
32285 else
32286 rs6000_isa_flags |= mask;
32288 break;
32291 if (error_p && !not_valid_p)
32293 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
32294 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
32296 size_t j = rs6000_opt_vars[i].global_offset;
32297 *((int *) ((char *)&global_options + j)) = !invert;
32298 error_p = false;
32299 break;
32304 if (error_p)
32306 const char *eprefix, *esuffix;
32308 ret = false;
32309 if (attr_p)
32311 eprefix = "__attribute__((__target__(";
32312 esuffix = ")))";
32314 else
32316 eprefix = "#pragma GCC target ";
32317 esuffix = "";
32320 if (cpu_opt)
32321 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
32322 q, esuffix);
32323 else if (not_valid_p)
32324 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
32325 else
32326 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
32331 else if (TREE_CODE (args) == TREE_LIST)
32335 tree value = TREE_VALUE (args);
32336 if (value)
32338 bool ret2 = rs6000_inner_target_options (value, attr_p);
32339 if (!ret2)
32340 ret = false;
32342 args = TREE_CHAIN (args);
32344 while (args != NULL_TREE);
32347 else
32348 gcc_unreachable ();
32350 return ret;
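/* For example, the attribute
     __attribute__((__target__("cpu=power8,vsx,no-crypto")))
   is tokenized on commas above: "cpu=power8" sets rs6000_cpu_index,
   "vsx" sets OPTION_MASK_VSX (and, as noted, implies
   OPTION_MASK_ALTIVEC), and "no-crypto" clears OPTION_MASK_CRYPTO.  */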
32353 /* Print out the target options as a list for -mdebug=target. */
32355 static void
32356 rs6000_debug_target_options (tree args, const char *prefix)
32358 if (args == NULL_TREE)
32359 fprintf (stderr, "%s<NULL>", prefix);
32361 else if (TREE_CODE (args) == STRING_CST)
32363 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32364 char *q;
32366 while ((q = strtok (p, ",")) != NULL)
32368 p = NULL;
32369 fprintf (stderr, "%s\"%s\"", prefix, q);
32370 prefix = ", ";
32374 else if (TREE_CODE (args) == TREE_LIST)
32378 tree value = TREE_VALUE (args);
32379 if (value)
32381 rs6000_debug_target_options (value, prefix);
32382 prefix = ", ";
32384 args = TREE_CHAIN (args);
32386 while (args != NULL_TREE);
32389 else
32390 gcc_unreachable ();
32392 return;
32396 /* Hook to validate attribute((target("..."))). */
32398 static bool
32399 rs6000_valid_attribute_p (tree fndecl,
32400 tree ARG_UNUSED (name),
32401 tree args,
32402 int flags)
32404 struct cl_target_option cur_target;
32405 bool ret;
32406 tree old_optimize = build_optimization_node (&global_options);
32407 tree new_target, new_optimize;
32408 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32410 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32412 if (TARGET_DEBUG_TARGET)
32414 tree tname = DECL_NAME (fndecl);
32415 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32416 if (tname)
32417 fprintf (stderr, "function: %.*s\n",
32418 (int) IDENTIFIER_LENGTH (tname),
32419 IDENTIFIER_POINTER (tname));
32420 else
32421 fprintf (stderr, "function: unknown\n");
32423 fprintf (stderr, "args:");
32424 rs6000_debug_target_options (args, " ");
32425 fprintf (stderr, "\n");
32427 if (flags)
32428 fprintf (stderr, "flags: 0x%x\n", flags);
32430 fprintf (stderr, "--------------------\n");
32433 old_optimize = build_optimization_node (&global_options);
32434 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32436 /* If the function changed the optimization levels as well as setting target
32437 options, start with the optimizations specified. */
32438 if (func_optimize && func_optimize != old_optimize)
32439 cl_optimization_restore (&global_options,
32440 TREE_OPTIMIZATION (func_optimize));
32442 /* The target attributes may also change some optimization flags, so update
32443 the optimization options if necessary. */
32444 cl_target_option_save (&cur_target, &global_options);
32445 rs6000_cpu_index = rs6000_tune_index = -1;
32446 ret = rs6000_inner_target_options (args, true);
32448 /* Set up any additional state. */
32449 if (ret)
32451 ret = rs6000_option_override_internal (false);
32452 new_target = build_target_option_node (&global_options);
32454 else
32455 new_target = NULL;
32457 new_optimize = build_optimization_node (&global_options);
32459 if (!new_target)
32460 ret = false;
32462 else if (fndecl)
32464 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32466 if (old_optimize != new_optimize)
32467 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32470 cl_target_option_restore (&global_options, &cur_target);
32472 if (old_optimize != new_optimize)
32473 cl_optimization_restore (&global_options,
32474 TREE_OPTIMIZATION (old_optimize));
32476 return ret;
32480 /* Hook to validate the current #pragma GCC target and set the state, and
32481 update the macros based on what was changed. If ARGS is NULL, then
32482 POP_TARGET is used to reset the options. */
32484 bool
32485 rs6000_pragma_target_parse (tree args, tree pop_target)
32487 tree prev_tree = build_target_option_node (&global_options);
32488 tree cur_tree;
32489 struct cl_target_option *prev_opt, *cur_opt;
32490 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32491 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32493 if (TARGET_DEBUG_TARGET)
32495 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32496 fprintf (stderr, "args:");
32497 rs6000_debug_target_options (args, " ");
32498 fprintf (stderr, "\n");
32500 if (pop_target)
32502 fprintf (stderr, "pop_target:\n");
32503 debug_tree (pop_target);
32505 else
32506 fprintf (stderr, "pop_target: <NULL>\n");
32508 fprintf (stderr, "--------------------\n");
32511 if (! args)
32513 cur_tree = ((pop_target)
32514 ? pop_target
32515 : target_option_default_node);
32516 cl_target_option_restore (&global_options,
32517 TREE_TARGET_OPTION (cur_tree));
32519 else
32521 rs6000_cpu_index = rs6000_tune_index = -1;
32522 if (!rs6000_inner_target_options (args, false)
32523 || !rs6000_option_override_internal (false)
32524 || (cur_tree = build_target_option_node (&global_options))
32525 == NULL_TREE)
32527 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32528 fprintf (stderr, "invalid pragma\n");
32530 return false;
32534 target_option_current_node = cur_tree;
32536 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32537 change the macros that are defined. */
32538 if (rs6000_target_modify_macros_ptr)
32540 prev_opt = TREE_TARGET_OPTION (prev_tree);
32541 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32542 prev_flags = prev_opt->x_rs6000_isa_flags;
32544 cur_opt = TREE_TARGET_OPTION (cur_tree);
32545 cur_flags = cur_opt->x_rs6000_isa_flags;
32546 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32548 diff_bumask = (prev_bumask ^ cur_bumask);
32549 diff_flags = (prev_flags ^ cur_flags);
32551 if ((diff_flags != 0) || (diff_bumask != 0))
32553 /* Delete old macros. */
32554 rs6000_target_modify_macros_ptr (false,
32555 prev_flags & diff_flags,
32556 prev_bumask & diff_bumask);
32558 /* Define new macros. */
32559 rs6000_target_modify_macros_ptr (true,
32560 cur_flags & diff_flags,
32561 cur_bumask & diff_bumask);
32565 return true;
32569 /* Remember the last target of rs6000_set_current_function. */
32570 static GTY(()) tree rs6000_previous_fndecl;
32572 /* Establish appropriate back-end context for processing the function
32573 FNDECL. The argument might be NULL to indicate processing at top
32574 level, outside of any function scope. */
32575 static void
32576 rs6000_set_current_function (tree fndecl)
32578 tree old_tree = (rs6000_previous_fndecl
32579 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
32580 : NULL_TREE);
32582 tree new_tree = (fndecl
32583 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
32584 : NULL_TREE);
32586 if (TARGET_DEBUG_TARGET)
32588 bool print_final = false;
32589 fprintf (stderr, "\n==================== rs6000_set_current_function");
32591 if (fndecl)
32592 fprintf (stderr, ", fndecl %s (%p)",
32593 (DECL_NAME (fndecl)
32594 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
32595 : "<unknown>"), (void *)fndecl);
32597 if (rs6000_previous_fndecl)
32598 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
32600 fprintf (stderr, "\n");
32601 if (new_tree)
32603 fprintf (stderr, "\nnew fndecl target specific options:\n");
32604 debug_tree (new_tree);
32605 print_final = true;
32608 if (old_tree)
32610 fprintf (stderr, "\nold fndecl target specific options:\n");
32611 debug_tree (old_tree);
32612 print_final = true;
32615 if (print_final)
32616 fprintf (stderr, "--------------------\n");
32619 /* Only change the context if the function changes. This hook is called
32620 several times in the course of compiling a function, and we don't want to
32621 slow things down too much or call target_reinit when it isn't safe. */
32622 if (fndecl && fndecl != rs6000_previous_fndecl)
32624 rs6000_previous_fndecl = fndecl;
32625 if (old_tree == new_tree)
32628 else if (new_tree && new_tree != target_option_default_node)
32630 cl_target_option_restore (&global_options,
32631 TREE_TARGET_OPTION (new_tree));
32632 if (TREE_TARGET_GLOBALS (new_tree))
32633 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32634 else
32635 TREE_TARGET_GLOBALS (new_tree)
32636 = save_target_globals_default_opts ();
32639 else if (old_tree && old_tree != target_option_default_node)
32641 new_tree = target_option_current_node;
32642 cl_target_option_restore (&global_options,
32643 TREE_TARGET_OPTION (new_tree));
32644 if (TREE_TARGET_GLOBALS (new_tree))
32645 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32646 else if (new_tree == target_option_default_node)
32647 restore_target_globals (&default_target_globals);
32648 else
32649 TREE_TARGET_GLOBALS (new_tree)
32650 = save_target_globals_default_opts ();
32656 /* Save the current options */
32658 static void
32659 rs6000_function_specific_save (struct cl_target_option *ptr,
32660 struct gcc_options *opts)
32662 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
32663 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
32666 /* Restore the current options */
32668 static void
32669 rs6000_function_specific_restore (struct gcc_options *opts,
32670 struct cl_target_option *ptr)
32673 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
32674 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
32675 (void) rs6000_option_override_internal (false);
32678 /* Print the current options */
32680 static void
32681 rs6000_function_specific_print (FILE *file, int indent,
32682 struct cl_target_option *ptr)
32684 rs6000_print_isa_options (file, indent, "Isa options set",
32685 ptr->x_rs6000_isa_flags);
32687 rs6000_print_isa_options (file, indent, "Isa options explicit",
32688 ptr->x_rs6000_isa_flags_explicit);
32691 /* Helper function to print the current isa or misc options on a line. */
32693 static void
32694 rs6000_print_options_internal (FILE *file,
32695 int indent,
32696 const char *string,
32697 HOST_WIDE_INT flags,
32698 const char *prefix,
32699 const struct rs6000_opt_mask *opts,
32700 size_t num_elements)
32702 size_t i;
32703 size_t start_column = 0;
32704 size_t cur_column;
32705 size_t max_column = 76;
32706 const char *comma = "";
32708 if (indent)
32709 start_column += fprintf (file, "%*s", indent, "");
32711 if (!flags)
32713 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
32714 return;
32717 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
32719 /* Print the various mask options. */
32720 cur_column = start_column;
32721 for (i = 0; i < num_elements; i++)
32723 if ((flags & opts[i].mask) != 0)
32725 const char *no_str = opts[i].invert ? "no-" : "";
32726 size_t len = (strlen (comma)
32727 + strlen (prefix)
32728 + strlen (no_str)
32729 + strlen (opts[i].name));
32731 cur_column += len;
32732 if (cur_column > max_column)
32734 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
32735 cur_column = start_column + len;
32736 comma = "";
32739 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32740 opts[i].name);
32741 flags &= ~ opts[i].mask;
32742 comma = ", ";
32746 fputs ("\n", file);
32749 /* Helper function to print the current isa options on a line. */
32751 static void
32752 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32753 HOST_WIDE_INT flags)
32755 rs6000_print_options_internal (file, indent, string, flags, "-m",
32756 &rs6000_opt_masks[0],
32757 ARRAY_SIZE (rs6000_opt_masks));
32760 static void
32761 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32762 HOST_WIDE_INT flags)
32764 rs6000_print_options_internal (file, indent, string, flags, "",
32765 &rs6000_builtin_mask_names[0],
32766 ARRAY_SIZE (rs6000_builtin_mask_names));
32770 /* Hook to determine if one function can safely inline another. */
32772 static bool
32773 rs6000_can_inline_p (tree caller, tree callee)
32775 bool ret = false;
32776 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32777 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32779 /* If callee has no option attributes, then it is ok to inline. */
32780 if (!callee_tree)
32781 ret = true;
32783 /* If caller has no option attributes, but callee does then it is not ok to
32784 inline. */
32785 else if (!caller_tree)
32786 ret = false;
32788 else
32790 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32791 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32793 /* Callee's options should be a subset of the caller's, i.e. a vsx function
32794 can inline an altivec function but a non-vsx function can't inline a
32795 vsx function. */
32796 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32797 == callee_opts->x_rs6000_isa_flags)
32798 ret = true;
32801 if (TARGET_DEBUG_TARGET)
32802 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
32803 (DECL_NAME (caller)
32804 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32805 : "<unknown>"),
32806 (DECL_NAME (callee)
32807 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32808 : "<unknown>"),
32809 (ret ? "can" : "cannot"));
32811 return ret;
32814 /* Allocate a stack temp and fix up the address so it meets the particular
32815 memory requirements (either offsettable or REG+REG addressing). */
32817 rtx
32818 rs6000_allocate_stack_temp (machine_mode mode,
32819 bool offsettable_p,
32820 bool reg_reg_p)
32822 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32823 rtx addr = XEXP (stack, 0);
32824 int strict_p = (reload_in_progress || reload_completed);
32826 if (!legitimate_indirect_address_p (addr, strict_p))
32828 if (offsettable_p
32829 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32830 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32832 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32833 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32836 return stack;
32839 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
32840 to such a form to deal with memory reference instructions like STFIWX that
32841 only take reg+reg addressing. */
32843 rtx
32844 rs6000_address_for_fpconvert (rtx x)
32846 int strict_p = (reload_in_progress || reload_completed);
32847 rtx addr;
32849 gcc_assert (MEM_P (x));
32850 addr = XEXP (x, 0);
32851 if (! legitimate_indirect_address_p (addr, strict_p)
32852 && ! legitimate_indexed_address_p (addr, strict_p))
32854 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32856 rtx reg = XEXP (addr, 0);
32857 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32858 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32859 gcc_assert (REG_P (reg));
32860 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32861 addr = reg;
32863 else if (GET_CODE (addr) == PRE_MODIFY)
32865 rtx reg = XEXP (addr, 0);
32866 rtx expr = XEXP (addr, 1);
32867 gcc_assert (REG_P (reg));
32868 gcc_assert (GET_CODE (expr) == PLUS);
32869 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32870 addr = reg;
32873 x = replace_equiv_address (x, copy_addr_to_reg (addr));
32876 return x;
32879 /* Given a memory reference, if it is not in the form for altivec memory
32880 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
32881 convert to the altivec format. */
32883 rtx
32884 rs6000_address_for_altivec (rtx x)
32886 gcc_assert (MEM_P (x));
32887 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
32889 rtx addr = XEXP (x, 0);
32890 int strict_p = (reload_in_progress || reload_completed);
32892 if (!legitimate_indexed_address_p (addr, strict_p)
32893 && !legitimate_indirect_address_p (addr, strict_p))
32894 addr = copy_to_mode_reg (Pmode, addr);
32896 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
32897 x = change_address (x, GET_MODE (x), addr);
32900 return x;
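/* For example, a MEM whose address is sp + 32 is in neither indirect
   nor indexed form, so the code above copies the address into a
   register and wraps it as (and (reg) (const_int -16)), the explicit
   low-bits-cleared form the AltiVec load/store patterns expect.  */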
32903 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32905 On the RS/6000, all integer constants are acceptable, most won't be valid
32906 for particular insns, though. Only easy FP constants are acceptable. */
32908 static bool
32909 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32911 if (TARGET_ELF && tls_referenced_p (x))
32912 return false;
32914 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
32915 || GET_MODE (x) == VOIDmode
32916 || (TARGET_POWERPC64 && mode == DImode)
32917 || easy_fp_constant (x, mode)
32918 || easy_vector_constant (x, mode));
32922 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
32924 static bool
32925 chain_already_loaded (rtx_insn *last)
32927 for (; last != NULL; last = PREV_INSN (last))
32929 if (NONJUMP_INSN_P (last))
32931 rtx patt = PATTERN (last);
32933 if (GET_CODE (patt) == SET)
32935 rtx lhs = XEXP (patt, 0);
32937 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
32938 return true;
32942 return false;
32945 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32947 void
32948 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32950 const bool direct_call_p
32951 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
32952 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32953 rtx toc_load = NULL_RTX;
32954 rtx toc_restore = NULL_RTX;
32955 rtx func_addr;
32956 rtx abi_reg = NULL_RTX;
32957 rtx call[4];
32958 int n_call;
32959 rtx insn;
32961 /* Handle longcall attributes. */
32962 if (INTVAL (cookie) & CALL_LONG)
32963 func_desc = rs6000_longcall_ref (func_desc);
32965 /* Handle indirect calls. */
32966 if (GET_CODE (func_desc) != SYMBOL_REF
32967 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
32969 /* Save the TOC into its reserved slot before the call,
32970 and prepare to restore it after the call. */
32971 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32972 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32973 rtx stack_toc_mem = gen_frame_mem (Pmode,
32974 gen_rtx_PLUS (Pmode, stack_ptr,
32975 stack_toc_offset));
32976 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
32977 gen_rtvec (1, stack_toc_offset),
32978 UNSPEC_TOCSLOT);
32979 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_unspec);
32981 /* Can we optimize saving the TOC in the prologue or
32982 do we need to do it at every call? */
32983 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32984 cfun->machine->save_toc_in_prologue = true;
32985 else
32987 MEM_VOLATILE_P (stack_toc_mem) = 1;
32988 emit_move_insn (stack_toc_mem, toc_reg);
32991 if (DEFAULT_ABI == ABI_ELFv2)
32993 /* A function pointer in the ELFv2 ABI is just a plain address, but
32994 the ABI requires it to be loaded into r12 before the call. */
32995 func_addr = gen_rtx_REG (Pmode, 12);
32996 emit_move_insn (func_addr, func_desc);
32997 abi_reg = func_addr;
32999 else
33001 /* A function pointer under AIX is a pointer to a data area whose
33002 first word contains the actual address of the function, whose
33003 second word contains a pointer to its TOC, and whose third word
33004 contains a value to place in the static chain register (r11).
33005 Note that if we load the static chain, our "trampoline" need
33006 not have any executable code. */
33008 /* Load up address of the actual function. */
33009 func_desc = force_reg (Pmode, func_desc);
33010 func_addr = gen_reg_rtx (Pmode);
33011 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
33013 /* Prepare to load the TOC of the called function. Note that the
33014 TOC load must happen immediately before the actual call so
33015 that unwinding the TOC registers works correctly. See the
33016 comment in frob_update_context. */
33017 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
33018 rtx func_toc_mem = gen_rtx_MEM (Pmode,
33019 gen_rtx_PLUS (Pmode, func_desc,
33020 func_toc_offset));
33021 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
33023 /* If we have a static chain, load it up. But, if the call was
33024 originally direct, the 3rd word has not been written since no
33025 trampoline has been built, so we ought not to load it, lest we
33026 override a static chain value. */
33027 if (!direct_call_p
33028 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
33029 && !chain_already_loaded (crtl->emit.sequence_stack->last))
33031 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
33032 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
33033 rtx func_sc_mem = gen_rtx_MEM (Pmode,
33034 gen_rtx_PLUS (Pmode, func_desc,
33035 func_sc_offset));
33036 emit_move_insn (sc_reg, func_sc_mem);
33037 abi_reg = sc_reg;
33041 else
33043 /* Direct calls use the TOC: for local calls, the callee will
33044 assume the TOC register is set; for non-local calls, the
33045 PLT stub needs the TOC register. */
33046 abi_reg = toc_reg;
33047 func_addr = func_desc;
33050 /* Create the call. */
33051 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
33052 if (value != NULL_RTX)
33053 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
33054 n_call = 1;
33056 if (toc_load)
33057 call[n_call++] = toc_load;
33058 if (toc_restore)
33059 call[n_call++] = toc_restore;
33061 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
33063 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
33064 insn = emit_call_insn (insn);
33066 /* Mention all registers defined by the ABI to hold information
33067 as uses in CALL_INSN_FUNCTION_USAGE. */
33068 if (abi_reg)
33069 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
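/* For example, an indirect call under the AIX ABI emits: a store of
   r2 to its stack slot (unless deferred to the prologue), a load of
   the target address from the first word of the descriptor, a load
   of the static chain from the third word when needed, and a call
   PARALLEL carrying the TOC-load USE and the r2-restore SET so the
   unwinder can recognize the save/restore pair.  */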
33072 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
33074 void
33075 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
33077 rtx call[2];
33078 rtx insn;
33080 gcc_assert (INTVAL (cookie) == 0);
33082 /* Create the call. */
33083 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
33084 if (value != NULL_RTX)
33085 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
33087 call[1] = simple_return_rtx;
33089 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
33090 insn = emit_call_insn (insn);
33092 /* Note use of the TOC register. */
33093 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
33094 /* We need to also mark a use of the link register since the function we
33095 sibling-call to will use it to return to our caller. */
33096 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
33099 /* Return whether we need to always update the saved TOC pointer when we update
33100 the stack pointer. */
33102 static bool
33103 rs6000_save_toc_in_prologue_p (void)
33105 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
33108 #ifdef HAVE_GAS_HIDDEN
33109 # define USE_HIDDEN_LINKONCE 1
33110 #else
33111 # define USE_HIDDEN_LINKONCE 0
33112 #endif
33114 /* Fills in the label name that should be used for a 476 link stack thunk. */
33116 void
33117 get_ppc476_thunk_name (char name[32])
33119 gcc_assert (TARGET_LINK_STACK);
33121 if (USE_HIDDEN_LINKONCE)
33122 sprintf (name, "__ppc476.get_thunk");
33123 else
33124 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
33127 /* This function emits the simple thunk routine that is used to preserve
33128 the link stack on the 476 cpu. */
33130 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
33131 static void
33132 rs6000_code_end (void)
33134 char name[32];
33135 tree decl;
33137 if (!TARGET_LINK_STACK)
33138 return;
33140 get_ppc476_thunk_name (name);
33142 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
33143 build_function_type_list (void_type_node, NULL_TREE));
33144 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
33145 NULL_TREE, void_type_node);
33146 TREE_PUBLIC (decl) = 1;
33147 TREE_STATIC (decl) = 1;
33149 #if RS6000_WEAK
33150 if (USE_HIDDEN_LINKONCE)
33152 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
33153 targetm.asm_out.unique_section (decl, 0);
33154 switch_to_section (get_named_section (decl, NULL, 0));
33155 DECL_WEAK (decl) = 1;
33156 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
33157 targetm.asm_out.globalize_label (asm_out_file, name);
33158 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
33159 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
33161 else
33162 #endif
33164 switch_to_section (text_section);
33165 ASM_OUTPUT_LABEL (asm_out_file, name);
33168 DECL_INITIAL (decl) = make_node (BLOCK);
33169 current_function_decl = decl;
33170 init_function_start (decl);
33171 first_function_block_is_cold = false;
33172 /* Make sure unwind info is emitted for the thunk if needed. */
33173 final_start_function (emit_barrier (), asm_out_file, 1);
33175 fputs ("\tblr\n", asm_out_file);
33177 final_end_function ();
33178 init_insn_lengths ();
33179 free_after_compilation (cfun);
33180 set_cfun (NULL);
33181 current_function_decl = NULL;
33184 /* Add r30 to hard reg set if the prologue sets it up and it is not
33185 pic_offset_table_rtx. */
33187 static void
33188 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
33190 if (!TARGET_SINGLE_PIC_BASE
33191 && TARGET_TOC
33192 && TARGET_MINIMAL_TOC
33193 && get_pool_size () != 0)
33194 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
33198 /* Helper function for rs6000_split_logical to emit a logical instruction after
33199 splitting the operation into single GPR registers.
33201 DEST is the destination register.
33202 OP1 and OP2 are the input source registers.
33203 CODE is the base operation (AND, IOR, XOR, NOT).
33204 MODE is the machine mode.
33205 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33206 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33207 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33209 static void
33210 rs6000_split_logical_inner (rtx dest,
33211 rtx op1,
33212 rtx op2,
33213 enum rtx_code code,
33214 machine_mode mode,
33215 bool complement_final_p,
33216 bool complement_op1_p,
33217 bool complement_op2_p)
33219 rtx bool_rtx;
33221 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
33222 if (op2 && GET_CODE (op2) == CONST_INT
33223 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
33224 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33226 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
33227 HOST_WIDE_INT value = INTVAL (op2) & mask;
33229 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
33230 if (code == AND)
33232 if (value == 0)
33234 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
33235 return;
33238 else if (value == mask)
33240 if (!rtx_equal_p (dest, op1))
33241 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33242 return;
33246 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
33247 into separate ORI/ORIS or XORI/XORIS instructions. */
33248 else if (code == IOR || code == XOR)
33250 if (value == 0)
33252 if (!rtx_equal_p (dest, op1))
33253 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33254 return;
33259 if (code == AND && mode == SImode
33260 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33262 emit_insn (gen_andsi3 (dest, op1, op2));
33263 return;
33266 if (complement_op1_p)
33267 op1 = gen_rtx_NOT (mode, op1);
33269 if (complement_op2_p)
33270 op2 = gen_rtx_NOT (mode, op2);
33272 /* For canonical RTL, if only one arm is inverted it is the first. */
33273 if (!complement_op1_p && complement_op2_p)
33274 std::swap (op1, op2);
33276 bool_rtx = ((code == NOT)
33277 ? gen_rtx_NOT (mode, op1)
33278 : gen_rtx_fmt_ee (code, mode, op1, op2));
33280 if (complement_final_p)
33281 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
33283 emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
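/* A small sketch of the canonicalization above: an andc-style split
   arrives with COMPLEMENT_OP2_P set, op2 is wrapped in NOT, and the
   operands are swapped, so for SImode we emit

     (set (reg:SI d) (and:SI (not:SI (reg:SI b)) (reg:SI a)))

   with the inverted operand first, as canonical RTL requires.  */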
33286 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
33287 operations are split immediately during RTL generation to allow for more
33288 optimizations of the AND/IOR/XOR.
33290 OPERANDS is an array containing the destination and two input operands.
33291 CODE is the base operation (AND, IOR, XOR, NOT).
33292 MODE is the machine mode.
33293 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33294 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33295 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
33296 CLOBBER_REG is either NULL or a scratch register of type CC to allow
33297 formation of the AND instructions. */
33299 static void
33300 rs6000_split_logical_di (rtx operands[3],
33301 enum rtx_code code,
33302 bool complement_final_p,
33303 bool complement_op1_p,
33304 bool complement_op2_p)
33306 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
33307 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
33308 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
33309 enum hi_lo { hi = 0, lo = 1 };
33310 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
33311 size_t i;
33313 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
33314 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
33315 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
33316 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
33318 if (code == NOT)
33319 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
33320 else
33322 if (GET_CODE (operands[2]) != CONST_INT)
33324 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
33325 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
33327 else
33329 HOST_WIDE_INT value = INTVAL (operands[2]);
33330 HOST_WIDE_INT value_hi_lo[2];
33332 gcc_assert (!complement_final_p);
33333 gcc_assert (!complement_op1_p);
33334 gcc_assert (!complement_op2_p);
33336 value_hi_lo[hi] = value >> 32;
33337 value_hi_lo[lo] = value & lower_32bits;
33339 for (i = 0; i < 2; i++)
33341 HOST_WIDE_INT sub_value = value_hi_lo[i];
33343 if (sub_value & sign_bit)
33344 sub_value |= upper_32bits;
33346 op2_hi_lo[i] = GEN_INT (sub_value);
33348 /* If this is an AND instruction, check to see if we need to load
33349 the value in a register. */
33350 if (code == AND && sub_value != -1 && sub_value != 0
33351 && !and_operand (op2_hi_lo[i], SImode))
33352 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
33357 for (i = 0; i < 2; i++)
33359 /* Split large IOR/XOR operations. */
33360 if ((code == IOR || code == XOR)
33361 && GET_CODE (op2_hi_lo[i]) == CONST_INT
33362 && !complement_final_p
33363 && !complement_op1_p
33364 && !complement_op2_p
33365 && !logical_const_operand (op2_hi_lo[i], SImode))
33367 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
33368 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
33369 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
33370 rtx tmp = gen_reg_rtx (SImode);
33372 /* Make sure the constant is sign extended. */
33373 if ((hi_16bits & sign_bit) != 0)
33374 hi_16bits |= upper_32bits;
33376 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
33377 code, SImode, false, false, false);
33379 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33380 code, SImode, false, false, false);
33382 else
33383 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33384 code, SImode, complement_final_p,
33385 complement_op1_p, complement_op2_p);
33388 return;
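/* Worked example (illustrative): on a 32-bit target, splitting
   "x:DI = y:DI & 0xffff" through rs6000_split_logical_inner above
   produces two SImode operations:

     x_hi = 0;               (high mask word is 0, so the AND folds away)
     x_lo = y_lo & 0xffff;   (a single andi. instruction)  */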
33391 /* Split the insns that make up boolean operations operating on multiple GPR
33392 registers. The boolean MD patterns ensure that the inputs either are
33393 exactly the same as the output registers, or there is no overlap.
33395 OPERANDS is an array containing the destination and two input operands.
33396 CODE is the base operation (AND, IOR, XOR, NOT).
33397 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33398 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33399 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33401 void
33402 rs6000_split_logical (rtx operands[3],
33403 enum rtx_code code,
33404 bool complement_final_p,
33405 bool complement_op1_p,
33406 bool complement_op2_p)
33408 machine_mode mode = GET_MODE (operands[0]);
33409 machine_mode sub_mode;
33410 rtx op0, op1, op2;
33411 int sub_size, regno0, regno1, nregs, i;
33413 /* If this is DImode, use the specialized version that can run before
33414 register allocation. */
33415 if (mode == DImode && !TARGET_POWERPC64)
33417 rs6000_split_logical_di (operands, code, complement_final_p,
33418 complement_op1_p, complement_op2_p);
33419 return;
33422 op0 = operands[0];
33423 op1 = operands[1];
33424 op2 = (code == NOT) ? NULL_RTX : operands[2];
33425 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33426 sub_size = GET_MODE_SIZE (sub_mode);
33427 regno0 = REGNO (op0);
33428 regno1 = REGNO (op1);
33430 gcc_assert (reload_completed);
33431 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33432 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33434 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33435 gcc_assert (nregs > 1);
33437 if (op2 && REG_P (op2))
33438 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33440 for (i = 0; i < nregs; i++)
33442 int offset = i * sub_size;
33443 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33444 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33445 rtx sub_op2 = ((code == NOT)
33446 ? NULL_RTX
33447 : simplify_subreg (sub_mode, op2, mode, offset));
33449 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33450 complement_final_p, complement_op1_p,
33451 complement_op2_p);
33454 return;
33458 /* Return true if the peephole2 can combine a load involving a combination of
33459 an addis instruction and a load with an offset that can be fused together on
33460 a power8. */
33462 bool
33463 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33464 rtx addis_value, /* addis value. */
33465 rtx target, /* target register that is loaded. */
33466 rtx mem) /* bottom part of the memory addr. */
33468 rtx addr;
33469 rtx base_reg;
33471 /* Validate arguments. */
33472 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33473 return false;
33475 if (!base_reg_operand (target, GET_MODE (target)))
33476 return false;
33478 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33479 return false;
33481 /* Allow sign/zero extension. */
33482 if (GET_CODE (mem) == ZERO_EXTEND
33483 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33484 mem = XEXP (mem, 0);
33486 if (!MEM_P (mem))
33487 return false;
33489 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33490 return false;
33492 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33493 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33494 return false;
33496 /* Validate that the register used to load the high value is either the
33497 register being loaded, or we can safely replace its use.
33499 This function is only called from the peephole2 pass and we assume that
33500 there are 2 instructions in the peephole (addis and load), so we want to
33501 check that the target register is not used in the memory address and that the
33502 register to hold the addis result is dead after the peephole. */
33503 if (REGNO (addis_reg) != REGNO (target))
33505 if (reg_mentioned_p (target, mem))
33506 return false;
33508 if (!peep2_reg_dead_p (2, addis_reg))
33509 return false;
33511 /* If the target register being loaded is the stack pointer, we must
33512 avoid loading any other value into it, even temporarily. */
33513 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33514 return false;
33517 base_reg = XEXP (addr, 0);
33518 return REGNO (addis_reg) == REGNO (base_reg);
33521 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33522 sequence. We adjust the addis register to use the target register. If the
33523 load sign extends, we adjust the code to emit a zero-extending load
33524 followed by an explicit sign extension, since the fusion only covers
33525 zero-extending loads.
33527 The operands are:
33528 operands[0] register set with addis (to be replaced with target)
33529 operands[1] value set via addis
33530 operands[2] target register being loaded
33531 operands[3] D-form memory reference using operands[0]. */
33533 void
33534 expand_fusion_gpr_load (rtx *operands)
33536 rtx addis_value = operands[1];
33537 rtx target = operands[2];
33538 rtx orig_mem = operands[3];
33539 rtx new_addr, new_mem, orig_addr, offset;
33540 enum rtx_code plus_or_lo_sum;
33541 machine_mode target_mode = GET_MODE (target);
33542 machine_mode extend_mode = target_mode;
33543 machine_mode ptr_mode = Pmode;
33544 enum rtx_code extend = UNKNOWN;
33546 if (GET_CODE (orig_mem) == ZERO_EXTEND
33547 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33549 extend = GET_CODE (orig_mem);
33550 orig_mem = XEXP (orig_mem, 0);
33551 target_mode = GET_MODE (orig_mem);
33554 gcc_assert (MEM_P (orig_mem));
33556 orig_addr = XEXP (orig_mem, 0);
33557 plus_or_lo_sum = GET_CODE (orig_addr);
33558 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33560 offset = XEXP (orig_addr, 1);
33561 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33562 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33564 if (extend != UNKNOWN)
33565 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33567 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33568 UNSPEC_FUSION_GPR);
33569 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
33571 if (extend == SIGN_EXTEND)
33573 int sub_off = ((BYTES_BIG_ENDIAN)
33574 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33575 : 0);
33576 rtx sign_reg
33577 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33579 emit_insn (gen_rtx_SET (VOIDmode, target,
33580 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33583 return;
33586 /* Return a string to fuse an addis instruction with a gpr load to the same
33587 register that the addis instruction set. The address that is used
33588 is the logical address that was formed during peephole2:
33589 (lo_sum (high) (low-part))
33591 The code is complicated, so we call output_asm_insn directly, and just
33592 return "". */
33594 const char *
33595 emit_fusion_gpr_load (rtx target, rtx mem)
33597 rtx addis_value;
33598 rtx fuse_ops[10];
33599 rtx addr;
33600 rtx load_offset;
33601 const char *addis_str = NULL;
33602 const char *load_str = NULL;
33603 const char *mode_name = NULL;
33604 char insn_template[80];
33605 machine_mode mode;
33606 const char *comment_str = ASM_COMMENT_START;
33608 if (GET_CODE (mem) == ZERO_EXTEND)
33609 mem = XEXP (mem, 0);
33611 gcc_assert (REG_P (target) && MEM_P (mem));
33613 if (*comment_str == ' ')
33614 comment_str++;
33616 addr = XEXP (mem, 0);
33617 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33618 gcc_unreachable ();
33620 addis_value = XEXP (addr, 0);
33621 load_offset = XEXP (addr, 1);
33623 /* Now emit the load instruction to the same register. */
33624 mode = GET_MODE (mem);
33625 switch (mode)
33627 case QImode:
33628 mode_name = "char";
33629 load_str = "lbz";
33630 break;
33632 case HImode:
33633 mode_name = "short";
33634 load_str = "lhz";
33635 break;
33637 case SImode:
33638 mode_name = "int";
33639 load_str = "lwz";
33640 break;
33642 case DImode:
33643 gcc_assert (TARGET_POWERPC64);
33644 mode_name = "long";
33645 load_str = "ld";
33646 break;
33648 default:
33649 gcc_unreachable ();
33652 /* Emit the addis instruction. */
33653 fuse_ops[0] = target;
33654 if (satisfies_constraint_L (addis_value))
33656 fuse_ops[1] = addis_value;
33657 addis_str = "lis %0,%v1";
33660 else if (GET_CODE (addis_value) == PLUS)
33662 rtx op0 = XEXP (addis_value, 0);
33663 rtx op1 = XEXP (addis_value, 1);
33665 if (REG_P (op0) && CONST_INT_P (op1)
33666 && satisfies_constraint_L (op1))
33668 fuse_ops[1] = op0;
33669 fuse_ops[2] = op1;
33670 addis_str = "addis %0,%1,%v2";
33674 else if (GET_CODE (addis_value) == HIGH)
33676 rtx value = XEXP (addis_value, 0);
33677 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33679 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33680 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33681 if (TARGET_ELF)
33682 addis_str = "addis %0,%2,%1@toc@ha";
33684 else if (TARGET_XCOFF)
33685 addis_str = "addis %0,%1@u(%2)";
33687 else
33688 gcc_unreachable ();
33691 else if (GET_CODE (value) == PLUS)
33693 rtx op0 = XEXP (value, 0);
33694 rtx op1 = XEXP (value, 1);
33696 if (GET_CODE (op0) == UNSPEC
33697 && XINT (op0, 1) == UNSPEC_TOCREL
33698 && CONST_INT_P (op1))
33700 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33701 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33702 fuse_ops[3] = op1;
33703 if (TARGET_ELF)
33704 addis_str = "addis %0,%2,%1+%3@toc@ha";
33706 else if (TARGET_XCOFF)
33707 addis_str = "addis %0,%1+%3@u(%2)";
33709 else
33710 gcc_unreachable ();
33714 else if (satisfies_constraint_L (value))
33716 fuse_ops[1] = value;
33717 addis_str = "lis %0,%v1";
33720 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33722 fuse_ops[1] = value;
33723 addis_str = "lis %0,%1@ha";
33727 if (!addis_str)
33728 fatal_insn ("Could not generate addis value for fusion", addis_value);
33730 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33731 comment_str, mode_name);
33732 output_asm_insn (insn_template, fuse_ops);
33734 /* Emit the D-form load instruction. */
33735 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33737 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33738 fuse_ops[1] = load_offset;
33739 output_asm_insn (insn_template, fuse_ops);
33742 else if (GET_CODE (load_offset) == UNSPEC
33743 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33745 if (TARGET_ELF)
33746 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33748 else if (TARGET_XCOFF)
33749 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33751 else
33752 gcc_unreachable ();
33754 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33755 output_asm_insn (insn_template, fuse_ops);
33758 else if (GET_CODE (load_offset) == PLUS
33759 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33760 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33761 && CONST_INT_P (XEXP (load_offset, 1)))
33763 rtx tocrel_unspec = XEXP (load_offset, 0);
33764 if (TARGET_ELF)
33765 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33767 else if (TARGET_XCOFF)
33768 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33770 else
33771 gcc_unreachable ();
33773 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33774 fuse_ops[2] = XEXP (load_offset, 1);
33775 output_asm_insn (insn_template, fuse_ops);
33778 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33780 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33782 fuse_ops[1] = load_offset;
33783 output_asm_insn (insn_template, fuse_ops);
33786 else
33787 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33789 return "";
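/* Illustrative output of the two templates above for an SImode
   TOC-relative load on ELF (symbol name assumed):

     addis 9,2,sym@toc@ha         # gpr load fusion, type int
     lwz 9,sym@toc@l(9)

   Both instructions target the same register, which is what lets a
   power8 fuse the pair.  */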
33792 /* Analyze vector computations and remove unnecessary doubleword
33793 swaps (xxswapdi instructions). This pass is performed only
33794 for little-endian VSX code generation.
33796 For this specific case, loads and stores of 4x32 and 2x64 vectors
33797 are inefficient. These are implemented using the lxvd2x and
33798 stxvd2x instructions, which invert the order of doublewords in
33799 a vector register. Thus the code generation inserts an xxswapdi
33800 after each such load, and prior to each such store. (For spill
33801 code after register assignment, an additional xxswapdi is inserted
33802 following each store in order to return a hard register to its
33803 unpermuted value.)
33805 The extra xxswapdi instructions reduce performance. This can be
33806 particularly bad for vectorized code. The purpose of this pass
33807 is to reduce the number of xxswapdi instructions required for
33808 correctness.
33810 The primary insight is that much code that operates on vectors
33811 does not care about the relative order of elements in a register,
33812 so long as the correct memory order is preserved. If we have
33813 a computation where all input values are provided by lxvd2x/xxswapdi
33814 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
33815 and all intermediate computations are pure SIMD (independent of
33816 element order), then all the xxswapdi's associated with the loads
33817 and stores may be removed.
33819 This pass uses some of the infrastructure and logical ideas from
33820 the "web" pass in web.c. We create maximal webs of computations
33821 fitting the description above using union-find. Each such web is
33822 then optimized by removing its unnecessary xxswapdi instructions.
33824 The pass is placed prior to global optimization so that we can
33825 perform the optimization in the safest and simplest way possible;
33826 that is, by replacing each xxswapdi insn with a register copy insn.
33827 Subsequent forward propagation will remove copies where possible.
33829 There are some operations sensitive to element order for which we
33830 can still allow the operation, provided we modify those operations.
33831 These include CONST_VECTORs, for which we must swap the first and
33832 second halves of the constant vector; and SUBREGs, for which we
33833 must adjust the byte offset to account for the swapped doublewords.
33834 A remaining opportunity would be non-immediate-form splats, for
33835 which we should adjust the selected lane of the input. We should
33836 also make code generation adjustments for sum-across operations,
33837 since this is a common vectorizer reduction.
33839 Because we run prior to the first split, we can see loads and stores
33840 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
33841 vector loads and stores that have not yet been split into a permuting
33842 load/store and a swap. (One way this can happen is with a builtin
33843 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
33844 than deleting a swap, we convert the load/store into a permuting
33845 load/store (which effectively removes the swap). */
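/* A minimal illustration (register numbers arbitrary): a simple vector
   copy compiled for little-endian initially looks like

     lxvd2x 0,0,9               # load; doublewords arrive reversed
     xxpermdi 0,0,0,2           # xxswapdi: restore element order
     xxpermdi 0,0,0,2           # xxswapdi: swap again for the store
     stxvd2x 0,0,10

   Both swaps belong to a single optimizable web; the pass replaces
   each with a register copy, which later forward propagation deletes,
   leaving just the permuting load and store.  */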
33847 /* Notes on Permutes
33849 We do not currently handle computations that contain permutes. There
33850 is a general transformation that can be performed correctly, but it
33851 may introduce more expensive code than it replaces. To handle these
33852 would require a cost model to determine when to perform the optimization.
33853 This commentary records how this could be done if desired.
33855 The most general permute is something like this (example for V16QI):
33857 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
33858 (parallel [(const_int a0) (const_int a1)
33859 ...
33860 (const_int a14) (const_int a15)]))
33862 where a0,...,a15 are in [0,31] and select elements from op1 and op2
33863 to place in the result.
33865 Regardless of mode, we can convert the PARALLEL to a mask of 16
33866 byte-element selectors. Let's call this M, with M[i] representing
33867 the ith byte-element selector value. Then if we swap doublewords
33868 throughout the computation, we can get correct behavior by replacing
33869 M with M' as follows:
33871             { M[i+8]+8 : i < 8,  M[i+8] in [0,7] U [16,23]
33872     M'[i] = { M[i+8]-8 : i < 8,  M[i+8] in [8,15] U [24,31]
33873             { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
33874             { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
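   As an illustrative check of this table: M = [0..7,16..23] (take the
   first doubleword of op1, then the first doubleword of op2) becomes
   M' = [24..31,8..15], which selects the same data once op1, op2, and
   the result are all doubleword-swapped.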
33876 This seems promising at first, since we are just replacing one mask
33877 with another. But certain masks are preferable to others. If M
33878 is a mask that matches a vmrghh pattern, for example, M' certainly
33879 will not. Instead of a single vmrghh, we would generate a load of
33880 M' and a vperm. So we would need to know how many xxswapd's we can
33881 remove as a result of this transformation to determine if it's
33882 profitable; and preferably the logic would need to be aware of all
33883 the special preferable masks.
33885 Another form of permute is an UNSPEC_VPERM, in which the mask is
33886 already in a register. In some cases, this mask may be a constant
33887 that we can discover with ud-chains, in which case the above
33888 transformation is ok. However, the common usage here is for the
33889 mask to be produced by an UNSPEC_LVSL, in which case the mask
33890 cannot be known at compile time. In such a case we would have to
33891 generate several instructions to compute M' as above at run time,
33892 and a cost model is needed again. */
33894 /* This is based on the union-find logic in web.c. web_entry_base is
33895 defined in df.h. */
33896 class swap_web_entry : public web_entry_base
33898 public:
33899 /* Pointer to the insn. */
33900 rtx_insn *insn;
33901 /* Set if insn contains a mention of a vector register. All other
33902 fields are undefined if this field is unset. */
33903 unsigned int is_relevant : 1;
33904 /* Set if insn is a load. */
33905 unsigned int is_load : 1;
33906 /* Set if insn is a store. */
33907 unsigned int is_store : 1;
33908 /* Set if insn is a doubleword swap. This can either be a register swap
33909 or a permuting load or store (test is_load and is_store for this). */
33910 unsigned int is_swap : 1;
33911 /* Set if the insn has a live-in use of a parameter register. */
33912 unsigned int is_live_in : 1;
33913 /* Set if the insn has a live-out def of a return register. */
33914 unsigned int is_live_out : 1;
33915 /* Set if the insn contains a subreg reference of a vector register. */
33916 unsigned int contains_subreg : 1;
33917 /* Set if the insn contains a 128-bit integer operand. */
33918 unsigned int is_128_int : 1;
33919 /* Set if this is a call-insn. */
33920 unsigned int is_call : 1;
33921 /* Set if this insn does not perform a vector operation for which
33922 element order matters, or if we know how to fix it up if it does.
33923 Undefined if is_swap is set. */
33924 unsigned int is_swappable : 1;
33925 /* A nonzero value indicates what kind of special handling for this
33926 insn is required if doublewords are swapped. Undefined if
33927 is_swappable is not set. */
33928 unsigned int special_handling : 3;
33929 /* Set if the web represented by this entry cannot be optimized. */
33930 unsigned int web_not_optimizable : 1;
33931 /* Set if this insn should be deleted. */
33932 unsigned int will_delete : 1;
33935 enum special_handling_values {
33936 SH_NONE = 0,
33937 SH_CONST_VECTOR,
33938 SH_SUBREG,
33939 SH_NOSWAP_LD,
33940 SH_NOSWAP_ST,
33941 SH_EXTRACT,
33942 SH_SPLAT
33945 /* Union INSN with all insns containing definitions that reach USE.
33946 Detect whether USE is live-in to the current function. */
33947 static void
33948 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
33950 struct df_link *link = DF_REF_CHAIN (use);
33952 if (!link)
33953 insn_entry[INSN_UID (insn)].is_live_in = 1;
33955 while (link)
33957 if (DF_REF_IS_ARTIFICIAL (link->ref))
33958 insn_entry[INSN_UID (insn)].is_live_in = 1;
33960 if (DF_REF_INSN_INFO (link->ref))
33962 rtx def_insn = DF_REF_INSN (link->ref);
33963 (void)unionfind_union (insn_entry + INSN_UID (insn),
33964 insn_entry + INSN_UID (def_insn));
33967 link = link->next;
33971 /* Union INSN with all insns containing uses reached from DEF.
33972 Detect whether DEF is live-out from the current function. */
33973 static void
33974 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
33976 struct df_link *link = DF_REF_CHAIN (def);
33978 if (!link)
33979 insn_entry[INSN_UID (insn)].is_live_out = 1;
33981 while (link)
33983 /* This could be an eh use or some other artificial use;
33984 we treat these all the same (killing the optimization). */
33985 if (DF_REF_IS_ARTIFICIAL (link->ref))
33986 insn_entry[INSN_UID (insn)].is_live_out = 1;
33988 if (DF_REF_INSN_INFO (link->ref))
33990 rtx use_insn = DF_REF_INSN (link->ref);
33991 (void)unionfind_union (insn_entry + INSN_UID (insn),
33992 insn_entry + INSN_UID (use_insn));
33995 link = link->next;
33999 /* Return 1 iff INSN is a load insn, including permuting loads that
34000 represent an lxvd2x instruction; else return 0. */
34001 static unsigned int
34002 insn_is_load_p (rtx insn)
34004 rtx body = PATTERN (insn);
34006 if (GET_CODE (body) == SET)
34008 if (GET_CODE (SET_SRC (body)) == MEM)
34009 return 1;
34011 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
34012 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
34013 return 1;
34015 return 0;
34018 if (GET_CODE (body) != PARALLEL)
34019 return 0;
34021 rtx set = XVECEXP (body, 0, 0);
34023 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
34024 return 1;
34026 return 0;
34029 /* Return 1 iff INSN is a store insn, including permuting stores that
34030 represent an stxvd2x instruction; else return 0. */
34031 static unsigned int
34032 insn_is_store_p (rtx insn)
34034 rtx body = PATTERN (insn);
34035 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
34036 return 1;
34037 if (GET_CODE (body) != PARALLEL)
34038 return 0;
34039 rtx set = XVECEXP (body, 0, 0);
34040 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
34041 return 1;
34042 return 0;
34045 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
34046 a permuting load, or a permuting store. */
34047 static unsigned int
34048 insn_is_swap_p (rtx insn)
34050 rtx body = PATTERN (insn);
34051 if (GET_CODE (body) != SET)
34052 return 0;
34053 rtx rhs = SET_SRC (body);
34054 if (GET_CODE (rhs) != VEC_SELECT)
34055 return 0;
34056 rtx parallel = XEXP (rhs, 1);
34057 if (GET_CODE (parallel) != PARALLEL)
34058 return 0;
34059 unsigned int len = XVECLEN (parallel, 0);
34060 if (len != 2 && len != 4 && len != 8 && len != 16)
34061 return 0;
34062 for (unsigned int i = 0; i < len / 2; ++i)
34064 rtx op = XVECEXP (parallel, 0, i);
34065 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
34066 return 0;
34068 for (unsigned int i = len / 2; i < len; ++i)
34070 rtx op = XVECEXP (parallel, 0, i);
34071 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
34072 return 0;
34074 return 1;
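/* For instance, the V2DI form of the swap recognized above is

     (set (reg:V2DI d)
          (vec_select:V2DI (reg:V2DI s)
                           (parallel [(const_int 1) (const_int 0)])))

   and the V4SI form uses (parallel [2 3 0 1]), etc.  */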
34077 /* Return 1 iff OP is an operand that will not be affected by having
34078 vector doublewords swapped in memory. */
34079 static unsigned int
34080 rtx_is_swappable_p (rtx op, unsigned int *special)
34082 enum rtx_code code = GET_CODE (op);
34083 int i, j;
34084 rtx parallel;
34086 switch (code)
34088 case LABEL_REF:
34089 case SYMBOL_REF:
34090 case CLOBBER:
34091 case REG:
34092 return 1;
34094 case VEC_CONCAT:
34095 case ASM_INPUT:
34096 case ASM_OPERANDS:
34097 return 0;
34099 case CONST_VECTOR:
34101 *special = SH_CONST_VECTOR;
34102 return 1;
34105 case VEC_DUPLICATE:
34106 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
34107 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
34108 it represents a vector splat for which we can do special
34109 handling. */
34110 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
34111 return 1;
34112 else if (GET_CODE (XEXP (op, 0)) == REG
34113 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
34114 /* This catches V2DF and V2DI splat, at a minimum. */
34115 return 1;
34116 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
34117 /* If the duplicated item is from a select, defer to the select
34118 processing to see if we can change the lane for the splat. */
34119 return rtx_is_swappable_p (XEXP (op, 0), special);
34120 else
34121 return 0;
34123 case VEC_SELECT:
34124 /* A vec_extract operation is ok if we change the lane. */
34125 if (GET_CODE (XEXP (op, 0)) == REG
34126 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
34127 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
34128 && XVECLEN (parallel, 0) == 1
34129 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
34131 *special = SH_EXTRACT;
34132 return 1;
34134 else
34135 return 0;
34137 case UNSPEC:
34139 /* Various operations are unsafe for this optimization, at least
34140 without significant additional work. Permutes are obviously
34141 problematic, as both the permute control vector and the ordering
34142 of the target values are invalidated by doubleword swapping.
34143 Vector pack and unpack modify the number of vector lanes.
34144 Merge-high/low will not operate correctly on swapped operands.
34145 Vector shifts across element boundaries are clearly uncool,
34146 as are vector select and concatenate operations. Vector
34147 sum-across instructions define one operand with a specific
34148 order-dependent element, so additional fixup code would be
34149 needed to make those work. Vector set and non-immediate-form
34150 vector splat are element-order sensitive. A few of these
34151 cases might be workable with special handling if required. */
34152 int val = XINT (op, 1);
34153 switch (val)
34155 default:
34156 break;
34157 case UNSPEC_VMRGH_DIRECT:
34158 case UNSPEC_VMRGL_DIRECT:
34159 case UNSPEC_VPACK_SIGN_SIGN_SAT:
34160 case UNSPEC_VPACK_SIGN_UNS_SAT:
34161 case UNSPEC_VPACK_UNS_UNS_MOD:
34162 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
34163 case UNSPEC_VPACK_UNS_UNS_SAT:
34164 case UNSPEC_VPERM:
34165 case UNSPEC_VPERM_UNS:
34166 case UNSPEC_VPERMHI:
34167 case UNSPEC_VPERMSI:
34168 case UNSPEC_VPKPX:
34169 case UNSPEC_VSLDOI:
34170 case UNSPEC_VSLO:
34171 case UNSPEC_VSRO:
34172 case UNSPEC_VSUM2SWS:
34173 case UNSPEC_VSUM4S:
34174 case UNSPEC_VSUM4UBS:
34175 case UNSPEC_VSUMSWS:
34176 case UNSPEC_VSUMSWS_DIRECT:
34177 case UNSPEC_VSX_CONCAT:
34178 case UNSPEC_VSX_SET:
34179 case UNSPEC_VSX_SLDWI:
34180 case UNSPEC_VUNPACK_HI_SIGN:
34181 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
34182 case UNSPEC_VUNPACK_LO_SIGN:
34183 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
34184 case UNSPEC_VUPKHPX:
34185 case UNSPEC_VUPKHS_V4SF:
34186 case UNSPEC_VUPKHU_V4SF:
34187 case UNSPEC_VUPKLPX:
34188 case UNSPEC_VUPKLS_V4SF:
34189 case UNSPEC_VUPKLU_V4SF:
34190 /* The following could be handled as an idiom with XXSPLTW.
34191 These place a scalar in BE element zero, but the XXSPLTW
34192 will currently expect it in BE element 2 in a swapped
34193 region. When one of these feeds an XXSPLTW with no other
34194 defs/uses either way, we can avoid the lane change for
34195 XXSPLTW and things will be correct. TBD. */
34196 case UNSPEC_VSX_CVDPSPN:
34197 case UNSPEC_VSX_CVSPDP:
34198 case UNSPEC_VSX_CVSPDPN:
34199 return 0;
34200 case UNSPEC_VSPLT_DIRECT:
34201 *special = SH_SPLAT;
34202 return 1;
34206 default:
34207 break;
34210 const char *fmt = GET_RTX_FORMAT (code);
34211 int ok = 1;
34213 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34214 if (fmt[i] == 'e' || fmt[i] == 'u')
34216 unsigned int special_op = SH_NONE;
34217 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
34218 /* Ensure we never have two kinds of special handling
34219 for the same insn. */
34220 if (*special != SH_NONE && special_op != SH_NONE
34221 && *special != special_op)
34222 return 0;
34223 *special = special_op;
34225 else if (fmt[i] == 'E')
34226 for (j = 0; j < XVECLEN (op, i); ++j)
34228 unsigned int special_op = SH_NONE;
34229 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
34230 /* Ensure we never have two kinds of special handling
34231 for the same insn. */
34232 if (*special != SH_NONE && special_op != SH_NONE
34233 && *special != special_op)
34234 return 0;
34235 *special = special_op;
34238 return ok;
34241 /* Return 1 iff INSN is an operand that will not be affected by
34242 having vector doublewords swapped in memory (in which case
34243 *SPECIAL is unchanged), or that can be modified to be correct
34244 if vector doublewords are swapped in memory (in which case
34245 *SPECIAL is changed to a value indicating how). */
34246 static unsigned int
34247 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
34248 unsigned int *special)
34250 /* Calls are always bad. */
34251 if (GET_CODE (insn) == CALL_INSN)
34252 return 0;
34254 /* Loads and stores seen here are not permuting, but we can still
34255 fix them up by converting them to permuting ones. Exceptions:
34256 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
34257 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
34258 for the SET source. */
34259 rtx body = PATTERN (insn);
34260 int i = INSN_UID (insn);
34262 if (insn_entry[i].is_load)
34264 if (GET_CODE (body) == SET)
34266 *special = SH_NOSWAP_LD;
34267 return 1;
34269 else
34270 return 0;
34273 if (insn_entry[i].is_store)
34275 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
34277 *special = SH_NOSWAP_ST;
34278 return 1;
34280 else
34281 return 0;
34284 /* Otherwise check the operands for vector lane violations. */
34285 return rtx_is_swappable_p (body, special);
34288 enum chain_purpose { FOR_LOADS, FOR_STORES };
34290 /* Return true if the UD or DU chain headed by LINK is non-empty,
34291 and every entry on the chain references an insn that is a
34292 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
34293 register swap must have only permuting loads as reaching defs.
34294 If PURPOSE is FOR_STORES, each such register swap must have only
34295 register swaps or permuting stores as reached uses. */
34296 static bool
34297 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
34298 enum chain_purpose purpose)
34300 if (!link)
34301 return false;
34303 for (; link; link = link->next)
34305 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
34306 continue;
34308 if (DF_REF_IS_ARTIFICIAL (link->ref))
34309 return false;
34311 rtx reached_insn = DF_REF_INSN (link->ref);
34312 unsigned uid = INSN_UID (reached_insn);
34313 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
34315 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
34316 || insn_entry[uid].is_store)
34317 return false;
34319 if (purpose == FOR_LOADS)
34321 df_ref use;
34322 FOR_EACH_INSN_INFO_USE (use, insn_info)
34324 struct df_link *swap_link = DF_REF_CHAIN (use);
34326 while (swap_link)
34328 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34329 return false;
34331 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
34332 unsigned uid2 = INSN_UID (swap_def_insn);
34334 /* Only permuting loads are allowed. */
34335 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
34336 return false;
34338 swap_link = swap_link->next;
34342 else if (purpose == FOR_STORES)
34344 df_ref def;
34345 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34347 struct df_link *swap_link = DF_REF_CHAIN (def);
34349 while (swap_link)
34351 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34352 return false;
34354 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
34355 unsigned uid2 = INSN_UID (swap_use_insn);
34357 /* Permuting stores or register swaps are allowed. */
34358 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
34359 return false;
34361 swap_link = swap_link->next;
34367 return true;
34370 /* Mark the xxswapdi instructions associated with permuting loads and
34371 stores for removal. Note that we only flag them for deletion here,
34372 as there is a possibility of a swap being reached from multiple
34373 loads, etc. */
34374 static void
34375 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
34377 rtx insn = insn_entry[i].insn;
34378 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34380 if (insn_entry[i].is_load)
34382 df_ref def;
34383 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34385 struct df_link *link = DF_REF_CHAIN (def);
34387 /* We know by now that these are swaps, so we can delete
34388 them confidently. */
34389 while (link)
34391 rtx use_insn = DF_REF_INSN (link->ref);
34392 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34393 link = link->next;
34397 else if (insn_entry[i].is_store)
34399 df_ref use;
34400 FOR_EACH_INSN_INFO_USE (use, insn_info)
34402 /* Ignore uses for addressability. */
34403 machine_mode mode = GET_MODE (DF_REF_REG (use));
34404 if (!VECTOR_MODE_P (mode))
34405 continue;
34407 struct df_link *link = DF_REF_CHAIN (use);
34409 /* We know by now that these are swaps, so we can delete
34410 them confidently. */
34411 while (link)
34413 rtx def_insn = DF_REF_INSN (link->ref);
34414 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34415 link = link->next;
34421 /* OP is either a CONST_VECTOR or an expression containing one.
34422 Swap the first half of the vector with the second in the first
34423 case. Recurse to find it in the second. */
34424 static void
34425 swap_const_vector_halves (rtx op)
34427 int i;
34428 enum rtx_code code = GET_CODE (op);
34429 if (GET_CODE (op) == CONST_VECTOR)
34431 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34432 for (i = 0; i < half_units; ++i)
34434 rtx temp = CONST_VECTOR_ELT (op, i);
34435 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34436 CONST_VECTOR_ELT (op, i + half_units) = temp;
34439 else
34441 int j;
34442 const char *fmt = GET_RTX_FORMAT (code);
34443 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34444 if (fmt[i] == 'e' || fmt[i] == 'u')
34445 swap_const_vector_halves (XEXP (op, i));
34446 else if (fmt[i] == 'E')
34447 for (j = 0; j < XVECLEN (op, i); ++j)
34448 swap_const_vector_halves (XVECEXP (op, i, j));
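/* E.g. (sketch) the V4SI constant { 0, 1, 2, 3 } is rewritten in place
   to { 2, 3, 0, 1 }, so it reads correctly from a doubleword-swapped
   register.  */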
34452 /* Find all subregs of a vector expression that perform a narrowing,
34453 and adjust the subreg index to account for doubleword swapping. */
34454 static void
34455 adjust_subreg_index (rtx op)
34457 enum rtx_code code = GET_CODE (op);
34458 if (code == SUBREG
34459 && (GET_MODE_SIZE (GET_MODE (op))
34460 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34462 unsigned int index = SUBREG_BYTE (op);
34463 if (index < 8)
34464 index += 8;
34465 else
34466 index -= 8;
34467 SUBREG_BYTE (op) = index;
34470 const char *fmt = GET_RTX_FORMAT (code);
34471 int i,j;
34472 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34473 if (fmt[i] == 'e' || fmt[i] == 'u')
34474 adjust_subreg_index (XEXP (op, i));
34475 else if (fmt[i] == 'E')
34476 for (j = 0; j < XVECLEN (op, i); ++j)
34477 adjust_subreg_index (XVECEXP (op, i, j));
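/* E.g. (sketch) (subreg:DI (reg:V2DI v) 0) becomes
   (subreg:DI (reg:V2DI v) 8) and vice versa, since each doubleword now
   lives in the opposite half of the register.  */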
34480 /* Convert the non-permuting load INSN to a permuting one. */
34481 static void
34482 permute_load (rtx_insn *insn)
34484 rtx body = PATTERN (insn);
34485 rtx mem_op = SET_SRC (body);
34486 rtx tgt_reg = SET_DEST (body);
34487 machine_mode mode = GET_MODE (tgt_reg);
34488 int n_elts = GET_MODE_NUNITS (mode);
34489 int half_elts = n_elts / 2;
34490 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34491 int i, j;
34492 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34493 XVECEXP (par, 0, i) = GEN_INT (j);
34494 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34495 XVECEXP (par, 0, i) = GEN_INT (j);
34496 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34497 SET_SRC (body) = sel;
34498 INSN_CODE (insn) = -1; /* Force re-recognition. */
34499 df_insn_rescan (insn);
34501 if (dump_file)
34502 fprintf (dump_file, "Replacing load %d with permuted load\n",
34503 INSN_UID (insn));
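/* For a V4SI load (illustration), the rewritten source becomes

     (vec_select:V4SI (mem:V4SI ...)
                      (parallel [(const_int 2) (const_int 3)
                                 (const_int 0) (const_int 1)]))

   which matches the permuting load patterns.  */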
34506 /* Convert the non-permuting store INSN to a permuting one. */
34507 static void
34508 permute_store (rtx_insn *insn)
34510 rtx body = PATTERN (insn);
34511 rtx src_reg = SET_SRC (body);
34512 machine_mode mode = GET_MODE (src_reg);
34513 int n_elts = GET_MODE_NUNITS (mode);
34514 int half_elts = n_elts / 2;
34515 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34516 int i, j;
34517 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34518 XVECEXP (par, 0, i) = GEN_INT (j);
34519 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34520 XVECEXP (par, 0, i) = GEN_INT (j);
34521 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
34522 SET_SRC (body) = sel;
34523 INSN_CODE (insn) = -1; /* Force re-recognition. */
34524 df_insn_rescan (insn);
34526 if (dump_file)
34527 fprintf (dump_file, "Replacing store %d with permuted store\n",
34528 INSN_UID (insn));
34531 /* Given OP that contains a vector extract operation, adjust the index
34532 of the extracted lane to account for the doubleword swap. */
34533 static void
34534 adjust_extract (rtx_insn *insn)
34536 rtx src = SET_SRC (PATTERN (insn));
34537 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
34538 account for that. */
34539 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
34540 rtx par = XEXP (sel, 1);
34541 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
34542 int lane = INTVAL (XVECEXP (par, 0, 0));
34543 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34544 XVECEXP (par, 0, 0) = GEN_INT (lane);
34545 INSN_CODE (insn) = -1; /* Force re-recognition. */
34546 df_insn_rescan (insn);
34548 if (dump_file)
34549 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
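/* E.g. for a V2DI extract (sketch), lane 0 becomes lane 1 and vice
   versa; for V4SI, lanes 0,1,2,3 map to 2,3,0,1.  */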
34552 /* Given OP that contains a vector direct-splat operation, adjust the index
34553 of the source lane to account for the doubleword swap. */
34554 static void
34555 adjust_splat (rtx_insn *insn)
34557 rtx body = PATTERN (insn);
34558 rtx unspec = XEXP (body, 1);
34559 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
34560 int lane = INTVAL (XVECEXP (unspec, 0, 1));
34561 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34562 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
34563 INSN_CODE (insn) = -1; /* Force re-recognition. */
34564 df_insn_rescan (insn);
34566 if (dump_file)
34567 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
34570 /* The insn described by INSN_ENTRY[I] can be swapped, but only
34571 with special handling. Take care of that here. */
34572 static void
34573 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
34575 rtx_insn *insn = insn_entry[i].insn;
34576 rtx body = PATTERN (insn);
34578 switch (insn_entry[i].special_handling)
34580 default:
34581 gcc_unreachable ();
34582 case SH_CONST_VECTOR:
34584 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
34585 gcc_assert (GET_CODE (body) == SET);
34586 rtx rhs = SET_SRC (body);
34587 swap_const_vector_halves (rhs);
34588 if (dump_file)
34589 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
34590 break;
34592 case SH_SUBREG:
34593 /* A subreg of the same size is already safe. For subregs that
34594 select a smaller portion of a reg, adjust the index for
34595 swapped doublewords. */
34596 adjust_subreg_index (body);
34597 if (dump_file)
34598 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
34599 break;
34600 case SH_NOSWAP_LD:
34601 /* Convert a non-permuting load to a permuting one. */
34602 permute_load (insn);
34603 break;
34604 case SH_NOSWAP_ST:
34605 /* Convert a non-permuting store to a permuting one. */
34606 permute_store (insn);
34607 break;
34608 case SH_EXTRACT:
34609 /* Change the lane on an extract operation. */
34610 adjust_extract (insn);
34611 break;
34612 case SH_SPLAT:
34613 /* Change the lane on a direct-splat operation. */
34614 adjust_splat (insn);
34615 break;
34619 /* Find the insn from the Ith table entry, which is known to be a
34620 register swap Y = SWAP(X). Replace it with a copy Y = X. */
34621 static void
34622 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
34624 rtx_insn *insn = insn_entry[i].insn;
34625 rtx body = PATTERN (insn);
34626 rtx src_reg = XEXP (SET_SRC (body), 0);
34627 rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
34628 rtx_insn *new_insn = emit_insn_before (copy, insn);
34629 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
34630 df_insn_rescan (new_insn);
34632 if (dump_file)
34634 unsigned int new_uid = INSN_UID (new_insn);
34635 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
34638 df_insn_delete (insn);
34639 remove_insn (insn);
34640 insn->set_deleted ();
34643 /* Dump the swap table to DUMP_FILE. */
34644 static void
34645 dump_swap_insn_table (swap_web_entry *insn_entry)
34647 int e = get_max_uid ();
34648 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
34650 for (int i = 0; i < e; ++i)
34651 if (insn_entry[i].is_relevant)
34653 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
34654 fprintf (dump_file, "%6d %6d ", i,
34655 pred_entry && pred_entry->insn
34656 ? INSN_UID (pred_entry->insn) : 0);
34657 if (insn_entry[i].is_load)
34658 fputs ("load ", dump_file);
34659 if (insn_entry[i].is_store)
34660 fputs ("store ", dump_file);
34661 if (insn_entry[i].is_swap)
34662 fputs ("swap ", dump_file);
34663 if (insn_entry[i].is_live_in)
34664 fputs ("live-in ", dump_file);
34665 if (insn_entry[i].is_live_out)
34666 fputs ("live-out ", dump_file);
34667 if (insn_entry[i].contains_subreg)
34668 fputs ("subreg ", dump_file);
34669 if (insn_entry[i].is_128_int)
34670 fputs ("int128 ", dump_file);
34671 if (insn_entry[i].is_call)
34672 fputs ("call ", dump_file);
34673 if (insn_entry[i].is_swappable)
34675 fputs ("swappable ", dump_file);
34676 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
34677 fputs ("special:constvec ", dump_file);
34678 else if (insn_entry[i].special_handling == SH_SUBREG)
34679 fputs ("special:subreg ", dump_file);
34680 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
34681 fputs ("special:load ", dump_file);
34682 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
34683 fputs ("special:store ", dump_file);
34684 else if (insn_entry[i].special_handling == SH_EXTRACT)
34685 fputs ("special:extract ", dump_file);
34686 else if (insn_entry[i].special_handling == SH_SPLAT)
34687 fputs ("special:splat ", dump_file);
34689 if (insn_entry[i].web_not_optimizable)
34690 fputs ("unoptimizable ", dump_file);
34691 if (insn_entry[i].will_delete)
34692 fputs ("delete ", dump_file);
34693 fputs ("\n", dump_file);
34695 fputs ("\n", dump_file);
34698 /* Main entry point for this pass. */
34699 unsigned int
34700 rs6000_analyze_swaps (function *fun)
34702 swap_web_entry *insn_entry;
34703 basic_block bb;
34704 rtx_insn *insn;
34706 /* Dataflow analysis for use-def chains. */
34707 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
34708 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
34709 df_analyze ();
34710 df_set_flags (DF_DEFER_INSN_RESCAN);
34712 /* Allocate structure to represent webs of insns. */
34713 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
34715 /* Walk the insns to gather basic data. */
34716 FOR_ALL_BB_FN (bb, fun)
34717 FOR_BB_INSNS (bb, insn)
34719 unsigned int uid = INSN_UID (insn);
34720 if (NONDEBUG_INSN_P (insn))
34722 insn_entry[uid].insn = insn;
34724 if (GET_CODE (insn) == CALL_INSN)
34725 insn_entry[uid].is_call = 1;
34727 /* Walk the uses and defs to see if we mention vector regs.
34728 Record any constraints on optimization of such mentions. */
34729 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34730 df_ref mention;
34731 FOR_EACH_INSN_INFO_USE (mention, insn_info)
34733 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34734 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34736 /* If a use gets its value from a call insn, it will be
34737 a hard register and will look like (reg:V4SI 3 3).
34738 The df analysis creates two mentions for GPR3 and GPR4,
34739 both DImode. We must recognize this and treat it as a
34740 vector mention to ensure the call is unioned with this
34741 use. */
34742 if (mode == DImode && DF_REF_INSN_INFO (mention))
34744 rtx feeder = DF_REF_INSN (mention);
34745 /* FIXME: It is pretty hard to get from the df mention
34746 to the mode of the use in the insn. We arbitrarily
34747 pick a vector mode here, even though the use might
34748 be a real DImode. We can be too conservative
34749 (create a web larger than necessary) because of
34750 this, so consider eventually fixing this. */
34751 if (GET_CODE (feeder) == CALL_INSN)
34752 mode = V4SImode;
34755 if (VECTOR_MODE_P (mode))
34757 insn_entry[uid].is_relevant = 1;
34758 if (mode == TImode || mode == V1TImode)
34759 insn_entry[uid].is_128_int = 1;
34760 if (DF_REF_INSN_INFO (mention))
34761 insn_entry[uid].contains_subreg
34762 = !rtx_equal_p (DF_REF_REG (mention),
34763 DF_REF_REAL_REG (mention));
34764 union_defs (insn_entry, insn, mention);
34767 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
34769 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34770 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34772 /* If we're loading up a hard vector register for a call,
34773 it looks like (set (reg:V4SI 9 9) (...)). The df
34774 analysis creates two mentions for GPR9 and GPR10, both
34775 DImode. So relying on the mode from the mentions
34776 isn't sufficient to ensure we union the call into the
34777 web with the parameter setup code. */
34778 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
34779 && VECTOR_MODE_P (GET_MODE (SET_DEST (PATTERN (insn)))))
34780 mode = GET_MODE (SET_DEST (PATTERN (insn)));
34782 if (VECTOR_MODE_P (mode))
34784 insn_entry[uid].is_relevant = 1;
34785 if (mode == TImode || mode == V1TImode)
34786 insn_entry[uid].is_128_int = 1;
34787 if (DF_REF_INSN_INFO (mention))
34788 insn_entry[uid].contains_subreg
34789 = !rtx_equal_p (DF_REF_REG (mention),
34790 DF_REF_REAL_REG (mention));
34791 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
34792 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
34793 insn_entry[uid].is_live_out = 1;
34794 union_uses (insn_entry, insn, mention);
34798 if (insn_entry[uid].is_relevant)
34800 /* Determine if this is a load or store. */
34801 insn_entry[uid].is_load = insn_is_load_p (insn);
34802 insn_entry[uid].is_store = insn_is_store_p (insn);
34804 /* Determine if this is a doubleword swap. If not,
34805 determine whether it can legally be swapped. */
34806 if (insn_is_swap_p (insn))
34807 insn_entry[uid].is_swap = 1;
34808 else
34810 unsigned int special = SH_NONE;
34811 insn_entry[uid].is_swappable
34812 = insn_is_swappable_p (insn_entry, insn, &special);
34813 if (special != SH_NONE && insn_entry[uid].contains_subreg)
34814 insn_entry[uid].is_swappable = 0;
34815 else if (special != SH_NONE)
34816 insn_entry[uid].special_handling = special;
34817 else if (insn_entry[uid].contains_subreg)
34818 insn_entry[uid].special_handling = SH_SUBREG;
34824 if (dump_file)
34826 fprintf (dump_file, "\nSwap insn entry table when first built\n");
34827 dump_swap_insn_table (insn_entry);
34830 /* Record unoptimizable webs. */
34831 unsigned e = get_max_uid (), i;
34832 for (i = 0; i < e; ++i)
34834 if (!insn_entry[i].is_relevant)
34835 continue;
34837 swap_web_entry *root
34838 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
34840 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
34841 || (insn_entry[i].contains_subreg
34842 && insn_entry[i].special_handling != SH_SUBREG)
34843 || insn_entry[i].is_128_int || insn_entry[i].is_call
34844 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
34845 root->web_not_optimizable = 1;
34847 /* If we have loads or stores that aren't permuting then the
34848 optimization isn't appropriate. */
34849 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
34850 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
34851 root->web_not_optimizable = 1;
34853 /* If we have permuting loads or stores that are not accompanied
34854 by a register swap, the optimization isn't appropriate. */
34855 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
34857 rtx insn = insn_entry[i].insn;
34858 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34859 df_ref def;
34861 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34863 struct df_link *link = DF_REF_CHAIN (def);
34865 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
34867 root->web_not_optimizable = 1;
34868 break;
34872 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
34874 rtx insn = insn_entry[i].insn;
34875 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34876 df_ref use;
34878 FOR_EACH_INSN_INFO_USE (use, insn_info)
34880 struct df_link *link = DF_REF_CHAIN (use);
34882 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
34884 root->web_not_optimizable = 1;
34885 break;
34891 if (dump_file)
34893 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
34894 dump_swap_insn_table (insn_entry);
34897 /* For each load and store in an optimizable web (which implies
34898 the loads and stores are permuting), find the associated
34899 register swaps and mark them for removal. Due to various
34900 optimizations we may mark the same swap more than once. Also
34901 perform special handling for swappable insns that require it. */
34902 for (i = 0; i < e; ++i)
34903 if ((insn_entry[i].is_load || insn_entry[i].is_store)
34904 && insn_entry[i].is_swap)
34906 swap_web_entry* root_entry
34907 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34908 if (!root_entry->web_not_optimizable)
34909 mark_swaps_for_removal (insn_entry, i);
34911 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
34913 swap_web_entry* root_entry
34914 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34915 if (!root_entry->web_not_optimizable)
34916 handle_special_swappables (insn_entry, i);
34919 /* Now delete the swaps marked for removal. */
34920 for (i = 0; i < e; ++i)
34921 if (insn_entry[i].will_delete)
34922 replace_swap_with_copy (insn_entry, i);
34924 /* Clean up. */
34925 free (insn_entry);
34926 return 0;
34929 const pass_data pass_data_analyze_swaps =
34931 RTL_PASS, /* type */
34932 "swaps", /* name */
34933 OPTGROUP_NONE, /* optinfo_flags */
34934 TV_NONE, /* tv_id */
34935 0, /* properties_required */
34936 0, /* properties_provided */
34937 0, /* properties_destroyed */
34938 0, /* todo_flags_start */
34939 TODO_df_finish, /* todo_flags_finish */
34942 class pass_analyze_swaps : public rtl_opt_pass
34944 public:
34945 pass_analyze_swaps(gcc::context *ctxt)
34946 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
34949 /* opt_pass methods: */
34950 virtual bool gate (function *)
34952 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
34953 && rs6000_optimize_swaps);
34956 virtual unsigned int execute (function *fun)
34958 return rs6000_analyze_swaps (fun);
34961 }; // class pass_analyze_swaps
34963 rtl_opt_pass *
34964 make_pass_analyze_swaps (gcc::context *ctxt)
34966 return new pass_analyze_swaps (ctxt);
34969 #ifdef RS6000_GLIBC_ATOMIC_FENV
34970 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
34971 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
34972 #endif
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var (double_type_node);
      mark_addressable (fenv_var);
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }
  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);
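
  /* In the mffs bit image the FPSCR occupies the low 32 bits; there 0x3 is
     the RN (rounding mode) field and 0x4 is the NI (non-IEEE mode) bit, so
     0x7 preserves exactly those controls while zeroing the exception flag
     and enable bits.  */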
  tree fenv_var = create_tmp_var (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
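
  /* An FLM operand of 0xff makes mtfsf write all eight 4-bit fields of
     the FPSCR from the low word of the source double's bit image, so the
     masked value above replaces the entire control/status state in one
     instruction.  */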
  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire FPSCR image in the low word, including all
     exception flags and enable bits.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);
  tree fenv_clear = create_tmp_var (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clean_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
  /* Generates the equivalent of feupdateenv (&fenv_var):

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
                                | (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */
  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
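
/* A sketch of how the generic expander is assumed to consume the three
   sequences built above when lowering an atomic floating-point compound
   assignment (the exact shape lives in the front ends, not here):

     hold;                              // save env, mask traps
     retry:
       newval = oldval OP rhs;
       if (!atomic_compare_exchange (...))
         { clear; goto retry; }         // discard stale exceptions
     update;                            // merge exceptions, restore env  */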

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"