/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2014 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "output.h"
#include "dbxout.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "real.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;          /* stack info won't change from here on */
  int first_gp_reg_save;         /* first callee saved GP register used */
  int first_fp_reg_save;         /* first callee saved FP register used */
  int first_altivec_reg_save;    /* first callee saved AltiVec register used */
  int lr_save_p;                 /* true if the link reg needs to be saved */
  int cr_save_p;                 /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;      /* mask of vec registers to save */
  int push_p;                    /* true if we need to allocate stack space */
  int calls_p;                   /* true if the function makes any calls */
  int world_save_p;              /* true if we're saving *everything*:
                                    r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;           /* which ABI to use */
  int gp_save_offset;            /* offset to save GP regs from initial SP */
  int fp_save_offset;            /* offset to save FP regs from initial SP */
  int altivec_save_offset;       /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;            /* offset to save LR from initial SP */
  int cr_save_offset;            /* offset to save CR from initial SP */
  int vrsave_save_offset;        /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;        /* offset to save spe 64-bit gprs */
  int varargs_save_offset;       /* offset to save the varargs registers */
  int ehrd_offset;               /* offset to EH return data */
  int ehcr_offset;               /* offset to EH CR field data */
  int reg_size;                  /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;       /* variable save area size */
  int parm_size;                 /* outgoing parameter size */
  int save_size;                 /* save area size */
  int fixed_size;                /* fixed size of stack frame */
  int gp_size;                   /* size of saved GP registers */
  int fp_size;                   /* size of saved FP registers */
  int altivec_size;              /* size of saved AltiVec registers */
  int cr_size;                   /* size to hold CR if not in save_size */
  int vrsave_size;               /* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;      /* size of altivec alignment padding if
                                    not in save_size */
  int spe_gp_size;               /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;          /* size of SPE alignment padding if
                                    not in save_size */
  HOST_WIDE_INT total_size;      /* total bytes allocated for stack */
  int spe_64bit_regs_used;       /* flag if 64-bit SPE regs are used */
  int savres_strategy;           /* mask describing the register
                                    save/restore strategy */
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, called so we can
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use the variant of the AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
/* Control for the rs6000_debug_* cost hooks, used so that debug output
   is only printed for the outermost cost query (assumed from how those
   hooks are used; see the rs6000_debug_* cost routines).  */
static int dbg_cost_ctrl;
/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV      = 0x001,    /* Use divide estimate */
  RECIP_DF_DIV      = 0x002,
  RECIP_V4SF_DIV    = 0x004,
  RECIP_V2DF_DIV    = 0x008,

  RECIP_SF_RSQRT    = 0x010,    /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT    = 0x020,
  RECIP_V4SF_RSQRT  = 0x040,
  RECIP_V2DF_RSQRT  = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE          = 0,
  RECIP_ALL           = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                         | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                         | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;       /* option name */
  unsigned int mask;        /* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
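/* For example, IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) is true because
   ALTIVEC_REG_TYPE sits between GPR_REG_TYPE and FPR_REG_TYPE in the
   enum above, while IS_STD_REG_TYPE (CR_REG_TYPE) is false.  */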
/* Register classes we care about in secondary reload or when checking for
   a legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,           /* General purpose registers.  */
  RELOAD_REG_FPR,           /* Traditional floating point regs.  */
  RELOAD_REG_VMX,           /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,           /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;         /* Register class name.  */
  int reg;                  /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",  FIRST_GPR_REGNO },      /* RELOAD_REG_GPR.  */
  { "Fpr",  FIRST_FPR_REGNO },      /* RELOAD_REG_FPR.  */
  { "VMX",  FIRST_ALTIVEC_REGNO },  /* RELOAD_REG_VMX.  */
  { "Any",  -1 },                   /* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID       0x01  /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE    0x02  /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED     0x04  /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET      0x08  /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC  0x10  /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY  0x20  /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16     0x40  /* AND -16 addressing.  */
/* Masks of valid addressing modes, indexed by register type, plus the
   reload insns for each mode.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;    /* INSN to reload for loading.  */
  enum insn_code reload_store;   /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;          /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}
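/* A hypothetical helper in the same style (a sketch, not part of the
   original file) could answer other addr_mask queries, e.g. reg+reg
   indexed addressing:

   static inline bool
   mode_supports_indexed_p (machine_mode mode)
   {
     return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_INDEXED)
             != 0);
   }  */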
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add).  */

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
};
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};
/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};
/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
};
/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
                                             machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
                                                   machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
                                             machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_hasher<toc_hash_struct *>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];     /* return value + 3 arguments.  */
  unsigned char uns_p[4];   /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
    "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
   "%rh8",  "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
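/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) yields 0x80000000
   (the %v0 bit) and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) yields
   0x1 (the %v31 bit).  */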
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

/* On rs6000, function arguments are promoted, as are function return
   values.  */
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif
1571 /* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
1572 The PowerPC architecture requires only weak consistency among
1573 processors--that is, memory accesses between processors need not be
1574 sequentially consistent and memory accesses among processors can occur
1575 in any order. The ability to order memory accesses weakly provides
1576 opportunities for more efficient use of the system bus. Unless a
1577 dependency exists, the 604e allows read operations to precede store
1578 operations. */
1579 #undef TARGET_RELAXED_ORDERING
1580 #define TARGET_RELAXED_ORDERING true
1582 #ifdef HAVE_AS_TLS
1583 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1584 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1585 #endif
1587 /* Use a 32-bit anchor range. This leads to sequences like:
1589 addis tmp,anchor,high
1590 add dest,tmp,low
1592 where tmp itself acts as an anchor, and can be shared between
1593 accesses to the same 64k page. */
1594 #undef TARGET_MIN_ANCHOR_OFFSET
1595 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1596 #undef TARGET_MAX_ANCHOR_OFFSET
1597 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
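/* Illustrative sketch (not part of the original source): how a 32-bit
   offset inside the anchor range above is split into the high and low
   halves used by the addis/add sequence.  ADDIS adds "high << 16", so
   the low half must be the sign-extended bottom 16 bits for the sum to
   recombine exactly.  */
static int
example_anchor_split (int offset, int *high16, int *low16)
{
  *low16 = (short) (offset & 0xffff);	/* sign-extended low 16 bits */
  *high16 = (offset - *low16) >> 16;	/* compensating high half */
  return *high16 * 65536 + *low16 == offset;	/* always true */
}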
1598 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1599 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1600 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1601 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1603 #undef TARGET_BUILTIN_RECIPROCAL
1604 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1606 #undef TARGET_EXPAND_TO_RTL_HOOK
1607 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1609 #undef TARGET_INSTANTIATE_DECLS
1610 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1612 #undef TARGET_SECONDARY_RELOAD
1613 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1615 #undef TARGET_LEGITIMATE_ADDRESS_P
1616 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1618 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1619 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1621 #undef TARGET_LRA_P
1622 #define TARGET_LRA_P rs6000_lra_p
1624 #undef TARGET_CAN_ELIMINATE
1625 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1628 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1630 #undef TARGET_TRAMPOLINE_INIT
1631 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1633 #undef TARGET_FUNCTION_VALUE
1634 #define TARGET_FUNCTION_VALUE rs6000_function_value
1636 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1637 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1639 #undef TARGET_OPTION_SAVE
1640 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1642 #undef TARGET_OPTION_RESTORE
1643 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1645 #undef TARGET_OPTION_PRINT
1646 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1648 #undef TARGET_CAN_INLINE_P
1649 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1651 #undef TARGET_SET_CURRENT_FUNCTION
1652 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1654 #undef TARGET_LEGITIMATE_CONSTANT_P
1655 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1657 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1658 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1660 #undef TARGET_CAN_USE_DOLOOP_P
1661 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1663 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1664 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1667 /* Processor table. */
1668 struct rs6000_ptt
1670 const char *const name; /* Canonical processor name. */
1671 const enum processor_type processor; /* Processor type enum value. */
1672 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1675 static struct rs6000_ptt const processor_target_table[] =
1677 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1678 #include "rs6000-cpus.def"
1679 #undef RS6000_CPU
1682 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1683 name is invalid. */
1685 static int
1686 rs6000_cpu_name_lookup (const char *name)
1688 size_t i;
1690 if (name != NULL)
1692 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1693 if (! strcmp (name, processor_target_table[i].name))
1694 return (int)i;
1697 return -1;
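/* Illustrative use (hedged sketch, not in the original source): callers
   such as the -mcpu=/-mtune= handling treat a negative return value as
   an invalid processor name.  */
static int
example_cpu_name_valid_p (const char *name)
{
  return rs6000_cpu_name_lookup (name) >= 0;
}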
1701 /* Return number of consecutive hard regs needed starting at reg REGNO
1702 to hold something of mode MODE.
1703 This is ordinarily the length in words of a value of mode MODE
1704 but can be less for certain modes in special long registers.
1706 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1707 scalar instructions. The upper 32 bits are only available to the
1708 SIMD instructions.
1710 POWER and PowerPC GPRs hold 32 bits worth;
1711    PowerPC64 GPRs and FPRs hold 64 bits worth. */
1713 static int
1714 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1716 unsigned HOST_WIDE_INT reg_size;
1718 /* TF/TD modes are special in that they always take 2 registers. */
1719 if (FP_REGNO_P (regno))
1720 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1721 ? UNITS_PER_VSX_WORD
1722 : UNITS_PER_FP_WORD);
1724 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1725 reg_size = UNITS_PER_SPE_WORD;
1727 else if (ALTIVEC_REGNO_P (regno))
1728 reg_size = UNITS_PER_ALTIVEC_WORD;
1730 /* The value returned for SCmode in the E500 double case is 2 for
1731 ABI compatibility; storing an SCmode value in a single register
1732 would require function_arg and rs6000_spe_function_arg to handle
1733 SCmode so as to pass the value correctly in a pair of
1734 registers. */
1735 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1736 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1737 reg_size = UNITS_PER_FP_WORD;
1739 else
1740 reg_size = UNITS_PER_WORD;
1742 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
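/* Worked example (illustrative, not part of the original source): the
   return statement above is the usual round-up division idiom.  With
   an 8-byte reg_size, a 16-byte mode needs (16 + 7) / 8 == 2 registers
   and a 20-byte mode needs (20 + 7) / 8 == 3.  */
static int
example_ceil_div (int mode_size, int reg_size)
{
  return (mode_size + reg_size - 1) / reg_size;
}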
1745 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1746 MODE. */
1747 static int
1748 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1750 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1752   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
1753      register pairs, and we use PTImode where we need to deal with quad word
1754      memory operations.  Don't allow quad words in the argument or frame
1755      pointer registers, just registers 0..31. */
1756 if (mode == PTImode)
1757 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1758 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1759 && ((regno & 1) == 0));
1761 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1762 implementations. Don't allow an item to be split between a FP register
1763 and an Altivec register. Allow TImode in all VSX registers if the user
1764 asked for it. */
1765 if (TARGET_VSX && VSX_REGNO_P (regno)
1766 && (VECTOR_MEM_VSX_P (mode)
1767 || reg_addr[mode].scalar_in_vmx_p
1768 || (TARGET_VSX_TIMODE && mode == TImode)
1769 || (TARGET_VADDUQM && mode == V1TImode)))
1771 if (FP_REGNO_P (regno))
1772 return FP_REGNO_P (last_regno);
1774 if (ALTIVEC_REGNO_P (regno))
1776 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1777 return 0;
1779 return ALTIVEC_REGNO_P (last_regno);
1783 /* The GPRs can hold any mode, but values bigger than one register
1784 cannot go past R31. */
1785 if (INT_REGNO_P (regno))
1786 return INT_REGNO_P (last_regno);
1788 /* The float registers (except for VSX vector modes) can only hold floating
1789 modes and DImode. */
1790 if (FP_REGNO_P (regno))
1792 if (SCALAR_FLOAT_MODE_P (mode)
1793 && (mode != TDmode || (regno % 2) == 0)
1794 && FP_REGNO_P (last_regno))
1795 return 1;
1797 if (GET_MODE_CLASS (mode) == MODE_INT
1798 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1799 return 1;
1801 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1802 && PAIRED_VECTOR_MODE (mode))
1803 return 1;
1805 return 0;
1808 /* The CR register can only hold CC modes. */
1809 if (CR_REGNO_P (regno))
1810 return GET_MODE_CLASS (mode) == MODE_CC;
1812 if (CA_REGNO_P (regno))
1813 return mode == Pmode || mode == SImode;
1815   /* AltiVec only in AltiVec registers. */
1816 if (ALTIVEC_REGNO_P (regno))
1817 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1818 || mode == V1TImode);
1820 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1821 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1822 return 1;
1824   /* We cannot put non-VSX TImode or PTImode anywhere except in the general
1825      registers, and the value must fit within the register set. */
1827 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
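/* Illustrative sketch (not in the original source) of the PTImode test
   above: a quad word must start on an even GPR so that the even/odd
   pair is contiguous.  Assuming GPRs 0..31, a pair starting at r4 is
   accepted while one starting at r5 or r31 is rejected.  */
static int
example_quad_pair_ok (int regno)
{
  return regno >= 0 && regno + 1 <= 31 && (regno & 1) == 0;
}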
1830 /* Print interesting facts about registers. */
1831 static void
1832 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1834 int r, m;
1836 for (r = first_regno; r <= last_regno; ++r)
1838 const char *comma = "";
1839 int len;
1841 if (first_regno == last_regno)
1842 fprintf (stderr, "%s:\t", reg_name);
1843 else
1844 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1846 len = 8;
1847 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1848 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1850 if (len > 70)
1852 fprintf (stderr, ",\n\t");
1853 len = 8;
1854 comma = "";
1857 if (rs6000_hard_regno_nregs[m][r] > 1)
1858 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1859 rs6000_hard_regno_nregs[m][r]);
1860 else
1861 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1863 comma = ", ";
1866 if (call_used_regs[r])
1868 if (len > 70)
1870 fprintf (stderr, ",\n\t");
1871 len = 8;
1872 comma = "";
1875 len += fprintf (stderr, "%s%s", comma, "call-used");
1876 comma = ", ";
1879 if (fixed_regs[r])
1881 if (len > 70)
1883 fprintf (stderr, ",\n\t");
1884 len = 8;
1885 comma = "";
1888 len += fprintf (stderr, "%s%s", comma, "fixed");
1889 comma = ", ";
1892 if (len > 70)
1894 fprintf (stderr, ",\n\t");
1895 comma = "";
1898 len += fprintf (stderr, "%sreg-class = %s", comma,
1899 reg_class_names[(int)rs6000_regno_regclass[r]]);
1900 comma = ", ";
1902 if (len > 70)
1904 fprintf (stderr, ",\n\t");
1905 comma = "";
1908 fprintf (stderr, "%sregno = %d\n", comma, r);
1912 static const char *
1913 rs6000_debug_vector_unit (enum rs6000_vector v)
1915 const char *ret;
1917 switch (v)
1919 case VECTOR_NONE: ret = "none"; break;
1920 case VECTOR_ALTIVEC: ret = "altivec"; break;
1921 case VECTOR_VSX: ret = "vsx"; break;
1922 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1923 case VECTOR_PAIRED: ret = "paired"; break;
1924 case VECTOR_SPE: ret = "spe"; break;
1925 case VECTOR_OTHER: ret = "other"; break;
1926 default: ret = "unknown"; break;
1929 return ret;
1932 /* Inner function printing just the address mask for a particular reload
1933 register class. */
1934 DEBUG_FUNCTION char *
1935 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1937 static char ret[8];
1938 char *p = ret;
1940 if ((mask & RELOAD_REG_VALID) != 0)
1941 *p++ = 'v';
1942 else if (keep_spaces)
1943 *p++ = ' ';
1945 if ((mask & RELOAD_REG_MULTIPLE) != 0)
1946 *p++ = 'm';
1947 else if (keep_spaces)
1948 *p++ = ' ';
1950 if ((mask & RELOAD_REG_INDEXED) != 0)
1951 *p++ = 'i';
1952 else if (keep_spaces)
1953 *p++ = ' ';
1955 if ((mask & RELOAD_REG_OFFSET) != 0)
1956 *p++ = 'o';
1957 else if (keep_spaces)
1958 *p++ = ' ';
1960 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
1961 *p++ = '+';
1962 else if (keep_spaces)
1963 *p++ = ' ';
1965 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
1966 *p++ = '+';
1967 else if (keep_spaces)
1968 *p++ = ' ';
1970 if ((mask & RELOAD_REG_AND_M16) != 0)
1971 *p++ = '&';
1972 else if (keep_spaces)
1973 *p++ = ' ';
1975 *p = '\0';
1977 return ret;
1980 /* Print the address masks in a human readable fashion. */
1981 DEBUG_FUNCTION void
1982 rs6000_debug_print_mode (ssize_t m)
1984 ssize_t rc;
1986 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
1987 for (rc = 0; rc < N_RELOAD_REG; rc++)
1988 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
1989 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
1991 if (rs6000_vector_unit[m] != VECTOR_NONE
1992 || rs6000_vector_mem[m] != VECTOR_NONE
1993 || (reg_addr[m].reload_store != CODE_FOR_nothing)
1994 || (reg_addr[m].reload_load != CODE_FOR_nothing)
1995 || reg_addr[m].scalar_in_vmx_p)
1997 fprintf (stderr,
1998 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
1999 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2000 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
2001 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2002 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
2003 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
2006 fputs ("\n", stderr);
2009 #define DEBUG_FMT_ID "%-32s= "
2010 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2011 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2012 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
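/* Example (illustrative, not in the original source): adjacent string
   literals concatenate, so DEBUG_FMT_D expands to "%-32s= %d\n" and
   fprintf (stderr, DEBUG_FMT_D, "tls_size", 16) prints the label padded
   to 32 columns followed by "= 16".  */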
2014 /* Print various interesting information with -mdebug=reg. */
2015 static void
2016 rs6000_debug_reg_global (void)
2018 static const char *const tf[2] = { "false", "true" };
2019 const char *nl = (const char *)0;
2020 int m;
2021 size_t m1, m2, v;
2022 char costly_num[20];
2023 char nop_num[20];
2024 char flags_buffer[40];
2025 const char *costly_str;
2026 const char *nop_str;
2027 const char *trace_str;
2028 const char *abi_str;
2029 const char *cmodel_str;
2030 struct cl_target_option cl_opts;
2032 /* Modes we want tieable information on. */
2033 static const machine_mode print_tieable_modes[] = {
2034 QImode,
2035 HImode,
2036 SImode,
2037 DImode,
2038 TImode,
2039 PTImode,
2040 SFmode,
2041 DFmode,
2042 TFmode,
2043 SDmode,
2044 DDmode,
2045 TDmode,
2046 V8QImode,
2047 V4HImode,
2048 V2SImode,
2049 V16QImode,
2050 V8HImode,
2051 V4SImode,
2052 V2DImode,
2053 V1TImode,
2054 V32QImode,
2055 V16HImode,
2056 V8SImode,
2057 V4DImode,
2058 V2TImode,
2059 V2SFmode,
2060 V4SFmode,
2061 V2DFmode,
2062 V8SFmode,
2063 V4DFmode,
2064 CCmode,
2065 CCUNSmode,
2066 CCEQmode,
2069 /* Virtual regs we are interested in. */
2070 const static struct {
2071 int regno; /* register number. */
2072 const char *name; /* register name. */
2073 } virtual_regs[] = {
2074 { STACK_POINTER_REGNUM, "stack pointer:" },
2075 { TOC_REGNUM, "toc: " },
2076 { STATIC_CHAIN_REGNUM, "static chain: " },
2077 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2078 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2079 { ARG_POINTER_REGNUM, "arg pointer: " },
2080 { FRAME_POINTER_REGNUM, "frame pointer:" },
2081 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2082 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2083 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2084 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2085 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2086 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2087 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2088     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2089 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2092 fputs ("\nHard register information:\n", stderr);
2093 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2094 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2095 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2096 LAST_ALTIVEC_REGNO,
2097 "vs");
2098 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2099 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2100 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2101 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2102 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2103 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2104 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2105 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2107 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2108 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2109 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2111 fprintf (stderr,
2112 "\n"
2113 "d reg_class = %s\n"
2114 "f reg_class = %s\n"
2115 "v reg_class = %s\n"
2116 "wa reg_class = %s\n"
2117 "wd reg_class = %s\n"
2118 "wf reg_class = %s\n"
2119 "wg reg_class = %s\n"
2120 "wh reg_class = %s\n"
2121 "wi reg_class = %s\n"
2122 "wj reg_class = %s\n"
2123 "wk reg_class = %s\n"
2124 "wl reg_class = %s\n"
2125 "wm reg_class = %s\n"
2126 "wr reg_class = %s\n"
2127 "ws reg_class = %s\n"
2128 "wt reg_class = %s\n"
2129 "wu reg_class = %s\n"
2130 "wv reg_class = %s\n"
2131 "ww reg_class = %s\n"
2132 "wx reg_class = %s\n"
2133 "wy reg_class = %s\n"
2134 "wz reg_class = %s\n"
2135 "\n",
2136 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2137 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2138 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2139 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2140 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2141 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2142 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2143 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2144 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2145 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2146 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2147 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2148 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2149 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2150 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2151 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2152 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2153 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2154 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2155 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2156 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2157 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2159 nl = "\n";
2160 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2161 rs6000_debug_print_mode (m);
2163 fputs ("\n", stderr);
2165 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2167 machine_mode mode1 = print_tieable_modes[m1];
2168 bool first_time = true;
2170 nl = (const char *)0;
2171 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2173 machine_mode mode2 = print_tieable_modes[m2];
2174 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2176 if (first_time)
2178 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2179 nl = "\n";
2180 first_time = false;
2183 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2187 if (!first_time)
2188 fputs ("\n", stderr);
2191 if (nl)
2192 fputs (nl, stderr);
2194 if (rs6000_recip_control)
2196 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2198 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2199 if (rs6000_recip_bits[m])
2201 fprintf (stderr,
2202 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2203 GET_MODE_NAME (m),
2204 (RS6000_RECIP_AUTO_RE_P (m)
2205 ? "auto"
2206 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2207 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2208 ? "auto"
2209 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2212 fputs ("\n", stderr);
2215 if (rs6000_cpu_index >= 0)
2217 const char *name = processor_target_table[rs6000_cpu_index].name;
2218 HOST_WIDE_INT flags
2219 = processor_target_table[rs6000_cpu_index].target_enable;
2221 sprintf (flags_buffer, "-mcpu=%s flags", name);
2222 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2224 else
2225 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2227 if (rs6000_tune_index >= 0)
2229 const char *name = processor_target_table[rs6000_tune_index].name;
2230 HOST_WIDE_INT flags
2231 = processor_target_table[rs6000_tune_index].target_enable;
2233 sprintf (flags_buffer, "-mtune=%s flags", name);
2234 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2236 else
2237 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2239 cl_target_option_save (&cl_opts, &global_options);
2240 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2241 rs6000_isa_flags);
2243 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2244 rs6000_isa_flags_explicit);
2246 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2247 rs6000_builtin_mask);
2249 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2251 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2252 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2254 switch (rs6000_sched_costly_dep)
2256 case max_dep_latency:
2257 costly_str = "max_dep_latency";
2258 break;
2260 case no_dep_costly:
2261 costly_str = "no_dep_costly";
2262 break;
2264 case all_deps_costly:
2265 costly_str = "all_deps_costly";
2266 break;
2268 case true_store_to_load_dep_costly:
2269 costly_str = "true_store_to_load_dep_costly";
2270 break;
2272 case store_to_load_dep_costly:
2273 costly_str = "store_to_load_dep_costly";
2274 break;
2276 default:
2277 costly_str = costly_num;
2278 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2279 break;
2282 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2284 switch (rs6000_sched_insert_nops)
2286 case sched_finish_regroup_exact:
2287 nop_str = "sched_finish_regroup_exact";
2288 break;
2290 case sched_finish_pad_groups:
2291 nop_str = "sched_finish_pad_groups";
2292 break;
2294 case sched_finish_none:
2295 nop_str = "sched_finish_none";
2296 break;
2298 default:
2299 nop_str = nop_num;
2300 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2301 break;
2304 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2306 switch (rs6000_sdata)
2308 default:
2309 case SDATA_NONE:
2310 break;
2312 case SDATA_DATA:
2313 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2314 break;
2316 case SDATA_SYSV:
2317 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2318 break;
2320 case SDATA_EABI:
2321 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2322 break;
2326 switch (rs6000_traceback)
2328 case traceback_default: trace_str = "default"; break;
2329 case traceback_none: trace_str = "none"; break;
2330 case traceback_part: trace_str = "part"; break;
2331 case traceback_full: trace_str = "full"; break;
2332 default: trace_str = "unknown"; break;
2335 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2337 switch (rs6000_current_cmodel)
2339 case CMODEL_SMALL: cmodel_str = "small"; break;
2340 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2341 case CMODEL_LARGE: cmodel_str = "large"; break;
2342 default: cmodel_str = "unknown"; break;
2345 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2347 switch (rs6000_current_abi)
2349 case ABI_NONE: abi_str = "none"; break;
2350 case ABI_AIX: abi_str = "aix"; break;
2351 case ABI_ELFv2: abi_str = "ELFv2"; break;
2352 case ABI_V4: abi_str = "V4"; break;
2353 case ABI_DARWIN: abi_str = "darwin"; break;
2354 default: abi_str = "unknown"; break;
2357 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2359 if (rs6000_altivec_abi)
2360 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2362 if (rs6000_spe_abi)
2363 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2365 if (rs6000_darwin64_abi)
2366 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2368 if (rs6000_float_gprs)
2369 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2371 fprintf (stderr, DEBUG_FMT_S, "fprs",
2372 (TARGET_FPRS ? "true" : "false"));
2374 fprintf (stderr, DEBUG_FMT_S, "single_float",
2375 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2377 fprintf (stderr, DEBUG_FMT_S, "double_float",
2378 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2380 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2381 (TARGET_SOFT_FLOAT ? "true" : "false"));
2383 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2384 (TARGET_E500_SINGLE ? "true" : "false"));
2386 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2387 (TARGET_E500_DOUBLE ? "true" : "false"));
2389 if (TARGET_LINK_STACK)
2390 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2392 if (targetm.lra_p ())
2393 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2395 if (TARGET_P8_FUSION)
2396 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2397 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2399 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2400 TARGET_SECURE_PLT ? "secure" : "bss");
2401 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2402 aix_struct_return ? "aix" : "sysv");
2403 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2404 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2405 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2406 tf[!!rs6000_align_branch_targets]);
2407 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2408 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2409 rs6000_long_double_type_size);
2410 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2411 (int)rs6000_sched_restricted_insns_priority);
2412 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2413 (int)END_BUILTINS);
2414 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2415 (int)RS6000_BUILTIN_COUNT);
2417 if (TARGET_VSX)
2418 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2419 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2423 /* Update the addr mask bits in reg_addr to help secondary reload and the
2424    legitimate address support figure out the appropriate addressing to
2425    use. */
2427 static void
2428 rs6000_setup_reg_addr_masks (void)
2430 ssize_t rc, reg, m, nregs;
2431 addr_mask_type any_addr_mask, addr_mask;
2433 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2435 machine_mode m2 = (machine_mode)m;
2437       /* SDmode is special in that we want to access it only via REG+REG
2438          addressing on power7 and above, since we want to use the LFIWZX and
2439          STFIWX instructions to load and store it. */
2440 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2442 any_addr_mask = 0;
2443 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2445 addr_mask = 0;
2446 reg = reload_reg_map[rc].reg;
2448 /* Can mode values go in the GPR/FPR/Altivec registers? */
2449 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2451 nregs = rs6000_hard_regno_nregs[m][reg];
2452 addr_mask |= RELOAD_REG_VALID;
2454 /* Indicate if the mode takes more than 1 physical register. If
2455 it takes a single register, indicate it can do REG+REG
2456 addressing. */
2457 if (nregs > 1 || m == BLKmode)
2458 addr_mask |= RELOAD_REG_MULTIPLE;
2459 else
2460 addr_mask |= RELOAD_REG_INDEXED;
2462 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2463 addressing. Restrict addressing on SPE for 64-bit types
2464 because of the SUBREG hackery used to address 64-bit floats in
2465 '32-bit' GPRs. */
2467 if (TARGET_UPDATE
2468 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2469 && GET_MODE_SIZE (m2) <= 8
2470 && !VECTOR_MODE_P (m2)
2471 && !COMPLEX_MODE_P (m2)
2472 && !indexed_only_p
2473 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
2475 addr_mask |= RELOAD_REG_PRE_INCDEC;
2477 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2478 we don't allow PRE_MODIFY for some multi-register
2479 operations. */
2480 switch (m)
2482 default:
2483 addr_mask |= RELOAD_REG_PRE_MODIFY;
2484 break;
2486 case DImode:
2487 if (TARGET_POWERPC64)
2488 addr_mask |= RELOAD_REG_PRE_MODIFY;
2489 break;
2491 case DFmode:
2492 case DDmode:
2493 if (TARGET_DF_INSN)
2494 addr_mask |= RELOAD_REG_PRE_MODIFY;
2495 break;
2500 /* GPR and FPR registers can do REG+OFFSET addressing, except
2501 possibly for SDmode. */
2502 if ((addr_mask != 0) && !indexed_only_p
2503 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2504 addr_mask |= RELOAD_REG_OFFSET;
2506 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2507 addressing on 128-bit types. */
2508 if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
2509 && (addr_mask & RELOAD_REG_VALID) != 0)
2510 addr_mask |= RELOAD_REG_AND_M16;
2512 reg_addr[m].addr_mask[rc] = addr_mask;
2513 any_addr_mask |= addr_mask;
2516 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
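/* Illustrative sketch (not in the original source): once the masks are
   set up, later code can ask "does mode M support REG+OFFSET addressing
   in the GPRs?" with a single bit test.  */
static int
example_offset_ok_in_gprs (machine_mode m)
{
  return (reg_addr[m].addr_mask[RELOAD_REG_GPR] & RELOAD_REG_OFFSET) != 0;
}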
2521 /* Initialize the various global tables that are based on register size. */
2522 static void
2523 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2525 ssize_t r, m, c;
2526 int align64;
2527 int align32;
2529 /* Precalculate REGNO_REG_CLASS. */
2530 rs6000_regno_regclass[0] = GENERAL_REGS;
2531 for (r = 1; r < 32; ++r)
2532 rs6000_regno_regclass[r] = BASE_REGS;
2534 for (r = 32; r < 64; ++r)
2535 rs6000_regno_regclass[r] = FLOAT_REGS;
2537 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2538 rs6000_regno_regclass[r] = NO_REGS;
2540 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2541 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2543 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2544 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2545 rs6000_regno_regclass[r] = CR_REGS;
2547 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2548 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2549 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2550 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2551 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2552 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2553 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2554 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2555 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2556 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2557 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2558 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2560   /* Precalculate the mapping from register class to the simpler reload
2561      register class.  We don't need all of the combined register classes,
2562      just the simple ones that have constraint letters. */
2563 for (c = 0; c < N_REG_CLASSES; c++)
2564 reg_class_to_reg_type[c] = NO_REG_TYPE;
2566 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2567 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2568 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2569 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2570 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2571 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2572 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2573 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2574 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2575 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2576 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2577 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2579 if (TARGET_VSX)
2581 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2582 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2584 else
2586 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2587 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2590   /* Precalculate the valid memory formats as well as the vector information;
2591      this must be set up before the rs6000_hard_regno_nregs_internal calls
2592      below. */
2593 gcc_assert ((int)VECTOR_NONE == 0);
2594 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2595   memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2597 gcc_assert ((int)CODE_FOR_nothing == 0);
2598 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2600 gcc_assert ((int)NO_REGS == 0);
2601 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2603   /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2604      controls whether the compiler uses native alignment or still uses 128-bit alignment. */
2605 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2607 align64 = 64;
2608 align32 = 32;
2610 else
2612 align64 = 128;
2613 align32 = 128;
2616 /* V2DF mode, VSX only. */
2617 if (TARGET_VSX)
2619 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2620 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2621 rs6000_vector_align[V2DFmode] = align64;
2624 /* V4SF mode, either VSX or Altivec. */
2625 if (TARGET_VSX)
2627 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2628 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2629 rs6000_vector_align[V4SFmode] = align32;
2631 else if (TARGET_ALTIVEC)
2633 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2634 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2635 rs6000_vector_align[V4SFmode] = align32;
2638 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2639 and stores. */
2640 if (TARGET_ALTIVEC)
2642 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2643 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2644 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2645 rs6000_vector_align[V4SImode] = align32;
2646 rs6000_vector_align[V8HImode] = align32;
2647 rs6000_vector_align[V16QImode] = align32;
2649 if (TARGET_VSX)
2651 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2652 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2653 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2655 else
2657 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2658 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2659 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2663   /* V2DImode: full arithmetic support depends on the ISA 2.07 vector mode.  Allow
2664      it under VSX to do insert/splat/extract.  Altivec doesn't have 64-bit integer support. */
2665 if (TARGET_VSX)
2667 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2668 rs6000_vector_unit[V2DImode]
2669 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2670 rs6000_vector_align[V2DImode] = align64;
2672 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2673 rs6000_vector_unit[V1TImode]
2674 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2675 rs6000_vector_align[V1TImode] = 128;
2678 /* DFmode, see if we want to use the VSX unit. Memory is handled
2679 differently, so don't set rs6000_vector_mem. */
2680 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2682 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2683 rs6000_vector_align[DFmode] = 64;
2686 /* SFmode, see if we want to use the VSX unit. */
2687 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2689 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2690 rs6000_vector_align[SFmode] = 32;
2693 /* Allow TImode in VSX register and set the VSX memory macros. */
2694 if (TARGET_VSX && TARGET_VSX_TIMODE)
2696 rs6000_vector_mem[TImode] = VECTOR_VSX;
2697 rs6000_vector_align[TImode] = align64;
2700 /* TODO add SPE and paired floating point vector support. */
2702 /* Register class constraints for the constraints that depend on compile
2703 switches. When the VSX code was added, different constraints were added
2704 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2705      of the VSX registers are used.  The register classes for scalar floating
2706      point types are set based on whether we allow that type into the upper
2707 (Altivec) registers. GCC has register classes to target the Altivec
2708 registers for load/store operations, to select using a VSX memory
2709 operation instead of the traditional floating point operation. The
2710 constraints are:
2712 d - Register class to use with traditional DFmode instructions.
2713 f - Register class to use with traditional SFmode instructions.
2714 v - Altivec register.
2715 wa - Any VSX register.
2716 wc - Reserved to represent individual CR bits (used in LLVM).
2717 wd - Preferred register class for V2DFmode.
2718 wf - Preferred register class for V4SFmode.
2719 wg - Float register for power6x move insns.
2720 wh - FP register for direct move instructions.
2721 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2722 wj - FP or VSX register to hold 64-bit integers for direct moves.
2723 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2724 wl - Float register if we can do 32-bit signed int loads.
2725 wm - VSX register for ISA 2.07 direct move operations.
2726 wn - always NO_REGS.
2727 wr - GPR if 64-bit mode is permitted.
2728 ws - Register class to do ISA 2.06 DF operations.
2729 wt - VSX register for TImode in VSX registers.
2730 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2731 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2732 ww - Register class to do SF conversions in with VSX operations.
2733 wx - Float register if we can do 32-bit int stores.
2734 wy - Register class to do ISA 2.07 SF operations.
2735 wz - Float register if we can do 32-bit unsigned int loads. */
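  /* Example (illustrative, not from the original source): a machine
     description pattern would use one of the letters above as an
     operand constraint, e.g. "ws" to request the ISA 2.06 DFmode class:

       (match_operand:DF 0 "vsx_register_operand" "=ws")

     so the register class selected below decides which hard registers
     such a pattern may use.  */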
2737 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2738 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2740 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2741 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2743 if (TARGET_VSX)
2745 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2746 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2747 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2748 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2750 if (TARGET_VSX_TIMODE)
2751 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2753 if (TARGET_UPPER_REGS_DF) /* DFmode */
2755 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2756 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2758 else
2759 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2762 /* Add conditional constraints based on various options, to allow us to
2763 collapse multiple insn patterns. */
2764 if (TARGET_ALTIVEC)
2765 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2767 if (TARGET_MFPGPR) /* DFmode */
2768 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2770 if (TARGET_LFIWAX)
2771 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2773 if (TARGET_DIRECT_MOVE)
2775 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2776 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2777 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2778 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2779 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2780 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2783 if (TARGET_POWERPC64)
2784 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2786 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2788 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2789 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2790 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2792 else if (TARGET_P8_VECTOR)
2794 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2795 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2797 else if (TARGET_VSX)
2798 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2800 if (TARGET_STFIWX)
2801 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2803 if (TARGET_LFIWZX)
2804 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2806 /* Set up the reload helper and direct move functions. */
2807 if (TARGET_VSX || TARGET_ALTIVEC)
2809 if (TARGET_64BIT)
2811 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2812 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2813 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2814 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2815 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2816 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2817 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2818 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2819 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2820 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2821 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2822 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2823 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2824 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2825 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2826 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2827 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2828 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2829 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2830 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2831 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2832 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2834 if (TARGET_VSX_TIMODE)
2836 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2837 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2840 if (TARGET_DIRECT_MOVE)
2842 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2843 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2844 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2845 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2846 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2847 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2848 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2849 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2850 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2852 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2853 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2854 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2855 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2856 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2857 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2858 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2859 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2860 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2863 else
2865 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2866 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2867 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2868 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2869 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2870 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2871 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2872 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2873 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2874 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2875 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2876 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2877 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2878 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2879 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2880 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2881 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2882 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2883 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2884 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2885 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2886 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2888 if (TARGET_VSX_TIMODE)
2890 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2891 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2894 if (TARGET_DIRECT_MOVE)
2896 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2897 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2898 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2902 if (TARGET_UPPER_REGS_DF)
2903 reg_addr[DFmode].scalar_in_vmx_p = true;
2905 if (TARGET_UPPER_REGS_SF)
2906 reg_addr[SFmode].scalar_in_vmx_p = true;
2909 /* Precalculate HARD_REGNO_NREGS. */
2910 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2911 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2912 rs6000_hard_regno_nregs[m][r]
2913 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
2915 /* Precalculate HARD_REGNO_MODE_OK. */
2916 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2917 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2918 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
2919 rs6000_hard_regno_mode_ok_p[m][r] = true;
2921 /* Precalculate CLASS_MAX_NREGS sizes. */
2922 for (c = 0; c < LIM_REG_CLASSES; ++c)
2924 int reg_size;
2926 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2927 reg_size = UNITS_PER_VSX_WORD;
2929 else if (c == ALTIVEC_REGS)
2930 reg_size = UNITS_PER_ALTIVEC_WORD;
2932 else if (c == FLOAT_REGS)
2933 reg_size = UNITS_PER_FP_WORD;
2935 else
2936 reg_size = UNITS_PER_WORD;
2938 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2940 machine_mode m2 = (machine_mode)m;
2941 int reg_size2 = reg_size;
2943 /* TFmode/TDmode always takes 2 registers, even in VSX. */
2944 if (TARGET_VSX && VSX_REG_CLASS_P (c)
2945 && (m == TDmode || m == TFmode))
2946 reg_size2 = UNITS_PER_FP_WORD;
2948 rs6000_class_max_nregs[m][c]
2949 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
2953 if (TARGET_E500_DOUBLE)
2954 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
2956   /* Calculate which modes to automatically generate code for using the
2957      reciprocal divide and square root instructions.  In the future, possibly
2958      automatically generate the instructions even if the user did not specify
2959      -mrecip.  The older machines' double precision reciprocal sqrt estimate is
2960      not accurate enough. */
2961 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
2962 if (TARGET_FRES)
2963 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2964 if (TARGET_FRE)
2965 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2966 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2967 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2968 if (VECTOR_UNIT_VSX_P (V2DFmode))
2969 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2971 if (TARGET_FRSQRTES)
2972 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2973 if (TARGET_FRSQRTE)
2974 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2975 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2976 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2977 if (VECTOR_UNIT_VSX_P (V2DFmode))
2978 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2980 if (rs6000_recip_control)
2982 if (!flag_finite_math_only)
2983 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
2984 if (flag_trapping_math)
2985 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
2986 if (!flag_reciprocal_math)
2987 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
2988 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
2990 if (RS6000_RECIP_HAVE_RE_P (SFmode)
2991 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
2992 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2994 if (RS6000_RECIP_HAVE_RE_P (DFmode)
2995 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
2996 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2998 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
2999 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3000 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3002 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3003 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3004 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3006 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3007 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3008 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3010 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3011 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3012 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3014 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3015 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3016 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3018 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3019 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3020 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
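  /* Illustrative note (not in the original source): the estimate
     instructions enabled above only provide a rough seed; the generated
     code refines it with Newton-Raphson steps.  For a reciprocal
     y ~= 1/x, each step computes y' = y * (2.0 - x * y), roughly
     doubling the number of accurate bits, which is why an inaccurate
     starting estimate on older machines is not worth using.  */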
3024   /* Update the addr mask bits in reg_addr to help secondary reload and the
3025      legitimate address support figure out the appropriate addressing to
3026      use. */
3027 rs6000_setup_reg_addr_masks ();
3029 if (global_init_p || TARGET_DEBUG_TARGET)
3031 if (TARGET_DEBUG_REG)
3032 rs6000_debug_reg_global ();
3034 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3035 fprintf (stderr,
3036 "SImode variable mult cost = %d\n"
3037 "SImode constant mult cost = %d\n"
3038 "SImode short constant mult cost = %d\n"
3039 "DImode multipliciation cost = %d\n"
3040 "SImode division cost = %d\n"
3041 "DImode division cost = %d\n"
3042 "Simple fp operation cost = %d\n"
3043 "DFmode multiplication cost = %d\n"
3044 "SFmode division cost = %d\n"
3045 "DFmode division cost = %d\n"
3046 "cache line size = %d\n"
3047 "l1 cache size = %d\n"
3048 "l2 cache size = %d\n"
3049 "simultaneous prefetches = %d\n"
3050 "\n",
3051 rs6000_cost->mulsi,
3052 rs6000_cost->mulsi_const,
3053 rs6000_cost->mulsi_const9,
3054 rs6000_cost->muldi,
3055 rs6000_cost->divsi,
3056 rs6000_cost->divdi,
3057 rs6000_cost->fp,
3058 rs6000_cost->dmul,
3059 rs6000_cost->sdiv,
3060 rs6000_cost->ddiv,
3061 rs6000_cost->cache_line_size,
3062 rs6000_cost->l1_cache_size,
3063 rs6000_cost->l2_cache_size,
3064 rs6000_cost->simultaneous_prefetches);
3068 #if TARGET_MACHO
3069 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3071 static void
3072 darwin_rs6000_override_options (void)
3074 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3075 off. */
3076 rs6000_altivec_abi = 1;
3077 TARGET_ALTIVEC_VRSAVE = 1;
3078 rs6000_current_abi = ABI_DARWIN;
3080 if (DEFAULT_ABI == ABI_DARWIN
3081 && TARGET_64BIT)
3082 darwin_one_byte_bool = 1;
3084 if (TARGET_64BIT && ! TARGET_POWERPC64)
3086 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3087 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3089 if (flag_mkernel)
3091 rs6000_default_long_calls = 1;
3092 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3095 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3096 Altivec. */
3097 if (!flag_mkernel && !flag_apple_kext
3098 && TARGET_64BIT
3099 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3100 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3102 /* Unless the user (not the configurer) has explicitly overridden
3103 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3104 G4 unless targeting the kernel. */
3105 if (!flag_mkernel
3106 && !flag_apple_kext
3107 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3108 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3109 && ! global_options_set.x_rs6000_cpu_index)
3111 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3114 #endif
3116 /* If not otherwise specified by a target, make 'long double' equivalent to
3117 'double'. */
3119 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3120 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3121 #endif
3123 /* Return the builtin mask of the various options that could affect which
3124    builtins are available.  In the past we used target_flags, but we've run out
3125    of bits, and some options like SPE and PAIRED are no longer in
3126    target_flags. */
3128 HOST_WIDE_INT
3129 rs6000_builtin_mask_calculate (void)
3131 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3132 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3133 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3134 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3135 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3136 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3137 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3138 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3139 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3140 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3141 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3142 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3143 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3144 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3145 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3146 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
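/* Illustrative use (not in the original source): builtin expansion code
   can test whether a feature's builtins are available by masking the
   result.  */
static int
example_vsx_builtins_available_p (void)
{
  return (rs6000_builtin_mask_calculate () & RS6000_BTM_VSX) != 0;
}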
3149 /* Override command line options. Mostly we process the processor type and
3150 sometimes adjust other TARGET_ options. */
3152 static bool
3153 rs6000_option_override_internal (bool global_init_p)
3155 bool ret = true;
3156 bool have_cpu = false;
3158 /* The default cpu requested at configure time, if any. */
3159 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3161 HOST_WIDE_INT set_masks;
3162 int cpu_index;
3163 int tune_index;
3164 struct cl_target_option *main_target_opt
3165 = ((global_init_p || target_option_default_node == NULL)
3166 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3168 /* Remember the explicit arguments. */
3169 if (global_init_p)
3170 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3172 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3173 library functions, so warn about it. The flag may be useful for
3174 performance studies from time to time though, so don't disable it
3175 entirely. */
3176 if (global_options_set.x_rs6000_alignment_flags
3177 && rs6000_alignment_flags == MASK_ALIGN_POWER
3178 && DEFAULT_ABI == ABI_DARWIN
3179 && TARGET_64BIT)
3180 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3181 " it is incompatible with the installed C and C++ libraries");
3183   /* Numerous experiments show that IRA-based loop pressure
3184      calculation works better for RTL loop invariant motion on targets
3185      with enough (>= 32) registers.  It is an expensive optimization,
3186      so it is enabled only when optimizing for peak performance. */
3187 if (optimize >= 3 && global_init_p
3188 && !global_options_set.x_flag_ira_loop_pressure)
3189 flag_ira_loop_pressure = 1;
3191 /* Set the pointer size. */
3192 if (TARGET_64BIT)
3194 rs6000_pmode = (int)DImode;
3195 rs6000_pointer_size = 64;
3197 else
3199 rs6000_pmode = (int)SImode;
3200 rs6000_pointer_size = 32;
3203 /* Some OSs don't support saving the high part of 64-bit registers on context
3204 switch. Other OSs don't support saving Altivec registers. On those OSs,
3205 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3206 if the user wants either, the user must explicitly specify them and we
3207 won't interfere with the user's specification. */
3209 set_masks = POWERPC_MASKS;
3210 #ifdef OS_MISSING_POWERPC64
3211 if (OS_MISSING_POWERPC64)
3212 set_masks &= ~OPTION_MASK_POWERPC64;
3213 #endif
3214 #ifdef OS_MISSING_ALTIVEC
3215 if (OS_MISSING_ALTIVEC)
3216 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3217 #endif
3219   /* Don't let the processor default override options given explicitly. */
3220 set_masks &= ~rs6000_isa_flags_explicit;
3222   /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
3223 the cpu in a target attribute or pragma, but did not specify a tuning
3224 option, use the cpu for the tuning option rather than the option specified
3225 with -mtune on the command line. Process a '--with-cpu' configuration
3226 request as an implicit --cpu. */
3227 if (rs6000_cpu_index >= 0)
3229 cpu_index = rs6000_cpu_index;
3230 have_cpu = true;
3232 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3234 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3235 have_cpu = true;
3237 else if (implicit_cpu)
3239 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3240 have_cpu = true;
3242 else
3244 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3245 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3246 have_cpu = false;
3249 gcc_assert (cpu_index >= 0);
3251 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3252 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3253 with those from the cpu, except for options that were explicitly set. If
3254 we don't have a cpu, do not override the target bits set in
3255 TARGET_DEFAULT. */
3256 if (have_cpu)
3258 rs6000_isa_flags &= ~set_masks;
3259 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3260 & set_masks);
3262 else
3263 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3264 & ~rs6000_isa_flags_explicit);
3266 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3267 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3268 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3269 to using rs6000_isa_flags, we need to do the initialization here. */
3270 if (!have_cpu)
3271 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
3273 if (rs6000_tune_index >= 0)
3274 tune_index = rs6000_tune_index;
3275 else if (have_cpu)
3276 rs6000_tune_index = tune_index = cpu_index;
3277 else
3279 size_t i;
3280 enum processor_type tune_proc
3281 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3283 tune_index = -1;
3284 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3285 if (processor_target_table[i].processor == tune_proc)
3287 rs6000_tune_index = tune_index = i;
3288 break;
3292 gcc_assert (tune_index >= 0);
3293 rs6000_cpu = processor_target_table[tune_index].processor;
3295 /* Pick defaults for SPE related control flags. Do this early to make sure
3296 that the TARGET_ macros are representative ASAP. */
3298 int spe_capable_cpu =
3299 (rs6000_cpu == PROCESSOR_PPC8540
3300 || rs6000_cpu == PROCESSOR_PPC8548);
3302 if (!global_options_set.x_rs6000_spe_abi)
3303 rs6000_spe_abi = spe_capable_cpu;
3305 if (!global_options_set.x_rs6000_spe)
3306 rs6000_spe = spe_capable_cpu;
3308 if (!global_options_set.x_rs6000_float_gprs)
3309 rs6000_float_gprs =
3310 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3311 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3312 : 0);
3315 if (global_options_set.x_rs6000_spe_abi
3316 && rs6000_spe_abi
3317 && !TARGET_SPE_ABI)
3318 error ("not configured for SPE ABI");
3320 if (global_options_set.x_rs6000_spe
3321 && rs6000_spe
3322 && !TARGET_SPE)
3323 error ("not configured for SPE instruction set");
3325 if (main_target_opt != NULL
3326 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3327 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3328 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3329 error ("target attribute or pragma changes SPE ABI");
3331 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3332 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3333 || rs6000_cpu == PROCESSOR_PPCE5500)
3335 if (TARGET_ALTIVEC)
3336 error ("AltiVec not supported in this target");
3337 if (TARGET_SPE)
3338 error ("SPE not supported in this target");
3340 if (rs6000_cpu == PROCESSOR_PPCE6500)
3342 if (TARGET_SPE)
3343 error ("SPE not supported in this target");
3346 /* Disable Cell microcode if we are optimizing for the Cell
3347 and not optimizing for size. */
3348 if (rs6000_gen_cell_microcode == -1)
3349 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3350 && !optimize_size);
3352 /* If we are optimizing big endian systems for space and it's OK to
3353 use instructions that would be microcoded on the Cell, use the
3354 load/store multiple and string instructions. */
3355 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3356 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3357 | OPTION_MASK_STRING);
3359 /* Don't allow -mmultiple or -mstring on little endian systems
3360 unless the cpu is a 750, because the hardware doesn't support the
3361 instructions used in little endian mode and they cause an alignment
3362 trap. The 750 does not cause an alignment trap (except when the
3363 target is unaligned). */
3365 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3367 if (TARGET_MULTIPLE)
3369 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3370 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3371 warning (0, "-mmultiple is not supported on little endian systems");
3374 if (TARGET_STRING)
3376 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3377 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3378 warning (0, "-mstring is not supported on little endian systems");
3382 /* If little-endian, default to -mstrict-align on older processors.
3383 Testing for htm matches power8 and later. */
3384 if (!BYTES_BIG_ENDIAN
3385 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3386 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3388 /* -maltivec={le,be} implies -maltivec. */
3389 if (rs6000_altivec_element_order != 0)
3390 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3392 /* Disallow -maltivec=le in big endian mode for now. This is not
3393 known to be useful for anyone. */
3394 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3396 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3397 rs6000_altivec_element_order = 0;
3400 /* Add some warnings for VSX. */
3401 if (TARGET_VSX)
3403 const char *msg = NULL;
3404 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3405 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3407 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3408 msg = N_("-mvsx requires hardware floating point");
3409 else
3411 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3412 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3415 else if (TARGET_PAIRED_FLOAT)
3416 msg = N_("-mvsx and -mpaired are incompatible");
3417 else if (TARGET_AVOID_XFORM > 0)
3418 msg = N_("-mvsx needs indexed addressing");
3419 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3420 & OPTION_MASK_ALTIVEC))
3422 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3423 msg = N_("-mvsx and -mno-altivec are incompatible");
3424 else
3425 msg = N_("-mno-altivec disables vsx");
3428 if (msg)
3430 warning (0, msg);
3431 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3432 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3436 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3437 the -mcpu setting to enable options that conflict. */
3438 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3439 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3440 | OPTION_MASK_ALTIVEC
3441 | OPTION_MASK_VSX)) != 0)
3442 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3443 | OPTION_MASK_DIRECT_MOVE)
3444 & ~rs6000_isa_flags_explicit);
3446 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3447 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3449 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3450 unless the user explicitly used the -mno-<option> to disable the code. */
3451 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3452 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3453 else if (TARGET_VSX)
3454 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3455 else if (TARGET_POPCNTD)
3456 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3457 else if (TARGET_DFP)
3458 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3459 else if (TARGET_CMPB)
3460 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3461 else if (TARGET_FPRND)
3462 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3463 else if (TARGET_POPCNTB)
3464 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3465 else if (TARGET_ALTIVEC)
3466 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
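/* The cascade above relies on the ISA_*_MASKS macros (defined in
   rs6000.h) being cumulative, each one including the masks of the
   earlier ISA levels, so a single new option such as -mvsx drags in
   the older features it depends on, while the
   & ~rs6000_isa_flags_explicit term keeps any feature the user
   explicitly disabled turned off.  */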
3468 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3470 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3471 error ("-mcrypto requires -maltivec");
3472 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3475 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3477 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3478 error ("-mdirect-move requires -mvsx");
3479 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3482 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3484 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3485 error ("-mpower8-vector requires -maltivec");
3486 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3489 if (TARGET_P8_VECTOR && !TARGET_VSX)
3491 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3492 error ("-mpower8-vector requires -mvsx");
3493 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3496 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3498 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3499 error ("-mvsx-timode requires -mvsx");
3500 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3503 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3505 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3506 error ("-mhard-dfp requires -mhard-float");
3507 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3510 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3511 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3512 the individual option. */
3513 if (TARGET_UPPER_REGS > 0)
3515 if (TARGET_VSX
3516 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3518 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3519 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3521 if (TARGET_P8_VECTOR
3522 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3524 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3525 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3528 else if (TARGET_UPPER_REGS == 0)
3530 if (TARGET_VSX
3531 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3533 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3534 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3536 if (TARGET_P8_VECTOR
3537 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3539 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3540 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3544 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3546 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3547 error ("-mupper-regs-df requires -mvsx");
3548 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3551 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3553 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3554 error ("-mupper-regs-sf requires -mpower8-vector");
3555 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3558 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3559 silently turn off quad memory mode. */
3560 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3562 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3563 warning (0, N_("-mquad-memory requires 64-bit mode"));
3565 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3566 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3568 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3569 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3572 /* Non-atomic quad memory load/store are disabled for little endian, since
3573 the words are reversed, but atomic operations can still be done by
3574 swapping the words. */
3575 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3577 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3578 warning (0, N_("-mquad-memory is not available in little endian mode"));
3580 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3583 /* Assume that if the user asked for normal quad memory instructions, they want
3584 the atomic versions as well, unless they explicitly told us not to use quad
3585 word atomic instructions. */
3586 if (TARGET_QUAD_MEMORY
3587 && !TARGET_QUAD_MEMORY_ATOMIC
3588 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3589 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3591 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3592 generating power8 instructions. */
3593 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3594 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3595 & OPTION_MASK_P8_FUSION);
3597 /* Power8 does not fuse sign extended loads with the addis. If we are
3598 optimizing at high levels for speed, convert a sign extended load into a
3599 zero extending load, and an explicit sign extension. */
3600 if (TARGET_P8_FUSION
3601 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3602 && optimize_function_for_speed_p (cfun)
3603 && optimize >= 3)
3604 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3606 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3607 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3609 /* E500mc does "better" if we inline more aggressively. Respect the
3610 user's opinion, though. */
3611 if (rs6000_block_move_inline_limit == 0
3612 && (rs6000_cpu == PROCESSOR_PPCE500MC
3613 || rs6000_cpu == PROCESSOR_PPCE500MC64
3614 || rs6000_cpu == PROCESSOR_PPCE5500
3615 || rs6000_cpu == PROCESSOR_PPCE6500))
3616 rs6000_block_move_inline_limit = 128;
3618 /* store_one_arg depends on expand_block_move to handle at least the
3619 size of reg_parm_stack_space. */
3620 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3621 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3623 if (global_init_p)
3625 /* If the appropriate debug option is enabled, replace the target hooks
3626 with debug versions that call the real version and then print
3627 debugging information. */
3628 if (TARGET_DEBUG_COST)
3630 targetm.rtx_costs = rs6000_debug_rtx_costs;
3631 targetm.address_cost = rs6000_debug_address_cost;
3632 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3635 if (TARGET_DEBUG_ADDR)
3637 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3638 targetm.legitimize_address = rs6000_debug_legitimize_address;
3639 rs6000_secondary_reload_class_ptr
3640 = rs6000_debug_secondary_reload_class;
3641 rs6000_secondary_memory_needed_ptr
3642 = rs6000_debug_secondary_memory_needed;
3643 rs6000_cannot_change_mode_class_ptr
3644 = rs6000_debug_cannot_change_mode_class;
3645 rs6000_preferred_reload_class_ptr
3646 = rs6000_debug_preferred_reload_class;
3647 rs6000_legitimize_reload_address_ptr
3648 = rs6000_debug_legitimize_reload_address;
3649 rs6000_mode_dependent_address_ptr
3650 = rs6000_debug_mode_dependent_address;
3653 if (rs6000_veclibabi_name)
3655 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3656 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3657 else
3659 error ("unknown vectorization library ABI type (%s) for "
3660 "-mveclibabi= switch", rs6000_veclibabi_name);
3661 ret = false;
3666 if (!global_options_set.x_rs6000_long_double_type_size)
3668 if (main_target_opt != NULL
3669 && (main_target_opt->x_rs6000_long_double_type_size
3670 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3671 error ("target attribute or pragma changes long double size");
3672 else
3673 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3676 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3677 if (!global_options_set.x_rs6000_ieeequad)
3678 rs6000_ieeequad = 1;
3679 #endif
3681 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3682 target attribute or pragma which automatically enables both options,
3683 unless the altivec ABI was set. This is set by default for 64-bit, but
3684 not for 32-bit. */
3685 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3686 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3687 & ~rs6000_isa_flags_explicit);
3689 /* Enable Altivec ABI for AIX -maltivec. */
3690 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3692 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3693 error ("target attribute or pragma changes AltiVec ABI");
3694 else
3695 rs6000_altivec_abi = 1;
3698 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3699 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3700 be explicitly overridden in either case. */
3701 if (TARGET_ELF)
3703 if (!global_options_set.x_rs6000_altivec_abi
3704 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3706 if (main_target_opt != NULL &&
3707 !main_target_opt->x_rs6000_altivec_abi)
3708 error ("target attribute or pragma changes AltiVec ABI");
3709 else
3710 rs6000_altivec_abi = 1;
3714 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3715 So far, the only darwin64 targets are also MACH-O. */
3716 if (TARGET_MACHO
3717 && DEFAULT_ABI == ABI_DARWIN
3718 && TARGET_64BIT)
3720 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3721 error ("target attribute or pragma changes darwin64 ABI");
3722 else
3724 rs6000_darwin64_abi = 1;
3725 /* Default to natural alignment, for better performance. */
3726 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3730 /* Place FP constants in the constant pool instead of TOC
3731 if section anchors are enabled. */
3732 if (flag_section_anchors
3733 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3734 TARGET_NO_FP_IN_TOC = 1;
3736 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3737 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3739 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3740 SUBTARGET_OVERRIDE_OPTIONS;
3741 #endif
3742 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3743 SUBSUBTARGET_OVERRIDE_OPTIONS;
3744 #endif
3745 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3746 SUB3TARGET_OVERRIDE_OPTIONS;
3747 #endif
3749 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3750 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3752 /* For the E500 family of cores, reset the single/double FP flags to let us
3753 check that they remain constant across attributes or pragmas. Also,
3754 clear a possible request for string instructions, which are not supported
3755 and which we might have silently enabled above for -Os.
3757 For other families, clear ISEL in case it was set implicitly. */
3760 switch (rs6000_cpu)
3762 case PROCESSOR_PPC8540:
3763 case PROCESSOR_PPC8548:
3764 case PROCESSOR_PPCE500MC:
3765 case PROCESSOR_PPCE500MC64:
3766 case PROCESSOR_PPCE5500:
3767 case PROCESSOR_PPCE6500:
3769 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3770 rs6000_double_float = TARGET_E500_DOUBLE;
3772 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3774 break;
3776 default:
3778 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3779 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3781 break;
3784 if (main_target_opt)
3786 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3787 error ("target attribute or pragma changes single precision floating "
3788 "point");
3789 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3790 error ("target attribute or pragma changes double precision floating "
3791 "point");
3794 /* Detect invalid option combinations with E500. */
3795 CHECK_E500_OPTIONS;
3797 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3798 && rs6000_cpu != PROCESSOR_POWER5
3799 && rs6000_cpu != PROCESSOR_POWER6
3800 && rs6000_cpu != PROCESSOR_POWER7
3801 && rs6000_cpu != PROCESSOR_POWER8
3802 && rs6000_cpu != PROCESSOR_PPCA2
3803 && rs6000_cpu != PROCESSOR_CELL
3804 && rs6000_cpu != PROCESSOR_PPC476);
3805 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3806 || rs6000_cpu == PROCESSOR_POWER5
3807 || rs6000_cpu == PROCESSOR_POWER7
3808 || rs6000_cpu == PROCESSOR_POWER8);
3809 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3810 || rs6000_cpu == PROCESSOR_POWER5
3811 || rs6000_cpu == PROCESSOR_POWER6
3812 || rs6000_cpu == PROCESSOR_POWER7
3813 || rs6000_cpu == PROCESSOR_POWER8
3814 || rs6000_cpu == PROCESSOR_PPCE500MC
3815 || rs6000_cpu == PROCESSOR_PPCE500MC64
3816 || rs6000_cpu == PROCESSOR_PPCE5500
3817 || rs6000_cpu == PROCESSOR_PPCE6500);
3819 /* Allow debug switches to override the above settings. These are set to -1
3820 in rs6000.opt to indicate the user hasn't directly set the switch. */
3821 if (TARGET_ALWAYS_HINT >= 0)
3822 rs6000_always_hint = TARGET_ALWAYS_HINT;
3824 if (TARGET_SCHED_GROUPS >= 0)
3825 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3827 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3828 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3830 rs6000_sched_restricted_insns_priority
3831 = (rs6000_sched_groups ? 1 : 0);
3833 /* Handle -msched-costly-dep option. */
3834 rs6000_sched_costly_dep
3835 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3837 if (rs6000_sched_costly_dep_str)
3839 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3840 rs6000_sched_costly_dep = no_dep_costly;
3841 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3842 rs6000_sched_costly_dep = all_deps_costly;
3843 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3844 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3845 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3846 rs6000_sched_costly_dep = store_to_load_dep_costly;
3847 else
3848 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3849 atoi (rs6000_sched_costly_dep_str));
3852 /* Handle -minsert-sched-nops option. */
3853 rs6000_sched_insert_nops
3854 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3856 if (rs6000_sched_insert_nops_str)
3858 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3859 rs6000_sched_insert_nops = sched_finish_none;
3860 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3861 rs6000_sched_insert_nops = sched_finish_pad_groups;
3862 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3863 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3864 else
3865 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3866 atoi (rs6000_sched_insert_nops_str));
3869 if (global_init_p)
3871 #ifdef TARGET_REGNAMES
3872 /* If the user desires alternate register names, copy in the
3873 alternate names now. */
3874 if (TARGET_REGNAMES)
3875 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3876 #endif
3878 /* Set aix_struct_return last, after the ABI is determined.
3879 If -maix-struct-return or -msvr4-struct-return was explicitly
3880 used, don't override with the ABI default. */
3881 if (!global_options_set.x_aix_struct_return)
3882 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3884 #if 0
3885 /* IBM XL compiler defaults to unsigned bitfields. */
3886 if (TARGET_XL_COMPAT)
3887 flag_signed_bitfields = 0;
3888 #endif
3890 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3891 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3893 if (TARGET_TOC)
3894 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3896 /* We can only guarantee the availability of DI pseudo-ops when
3897 assembling for 64-bit targets. */
3898 if (!TARGET_64BIT)
3900 targetm.asm_out.aligned_op.di = NULL;
3901 targetm.asm_out.unaligned_op.di = NULL;
3905 /* Set branch target alignment, if not optimizing for size. */
3906 if (!optimize_size)
3908 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
3909 aligned 8-byte to avoid misprediction by the branch predictor. */
3910 if (rs6000_cpu == PROCESSOR_TITAN
3911 || rs6000_cpu == PROCESSOR_CELL)
3913 if (align_functions <= 0)
3914 align_functions = 8;
3915 if (align_jumps <= 0)
3916 align_jumps = 8;
3917 if (align_loops <= 0)
3918 align_loops = 8;
3920 if (rs6000_align_branch_targets)
3922 if (align_functions <= 0)
3923 align_functions = 16;
3924 if (align_jumps <= 0)
3925 align_jumps = 16;
3926 if (align_loops <= 0)
3928 can_override_loop_align = 1;
3929 align_loops = 16;
3932 if (align_jumps_max_skip <= 0)
3933 align_jumps_max_skip = 15;
3934 if (align_loops_max_skip <= 0)
3935 align_loops_max_skip = 15;
3938 /* Arrange to save and restore machine status around nested functions. */
3939 init_machine_status = rs6000_init_machine_status;
3941 /* We should always be splitting complex arguments, but we can't break
3942 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3943 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
3944 targetm.calls.split_complex_arg = NULL;
3947 /* Initialize rs6000_cost with the appropriate target costs. */
3948 if (optimize_size)
3949 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3950 else
3951 switch (rs6000_cpu)
3953 case PROCESSOR_RS64A:
3954 rs6000_cost = &rs64a_cost;
3955 break;
3957 case PROCESSOR_MPCCORE:
3958 rs6000_cost = &mpccore_cost;
3959 break;
3961 case PROCESSOR_PPC403:
3962 rs6000_cost = &ppc403_cost;
3963 break;
3965 case PROCESSOR_PPC405:
3966 rs6000_cost = &ppc405_cost;
3967 break;
3969 case PROCESSOR_PPC440:
3970 rs6000_cost = &ppc440_cost;
3971 break;
3973 case PROCESSOR_PPC476:
3974 rs6000_cost = &ppc476_cost;
3975 break;
3977 case PROCESSOR_PPC601:
3978 rs6000_cost = &ppc601_cost;
3979 break;
3981 case PROCESSOR_PPC603:
3982 rs6000_cost = &ppc603_cost;
3983 break;
3985 case PROCESSOR_PPC604:
3986 rs6000_cost = &ppc604_cost;
3987 break;
3989 case PROCESSOR_PPC604e:
3990 rs6000_cost = &ppc604e_cost;
3991 break;
3993 case PROCESSOR_PPC620:
3994 rs6000_cost = &ppc620_cost;
3995 break;
3997 case PROCESSOR_PPC630:
3998 rs6000_cost = &ppc630_cost;
3999 break;
4001 case PROCESSOR_CELL:
4002 rs6000_cost = &ppccell_cost;
4003 break;
4005 case PROCESSOR_PPC750:
4006 case PROCESSOR_PPC7400:
4007 rs6000_cost = &ppc750_cost;
4008 break;
4010 case PROCESSOR_PPC7450:
4011 rs6000_cost = &ppc7450_cost;
4012 break;
4014 case PROCESSOR_PPC8540:
4015 case PROCESSOR_PPC8548:
4016 rs6000_cost = &ppc8540_cost;
4017 break;
4019 case PROCESSOR_PPCE300C2:
4020 case PROCESSOR_PPCE300C3:
4021 rs6000_cost = &ppce300c2c3_cost;
4022 break;
4024 case PROCESSOR_PPCE500MC:
4025 rs6000_cost = &ppce500mc_cost;
4026 break;
4028 case PROCESSOR_PPCE500MC64:
4029 rs6000_cost = &ppce500mc64_cost;
4030 break;
4032 case PROCESSOR_PPCE5500:
4033 rs6000_cost = &ppce5500_cost;
4034 break;
4036 case PROCESSOR_PPCE6500:
4037 rs6000_cost = &ppce6500_cost;
4038 break;
4040 case PROCESSOR_TITAN:
4041 rs6000_cost = &titan_cost;
4042 break;
4044 case PROCESSOR_POWER4:
4045 case PROCESSOR_POWER5:
4046 rs6000_cost = &power4_cost;
4047 break;
4049 case PROCESSOR_POWER6:
4050 rs6000_cost = &power6_cost;
4051 break;
4053 case PROCESSOR_POWER7:
4054 rs6000_cost = &power7_cost;
4055 break;
4057 case PROCESSOR_POWER8:
4058 rs6000_cost = &power8_cost;
4059 break;
4061 case PROCESSOR_PPCA2:
4062 rs6000_cost = &ppca2_cost;
4063 break;
4065 default:
4066 gcc_unreachable ();
4069 if (global_init_p)
4071 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4072 rs6000_cost->simultaneous_prefetches,
4073 global_options.x_param_values,
4074 global_options_set.x_param_values);
4075 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4076 global_options.x_param_values,
4077 global_options_set.x_param_values);
4078 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4079 rs6000_cost->cache_line_size,
4080 global_options.x_param_values,
4081 global_options_set.x_param_values);
4082 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4083 global_options.x_param_values,
4084 global_options_set.x_param_values);
4086 /* Increase loop peeling limits based on performance analysis. */
4087 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4088 global_options.x_param_values,
4089 global_options_set.x_param_values);
4090 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4091 global_options.x_param_values,
4092 global_options_set.x_param_values);
4094 /* If using typedef char *va_list, signal that
4095 __builtin_va_start (&ap, 0) can be optimized to
4096 ap = __builtin_next_arg (0). */
4097 if (DEFAULT_ABI != ABI_V4)
4098 targetm.expand_builtin_va_start = NULL;
4101 /* Set up single/double float flags.
4102 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4103 then set both flags. */
4104 if (TARGET_HARD_FLOAT && TARGET_FPRS
4105 && rs6000_single_float == 0 && rs6000_double_float == 0)
4106 rs6000_single_float = rs6000_double_float = 1;
4108 /* If not explicitly specified via option, decide whether to generate indexed
4109 load/store instructions. */
4110 if (TARGET_AVOID_XFORM == -1)
4111 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4112 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4113 need indexed accesses and the type used is the scalar type of the element
4114 being loaded or stored. */
4115 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4116 && !TARGET_ALTIVEC);
4118 /* Set the -mrecip options. */
4119 if (rs6000_recip_name)
4121 char *p = ASTRDUP (rs6000_recip_name);
4122 char *q;
4123 unsigned int mask, i;
4124 bool invert;
4126 while ((q = strtok (p, ",")) != NULL)
4128 p = NULL;
4129 if (*q == '!')
4131 invert = true;
4132 q++;
4134 else
4135 invert = false;
4137 if (!strcmp (q, "default"))
4138 mask = ((TARGET_RECIP_PRECISION)
4139 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4140 else
4142 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4143 if (!strcmp (q, recip_options[i].string))
4145 mask = recip_options[i].mask;
4146 break;
4149 if (i == ARRAY_SIZE (recip_options))
4151 error ("unknown option for -mrecip=%s", q);
4152 invert = false;
4153 mask = 0;
4154 ret = false;
4158 if (invert)
4159 rs6000_recip_control &= ~mask;
4160 else
4161 rs6000_recip_control |= mask;
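/* For example, -mrecip=all,!rsqrtd (assuming "all" and "rsqrtd" are
   entries in the recip_options table defined earlier in this file)
   first ORs in the mask for "all", then the leading '!' clears the
   "rsqrtd" mask again, enabling every reciprocal estimate except the
   double-precision reciprocal square root.  */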
4165 /* Set the builtin mask of the various options that could affect which
4166 builtins are available. In the past we used target_flags, but we've run out
4167 of bits, and some options like SPE and PAIRED are no longer in
4168 target_flags. */
4169 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4170 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4172 fprintf (stderr,
4173 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
4174 rs6000_builtin_mask);
4175 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
4178 /* Initialize all of the registers. */
4179 rs6000_init_hard_regno_mode_ok (global_init_p);
4181 /* Save the initial options in case the user uses function-specific options. */
4182 if (global_init_p)
4183 target_option_default_node = target_option_current_node
4184 = build_target_option_node (&global_options);
4186 /* If not explicitly specified via option, decide whether to generate the
4187 extra blr's required to preserve the link stack on some cpus (e.g. 476). */
4188 if (TARGET_LINK_STACK == -1)
4189 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4191 return ret;
4194 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4195 define the target cpu type. */
4197 static void
4198 rs6000_option_override (void)
4200 (void) rs6000_option_override_internal (true);
4202 /* Register machine-specific passes. This needs to be done at start-up.
4203 It's convenient to do it here (like i386 does). */
4204 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4206 struct register_pass_info analyze_swaps_info
4207 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4209 register_pass (&analyze_swaps_info);
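/* The register_pass_info initializer above reads: insert the new
   pass_analyze_swaps object before instance 1 (the first run) of the
   existing pass named "cse1".  */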
4213 /* Implement targetm.vectorize.builtin_mask_for_load. */
4214 static tree
4215 rs6000_builtin_mask_for_load (void)
4217 if (TARGET_ALTIVEC || TARGET_VSX)
4218 return altivec_builtin_mask_for_load;
4219 else
4220 return 0;
4223 /* Implement LOOP_ALIGN. */
4225 rs6000_loop_align (rtx label)
4227 basic_block bb;
4228 int ninsns;
4230 /* Don't override loop alignment if -falign-loops was specified. */
4231 if (!can_override_loop_align)
4232 return align_loops_log;
4234 bb = BLOCK_FOR_INSN (label);
4235 ninsns = num_loop_insns (bb->loop_father);
4237 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4238 if (ninsns > 4 && ninsns <= 8
4239 && (rs6000_cpu == PROCESSOR_POWER4
4240 || rs6000_cpu == PROCESSOR_POWER5
4241 || rs6000_cpu == PROCESSOR_POWER6
4242 || rs6000_cpu == PROCESSOR_POWER7
4243 || rs6000_cpu == PROCESSOR_POWER8))
4244 return 5;
4245 else
4246 return align_loops_log;
4249 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4250 static int
4251 rs6000_loop_align_max_skip (rtx_insn *label)
4253 return (1 << rs6000_loop_align (label)) - 1;
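/* Worked example: on a power8 with no explicit -falign-loops (so
   can_override_loop_align was set above), a 6-insn loop gets
   rs6000_loop_align () == 5, i.e. a 1 << 5 == 32 byte boundary, and
   rs6000_loop_align_max_skip () == (1 << 5) - 1 == 31, the most
   padding bytes the assembler may emit to reach that boundary.  */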
4256 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4257 after applying N iterations. This routine does not determine
4258 how many iterations are required to reach the desired alignment. */
4260 static bool
4261 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4263 if (is_packed)
4264 return false;
4266 if (TARGET_32BIT)
4268 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4269 return true;
4271 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4272 return true;
4274 return false;
4276 else
4278 if (TARGET_MACHO)
4279 return false;
4281 /* Assume that all other types are naturally aligned. CHECKME! */
4282 return true;
4286 /* Return true if the vector misalignment factor is supported by the
4287 target. */
4288 static bool
4289 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4290 const_tree type,
4291 int misalignment,
4292 bool is_packed)
4294 if (TARGET_VSX)
4296 /* Return if movmisalign pattern is not supported for this mode. */
4297 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4298 return false;
4300 if (misalignment == -1)
4302 /* Misalignment factor is unknown at compile time but we know
4303 it's word aligned. */
4304 if (rs6000_vector_alignment_reachable (type, is_packed))
4306 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4308 if (element_size == 64 || element_size == 32)
4309 return true;
4312 return false;
4315 /* VSX supports word-aligned vectors. */
4316 if (misalignment % 4 == 0)
4317 return true;
4319 return false;
4322 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4323 static int
4324 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4325 tree vectype, int misalign)
4327 unsigned elements;
4328 tree elem_type;
4330 switch (type_of_cost)
4332 case scalar_stmt:
4333 case scalar_load:
4334 case scalar_store:
4335 case vector_stmt:
4336 case vector_load:
4337 case vector_store:
4338 case vec_to_scalar:
4339 case scalar_to_vec:
4340 case cond_branch_not_taken:
4341 return 1;
4343 case vec_perm:
4344 if (TARGET_VSX)
4345 return 3;
4346 else
4347 return 1;
4349 case vec_promote_demote:
4350 if (TARGET_VSX)
4351 return 4;
4352 else
4353 return 1;
4355 case cond_branch_taken:
4356 return 3;
4358 case unaligned_load:
4359 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4361 elements = TYPE_VECTOR_SUBPARTS (vectype);
4362 if (elements == 2)
4363 /* Double word aligned. */
4364 return 2;
4366 if (elements == 4)
4368 switch (misalign)
4370 case 8:
4371 /* Double word aligned. */
4372 return 2;
4374 case -1:
4375 /* Unknown misalignment. */
4376 case 4:
4377 case 12:
4378 /* Word aligned. */
4379 return 22;
4381 default:
4382 gcc_unreachable ();
4387 if (TARGET_ALTIVEC)
4388 /* Misaligned loads are not supported. */
4389 gcc_unreachable ();
4391 return 2;
4393 case unaligned_store:
4394 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4396 elements = TYPE_VECTOR_SUBPARTS (vectype);
4397 if (elements == 2)
4398 /* Double word aligned. */
4399 return 2;
4401 if (elements == 4)
4403 switch (misalign)
4405 case 8:
4406 /* Double word aligned. */
4407 return 2;
4409 case -1:
4410 /* Unknown misalignment. */
4411 case 4:
4412 case 12:
4413 /* Word aligned. */
4414 return 23;
4416 default:
4417 gcc_unreachable ();
4422 if (TARGET_ALTIVEC)
4423 /* Misaligned stores are not supported. */
4424 gcc_unreachable ();
4426 return 2;
4428 case vec_construct:
4429 elements = TYPE_VECTOR_SUBPARTS (vectype);
4430 elem_type = TREE_TYPE (vectype);
4431 /* 32-bit floats loaded into registers are stored as double
4432 precision, so we need n/2 converts in addition to the usual
4433 n/2 merges to construct a vector of short floats from them. */
4434 if (SCALAR_FLOAT_TYPE_P (elem_type)
4435 && TYPE_PRECISION (elem_type) == 32)
4436 return elements + 1;
4437 else
4438 return elements / 2 + 1;
4440 default:
4441 gcc_unreachable ();
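/* A worked instance of the unaligned costs above: a V4SFmode load known
   only to be word aligned (misalign == 4) is charged 22, versus 2 when
   it is doubleword aligned (misalign == 8), which strongly biases the
   vectorizer toward peeling for alignment on VSX.  */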
4445 /* Implement targetm.vectorize.preferred_simd_mode. */
4447 static machine_mode
4448 rs6000_preferred_simd_mode (machine_mode mode)
4450 if (TARGET_VSX)
4451 switch (mode)
4453 case DFmode:
4454 return V2DFmode;
4455 default:;
4457 if (TARGET_ALTIVEC || TARGET_VSX)
4458 switch (mode)
4460 case SFmode:
4461 return V4SFmode;
4462 case TImode:
4463 return V1TImode;
4464 case DImode:
4465 return V2DImode;
4466 case SImode:
4467 return V4SImode;
4468 case HImode:
4469 return V8HImode;
4470 case QImode:
4471 return V16QImode;
4472 default:;
4474 if (TARGET_SPE)
4475 switch (mode)
4477 case SFmode:
4478 return V2SFmode;
4479 case SImode:
4480 return V2SImode;
4481 default:;
4483 if (TARGET_PAIRED_FLOAT
4484 && mode == SFmode)
4485 return V2SFmode;
4486 return word_mode;
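/* For example, with -mvsx, DFmode prefers V2DFmode and SFmode prefers
   V4SFmode; with only -maltivec there is no vector double support, so
   DFmode falls through every switch above and gets word_mode, i.e. no
   vectorization for doubles.  */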
4489 typedef struct _rs6000_cost_data
4491 struct loop *loop_info;
4492 unsigned cost[3];
4493 } rs6000_cost_data;
4495 /* Test for likely overcommitment of vector hardware resources. If a
4496 loop iteration is relatively large, and too large a percentage of
4497 instructions in the loop are vectorized, the cost model may not
4498 adequately reflect delays from unavailable vector resources.
4499 Penalize the loop body cost for this case. */
4501 static void
4502 rs6000_density_test (rs6000_cost_data *data)
4504 const int DENSITY_PCT_THRESHOLD = 85;
4505 const int DENSITY_SIZE_THRESHOLD = 70;
4506 const int DENSITY_PENALTY = 10;
4507 struct loop *loop = data->loop_info;
4508 basic_block *bbs = get_loop_body (loop);
4509 int nbbs = loop->num_nodes;
4510 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4511 int i, density_pct;
4513 for (i = 0; i < nbbs; i++)
4515 basic_block bb = bbs[i];
4516 gimple_stmt_iterator gsi;
4518 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4520 gimple stmt = gsi_stmt (gsi);
4521 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4523 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4524 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4525 not_vec_cost++;
4529 free (bbs);
4530 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4532 if (density_pct > DENSITY_PCT_THRESHOLD
4533 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4535 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4536 if (dump_enabled_p ())
4537 dump_printf_loc (MSG_NOTE, vect_location,
4538 "density %d%%, cost %d exceeds threshold, penalizing "
4539 "loop body cost by %d%%", density_pct,
4540 vec_cost + not_vec_cost, DENSITY_PENALTY);
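/* A worked instance of the density test above, with assumed costs.  */
#if 0
  int vec_cost = 90, not_vec_cost = 10;
  int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost); /* 90.  */
  /* 90 > DENSITY_PCT_THRESHOLD (85) and 90 + 10 > DENSITY_SIZE_THRESHOLD
     (70), so the body cost becomes 90 * (100 + DENSITY_PENALTY) / 100,
     i.e. 99.  */
#endif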
4544 /* Implement targetm.vectorize.init_cost. */
4546 static void *
4547 rs6000_init_cost (struct loop *loop_info)
4549 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4550 data->loop_info = loop_info;
4551 data->cost[vect_prologue] = 0;
4552 data->cost[vect_body] = 0;
4553 data->cost[vect_epilogue] = 0;
4554 return data;
4557 /* Implement targetm.vectorize.add_stmt_cost. */
4559 static unsigned
4560 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4561 struct _stmt_vec_info *stmt_info, int misalign,
4562 enum vect_cost_model_location where)
4564 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4565 unsigned retval = 0;
4567 if (flag_vect_cost_model)
4569 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4570 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4571 misalign);
4572 /* Statements in an inner loop relative to the loop being
4573 vectorized are weighted more heavily. The value here is
4574 arbitrary and could potentially be improved with analysis. */
4575 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4576 count *= 50; /* FIXME. */
4578 retval = (unsigned) (count * stmt_cost);
4579 cost_data->cost[where] += retval;
4582 return retval;
4585 /* Implement targetm.vectorize.finish_cost. */
4587 static void
4588 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4589 unsigned *body_cost, unsigned *epilogue_cost)
4591 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4593 if (cost_data->loop_info)
4594 rs6000_density_test (cost_data);
4596 *prologue_cost = cost_data->cost[vect_prologue];
4597 *body_cost = cost_data->cost[vect_body];
4598 *epilogue_cost = cost_data->cost[vect_epilogue];
4601 /* Implement targetm.vectorize.destroy_cost_data. */
4603 static void
4604 rs6000_destroy_cost_data (void *data)
4606 free (data);
4609 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4610 library with vectorized intrinsics. */
4612 static tree
4613 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4615 char name[32];
4616 const char *suffix = NULL;
4617 tree fntype, new_fndecl, bdecl = NULL_TREE;
4618 int n_args = 1;
4619 const char *bname;
4620 machine_mode el_mode, in_mode;
4621 int n, in_n;
4623 /* Libmass is suitable for unsafe math only as it does not correctly support
4624 parts of IEEE with the required precision such as denormals. Only support
4625 it if we have VSX to use the simd d2 or f4 functions.
4626 XXX: Add variable length support. */
4627 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4628 return NULL_TREE;
4630 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4631 n = TYPE_VECTOR_SUBPARTS (type_out);
4632 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4633 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4634 if (el_mode != in_mode
4635 || n != in_n)
4636 return NULL_TREE;
4638 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4640 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4641 switch (fn)
4643 case BUILT_IN_ATAN2:
4644 case BUILT_IN_HYPOT:
4645 case BUILT_IN_POW:
4646 n_args = 2;
4647 /* fall through */
4649 case BUILT_IN_ACOS:
4650 case BUILT_IN_ACOSH:
4651 case BUILT_IN_ASIN:
4652 case BUILT_IN_ASINH:
4653 case BUILT_IN_ATAN:
4654 case BUILT_IN_ATANH:
4655 case BUILT_IN_CBRT:
4656 case BUILT_IN_COS:
4657 case BUILT_IN_COSH:
4658 case BUILT_IN_ERF:
4659 case BUILT_IN_ERFC:
4660 case BUILT_IN_EXP2:
4661 case BUILT_IN_EXP:
4662 case BUILT_IN_EXPM1:
4663 case BUILT_IN_LGAMMA:
4664 case BUILT_IN_LOG10:
4665 case BUILT_IN_LOG1P:
4666 case BUILT_IN_LOG2:
4667 case BUILT_IN_LOG:
4668 case BUILT_IN_SIN:
4669 case BUILT_IN_SINH:
4670 case BUILT_IN_SQRT:
4671 case BUILT_IN_TAN:
4672 case BUILT_IN_TANH:
4673 bdecl = builtin_decl_implicit (fn);
4674 suffix = "d2"; /* pow -> powd2 */
4675 if (el_mode != DFmode
4676 || n != 2
4677 || !bdecl)
4678 return NULL_TREE;
4679 break;
4681 case BUILT_IN_ATAN2F:
4682 case BUILT_IN_HYPOTF:
4683 case BUILT_IN_POWF:
4684 n_args = 2;
4685 /* fall through */
4687 case BUILT_IN_ACOSF:
4688 case BUILT_IN_ACOSHF:
4689 case BUILT_IN_ASINF:
4690 case BUILT_IN_ASINHF:
4691 case BUILT_IN_ATANF:
4692 case BUILT_IN_ATANHF:
4693 case BUILT_IN_CBRTF:
4694 case BUILT_IN_COSF:
4695 case BUILT_IN_COSHF:
4696 case BUILT_IN_ERFF:
4697 case BUILT_IN_ERFCF:
4698 case BUILT_IN_EXP2F:
4699 case BUILT_IN_EXPF:
4700 case BUILT_IN_EXPM1F:
4701 case BUILT_IN_LGAMMAF:
4702 case BUILT_IN_LOG10F:
4703 case BUILT_IN_LOG1PF:
4704 case BUILT_IN_LOG2F:
4705 case BUILT_IN_LOGF:
4706 case BUILT_IN_SINF:
4707 case BUILT_IN_SINHF:
4708 case BUILT_IN_SQRTF:
4709 case BUILT_IN_TANF:
4710 case BUILT_IN_TANHF:
4711 bdecl = builtin_decl_implicit (fn);
4712 suffix = "4"; /* powf -> powf4 */
4713 if (el_mode != SFmode
4714 || n != 4
4715 || !bdecl)
4716 return NULL_TREE;
4717 break;
4719 default:
4720 return NULL_TREE;
4723 else
4724 return NULL_TREE;
4726 gcc_assert (suffix != NULL);
4727 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4728 if (!bname)
4729 return NULL_TREE;
4731 strcpy (name, bname + sizeof ("__builtin_") - 1);
4732 strcat (name, suffix);
4734 if (n_args == 1)
4735 fntype = build_function_type_list (type_out, type_in, NULL);
4736 else if (n_args == 2)
4737 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4738 else
4739 gcc_unreachable ();
4741 /* Build a function declaration for the vectorized function. */
4742 new_fndecl = build_decl (BUILTINS_LOCATION,
4743 FUNCTION_DECL, get_identifier (name), fntype);
4744 TREE_PUBLIC (new_fndecl) = 1;
4745 DECL_EXTERNAL (new_fndecl) = 1;
4746 DECL_IS_NOVOPS (new_fndecl) = 1;
4747 TREE_READONLY (new_fndecl) = 1;
4749 return new_fndecl;
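/* For instance, a V2DFmode __builtin_pow maps to the MASS routine
   "powd2": the strcpy above copies from bname + sizeof ("__builtin_") - 1,
   skipping the 10-character "__builtin_" prefix, and strcat appends the
   "d2" suffix.  Marking the decl external, NOVOPS and readonly lets the
   optimizers treat the calls much like pure computations.  */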
4752 /* Returns a function decl for a vectorized version of the builtin function
4753 FNDECL, with result vector type TYPE_OUT and input type TYPE_IN, or NULL_TREE
4754 if it is not available. */
4756 static tree
4757 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4758 tree type_in)
4760 machine_mode in_mode, out_mode;
4761 int in_n, out_n;
4763 if (TARGET_DEBUG_BUILTIN)
4764 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4765 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4766 GET_MODE_NAME (TYPE_MODE (type_out)),
4767 GET_MODE_NAME (TYPE_MODE (type_in)));
4769 if (TREE_CODE (type_out) != VECTOR_TYPE
4770 || TREE_CODE (type_in) != VECTOR_TYPE
4771 || !TARGET_VECTORIZE_BUILTINS)
4772 return NULL_TREE;
4774 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4775 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4776 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4777 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4779 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4781 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4782 switch (fn)
4784 case BUILT_IN_CLZIMAX:
4785 case BUILT_IN_CLZLL:
4786 case BUILT_IN_CLZL:
4787 case BUILT_IN_CLZ:
4788 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4790 if (out_mode == QImode && out_n == 16)
4791 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4792 else if (out_mode == HImode && out_n == 8)
4793 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4794 else if (out_mode == SImode && out_n == 4)
4795 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4796 else if (out_mode == DImode && out_n == 2)
4797 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4799 break;
4800 case BUILT_IN_COPYSIGN:
4801 if (VECTOR_UNIT_VSX_P (V2DFmode)
4802 && out_mode == DFmode && out_n == 2
4803 && in_mode == DFmode && in_n == 2)
4804 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4805 break;
4806 case BUILT_IN_COPYSIGNF:
4807 if (out_mode != SFmode || out_n != 4
4808 || in_mode != SFmode || in_n != 4)
4809 break;
4810 if (VECTOR_UNIT_VSX_P (V4SFmode))
4811 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4812 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4813 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4814 break;
4815 case BUILT_IN_POPCOUNTIMAX:
4816 case BUILT_IN_POPCOUNTLL:
4817 case BUILT_IN_POPCOUNTL:
4818 case BUILT_IN_POPCOUNT:
4819 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4821 if (out_mode == QImode && out_n == 16)
4822 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4823 else if (out_mode == HImode && out_n == 8)
4824 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4825 else if (out_mode == SImode && out_n == 4)
4826 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4827 else if (out_mode == DImode && out_n == 2)
4828 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4830 break;
4831 case BUILT_IN_SQRT:
4832 if (VECTOR_UNIT_VSX_P (V2DFmode)
4833 && out_mode == DFmode && out_n == 2
4834 && in_mode == DFmode && in_n == 2)
4835 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4836 break;
4837 case BUILT_IN_SQRTF:
4838 if (VECTOR_UNIT_VSX_P (V4SFmode)
4839 && out_mode == SFmode && out_n == 4
4840 && in_mode == SFmode && in_n == 4)
4841 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4842 break;
4843 case BUILT_IN_CEIL:
4844 if (VECTOR_UNIT_VSX_P (V2DFmode)
4845 && out_mode == DFmode && out_n == 2
4846 && in_mode == DFmode && in_n == 2)
4847 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4848 break;
4849 case BUILT_IN_CEILF:
4850 if (out_mode != SFmode || out_n != 4
4851 || in_mode != SFmode || in_n != 4)
4852 break;
4853 if (VECTOR_UNIT_VSX_P (V4SFmode))
4854 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4855 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4856 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4857 break;
4858 case BUILT_IN_FLOOR:
4859 if (VECTOR_UNIT_VSX_P (V2DFmode)
4860 && out_mode == DFmode && out_n == 2
4861 && in_mode == DFmode && in_n == 2)
4862 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4863 break;
4864 case BUILT_IN_FLOORF:
4865 if (out_mode != SFmode || out_n != 4
4866 || in_mode != SFmode || in_n != 4)
4867 break;
4868 if (VECTOR_UNIT_VSX_P (V4SFmode))
4869 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4870 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4871 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4872 break;
4873 case BUILT_IN_FMA:
4874 if (VECTOR_UNIT_VSX_P (V2DFmode)
4875 && out_mode == DFmode && out_n == 2
4876 && in_mode == DFmode && in_n == 2)
4877 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4878 break;
4879 case BUILT_IN_FMAF:
4880 if (VECTOR_UNIT_VSX_P (V4SFmode)
4881 && out_mode == SFmode && out_n == 4
4882 && in_mode == SFmode && in_n == 4)
4883 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4884 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4885 && out_mode == SFmode && out_n == 4
4886 && in_mode == SFmode && in_n == 4)
4887 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4888 break;
4889 case BUILT_IN_TRUNC:
4890 if (VECTOR_UNIT_VSX_P (V2DFmode)
4891 && out_mode == DFmode && out_n == 2
4892 && in_mode == DFmode && in_n == 2)
4893 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4894 break;
4895 case BUILT_IN_TRUNCF:
4896 if (out_mode != SFmode || out_n != 4
4897 || in_mode != SFmode || in_n != 4)
4898 break;
4899 if (VECTOR_UNIT_VSX_P (V4SFmode))
4900 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4901 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4902 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4903 break;
4904 case BUILT_IN_NEARBYINT:
4905 if (VECTOR_UNIT_VSX_P (V2DFmode)
4906 && flag_unsafe_math_optimizations
4907 && out_mode == DFmode && out_n == 2
4908 && in_mode == DFmode && in_n == 2)
4909 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4910 break;
4911 case BUILT_IN_NEARBYINTF:
4912 if (VECTOR_UNIT_VSX_P (V4SFmode)
4913 && flag_unsafe_math_optimizations
4914 && out_mode == SFmode && out_n == 4
4915 && in_mode == SFmode && in_n == 4)
4916 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4917 break;
4918 case BUILT_IN_RINT:
4919 if (VECTOR_UNIT_VSX_P (V2DFmode)
4920 && !flag_trapping_math
4921 && out_mode == DFmode && out_n == 2
4922 && in_mode == DFmode && in_n == 2)
4923 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4924 break;
4925 case BUILT_IN_RINTF:
4926 if (VECTOR_UNIT_VSX_P (V4SFmode)
4927 && !flag_trapping_math
4928 && out_mode == SFmode && out_n == 4
4929 && in_mode == SFmode && in_n == 4)
4930 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4931 break;
4932 default:
4933 break;
4937 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4939 enum rs6000_builtins fn
4940 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4941 switch (fn)
4943 case RS6000_BUILTIN_RSQRTF:
4944 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4945 && out_mode == SFmode && out_n == 4
4946 && in_mode == SFmode && in_n == 4)
4947 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4948 break;
4949 case RS6000_BUILTIN_RSQRT:
4950 if (VECTOR_UNIT_VSX_P (V2DFmode)
4951 && out_mode == DFmode && out_n == 2
4952 && in_mode == DFmode && in_n == 2)
4953 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
4954 break;
4955 case RS6000_BUILTIN_RECIPF:
4956 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4957 && out_mode == SFmode && out_n == 4
4958 && in_mode == SFmode && in_n == 4)
4959 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
4960 break;
4961 case RS6000_BUILTIN_RECIP:
4962 if (VECTOR_UNIT_VSX_P (V2DFmode)
4963 && out_mode == DFmode && out_n == 2
4964 && in_mode == DFmode && in_n == 2)
4965 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
4966 break;
4967 default:
4968 break;
4972 /* Generate calls to libmass if appropriate. */
4973 if (rs6000_veclib_handler)
4974 return rs6000_veclib_handler (fndecl, type_out, type_in);
4976 return NULL_TREE;
4979 /* Default CPU string for rs6000*_file_start functions. */
4980 static const char *rs6000_default_cpu;
4982 /* Do anything needed at the start of the asm file. */
4984 static void
4985 rs6000_file_start (void)
4987 char buffer[80];
4988 const char *start = buffer;
4989 FILE *file = asm_out_file;
4991 rs6000_default_cpu = TARGET_CPU_DEFAULT;
4993 default_file_start ();
4995 if (flag_verbose_asm)
4997 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
4999 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5001 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5002 start = "";
5005 if (global_options_set.x_rs6000_cpu_index)
5007 fprintf (file, "%s -mcpu=%s", start,
5008 processor_target_table[rs6000_cpu_index].name);
5009 start = "";
5012 if (global_options_set.x_rs6000_tune_index)
5014 fprintf (file, "%s -mtune=%s", start,
5015 processor_target_table[rs6000_tune_index].name);
5016 start = "";
5019 if (PPC405_ERRATUM77)
5021 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5022 start = "";
5025 #ifdef USING_ELFOS_H
5026 switch (rs6000_sdata)
5028 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5029 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5030 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5031 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5034 if (rs6000_sdata && g_switch_value)
5036 fprintf (file, "%s -G %d", start,
5037 g_switch_value);
5038 start = "";
5040 #endif
5042 if (*start == '\0')
5043 putc ('\n', file);
5046 if (DEFAULT_ABI == ABI_ELFv2)
5047 fprintf (file, "\t.abiversion 2\n");
5049 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5050 || (TARGET_ELF && flag_pic == 2))
5052 switch_to_section (toc_section);
5053 switch_to_section (text_section);
5058 /* Return nonzero if this function is known to have a null epilogue. */
5061 direct_return (void)
5063 if (reload_completed)
5065 rs6000_stack_t *info = rs6000_stack_info ();
5067 if (info->first_gp_reg_save == 32
5068 && info->first_fp_reg_save == 64
5069 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5070 && ! info->lr_save_p
5071 && ! info->cr_save_p
5072 && info->vrsave_mask == 0
5073 && ! info->push_p)
5074 return 1;
5077 return 0;
5080 /* Return the number of instructions it takes to form a constant in an
5081 integer register. */
5084 num_insns_constant_wide (HOST_WIDE_INT value)
5086 /* signed constant loadable with addi */
5087 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5088 return 1;
5090 /* constant loadable with addis */
5091 else if ((value & 0xffff) == 0
5092 && (value >> 31 == -1 || value >> 31 == 0))
5093 return 1;
5095 else if (TARGET_POWERPC64)
5097 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5098 HOST_WIDE_INT high = value >> 31;
5100 if (high == 0 || high == -1)
5101 return 2;
5103 high >>= 1;
5105 if (low == 0)
5106 return num_insns_constant_wide (high) + 1;
5107 else if (high == 0)
5108 return num_insns_constant_wide (low) + 1;
5109 else
5110 return (num_insns_constant_wide (high)
5111 + num_insns_constant_wide (low) + 1);
5114 else
5115 return 2;
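/* Sample costs from the routine above, on powerpc64:
     0x7fff             -> 1 (addi/li)
     0x12340000         -> 1 (addis/lis)
     0x12345678         -> 2 (lis + ori)
     0x1234567812345678 -> 5 (2 per 32-bit half, plus 1 to combine).  */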
5119 num_insns_constant (rtx op, machine_mode mode)
5121 HOST_WIDE_INT low, high;
5123 switch (GET_CODE (op))
5125 case CONST_INT:
5126 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5127 && mask64_operand (op, mode))
5128 return 2;
5129 else
5130 return num_insns_constant_wide (INTVAL (op));
5132 case CONST_WIDE_INT:
5134 int i;
5135 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5136 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5137 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5138 return ins;
5141 case CONST_DOUBLE:
5142 if (mode == SFmode || mode == SDmode)
5144 long l;
5145 REAL_VALUE_TYPE rv;
5147 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5148 if (DECIMAL_FLOAT_MODE_P (mode))
5149 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5150 else
5151 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5152 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5155 long l[2];
5156 REAL_VALUE_TYPE rv;
5158 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5159 if (DECIMAL_FLOAT_MODE_P (mode))
5160 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5161 else
5162 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5163 high = l[WORDS_BIG_ENDIAN == 0];
5164 low = l[WORDS_BIG_ENDIAN != 0];
5166 if (TARGET_32BIT)
5167 return (num_insns_constant_wide (low)
5168 + num_insns_constant_wide (high));
5169 else
5171 if ((high == 0 && low >= 0)
5172 || (high == -1 && low < 0))
5173 return num_insns_constant_wide (low);
5175 else if (mask64_operand (op, mode))
5176 return 2;
5178 else if (low == 0)
5179 return num_insns_constant_wide (high) + 1;
5181 else
5182 return (num_insns_constant_wide (high)
5183 + num_insns_constant_wide (low) + 1);
5186 default:
5187 gcc_unreachable ();
5191 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5192 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5193 corresponding element of the vector, but for V4SFmode and V2SFmode,
5194 the corresponding "float" is interpreted as an SImode integer. */
5196 HOST_WIDE_INT
5197 const_vector_elt_as_int (rtx op, unsigned int elt)
5199 rtx tmp;
5201 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5202 gcc_assert (GET_MODE (op) != V2DImode
5203 && GET_MODE (op) != V2DFmode);
5205 tmp = CONST_VECTOR_ELT (op, elt);
5206 if (GET_MODE (op) == V4SFmode
5207 || GET_MODE (op) == V2SFmode)
5208 tmp = gen_lowpart (SImode, tmp);
5209 return INTVAL (tmp);

/* Return true if OP can be synthesized with a particular vspltisb, vspltish
   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
   all items are set to the same value and contain COPIES replicas of the
   vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
   operand and the others are set to the value of the operand's msb.  */

static bool
vspltis_constant (rtx op, unsigned step, unsigned copies)
{
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  unsigned i;
  unsigned nunits;
  unsigned bitsize;
  unsigned mask;

  HOST_WIDE_INT val;
  HOST_WIDE_INT splat_val;
  HOST_WIDE_INT msb_val;

  if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
    return false;

  nunits = GET_MODE_NUNITS (mode);
  bitsize = GET_MODE_BITSIZE (inner);
  mask = GET_MODE_MASK (inner);

  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  splat_val = val;
  msb_val = val >= 0 ? 0 : -1;

  /* Construct the value to be splatted, if possible.  If not, return 0.  */
  for (i = 2; i <= copies; i *= 2)
    {
      HOST_WIDE_INT small_val;
      bitsize /= 2;
      small_val = splat_val >> bitsize;
      mask >>= bitsize;
      if (splat_val != ((small_val << bitsize) | (small_val & mask)))
	return false;
      splat_val = small_val;
    }

  /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (splat_val))
    ;

  /* Also check if we can splat, and then add the result to itself.  Do so if
     the value is positive, or if the splat instruction is using OP's mode;
     for splat_val < 0, the splat and the add should use the same mode.  */
  else if (EASY_VECTOR_15_ADD_SELF (splat_val)
	   && (splat_val >= 0 || (step == 1 && copies == 1)))
    ;

  /* Also check if we are loading up the most significant bit which can be
     done by loading up -1 and shifting the value left by -1.  */
  else if (EASY_VECTOR_MSB (splat_val, inner))
    ;

  else
    return false;

  /* Check if VAL is present in every STEP-th element, and the
     other elements are filled with its most significant bit.  */
  for (i = 1; i < nunits; ++i)
    {
      HOST_WIDE_INT desired_val;
      unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
      if ((i & (step - 1)) == 0)
	desired_val = val;
      else
	desired_val = msb_val;

      if (desired_val != const_vector_elt_as_int (op, elt))
	return false;
    }

  return true;
}
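
/* Illustrative sketch (editorial, not compiled into GCC): the COPIES loop
   above in plain C, using fixed 64-bit types in place of HOST_WIDE_INT.
   For a V16QImode vector of 5's viewed through V4SImode, splat_val starts
   as 0x05050505 and folds down to 5 with copies == 4.  */
#if 0
static long long
example_fold_copies (long long splat_val, unsigned bitsize, unsigned copies)
{
  unsigned long long mask = (bitsize >= 64
			     ? ~0ULL : (1ULL << bitsize) - 1);
  for (unsigned i = 2; i <= copies; i *= 2)
    {
      bitsize /= 2;
      long long small_val = splat_val >> bitsize;
      mask >>= bitsize;
      /* The wide value must be the narrow value replicated twice.  */
      if (splat_val != ((small_val << bitsize) | (small_val & mask)))
	return -1;
      splat_val = small_val;
    }
  return splat_val;	/* example_fold_copies (0x05050505, 32, 4) == 5.  */
}
#endif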

/* Return true if OP is of the given MODE and can be synthesized
   with a vspltisb, vspltish or vspltisw.  */

bool
easy_altivec_constant (rtx op, machine_mode mode)
{
  unsigned step, copies;

  if (mode == VOIDmode)
    mode = GET_MODE (op);
  else if (mode != GET_MODE (op))
    return false;

  /* V2DI/V2DF was added with VSX.  Only allow 0 and all 1's as easy
     constants.  */
  if (mode == V2DFmode)
    return zero_constant (op, mode);

  else if (mode == V2DImode)
    {
      if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
	  || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
	return false;

      if (zero_constant (op, mode))
	return true;

      if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
	  && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
	return true;

      return false;
    }

  /* V1TImode is a special container for TImode.  Ignore for now.  */
  else if (mode == V1TImode)
    return false;

  /* Start with a vspltisw.  */
  step = GET_MODE_NUNITS (mode) / 4;
  copies = 1;

  if (vspltis_constant (op, step, copies))
    return true;

  /* Then try with a vspltish.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return true;

  /* And finally a vspltisb.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return true;

  return false;
}
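
/* Worked example (editorial): for V16QImode the three probes above try
   (step, copies) = (4, 1), (2, 1), (1, 1); for V8HImode they try (2, 1),
   (1, 1), (1, 2); and for V4SImode (1, 1), (1, 2), (1, 4).  That is, a
   word splat is tried first, then the same bits are reinterpreted as
   replicated halfwords and finally as replicated bytes.  */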

/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
   result is OP.  Abort if it is not possible.  */

rtx
gen_easy_altivec_constant (rtx op)
{
  machine_mode mode = GET_MODE (op);
  int nunits = GET_MODE_NUNITS (mode);
  rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  unsigned step = nunits / 4;
  unsigned copies = 1;

  /* Start with a vspltisw.  */
  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));

  /* Then try with a vspltish.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));

  /* And finally a vspltisb.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));

  gcc_unreachable ();
}

const char *
output_vec_const_move (rtx *operands)
{
  int cst, cst2;
  machine_mode mode;
  rtx dest, vec;

  dest = operands[0];
  vec = operands[1];
  mode = GET_MODE (dest);

  if (TARGET_VSX)
    {
      if (zero_constant (vec, mode))
	return "xxlxor %x0,%x0,%x0";

      if ((mode == V2DImode || mode == V1TImode)
	  && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
	  && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
	return "vspltisw %0,-1";
    }

  if (TARGET_ALTIVEC)
    {
      rtx splat_vec;
      if (zero_constant (vec, mode))
	return "vxor %0,%0,%0";

      splat_vec = gen_easy_altivec_constant (vec);
      gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
      operands[1] = XEXP (splat_vec, 0);
      if (!EASY_VECTOR_15 (INTVAL (operands[1])))
	return "#";

      switch (GET_MODE (splat_vec))
	{
	case V4SImode:
	  return "vspltisw %0,%1";

	case V8HImode:
	  return "vspltish %0,%1";

	case V16QImode:
	  return "vspltisb %0,%1";

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (TARGET_SPE);

  /* Vector constant 0 is handled as a splitter of V2SI, and in the
     pattern of V1DI, V4HI, and V2SF.

     FIXME: We should probably return # and add post reload
     splitters for these, but this way is so easy ;-).  */
  cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
  cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
  operands[1] = CONST_VECTOR_ELT (vec, 0);
  operands[2] = CONST_VECTOR_ELT (vec, 1);
  if (cst == cst2)
    return "li %0,%1\n\tevmergelo %0,%0,%0";
  else if (WORDS_BIG_ENDIAN)
    return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
  else
    return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
}

/* Initialize TARGET of vector PAIRED to VALS.  */

void
paired_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  rtx x, new_rtx, tmp, constant_op, op1, op2;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	++n_var;
    }
  if (n_var == 0)
    {
      /* Load from constant pool.  */
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (n_var == 2)
    {
      /* The vector is initialized only with non-constants.  */
      new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
				    XVECEXP (vals, 0, 1));

      emit_move_insn (target, new_rtx);
      return;
    }

  /* One field is non-constant and the other one is a constant.  Load the
     constant from the constant pool and use ps_merge instruction to
     construct the whole vector.  */
  op1 = XVECEXP (vals, 0, 0);
  op2 = XVECEXP (vals, 0, 1);

  constant_op = (CONSTANT_P (op1)) ? op1 : op2;

  tmp = gen_reg_rtx (GET_MODE (constant_op));
  emit_move_insn (tmp, constant_op);

  if (CONSTANT_P (op1))
    new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
  else
    new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);

  emit_move_insn (target, new_rtx);
}

void
paired_expand_vector_move (rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  emit_move_insn (op0, op1);
}

/* Emit vector compare for code RCODE.  DEST is destination, OP1 and
   OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  This is a recursive
   function.  */

static void
paired_emit_vector_compare (enum rtx_code rcode,
			    rtx dest, rtx op0, rtx op1,
			    rtx cc_op0, rtx cc_op1)
{
  rtx tmp = gen_reg_rtx (V2SFmode);
  rtx tmp1, max, min;

  gcc_assert (TARGET_PAIRED_FLOAT);
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  switch (rcode)
    {
    case LT:
    case LTU:
      paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
      return;
    case GE:
    case GEU:
      emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
      emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
      return;
    case LE:
    case LEU:
      paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
      return;
    case GT:
      paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
      return;
    case EQ:
      tmp1 = gen_reg_rtx (V2SFmode);
      max = gen_reg_rtx (V2SFmode);
      min = gen_reg_rtx (V2SFmode);

      emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
      emit_insn (gen_selv2sf4
		 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
      emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
      emit_insn (gen_selv2sf4
		 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
      emit_insn (gen_subv2sf3 (tmp1, min, max));
      emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
      return;
    case NE:
      paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
      return;
    case UNLE:
      paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
      return;
    case UNLT:
      paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
      return;
    case UNGE:
      paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
      return;
    case UNGT:
      paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
      return;
    default:
      gcc_unreachable ();
    }

  return;
}

/* Emit vector conditional expression.
   DEST is destination.  OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */

int
paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
			      rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum rtx_code rcode = GET_CODE (cond);

  if (!TARGET_PAIRED_FLOAT)
    return 0;

  paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);

  return 1;
}
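
/* Editorial note: the recursion above funnels every comparison into the
   one natively emitted form, GE (a subtract followed by a select against
   0.0).  LT (a, b) becomes GE (b, a); LE exchanges both the compare and
   the select operands; GT goes through LE; NE selects the EQ result with
   its arms swapped; and the UN* variants recurse into their ordered
   counterparts with the select arms swapped, so at most three hops reach
   the GE base case.  */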

/* Initialize vector TARGET to VALS.  */

void
rs6000_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	++n_var, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
      if ((int_vector_p || TARGET_VSX) && all_const_zero)
	{
	  /* Zero register.  */
	  emit_insn (gen_rtx_SET (VOIDmode, target,
				  gen_rtx_XOR (mode, target, target)));
	  return;
	}
      else if (int_vector_p && easy_vector_constant (const_vec, mode))
	{
	  /* Splat immediate.  */
	  emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
	  return;
	}
      else
	{
	  /* Load from constant pool.  */
	  emit_move_insn (target, const_vec);
	  return;
	}
    }

  /* Double word values on VSX can use xxpermdi or lxvdsx.  */
  if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
    {
      rtx op0 = XVECEXP (vals, 0, 0);
      rtx op1 = XVECEXP (vals, 0, 1);
      if (all_same)
	{
	  if (!MEM_P (op0) && !REG_P (op0))
	    op0 = force_reg (inner_mode, op0);
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_splat_v2df (target, op0));
	  else
	    emit_insn (gen_vsx_splat_v2di (target, op0));
	}
      else
	{
	  op0 = force_reg (inner_mode, op0);
	  op1 = force_reg (inner_mode, op1);
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_concat_v2df (target, op0, op1));
	  else
	    emit_insn (gen_vsx_concat_v2di (target, op0, op1));
	}
      return;
    }

  /* On VSX, single precision floating point is internally represented as
     a double, so either make two V2DF vectors and convert those vectors
     to single precision, or do one conversion and splat the result to
     the other elements.  */
  if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
    {
      if (all_same)
	{
	  rtx freg = gen_reg_rtx (V4SFmode);
	  rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
	  rtx cvt = ((TARGET_XSCVDPSPN)
		     ? gen_vsx_xscvdpspn_scalar (freg, sreg)
		     : gen_vsx_xscvdpsp_scalar (freg, sreg));

	  emit_insn (cvt);
	  emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
	}
      else
	{
	  rtx dbl_even = gen_reg_rtx (V2DFmode);
	  rtx dbl_odd  = gen_reg_rtx (V2DFmode);
	  rtx flt_even = gen_reg_rtx (V4SFmode);
	  rtx flt_odd  = gen_reg_rtx (V4SFmode);
	  rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
	  rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
	  rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
	  rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));

	  emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
	  emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
	  emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
	  emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
	  rs6000_expand_extract_even (target, flt_even, flt_odd);
	}
      return;
    }

  /* Store value to stack temp.  Load vector element.  Splat.  However, splat
     of 64-bit items is not supported on Altivec.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
      emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
		      XVECEXP (vals, 0, 0));
      x = gen_rtx_UNSPEC (VOIDmode,
			  gen_rtvec (1, const0_rtx), UNSPEC_LVE);
      emit_insn (gen_rtx_PARALLEL (VOIDmode,
				   gen_rtvec (2,
					      gen_rtx_SET (VOIDmode,
							   target, mem),
					      x)));
      x = gen_rtx_VEC_SELECT (inner_mode, target,
			      gen_rtx_PARALLEL (VOIDmode,
						gen_rtvec (1, const0_rtx)));
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite
     varying field.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      rs6000_expand_vector_init (target, copy);

      /* Insert variable.  */
      rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
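
/* Editorial sketch: for a varying V4SF {a, b, c, d} the VSX path above
   builds dbl_even = {a, b} and dbl_odd = {c, d} as V2DF, converts each
   with xvcvdpsp (the converted singles land in the even word lanes), and
   then rs6000_expand_extract_even interleaves the even words of the two
   V4SF temporaries back into {a, b, c, d} in TARGET.  */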

/* Set field ELT of TARGET to VAL.  */

void
rs6000_expand_vector_set (rtx target, rtx val, int elt)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx reg = gen_reg_rtx (mode);
  rtx mask, mem, x;
  int width = GET_MODE_SIZE (inner_mode);
  int i;

  if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
    {
      rtx (*set_func) (rtx, rtx, rtx, rtx)
	= ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
      emit_insn (set_func (target, target, val, GEN_INT (elt)));
      return;
    }

  /* Simplify setting single element vectors like V1TImode.  */
  if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
    {
      emit_move_insn (target, gen_lowpart (mode, val));
      return;
    }

  /* Load single variable value.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
  emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
  x = gen_rtx_UNSPEC (VOIDmode,
		      gen_rtvec (1, const0_rtx), UNSPEC_LVE);
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2,
					  gen_rtx_SET (VOIDmode,
						       reg, mem),
					  x)));

  /* Linear sequence.  */
  mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (mask, 0, i) = GEN_INT (i);

  /* Set permute mask to insert element into target.  */
  for (i = 0; i < width; ++i)
    XVECEXP (mask, 0, elt*width + i)
      = GEN_INT (i + 0x10);
  x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));

  if (BYTES_BIG_ENDIAN)
    x = gen_rtx_UNSPEC (mode,
			gen_rtvec (3, target, reg,
				   force_reg (V16QImode, x)),
			UNSPEC_VPERM);
  else
    {
      /* Invert selector.  We prefer to generate VNAND on P8 so
	 that future fusion opportunities can kick in, but must
	 generate VNOR elsewhere.  */
      rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
      rtx iorx = (TARGET_P8_VECTOR
		  ? gen_rtx_IOR (V16QImode, notx, notx)
		  : gen_rtx_AND (V16QImode, notx, notx));
      rtx tmp = gen_reg_rtx (V16QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));

      /* Permute with operands reversed and adjusted selector.  */
      x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
			  UNSPEC_VPERM);
    }

  emit_insn (gen_rtx_SET (VOIDmode, target, x));
}
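
/* Worked example (editorial): inserting element 2 of a V4SImode vector
   (width == 4) builds the byte selector
   {0,1,2,3, 4,5,6,7, 16,17,18,19, 12,13,14,15}; selector values 16-31
   name the second vperm input, so lanes 8-11 take the freshly loaded
   value and every other lane copies TARGET through.  On little-endian
   the selector is complemented (vnand on P8, vnor elsewhere) and the
   vperm inputs are swapped, which performs the same selection under the
   reversed byte numbering.  */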

/* Extract field ELT from VEC into TARGET.  */

void
rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
{
  machine_mode mode = GET_MODE (vec);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx mem;

  if (VECTOR_MEM_VSX_P (mode))
    {
      switch (mode)
	{
	default:
	  break;
	case V1TImode:
	  gcc_assert (elt == 0 && inner_mode == TImode);
	  emit_move_insn (target, gen_lowpart (TImode, vec));
	  break;
	case V2DFmode:
	  emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
	  return;
	case V2DImode:
	  emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
	  return;
	case V4SFmode:
	  emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
	  return;
	}
    }

  /* Allocate mode-sized buffer.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

  emit_move_insn (mem, vec);

  /* Add offset to field within buffer matching vector element.  */
  mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));

  emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
}

/* Generates shifts and masks for a pair of rldicl or rldicr insns to
   implement ANDing by the mask IN.  */
void
build_mask64_2_operands (rtx in, rtx *out)
{
  unsigned HOST_WIDE_INT c, lsb, m1, m2;
  int shift;

  gcc_assert (GET_CODE (in) == CONST_INT);

  c = INTVAL (in);
  if (c & 1)
    {
      /* Assume c initially something like 0x00fff000000fffff.  The idea
	 is to rotate the word so that the middle ^^^^^^ group of zeros
	 is at the MS end and can be cleared with an rldicl mask.  We then
	 rotate back and clear off the MS    ^^ group of zeros with a
	 second rldicl.  */
      c = ~c;			/* c == 0xff000ffffff00000 */
      lsb = c & -c;		/* lsb == 0x0000000000100000 */
      m1 = -lsb;		/* m1 == 0xfffffffffff00000 */
      c = ~c;			/* c == 0x00fff000000fffff */
      c &= -lsb;		/* c == 0x00fff00000000000 */
      lsb = c & -c;		/* lsb == 0x0000100000000000 */
      c = ~c;			/* c == 0xff000fffffffffff */
      c &= -lsb;		/* c == 0xff00000000000000 */
      shift = 0;
      while ((lsb >>= 1) != 0)
	shift++;		/* shift == 44 on exit from loop */
      m1 <<= 64 - shift;	/* m1 == 0xffffff0000000000 */
      m1 = ~m1;			/* m1 == 0x000000ffffffffff */
      m2 = ~c;			/* m2 == 0x00ffffffffffffff */
    }
  else
    {
      /* Assume c initially something like 0xff000f0000000000.  The idea
	 is to rotate the word so that the    ^^^  middle group of zeros
	 is at the LS end and can be cleared with an rldicr mask.  We then
	 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
	 a second rldicr.  */
      lsb = c & -c;		/* lsb == 0x0000010000000000 */
      m2 = -lsb;		/* m2 == 0xffffff0000000000 */
      c = ~c;			/* c == 0x00fff0ffffffffff */
      c &= -lsb;		/* c == 0x00fff00000000000 */
      lsb = c & -c;		/* lsb == 0x0000100000000000 */
      c = ~c;			/* c == 0xff000fffffffffff */
      c &= -lsb;		/* c == 0xff00000000000000 */
      shift = 0;
      while ((lsb >>= 1) != 0)
	shift++;		/* shift == 44 on exit from loop */
      m1 = ~c;			/* m1 == 0x00ffffffffffffff */
      m1 >>= shift;		/* m1 == 0x0000000000000fff */
      m1 = ~m1;			/* m1 == 0xfffffffffffff000 */
    }

  /* Note that when we only have two 0->1 and 1->0 transitions, one of the
     masks will be all 1's.  We are guaranteed more than one transition.  */
  out[0] = GEN_INT (64 - shift);
  out[1] = GEN_INT (m1);
  out[2] = GEN_INT (shift);
  out[3] = GEN_INT (m2);
}
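
/* Worked example (editorial, not compiled): applying the two generated
   rotate-and-mask steps by hand for IN == 0x00fff000000fffff, where the
   code above produces shift == 44, m1 == 0x000000ffffffffff and
   m2 == 0x00ffffffffffffff.  rotl64 stands in for the rotate performed
   by rldicl.  */
#if 0
static unsigned long long
rotl64 (unsigned long long x, unsigned n)
{
  return n == 0 ? x : (x << n) | (x >> (64 - n));
}

static unsigned long long
example_mask64_2 (unsigned long long x)
{
  x = rotl64 (x, 64 - 44) & 0x000000ffffffffffULL; /* clear middle zeros */
  x = rotl64 (x, 44) & 0x00ffffffffffffffULL;	   /* clear MS zeros */
  return x;	/* == the original X & 0x00fff000000fffffULL.  */
}
#endif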

/* Return TRUE if OP is an invalid SUBREG operation on the e500.  */

bool
invalid_e500_subreg (rtx op, machine_mode mode)
{
  if (TARGET_E500_DOUBLE)
    {
      /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
	 subreg:TI and reg:TF.  Decimal float modes are like integer
	 modes (only low part of each register used) for this
	 purpose.  */
      if (GET_CODE (op) == SUBREG
	  && (mode == SImode || mode == DImode || mode == TImode
	      || mode == DDmode || mode == TDmode || mode == PTImode)
	  && REG_P (SUBREG_REG (op))
	  && (GET_MODE (SUBREG_REG (op)) == DFmode
	      || GET_MODE (SUBREG_REG (op)) == TFmode))
	return true;

      /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
	 reg:TI.  */
      if (GET_CODE (op) == SUBREG
	  && (mode == DFmode || mode == TFmode)
	  && REG_P (SUBREG_REG (op))
	  && (GET_MODE (SUBREG_REG (op)) == DImode
	      || GET_MODE (SUBREG_REG (op)) == TImode
	      || GET_MODE (SUBREG_REG (op)) == PTImode
	      || GET_MODE (SUBREG_REG (op)) == DDmode
	      || GET_MODE (SUBREG_REG (op)) == TDmode))
	return true;
    }

  if (TARGET_SPE
      && GET_CODE (op) == SUBREG
      && mode == SImode
      && REG_P (SUBREG_REG (op))
      && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
    return true;

  return false;
}

/* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
   selects whether the alignment is abi mandated, optional, or
   both abi and optional alignment.  */

unsigned int
rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
{
  if (how != align_opt)
    {
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
	      || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
	    {
	      if (align < 64)
		align = 64;
	    }
	  else if (align < 128)
	    align = 128;
	}
      else if (TARGET_E500_DOUBLE
	       && TREE_CODE (type) == REAL_TYPE
	       && TYPE_MODE (type) == DFmode)
	{
	  if (align < 64)
	    align = 64;
	}
    }

  if (how != align_abi)
    {
      if (TREE_CODE (type) == ARRAY_TYPE
	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
	{
	  if (align < BITS_PER_WORD)
	    align = BITS_PER_WORD;
	}
    }

  return align;
}

/* Previous GCC releases forced all vector types to have 16-byte alignment.  */

bool
rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
{
  if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
    {
      if (computed != 128)
	{
	  static bool warned;
	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the layout of aggregates containing vectors with"
		      " %d-byte alignment has changed in GCC 5",
		      computed / BITS_PER_UNIT);
	    }
	}
      /* In current GCC there is no special case.  */
      return false;
    }

  return false;
}

/* AIX increases natural record alignment to doubleword if the first
   field is an FP double while the FP fields remain word aligned.  */

unsigned int
rs6000_special_round_type_align (tree type, unsigned int computed,
				 unsigned int specified)
{
  unsigned int align = MAX (computed, specified);
  tree field = TYPE_FIELDS (type);

  /* Skip all non field decls */
  while (field != NULL && TREE_CODE (field) != FIELD_DECL)
    field = DECL_CHAIN (field);

  if (field != NULL && field != type)
    {
      type = TREE_TYPE (field);
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);

      if (type != error_mark_node && TYPE_MODE (type) == DFmode)
	align = MAX (align, 64);
    }

  return align;
}

/* Darwin increases record alignment to the natural alignment of
   the first field.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do {
    tree field = TYPE_FIELDS (type);
    /* Skip all non field decls */
    while (field != NULL && TREE_CODE (field) != FIELD_DECL)
      field = DECL_CHAIN (field);
    if (! field)
      break;
    /* A packed field does not contribute any extra alignment.  */
    if (DECL_PACKED (field))
      return align;
    type = TREE_TYPE (field);
    while (TREE_CODE (type) == ARRAY_TYPE)
      type = TREE_TYPE (type);
  } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}

/* Return 1 for an operand in small memory on V.4/eabi.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  /* Vector and float memory instructions have a limited offset on the
     SPE, so using a vector or float variable directly as an operand is
     not useful.  */
  if (TARGET_SPE
      && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
    return 0;

  if (GET_CODE (op) == SYMBOL_REF)
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
	   || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}

/* Return true if either operand is a general purpose register.  */

bool
gpr_or_gpr_p (rtx op0, rtx op1)
{
  return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}

/* Return true if this is a move direct operation between GPR registers and
   floating point/VSX registers.  */

bool
direct_move_p (rtx op0, rtx op1)
{
  int regno0, regno1;

  if (!REG_P (op0) || !REG_P (op1))
    return false;

  if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
    return false;

  regno0 = REGNO (op0);
  regno1 = REGNO (op1);
  if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
    return false;

  if (INT_REGNO_P (regno0))
    return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);

  else if (INT_REGNO_P (regno1))
    {
      if (TARGET_MFPGPR && FP_REGNO_P (regno0))
	return true;

      else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
	return true;
    }

  return false;
}

/* Return true if this is a load or store quad operation.  This function does
   not handle the atomic quad memory instructions.  */

bool
quad_load_store_p (rtx op0, rtx op1)
{
  bool ret;

  if (!TARGET_QUAD_MEMORY)
    ret = false;

  else if (REG_P (op0) && MEM_P (op1))
    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
	   && quad_memory_operand (op1, GET_MODE (op1))
	   && !reg_overlap_mentioned_p (op0, op1));

  else if (MEM_P (op0) && REG_P (op1))
    ret = (quad_memory_operand (op0, GET_MODE (op0))
	   && quad_int_reg_operand (op1, GET_MODE (op1)));

  else
    ret = false;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n========== quad_load_store, return %s\n",
	       ret ? "true" : "false");
      debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
    }

  return ret;
}

/* Given an address, return a constant offset term if one exists.  */

static rtx
address_offset (rtx op)
{
  if (GET_CODE (op) == PRE_INC
      || GET_CODE (op) == PRE_DEC)
    op = XEXP (op, 0);
  else if (GET_CODE (op) == PRE_MODIFY
	   || GET_CODE (op) == LO_SUM)
    op = XEXP (op, 1);

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  if (GET_CODE (op) == PLUS)
    op = XEXP (op, 1);

  if (CONST_INT_P (op))
    return op;

  return NULL_RTX;
}

/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.  Also accept some odd rtl generated by reload
   (see rs6000_legitimize_reload_address for various forms).  It is
   important that reload rtl be accepted by appropriate constraints
   but not by the operand predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed, but see the ???
   comment in rs6000_legitimize_reload_address.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return offset + 0x8000 < 0x10000u - extra;
}
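
/* Editorial example: for DImode on powerpc64 EXTRA is 0, so any
   word-aligned offset in [-0x8000, 0x7ffc] passes; for TImode (two
   doublewords) EXTRA is 8, trimming the top to 0x7ff4 so that the
   second ld/std at offset + 8 still fits the 16-bit displacement
   field.  */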

/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */

static bool
reg_offset_addressing_ok_p (machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
    case V1TImode:
    case TImode:
      /* AltiVec/VSX vector modes.  Only reg+reg addressing is valid.  While
	 TImode is not a vector mode, if we want to use the VSX registers to
	 move it around, we need to restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
	return false;
      break;

    case V4HImode:
    case V2SImode:
    case V1DImode:
    case V2SFmode:
      /* Paired vector modes.  Only reg+reg addressing is valid.  */
      if (TARGET_PAIRED_FLOAT)
	return false;
      break;

    case SDmode:
      /* If we can do direct load/stores of SDmode, restrict it to reg+reg
	 addressing for the LFIWZX and STFIWX instructions.  */
      if (TARGET_NO_SDMODE_STACK)
	return false;
      break;

    default:
      break;
    }

  return true;
}

static bool
virtual_stack_registers_memory_p (rtx op)
{
  int regnum;

  if (GET_CODE (op) == REG)
    regnum = REGNO (op);

  else if (GET_CODE (op) == PLUS
	   && GET_CODE (XEXP (op, 0)) == REG
	   && GET_CODE (XEXP (op, 1)) == CONST_INT)
    regnum = REGNO (XEXP (op, 0));

  else
    return false;

  return (regnum >= FIRST_VIRTUAL_REGISTER
	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}

/* Return true if a MODE-sized memory access to OP plus OFFSET
   is known not to straddle a 32k boundary.  */

static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
			     machine_mode mode)
{
  tree decl, type;
  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;

  if (GET_CODE (op) != SYMBOL_REF)
    return false;

  dsize = GET_MODE_SIZE (mode);
  decl = SYMBOL_REF_DECL (op);
  if (!decl)
    {
      if (dsize == 0)
	return false;

      /* -fsection-anchors loses the original SYMBOL_REF_DECL when
	 replacing memory addresses with an anchor plus offset.  We
	 could find the decl by rummaging around in the block->objects
	 VEC for the given offset but that seems like too much work.  */
      dalign = BITS_PER_UNIT;
      if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
	  && SYMBOL_REF_ANCHOR_P (op)
	  && SYMBOL_REF_BLOCK (op) != NULL)
	{
	  struct object_block *block = SYMBOL_REF_BLOCK (op);

	  dalign = block->alignment;
	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
	}
      else if (CONSTANT_POOL_ADDRESS_P (op))
	{
	  /* It would be nice to have get_pool_align()..  */
	  machine_mode cmode = get_pool_mode (op);

	  dalign = GET_MODE_ALIGNMENT (cmode);
	}
    }
  else if (DECL_P (decl))
    {
      dalign = DECL_ALIGN (decl);

      if (dsize == 0)
	{
	  /* Allow BLKmode when the entire object is known to not
	     cross a 32k boundary.  */
	  if (!DECL_SIZE_UNIT (decl))
	    return false;

	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
	    return false;

	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
	  if (dsize > 32768)
	    return false;

	  return dalign / BITS_PER_UNIT >= dsize;
	}
    }
  else
    {
      type = TREE_TYPE (decl);

      dalign = TYPE_ALIGN (type);
      if (CONSTANT_CLASS_P (decl))
	dalign = CONSTANT_ALIGNMENT (decl, dalign);
      else
	dalign = DATA_ALIGNMENT (decl, dalign);

      if (dsize == 0)
	{
	  /* BLKmode, check the entire object.  */
	  if (TREE_CODE (decl) == STRING_CST)
	    dsize = TREE_STRING_LENGTH (decl);
	  else if (TYPE_SIZE_UNIT (type)
		   && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
	    dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
	  else
	    return false;
	  if (dsize > 32768)
	    return false;

	  return dalign / BITS_PER_UNIT >= dsize;
	}
    }

  /* Find how many bits of the alignment we know for this access.  */
  mask = dalign / BITS_PER_UNIT - 1;
  lsb = offset & -offset;
  mask &= lsb - 1;
  dalign = mask + 1;

  return dalign >= dsize;
}
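
/* Illustrative sketch (editorial, not compiled): the last four lines
   above compute the alignment actually known for SYM+OFFSET, namely the
   smaller of the symbol's alignment and the lowest set bit of the
   offset.  */
#if 0
static unsigned long long
example_known_alignment (unsigned long long dalign /* bits */,
			 unsigned long long offset)
{
  unsigned long long mask = dalign / 8 - 1;
  unsigned long long lsb = offset & -offset;
  mask &= lsb - 1;
  return mask + 1;	/* example_known_alignment (128, 40) == 8.  */
}
#endif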

static bool
constant_pool_expr_p (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  return (GET_CODE (base) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (base)
	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
}

static const_rtx tocrel_base, tocrel_offset;

/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match high part or
   non-split -mcmodel=large/medium toc pointer relative addresses.  */

bool
toc_relative_expr_p (const_rtx op, bool strict)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* Only match the low part.  */
      if (GET_CODE (op) == LO_SUM
	  && REG_P (XEXP (op, 0))
	  && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
	op = XEXP (op, 1);
      else if (strict)
	return false;
    }

  tocrel_base = op;
  tocrel_offset = const0_rtx;
  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
}

/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.  */

bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  return (toc_relative_expr_p (x, strict)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || mode == QImode
	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
					      INTVAL (tocrel_offset), mode)));
}

static bool
legitimate_small_data_p (machine_mode mode, rtx x)
{
  return (DEFAULT_ABI == ABI_V4
	  && !flag_pic && !TARGET_TOC
	  && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
	  && small_data_operand (x, mode));
}

/* SPE offset addressing is limited to 5-bits worth of double words.  */
#define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)

bool
rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
				    bool strict, bool worst_case)
{
  unsigned HOST_WIDE_INT offset;
  unsigned int extra;

  if (GET_CODE (x) != PLUS)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  if (!reg_offset_addressing_ok_p (mode))
    return virtual_stack_registers_memory_p (x);
  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
    return true;
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return false;

  offset = INTVAL (XEXP (x, 1));
  extra = 0;
  switch (mode)
    {
    case V4HImode:
    case V2SImode:
    case V1DImode:
    case V2SFmode:
      /* SPE vector modes.  */
      return SPE_CONST_OFFSET_OK (offset);

    case DFmode:
    case DDmode:
    case DImode:
      /* On e500v2, we may have:

	   (subreg:DF (mem:DI (plus (reg) (const_int))) 0).

	 Which gets addressed with evldd instructions.  */
      if (TARGET_E500_DOUBLE)
	return SPE_CONST_OFFSET_OK (offset);

      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 4;
      else if (offset & 3)
	return false;
      break;

    case TFmode:
      if (TARGET_E500_DOUBLE)
	return (SPE_CONST_OFFSET_OK (offset)
		&& SPE_CONST_OFFSET_OK (offset + 8));
      /* fall through */

    case TDmode:
    case TImode:
    case PTImode:
      extra = 8;
      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if (offset & 3)
	return false;
      break;

    default:
      break;
    }

  offset += 0x8000;
  return offset < 0x10000 - extra;
}
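
/* Editorial example: SPE_CONST_OFFSET_OK accepts exactly the offsets
   {0, 8, 16, ..., 248}, a 5-bit unsigned count of doublewords.  For
   TImode with WORST_CASE on 32-bit, EXTRA == 12 reserves room for the
   three follow-on word accesses, so the largest accepted offset is
   0x7ff3: the displacement of the fourth word, offset + 12, still
   encodes as 0x7fff.  */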

bool
legitimate_indexed_address_p (rtx x, int strict)
{
  rtx op0, op1;

  if (GET_CODE (x) != PLUS)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  /* Recognize the rtl generated by reload which we know will later be
     replaced with proper base and index regs.  */
  if (!strict
      && reload_in_progress
      && (REG_P (op0) || GET_CODE (op0) == PLUS)
      && REG_P (op1))
    return true;

  return (REG_P (op0) && REG_P (op1)
	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
}

bool
avoiding_indexed_address_p (machine_mode mode)
{
  /* Avoid indexed addressing for modes that have non-indexed
     load/store instruction forms.  */
  return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}

bool
legitimate_indirect_address_p (rtx x, int strict)
{
  return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
}

bool
macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
  if (!TARGET_MACHO || !flag_pic
      || mode != SImode || GET_CODE (x) != MEM)
    return false;
  x = XEXP (x, 0);

  if (GET_CODE (x) != LO_SUM)
    return false;
  if (GET_CODE (XEXP (x, 0)) != REG)
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
    return false;
  x = XEXP (x, 1);

  return CONSTANT_P (x);
}

static bool
legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
{
  if (GET_CODE (x) != LO_SUM)
    return false;
  if (GET_CODE (XEXP (x, 0)) != REG)
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* Restrict addressing for DI because of our SUBREG hackery.  */
  if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;
  x = XEXP (x, 1);

  if (TARGET_ELF || TARGET_MACHO)
    {
      bool large_toc_ok;

      if (DEFAULT_ABI == ABI_V4 && flag_pic)
	return false;
      /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
	 recognizes some LO_SUM addresses as valid although this
	 function says the opposite.  In most cases, LRA can generate
	 correct code for address reloads through its own
	 transformations; only some LO_SUM cases it cannot manage.  So
	 we need code analogous to that in
	 rs6000_legitimize_reload_address for LO_SUM here, saying that
	 some addresses are still valid.  */
      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
		      && small_toc_ref (x, VOIDmode));
      if (TARGET_TOC && ! large_toc_ok)
	return false;
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* ??? Assume floating point reg based on mode?  */
	       TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
	       && (mode == DFmode || mode == DDmode)))
	return false;

      return CONSTANT_P (x) || large_toc_ok;
    }

  return false;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This is used from only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was
   called.  In some cases it is useful to look at this to decide what
   needs to be done.

   It is always safe for this function to do nothing.  It exists to
   recognize opportunities to optimize the output.

   On RS/6000, first check for the sum of a register with a constant
   integer that is out of range.  If so, generate code to add the
   constant with the low-order 16 bits masked to the register and force
   this result into another register (this can be done with `cau').
   Then generate an address of REG+(CONST&0xffff), allowing for the
   possibility of bit 16 being a one.

   Then check for the sum of a register and something not constant, try to
   load the other things into a register and return the sum.  */

static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			   machine_mode mode)
{
  unsigned int extra;

  if (!reg_offset_addressing_ok_p (mode))
    {
      if (virtual_stack_registers_memory_p (x))
	return x;

      /* In theory we should not be seeing addresses of the form reg+0,
	 but just in case it is generated, optimize it away.  */
      if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
	return force_reg (Pmode, XEXP (x, 0));

      /* For TImode with load/store quad, restrict addresses to just a single
	 pointer, so it works with both GPRs and VSX registers.  */
      /* Make sure both operands are registers.  */
      else if (GET_CODE (x) == PLUS
	       && (mode != TImode || !TARGET_QUAD_MEMORY))
	return gen_rtx_PLUS (Pmode,
			     force_reg (Pmode, XEXP (x, 0)),
			     force_reg (Pmode, XEXP (x, 1)));
      else
	return force_reg (Pmode, x);
    }
  if (GET_CODE (x) == SYMBOL_REF)
    {
      enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
      if (model != 0)
	return rs6000_legitimize_tls_address (x, model);
    }

  extra = 0;
  switch (mode)
    {
    case TFmode:
    case TDmode:
    case TImode:
    case PTImode:
      /* As in legitimate_offset_address_p we do not assume
	 worst-case.  The mode here is just a hint as to the registers
	 used.  A TImode is usually in gprs, but may actually be in
	 fprs.  Leave worst-case scenario for reload to handle via
	 insn constraints.  PTImode is only GPRs.  */
      extra = 8;
      break;
    default:
      break;
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
	  >= 0x10000 - extra)
      && !(SPE_VECTOR_MODE (mode)
	   || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
    {
      HOST_WIDE_INT high_int, low_int;
      rtx sum;
      low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
      if (low_int >= 0x8000 - extra)
	low_int = 0;
      high_int = INTVAL (XEXP (x, 1)) - low_int;
      sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
					 GEN_INT (high_int)), 0);
      return plus_constant (Pmode, sum, low_int);
    }
  else if (GET_CODE (x) == PLUS
	   && GET_CODE (XEXP (x, 0)) == REG
	   && GET_CODE (XEXP (x, 1)) != CONST_INT
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
		   && (mode == DFmode || mode == DDmode)))
	   && !avoiding_indexed_address_p (mode))
    {
      return gen_rtx_PLUS (Pmode, XEXP (x, 0),
			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
    }
  else if (SPE_VECTOR_MODE (mode)
	   || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
    {
      if (mode == DImode)
	return x;
      /* We accept [reg + reg] and [reg + OFFSET].  */

      if (GET_CODE (x) == PLUS)
	{
	  rtx op1 = XEXP (x, 0);
	  rtx op2 = XEXP (x, 1);
	  rtx y;

	  op1 = force_reg (Pmode, op1);

	  if (GET_CODE (op2) != REG
	      && (GET_CODE (op2) != CONST_INT
		  || !SPE_CONST_OFFSET_OK (INTVAL (op2))
		  || (GET_MODE_SIZE (mode) > 8
		      && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
	    op2 = force_reg (Pmode, op2);

	  /* We can't always do [reg + reg] for these, because [reg +
	     reg + offset] is not a legitimate addressing mode.  */
	  y = gen_rtx_PLUS (Pmode, op1, op2);

	  if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
	    return force_reg (Pmode, y);
	  else
	    return y;
	}

      return force_reg (Pmode, x);
    }
  else if ((TARGET_ELF
#if TARGET_MACHO
	    || !MACHO_DYNAMIC_NO_PIC_P
#endif
	    )
	   && TARGET_32BIT
	   && TARGET_NO_TOC
	   && ! flag_pic
	   && GET_CODE (x) != CONST_INT
	   && GET_CODE (x) != CONST_WIDE_INT
	   && GET_CODE (x) != CONST_DOUBLE
	   && CONSTANT_P (x)
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
		   && (mode == DFmode || mode == DDmode))))
    {
      rtx reg = gen_reg_rtx (Pmode);
      if (TARGET_ELF)
	emit_insn (gen_elf_high (reg, x));
      else
	emit_insn (gen_macho_high (reg, x));
      return gen_rtx_LO_SUM (Pmode, reg, x);
    }
  else if (TARGET_TOC
	   && GET_CODE (x) == SYMBOL_REF
	   && constant_pool_expr_p (x)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
    return create_TOC_reference (x, NULL_RTX);
  else
    return x;
}
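
/* Worked example (editorial): legitimizing (plus (reg) (const_int
   0x1234abcd)) takes the first arm above: low_int = ((0xabcd ^ 0x8000)
   - 0x8000) == -0x5433 and high_int = 0x1234abcd - (-0x5433) ==
   0x12350000, so an addis (the "cau" of the comment) adds 0x1235 to the
   high half and the final address keeps the sign-extended low part as
   its displacement.  */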

/* Debug version of rs6000_legitimize_address.  */
static rtx
rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
  rtx ret;
  rtx_insn *insns;

  start_sequence ();
  ret = rs6000_legitimize_address (x, oldx, mode);
  insns = get_insns ();
  end_sequence ();

  if (ret != x)
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, old code %s, "
	       "new code %s, modified\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
	       GET_RTX_NAME (GET_CODE (ret)));

      fprintf (stderr, "Original address:\n");
      debug_rtx (x);

      fprintf (stderr, "oldx:\n");
      debug_rtx (oldx);

      fprintf (stderr, "New address:\n");
      debug_rtx (ret);

      if (insns)
	{
	  fprintf (stderr, "Insns added:\n");
	  debug_rtx_list (insns, 20);
	}
    }
  else
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));

      debug_rtx (x);
    }

  if (insns)
    emit_insn (insns);

  return ret;
}

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void
rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs (DOUBLE_INT_ASM_OP, file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  fputs ("@dtprel+0x8000", file);
}

/* Return true if X is a symbol that refers to real (rather than emulated)
   TLS.  */

static bool
rs6000_real_tls_symbol_ref_p (rtx x)
{
  return (GET_CODE (x) == SYMBOL_REF
	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize various UNSPEC sequences
   and turn them back into a direct symbol reference.  */

static rtx
rs6000_delegitimize_address (rtx orig_x)
{
  rtx x, y, offset;

  orig_x = delegitimize_mem_from_attrs (orig_x);
  x = orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);

  y = x;
  if (TARGET_CMODEL != CMODEL_SMALL
      && GET_CODE (y) == LO_SUM)
    y = XEXP (y, 1);

  offset = NULL_RTX;
  if (GET_CODE (y) == PLUS
      && GET_MODE (y) == Pmode
      && CONST_INT_P (XEXP (y, 1)))
    {
      offset = XEXP (y, 1);
      y = XEXP (y, 0);
    }

  if (GET_CODE (y) == UNSPEC
      && XINT (y, 1) == UNSPEC_TOCREL)
    {
      y = XVECEXP (y, 0, 0);

#ifdef HAVE_AS_TLS
      /* Do not associate thread-local symbols with the original
	 constant pool symbol.  */
      if (TARGET_XCOFF
	  && GET_CODE (y) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (y)
	  && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
	return orig_x;
#endif

      if (offset != NULL_RTX)
	y = gen_rtx_PLUS (Pmode, y, offset);
      if (!MEM_P (orig_x))
	return y;
      else
	return replace_equiv_address_nv (orig_x, y);
    }

  if (TARGET_MACHO
      && GET_CODE (orig_x) == LO_SUM
      && GET_CODE (XEXP (orig_x, 1)) == CONST)
    {
      y = XEXP (XEXP (orig_x, 1), 0);
      if (GET_CODE (y) == UNSPEC
	  && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
	return XVECEXP (y, 0, 0);
    }

  return orig_x;
}

/* Return true if X shouldn't be emitted into the debug info.
   The linker doesn't like .toc section references from
   .debug_* sections, so reject .toc section symbols.  */

static bool
rs6000_const_not_ok_for_debug_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    {
      rtx c = get_pool_constant (x);
      machine_mode cmode = get_pool_mode (x);
      if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
	return true;
    }

  return false;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx rs6000_tls_symbol;
static rtx
rs6000_tls_get_addr (void)
{
  if (!rs6000_tls_symbol)
    rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");

  return rs6000_tls_symbol;
}

/* Construct the SYMBOL_REF for TLS GOT references.  */

static GTY(()) rtx rs6000_got_symbol;
static rtx
rs6000_got_sym (void)
{
  if (!rs6000_got_symbol)
    {
      rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
    }

  return rs6000_got_symbol;
}

/* AIX Thread-Local Address support.  */

static rtx
rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
{
  rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
  const char *name;
  char *tlsname;

  name = XSTR (addr, 0);
  /* Append TLS CSECT qualifier, unless the symbol already is qualified
     or the symbol will be in TLS private data section.  */
  if (name[strlen (name) - 1] != ']'
      && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
	  || bss_initializer_p (SYMBOL_REF_DECL (addr))))
    {
      tlsname = XALLOCAVEC (char, strlen (name) + 4);
      strcpy (tlsname, name);
      strcat (tlsname,
	      bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
      tlsaddr = copy_rtx (addr);
      XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
    }
  else
    tlsaddr = addr;

  /* Place addr into TOC constant pool.  */
  sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);

  /* Output the TOC entry and create the MEM referencing the value.  */
  if (constant_pool_expr_p (XEXP (sym, 0))
      && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
    {
      tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
      mem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (mem, get_TOC_alias_set ());
    }
  else
    return sym;

  /* Use global-dynamic for local-dynamic.  */
  if (model == TLS_MODEL_GLOBAL_DYNAMIC
      || model == TLS_MODEL_LOCAL_DYNAMIC)
    {
      /* Create new TOC reference for @m symbol.  */
      name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
      tlsname = XALLOCAVEC (char, strlen (name) + 1);
      strcpy (tlsname, "*LCM");
      strcat (tlsname, name + 3);
      rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
      SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
      tocref = create_TOC_reference (modaddr, NULL_RTX);
      rtx modmem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (modmem, get_TOC_alias_set ());

      rtx modreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));

      tmpreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));

      dest = gen_reg_rtx (Pmode);
      if (TARGET_32BIT)
	emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
      else
	emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
      return dest;
    }
  /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13.  */
  else if (TARGET_32BIT)
    {
      tlsreg = gen_reg_rtx (SImode);
      emit_insn (gen_tls_get_tpointer (tlsreg));
    }
  else
    tlsreg = gen_rtx_REG (DImode, 13);

  /* Load the TOC value into temporary register.  */
  tmpreg = gen_reg_rtx (Pmode);
  emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MINUS (Pmode, addr, tlsreg));

  /* Add TOC symbol value to TLS pointer.  */
  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));

  return dest;
}
7205 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7206 this (thread-local) address. */
7208 static rtx
7209 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7211 rtx dest, insn;
7213 if (TARGET_XCOFF)
7214 return rs6000_legitimize_tls_address_aix (addr, model);
7216 dest = gen_reg_rtx (Pmode);
7217 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7219 rtx tlsreg;
7221 if (TARGET_64BIT)
7223 tlsreg = gen_rtx_REG (Pmode, 13);
7224 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7226 else
7228 tlsreg = gen_rtx_REG (Pmode, 2);
7229 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7231 emit_insn (insn);
7233 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7235 rtx tlsreg, tmp;
7237 tmp = gen_reg_rtx (Pmode);
7238 if (TARGET_64BIT)
7240 tlsreg = gen_rtx_REG (Pmode, 13);
7241 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7243 else
7245 tlsreg = gen_rtx_REG (Pmode, 2);
7246 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7248 emit_insn (insn);
7249 if (TARGET_64BIT)
7250 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7251 else
7252 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7253 emit_insn (insn);
7255 else
7257 rtx r3, got, tga, tmp1, tmp2, call_insn;
7259 /* We currently use relocations like @got@tlsgd for tls, which
7260 means the linker will handle allocation of tls entries, placing
7261 them in the .got section. So use a pointer to the .got section,
7262 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7263 or to secondary GOT sections used by 32-bit -fPIC. */
7264 if (TARGET_64BIT)
7265 got = gen_rtx_REG (Pmode, 2);
7266 else
7268 if (flag_pic == 1)
7269 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7270 else
7272 rtx gsym = rs6000_got_sym ();
7273 got = gen_reg_rtx (Pmode);
7274 if (flag_pic == 0)
7275 rs6000_emit_move (got, gsym, Pmode);
7276 else
7278 rtx mem, lab, last;
7280 tmp1 = gen_reg_rtx (Pmode);
7281 tmp2 = gen_reg_rtx (Pmode);
7282 mem = gen_const_mem (Pmode, tmp1);
7283 lab = gen_label_rtx ();
7284 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7285 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7286 if (TARGET_LINK_STACK)
7287 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7288 emit_move_insn (tmp2, mem);
7289 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7290 set_unique_reg_note (last, REG_EQUAL, gsym);
7295 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7297 tga = rs6000_tls_get_addr ();
7298 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7299 1, const0_rtx, Pmode);
7301 r3 = gen_rtx_REG (Pmode, 3);
7302 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7304 if (TARGET_64BIT)
7305 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7306 else
7307 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7309 else if (DEFAULT_ABI == ABI_V4)
7310 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7311 else
7312 gcc_unreachable ();
7313 call_insn = last_call_insn ();
7314 PATTERN (call_insn) = insn;
7315 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7316 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7317 pic_offset_table_rtx);
7319 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7321 tga = rs6000_tls_get_addr ();
7322 tmp1 = gen_reg_rtx (Pmode);
7323 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7324 1, const0_rtx, Pmode);
7326 r3 = gen_rtx_REG (Pmode, 3);
7327 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7329 if (TARGET_64BIT)
7330 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7331 else
7332 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7334 else if (DEFAULT_ABI == ABI_V4)
7335 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7336 else
7337 gcc_unreachable ();
7338 call_insn = last_call_insn ();
7339 PATTERN (call_insn) = insn;
7340 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7341 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7342 pic_offset_table_rtx);
7344 if (rs6000_tls_size == 16)
7346 if (TARGET_64BIT)
7347 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7348 else
7349 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7351 else if (rs6000_tls_size == 32)
7353 tmp2 = gen_reg_rtx (Pmode);
7354 if (TARGET_64BIT)
7355 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7356 else
7357 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7358 emit_insn (insn);
7359 if (TARGET_64BIT)
7360 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7361 else
7362 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7364 else
7366 tmp2 = gen_reg_rtx (Pmode);
7367 if (TARGET_64BIT)
7368 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7369 else
7370 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7371 emit_insn (insn);
7372 insn = gen_rtx_SET (Pmode, dest,
7373 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7375 emit_insn (insn);
7377 else
7379 /* IE, or 64-bit offset LE. */
7380 tmp2 = gen_reg_rtx (Pmode);
7381 if (TARGET_64BIT)
7382 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7383 else
7384 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7385 emit_insn (insn);
7386 if (TARGET_64BIT)
7387 insn = gen_tls_tls_64 (dest, tmp2, addr);
7388 else
7389 insn = gen_tls_tls_32 (dest, tmp2, addr);
7390 emit_insn (insn);
7394 return dest;
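/* Illustrative sketch (not part of the original source): for the AIX/ELFv2
   global-dynamic case above, the emitted sequence is roughly

        addi 3,2,sym@got@tlsgd       # point r3 at the GOT slot for SYM
        bl __tls_get_addr(sym@tlsgd) # thread-local address returned in r3

   while local-dynamic makes one __tls_get_addr call per module and then
   adds sym@dtprel-style offsets to its result.  The exact relocations
   depend on the ABI and on -mtls-size.  */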
7397 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7399 static bool
7400 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7402 if (GET_CODE (x) == HIGH
7403 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7404 return true;
7406 /* A TLS symbol in the TOC cannot contain a sum. */
7407 if (GET_CODE (x) == CONST
7408 && GET_CODE (XEXP (x, 0)) == PLUS
7409 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7410 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7411 return true;
7413 /* Do not place an ELF TLS symbol in the constant pool. */
7414 return TARGET_ELF && tls_referenced_p (x);
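/* Example (illustrative): (const (plus (symbol_ref [tls] "x") (const_int 4)))
   is caught by the check above, since a TLS symbol plus an offset cannot be
   placed in the TOC, and on ELF any TLS reference stays out of the
   constant pool.  */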
7417 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7418 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7419 can be addressed relative to the toc pointer. */
7421 static bool
7422 use_toc_relative_ref (rtx sym)
7424 return ((constant_pool_expr_p (sym)
7425 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7426 get_pool_mode (sym)))
7427 || (TARGET_CMODEL == CMODEL_MEDIUM
7428 && SYMBOL_REF_LOCAL_P (sym)));
7431 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7432 replace the input X, or the original X if no replacement is called for.
7433 The output parameter *WIN is 1 if the calling macro should goto WIN,
7434 0 if it should not.
7436 For RS/6000, we wish to handle large displacements off a base
7437 register by splitting the addend across an addis and the mem insn.
7438 This cuts the number of extra insns needed from 3 to 1.
7440 On Darwin, we use this to generate code for floating point constants.
7441 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7442 The Darwin code is inside #if TARGET_MACHO because only then are the
7443 machopic_* functions defined. */
7444 static rtx
7445 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7446 int opnum, int type,
7447 int ind_levels ATTRIBUTE_UNUSED, int *win)
7449 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7451 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7452 DFmode/DImode MEM. */
7453 if (reg_offset_p
7454 && opnum == 1
7455 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7456 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7457 reg_offset_p = false;
7459 /* We must recognize output that we have already generated ourselves. */
7460 if (GET_CODE (x) == PLUS
7461 && GET_CODE (XEXP (x, 0)) == PLUS
7462 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7463 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7464 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7466 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7467 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7468 opnum, (enum reload_type) type);
7469 *win = 1;
7470 return x;
7473 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7474 if (GET_CODE (x) == LO_SUM
7475 && GET_CODE (XEXP (x, 0)) == HIGH)
7477 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7478 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7479 opnum, (enum reload_type) type);
7480 *win = 1;
7481 return x;
7484 #if TARGET_MACHO
7485 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7486 && GET_CODE (x) == LO_SUM
7487 && GET_CODE (XEXP (x, 0)) == PLUS
7488 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7489 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7490 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7491 && machopic_operand_p (XEXP (x, 1)))
7493 /* Result of previous invocation of this function on Darwin
7494 floating point constant. */
7495 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7496 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7497 opnum, (enum reload_type) type);
7498 *win = 1;
7499 return x;
7501 #endif
7503 if (TARGET_CMODEL != CMODEL_SMALL
7504 && reg_offset_p
7505 && small_toc_ref (x, VOIDmode))
7507 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7508 x = gen_rtx_LO_SUM (Pmode, hi, x);
7509 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7510 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7511 opnum, (enum reload_type) type);
7512 *win = 1;
7513 return x;
7516 if (GET_CODE (x) == PLUS
7517 && GET_CODE (XEXP (x, 0)) == REG
7518 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7519 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7520 && GET_CODE (XEXP (x, 1)) == CONST_INT
7521 && reg_offset_p
7522 && !SPE_VECTOR_MODE (mode)
7523 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7524 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7526 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7527 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7528 HOST_WIDE_INT high
7529 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
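/* Worked instance (illustrative, not in the original source): val = 0x12345
   gives low = 0x2345 and high = 0x10000, so (plus (reg) 0x12345) becomes
   (plus (plus (reg) 0x10000) 0x2345); the high part is reloaded into a base
   register (an addis) and the sign-extended 16-bit low part stays in the
   mem displacement.  */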
7531 /* Check for 32-bit overflow. */
7532 if (high + low != val)
7534 *win = 0;
7535 return x;
7538 /* Reload the high part into a base reg; leave the low part
7539 in the mem directly. */
7541 x = gen_rtx_PLUS (GET_MODE (x),
7542 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7543 GEN_INT (high)),
7544 GEN_INT (low));
7546 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7547 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7548 opnum, (enum reload_type) type);
7549 *win = 1;
7550 return x;
7553 if (GET_CODE (x) == SYMBOL_REF
7554 && reg_offset_p
7555 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7556 && !SPE_VECTOR_MODE (mode)
7557 #if TARGET_MACHO
7558 && DEFAULT_ABI == ABI_DARWIN
7559 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7560 && machopic_symbol_defined_p (x)
7561 #else
7562 && DEFAULT_ABI == ABI_V4
7563 && !flag_pic
7564 #endif
7565 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7566 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7567 without fprs.
7568 ??? Assume floating point reg based on mode? This assumption is
7569 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7570 where reload ends up doing a DFmode load of a constant from
7571 mem using two gprs. Unfortunately, at this point reload
7572 hasn't yet selected regs so poking around in reload data
7573 won't help and even if we could figure out the regs reliably,
7574 we'd still want to allow this transformation when the mem is
7575 naturally aligned. Since we say the address is good here, we
7576 can't disable offsets from LO_SUMs in mem_operand_gpr.
7577 FIXME: Allow offset from lo_sum for other modes too, when
7578 mem is sufficiently aligned.
7580 Also disallow this if the type can go in VMX/Altivec registers, since
7581 those registers do not have d-form (reg+offset) address modes. */
7582 && !reg_addr[mode].scalar_in_vmx_p
7583 && mode != TFmode
7584 && mode != TDmode
7585 && (mode != TImode || !TARGET_VSX_TIMODE)
7586 && mode != PTImode
7587 && (mode != DImode || TARGET_POWERPC64)
7588 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7589 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7591 #if TARGET_MACHO
7592 if (flag_pic)
7594 rtx offset = machopic_gen_offset (x);
7595 x = gen_rtx_LO_SUM (GET_MODE (x),
7596 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7597 gen_rtx_HIGH (Pmode, offset)), offset);
7599 else
7600 #endif
7601 x = gen_rtx_LO_SUM (GET_MODE (x),
7602 gen_rtx_HIGH (Pmode, x), x);
7604 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7605 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7606 opnum, (enum reload_type) type);
7607 *win = 1;
7608 return x;
7611 /* Reload an offset address wrapped by an AND that represents the
7612 masking of the lower bits. Strip the outer AND and let reload
7613 convert the offset address into an indirect address. For VSX,
7614 force reload to create the address with an AND in a separate
7615 register, because we can't guarantee an altivec register will
7616 be used. */
7617 if (VECTOR_MEM_ALTIVEC_P (mode)
7618 && GET_CODE (x) == AND
7619 && GET_CODE (XEXP (x, 0)) == PLUS
7620 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7621 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7622 && GET_CODE (XEXP (x, 1)) == CONST_INT
7623 && INTVAL (XEXP (x, 1)) == -16)
7625 x = XEXP (x, 0);
7626 *win = 1;
7627 return x;
7630 if (TARGET_TOC
7631 && reg_offset_p
7632 && GET_CODE (x) == SYMBOL_REF
7633 && use_toc_relative_ref (x))
7635 x = create_TOC_reference (x, NULL_RTX);
7636 if (TARGET_CMODEL != CMODEL_SMALL)
7637 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7638 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7639 opnum, (enum reload_type) type);
7640 *win = 1;
7641 return x;
7643 *win = 0;
7644 return x;
7647 /* Debug version of rs6000_legitimize_reload_address. */
7648 static rtx
7649 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7650 int opnum, int type,
7651 int ind_levels, int *win)
7653 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7654 ind_levels, win);
7655 fprintf (stderr,
7656 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7657 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7658 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7659 debug_rtx (x);
7661 if (x == ret)
7662 fprintf (stderr, "Same address returned\n");
7663 else if (!ret)
7664 fprintf (stderr, "NULL returned\n");
7665 else
7667 fprintf (stderr, "New address:\n");
7668 debug_rtx (ret);
7671 return ret;
7674 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7675 that is a valid memory address for an instruction.
7676 The MODE argument is the machine mode for the MEM expression
7677 that wants to use this address.
7679 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
7680 refers to a constant pool entry of an address (or the sum of it
7681 plus a constant), a short (16-bit signed) constant plus a register,
7682 the sum of two registers, or a register indirect, possibly with an
7683 auto-increment. For DFmode, DDmode and DImode with a constant plus
7684 register, we must ensure that both words are addressable or PowerPC64
7685 with offset word aligned.
7687 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7688 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7689 because adjacent memory cells are accessed by adding word-sized offsets
7690 during assembly output. */
7691 static bool
7692 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7694 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7696 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7697 if (VECTOR_MEM_ALTIVEC_P (mode)
7698 && GET_CODE (x) == AND
7699 && GET_CODE (XEXP (x, 1)) == CONST_INT
7700 && INTVAL (XEXP (x, 1)) == -16)
7701 x = XEXP (x, 0);
7703 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7704 return 0;
7705 if (legitimate_indirect_address_p (x, reg_ok_strict))
7706 return 1;
7707 if (TARGET_UPDATE
7708 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7709 && mode_supports_pre_incdec_p (mode)
7710 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7711 return 1;
7712 if (virtual_stack_registers_memory_p (x))
7713 return 1;
7714 if (reg_offset_p && legitimate_small_data_p (mode, x))
7715 return 1;
7716 if (reg_offset_p
7717 && legitimate_constant_pool_address_p (x, mode,
7718 reg_ok_strict || lra_in_progress))
7719 return 1;
7720 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7721 allow register indirect addresses. This will allow the values to go in
7722 either GPRs or VSX registers without reloading. The vector types would
7723 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7724 somewhat split, in that some uses are GPR based, and some VSX based. */
7725 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7726 return 0;
7727 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
7728 if (! reg_ok_strict
7729 && reg_offset_p
7730 && GET_CODE (x) == PLUS
7731 && GET_CODE (XEXP (x, 0)) == REG
7732 && (XEXP (x, 0) == virtual_stack_vars_rtx
7733 || XEXP (x, 0) == arg_pointer_rtx)
7734 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7735 return 1;
7736 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7737 return 1;
7738 if (mode != TFmode
7739 && mode != TDmode
7740 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7741 || TARGET_POWERPC64
7742 || (mode != DFmode && mode != DDmode)
7743 || (TARGET_E500_DOUBLE && mode != DDmode))
7744 && (TARGET_POWERPC64 || mode != DImode)
7745 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7746 && mode != PTImode
7747 && !avoiding_indexed_address_p (mode)
7748 && legitimate_indexed_address_p (x, reg_ok_strict))
7749 return 1;
7750 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7751 && mode_supports_pre_modify_p (mode)
7752 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7753 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7754 reg_ok_strict, false)
7755 || (!avoiding_indexed_address_p (mode)
7756 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7757 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7758 return 1;
7759 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7760 return 1;
7761 return 0;
7764 /* Debug version of rs6000_legitimate_address_p. */
7765 static bool
7766 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7767 bool reg_ok_strict)
7769 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7770 fprintf (stderr,
7771 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7772 "strict = %d, reload = %s, code = %s\n",
7773 ret ? "true" : "false",
7774 GET_MODE_NAME (mode),
7775 reg_ok_strict,
7776 (reload_completed
7777 ? "after"
7778 : (reload_in_progress ? "progress" : "before")),
7779 GET_RTX_NAME (GET_CODE (x)));
7780 debug_rtx (x);
7782 return ret;
7785 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7787 static bool
7788 rs6000_mode_dependent_address_p (const_rtx addr,
7789 addr_space_t as ATTRIBUTE_UNUSED)
7791 return rs6000_mode_dependent_address_ptr (addr);
7794 /* Go to LABEL if ADDR (a legitimate address expression)
7795 has an effect that depends on the machine mode it is used for.
7797 On the RS/6000 this is true of all integral offsets (since AltiVec
7798 and VSX modes don't allow them) and of pre-increment or pre-decrement addresses.
7800 ??? Except that due to conceptual problems in offsettable_address_p
7801 we can't really report the problems of integral offsets. So leave
7802 this assuming that the adjustable offset must be valid for the
7803 sub-words of a TFmode operand, which is what we had before. */
7805 static bool
7806 rs6000_mode_dependent_address (const_rtx addr)
7808 switch (GET_CODE (addr))
7810 case PLUS:
7811 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7812 is considered a legitimate address before reload, so there
7813 are no offset restrictions in that case. Note that this
7814 condition is safe in strict mode because any address involving
7815 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7816 been rejected as illegitimate. */
7817 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7818 && XEXP (addr, 0) != arg_pointer_rtx
7819 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7821 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7822 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
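/* Worked instance (illustrative): on 32-bit, val = 0x7ff8 gives
   0x7ff8 + 0x8000 = 0xfff8 >= 0xfff4, so the address is mode-dependent:
   a TFmode access would need displacements up to val + 12 = 0x8004,
   which overflows the signed 16-bit offset field.  */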
7824 break;
7826 case LO_SUM:
7827 /* Anything in the constant pool is sufficiently aligned that
7828 all bytes have the same high part address. */
7829 return !legitimate_constant_pool_address_p (addr, QImode, false);
7831 /* Auto-increment cases are now treated generically in recog.c. */
7832 case PRE_MODIFY:
7833 return TARGET_UPDATE;
7835 /* AND is only allowed in Altivec loads. */
7836 case AND:
7837 return true;
7839 default:
7840 break;
7843 return false;
7846 /* Debug version of rs6000_mode_dependent_address. */
7847 static bool
7848 rs6000_debug_mode_dependent_address (const_rtx addr)
7850 bool ret = rs6000_mode_dependent_address (addr);
7852 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7853 ret ? "true" : "false");
7854 debug_rtx (addr);
7856 return ret;
7859 /* Implement FIND_BASE_TERM. */
7861 rtx
7862 rs6000_find_base_term (rtx op)
7864 rtx base;
7866 base = op;
7867 if (GET_CODE (base) == CONST)
7868 base = XEXP (base, 0);
7869 if (GET_CODE (base) == PLUS)
7870 base = XEXP (base, 0);
7871 if (GET_CODE (base) == UNSPEC)
7872 switch (XINT (base, 1))
7874 case UNSPEC_TOCREL:
7875 case UNSPEC_MACHOPIC_OFFSET:
7876 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7877 for aliasing purposes. */
7878 return XVECEXP (base, 0, 0);
7881 return op;
7884 /* More elaborate version of recog's offsettable_memref_p predicate
7885 that works around the ??? note of rs6000_mode_dependent_address.
7886 In particular it accepts
7888 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7890 in 32-bit mode, that the recog predicate rejects. */
7892 static bool
7893 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
7895 bool worst_case;
7897 if (!MEM_P (op))
7898 return false;
7900 /* First mimic offsettable_memref_p. */
7901 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7902 return true;
7904 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7905 the latter predicate knows nothing about the mode of the memory
7906 reference and, therefore, assumes that it is the largest supported
7907 mode (TFmode). As a consequence, legitimate offsettable memory
7908 references are rejected. rs6000_legitimate_offset_address_p contains
7909 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7910 at least with a little bit of help here given that we know the
7911 actual registers used. */
7912 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7913 || GET_MODE_SIZE (reg_mode) == 4);
7914 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7915 true, worst_case);
7918 /* Change register usage conditional on target flags. */
7919 static void
7920 rs6000_conditional_register_usage (void)
7922 int i;
7924 if (TARGET_DEBUG_TARGET)
7925 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7927 /* Set MQ register fixed (already call_used) so that it will not be
7928 allocated. */
7929 fixed_regs[64] = 1;
7931 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
7932 if (TARGET_64BIT)
7933 fixed_regs[13] = call_used_regs[13]
7934 = call_really_used_regs[13] = 1;
7936 /* Conditionally disable FPRs. */
7937 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
7938 for (i = 32; i < 64; i++)
7939 fixed_regs[i] = call_used_regs[i]
7940 = call_really_used_regs[i] = 1;
7942 /* The TOC register is not killed across calls in a way that is
7943 visible to the compiler. */
7944 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7945 call_really_used_regs[2] = 0;
7947 if (DEFAULT_ABI == ABI_V4
7948 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7949 && flag_pic == 2)
7950 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7952 if (DEFAULT_ABI == ABI_V4
7953 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7954 && flag_pic == 1)
7955 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7956 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7957 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7959 if (DEFAULT_ABI == ABI_DARWIN
7960 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
7961 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7962 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7963 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7965 if (TARGET_TOC && TARGET_MINIMAL_TOC)
7966 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7967 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7969 if (TARGET_SPE)
7971 global_regs[SPEFSCR_REGNO] = 1;
7972 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
7973 registers in prologues and epilogues. We no longer use r14
7974 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
7975 pool for link-compatibility with older versions of GCC. Once
7976 "old" code has died out, we can return r14 to the allocation
7977 pool. */
7978 fixed_regs[14]
7979 = call_used_regs[14]
7980 = call_really_used_regs[14] = 1;
7983 if (!TARGET_ALTIVEC && !TARGET_VSX)
7985 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
7986 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
7987 call_really_used_regs[VRSAVE_REGNO] = 1;
7990 if (TARGET_ALTIVEC || TARGET_VSX)
7991 global_regs[VSCR_REGNO] = 1;
7993 if (TARGET_ALTIVEC_ABI)
7995 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
7996 call_used_regs[i] = call_really_used_regs[i] = 1;
7998 /* AIX reserves VR20:31 in non-extended ABI mode. */
7999 if (TARGET_XCOFF)
8000 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8001 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8006 /* Output insns to set DEST equal to the constant SOURCE as a series of
8007 lis, ori and sldi instructions and return TRUE. */
8009 bool
8010 rs6000_emit_set_const (rtx dest, rtx source)
8012 machine_mode mode = GET_MODE (dest);
8013 rtx temp, set;
8014 rtx_insn *insn;
8015 HOST_WIDE_INT c;
8017 gcc_checking_assert (CONST_INT_P (source));
8018 c = INTVAL (source);
8019 switch (mode)
8021 case QImode:
8022 case HImode:
8023 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
8024 return true;
8026 case SImode:
8027 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8029 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
8030 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8031 emit_insn (gen_rtx_SET (VOIDmode, dest,
8032 gen_rtx_IOR (SImode, copy_rtx (temp),
8033 GEN_INT (c & 0xffff))));
8034 break;
8036 case DImode:
8037 if (!TARGET_POWERPC64)
8039 rtx hi, lo;
8041 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8042 DImode);
8043 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8044 DImode);
8045 emit_move_insn (hi, GEN_INT (c >> 32));
8046 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8047 emit_move_insn (lo, GEN_INT (c));
8049 else
8050 rs6000_emit_set_long_const (dest, c);
8051 break;
8053 default:
8054 gcc_unreachable ();
8057 insn = get_last_insn ();
8058 set = single_set (insn);
8059 if (! CONSTANT_P (SET_SRC (set)))
8060 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8062 return true;
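/* Example (illustrative): for SImode c = 0x12345678 the sequence above is

        lis  temp,0x1234     # temp = 0x12340000
        ori  dest,temp,0x5678

   and a REG_EQUAL note records the full constant on the final insn.  */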
8065 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8066 Output insns to set DEST equal to the constant C as a series of
8067 lis, ori and sldi instructions. */
8069 static void
8070 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8072 rtx temp;
8073 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8075 ud1 = c & 0xffff;
8076 c = c >> 16;
8077 ud2 = c & 0xffff;
8078 c = c >> 16;
8079 ud3 = c & 0xffff;
8080 c = c >> 16;
8081 ud4 = c & 0xffff;
8083 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8084 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8085 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8087 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8088 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8090 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8092 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8093 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8094 if (ud1 != 0)
8095 emit_move_insn (dest,
8096 gen_rtx_IOR (DImode, copy_rtx (temp),
8097 GEN_INT (ud1)));
8099 else if (ud3 == 0 && ud4 == 0)
8101 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8103 gcc_assert (ud2 & 0x8000);
8104 emit_move_insn (copy_rtx (temp),
8105 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8106 if (ud1 != 0)
8107 emit_move_insn (copy_rtx (temp),
8108 gen_rtx_IOR (DImode, copy_rtx (temp),
8109 GEN_INT (ud1)));
8110 emit_move_insn (dest,
8111 gen_rtx_ZERO_EXTEND (DImode,
8112 gen_lowpart (SImode,
8113 copy_rtx (temp))));
8115 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8116 || (ud4 == 0 && ! (ud3 & 0x8000)))
8118 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8120 emit_move_insn (copy_rtx (temp),
8121 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8122 if (ud2 != 0)
8123 emit_move_insn (copy_rtx (temp),
8124 gen_rtx_IOR (DImode, copy_rtx (temp),
8125 GEN_INT (ud2)));
8126 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8127 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8128 GEN_INT (16)));
8129 if (ud1 != 0)
8130 emit_move_insn (dest,
8131 gen_rtx_IOR (DImode, copy_rtx (temp),
8132 GEN_INT (ud1)));
8134 else
8136 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8138 emit_move_insn (copy_rtx (temp),
8139 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8140 if (ud3 != 0)
8141 emit_move_insn (copy_rtx (temp),
8142 gen_rtx_IOR (DImode, copy_rtx (temp),
8143 GEN_INT (ud3)));
8145 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8146 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8147 GEN_INT (32)));
8148 if (ud2 != 0)
8149 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8150 gen_rtx_IOR (DImode, copy_rtx (temp),
8151 GEN_INT (ud2 << 16)));
8152 if (ud1 != 0)
8153 emit_move_insn (dest,
8154 gen_rtx_IOR (DImode, copy_rtx (temp),
8155 GEN_INT (ud1)));
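/* Example (illustrative): the worst case above, c = 0x123456789abcdef0
   (ud4..ud1 = 0x1234, 0x5678, 0x9abc, 0xdef0), expands to

        lis   temp,0x1234
        ori   temp,temp,0x5678
        sldi  temp,temp,32
        oris  temp,temp,0x9abc
        ori   dest,temp,0xdef0  */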
8159 /* Helper for the following. Get rid of [r+r] memory refs
8160 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
8162 static void
8163 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8165 if (reload_in_progress)
8166 return;
8168 if (GET_CODE (operands[0]) == MEM
8169 && GET_CODE (XEXP (operands[0], 0)) != REG
8170 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8171 GET_MODE (operands[0]), false))
8172 operands[0]
8173 = replace_equiv_address (operands[0],
8174 copy_addr_to_reg (XEXP (operands[0], 0)));
8176 if (GET_CODE (operands[1]) == MEM
8177 && GET_CODE (XEXP (operands[1], 0)) != REG
8178 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8179 GET_MODE (operands[1]), false))
8180 operands[1]
8181 = replace_equiv_address (operands[1],
8182 copy_addr_to_reg (XEXP (operands[1], 0)));
8185 /* Generate a vector of constants to permute MODE for a little-endian
8186 storage operation by swapping the two halves of a vector. */
8187 static rtvec
8188 rs6000_const_vec (machine_mode mode)
8190 int i, subparts;
8191 rtvec v;
8193 switch (mode)
8195 case V1TImode:
8196 subparts = 1;
8197 break;
8198 case V2DFmode:
8199 case V2DImode:
8200 subparts = 2;
8201 break;
8202 case V4SFmode:
8203 case V4SImode:
8204 subparts = 4;
8205 break;
8206 case V8HImode:
8207 subparts = 8;
8208 break;
8209 case V16QImode:
8210 subparts = 16;
8211 break;
8212 default:
8213 gcc_unreachable();
8216 v = rtvec_alloc (subparts);
8218 for (i = 0; i < subparts / 2; ++i)
8219 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8220 for (i = subparts / 2; i < subparts; ++i)
8221 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8223 return v;
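/* Example (illustrative): for V4SImode this builds the selector
   { 2, 3, 0, 1 }, i.e. the two doubleword halves of the vector are
   swapped, which models the element reordering an lxvd2x or stxvd2x
   performs on a little-endian target.  */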
8226 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8227 for a VSX load or store operation. */
8228 rtx
8229 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8231 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8232 return gen_rtx_VEC_SELECT (mode, source, par);
8235 /* Emit a little-endian load from vector memory location SOURCE to VSX
8236 register DEST in mode MODE. The load is done with two permuting
8237 insns that represent an lxvd2x and an xxpermdi. */
8238 void
8239 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8241 rtx tmp, permute_mem, permute_reg;
8243 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8244 V1TImode). */
8245 if (mode == TImode || mode == V1TImode)
8247 mode = V2DImode;
8248 dest = gen_lowpart (V2DImode, dest);
8249 source = adjust_address (source, V2DImode, 0);
8252 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8253 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8254 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8255 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8256 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
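/* Sketch of the effect (illustrative): lxvd2x loads the two doublewords
   in big-endian element order, so the first permute above models that
   swap and the second swaps the halves back (an xxpermdi with selector 2),
   leaving DEST in correct little-endian element order.  Redundant pairs
   of such swaps can later be optimized away.  */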
8259 /* Emit a little-endian store to vector memory location DEST from VSX
8260 register SOURCE in mode MODE. The store is done with two permuting
8261 insns that represent an xxpermdi and an stxvd2x. */
8262 void
8263 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8265 rtx tmp, permute_src, permute_tmp;
8267 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8268 V1TImode). */
8269 if (mode == TImode || mode == V1TImode)
8271 mode = V2DImode;
8272 dest = adjust_address (dest, V2DImode, 0);
8273 source = gen_lowpart (V2DImode, source);
8276 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8277 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8278 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8279 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8280 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8283 /* Emit a sequence representing a little-endian VSX load or store,
8284 moving data from SOURCE to DEST in mode MODE. This is done
8285 separately from rs6000_emit_move to ensure it is called only
8286 during expand. LE VSX loads and stores introduced later are
8287 handled with a split. The expand-time RTL generation allows
8288 us to optimize away redundant pairs of register-permutes. */
8289 void
8290 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8292 gcc_assert (!BYTES_BIG_ENDIAN
8293 && VECTOR_MEM_VSX_P (mode)
8294 && !gpr_or_gpr_p (dest, source)
8295 && (MEM_P (source) ^ MEM_P (dest)));
8297 if (MEM_P (source))
8299 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8300 rs6000_emit_le_vsx_load (dest, source, mode);
8302 else
8304 if (!REG_P (source))
8305 source = force_reg (mode, source);
8306 rs6000_emit_le_vsx_store (dest, source, mode);
8310 /* Emit a move from SOURCE to DEST in mode MODE. */
8311 void
8312 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8314 rtx operands[2];
8315 operands[0] = dest;
8316 operands[1] = source;
8318 if (TARGET_DEBUG_ADDR)
8320 fprintf (stderr,
8321 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8322 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8323 GET_MODE_NAME (mode),
8324 reload_in_progress,
8325 reload_completed,
8326 can_create_pseudo_p ());
8327 debug_rtx (dest);
8328 fprintf (stderr, "source:\n");
8329 debug_rtx (source);
8332 /* Sanity check. A CONST_WIDE_INT should only appear for modes wider than a host wide int. */
8333 if (CONST_WIDE_INT_P (operands[1])
8334 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8336 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8337 gcc_unreachable ();
8340 /* Check if GCC is setting up a block move that will end up using FP
8341 registers as temporaries. We must make sure this is acceptable. */
8342 if (GET_CODE (operands[0]) == MEM
8343 && GET_CODE (operands[1]) == MEM
8344 && mode == DImode
8345 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8346 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8347 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8348 ? 32 : MEM_ALIGN (operands[0])))
8349 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8350 ? 32
8351 : MEM_ALIGN (operands[1]))))
8352 && ! MEM_VOLATILE_P (operands [0])
8353 && ! MEM_VOLATILE_P (operands [1]))
8355 emit_move_insn (adjust_address (operands[0], SImode, 0),
8356 adjust_address (operands[1], SImode, 0));
8357 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8358 adjust_address (copy_rtx (operands[1]), SImode, 4));
8359 return;
8362 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8363 && !gpc_reg_operand (operands[1], mode))
8364 operands[1] = force_reg (mode, operands[1]);
8366 /* Recognize the case where operand[1] is a reference to thread-local
8367 data and load its address to a register. */
8368 if (tls_referenced_p (operands[1]))
8370 enum tls_model model;
8371 rtx tmp = operands[1];
8372 rtx addend = NULL;
8374 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8376 addend = XEXP (XEXP (tmp, 0), 1);
8377 tmp = XEXP (XEXP (tmp, 0), 0);
8380 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8381 model = SYMBOL_REF_TLS_MODEL (tmp);
8382 gcc_assert (model != 0);
8384 tmp = rs6000_legitimize_tls_address (tmp, model);
8385 if (addend)
8387 tmp = gen_rtx_PLUS (mode, tmp, addend);
8388 tmp = force_operand (tmp, operands[0]);
8390 operands[1] = tmp;
8393 /* Handle the case where reload calls us with an invalid address. */
8394 if (reload_in_progress && mode == Pmode
8395 && (! general_operand (operands[1], mode)
8396 || ! nonimmediate_operand (operands[0], mode)))
8397 goto emit_set;
8399 /* 128-bit constant floating-point values on Darwin should really be
8400 loaded as two parts. */
8401 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8402 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8404 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8405 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8406 DFmode);
8407 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8408 GET_MODE_SIZE (DFmode)),
8409 simplify_gen_subreg (DFmode, operands[1], mode,
8410 GET_MODE_SIZE (DFmode)),
8411 DFmode);
8412 return;
8415 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8416 cfun->machine->sdmode_stack_slot =
8417 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8420 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8421 p1:SD) if p1 is not in the floating point class and p0 is spilled,
8422 since we have no analogous movsd_store for this case. */
8423 if (lra_in_progress && mode == DDmode
8424 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8425 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8426 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8427 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8429 enum reg_class cl;
8430 int regno = REGNO (SUBREG_REG (operands[1]));
8432 if (regno >= FIRST_PSEUDO_REGISTER)
8434 cl = reg_preferred_class (regno);
8435 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8437 if (regno >= 0 && ! FP_REGNO_P (regno))
8439 mode = SDmode;
8440 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8441 operands[1] = SUBREG_REG (operands[1]);
8444 if (lra_in_progress
8445 && mode == SDmode
8446 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8447 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8448 && (REG_P (operands[1])
8449 || (GET_CODE (operands[1]) == SUBREG
8450 && REG_P (SUBREG_REG (operands[1])))))
8452 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8453 ? SUBREG_REG (operands[1]) : operands[1]);
8454 enum reg_class cl;
8456 if (regno >= FIRST_PSEUDO_REGISTER)
8458 cl = reg_preferred_class (regno);
8459 gcc_assert (cl != NO_REGS);
8460 regno = ira_class_hard_regs[cl][0];
8462 if (FP_REGNO_P (regno))
8464 if (GET_MODE (operands[0]) != DDmode)
8465 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8466 emit_insn (gen_movsd_store (operands[0], operands[1]));
8468 else if (INT_REGNO_P (regno))
8469 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8470 else
8471 gcc_unreachable();
8472 return;
8474 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8475 p1:DD)) if p0 is not in the floating point class and p1 is spilled,
8476 since we have no analogous movsd_load for this case. */
8477 if (lra_in_progress && mode == DDmode
8478 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8479 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8480 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8481 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8483 enum reg_class cl;
8484 int regno = REGNO (SUBREG_REG (operands[0]));
8486 if (regno >= FIRST_PSEUDO_REGISTER)
8488 cl = reg_preferred_class (regno);
8489 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8491 if (regno >= 0 && ! FP_REGNO_P (regno))
8493 mode = SDmode;
8494 operands[0] = SUBREG_REG (operands[0]);
8495 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8498 if (lra_in_progress
8499 && mode == SDmode
8500 && (REG_P (operands[0])
8501 || (GET_CODE (operands[0]) == SUBREG
8502 && REG_P (SUBREG_REG (operands[0]))))
8503 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8504 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8506 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8507 ? SUBREG_REG (operands[0]) : operands[0]);
8508 enum reg_class cl;
8510 if (regno >= FIRST_PSEUDO_REGISTER)
8512 cl = reg_preferred_class (regno);
8513 gcc_assert (cl != NO_REGS);
8514 regno = ira_class_hard_regs[cl][0];
8516 if (FP_REGNO_P (regno))
8518 if (GET_MODE (operands[1]) != DDmode)
8519 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8520 emit_insn (gen_movsd_load (operands[0], operands[1]));
8522 else if (INT_REGNO_P (regno))
8523 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8524 else
8525 gcc_unreachable();
8526 return;
8529 if (reload_in_progress
8530 && mode == SDmode
8531 && cfun->machine->sdmode_stack_slot != NULL_RTX
8532 && MEM_P (operands[0])
8533 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8534 && REG_P (operands[1]))
8536 if (FP_REGNO_P (REGNO (operands[1])))
8538 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8539 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8540 emit_insn (gen_movsd_store (mem, operands[1]));
8542 else if (INT_REGNO_P (REGNO (operands[1])))
8544 rtx mem = operands[0];
8545 if (BYTES_BIG_ENDIAN)
8546 mem = adjust_address_nv (mem, mode, 4);
8547 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8548 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8550 else
8551 gcc_unreachable();
8552 return;
8554 if (reload_in_progress
8555 && mode == SDmode
8556 && REG_P (operands[0])
8557 && MEM_P (operands[1])
8558 && cfun->machine->sdmode_stack_slot != NULL_RTX
8559 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8561 if (FP_REGNO_P (REGNO (operands[0])))
8563 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8564 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8565 emit_insn (gen_movsd_load (operands[0], mem));
8567 else if (INT_REGNO_P (REGNO (operands[0])))
8569 rtx mem = operands[1];
8570 if (BYTES_BIG_ENDIAN)
8571 mem = adjust_address_nv (mem, mode, 4);
8572 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8573 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8575 else
8576 gcc_unreachable();
8577 return;
8580 /* FIXME: In the long term, this switch statement should go away
8581 and be replaced by a sequence of tests based on things like
8582 mode == Pmode. */
8583 switch (mode)
8585 case HImode:
8586 case QImode:
8587 if (CONSTANT_P (operands[1])
8588 && GET_CODE (operands[1]) != CONST_INT)
8589 operands[1] = force_const_mem (mode, operands[1]);
8590 break;
8592 case TFmode:
8593 case TDmode:
8594 rs6000_eliminate_indexed_memrefs (operands);
8595 /* fall through */
8597 case DFmode:
8598 case DDmode:
8599 case SFmode:
8600 case SDmode:
8601 if (CONSTANT_P (operands[1])
8602 && ! easy_fp_constant (operands[1], mode))
8603 operands[1] = force_const_mem (mode, operands[1]);
8604 break;
8606 case V16QImode:
8607 case V8HImode:
8608 case V4SFmode:
8609 case V4SImode:
8610 case V4HImode:
8611 case V2SFmode:
8612 case V2SImode:
8613 case V1DImode:
8614 case V2DFmode:
8615 case V2DImode:
8616 case V1TImode:
8617 if (CONSTANT_P (operands[1])
8618 && !easy_vector_constant (operands[1], mode))
8619 operands[1] = force_const_mem (mode, operands[1]);
8620 break;
8622 case SImode:
8623 case DImode:
8624 /* Use the default pattern for the address of ELF small data. */
8625 if (TARGET_ELF
8626 && mode == Pmode
8627 && DEFAULT_ABI == ABI_V4
8628 && (GET_CODE (operands[1]) == SYMBOL_REF
8629 || GET_CODE (operands[1]) == CONST)
8630 && small_data_operand (operands[1], mode))
8632 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8633 return;
8636 if (DEFAULT_ABI == ABI_V4
8637 && mode == Pmode && mode == SImode
8638 && flag_pic == 1 && got_operand (operands[1], mode))
8640 emit_insn (gen_movsi_got (operands[0], operands[1]));
8641 return;
8644 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8645 && TARGET_NO_TOC
8646 && ! flag_pic
8647 && mode == Pmode
8648 && CONSTANT_P (operands[1])
8649 && GET_CODE (operands[1]) != HIGH
8650 && GET_CODE (operands[1]) != CONST_INT)
8652 rtx target = (!can_create_pseudo_p ()
8653 ? operands[0]
8654 : gen_reg_rtx (mode));
8656 /* If this is a function address on -mcall-aixdesc,
8657 convert it to the address of the descriptor. */
8658 if (DEFAULT_ABI == ABI_AIX
8659 && GET_CODE (operands[1]) == SYMBOL_REF
8660 && XSTR (operands[1], 0)[0] == '.')
8662 const char *name = XSTR (operands[1], 0);
8663 rtx new_ref;
8664 while (*name == '.')
8665 name++;
8666 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8667 CONSTANT_POOL_ADDRESS_P (new_ref)
8668 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8669 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8670 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8671 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8672 operands[1] = new_ref;
8675 if (DEFAULT_ABI == ABI_DARWIN)
8677 #if TARGET_MACHO
8678 if (MACHO_DYNAMIC_NO_PIC_P)
8680 /* Take care of any required data indirection. */
8681 operands[1] = rs6000_machopic_legitimize_pic_address (
8682 operands[1], mode, operands[0]);
8683 if (operands[0] != operands[1])
8684 emit_insn (gen_rtx_SET (VOIDmode,
8685 operands[0], operands[1]));
8686 return;
8688 #endif
8689 emit_insn (gen_macho_high (target, operands[1]));
8690 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8691 return;
8694 emit_insn (gen_elf_high (target, operands[1]));
8695 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8696 return;
8699 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8700 and we have put it in the TOC, we just need to make a TOC-relative
8701 reference to it. */
8702 if (TARGET_TOC
8703 && GET_CODE (operands[1]) == SYMBOL_REF
8704 && use_toc_relative_ref (operands[1]))
8705 operands[1] = create_TOC_reference (operands[1], operands[0]);
8706 else if (mode == Pmode
8707 && CONSTANT_P (operands[1])
8708 && GET_CODE (operands[1]) != HIGH
8709 && ((GET_CODE (operands[1]) != CONST_INT
8710 && ! easy_fp_constant (operands[1], mode))
8711 || (GET_CODE (operands[1]) == CONST_INT
8712 && (num_insns_constant (operands[1], mode)
8713 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8714 || (GET_CODE (operands[0]) == REG
8715 && FP_REGNO_P (REGNO (operands[0]))))
8716 && !toc_relative_expr_p (operands[1], false)
8717 && (TARGET_CMODEL == CMODEL_SMALL
8718 || can_create_pseudo_p ()
8719 || (REG_P (operands[0])
8720 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8723 #if TARGET_MACHO
8724 /* Darwin uses a special PIC legitimizer. */
8725 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8727 operands[1] =
8728 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8729 operands[0]);
8730 if (operands[0] != operands[1])
8731 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8732 return;
8734 #endif
8736 /* If we are to limit the number of things we put in the TOC and
8737 this is a symbol plus a constant we can add in one insn,
8738 just put the symbol in the TOC and add the constant. Don't do
8739 this if reload is in progress. */
8740 if (GET_CODE (operands[1]) == CONST
8741 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8742 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8743 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8744 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8745 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8746 && ! side_effects_p (operands[0]))
8748 rtx sym =
8749 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8750 rtx other = XEXP (XEXP (operands[1], 0), 1);
8752 sym = force_reg (mode, sym);
8753 emit_insn (gen_add3_insn (operands[0], sym, other));
8754 return;
8757 operands[1] = force_const_mem (mode, operands[1]);
8759 if (TARGET_TOC
8760 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8761 && constant_pool_expr_p (XEXP (operands[1], 0))
8762 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8763 get_pool_constant (XEXP (operands[1], 0)),
8764 get_pool_mode (XEXP (operands[1], 0))))
8766 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8767 operands[0]);
8768 operands[1] = gen_const_mem (mode, tocref);
8769 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8772 break;
8774 case TImode:
8775 if (!VECTOR_MEM_VSX_P (TImode))
8776 rs6000_eliminate_indexed_memrefs (operands);
8777 break;
8779 case PTImode:
8780 rs6000_eliminate_indexed_memrefs (operands);
8781 break;
8783 default:
8784 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8787 /* Above, we may have called force_const_mem which may have returned
8788 an invalid address. If we can, fix this up; otherwise, reload will
8789 have to deal with it. */
8790 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8791 operands[1] = validize_mem (operands[1]);
8793 emit_set:
8794 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8797 /* Return true if a structure, union or array containing FIELD should be
8798 accessed using `BLKmode'.
8800 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8801 entire thing in a DI and use subregs to access the internals.
8802 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8803 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8804 best thing to do is set structs to BLKmode and avoid Severe Tire
8805 Damage.
8807 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8808 fit into 1, whereas DI still needs two. */
8810 static bool
8811 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
8813 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8814 || (TARGET_E500_DOUBLE && mode == DFmode));
8817 /* Nonzero if we can use a floating-point register to pass this arg. */
8818 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8819 (SCALAR_FLOAT_MODE_P (MODE) \
8820 && (CUM)->fregno <= FP_ARG_MAX_REG \
8821 && TARGET_HARD_FLOAT && TARGET_FPRS)
8823 /* Nonzero if we can use an AltiVec register to pass this arg. */
8824 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8825 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8826 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8827 && TARGET_ALTIVEC_ABI \
8828 && (NAMED))
8830 /* Walk down the type tree of TYPE counting consecutive base elements.
8831 If *MODEP is VOIDmode, then set it to the first valid floating point
8832 or vector type. If a non-floating point or vector type is found, or
8833 if a floating point or vector type that doesn't match a non-VOIDmode
8834 *MODEP is found, then return -1, otherwise return the count in the
8835 sub-tree. */
8837 static int
8838 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
8840 machine_mode mode;
8841 HOST_WIDE_INT size;
8843 switch (TREE_CODE (type))
8845 case REAL_TYPE:
8846 mode = TYPE_MODE (type);
8847 if (!SCALAR_FLOAT_MODE_P (mode))
8848 return -1;
8850 if (*modep == VOIDmode)
8851 *modep = mode;
8853 if (*modep == mode)
8854 return 1;
8856 break;
8858 case COMPLEX_TYPE:
8859 mode = TYPE_MODE (TREE_TYPE (type));
8860 if (!SCALAR_FLOAT_MODE_P (mode))
8861 return -1;
8863 if (*modep == VOIDmode)
8864 *modep = mode;
8866 if (*modep == mode)
8867 return 2;
8869 break;
8871 case VECTOR_TYPE:
8872 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
8873 return -1;
8875 /* Use V4SImode as representative of all 128-bit vector types. */
8876 size = int_size_in_bytes (type);
8877 switch (size)
8879 case 16:
8880 mode = V4SImode;
8881 break;
8882 default:
8883 return -1;
8886 if (*modep == VOIDmode)
8887 *modep = mode;
8889 /* Vector modes are considered to be opaque: two vectors are
8890 equivalent for the purposes of being homogeneous aggregates
8891 if they are the same size. */
8892 if (*modep == mode)
8893 return 1;
8895 break;
8897 case ARRAY_TYPE:
8899 int count;
8900 tree index = TYPE_DOMAIN (type);
8902 /* Can't handle incomplete types or sizes that are not
8903 fixed. */
8904 if (!COMPLETE_TYPE_P (type)
8905 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8906 return -1;
8908 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
8909 if (count == -1
8910 || !index
8911 || !TYPE_MAX_VALUE (index)
8912 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
8913 || !TYPE_MIN_VALUE (index)
8914 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8915 || count < 0)
8916 return -1;
8918 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8919 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8921 /* There must be no padding. */
8922 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8923 return -1;
8925 return count;
8928 case RECORD_TYPE:
8930 int count = 0;
8931 int sub_count;
8932 tree field;
8934 /* Can't handle incomplete types or sizes that are not
8935 fixed. */
8936 if (!COMPLETE_TYPE_P (type)
8937 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8938 return -1;
8940 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8942 if (TREE_CODE (field) != FIELD_DECL)
8943 continue;
8945 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8946 if (sub_count < 0)
8947 return -1;
8948 count += sub_count;
8951 /* There must be no padding. */
8952 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8953 return -1;
8955 return count;
8958 case UNION_TYPE:
8959 case QUAL_UNION_TYPE:
8961 /* These aren't very interesting except in a degenerate case. */
8962 int count = 0;
8963 int sub_count;
8964 tree field;
8966 /* Can't handle incomplete types or sizes that are not
8967 fixed. */
8968 if (!COMPLETE_TYPE_P (type)
8969 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8970 return -1;
8972 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8974 if (TREE_CODE (field) != FIELD_DECL)
8975 continue;
8977 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8978 if (sub_count < 0)
8979 return -1;
8980 count = count > sub_count ? count : sub_count;
8983 /* There must be no padding. */
8984 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8985 return -1;
8987 return count;
8990 default:
8991 break;
8994 return -1;
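/* Worked example (illustrative): for

        struct pt  { double x, y; };
        struct box { struct pt lo, hi; };

   the recursion finds *MODEP = DFmode and returns 4, whereas
   struct { double d; int i; } returns -1 because the int field is
   neither a floating point nor a vector type.  */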
8997 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
8998 float or vector aggregate that shall be passed in FP/vector registers
8999 according to the ELFv2 ABI, return the homogeneous element mode in
9000 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9002 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
9004 static bool
9005 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9006 machine_mode *elt_mode,
9007 int *n_elts)
9009 /* Note that we do not accept complex types at the top level as
9010 homogeneous aggregates; these types are handled via the
9011 targetm.calls.split_complex_arg mechanism. Complex types
9012 can be elements of homogeneous aggregates, however. */
9013 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9015 machine_mode field_mode = VOIDmode;
9016 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9018 if (field_count > 0)
9020 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)?
9021 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9023 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9024 up to AGGR_ARG_NUM_REG registers. */
9025 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9027 if (elt_mode)
9028 *elt_mode = field_mode;
9029 if (n_elts)
9030 *n_elts = field_count;
9031 return true;
9036 if (elt_mode)
9037 *elt_mode = mode;
9038 if (n_elts)
9039 *n_elts = 1;
9040 return false;
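/* Example (illustrative): under ELFv2, struct { double a, b, c; } is a
   homogeneous aggregate with *ELT_MODE = DFmode and *N_ELTS = 3, so it can
   be passed in three consecutive FPRs; add a fourth member of type int and
   rs6000_aggregate_candidate fails, so the normal GPR/stack rules apply.  */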
9043 /* Return a nonzero value to indicate that the function value should be
9044 returned in memory, just as large structures always are. TYPE will be
9045 the data type of the value, and FNTYPE will be the type of the
9046 function doing the returning, or @code{NULL} for libcalls.
9048 The AIX ABI for the RS/6000 specifies that all structures are
9049 returned in memory. The Darwin ABI does the same.
9051 For the Darwin 64 Bit ABI, a function result can be returned in
9052 registers or in memory, depending on the size of the return data
9053 type. If it is returned in registers, the value occupies the same
9054 registers as it would if it were the first and only function
9055 argument. Otherwise, the function places its result in memory at
9056 the location pointed to by GPR3.
9058 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9059 but a draft put them in memory, and GCC used to implement the draft
9060 instead of the final standard. Therefore, aix_struct_return
9061 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9062 compatibility can change DRAFT_V4_STRUCT_RET to override the
9063 default, and -m switches get the final word. See
9064 rs6000_option_override_internal for more details.
9066 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9067 long double support is enabled. These values are returned in memory.
9069 int_size_in_bytes returns -1 for variable size objects, which go in
9070 memory always. The cast to unsigned makes -1 > 8. */
9072 static bool
9073 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9075 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9076 if (TARGET_MACHO
9077 && rs6000_darwin64_abi
9078 && TREE_CODE (type) == RECORD_TYPE
9079 && int_size_in_bytes (type) > 0)
9081 CUMULATIVE_ARGS valcum;
9082 rtx valret;
9084 valcum.words = 0;
9085 valcum.fregno = FP_ARG_MIN_REG;
9086 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9087 /* Do a trial code generation as if this were going to be passed
9088 as an argument; if any part goes in memory, we return NULL. */
9089 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9090 if (valret)
9091 return false;
9092 /* Otherwise fall through to more conventional ABI rules. */
9095 /* The ELFv2 ABI returns homogeneous FP/vector aggregates in registers. */
9096 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9097 NULL, NULL))
9098 return false;
9100 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
9101 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9102 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9103 return false;
9105 if (AGGREGATE_TYPE_P (type)
9106 && (aix_struct_return
9107 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9108 return true;
9110 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9111 modes only exist for GCC vector types if -maltivec. */
9112 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9113 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9114 return false;
9116 /* Return synthetic vectors in memory. */
9117 if (TREE_CODE (type) == VECTOR_TYPE
9118 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9120 static bool warned_for_return_big_vectors = false;
9121 if (!warned_for_return_big_vectors)
9123 warning (0, "GCC vector returned by reference: "
9124 "non-standard ABI extension with no compatibility guarantee");
9125 warned_for_return_big_vectors = true;
9127 return true;
9130 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9131 return true;
9133 return false;
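/* [Editor's illustration, not part of the original source.]  How the
   checks above fall out for 64-bit ELFv2:  */
#if 0
struct r_fpr { double a, b; };  /* homogeneous FP aggregate: FPRs    */
struct r_gpr { char c[16]; };   /* aggregate of <= 16 bytes: GPRs    */
struct r_mem { char c[24]; };   /* larger aggregate: memory (true)   */
#endif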
9136 /* Specify whether values returned in registers should be at the most
9137 significant end of a register. We want aggregates returned by
9138 value to match the way aggregates are passed to functions. */
9140 static bool
9141 rs6000_return_in_msb (const_tree valtype)
9143 return (DEFAULT_ABI == ABI_ELFv2
9144 && BYTES_BIG_ENDIAN
9145 && AGGREGATE_TYPE_P (valtype)
9146 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9149 #ifdef HAVE_AS_GNU_ATTRIBUTE
9150 /* Return TRUE if a call to function FNDECL may be one that
9151 potentially affects the function calling ABI of the object file. */
9153 static bool
9154 call_ABI_of_interest (tree fndecl)
9156 if (symtab->state == EXPANSION)
9158 struct cgraph_node *c_node;
9160 /* Libcalls are always interesting. */
9161 if (fndecl == NULL_TREE)
9162 return true;
9164 /* Any call to an external function is interesting. */
9165 if (DECL_EXTERNAL (fndecl))
9166 return true;
9168 /* Interesting functions that we are emitting in this object file. */
9169 c_node = cgraph_node::get (fndecl);
9170 c_node = c_node->ultimate_alias_target ();
9171 return !c_node->only_called_directly_p ();
9173 return false;
9175 #endif
9177 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9178 for a call to a function whose data type is FNTYPE.
9179 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
9181 For incoming args we set the number of arguments in the prototype large
9182 so we never return a PARALLEL. */
9184 void
9185 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9186 rtx libname ATTRIBUTE_UNUSED, int incoming,
9187 int libcall, int n_named_args,
9188 tree fndecl ATTRIBUTE_UNUSED,
9189 machine_mode return_mode ATTRIBUTE_UNUSED)
9191 static CUMULATIVE_ARGS zero_cumulative;
9193 *cum = zero_cumulative;
9194 cum->words = 0;
9195 cum->fregno = FP_ARG_MIN_REG;
9196 cum->vregno = ALTIVEC_ARG_MIN_REG;
9197 cum->prototype = (fntype && prototype_p (fntype));
9198 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9199 ? CALL_LIBCALL : CALL_NORMAL);
9200 cum->sysv_gregno = GP_ARG_MIN_REG;
9201 cum->stdarg = stdarg_p (fntype);
9203 cum->nargs_prototype = 0;
9204 if (incoming || cum->prototype)
9205 cum->nargs_prototype = n_named_args;
9207 /* Check for a longcall attribute. */
9208 if ((!fntype && rs6000_default_long_calls)
9209 || (fntype
9210 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9211 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9212 cum->call_cookie |= CALL_LONG;
9214 if (TARGET_DEBUG_ARG)
9216 fprintf (stderr, "\ninit_cumulative_args:");
9217 if (fntype)
9219 tree ret_type = TREE_TYPE (fntype);
9220 fprintf (stderr, " ret code = %s,",
9221 get_tree_code_name (TREE_CODE (ret_type)));
9224 if (cum->call_cookie & CALL_LONG)
9225 fprintf (stderr, " longcall,");
9227 fprintf (stderr, " proto = %d, nargs = %d\n",
9228 cum->prototype, cum->nargs_prototype);
9231 #ifdef HAVE_AS_GNU_ATTRIBUTE
9232 if (DEFAULT_ABI == ABI_V4)
9234 cum->escapes = call_ABI_of_interest (fndecl);
9235 if (cum->escapes)
9237 tree return_type;
9239 if (fntype)
9241 return_type = TREE_TYPE (fntype);
9242 return_mode = TYPE_MODE (return_type);
9244 else
9245 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9247 if (return_type != NULL)
9249 if (TREE_CODE (return_type) == RECORD_TYPE
9250 && TYPE_TRANSPARENT_AGGR (return_type))
9252 return_type = TREE_TYPE (first_field (return_type));
9253 return_mode = TYPE_MODE (return_type);
9255 if (AGGREGATE_TYPE_P (return_type)
9256 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9257 <= 8))
9258 rs6000_returns_struct = true;
9260 if (SCALAR_FLOAT_MODE_P (return_mode))
9261 rs6000_passes_float = true;
9262 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9263 || SPE_VECTOR_MODE (return_mode))
9264 rs6000_passes_vector = true;
9267 #endif
9269 if (fntype
9270 && !TARGET_ALTIVEC
9271 && TARGET_ALTIVEC_ABI
9272 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9274 error ("cannot return value in vector register because"
9275 " altivec instructions are disabled, use -maltivec"
9276 " to enable them");
9280 /* Return true if TYPE must be passed on the stack and not in registers. */
9282 static bool
9283 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9285 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9286 return must_pass_in_stack_var_size (mode, type);
9287 else
9288 return must_pass_in_stack_var_size_or_pad (mode, type);
9291 /* If defined, a C expression which determines whether, and in which
9292 direction, to pad out an argument with extra space. The value
9293 should be of type `enum direction': either `upward' to pad above
9294 the argument, `downward' to pad below, or `none' to inhibit
9295 padding.
9297 For the AIX ABI structs are always stored left shifted in their
9298 argument slot. */
9300 enum direction
9301 function_arg_padding (machine_mode mode, const_tree type)
9303 #ifndef AGGREGATE_PADDING_FIXED
9304 #define AGGREGATE_PADDING_FIXED 0
9305 #endif
9306 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9307 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9308 #endif
9310 if (!AGGREGATE_PADDING_FIXED)
9312 /* GCC used to pass structures of the same size as integer types as
9313 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9314 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9315 passed padded downward, except that -mstrict-align further
9316 muddied the water in that multi-component structures of 2 and 4
9317 bytes in size were passed padded upward.
9319 The following arranges for best compatibility with previous
9320 versions of gcc, but removes the -mstrict-align dependency. */
9321 if (BYTES_BIG_ENDIAN)
9323 HOST_WIDE_INT size = 0;
9325 if (mode == BLKmode)
9327 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9328 size = int_size_in_bytes (type);
9330 else
9331 size = GET_MODE_SIZE (mode);
9333 if (size == 1 || size == 2 || size == 4)
9334 return downward;
9336 return upward;
9339 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9341 if (type != 0 && AGGREGATE_TYPE_P (type))
9342 return upward;
9345 /* Fall back to the default. */
9346 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
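/* [Editor's illustration, not part of the original source.]  On a
   big-endian target with !AGGREGATE_PADDING_FIXED, the rule above
   gives:  */
#if 0
struct pad_down { short s; };   /* size 2: padded downward, like an int */
struct pad_up   { char c[3]; }; /* size 3: padded upward                */
#endif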
9349 /* If defined, a C expression that gives the alignment boundary, in bits,
9350 of an argument with the specified mode and type. If it is not defined,
9351 PARM_BOUNDARY is used for all arguments.
9353 V.4 wants long longs and doubles to be double word aligned. Just
9354 testing the mode size is a boneheaded way to do this as it means
9355 that other types such as complex int are also double word aligned.
9356 However, we're stuck with this because changing the ABI might break
9357 existing library interfaces.
9359 Doubleword align SPE vectors.
9360 Quadword align Altivec/VSX vectors.
9361 Quadword align large synthetic vector types. */
9363 static unsigned int
9364 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9366 machine_mode elt_mode;
9367 int n_elts;
9369 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9371 if (DEFAULT_ABI == ABI_V4
9372 && (GET_MODE_SIZE (mode) == 8
9373 || (TARGET_HARD_FLOAT
9374 && TARGET_FPRS
9375 && (mode == TFmode || mode == TDmode))))
9376 return 64;
9377 else if (SPE_VECTOR_MODE (mode)
9378 || (type && TREE_CODE (type) == VECTOR_TYPE
9379 && int_size_in_bytes (type) >= 8
9380 && int_size_in_bytes (type) < 16))
9381 return 64;
9382 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9383 || (type && TREE_CODE (type) == VECTOR_TYPE
9384 && int_size_in_bytes (type) >= 16))
9385 return 128;
9387 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9388 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9389 -mcompat-align-parm is used. */
9390 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9391 || DEFAULT_ABI == ABI_ELFv2)
9392 && type && TYPE_ALIGN (type) > 64)
9394 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9395 or homogeneous float/vector aggregates here. We already handled
9396 vector aggregates above, but still need to check for float here. */
9397 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9398 && !SCALAR_FLOAT_MODE_P (elt_mode));
9400 /* We used to check for BLKmode instead of the above aggregate type
9401 check. Warn when this results in any difference to the ABI. */
9402 if (aggregate_p != (mode == BLKmode))
9404 static bool warned;
9405 if (!warned && warn_psabi)
9407 warned = true;
9408 inform (input_location,
9409 "the ABI of passing aggregates with %d-byte alignment"
9410 " has changed in GCC 5",
9411 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9415 if (aggregate_p)
9416 return 128;
9419 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9420 implement the "aggregate type" check as a BLKmode check here; this
9421 means certain aggregate types are in fact not aligned. */
9422 if (TARGET_MACHO && rs6000_darwin64_abi
9423 && mode == BLKmode
9424 && type && TYPE_ALIGN (type) > 64)
9425 return 128;
9427 return PARM_BOUNDARY;
9430 /* The offset in words to the start of the parameter save area. */
9432 static unsigned int
9433 rs6000_parm_offset (void)
9435 return (DEFAULT_ABI == ABI_V4 ? 2
9436 : DEFAULT_ABI == ABI_ELFv2 ? 4
9437 : 6);
9440 /* For a function parm of MODE and TYPE, return the starting word in
9441 the parameter area. NWORDS of the parameter area are already used. */
9443 static unsigned int
9444 rs6000_parm_start (machine_mode mode, const_tree type,
9445 unsigned int nwords)
9447 unsigned int align;
9449 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9450 return nwords + (-(rs6000_parm_offset () + nwords) & align);
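/* [Editor's worked example, not part of the original source.]  Assume
   ELFv2 on a 64-bit target, so rs6000_parm_offset () == 4 and
   PARM_BOUNDARY == 64.  For a 16-byte-aligned argument,
   rs6000_function_arg_boundary returns 128, hence
   align == 128 / 64 - 1 == 1.  With nwords == 1 already used:

       1 + (-(4 + 1) & 1) == 2

   so one padding word is skipped and the argument starts at word
   4 + 2 == 6 of the frame, which is a 16-byte boundary.  */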
9453 /* Compute the size (in words) of a function argument. */
9455 static unsigned long
9456 rs6000_arg_size (machine_mode mode, const_tree type)
9458 unsigned long size;
9460 if (mode != BLKmode)
9461 size = GET_MODE_SIZE (mode);
9462 else
9463 size = int_size_in_bytes (type);
9465 if (TARGET_32BIT)
9466 return (size + 3) >> 2;
9467 else
9468 return (size + 7) >> 3;
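/* [Editor's worked example, not part of the original source.]  A
   9-byte BLKmode struct occupies (9 + 3) >> 2 == 3 words on 32-bit
   targets and (9 + 7) >> 3 == 2 doublewords on 64-bit targets.  */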
9471 /* Use this to flush pending int fields. */
9473 static void
9474 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9475 HOST_WIDE_INT bitpos, int final)
9477 unsigned int startbit, endbit;
9478 int intregs, intoffset;
9479 machine_mode mode;
9481 /* Handle the situations where a float is taking up the first half
9482 of the GPR, and the other half is empty (typically due to
9483 alignment restrictions). We can detect this by a 8-byte-aligned
9484 int field, or by seeing that this is the final flush for this
9485 argument. Count the word and continue on. */
9486 if (cum->floats_in_gpr == 1
9487 && (cum->intoffset % 64 == 0
9488 || (cum->intoffset == -1 && final)))
9490 cum->words++;
9491 cum->floats_in_gpr = 0;
9494 if (cum->intoffset == -1)
9495 return;
9497 intoffset = cum->intoffset;
9498 cum->intoffset = -1;
9499 cum->floats_in_gpr = 0;
9501 if (intoffset % BITS_PER_WORD != 0)
9503 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9504 MODE_INT, 0);
9505 if (mode == BLKmode)
9507 /* We couldn't find an appropriate mode, which happens,
9508 e.g., in packed structs when there are 3 bytes to load.
9509 Back intoffset back to the beginning of the word in this
9510 case. */
9511 intoffset = intoffset & -BITS_PER_WORD;
9515 startbit = intoffset & -BITS_PER_WORD;
9516 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9517 intregs = (endbit - startbit) / BITS_PER_WORD;
9518 cum->words += intregs;
9519 /* words should be unsigned. */
9520 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9522 int pad = (endbit/BITS_PER_WORD) - cum->words;
9523 cum->words += pad;
9527 /* The darwin64 ABI calls for us to recurse down through structs,
9528 looking for elements passed in registers. Unfortunately, we have
9529 to track int register count here also because of misalignments
9530 in powerpc alignment mode. */
9532 static void
9533 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9534 const_tree type,
9535 HOST_WIDE_INT startbitpos)
9537 tree f;
9539 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9540 if (TREE_CODE (f) == FIELD_DECL)
9542 HOST_WIDE_INT bitpos = startbitpos;
9543 tree ftype = TREE_TYPE (f);
9544 machine_mode mode;
9545 if (ftype == error_mark_node)
9546 continue;
9547 mode = TYPE_MODE (ftype);
9549 if (DECL_SIZE (f) != 0
9550 && tree_fits_uhwi_p (bit_position (f)))
9551 bitpos += int_bit_position (f);
9553 /* ??? FIXME: else assume zero offset. */
9555 if (TREE_CODE (ftype) == RECORD_TYPE)
9556 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9557 else if (USE_FP_FOR_ARG_P (cum, mode))
9559 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9560 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9561 cum->fregno += n_fpregs;
9562 /* Single-precision floats present a special problem for
9563 us, because they are smaller than an 8-byte GPR, and so
9564 the structure-packing rules combined with the standard
9565 varargs behavior mean that we want to pack float/float
9566 and float/int combinations into a single register's
9567 space. This is complicated by the arg advance flushing,
9568 which works on arbitrarily large groups of int-type
9569 fields. */
9570 if (mode == SFmode)
9572 if (cum->floats_in_gpr == 1)
9574 /* Two floats in a word; count the word and reset
9575 the float count. */
9576 cum->words++;
9577 cum->floats_in_gpr = 0;
9579 else if (bitpos % 64 == 0)
9581 /* A float at the beginning of an 8-byte word;
9582 count it and put off adjusting cum->words until
9583 we see if an arg advance flush is going to do it
9584 for us. */
9585 cum->floats_in_gpr++;
9587 else
9589 /* The float is at the end of a word, preceded
9590 by integer fields, so the arg advance flush
9591 just above has already set cum->words and
9592 everything is taken care of. */
9595 else
9596 cum->words += n_fpregs;
9598 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9600 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9601 cum->vregno++;
9602 cum->words += 2;
9604 else if (cum->intoffset == -1)
9605 cum->intoffset = bitpos;
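/* [Editor's illustration, not part of the original source.]  The
   SFmode cases above pack as follows under darwin64:  */
#if 0
struct two_floats { float f, g; };     /* f starts the doubleword
					  (floats_in_gpr becomes 1);
					  g completes it: one GPR.   */
struct int_float  { int i; float f; }; /* the flush before f already
					  counted the shared word.   */
#endif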
9609 /* Check for an item that needs to be considered specially under the darwin 64
9610 bit ABI. These are record types where the mode is BLK or the structure is
9611 8 bytes in size. */
9612 static int
9613 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9615 return rs6000_darwin64_abi
9616 && ((mode == BLKmode
9617 && TREE_CODE (type) == RECORD_TYPE
9618 && int_size_in_bytes (type) > 0)
9619 || (type && TREE_CODE (type) == RECORD_TYPE
9620 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9623 /* Update the data in CUM to advance over an argument
9624 of mode MODE and data type TYPE.
9625 (TYPE is null for libcalls where that information may not be available.)
9627 Note that for args passed by reference, function_arg will be called
9628 with MODE and TYPE set to that of the pointer to the arg, not the arg
9629 itself. */
9631 static void
9632 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9633 const_tree type, bool named, int depth)
9635 machine_mode elt_mode;
9636 int n_elts;
9638 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9640 /* Only tick off an argument if we're not recursing. */
9641 if (depth == 0)
9642 cum->nargs_prototype--;
9644 #ifdef HAVE_AS_GNU_ATTRIBUTE
9645 if (DEFAULT_ABI == ABI_V4
9646 && cum->escapes)
9648 if (SCALAR_FLOAT_MODE_P (mode))
9649 rs6000_passes_float = true;
9650 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9651 rs6000_passes_vector = true;
9652 else if (SPE_VECTOR_MODE (mode)
9653 && !cum->stdarg
9654 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9655 rs6000_passes_vector = true;
9657 #endif
9659 if (TARGET_ALTIVEC_ABI
9660 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9661 || (type && TREE_CODE (type) == VECTOR_TYPE
9662 && int_size_in_bytes (type) == 16)))
9664 bool stack = false;
9666 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9668 cum->vregno += n_elts;
9670 if (!TARGET_ALTIVEC)
9671 error ("cannot pass argument in vector register because"
9672 " altivec instructions are disabled, use -maltivec"
9673 " to enable them");
9675 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9676 even if it is going to be passed in a vector register.
9677 Darwin does the same for variable-argument functions. */
9678 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9679 && TARGET_64BIT)
9680 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9681 stack = true;
9683 else
9684 stack = true;
9686 if (stack)
9688 int align;
9690 /* Vector parameters must be 16-byte aligned. In 32-bit
9691 mode this means we need to take into account the offset
9692 to the parameter save area. In 64-bit mode, they just
9693 have to start on an even word, since the parameter save
9694 area is 16-byte aligned. */
9695 if (TARGET_32BIT)
9696 align = -(rs6000_parm_offset () + cum->words) & 3;
9697 else
9698 align = cum->words & 1;
9699 cum->words += align + rs6000_arg_size (mode, type);
9701 if (TARGET_DEBUG_ARG)
9703 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9704 cum->words, align);
9705 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9706 cum->nargs_prototype, cum->prototype,
9707 GET_MODE_NAME (mode));
9711 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9712 && !cum->stdarg
9713 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9714 cum->sysv_gregno++;
9716 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9718 int size = int_size_in_bytes (type);
9719 /* Variable sized types have size == -1 and are
9720 treated as if consisting entirely of ints.
9721 Pad to 16 byte boundary if needed. */
9722 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9723 && (cum->words % 2) != 0)
9724 cum->words++;
9725 /* For varargs, we can just go up by the size of the struct. */
9726 if (!named)
9727 cum->words += (size + 7) / 8;
9728 else
9730 /* It is tempting to say int register count just goes up by
9731 sizeof(type)/8, but this is wrong in a case such as
9732 { int; double; int; } [powerpc alignment]. We have to
9733 grovel through the fields for these too. */
9734 cum->intoffset = 0;
9735 cum->floats_in_gpr = 0;
9736 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9737 rs6000_darwin64_record_arg_advance_flush (cum,
9738 size * BITS_PER_UNIT, 1);
9740 if (TARGET_DEBUG_ARG)
9742 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9743 cum->words, TYPE_ALIGN (type), size);
9744 fprintf (stderr,
9745 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9746 cum->nargs_prototype, cum->prototype,
9747 GET_MODE_NAME (mode));
9750 else if (DEFAULT_ABI == ABI_V4)
9752 if (TARGET_HARD_FLOAT && TARGET_FPRS
9753 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9754 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9755 || (mode == TFmode && !TARGET_IEEEQUAD)
9756 || mode == SDmode || mode == DDmode || mode == TDmode))
9758 /* _Decimal128 must use an even/odd register pair. This assumes
9759 that the register number is odd when fregno is odd. */
9760 if (mode == TDmode && (cum->fregno % 2) == 1)
9761 cum->fregno++;
9763 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9764 <= FP_ARG_V4_MAX_REG)
9765 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9766 else
9768 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9769 if (mode == DFmode || mode == TFmode
9770 || mode == DDmode || mode == TDmode)
9771 cum->words += cum->words & 1;
9772 cum->words += rs6000_arg_size (mode, type);
9775 else
9777 int n_words = rs6000_arg_size (mode, type);
9778 int gregno = cum->sysv_gregno;
9780 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9781 (r7,r8) or (r9,r10). As does any other 2 word item such
9782 as complex int due to a historical mistake. */
9783 if (n_words == 2)
9784 gregno += (1 - gregno) & 1;
9786 /* Multi-reg args are not split between registers and stack. */
9787 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9789 /* Long long and SPE vectors are aligned on the stack.
9790 So are other 2 word items such as complex int due to
9791 a historical mistake. */
9792 if (n_words == 2)
9793 cum->words += cum->words & 1;
9794 cum->words += n_words;
9797 /* Note: we keep accumulating gregno even after we have started
9798 spilling to the stack; expand_builtin_saveregs relies on seeing
9799 gregno past GP_ARG_MAX_REG to know that spilling has started. */
9800 cum->sysv_gregno = gregno + n_words;
9803 if (TARGET_DEBUG_ARG)
9805 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9806 cum->words, cum->fregno);
9807 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9808 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9809 fprintf (stderr, "mode = %4s, named = %d\n",
9810 GET_MODE_NAME (mode), named);
9813 else
9815 int n_words = rs6000_arg_size (mode, type);
9816 int start_words = cum->words;
9817 int align_words = rs6000_parm_start (mode, type, start_words);
9819 cum->words = align_words + n_words;
9821 if (SCALAR_FLOAT_MODE_P (elt_mode)
9822 && TARGET_HARD_FLOAT && TARGET_FPRS)
9824 /* _Decimal128 must be passed in an even/odd float register pair.
9825 This assumes that the register number is odd when fregno is
9826 odd. */
9827 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9828 cum->fregno++;
9829 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9832 if (TARGET_DEBUG_ARG)
9834 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9835 cum->words, cum->fregno);
9836 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9837 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9838 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9839 named, align_words - start_words, depth);
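/* [Editor's worked example, not part of the original source.]
   ABI_V4, 32-bit: advancing over a 'long long' (n_words == 2) with
   sysv_gregno == 4 first rounds up via gregno += (1 - gregno) & 1 to
   r5, claims the pair (r5,r6), and leaves sysv_gregno == 7.  */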
9844 static void
9845 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
9846 const_tree type, bool named)
9848 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9852 static rtx
9853 spe_build_register_parallel (machine_mode mode, int gregno)
9855 rtx r1, r3, r5, r7;
9857 switch (mode)
9859 case DFmode:
9860 r1 = gen_rtx_REG (DImode, gregno);
9861 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9862 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9864 case DCmode:
9865 case TFmode:
9866 r1 = gen_rtx_REG (DImode, gregno);
9867 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9868 r3 = gen_rtx_REG (DImode, gregno + 2);
9869 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9870 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9872 case TCmode:
9873 r1 = gen_rtx_REG (DImode, gregno);
9874 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9875 r3 = gen_rtx_REG (DImode, gregno + 2);
9876 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9877 r5 = gen_rtx_REG (DImode, gregno + 4);
9878 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9879 r7 = gen_rtx_REG (DImode, gregno + 6);
9880 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9881 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9883 default:
9884 gcc_unreachable ();
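/* [Editor's illustration, not part of the original source.]  For a
   DCmode argument with gregno == 5, the routine above builds roughly

     (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                   (expr_list (reg:DI 7) (const_int 8))])

   i.e. the two DImode halves land in the GPR pairs (r5,r6) and
   (r7,r8).  */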
9888 /* Determine where to put a SIMD argument on the SPE. */
9889 static rtx
9890 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
9891 const_tree type)
9893 int gregno = cum->sysv_gregno;
9895 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9896 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9897 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9898 || mode == DCmode || mode == TCmode))
9900 int n_words = rs6000_arg_size (mode, type);
9902 /* Doubles go in an odd/even register pair (r5/r6, etc). */
9903 if (mode == DFmode)
9904 gregno += (1 - gregno) & 1;
9906 /* Multi-reg args are not split between registers and stack. */
9907 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9908 return NULL_RTX;
9910 return spe_build_register_parallel (mode, gregno);
9912 if (cum->stdarg)
9914 int n_words = rs6000_arg_size (mode, type);
9916 /* SPE vectors are put in odd registers. */
9917 if (n_words == 2 && (gregno & 1) == 0)
9918 gregno += 1;
9920 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
9922 rtx r1, r2;
9923 machine_mode m = SImode;
9925 r1 = gen_rtx_REG (m, gregno);
9926 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
9927 r2 = gen_rtx_REG (m, gregno + 1);
9928 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
9929 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
9931 else
9932 return NULL_RTX;
9934 else
9936 if (gregno <= GP_ARG_MAX_REG)
9937 return gen_rtx_REG (mode, gregno);
9938 else
9939 return NULL_RTX;
9943 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
9944 structure between cum->intoffset and bitpos to integer registers. */
9946 static void
9947 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
9948 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
9950 machine_mode mode;
9951 unsigned int regno;
9952 unsigned int startbit, endbit;
9953 int this_regno, intregs, intoffset;
9954 rtx reg;
9956 if (cum->intoffset == -1)
9957 return;
9959 intoffset = cum->intoffset;
9960 cum->intoffset = -1;
9962 /* If this is the trailing part of a word, try to only load that
9963 much into the register. Otherwise load the whole register. Note
9964 that in the latter case we may pick up unwanted bits. It's not a
9965 problem at the moment, but we may wish to revisit this. */
9967 if (intoffset % BITS_PER_WORD != 0)
9969 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9970 MODE_INT, 0);
9971 if (mode == BLKmode)
9973 /* We couldn't find an appropriate mode, which happens,
9974 e.g., in packed structs when there are 3 bytes to load.
9975 Back intoffset back to the beginning of the word in this
9976 case. */
9977 intoffset = intoffset & -BITS_PER_WORD;
9978 mode = word_mode;
9981 else
9982 mode = word_mode;
9984 startbit = intoffset & -BITS_PER_WORD;
9985 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9986 intregs = (endbit - startbit) / BITS_PER_WORD;
9987 this_regno = cum->words + intoffset / BITS_PER_WORD;
9989 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
9990 cum->use_stack = 1;
9992 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
9993 if (intregs <= 0)
9994 return;
9996 intoffset /= BITS_PER_UNIT;
9997 do
9999 regno = GP_ARG_MIN_REG + this_regno;
10000 reg = gen_rtx_REG (mode, regno);
10001 rvec[(*k)++] =
10002 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10004 this_regno += 1;
10005 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10006 mode = word_mode;
10007 intregs -= 1;
10009 while (intregs > 0);
10012 /* Recursive workhorse for the following. */
10014 static void
10015 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10016 HOST_WIDE_INT startbitpos, rtx rvec[],
10017 int *k)
10019 tree f;
10021 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10022 if (TREE_CODE (f) == FIELD_DECL)
10024 HOST_WIDE_INT bitpos = startbitpos;
10025 tree ftype = TREE_TYPE (f);
10026 machine_mode mode;
10027 if (ftype == error_mark_node)
10028 continue;
10029 mode = TYPE_MODE (ftype);
10031 if (DECL_SIZE (f) != 0
10032 && tree_fits_uhwi_p (bit_position (f)))
10033 bitpos += int_bit_position (f);
10035 /* ??? FIXME: else assume zero offset. */
10037 if (TREE_CODE (ftype) == RECORD_TYPE)
10038 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10039 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10041 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10042 #if 0
10043 switch (mode)
10045 case SCmode: mode = SFmode; break;
10046 case DCmode: mode = DFmode; break;
10047 case TCmode: mode = TFmode; break;
10048 default: break;
10050 #endif
10051 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10052 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10054 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10055 && (mode == TFmode || mode == TDmode));
10056 /* Long double or _Decimal128 split over regs and memory. */
10057 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10058 cum->use_stack = 1;
10060 rvec[(*k)++]
10061 = gen_rtx_EXPR_LIST (VOIDmode,
10062 gen_rtx_REG (mode, cum->fregno++),
10063 GEN_INT (bitpos / BITS_PER_UNIT));
10064 if (mode == TFmode || mode == TDmode)
10065 cum->fregno++;
10067 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10069 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10070 rvec[(*k)++]
10071 = gen_rtx_EXPR_LIST (VOIDmode,
10072 gen_rtx_REG (mode, cum->vregno++),
10073 GEN_INT (bitpos / BITS_PER_UNIT));
10075 else if (cum->intoffset == -1)
10076 cum->intoffset = bitpos;
10080 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10081 the register(s) to be used for each field and subfield of a struct
10082 being passed by value, along with the offset of where the
10083 register's value may be found in the block. FP fields go in FP
10084 register, vector fields go in vector registers, and everything
10085 else goes in int registers, packed as in memory.
10087 This code is also used for function return values. RETVAL indicates
10088 whether this is the case.
10090 Much of this is taken from the SPARC V9 port, which has a similar
10091 calling convention. */
10093 static rtx
10094 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10095 bool named, bool retval)
10097 rtx rvec[FIRST_PSEUDO_REGISTER];
10098 int k = 1, kbase = 1;
10099 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10100 /* This is a copy; modifications are not visible to our caller. */
10101 CUMULATIVE_ARGS copy_cum = *orig_cum;
10102 CUMULATIVE_ARGS *cum = &copy_cum;
10104 /* Pad to 16 byte boundary if needed. */
10105 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10106 && (cum->words % 2) != 0)
10107 cum->words++;
10109 cum->intoffset = 0;
10110 cum->use_stack = 0;
10111 cum->named = named;
10113 /* Put entries into rvec[] for individual FP and vector fields, and
10114 for the chunks of memory that go in int regs. Note we start at
10115 element 1; 0 is reserved for an indication of using memory, and
10116 may or may not be filled in below. */
10117 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10118 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10120 /* If any part of the struct went on the stack put all of it there.
10121 This hack is because the generic code for
10122 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10123 parts of the struct are not at the beginning. */
10124 if (cum->use_stack)
10126 if (retval)
10127 return NULL_RTX; /* doesn't go in registers at all */
10128 kbase = 0;
10129 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10131 if (k > 1 || cum->use_stack)
10132 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10133 else
10134 return NULL_RTX;
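/* [Editor's illustration, not part of the original source.]  For a
   named 'struct { double d; int i; }' argument with registers
   available, the recursion above emits an FPR element at byte offset
   0 for d, and the final flush adds a word_mode GPR element at byte
   offset 8 covering i; had any part spilled, use_stack would force
   the whole struct to memory via the NULL_RTX first element.  */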
10137 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10139 static rtx
10140 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10141 int align_words)
10143 int n_units;
10144 int i, k;
10145 rtx rvec[GP_ARG_NUM_REG + 1];
10147 if (align_words >= GP_ARG_NUM_REG)
10148 return NULL_RTX;
10150 n_units = rs6000_arg_size (mode, type);
10152 /* Optimize the simple case where the arg fits in one gpr, except in
10153 the case of BLKmode due to assign_parms assuming that registers are
10154 BITS_PER_WORD wide. */
10155 if (n_units == 0
10156 || (n_units == 1 && mode != BLKmode))
10157 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10159 k = 0;
10160 if (align_words + n_units > GP_ARG_NUM_REG)
10161 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10162 using a magic NULL_RTX component.
10163 This is not strictly correct. Only some of the arg belongs in
10164 memory, not all of it. However, the normal scheme using
10165 function_arg_partial_nregs can result in unusual subregs, eg.
10166 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10167 store the whole arg to memory is often more efficient than code
10168 to store pieces, and we know that space is available in the right
10169 place for the whole arg. */
10170 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10172 i = 0;
10173 do
10175 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10176 rtx off = GEN_INT (i++ * 4);
10177 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10179 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10181 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10184 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10185 but must also be copied into the parameter save area starting at
10186 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10187 to the GPRs and/or memory. Return the number of elements used. */
10189 static int
10190 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10191 int align_words, rtx *rvec)
10193 int k = 0;
10195 if (align_words < GP_ARG_NUM_REG)
10197 int n_words = rs6000_arg_size (mode, type);
10199 if (align_words + n_words > GP_ARG_NUM_REG
10200 || mode == BLKmode
10201 || (TARGET_32BIT && TARGET_POWERPC64))
10203 /* If this is partially on the stack, then we only
10204 include the portion actually in registers here. */
10205 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10206 int i = 0;
10208 if (align_words + n_words > GP_ARG_NUM_REG)
10210 /* Not all of the arg fits in gprs. Say that it goes in memory
10211 too, using a magic NULL_RTX component. Also see comment in
10212 rs6000_mixed_function_arg for why the normal
10213 function_arg_partial_nregs scheme doesn't work in this case. */
10214 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10216 do
10219 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10220 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10221 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10223 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10225 else
10227 /* The whole arg fits in gprs. */
10228 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10229 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10232 else
10234 /* It's entirely in memory. */
10235 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10238 return k;
10241 /* RVEC is a vector of K components of an argument of mode MODE.
10242 Construct the final function_arg return value from it. */
10244 static rtx
10245 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10247 gcc_assert (k >= 1);
10249 /* Avoid returning a PARALLEL in the trivial cases. */
10250 if (k == 1)
10252 if (XEXP (rvec[0], 0) == NULL_RTX)
10253 return NULL_RTX;
10255 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10256 return XEXP (rvec[0], 0);
10259 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10262 /* Determine where to put an argument to a function.
10263 Value is zero to push the argument on the stack,
10264 or a hard register in which to store the argument.
10266 MODE is the argument's machine mode.
10267 TYPE is the data type of the argument (as a tree).
10268 This is null for libcalls where that information may
10269 not be available.
10270 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10271 the preceding args and about the function being called. It is
10272 not modified in this routine.
10273 NAMED is nonzero if this argument is a named parameter
10274 (otherwise it is an extra parameter matching an ellipsis).
10276 On RS/6000 the first eight words of non-FP are normally in registers
10277 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10278 Under V.4, the first 8 FP args are in registers.
10280 If this is floating-point and no prototype is specified, we use
10281 both an FP and integer register (or possibly FP reg and stack). Library
10282 functions (when CALL_LIBCALL is set) always have the proper types for args,
10283 so we can pass the FP value just in one register. emit_library_function
10284 doesn't support PARALLEL anyway.
10286 Note that for args passed by reference, function_arg will be called
10287 with MODE and TYPE set to that of the pointer to the arg, not the arg
10288 itself. */
10290 static rtx
10291 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10292 const_tree type, bool named)
10294 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10295 enum rs6000_abi abi = DEFAULT_ABI;
10296 machine_mode elt_mode;
10297 int n_elts;
10299 /* Return a marker to indicate whether CR1 needs to set or clear the
10300 bit that V.4 uses to say fp args were passed in registers.
10301 Assume that we don't need the marker for software floating point,
10302 or compiler generated library calls. */
10303 if (mode == VOIDmode)
10305 if (abi == ABI_V4
10306 && (cum->call_cookie & CALL_LIBCALL) == 0
10307 && (cum->stdarg
10308 || (cum->nargs_prototype < 0
10309 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10311 /* For the SPE, we need to crxor CR6 always. */
10312 if (TARGET_SPE_ABI)
10313 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10314 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10315 return GEN_INT (cum->call_cookie
10316 | ((cum->fregno == FP_ARG_MIN_REG)
10317 ? CALL_V4_SET_FP_ARGS
10318 : CALL_V4_CLEAR_FP_ARGS));
10321 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10324 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10326 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10328 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10329 if (rslt != NULL_RTX)
10330 return rslt;
10331 /* Else fall through to usual handling. */
10334 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10336 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10337 rtx r, off;
10338 int i, k = 0;
10340 /* Do we also need to pass this argument in the parameter
10341 save area? */
10342 if (TARGET_64BIT && ! cum->prototype)
10344 int align_words = (cum->words + 1) & ~1;
10345 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10348 /* Describe where this argument goes in the vector registers. */
10349 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10351 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10352 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10353 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10356 return rs6000_finish_function_arg (mode, rvec, k);
10358 else if (TARGET_ALTIVEC_ABI
10359 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10360 || (type && TREE_CODE (type) == VECTOR_TYPE
10361 && int_size_in_bytes (type) == 16)))
10363 if (named || abi == ABI_V4)
10364 return NULL_RTX;
10365 else
10367 /* Vector parameters to varargs functions under AIX or Darwin
10368 get passed in memory and possibly also in GPRs. */
10369 int align, align_words, n_words;
10370 machine_mode part_mode;
10372 /* Vector parameters must be 16-byte aligned. In 32-bit
10373 mode this means we need to take into account the offset
10374 to the parameter save area. In 64-bit mode, they just
10375 have to start on an even word, since the parameter save
10376 area is 16-byte aligned. */
10377 if (TARGET_32BIT)
10378 align = -(rs6000_parm_offset () + cum->words) & 3;
10379 else
10380 align = cum->words & 1;
10381 align_words = cum->words + align;
10383 /* Out of registers? Memory, then. */
10384 if (align_words >= GP_ARG_NUM_REG)
10385 return NULL_RTX;
10387 if (TARGET_32BIT && TARGET_POWERPC64)
10388 return rs6000_mixed_function_arg (mode, type, align_words);
10390 /* The vector value goes in GPRs. Only the part of the
10391 value in GPRs is reported here. */
10392 part_mode = mode;
10393 n_words = rs6000_arg_size (mode, type);
10394 if (align_words + n_words > GP_ARG_NUM_REG)
10395 /* Fortunately, there are only two possibilities, the value
10396 is either wholly in GPRs or half in GPRs and half not. */
10397 part_mode = DImode;
10399 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10402 else if (TARGET_SPE_ABI && TARGET_SPE
10403 && (SPE_VECTOR_MODE (mode)
10404 || (TARGET_E500_DOUBLE && (mode == DFmode
10405 || mode == DCmode
10406 || mode == TFmode
10407 || mode == TCmode))))
10408 return rs6000_spe_function_arg (cum, mode, type);
10410 else if (abi == ABI_V4)
10412 if (TARGET_HARD_FLOAT && TARGET_FPRS
10413 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10414 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10415 || (mode == TFmode && !TARGET_IEEEQUAD)
10416 || mode == SDmode || mode == DDmode || mode == TDmode))
10418 /* _Decimal128 must use an even/odd register pair. This assumes
10419 that the register number is odd when fregno is odd. */
10420 if (mode == TDmode && (cum->fregno % 2) == 1)
10421 cum->fregno++;
10423 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10424 <= FP_ARG_V4_MAX_REG)
10425 return gen_rtx_REG (mode, cum->fregno);
10426 else
10427 return NULL_RTX;
10429 else
10431 int n_words = rs6000_arg_size (mode, type);
10432 int gregno = cum->sysv_gregno;
10434 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10435 (r7,r8) or (r9,r10). As does any other 2 word item such
10436 as complex int due to a historical mistake. */
10437 if (n_words == 2)
10438 gregno += (1 - gregno) & 1;
10440 /* Multi-reg args are not split between registers and stack. */
10441 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10442 return NULL_RTX;
10444 if (TARGET_32BIT && TARGET_POWERPC64)
10445 return rs6000_mixed_function_arg (mode, type,
10446 gregno - GP_ARG_MIN_REG);
10447 return gen_rtx_REG (mode, gregno);
10450 else
10452 int align_words = rs6000_parm_start (mode, type, cum->words);
10454 /* _Decimal128 must be passed in an even/odd float register pair.
10455 This assumes that the register number is odd when fregno is odd. */
10456 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10457 cum->fregno++;
10459 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10461 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10462 rtx r, off;
10463 int i, k = 0;
10464 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10465 int fpr_words;
10467 /* Do we also need to pass this argument in the parameter
10468 save area? */
10469 if (type && (cum->nargs_prototype <= 0
10470 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10471 && TARGET_XL_COMPAT
10472 && align_words >= GP_ARG_NUM_REG)))
10473 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10475 /* Describe where this argument goes in the fprs. */
10476 for (i = 0; i < n_elts
10477 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10479 /* Check if the argument is split over registers and memory.
10480 This can only ever happen for long double or _Decimal128;
10481 complex types are handled via split_complex_arg. */
10482 machine_mode fmode = elt_mode;
10483 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10485 gcc_assert (fmode == TFmode || fmode == TDmode);
10486 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10489 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10490 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10491 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10494 /* If there were not enough FPRs to hold the argument, the rest
10495 usually goes into memory. However, if the current position
10496 is still within the register parameter area, a portion may
10497 actually have to go into GPRs.
10499 Note that it may happen that the portion of the argument
10500 passed in the first "half" of the first GPR was already
10501 passed in the last FPR as well.
10503 For unnamed arguments, we already set up GPRs to cover the
10504 whole argument in rs6000_psave_function_arg, so there is
10505 nothing further to do at this point. */
10506 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10507 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10508 && cum->nargs_prototype > 0)
10510 static bool warned;
10512 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10513 int n_words = rs6000_arg_size (mode, type);
10515 align_words += fpr_words;
10516 n_words -= fpr_words;
10520 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10521 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10522 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10524 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10526 if (!warned && warn_psabi)
10528 warned = true;
10529 inform (input_location,
10530 "the ABI of passing homogeneous float aggregates"
10531 " has changed in GCC 5");
10535 return rs6000_finish_function_arg (mode, rvec, k);
10537 else if (align_words < GP_ARG_NUM_REG)
10539 if (TARGET_32BIT && TARGET_POWERPC64)
10540 return rs6000_mixed_function_arg (mode, type, align_words);
10542 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10544 else
10545 return NULL_RTX;
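/* [Editor's illustration, not part of the original source.]  ELFv2,
   first named argument of type 'struct { double a, b; }' (a
   homogeneous aggregate, n_elts == 2): USE_FP_FOR_ARG_P holds and
   the result is approximately

     (parallel:BLK [(expr_list (reg:DF f1) (const_int 0))
                    (expr_list (reg:DF f2) (const_int 8))]).  */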
10549 /* For an arg passed partly in registers and partly in memory, this is
10550 the number of bytes passed in registers. For args passed entirely in
10551 registers or entirely in memory, zero. When an arg is described by a
10552 PARALLEL, perhaps using more than one register type, this function
10553 returns the number of bytes used by the first element of the PARALLEL. */
10555 static int
10556 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10557 tree type, bool named)
10559 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10560 bool passed_in_gprs = true;
10561 int ret = 0;
10562 int align_words;
10563 machine_mode elt_mode;
10564 int n_elts;
10566 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10568 if (DEFAULT_ABI == ABI_V4)
10569 return 0;
10571 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10573 /* If we are passing this arg in the fixed parameter save area
10574 (gprs or memory) as well as VRs, we do not use the partial
10575 bytes mechanism; instead, rs6000_function_arg will return a
10576 PARALLEL including a memory element as necessary. */
10577 if (TARGET_64BIT && ! cum->prototype)
10578 return 0;
10580 /* Otherwise, we pass in VRs only. Check for partial copies. */
10581 passed_in_gprs = false;
10582 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10583 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10586 /* In this complicated case we just disable the partial_nregs code. */
10587 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10588 return 0;
10590 align_words = rs6000_parm_start (mode, type, cum->words);
10592 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10594 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10596 /* If we are passing this arg in the fixed parameter save area
10597 (gprs or memory) as well as FPRs, we do not use the partial
10598 bytes mechanism; instead, rs6000_function_arg will return a
10599 PARALLEL including a memory element as necessary. */
10600 if (type
10601 && (cum->nargs_prototype <= 0
10602 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10603 && TARGET_XL_COMPAT
10604 && align_words >= GP_ARG_NUM_REG)))
10605 return 0;
10607 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10608 passed_in_gprs = false;
10609 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10611 /* Compute number of bytes / words passed in FPRs. If there
10612 is still space available in the register parameter area
10613 *after* that amount, a part of the argument will be passed
10614 in GPRs. In that case, the total amount passed in any
10615 registers is equal to the amount that would have been passed
10616 in GPRs if everything were passed there, so we fall back to
10617 the GPR code below to compute the appropriate value. */
10618 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10619 * MIN (8, GET_MODE_SIZE (elt_mode)));
10620 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10622 if (align_words + fpr_words < GP_ARG_NUM_REG)
10623 passed_in_gprs = true;
10624 else
10625 ret = fpr;
10629 if (passed_in_gprs
10630 && align_words < GP_ARG_NUM_REG
10631 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10632 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10634 if (ret != 0 && TARGET_DEBUG_ARG)
10635 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10637 return ret;
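/* [Editor's worked example, not part of the original source.]  64-bit
   ELFv2: a homogeneous aggregate of four doubles arrives when only
   three FPRs remain free, so fpr == 3 * 8 == 24 and fpr_words == 3.
   If align_words + 3 is not below GP_ARG_NUM_REG, the function
   returns 24: three doubles travel in FPRs and the fourth is passed
   in memory.  */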
10640 /* A C expression that indicates when an argument must be passed by
10641 reference. If nonzero for an argument, a copy of that argument is
10642 made in memory and a pointer to the argument is passed instead of
10643 the argument itself. The pointer is passed in whatever way is
10644 appropriate for passing a pointer to that type.
10646 Under V.4, aggregates and long double are passed by reference.
10648 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10649 reference unless the AltiVec vector extension ABI is in force.
10651 As an extension to all ABIs, variable sized types are passed by
10652 reference. */
10654 static bool
10655 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10656 machine_mode mode, const_tree type,
10657 bool named ATTRIBUTE_UNUSED)
10659 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10661 if (TARGET_DEBUG_ARG)
10662 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10663 return 1;
10666 if (!type)
10667 return 0;
10669 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10671 if (TARGET_DEBUG_ARG)
10672 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10673 return 1;
10676 if (int_size_in_bytes (type) < 0)
10678 if (TARGET_DEBUG_ARG)
10679 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10680 return 1;
10683 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10684 modes only exist for GCC vector types if -maltivec. */
10685 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10687 if (TARGET_DEBUG_ARG)
10688 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10689 return 1;
10692 /* Pass synthetic vectors in memory. */
10693 if (TREE_CODE (type) == VECTOR_TYPE
10694 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10696 static bool warned_for_pass_big_vectors = false;
10697 if (TARGET_DEBUG_ARG)
10698 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10699 if (!warned_for_pass_big_vectors)
10701 warning (0, "GCC vector passed by reference: "
10702 "non-standard ABI extension with no compatibility guarantee");
10703 warned_for_pass_big_vectors = true;
10705 return 1;
10708 return 0;
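/* [Editor's illustration, not part of the original source.]  Cases
   caught above: under ABI_V4 any aggregate ('struct { int x; }') is
   passed by reference; a variable-sized type (int_size_in_bytes < 0)
   is passed by reference on all ABIs; and an oversized generic
   vector, e.g.

     typedef int v8si __attribute__ ((vector_size (32)));

   is passed by reference with the one-time warning, since 32 bytes
   exceeds the 16-byte AltiVec limit.  */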
10711 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10712 already processed. Return true if the parameter must be passed
10713 (fully or partially) on the stack. */
10715 static bool
10716 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10718 machine_mode mode;
10719 int unsignedp;
10720 rtx entry_parm;
10722 /* Catch errors. */
10723 if (type == NULL || type == error_mark_node)
10724 return true;
10726 /* Handle types with no storage requirement. */
10727 if (TYPE_MODE (type) == VOIDmode)
10728 return false;
10730 /* Handle complex types: one check per scalar component. */
10731 if (TREE_CODE (type) == COMPLEX_TYPE)
10732 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10733 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10735 /* Handle transparent aggregates. */
10736 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10737 && TYPE_TRANSPARENT_AGGR (type))
10738 type = TREE_TYPE (first_field (type));
10740 /* See if this arg was passed by invisible reference. */
10741 if (pass_by_reference (get_cumulative_args (args_so_far),
10742 TYPE_MODE (type), type, true))
10743 type = build_pointer_type (type);
10745 /* Find mode as it is passed by the ABI. */
10746 unsignedp = TYPE_UNSIGNED (type);
10747 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10749 /* If we must pass in stack, we need a stack. */
10750 if (rs6000_must_pass_in_stack (mode, type))
10751 return true;
10753 /* If there is no incoming register, we need a stack. */
10754 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10755 if (entry_parm == NULL)
10756 return true;
10758 /* Likewise if we need to pass both in registers and on the stack. */
10759 if (GET_CODE (entry_parm) == PARALLEL
10760 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10761 return true;
10763 /* Also true if we're partially in registers and partially not. */
10764 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10765 return true;
10767 /* Update info on where next arg arrives in registers. */
10768 rs6000_function_arg_advance (args_so_far, mode, type, true);
10769 return false;
10772 /* Return true if FUN has no prototype, has a variable argument
10773 list, or passes any parameter in memory. */
10775 static bool
10776 rs6000_function_parms_need_stack (tree fun, bool incoming)
10778 tree fntype, result;
10779 CUMULATIVE_ARGS args_so_far_v;
10780 cumulative_args_t args_so_far;
10782 if (!fun)
10783 /* Must be a libcall, all of which only use reg parms. */
10784 return false;
10786 fntype = fun;
10787 if (!TYPE_P (fun))
10788 fntype = TREE_TYPE (fun);
10790 /* Varargs functions need the parameter save area. */
10791 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10792 return true;
10794 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10795 args_so_far = pack_cumulative_args (&args_so_far_v);
10797 /* When incoming, we will have been passed the function decl.
10798 It is necessary to use the decl to handle K&R style functions,
10799 where TYPE_ARG_TYPES may not be available. */
10800 if (incoming)
10802 gcc_assert (DECL_P (fun));
10803 result = DECL_RESULT (fun);
10805 else
10806 result = TREE_TYPE (fntype);
10808 if (result && aggregate_value_p (result, fntype))
10810 if (!TYPE_P (result))
10811 result = TREE_TYPE (result);
10812 result = build_pointer_type (result);
10813 rs6000_parm_needs_stack (args_so_far, result);
10816 if (incoming)
10818 tree parm;
10820 for (parm = DECL_ARGUMENTS (fun);
10821 parm && parm != void_list_node;
10822 parm = TREE_CHAIN (parm))
10823 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10824 return true;
10826 else
10828 function_args_iterator args_iter;
10829 tree arg_type;
10831 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10832 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10833 return true;
10836 return false;
10839 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
10840 usually a constant depending on the ABI. However, in the ELFv2 ABI
10841 the register parameter area is optional when calling a function that
10842 has a prototype in scope, has no variable argument list, and passes
10843 all parameters in registers. */
10845 int
10846 rs6000_reg_parm_stack_space (tree fun, bool incoming)
10848 int reg_parm_stack_space;
10850 switch (DEFAULT_ABI)
10852 default:
10853 reg_parm_stack_space = 0;
10854 break;
10856 case ABI_AIX:
10857 case ABI_DARWIN:
10858 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10859 break;
10861 case ABI_ELFv2:
10862 /* ??? Recomputing this every time is a bit expensive. Is there
10863 a place to cache this information? */
10864 if (rs6000_function_parms_need_stack (fun, incoming))
10865 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10866 else
10867 reg_parm_stack_space = 0;
10868 break;
10871 return reg_parm_stack_space;
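/* [Editor's example, not part of the original source.]  On 64-bit
   AIX the caller always allocates the 64-byte register parameter
   save area.  Under ELFv2, a call such as

     extern int f (int, int);  ...  f (1, 2);

   has a prototype in scope, no varargs, and passes everything in
   GPRs, so rs6000_reg_parm_stack_space returns 0 for it.  */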
10874 static void
10875 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
10877 int i;
10878 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
10880 if (nregs == 0)
10881 return;
10883 for (i = 0; i < nregs; i++)
10885 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
10886 if (reload_completed)
10888 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
10889 tem = NULL_RTX;
10890 else
10891 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
10892 i * GET_MODE_SIZE (reg_mode));
10894 else
10895 tem = replace_equiv_address (tem, XEXP (tem, 0));
10897 gcc_assert (tem);
10899 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
10903 /* Perform any actions needed for a function that is receiving a
10904 variable number of arguments.
10906 CUM is as above.
10908 MODE and TYPE are the mode and type of the current parameter.
10910 PRETEND_SIZE is a variable that should be set to the amount of stack
10911 that must be pushed by the prolog to pretend that our caller pushed
10912 it.
10914 Normally, this macro will push all remaining incoming registers on the
10915 stack and set PRETEND_SIZE to the length of the registers pushed. */
10917 static void
10918 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
10919 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10920 int no_rtl)
10922 CUMULATIVE_ARGS next_cum;
10923 int reg_size = TARGET_32BIT ? 4 : 8;
10924 rtx save_area = NULL_RTX, mem;
10925 int first_reg_offset;
10926 alias_set_type set;
10928 /* Skip the last named argument. */
10929 next_cum = *get_cumulative_args (cum);
10930 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
10932 if (DEFAULT_ABI == ABI_V4)
10934 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
10936 if (! no_rtl)
10938 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
10939 HOST_WIDE_INT offset = 0;
10941 /* Try to optimize the size of the varargs save area.
10942 The ABI requires that ap.reg_save_area is doubleword
10943 aligned, but we don't need to allocate space for all
10944 the bytes, only those to which we actually will save
10945 anything. */
10946 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
10947 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
10948 if (TARGET_HARD_FLOAT && TARGET_FPRS
10949 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10950 && cfun->va_list_fpr_size)
10952 if (gpr_reg_num)
10953 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
10954 * UNITS_PER_FP_WORD;
10955 if (cfun->va_list_fpr_size
10956 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10957 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
10958 else
10959 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10960 * UNITS_PER_FP_WORD;
10962 if (gpr_reg_num)
10964 offset = -((first_reg_offset * reg_size) & ~7);
10965 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
10967 gpr_reg_num = cfun->va_list_gpr_size;
10968 if (reg_size == 4 && (first_reg_offset & 1))
10969 gpr_reg_num++;
10971 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
10973 else if (fpr_size)
10974 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
10975 * UNITS_PER_FP_WORD
10976 - (int) (GP_ARG_NUM_REG * reg_size);
10978 if (gpr_size + fpr_size)
10980 rtx reg_save_area
10981 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
10982 gcc_assert (GET_CODE (reg_save_area) == MEM);
10983 reg_save_area = XEXP (reg_save_area, 0);
10984 if (GET_CODE (reg_save_area) == PLUS)
10986 gcc_assert (XEXP (reg_save_area, 0)
10987 == virtual_stack_vars_rtx);
10988 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
10989 offset += INTVAL (XEXP (reg_save_area, 1));
10991 else
10992 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
10995 cfun->machine->varargs_save_offset = offset;
10996 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
10999 else
11001 first_reg_offset = next_cum.words;
11002 save_area = virtual_incoming_args_rtx;
11004 if (targetm.calls.must_pass_in_stack (mode, type))
11005 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11008 set = get_varargs_alias_set ();
11009 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11010 && cfun->va_list_gpr_size)
11012 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11014 if (va_list_gpr_counter_field)
11015 /* V4 va_list_gpr_size counts number of registers needed. */
11016 n_gpr = cfun->va_list_gpr_size;
11017 else
11018 /* char * va_list instead counts number of bytes needed. */
11019 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11021 if (nregs > n_gpr)
11022 nregs = n_gpr;
11024 mem = gen_rtx_MEM (BLKmode,
11025 plus_constant (Pmode, save_area,
11026 first_reg_offset * reg_size));
11027 MEM_NOTRAP_P (mem) = 1;
11028 set_mem_alias_set (mem, set);
11029 set_mem_align (mem, BITS_PER_WORD);
11031 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11032 nregs);
11035 /* Save FP registers if needed. */
11036 if (DEFAULT_ABI == ABI_V4
11037 && TARGET_HARD_FLOAT && TARGET_FPRS
11038 && ! no_rtl
11039 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11040 && cfun->va_list_fpr_size)
11042 int fregno = next_cum.fregno, nregs;
11043 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11044 rtx lab = gen_label_rtx ();
11045 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11046 * UNITS_PER_FP_WORD);
11048 emit_jump_insn
11049 (gen_rtx_SET (VOIDmode,
11050 pc_rtx,
11051 gen_rtx_IF_THEN_ELSE (VOIDmode,
11052 gen_rtx_NE (VOIDmode, cr1,
11053 const0_rtx),
11054 gen_rtx_LABEL_REF (VOIDmode, lab),
11055 pc_rtx)));
11057 for (nregs = 0;
11058 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11059 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11061 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11062 ? DFmode : SFmode,
11063 plus_constant (Pmode, save_area, off));
11064 MEM_NOTRAP_P (mem) = 1;
11065 set_mem_alias_set (mem, set);
11066 set_mem_align (mem, GET_MODE_ALIGNMENT (
11067 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11068 ? DFmode : SFmode));
11069 emit_move_insn (mem, gen_rtx_REG (
11070 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11071 ? DFmode : SFmode, fregno));
11074 emit_label (lab);
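/* Worked example (V4 ABI, 32-bit, hypothetical prototype):

       void f (int a, ...);

   The named argument consumes r3, so first_reg_offset is 1 and the code
   above stores r4..r10 into the GPR part of the save area.  If FP varargs
   are possible, f1..f8 are stored after the GPR block, but those stores
   are skipped via the branch on CR1 when the caller indicates that no FP
   arguments were passed in registers.  */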
11078 /* Create the va_list data type. */
11080 static tree
11081 rs6000_build_builtin_va_list (void)
11083 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11085 /* For AIX, prefer 'char *' because that's what the system
11086 header files like. */
11087 if (DEFAULT_ABI != ABI_V4)
11088 return build_pointer_type (char_type_node);
11090 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11091 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11092 get_identifier ("__va_list_tag"), record);
11094 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11095 unsigned_char_type_node);
11096 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11097 unsigned_char_type_node);
11098 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11099 every user file. */
11100 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11101 get_identifier ("reserved"), short_unsigned_type_node);
11102 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11103 get_identifier ("overflow_arg_area"),
11104 ptr_type_node);
11105 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11106 get_identifier ("reg_save_area"),
11107 ptr_type_node);
11109 va_list_gpr_counter_field = f_gpr;
11110 va_list_fpr_counter_field = f_fpr;
11112 DECL_FIELD_CONTEXT (f_gpr) = record;
11113 DECL_FIELD_CONTEXT (f_fpr) = record;
11114 DECL_FIELD_CONTEXT (f_res) = record;
11115 DECL_FIELD_CONTEXT (f_ovf) = record;
11116 DECL_FIELD_CONTEXT (f_sav) = record;
11118 TYPE_STUB_DECL (record) = type_decl;
11119 TYPE_NAME (record) = type_decl;
11120 TYPE_FIELDS (record) = f_gpr;
11121 DECL_CHAIN (f_gpr) = f_fpr;
11122 DECL_CHAIN (f_fpr) = f_res;
11123 DECL_CHAIN (f_res) = f_ovf;
11124 DECL_CHAIN (f_ovf) = f_sav;
11126 layout_type (record);
11128 /* The correct type is an array type of one element. */
11129 return build_array_type (record, build_index_type (size_zero_node));
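/* Sketch of the equivalent C declaration for the record built above
   (field names match the FIELD_DECLs):

       typedef struct __va_list_tag {
         unsigned char gpr;           // next GPR slot, 0..8
         unsigned char fpr;           // next FPR slot, 0..8
         unsigned short reserved;     // named padding, silences -Wpadded
         void *overflow_arg_area;     // arguments passed on the stack
         void *reg_save_area;         // block saved by the prologue
       } va_list[1];
*/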
11132 /* Implement va_start. */
11134 static void
11135 rs6000_va_start (tree valist, rtx nextarg)
11137 HOST_WIDE_INT words, n_gpr, n_fpr;
11138 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11139 tree gpr, fpr, ovf, sav, t;
11141 /* Only SVR4 needs something special. */
11142 if (DEFAULT_ABI != ABI_V4)
11144 std_expand_builtin_va_start (valist, nextarg);
11145 return;
11148 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11149 f_fpr = DECL_CHAIN (f_gpr);
11150 f_res = DECL_CHAIN (f_fpr);
11151 f_ovf = DECL_CHAIN (f_res);
11152 f_sav = DECL_CHAIN (f_ovf);
11154 valist = build_simple_mem_ref (valist);
11155 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11156 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11157 f_fpr, NULL_TREE);
11158 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11159 f_ovf, NULL_TREE);
11160 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11161 f_sav, NULL_TREE);
11163 /* Count number of gp and fp argument registers used. */
11164 words = crtl->args.info.words;
11165 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11166 GP_ARG_NUM_REG);
11167 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11168 FP_ARG_NUM_REG);
11170 if (TARGET_DEBUG_ARG)
11171 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC ", n_gpr = "
11172 HOST_WIDE_INT_PRINT_DEC ", n_fpr = " HOST_WIDE_INT_PRINT_DEC "\n",
11173 words, n_gpr, n_fpr);
11175 if (cfun->va_list_gpr_size)
11177 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11178 build_int_cst (NULL_TREE, n_gpr));
11179 TREE_SIDE_EFFECTS (t) = 1;
11180 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11183 if (cfun->va_list_fpr_size)
11185 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11186 build_int_cst (NULL_TREE, n_fpr));
11187 TREE_SIDE_EFFECTS (t) = 1;
11188 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11190 #ifdef HAVE_AS_GNU_ATTRIBUTE
11191 if (call_ABI_of_interest (cfun->decl))
11192 rs6000_passes_float = true;
11193 #endif
11196 /* Find the overflow area. */
11197 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11198 if (words != 0)
11199 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
11200 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11201 TREE_SIDE_EFFECTS (t) = 1;
11202 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11204 /* If there were no va_arg invocations, don't set up the register
11205 save area. */
11206 if (!cfun->va_list_gpr_size
11207 && !cfun->va_list_fpr_size
11208 && n_gpr < GP_ARG_NUM_REG
11209 && n_fpr < FP_ARG_V4_MAX_REG)
11210 return;
11212 /* Find the register save area. */
11213 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11214 if (cfun->machine->varargs_save_offset)
11215 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11216 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11217 TREE_SIDE_EFFECTS (t) = 1;
11218 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
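/* In C terms the expansion above initializes the va_list roughly as
   (a sketch; the fields are those of the __va_list_tag record built by
   rs6000_build_builtin_va_list):

       ap->gpr = n_gpr;              // GPRs consumed by named arguments
       ap->fpr = n_fpr;              // FPRs consumed by named arguments
       ap->overflow_arg_area = incoming_args + words * UNITS_PER_WORD;
       ap->reg_save_area = frame + cfun->machine->varargs_save_offset;
*/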
11221 /* Implement va_arg. */
11223 static tree
11224 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11225 gimple_seq *post_p)
11227 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11228 tree gpr, fpr, ovf, sav, reg, t, u;
11229 int size, rsize, n_reg, sav_ofs, sav_scale;
11230 tree lab_false, lab_over, addr;
11231 int align;
11232 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11233 int regalign = 0;
11234 gimple stmt;
11236 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11238 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11239 return build_va_arg_indirect_ref (t);
11242 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11243 earlier version of gcc, with the property that it always applied alignment
11244 adjustments to the va-args (even for zero-sized types). The cheapest way
11245 to deal with this is to replicate the effect of the part of
11246 std_gimplify_va_arg_expr that carries out the align adjust, for the case
11247 of relevance.
11248 We don't need to check for pass-by-reference because of the test above.
11249 We can return a simplified answer, since we know there's no offset to add. */
11251 if (((TARGET_MACHO
11252 && rs6000_darwin64_abi)
11253 || DEFAULT_ABI == ABI_ELFv2
11254 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11255 && integer_zerop (TYPE_SIZE (type)))
11257 unsigned HOST_WIDE_INT align, boundary;
11258 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11259 align = PARM_BOUNDARY / BITS_PER_UNIT;
11260 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11261 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11262 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11263 boundary /= BITS_PER_UNIT;
11264 if (boundary > align)
11266 tree t ;
11267 /* This updates arg ptr by the amount that would be necessary
11268 to align the zero-sized (but not zero-alignment) item. */
11269 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11270 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11271 gimplify_and_add (t, pre_p);
11273 t = fold_convert (sizetype, valist_tmp);
11274 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11275 fold_convert (TREE_TYPE (valist),
11276 fold_build2 (BIT_AND_EXPR, sizetype, t,
11277 size_int (-boundary))));
11278 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11279 gimplify_and_add (t, pre_p);
11281 /* Since it is zero-sized there's no increment for the item itself. */
11282 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11283 return build_va_arg_indirect_ref (valist_tmp);
11286 if (DEFAULT_ABI != ABI_V4)
11288 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11290 tree elem_type = TREE_TYPE (type);
11291 machine_mode elem_mode = TYPE_MODE (elem_type);
11292 int elem_size = GET_MODE_SIZE (elem_mode);
11294 if (elem_size < UNITS_PER_WORD)
11296 tree real_part, imag_part;
11297 gimple_seq post = NULL;
11299 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11300 &post);
11301 /* Copy the value into a temporary, lest the formal temporary
11302 be reused out from under us. */
11303 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11304 gimple_seq_add_seq (pre_p, post);
11306 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11307 post_p);
11309 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11313 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11316 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11317 f_fpr = DECL_CHAIN (f_gpr);
11318 f_res = DECL_CHAIN (f_fpr);
11319 f_ovf = DECL_CHAIN (f_res);
11320 f_sav = DECL_CHAIN (f_ovf);
11322 valist = build_va_arg_indirect_ref (valist);
11323 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11324 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11325 f_fpr, NULL_TREE);
11326 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11327 f_ovf, NULL_TREE);
11328 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11329 f_sav, NULL_TREE);
11331 size = int_size_in_bytes (type);
11332 rsize = (size + 3) / 4;
11333 align = 1;
11335 if (TARGET_HARD_FLOAT && TARGET_FPRS
11336 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11337 || (TARGET_DOUBLE_FLOAT
11338 && (TYPE_MODE (type) == DFmode
11339 || TYPE_MODE (type) == TFmode
11340 || TYPE_MODE (type) == SDmode
11341 || TYPE_MODE (type) == DDmode
11342 || TYPE_MODE (type) == TDmode))))
11344 /* FP args go in FP registers, if present. */
11345 reg = fpr;
11346 n_reg = (size + 7) / 8;
11347 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11348 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11349 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11350 align = 8;
11352 else
11354 /* Otherwise into GP registers. */
11355 reg = gpr;
11356 n_reg = rsize;
11357 sav_ofs = 0;
11358 sav_scale = 4;
11359 if (n_reg == 2)
11360 align = 8;
11363 /* Pull the value out of the saved registers.... */
11365 lab_over = NULL;
11366 addr = create_tmp_var (ptr_type_node, "addr");
11368 /* AltiVec vectors never go in registers when -mabi=altivec. */
11369 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11370 align = 16;
11371 else
11373 lab_false = create_artificial_label (input_location);
11374 lab_over = create_artificial_label (input_location);
11376 /* Long long and SPE vectors are aligned in the registers.
11377 As is any other 2-gpr item, such as complex int, due to a
11378 historical mistake. */
11379 u = reg;
11380 if (n_reg == 2 && reg == gpr)
11382 regalign = 1;
11383 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11384 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11385 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11386 unshare_expr (reg), u);
11388 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11389 reg number is 0 for f1, so we want to make it odd. */
11390 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11392 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11393 build_int_cst (TREE_TYPE (reg), 1));
11394 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11397 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11398 t = build2 (GE_EXPR, boolean_type_node, u, t);
11399 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11400 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11401 gimplify_and_add (t, pre_p);
11403 t = sav;
11404 if (sav_ofs)
11405 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11407 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11408 build_int_cst (TREE_TYPE (reg), n_reg));
11409 u = fold_convert (sizetype, u);
11410 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11411 t = fold_build_pointer_plus (t, u);
11413 /* _Decimal32 varargs are located in the second word of the 64-bit
11414 FP register for 32-bit binaries. */
11415 if (!TARGET_POWERPC64
11416 && TARGET_HARD_FLOAT && TARGET_FPRS
11417 && TYPE_MODE (type) == SDmode)
11418 t = fold_build_pointer_plus_hwi (t, size);
11420 gimplify_assign (addr, t, pre_p);
11422 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11424 stmt = gimple_build_label (lab_false);
11425 gimple_seq_add_stmt (pre_p, stmt);
11427 if ((n_reg == 2 && !regalign) || n_reg > 2)
11429 /* Ensure that we don't find any more args in regs.
11430 Alignment has been taken care of for the special cases. */
11431 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11435 /* ... otherwise out of the overflow area. */
11437 /* Care for on-stack alignment if needed. */
11438 t = ovf;
11439 if (align != 1)
11441 t = fold_build_pointer_plus_hwi (t, align - 1);
11442 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11443 build_int_cst (TREE_TYPE (t), -align));
11445 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11447 gimplify_assign (unshare_expr (addr), t, pre_p);
11449 t = fold_build_pointer_plus_hwi (t, size);
11450 gimplify_assign (unshare_expr (ovf), t, pre_p);
11452 if (lab_over)
11454 stmt = gimple_build_label (lab_over);
11455 gimple_seq_add_stmt (pre_p, stmt);
11458 if (STRICT_ALIGNMENT
11459 && (TYPE_ALIGN (type)
11460 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11462 /* The value (of type complex double, for example) may not be
11463 aligned in memory in the saved registers, so copy via a
11464 temporary. (This is the same code as used for SPARC.) */
11465 tree tmp = create_tmp_var (type, "va_arg_tmp");
11466 tree dest_addr = build_fold_addr_expr (tmp);
11468 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11469 3, dest_addr, addr, size_int (rsize * 4));
11471 gimplify_and_add (copy, pre_p);
11472 addr = dest_addr;
11475 addr = fold_convert (ptrtype, addr);
11476 return build_va_arg_indirect_ref (addr);
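/* For the V4 path above, the gimple emitted corresponds roughly to this
   C logic for a single-register, non-FP argument (a sketch only; the
   multi-register and FPR cases adjust the index, scale and alignment):

       if (ap->gpr < 8)
         addr = ap->reg_save_area + 4 * ap->gpr++;   // from saved r3..r10
       else
         {
           addr = align (ap->overflow_arg_area);     // from the stack
           ap->overflow_arg_area = addr + size;
         }
       result = *(TYPE *) addr;
*/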
11479 /* Builtins. */
11481 static void
11482 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11484 tree t;
11485 unsigned classify = rs6000_builtin_info[(int)code].attr;
11486 const char *attr_string = "";
11488 gcc_assert (name != NULL);
11489 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
11491 if (rs6000_builtin_decls[(int)code])
11492 fatal_error ("internal error: builtin function %s already processed", name);
11494 rs6000_builtin_decls[(int)code] = t =
11495 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11497 /* Set any special attributes. */
11498 if ((classify & RS6000_BTC_CONST) != 0)
11500 /* const function, function only depends on the inputs. */
11501 TREE_READONLY (t) = 1;
11502 TREE_NOTHROW (t) = 1;
11503 attr_string = ", const";
11505 else if ((classify & RS6000_BTC_PURE) != 0)
11507 /* pure function, function can read global memory, but does not set any
11508 external state. */
11509 DECL_PURE_P (t) = 1;
11510 TREE_NOTHROW (t) = 1;
11511 attr_string = ", pure";
11513 else if ((classify & RS6000_BTC_FP) != 0)
11515 /* Function is a math function. If rounding mode is on, then treat the
11516 function as not reading global memory, but it can have arbitrary side
11517 effects. If it is off, then assume the function is a const function.
11518 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11519 builtin-attribute.def that is used for the math functions. */
11520 TREE_NOTHROW (t) = 1;
11521 if (flag_rounding_math)
11523 DECL_PURE_P (t) = 1;
11524 DECL_IS_NOVOPS (t) = 1;
11525 attr_string = ", fp, pure";
11527 else
11529 TREE_READONLY (t) = 1;
11530 attr_string = ", fp, const";
11533 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11534 gcc_unreachable ();
11536 if (TARGET_DEBUG_BUILTIN)
11537 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11538 (int)code, name, attr_string);
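/* Illustrative use (the registration code later in this file makes many
   calls of this shape):

       def_builtin ("__builtin_altivec_vaddubm", v16qi_ftype_v16qi_v16qi,
                    ALTIVEC_BUILTIN_VADDUBM);

   which records the decl in rs6000_builtin_decls and marks it const,
   pure or fp according to the RS6000_BTC_* bits for that code.  */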
11541 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11543 #undef RS6000_BUILTIN_1
11544 #undef RS6000_BUILTIN_2
11545 #undef RS6000_BUILTIN_3
11546 #undef RS6000_BUILTIN_A
11547 #undef RS6000_BUILTIN_D
11548 #undef RS6000_BUILTIN_E
11549 #undef RS6000_BUILTIN_H
11550 #undef RS6000_BUILTIN_P
11551 #undef RS6000_BUILTIN_Q
11552 #undef RS6000_BUILTIN_S
11553 #undef RS6000_BUILTIN_X
11555 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11556 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11557 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11558 { MASK, ICODE, NAME, ENUM },
11560 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11561 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11562 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11563 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11564 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11565 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11566 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11567 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11569 static const struct builtin_description bdesc_3arg[] =
11571 #include "rs6000-builtin.def"
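/* For example, a schematic three-operand entry in rs6000-builtin.def,

       RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
                         RS6000_BTM_ALTIVEC, RS6000_BTC_FP,
                         CODE_FOR_altivec_vmaddfp)

   expands under the #defines above to the bdesc_3arg initializer

       { RS6000_BTM_ALTIVEC, CODE_FOR_altivec_vmaddfp,
         "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP },

   while every other RS6000_BUILTIN_* entry expands to nothing, so each
   table below picks out exactly one class of builtins from the same
   .def file.  */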
11574 /* DST operations: void foo (void *, const int, const char). */
11576 #undef RS6000_BUILTIN_1
11577 #undef RS6000_BUILTIN_2
11578 #undef RS6000_BUILTIN_3
11579 #undef RS6000_BUILTIN_A
11580 #undef RS6000_BUILTIN_D
11581 #undef RS6000_BUILTIN_E
11582 #undef RS6000_BUILTIN_H
11583 #undef RS6000_BUILTIN_P
11584 #undef RS6000_BUILTIN_Q
11585 #undef RS6000_BUILTIN_S
11586 #undef RS6000_BUILTIN_X
11588 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11589 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11590 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11591 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11592 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11593 { MASK, ICODE, NAME, ENUM },
11595 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11596 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11597 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11598 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11599 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11600 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11602 static const struct builtin_description bdesc_dst[] =
11604 #include "rs6000-builtin.def"
11607 /* Simple binary operations: VECc = foo (VECa, VECb). */
11609 #undef RS6000_BUILTIN_1
11610 #undef RS6000_BUILTIN_2
11611 #undef RS6000_BUILTIN_3
11612 #undef RS6000_BUILTIN_A
11613 #undef RS6000_BUILTIN_D
11614 #undef RS6000_BUILTIN_E
11615 #undef RS6000_BUILTIN_H
11616 #undef RS6000_BUILTIN_P
11617 #undef RS6000_BUILTIN_Q
11618 #undef RS6000_BUILTIN_S
11619 #undef RS6000_BUILTIN_X
11621 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11622 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11623 { MASK, ICODE, NAME, ENUM },
11625 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11626 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11627 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11628 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11629 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11630 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11631 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11632 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11633 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11635 static const struct builtin_description bdesc_2arg[] =
11637 #include "rs6000-builtin.def"
11640 #undef RS6000_BUILTIN_1
11641 #undef RS6000_BUILTIN_2
11642 #undef RS6000_BUILTIN_3
11643 #undef RS6000_BUILTIN_A
11644 #undef RS6000_BUILTIN_D
11645 #undef RS6000_BUILTIN_E
11646 #undef RS6000_BUILTIN_H
11647 #undef RS6000_BUILTIN_P
11648 #undef RS6000_BUILTIN_Q
11649 #undef RS6000_BUILTIN_S
11650 #undef RS6000_BUILTIN_X
11652 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11653 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11654 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11655 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11656 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11657 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11658 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11659 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11660 { MASK, ICODE, NAME, ENUM },
11662 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11663 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11664 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11666 /* AltiVec predicates. */
11668 static const struct builtin_description bdesc_altivec_preds[] =
11670 #include "rs6000-builtin.def"
11673 /* SPE predicates. */
11674 #undef RS6000_BUILTIN_1
11675 #undef RS6000_BUILTIN_2
11676 #undef RS6000_BUILTIN_3
11677 #undef RS6000_BUILTIN_A
11678 #undef RS6000_BUILTIN_D
11679 #undef RS6000_BUILTIN_E
11680 #undef RS6000_BUILTIN_H
11681 #undef RS6000_BUILTIN_P
11682 #undef RS6000_BUILTIN_Q
11683 #undef RS6000_BUILTIN_S
11684 #undef RS6000_BUILTIN_X
11686 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11687 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11688 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11689 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11690 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11691 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11692 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11693 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11694 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11695 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11696 { MASK, ICODE, NAME, ENUM },
11698 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11700 static const struct builtin_description bdesc_spe_predicates[] =
11702 #include "rs6000-builtin.def"
11705 /* SPE evsel predicates. */
11706 #undef RS6000_BUILTIN_1
11707 #undef RS6000_BUILTIN_2
11708 #undef RS6000_BUILTIN_3
11709 #undef RS6000_BUILTIN_A
11710 #undef RS6000_BUILTIN_D
11711 #undef RS6000_BUILTIN_E
11712 #undef RS6000_BUILTIN_H
11713 #undef RS6000_BUILTIN_P
11714 #undef RS6000_BUILTIN_Q
11715 #undef RS6000_BUILTIN_S
11716 #undef RS6000_BUILTIN_X
11718 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11719 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11720 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11721 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11722 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11723 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11724 { MASK, ICODE, NAME, ENUM },
11726 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11727 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11728 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11729 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11730 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11732 static const struct builtin_description bdesc_spe_evsel[] =
11734 #include "rs6000-builtin.def"
11737 /* PAIRED predicates. */
11738 #undef RS6000_BUILTIN_1
11739 #undef RS6000_BUILTIN_2
11740 #undef RS6000_BUILTIN_3
11741 #undef RS6000_BUILTIN_A
11742 #undef RS6000_BUILTIN_D
11743 #undef RS6000_BUILTIN_E
11744 #undef RS6000_BUILTIN_H
11745 #undef RS6000_BUILTIN_P
11746 #undef RS6000_BUILTIN_Q
11747 #undef RS6000_BUILTIN_S
11748 #undef RS6000_BUILTIN_X
11750 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11751 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11752 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11753 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11754 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11755 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11756 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11757 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11758 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11759 { MASK, ICODE, NAME, ENUM },
11761 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11762 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11764 static const struct builtin_description bdesc_paired_preds[] =
11766 #include "rs6000-builtin.def"
11769 /* ABS* operations. */
11771 #undef RS6000_BUILTIN_1
11772 #undef RS6000_BUILTIN_2
11773 #undef RS6000_BUILTIN_3
11774 #undef RS6000_BUILTIN_A
11775 #undef RS6000_BUILTIN_D
11776 #undef RS6000_BUILTIN_E
11777 #undef RS6000_BUILTIN_H
11778 #undef RS6000_BUILTIN_P
11779 #undef RS6000_BUILTIN_Q
11780 #undef RS6000_BUILTIN_S
11781 #undef RS6000_BUILTIN_X
11783 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11784 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11785 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11786 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11787 { MASK, ICODE, NAME, ENUM },
11789 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11790 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11791 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11792 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11793 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11794 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11795 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11797 static const struct builtin_description bdesc_abs[] =
11799 #include "rs6000-builtin.def"
11802 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11803 foo (VECa). */
11805 #undef RS6000_BUILTIN_1
11806 #undef RS6000_BUILTIN_2
11807 #undef RS6000_BUILTIN_3
11808 #undef RS6000_BUILTIN_A
11809 #undef RS6000_BUILTIN_D
11810 #undef RS6000_BUILTIN_E
11811 #undef RS6000_BUILTIN_H
11812 #undef RS6000_BUILTIN_P
11813 #undef RS6000_BUILTIN_Q
11814 #undef RS6000_BUILTIN_S
11815 #undef RS6000_BUILTIN_X
11817 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11818 { MASK, ICODE, NAME, ENUM },
11820 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11821 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11822 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11823 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11824 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11825 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11826 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11827 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11828 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11829 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11831 static const struct builtin_description bdesc_1arg[] =
11833 #include "rs6000-builtin.def"
11836 /* HTM builtins. */
11837 #undef RS6000_BUILTIN_1
11838 #undef RS6000_BUILTIN_2
11839 #undef RS6000_BUILTIN_3
11840 #undef RS6000_BUILTIN_A
11841 #undef RS6000_BUILTIN_D
11842 #undef RS6000_BUILTIN_E
11843 #undef RS6000_BUILTIN_H
11844 #undef RS6000_BUILTIN_P
11845 #undef RS6000_BUILTIN_Q
11846 #undef RS6000_BUILTIN_S
11847 #undef RS6000_BUILTIN_X
11849 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11850 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11851 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11852 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11853 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11854 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11855 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
11856 { MASK, ICODE, NAME, ENUM },
11858 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11859 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11860 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11861 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11863 static const struct builtin_description bdesc_htm[] =
11865 #include "rs6000-builtin.def"
11868 #undef RS6000_BUILTIN_1
11869 #undef RS6000_BUILTIN_2
11870 #undef RS6000_BUILTIN_3
11871 #undef RS6000_BUILTIN_A
11872 #undef RS6000_BUILTIN_D
11873 #undef RS6000_BUILTIN_E
11874 #undef RS6000_BUILTIN_H
11875 #undef RS6000_BUILTIN_P
11876 #undef RS6000_BUILTIN_Q
11877 #undef RS6000_BUILTIN_S
11879 /* Return true if a builtin function is overloaded. */
11880 bool
11881 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
11883 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
11886 /* Expand a builtin that takes no arguments: emit insn ICODE and return its result in TARGET. */
11887 static rtx
11888 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
11890 rtx pat;
11891 machine_mode tmode = insn_data[icode].operand[0].mode;
11893 if (icode == CODE_FOR_nothing)
11894 /* Builtin not supported on this processor. */
11895 return 0;
11897 if (target == 0
11898 || GET_MODE (target) != tmode
11899 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11900 target = gen_reg_rtx (tmode);
11902 pat = GEN_FCN (icode) (target);
11903 if (! pat)
11904 return 0;
11905 emit_insn (pat);
11907 return target;
11911 static rtx
11912 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
11914 rtx pat;
11915 tree arg0 = CALL_EXPR_ARG (exp, 0);
11916 tree arg1 = CALL_EXPR_ARG (exp, 1);
11917 rtx op0 = expand_normal (arg0);
11918 rtx op1 = expand_normal (arg1);
11919 machine_mode mode0 = insn_data[icode].operand[0].mode;
11920 machine_mode mode1 = insn_data[icode].operand[1].mode;
11922 if (icode == CODE_FOR_nothing)
11923 /* Builtin not supported on this processor. */
11924 return 0;
11926 /* If we got invalid arguments bail out before generating bad rtl. */
11927 if (arg0 == error_mark_node || arg1 == error_mark_node)
11928 return const0_rtx;
11930 if (GET_CODE (op0) != CONST_INT
11931 || INTVAL (op0) > 255
11932 || INTVAL (op0) < 0)
11934 error ("argument 1 must be an 8-bit field value");
11935 return const0_rtx;
11938 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11939 op0 = copy_to_mode_reg (mode0, op0);
11941 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11942 op1 = copy_to_mode_reg (mode1, op1);
11944 pat = GEN_FCN (icode) (op0, op1);
11945 if (! pat)
11946 return const0_rtx;
11947 emit_insn (pat);
11949 return NULL_RTX;
11953 static rtx
11954 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
11956 rtx pat;
11957 tree arg0 = CALL_EXPR_ARG (exp, 0);
11958 rtx op0 = expand_normal (arg0);
11959 machine_mode tmode = insn_data[icode].operand[0].mode;
11960 machine_mode mode0 = insn_data[icode].operand[1].mode;
11962 if (icode == CODE_FOR_nothing)
11963 /* Builtin not supported on this processor. */
11964 return 0;
11966 /* If we got invalid arguments bail out before generating bad rtl. */
11967 if (arg0 == error_mark_node)
11968 return const0_rtx;
11970 if (icode == CODE_FOR_altivec_vspltisb
11971 || icode == CODE_FOR_altivec_vspltish
11972 || icode == CODE_FOR_altivec_vspltisw
11973 || icode == CODE_FOR_spe_evsplatfi
11974 || icode == CODE_FOR_spe_evsplati)
11976 /* Only allow 5-bit *signed* literals. */
11977 if (GET_CODE (op0) != CONST_INT
11978 || INTVAL (op0) > 15
11979 || INTVAL (op0) < -16)
11981 error ("argument 1 must be a 5-bit signed literal");
11982 return const0_rtx;
11986 if (target == 0
11987 || GET_MODE (target) != tmode
11988 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11989 target = gen_reg_rtx (tmode);
11991 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11992 op0 = copy_to_mode_reg (mode0, op0);
11994 pat = GEN_FCN (icode) (target, op0);
11995 if (! pat)
11996 return 0;
11997 emit_insn (pat);
11999 return target;
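/* For instance, the vspltis* checks above accept

       __builtin_altivec_vspltisw (15)     // OK: within -16..15

   but reject __builtin_altivec_vspltisw (16) with the error emitted
   above, since the hardware encodes the operand in a 5-bit signed
   immediate field.  */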
12002 static rtx
12003 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12005 rtx pat, scratch1, scratch2;
12006 tree arg0 = CALL_EXPR_ARG (exp, 0);
12007 rtx op0 = expand_normal (arg0);
12008 machine_mode tmode = insn_data[icode].operand[0].mode;
12009 machine_mode mode0 = insn_data[icode].operand[1].mode;
12011 /* If we have invalid arguments, bail out before generating bad rtl. */
12012 if (arg0 == error_mark_node)
12013 return const0_rtx;
12015 if (target == 0
12016 || GET_MODE (target) != tmode
12017 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12018 target = gen_reg_rtx (tmode);
12020 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12021 op0 = copy_to_mode_reg (mode0, op0);
12023 scratch1 = gen_reg_rtx (mode0);
12024 scratch2 = gen_reg_rtx (mode0);
12026 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12027 if (! pat)
12028 return 0;
12029 emit_insn (pat);
12031 return target;
12034 static rtx
12035 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12037 rtx pat;
12038 tree arg0 = CALL_EXPR_ARG (exp, 0);
12039 tree arg1 = CALL_EXPR_ARG (exp, 1);
12040 rtx op0 = expand_normal (arg0);
12041 rtx op1 = expand_normal (arg1);
12042 machine_mode tmode = insn_data[icode].operand[0].mode;
12043 machine_mode mode0 = insn_data[icode].operand[1].mode;
12044 machine_mode mode1 = insn_data[icode].operand[2].mode;
12046 if (icode == CODE_FOR_nothing)
12047 /* Builtin not supported on this processor. */
12048 return 0;
12050 /* If we got invalid arguments bail out before generating bad rtl. */
12051 if (arg0 == error_mark_node || arg1 == error_mark_node)
12052 return const0_rtx;
12054 if (icode == CODE_FOR_altivec_vcfux
12055 || icode == CODE_FOR_altivec_vcfsx
12056 || icode == CODE_FOR_altivec_vctsxs
12057 || icode == CODE_FOR_altivec_vctuxs
12058 || icode == CODE_FOR_altivec_vspltb
12059 || icode == CODE_FOR_altivec_vsplth
12060 || icode == CODE_FOR_altivec_vspltw
12061 || icode == CODE_FOR_spe_evaddiw
12062 || icode == CODE_FOR_spe_evldd
12063 || icode == CODE_FOR_spe_evldh
12064 || icode == CODE_FOR_spe_evldw
12065 || icode == CODE_FOR_spe_evlhhesplat
12066 || icode == CODE_FOR_spe_evlhhossplat
12067 || icode == CODE_FOR_spe_evlhhousplat
12068 || icode == CODE_FOR_spe_evlwhe
12069 || icode == CODE_FOR_spe_evlwhos
12070 || icode == CODE_FOR_spe_evlwhou
12071 || icode == CODE_FOR_spe_evlwhsplat
12072 || icode == CODE_FOR_spe_evlwwsplat
12073 || icode == CODE_FOR_spe_evrlwi
12074 || icode == CODE_FOR_spe_evslwi
12075 || icode == CODE_FOR_spe_evsrwis
12076 || icode == CODE_FOR_spe_evsubifw
12077 || icode == CODE_FOR_spe_evsrwiu)
12079 /* Only allow 5-bit unsigned literals. */
12080 STRIP_NOPS (arg1);
12081 if (TREE_CODE (arg1) != INTEGER_CST
12082 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12084 error ("argument 2 must be a 5-bit unsigned literal");
12085 return const0_rtx;
12089 if (target == 0
12090 || GET_MODE (target) != tmode
12091 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12092 target = gen_reg_rtx (tmode);
12094 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12095 op0 = copy_to_mode_reg (mode0, op0);
12096 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12097 op1 = copy_to_mode_reg (mode1, op1);
12099 pat = GEN_FCN (icode) (target, op0, op1);
12100 if (! pat)
12101 return 0;
12102 emit_insn (pat);
12104 return target;
12107 static rtx
12108 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12110 rtx pat, scratch;
12111 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12112 tree arg0 = CALL_EXPR_ARG (exp, 1);
12113 tree arg1 = CALL_EXPR_ARG (exp, 2);
12114 rtx op0 = expand_normal (arg0);
12115 rtx op1 = expand_normal (arg1);
12116 machine_mode tmode = SImode;
12117 machine_mode mode0 = insn_data[icode].operand[1].mode;
12118 machine_mode mode1 = insn_data[icode].operand[2].mode;
12119 int cr6_form_int;
12121 if (TREE_CODE (cr6_form) != INTEGER_CST)
12123 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12124 return const0_rtx;
12126 else
12127 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12129 gcc_assert (mode0 == mode1);
12131 /* If we have invalid arguments, bail out before generating bad rtl. */
12132 if (arg0 == error_mark_node || arg1 == error_mark_node)
12133 return const0_rtx;
12135 if (target == 0
12136 || GET_MODE (target) != tmode
12137 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12138 target = gen_reg_rtx (tmode);
12140 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12141 op0 = copy_to_mode_reg (mode0, op0);
12142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12143 op1 = copy_to_mode_reg (mode1, op1);
12145 scratch = gen_reg_rtx (mode0);
12147 pat = GEN_FCN (icode) (scratch, op0, op1);
12148 if (! pat)
12149 return 0;
12150 emit_insn (pat);
12152 /* The vec_any* and vec_all* predicates use the same opcodes for two
12153 different operations, but the bits in CR6 will be different
12154 depending on what information we want. So we have to play tricks
12155 with CR6 to get the right bits out.
12157 If you think this is disgusting, look at the specs for the
12158 AltiVec predicates. */
12160 switch (cr6_form_int)
12162 case 0:
12163 emit_insn (gen_cr6_test_for_zero (target));
12164 break;
12165 case 1:
12166 emit_insn (gen_cr6_test_for_zero_reverse (target));
12167 break;
12168 case 2:
12169 emit_insn (gen_cr6_test_for_lt (target));
12170 break;
12171 case 3:
12172 emit_insn (gen_cr6_test_for_lt_reverse (target));
12173 break;
12174 default:
12175 error ("argument 1 of __builtin_altivec_predicate is out of range");
12176 break;
12179 return target;
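/* As an illustration (using the __CR6_* encodings from altivec.h, which
   match the switch above): vec_all_eq (a, b) expands to a vcmpequ*_p
   call with cr6_form 2 and so tests the CR6 "all elements true" bit,
   while vec_any_eq (a, b) passes cr6_form 1 and tests the inverse of
   the "no elements true" bit.  */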
12182 static rtx
12183 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12185 rtx pat, addr;
12186 tree arg0 = CALL_EXPR_ARG (exp, 0);
12187 tree arg1 = CALL_EXPR_ARG (exp, 1);
12188 machine_mode tmode = insn_data[icode].operand[0].mode;
12189 machine_mode mode0 = Pmode;
12190 machine_mode mode1 = Pmode;
12191 rtx op0 = expand_normal (arg0);
12192 rtx op1 = expand_normal (arg1);
12194 if (icode == CODE_FOR_nothing)
12195 /* Builtin not supported on this processor. */
12196 return 0;
12198 /* If we got invalid arguments bail out before generating bad rtl. */
12199 if (arg0 == error_mark_node || arg1 == error_mark_node)
12200 return const0_rtx;
12202 if (target == 0
12203 || GET_MODE (target) != tmode
12204 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12205 target = gen_reg_rtx (tmode);
12207 op1 = copy_to_mode_reg (mode1, op1);
12209 if (op0 == const0_rtx)
12211 addr = gen_rtx_MEM (tmode, op1);
12213 else
12215 op0 = copy_to_mode_reg (mode0, op0);
12216 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12219 pat = GEN_FCN (icode) (target, addr);
12221 if (! pat)
12222 return 0;
12223 emit_insn (pat);
12225 return target;
12228 /* Return a constant vector for use as a little-endian permute control vector
12229 to reverse the order of elements of the given vector mode. */
12230 static rtx
12231 swap_selector_for_mode (machine_mode mode)
12233 /* These are little endian vectors, so their elements are reversed
12234 from what you would normally expect for a permute control vector. */
12235 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12236 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12237 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12238 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12239 unsigned int *swaparray, i;
12240 rtx perm[16];
12242 switch (mode)
12244 case V2DFmode:
12245 case V2DImode:
12246 swaparray = swap2;
12247 break;
12248 case V4SFmode:
12249 case V4SImode:
12250 swaparray = swap4;
12251 break;
12252 case V8HImode:
12253 swaparray = swap8;
12254 break;
12255 case V16QImode:
12256 swaparray = swap16;
12257 break;
12258 default:
12259 gcc_unreachable ();
12262 for (i = 0; i < 16; ++i)
12263 perm[i] = GEN_INT (swaparray[i]);
12265 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
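/* Worked example: for V4SImode the constant built here is swap4.  Read
   back-to-front, as the hardware sees the little-endian V16QI value, it
   is {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}: the vperm control that
   selects the four words in reverse order, so {W0,W1,W2,W3} becomes
   {W3,W2,W1,W0}.  */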
12268 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12269 with -maltivec=be specified. Issue the load followed by an element-reversing
12270 permute. */
12271 void
12272 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12274 rtx tmp = gen_reg_rtx (mode);
12275 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12276 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12277 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12278 rtx sel = swap_selector_for_mode (mode);
12279 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12281 gcc_assert (REG_P (op0));
12282 emit_insn (par);
12283 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
12286 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12287 with -maltivec=be specified. Issue the store preceded by an element-reversing
12288 permute. */
12289 void
12290 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12292 rtx tmp = gen_reg_rtx (mode);
12293 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12294 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12295 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12296 rtx sel = swap_selector_for_mode (mode);
12297 rtx vperm;
12299 gcc_assert (REG_P (op1));
12300 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12301 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12302 emit_insn (par);
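/* Sketch of the RTL shape produced by the two routines above for a
   vector mode M with selector SEL from swap_selector_for_mode:

       load:   tmp = lvx [addr]             // element-reversed LE image
               op0 = vperm (tmp, tmp, SEL)  // restore element order

       store:  tmp = vperm (op1, op1, SEL)  // pre-reverse the elements
               stvx tmp -> [addr]
*/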
12305 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12306 specified. Issue the store preceded by an element-reversing permute. */
12307 void
12308 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12310 machine_mode inner_mode = GET_MODE_INNER (mode);
12311 rtx tmp = gen_reg_rtx (mode);
12312 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12313 rtx sel = swap_selector_for_mode (mode);
12314 rtx vperm;
12316 gcc_assert (REG_P (op1));
12317 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12318 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12319 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
12322 static rtx
12323 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12325 rtx pat, addr;
12326 tree arg0 = CALL_EXPR_ARG (exp, 0);
12327 tree arg1 = CALL_EXPR_ARG (exp, 1);
12328 machine_mode tmode = insn_data[icode].operand[0].mode;
12329 machine_mode mode0 = Pmode;
12330 machine_mode mode1 = Pmode;
12331 rtx op0 = expand_normal (arg0);
12332 rtx op1 = expand_normal (arg1);
12334 if (icode == CODE_FOR_nothing)
12335 /* Builtin not supported on this processor. */
12336 return 0;
12338 /* If we got invalid arguments bail out before generating bad rtl. */
12339 if (arg0 == error_mark_node || arg1 == error_mark_node)
12340 return const0_rtx;
12342 if (target == 0
12343 || GET_MODE (target) != tmode
12344 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12345 target = gen_reg_rtx (tmode);
12347 op1 = copy_to_mode_reg (mode1, op1);
12349 if (op0 == const0_rtx)
12351 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12353 else
12355 op0 = copy_to_mode_reg (mode0, op0);
12356 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12359 pat = GEN_FCN (icode) (target, addr);
12361 if (! pat)
12362 return 0;
12363 emit_insn (pat);
12365 return target;
12368 static rtx
12369 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12371 tree arg0 = CALL_EXPR_ARG (exp, 0);
12372 tree arg1 = CALL_EXPR_ARG (exp, 1);
12373 tree arg2 = CALL_EXPR_ARG (exp, 2);
12374 rtx op0 = expand_normal (arg0);
12375 rtx op1 = expand_normal (arg1);
12376 rtx op2 = expand_normal (arg2);
12377 rtx pat;
12378 machine_mode mode0 = insn_data[icode].operand[0].mode;
12379 machine_mode mode1 = insn_data[icode].operand[1].mode;
12380 machine_mode mode2 = insn_data[icode].operand[2].mode;
12382 /* Invalid arguments. Bail before doing anything stoopid! */
12383 if (arg0 == error_mark_node
12384 || arg1 == error_mark_node
12385 || arg2 == error_mark_node)
12386 return const0_rtx;
12388 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12389 op0 = copy_to_mode_reg (mode2, op0);
12390 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12391 op1 = copy_to_mode_reg (mode0, op1);
12392 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12393 op2 = copy_to_mode_reg (mode1, op2);
12395 pat = GEN_FCN (icode) (op1, op2, op0);
12396 if (pat)
12397 emit_insn (pat);
12398 return NULL_RTX;
12401 static rtx
12402 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12404 tree arg0 = CALL_EXPR_ARG (exp, 0);
12405 tree arg1 = CALL_EXPR_ARG (exp, 1);
12406 tree arg2 = CALL_EXPR_ARG (exp, 2);
12407 rtx op0 = expand_normal (arg0);
12408 rtx op1 = expand_normal (arg1);
12409 rtx op2 = expand_normal (arg2);
12410 rtx pat, addr;
12411 machine_mode tmode = insn_data[icode].operand[0].mode;
12412 machine_mode mode1 = Pmode;
12413 machine_mode mode2 = Pmode;
12415 /* Invalid arguments. Bail before doing anything stoopid! */
12416 if (arg0 == error_mark_node
12417 || arg1 == error_mark_node
12418 || arg2 == error_mark_node)
12419 return const0_rtx;
12421 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12422 op0 = copy_to_mode_reg (tmode, op0);
12424 op2 = copy_to_mode_reg (mode2, op2);
12426 if (op1 == const0_rtx)
12428 addr = gen_rtx_MEM (tmode, op2);
12430 else
12432 op1 = copy_to_mode_reg (mode1, op1);
12433 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12436 pat = GEN_FCN (icode) (addr, op0);
12437 if (pat)
12438 emit_insn (pat);
12439 return NULL_RTX;
12442 static rtx
12443 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12445 tree arg0 = CALL_EXPR_ARG (exp, 0);
12446 tree arg1 = CALL_EXPR_ARG (exp, 1);
12447 tree arg2 = CALL_EXPR_ARG (exp, 2);
12448 rtx op0 = expand_normal (arg0);
12449 rtx op1 = expand_normal (arg1);
12450 rtx op2 = expand_normal (arg2);
12451 rtx pat, addr;
12452 machine_mode tmode = insn_data[icode].operand[0].mode;
12453 machine_mode smode = insn_data[icode].operand[1].mode;
12454 machine_mode mode1 = Pmode;
12455 machine_mode mode2 = Pmode;
12457 /* Invalid arguments. Bail before doing anything stoopid! */
12458 if (arg0 == error_mark_node
12459 || arg1 == error_mark_node
12460 || arg2 == error_mark_node)
12461 return const0_rtx;
12463 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12464 op0 = copy_to_mode_reg (smode, op0);
12466 op2 = copy_to_mode_reg (mode2, op2);
12468 if (op1 == const0_rtx)
12470 addr = gen_rtx_MEM (tmode, op2);
12472 else
12474 op1 = copy_to_mode_reg (mode1, op1);
12475 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12478 pat = GEN_FCN (icode) (addr, op0);
12479 if (pat)
12480 emit_insn (pat);
12481 return NULL_RTX;
12484 /* Return the appropriate SPR number associated with the given builtin. */
12485 static inline HOST_WIDE_INT
12486 htm_spr_num (enum rs6000_builtins code)
12488 if (code == HTM_BUILTIN_GET_TFHAR
12489 || code == HTM_BUILTIN_SET_TFHAR)
12490 return TFHAR_SPR;
12491 else if (code == HTM_BUILTIN_GET_TFIAR
12492 || code == HTM_BUILTIN_SET_TFIAR)
12493 return TFIAR_SPR;
12494 else if (code == HTM_BUILTIN_GET_TEXASR
12495 || code == HTM_BUILTIN_SET_TEXASR)
12496 return TEXASR_SPR;
12497 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12498 || code == HTM_BUILTIN_SET_TEXASRU);
12499 return TEXASRU_SPR;
12502 /* Return the appropriate SPR regno associated with the given builtin. */
12503 static inline HOST_WIDE_INT
12504 htm_spr_regno (enum rs6000_builtins code)
12506 if (code == HTM_BUILTIN_GET_TFHAR
12507 || code == HTM_BUILTIN_SET_TFHAR)
12508 return TFHAR_REGNO;
12509 else if (code == HTM_BUILTIN_GET_TFIAR
12510 || code == HTM_BUILTIN_SET_TFIAR)
12511 return TFIAR_REGNO;
12512 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12513 || code == HTM_BUILTIN_SET_TEXASR
12514 || code == HTM_BUILTIN_GET_TEXASRU
12515 || code == HTM_BUILTIN_SET_TEXASRU);
12516 return TEXASR_REGNO;
12519 /* Return the correct ICODE value depending on whether we are
12520 setting or reading the HTM SPRs. */
12521 static inline enum insn_code
12522 rs6000_htm_spr_icode (bool nonvoid)
12524 if (nonvoid)
12525 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12526 else
12527 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
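/* Example (SPR numbers follow PowerISA 2.07: TFHAR 128, TFIAR 129,
   TEXASR 130, TEXASRU 131): __builtin_get_texasr reaches the callers
   below with RS6000_BTC_SPR set and ultimately emits roughly

       mfspr rD,130        // read TEXASR into the result register

   while the matching __builtin_set_* forms use the mtspr variant
   selected by rs6000_htm_spr_icode.  */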
12530 /* Expand the HTM builtin in EXP and store the result in TARGET.
12531 Store true in *EXPANDEDP if we found a builtin to expand. */
12532 static rtx
12533 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12535 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12536 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12537 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12538 const struct builtin_description *d;
12539 size_t i;
12541 *expandedp = false;
12543 /* Expand the HTM builtins. */
12544 d = bdesc_htm;
12545 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12546 if (d->code == fcode)
12548 rtx op[MAX_HTM_OPERANDS], pat;
12549 int nopnds = 0;
12550 tree arg;
12551 call_expr_arg_iterator iter;
12552 unsigned attr = rs6000_builtin_info[fcode].attr;
12553 enum insn_code icode = d->icode;
12555 if (attr & RS6000_BTC_SPR)
12556 icode = rs6000_htm_spr_icode (nonvoid);
12558 if (nonvoid)
12560 machine_mode tmode = insn_data[icode].operand[0].mode;
12561 if (!target
12562 || GET_MODE (target) != tmode
12563 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12564 target = gen_reg_rtx (tmode);
12565 op[nopnds++] = target;
12568 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12570 const struct insn_operand_data *insn_op;
12572 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12573 return NULL_RTX;
12575 insn_op = &insn_data[icode].operand[nopnds];
12577 op[nopnds] = expand_normal (arg);
12579 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12581 if (!strcmp (insn_op->constraint, "n"))
12583 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12584 if (!CONST_INT_P (op[nopnds]))
12585 error ("argument %d must be an unsigned literal", arg_num);
12586 else
12587 error ("argument %d is an unsigned literal that is "
12588 "out of range", arg_num);
12589 return const0_rtx;
12591 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12594 nopnds++;
12597 /* Handle the builtins for extended mnemonics. These accept
12598 no arguments, but map to builtins that take arguments. */
12599 switch (fcode)
12601 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12602 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12603 op[nopnds++] = GEN_INT (1);
12604 #ifdef ENABLE_CHECKING
12605 attr |= RS6000_BTC_UNARY;
12606 #endif
12607 break;
12608 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12609 op[nopnds++] = GEN_INT (0);
12610 #ifdef ENABLE_CHECKING
12611 attr |= RS6000_BTC_UNARY;
12612 #endif
12613 break;
12614 default:
12615 break;
12618 /* If this builtin accesses SPRs, then pass in the appropriate
12619 SPR number and SPR regno as the last two operands. */
12620 if (attr & RS6000_BTC_SPR)
12622 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12623 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12626 #ifdef ENABLE_CHECKING
12627 int expected_nopnds = 0;
12628 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12629 expected_nopnds = 1;
12630 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12631 expected_nopnds = 2;
12632 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12633 expected_nopnds = 3;
12634 if (!(attr & RS6000_BTC_VOID))
12635 expected_nopnds += 1;
12636 if (attr & RS6000_BTC_SPR)
12637 expected_nopnds += 2;
12639 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12640 #endif
12642 switch (nopnds)
12644 case 1:
12645 pat = GEN_FCN (icode) (op[0]);
12646 break;
12647 case 2:
12648 pat = GEN_FCN (icode) (op[0], op[1]);
12649 break;
12650 case 3:
12651 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12652 break;
12653 case 4:
12654 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12655 break;
12656 default:
12657 gcc_unreachable ();
12659 if (!pat)
12660 return NULL_RTX;
12661 emit_insn (pat);
12663 *expandedp = true;
12664 if (nonvoid)
12665 return target;
12666 return const0_rtx;
12669 return NULL_RTX;
12672 static rtx
12673 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12675 rtx pat;
12676 tree arg0 = CALL_EXPR_ARG (exp, 0);
12677 tree arg1 = CALL_EXPR_ARG (exp, 1);
12678 tree arg2 = CALL_EXPR_ARG (exp, 2);
12679 rtx op0 = expand_normal (arg0);
12680 rtx op1 = expand_normal (arg1);
12681 rtx op2 = expand_normal (arg2);
12682 machine_mode tmode = insn_data[icode].operand[0].mode;
12683 machine_mode mode0 = insn_data[icode].operand[1].mode;
12684 machine_mode mode1 = insn_data[icode].operand[2].mode;
12685 machine_mode mode2 = insn_data[icode].operand[3].mode;
12687 if (icode == CODE_FOR_nothing)
12688 /* Builtin not supported on this processor. */
12689 return 0;
12691 /* If we got invalid arguments bail out before generating bad rtl. */
12692 if (arg0 == error_mark_node
12693 || arg1 == error_mark_node
12694 || arg2 == error_mark_node)
12695 return const0_rtx;
12697 /* Check and prepare argument depending on the instruction code.
12699 Note that a switch statement instead of the sequence of tests
12700 would be incorrect as many of the CODE_FOR values could be
12701 CODE_FOR_nothing and that would yield multiple alternatives
12702 with identical values. We'd never reach here at runtime in
12703 this case. */
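/* Illustrative failure mode: if two of the insn codes tested below both
   collapsed to CODE_FOR_nothing for the current target, a switch would
   contain duplicate case labels and fail to compile, whereas this chain
   of tests is merely redundant. */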
12704 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12705 || icode == CODE_FOR_altivec_vsldoi_v4si
12706 || icode == CODE_FOR_altivec_vsldoi_v8hi
12707 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12709 /* Only allow 4-bit unsigned literals. */
12710 STRIP_NOPS (arg2);
12711 if (TREE_CODE (arg2) != INTEGER_CST
12712 || TREE_INT_CST_LOW (arg2) & ~0xf)
12714 error ("argument 3 must be a 4-bit unsigned literal");
12715 return const0_rtx;
12718 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12719 || icode == CODE_FOR_vsx_xxpermdi_v2di
12720 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12721 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12722 || icode == CODE_FOR_vsx_xxsldwi_v4si
12723 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12724 || icode == CODE_FOR_vsx_xxsldwi_v2di
12725 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12727 /* Only allow 2-bit unsigned literals. */
12728 STRIP_NOPS (arg2);
12729 if (TREE_CODE (arg2) != INTEGER_CST
12730 || TREE_INT_CST_LOW (arg2) & ~0x3)
12732 error ("argument 3 must be a 2-bit unsigned literal");
12733 return const0_rtx;
12736 else if (icode == CODE_FOR_vsx_set_v2df
12737 || icode == CODE_FOR_vsx_set_v2di
12738 || icode == CODE_FOR_bcdadd
12739 || icode == CODE_FOR_bcdadd_lt
12740 || icode == CODE_FOR_bcdadd_eq
12741 || icode == CODE_FOR_bcdadd_gt
12742 || icode == CODE_FOR_bcdsub
12743 || icode == CODE_FOR_bcdsub_lt
12744 || icode == CODE_FOR_bcdsub_eq
12745 || icode == CODE_FOR_bcdsub_gt)
12747 /* Only allow 1-bit unsigned literals. */
12748 STRIP_NOPS (arg2);
12749 if (TREE_CODE (arg2) != INTEGER_CST
12750 || TREE_INT_CST_LOW (arg2) & ~0x1)
12752 error ("argument 3 must be a 1-bit unsigned literal");
12753 return const0_rtx;
12756 else if (icode == CODE_FOR_dfp_ddedpd_dd
12757 || icode == CODE_FOR_dfp_ddedpd_td)
12759 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12760 STRIP_NOPS (arg0);
12761 if (TREE_CODE (arg0) != INTEGER_CST
12762 || TREE_INT_CST_LOW (arg0) & ~0x3)
12764 error ("argument 1 must be 0 or 2");
12765 return const0_rtx;
12768 else if (icode == CODE_FOR_dfp_denbcd_dd
12769 || icode == CODE_FOR_dfp_denbcd_td)
12771 /* Only allow 1-bit unsigned literals. */
12772 STRIP_NOPS (arg0);
12773 if (TREE_CODE (arg0) != INTEGER_CST
12774 || TREE_INT_CST_LOW (arg0) & ~0x1)
12776 error ("argument 1 must be a 1-bit unsigned literal");
12777 return const0_rtx;
12780 else if (icode == CODE_FOR_dfp_dscli_dd
12781 || icode == CODE_FOR_dfp_dscli_td
12782 || icode == CODE_FOR_dfp_dscri_dd
12783 || icode == CODE_FOR_dfp_dscri_td)
12785 /* Only allow 6-bit unsigned literals. */
12786 STRIP_NOPS (arg1);
12787 if (TREE_CODE (arg1) != INTEGER_CST
12788 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12790 error ("argument 2 must be a 6-bit unsigned literal");
12791 return const0_rtx;
12794 else if (icode == CODE_FOR_crypto_vshasigmaw
12795 || icode == CODE_FOR_crypto_vshasigmad)
12797 /* Check whether the 2nd and 3rd arguments are integer constants and in
12798 range and prepare arguments. */
12799 STRIP_NOPS (arg1);
12800 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
12802 error ("argument 2 must be 0 or 1");
12803 return const0_rtx;
12806 STRIP_NOPS (arg2);
12807 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
12809 error ("argument 3 must be in the range 0..15");
12810 return const0_rtx;
12814 if (target == 0
12815 || GET_MODE (target) != tmode
12816 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12817 target = gen_reg_rtx (tmode);
12819 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12820 op0 = copy_to_mode_reg (mode0, op0);
12821 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12822 op1 = copy_to_mode_reg (mode1, op1);
12823 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12824 op2 = copy_to_mode_reg (mode2, op2);
12826 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
12827 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
12828 else
12829 pat = GEN_FCN (icode) (target, op0, op1, op2);
12830 if (! pat)
12831 return 0;
12832 emit_insn (pat);
12834 return target;
12837 /* Expand the lvx builtins. */
12838 static rtx
12839 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
12841 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12842 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12843 tree arg0;
12844 machine_mode tmode, mode0;
12845 rtx pat, op0;
12846 enum insn_code icode;
12848 switch (fcode)
12850 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
12851 icode = CODE_FOR_vector_altivec_load_v16qi;
12852 break;
12853 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
12854 icode = CODE_FOR_vector_altivec_load_v8hi;
12855 break;
12856 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
12857 icode = CODE_FOR_vector_altivec_load_v4si;
12858 break;
12859 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
12860 icode = CODE_FOR_vector_altivec_load_v4sf;
12861 break;
12862 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
12863 icode = CODE_FOR_vector_altivec_load_v2df;
12864 break;
12865 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12866 icode = CODE_FOR_vector_altivec_load_v2di;
break;
12867 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
12868 icode = CODE_FOR_vector_altivec_load_v1ti;
12869 break;
12870 default:
12871 *expandedp = false;
12872 return NULL_RTX;
12875 *expandedp = true;
12877 arg0 = CALL_EXPR_ARG (exp, 0);
12878 op0 = expand_normal (arg0);
12879 tmode = insn_data[icode].operand[0].mode;
12880 mode0 = insn_data[icode].operand[1].mode;
12882 if (target == 0
12883 || GET_MODE (target) != tmode
12884 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12885 target = gen_reg_rtx (tmode);
12887 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12888 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12890 pat = GEN_FCN (icode) (target, op0);
12891 if (! pat)
12892 return 0;
12893 emit_insn (pat);
12894 return target;
12897 /* Expand the stvx builtins. */
12898 static rtx
12899 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12900 bool *expandedp)
12902 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12903 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12904 tree arg0, arg1;
12905 machine_mode mode0, mode1;
12906 rtx pat, op0, op1;
12907 enum insn_code icode;
12909 switch (fcode)
12911 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
12912 icode = CODE_FOR_vector_altivec_store_v16qi;
12913 break;
12914 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
12915 icode = CODE_FOR_vector_altivec_store_v8hi;
12916 break;
12917 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
12918 icode = CODE_FOR_vector_altivec_store_v4si;
12919 break;
12920 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
12921 icode = CODE_FOR_vector_altivec_store_v4sf;
12922 break;
12923 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
12924 icode = CODE_FOR_vector_altivec_store_v2df;
12925 break;
12926 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
12927 icode = CODE_FOR_vector_altivec_store_v2di;
break;
12928 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
12929 icode = CODE_FOR_vector_altivec_store_v1ti;
12930 break;
12931 default:
12932 *expandedp = false;
12933 return NULL_RTX;
12936 arg0 = CALL_EXPR_ARG (exp, 0);
12937 arg1 = CALL_EXPR_ARG (exp, 1);
12938 op0 = expand_normal (arg0);
12939 op1 = expand_normal (arg1);
12940 mode0 = insn_data[icode].operand[0].mode;
12941 mode1 = insn_data[icode].operand[1].mode;
12943 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12944 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12945 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12946 op1 = copy_to_mode_reg (mode1, op1);
12948 pat = GEN_FCN (icode) (op0, op1);
12949 if (pat)
12950 emit_insn (pat);
12952 *expandedp = true;
12953 return NULL_RTX;
12956 /* Expand the dst builtins. */
12957 static rtx
12958 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12959 bool *expandedp)
12961 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12962 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12963 tree arg0, arg1, arg2;
12964 machine_mode mode0, mode1;
12965 rtx pat, op0, op1, op2;
12966 const struct builtin_description *d;
12967 size_t i;
12969 *expandedp = false;
12971 /* Handle DST variants. */
12972 d = bdesc_dst;
12973 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
12974 if (d->code == fcode)
12976 arg0 = CALL_EXPR_ARG (exp, 0);
12977 arg1 = CALL_EXPR_ARG (exp, 1);
12978 arg2 = CALL_EXPR_ARG (exp, 2);
12979 op0 = expand_normal (arg0);
12980 op1 = expand_normal (arg1);
12981 op2 = expand_normal (arg2);
12982 mode0 = insn_data[d->icode].operand[0].mode;
12983 mode1 = insn_data[d->icode].operand[1].mode;
12985 /* Invalid arguments, bail out before generating bad rtl. */
12986 if (arg0 == error_mark_node
12987 || arg1 == error_mark_node
12988 || arg2 == error_mark_node)
12989 return const0_rtx;
12991 *expandedp = true;
12992 STRIP_NOPS (arg2);
12993 if (TREE_CODE (arg2) != INTEGER_CST
12994 || TREE_INT_CST_LOW (arg2) & ~0x3)
12996 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
12997 return const0_rtx;
13000 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13001 op0 = copy_to_mode_reg (Pmode, op0);
13002 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13003 op1 = copy_to_mode_reg (mode1, op1);
13005 pat = GEN_FCN (d->icode) (op0, op1, op2);
13006 if (pat != 0)
13007 emit_insn (pat);
13009 return NULL_RTX;
13012 return NULL_RTX;
13015 /* Expand vec_init builtin. */
13016 static rtx
13017 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13019 machine_mode tmode = TYPE_MODE (type);
13020 machine_mode inner_mode = GET_MODE_INNER (tmode);
13021 int i, n_elt = GET_MODE_NUNITS (tmode);
13023 gcc_assert (VECTOR_MODE_P (tmode));
13024 gcc_assert (n_elt == call_expr_nargs (exp));
13026 if (!target || !register_operand (target, tmode))
13027 target = gen_reg_rtx (tmode);
13029 /* If we have a vector composed of a single element, such as V1TImode, do
13030 the initialization directly. */
13031 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13033 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13034 emit_move_insn (target, gen_lowpart (tmode, x));
13036 else
13038 rtvec v = rtvec_alloc (n_elt);
13040 for (i = 0; i < n_elt; ++i)
13042 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13043 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13046 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13049 return target;
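/* Usage sketch (illustrative): a vec_init of a V4SI vector with four
   scalar int arguments takes the else branch above; each argument is
   expanded, narrowed with gen_lowpart, collected into a PARALLEL and
   handed to rs6000_expand_vector_init. */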
13052 /* Return the integer constant in ARG. Constrain it to be in the range
13053 of the subparts of VEC_TYPE; issue an error if not. */
13055 static int
13056 get_element_number (tree vec_type, tree arg)
13058 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13060 if (!tree_fits_uhwi_p (arg)
13061 || (elt = tree_to_uhwi (arg), elt > max))
13063 error ("selector must be an integer constant in the range 0..%wi", max);
13064 return 0;
13067 return elt;
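/* For example (illustrative): for a V8HI vector type there are 8
   subparts, so MAX is 7 and any selector of 8 or more triggers the
   error above and falls back to element 0. */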
13070 /* Expand vec_set builtin. */
13071 static rtx
13072 altivec_expand_vec_set_builtin (tree exp)
13074 machine_mode tmode, mode1;
13075 tree arg0, arg1, arg2;
13076 int elt;
13077 rtx op0, op1;
13079 arg0 = CALL_EXPR_ARG (exp, 0);
13080 arg1 = CALL_EXPR_ARG (exp, 1);
13081 arg2 = CALL_EXPR_ARG (exp, 2);
13083 tmode = TYPE_MODE (TREE_TYPE (arg0));
13084 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13085 gcc_assert (VECTOR_MODE_P (tmode));
13087 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13088 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13089 elt = get_element_number (TREE_TYPE (arg0), arg2);
13091 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13092 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13094 op0 = force_reg (tmode, op0);
13095 op1 = force_reg (mode1, op1);
13097 rs6000_expand_vector_set (op0, op1, elt);
13099 return op0;
13102 /* Expand vec_ext builtin. */
13103 static rtx
13104 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13106 machine_mode tmode, mode0;
13107 tree arg0, arg1;
13108 int elt;
13109 rtx op0;
13111 arg0 = CALL_EXPR_ARG (exp, 0);
13112 arg1 = CALL_EXPR_ARG (exp, 1);
13114 op0 = expand_normal (arg0);
13115 elt = get_element_number (TREE_TYPE (arg0), arg1);
13117 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13118 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13119 gcc_assert (VECTOR_MODE_P (mode0));
13121 op0 = force_reg (mode0, op0);
13123 if (optimize || !target || !register_operand (target, tmode))
13124 target = gen_reg_rtx (tmode);
13126 rs6000_expand_vector_extract (target, op0, elt);
13128 return target;
13131 /* Expand the builtin in EXP and store the result in TARGET. Store
13132 true in *EXPANDEDP if we found a builtin to expand. */
13133 static rtx
13134 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13136 const struct builtin_description *d;
13137 size_t i;
13138 enum insn_code icode;
13139 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13140 tree arg0;
13141 rtx op0, pat;
13142 machine_mode tmode, mode0;
13143 enum rs6000_builtins fcode
13144 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13146 if (rs6000_overloaded_builtin_p (fcode))
13148 *expandedp = true;
13149 error ("unresolved overload for Altivec builtin %qF", fndecl);
13151 /* Given it is invalid, just generate a normal call. */
13152 return expand_call (exp, target, false);
13155 target = altivec_expand_ld_builtin (exp, target, expandedp);
13156 if (*expandedp)
13157 return target;
13159 target = altivec_expand_st_builtin (exp, target, expandedp);
13160 if (*expandedp)
13161 return target;
13163 target = altivec_expand_dst_builtin (exp, target, expandedp);
13164 if (*expandedp)
13165 return target;
13167 *expandedp = true;
13169 switch (fcode)
13171 case ALTIVEC_BUILTIN_STVX_V2DF:
13172 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13173 case ALTIVEC_BUILTIN_STVX_V2DI:
13174 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13175 case ALTIVEC_BUILTIN_STVX_V4SF:
13176 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13177 case ALTIVEC_BUILTIN_STVX:
13178 case ALTIVEC_BUILTIN_STVX_V4SI:
13179 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13180 case ALTIVEC_BUILTIN_STVX_V8HI:
13181 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13182 case ALTIVEC_BUILTIN_STVX_V16QI:
13183 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13184 case ALTIVEC_BUILTIN_STVEBX:
13185 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13186 case ALTIVEC_BUILTIN_STVEHX:
13187 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13188 case ALTIVEC_BUILTIN_STVEWX:
13189 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13190 case ALTIVEC_BUILTIN_STVXL_V2DF:
13191 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13192 case ALTIVEC_BUILTIN_STVXL_V2DI:
13193 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13194 case ALTIVEC_BUILTIN_STVXL_V4SF:
13195 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13196 case ALTIVEC_BUILTIN_STVXL:
13197 case ALTIVEC_BUILTIN_STVXL_V4SI:
13198 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13199 case ALTIVEC_BUILTIN_STVXL_V8HI:
13200 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13201 case ALTIVEC_BUILTIN_STVXL_V16QI:
13202 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13204 case ALTIVEC_BUILTIN_STVLX:
13205 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13206 case ALTIVEC_BUILTIN_STVLXL:
13207 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13208 case ALTIVEC_BUILTIN_STVRX:
13209 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13210 case ALTIVEC_BUILTIN_STVRXL:
13211 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13213 case VSX_BUILTIN_STXVD2X_V1TI:
13214 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13215 case VSX_BUILTIN_STXVD2X_V2DF:
13216 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13217 case VSX_BUILTIN_STXVD2X_V2DI:
13218 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13219 case VSX_BUILTIN_STXVW4X_V4SF:
13220 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13221 case VSX_BUILTIN_STXVW4X_V4SI:
13222 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13223 case VSX_BUILTIN_STXVW4X_V8HI:
13224 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13225 case VSX_BUILTIN_STXVW4X_V16QI:
13226 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13228 case ALTIVEC_BUILTIN_MFVSCR:
13229 icode = CODE_FOR_altivec_mfvscr;
13230 tmode = insn_data[icode].operand[0].mode;
13232 if (target == 0
13233 || GET_MODE (target) != tmode
13234 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13235 target = gen_reg_rtx (tmode);
13237 pat = GEN_FCN (icode) (target);
13238 if (! pat)
13239 return 0;
13240 emit_insn (pat);
13241 return target;
13243 case ALTIVEC_BUILTIN_MTVSCR:
13244 icode = CODE_FOR_altivec_mtvscr;
13245 arg0 = CALL_EXPR_ARG (exp, 0);
13246 op0 = expand_normal (arg0);
13247 mode0 = insn_data[icode].operand[0].mode;
13249 /* If we got invalid arguments bail out before generating bad rtl. */
13250 if (arg0 == error_mark_node)
13251 return const0_rtx;
13253 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13254 op0 = copy_to_mode_reg (mode0, op0);
13256 pat = GEN_FCN (icode) (op0);
13257 if (pat)
13258 emit_insn (pat);
13259 return NULL_RTX;
13261 case ALTIVEC_BUILTIN_DSSALL:
13262 emit_insn (gen_altivec_dssall ());
13263 return NULL_RTX;
13265 case ALTIVEC_BUILTIN_DSS:
13266 icode = CODE_FOR_altivec_dss;
13267 arg0 = CALL_EXPR_ARG (exp, 0);
13268 STRIP_NOPS (arg0);
13269 op0 = expand_normal (arg0);
13270 mode0 = insn_data[icode].operand[0].mode;
13272 /* If we got invalid arguments bail out before generating bad rtl. */
13273 if (arg0 == error_mark_node)
13274 return const0_rtx;
13276 if (TREE_CODE (arg0) != INTEGER_CST
13277 || TREE_INT_CST_LOW (arg0) & ~0x3)
13279 error ("argument to dss must be a 2-bit unsigned literal");
13280 return const0_rtx;
13283 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13284 op0 = copy_to_mode_reg (mode0, op0);
13286 emit_insn (gen_altivec_dss (op0));
13287 return NULL_RTX;
13289 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13290 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13291 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13292 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13293 case VSX_BUILTIN_VEC_INIT_V2DF:
13294 case VSX_BUILTIN_VEC_INIT_V2DI:
13295 case VSX_BUILTIN_VEC_INIT_V1TI:
13296 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13298 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13299 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13300 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13301 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13302 case VSX_BUILTIN_VEC_SET_V2DF:
13303 case VSX_BUILTIN_VEC_SET_V2DI:
13304 case VSX_BUILTIN_VEC_SET_V1TI:
13305 return altivec_expand_vec_set_builtin (exp);
13307 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13308 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13309 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13310 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13311 case VSX_BUILTIN_VEC_EXT_V2DF:
13312 case VSX_BUILTIN_VEC_EXT_V2DI:
13313 case VSX_BUILTIN_VEC_EXT_V1TI:
13314 return altivec_expand_vec_ext_builtin (exp, target);
13316 default:
13317 break;
13318 /* Fall through. */
13321 /* Expand abs* operations. */
13322 d = bdesc_abs;
13323 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13324 if (d->code == fcode)
13325 return altivec_expand_abs_builtin (d->icode, exp, target);
13327 /* Expand the AltiVec predicates. */
13328 d = bdesc_altivec_preds;
13329 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13330 if (d->code == fcode)
13331 return altivec_expand_predicate_builtin (d->icode, exp, target);
13333 /* LV* are funky. We initialized them differently. */
13334 switch (fcode)
13336 case ALTIVEC_BUILTIN_LVSL:
13337 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13338 exp, target, false);
13339 case ALTIVEC_BUILTIN_LVSR:
13340 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13341 exp, target, false);
13342 case ALTIVEC_BUILTIN_LVEBX:
13343 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13344 exp, target, false);
13345 case ALTIVEC_BUILTIN_LVEHX:
13346 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13347 exp, target, false);
13348 case ALTIVEC_BUILTIN_LVEWX:
13349 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13350 exp, target, false);
13351 case ALTIVEC_BUILTIN_LVXL_V2DF:
13352 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13353 exp, target, false);
13354 case ALTIVEC_BUILTIN_LVXL_V2DI:
13355 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13356 exp, target, false);
13357 case ALTIVEC_BUILTIN_LVXL_V4SF:
13358 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13359 exp, target, false);
13360 case ALTIVEC_BUILTIN_LVXL:
13361 case ALTIVEC_BUILTIN_LVXL_V4SI:
13362 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13363 exp, target, false);
13364 case ALTIVEC_BUILTIN_LVXL_V8HI:
13365 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13366 exp, target, false);
13367 case ALTIVEC_BUILTIN_LVXL_V16QI:
13368 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13369 exp, target, false);
13370 case ALTIVEC_BUILTIN_LVX_V2DF:
13371 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13372 exp, target, false);
13373 case ALTIVEC_BUILTIN_LVX_V2DI:
13374 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13375 exp, target, false);
13376 case ALTIVEC_BUILTIN_LVX_V4SF:
13377 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13378 exp, target, false);
13379 case ALTIVEC_BUILTIN_LVX:
13380 case ALTIVEC_BUILTIN_LVX_V4SI:
13381 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13382 exp, target, false);
13383 case ALTIVEC_BUILTIN_LVX_V8HI:
13384 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13385 exp, target, false);
13386 case ALTIVEC_BUILTIN_LVX_V16QI:
13387 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13388 exp, target, false);
13389 case ALTIVEC_BUILTIN_LVLX:
13390 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13391 exp, target, true);
13392 case ALTIVEC_BUILTIN_LVLXL:
13393 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13394 exp, target, true);
13395 case ALTIVEC_BUILTIN_LVRX:
13396 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13397 exp, target, true);
13398 case ALTIVEC_BUILTIN_LVRXL:
13399 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13400 exp, target, true);
13401 case VSX_BUILTIN_LXVD2X_V1TI:
13402 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13403 exp, target, false);
13404 case VSX_BUILTIN_LXVD2X_V2DF:
13405 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13406 exp, target, false);
13407 case VSX_BUILTIN_LXVD2X_V2DI:
13408 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13409 exp, target, false);
13410 case VSX_BUILTIN_LXVW4X_V4SF:
13411 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13412 exp, target, false);
13413 case VSX_BUILTIN_LXVW4X_V4SI:
13414 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13415 exp, target, false);
13416 case VSX_BUILTIN_LXVW4X_V8HI:
13417 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13418 exp, target, false);
13419 case VSX_BUILTIN_LXVW4X_V16QI:
13420 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13421 exp, target, false);
13422 break;
13423 default:
13424 break;
13425 /* Fall through. */
13428 *expandedp = false;
13429 return NULL_RTX;
13432 /* Expand the builtin in EXP and store the result in TARGET. Store
13433 true in *EXPANDEDP if we found a builtin to expand. */
13434 static rtx
13435 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13437 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13438 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13439 const struct builtin_description *d;
13440 size_t i;
13442 *expandedp = true;
13444 switch (fcode)
13446 case PAIRED_BUILTIN_STX:
13447 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13448 case PAIRED_BUILTIN_LX:
13449 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13450 default:
13451 break;
13452 /* Fall through. */
13455 /* Expand the paired predicates. */
13456 d = bdesc_paired_preds;
13457 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13458 if (d->code == fcode)
13459 return paired_expand_predicate_builtin (d->icode, exp, target);
13461 *expandedp = false;
13462 return NULL_RTX;
13465 /* Binops that need to be initialized manually, but can be expanded
13466 automagically by rs6000_expand_binop_builtin. */
13467 static const struct builtin_description bdesc_2arg_spe[] =
13469 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13470 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13471 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13472 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13473 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13474 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13475 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13476 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13477 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13478 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13479 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13480 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13481 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13482 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13483 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13484 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13485 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13486 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13487 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13488 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13489 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13490 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13493 /* Expand the builtin in EXP and store the result in TARGET. Store
13494 true in *EXPANDEDP if we found a builtin to expand.
13496 This expands the SPE builtins that are not simple unary and binary
13497 operations. */
13498 static rtx
13499 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13501 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13502 tree arg1, arg0;
13503 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13504 enum insn_code icode;
13505 machine_mode tmode, mode0;
13506 rtx pat, op0;
13507 const struct builtin_description *d;
13508 size_t i;
13510 *expandedp = true;
13512 /* Syntax check for a 5-bit unsigned immediate. */
13513 switch (fcode)
13515 case SPE_BUILTIN_EVSTDD:
13516 case SPE_BUILTIN_EVSTDH:
13517 case SPE_BUILTIN_EVSTDW:
13518 case SPE_BUILTIN_EVSTWHE:
13519 case SPE_BUILTIN_EVSTWHO:
13520 case SPE_BUILTIN_EVSTWWE:
13521 case SPE_BUILTIN_EVSTWWO:
13522 arg1 = CALL_EXPR_ARG (exp, 2);
13523 if (TREE_CODE (arg1) != INTEGER_CST
13524 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13526 error ("argument 2 must be a 5-bit unsigned literal");
13527 return const0_rtx;
13529 break;
13530 default:
13531 break;
13534 /* The evsplat*i instructions are not quite generic. */
13535 switch (fcode)
13537 case SPE_BUILTIN_EVSPLATFI:
13538 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13539 exp, target);
13540 case SPE_BUILTIN_EVSPLATI:
13541 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13542 exp, target);
13543 default:
13544 break;
13547 d = bdesc_2arg_spe;
13548 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13549 if (d->code == fcode)
13550 return rs6000_expand_binop_builtin (d->icode, exp, target);
13552 d = bdesc_spe_predicates;
13553 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13554 if (d->code == fcode)
13555 return spe_expand_predicate_builtin (d->icode, exp, target);
13557 d = bdesc_spe_evsel;
13558 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13559 if (d->code == fcode)
13560 return spe_expand_evsel_builtin (d->icode, exp, target);
13562 switch (fcode)
13564 case SPE_BUILTIN_EVSTDDX:
13565 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13566 case SPE_BUILTIN_EVSTDHX:
13567 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13568 case SPE_BUILTIN_EVSTDWX:
13569 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13570 case SPE_BUILTIN_EVSTWHEX:
13571 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13572 case SPE_BUILTIN_EVSTWHOX:
13573 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13574 case SPE_BUILTIN_EVSTWWEX:
13575 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13576 case SPE_BUILTIN_EVSTWWOX:
13577 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13578 case SPE_BUILTIN_EVSTDD:
13579 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13580 case SPE_BUILTIN_EVSTDH:
13581 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13582 case SPE_BUILTIN_EVSTDW:
13583 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13584 case SPE_BUILTIN_EVSTWHE:
13585 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13586 case SPE_BUILTIN_EVSTWHO:
13587 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13588 case SPE_BUILTIN_EVSTWWE:
13589 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13590 case SPE_BUILTIN_EVSTWWO:
13591 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13592 case SPE_BUILTIN_MFSPEFSCR:
13593 icode = CODE_FOR_spe_mfspefscr;
13594 tmode = insn_data[icode].operand[0].mode;
13596 if (target == 0
13597 || GET_MODE (target) != tmode
13598 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13599 target = gen_reg_rtx (tmode);
13601 pat = GEN_FCN (icode) (target);
13602 if (! pat)
13603 return 0;
13604 emit_insn (pat);
13605 return target;
13606 case SPE_BUILTIN_MTSPEFSCR:
13607 icode = CODE_FOR_spe_mtspefscr;
13608 arg0 = CALL_EXPR_ARG (exp, 0);
13609 op0 = expand_normal (arg0);
13610 mode0 = insn_data[icode].operand[0].mode;
13612 if (arg0 == error_mark_node)
13613 return const0_rtx;
13615 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13616 op0 = copy_to_mode_reg (mode0, op0);
13618 pat = GEN_FCN (icode) (op0);
13619 if (pat)
13620 emit_insn (pat);
13621 return NULL_RTX;
13622 default:
13623 break;
13626 *expandedp = false;
13627 return NULL_RTX;
13630 static rtx
13631 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13633 rtx pat, scratch, tmp;
13634 tree form = CALL_EXPR_ARG (exp, 0);
13635 tree arg0 = CALL_EXPR_ARG (exp, 1);
13636 tree arg1 = CALL_EXPR_ARG (exp, 2);
13637 rtx op0 = expand_normal (arg0);
13638 rtx op1 = expand_normal (arg1);
13639 machine_mode mode0 = insn_data[icode].operand[1].mode;
13640 machine_mode mode1 = insn_data[icode].operand[2].mode;
13641 int form_int;
13642 enum rtx_code code;
13644 if (TREE_CODE (form) != INTEGER_CST)
13646 error ("argument 1 of __builtin_paired_predicate must be a constant");
13647 return const0_rtx;
13649 else
13650 form_int = TREE_INT_CST_LOW (form);
13652 gcc_assert (mode0 == mode1);
13654 if (arg0 == error_mark_node || arg1 == error_mark_node)
13655 return const0_rtx;
13657 if (target == 0
13658 || GET_MODE (target) != SImode
13659 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13660 target = gen_reg_rtx (SImode);
13661 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13662 op0 = copy_to_mode_reg (mode0, op0);
13663 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13664 op1 = copy_to_mode_reg (mode1, op1);
13666 scratch = gen_reg_rtx (CCFPmode);
13668 pat = GEN_FCN (icode) (scratch, op0, op1);
13669 if (!pat)
13670 return const0_rtx;
13672 emit_insn (pat);
13674 switch (form_int)
13676 /* LT bit. */
13677 case 0:
13678 code = LT;
13679 break;
13680 /* GT bit. */
13681 case 1:
13682 code = GT;
13683 break;
13684 /* EQ bit. */
13685 case 2:
13686 code = EQ;
13687 break;
13688 /* UN bit. */
13689 case 3:
13690 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13691 return target;
13692 default:
13693 error ("argument 1 of __builtin_paired_predicate is out of range");
13694 return const0_rtx;
13697 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13698 emit_move_insn (target, tmp);
13699 return target;
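/* Summary of the FORM operand handled above (illustrative): 0 tests the
   LT bit, 1 the GT bit, 2 the EQ bit, and 3 reads the UN bit through a
   dedicated move-from-CR pattern rather than a comparison rtx (see the
   analogous comment in spe_expand_predicate_builtin below). */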
13702 static rtx
13703 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13705 rtx pat, scratch, tmp;
13706 tree form = CALL_EXPR_ARG (exp, 0);
13707 tree arg0 = CALL_EXPR_ARG (exp, 1);
13708 tree arg1 = CALL_EXPR_ARG (exp, 2);
13709 rtx op0 = expand_normal (arg0);
13710 rtx op1 = expand_normal (arg1);
13711 machine_mode mode0 = insn_data[icode].operand[1].mode;
13712 machine_mode mode1 = insn_data[icode].operand[2].mode;
13713 int form_int;
13714 enum rtx_code code;
13716 if (TREE_CODE (form) != INTEGER_CST)
13718 error ("argument 1 of __builtin_spe_predicate must be a constant");
13719 return const0_rtx;
13721 else
13722 form_int = TREE_INT_CST_LOW (form);
13724 gcc_assert (mode0 == mode1);
13726 if (arg0 == error_mark_node || arg1 == error_mark_node)
13727 return const0_rtx;
13729 if (target == 0
13730 || GET_MODE (target) != SImode
13731 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13732 target = gen_reg_rtx (SImode);
13734 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13735 op0 = copy_to_mode_reg (mode0, op0);
13736 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13737 op1 = copy_to_mode_reg (mode1, op1);
13739 scratch = gen_reg_rtx (CCmode);
13741 pat = GEN_FCN (icode) (scratch, op0, op1);
13742 if (! pat)
13743 return const0_rtx;
13744 emit_insn (pat);
13746 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13747 _lower_. We use one compare, but look in different bits of the
13748 CR for each variant.
13750 There are 2 elements in each SPE simd type (upper/lower). The CR
13751 bits are set as follows:
13753 BIT0 | BIT 1 | BIT 2 | BIT 3
13754 U | L | (U | L) | (U & L)
13756 So, for an "all" relationship, BIT 3 would be set.
13757 For an "any" relationship, BIT 2 would be set. Etc.
13759 Following traditional nomenclature, these bits map to:
13761 BIT0 | BIT 1 | BIT 2 | BIT 3
13762 LT | GT | EQ | OV
13764 Later, we will generate rtl to look in the LT/EQ/EQ/OV bits.
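Worked example (illustrative): for the "all" variant both the upper
and lower element comparisons must hold, so BIT 3 (U & L), i.e. the
OV bit, is tested; the "any" variant tests BIT 2 (U | L), the EQ
bit, exactly as the switch below implements.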
13767 switch (form_int)
13769 /* All variant. OV bit. */
13770 case 0:
13771 /* We need to get to the OV bit, which is the ORDERED bit. We
13772 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13773 that's ugly and will make validate_condition_mode die.
13774 So let's just use another pattern. */
13775 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13776 return target;
13777 /* Any variant. EQ bit. */
13778 case 1:
13779 code = EQ;
13780 break;
13781 /* Upper variant. LT bit. */
13782 case 2:
13783 code = LT;
13784 break;
13785 /* Lower variant. GT bit. */
13786 case 3:
13787 code = GT;
13788 break;
13789 default:
13790 error ("argument 1 of __builtin_spe_predicate is out of range");
13791 return const0_rtx;
13794 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13795 emit_move_insn (target, tmp);
13797 return target;
13800 /* The evsel builtins look like this:
13802 e = __builtin_spe_evsel_OP (a, b, c, d);
13804 and work like this:
13806 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13807 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
13810 static rtx
13811 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13813 rtx pat, scratch;
13814 tree arg0 = CALL_EXPR_ARG (exp, 0);
13815 tree arg1 = CALL_EXPR_ARG (exp, 1);
13816 tree arg2 = CALL_EXPR_ARG (exp, 2);
13817 tree arg3 = CALL_EXPR_ARG (exp, 3);
13818 rtx op0 = expand_normal (arg0);
13819 rtx op1 = expand_normal (arg1);
13820 rtx op2 = expand_normal (arg2);
13821 rtx op3 = expand_normal (arg3);
13822 machine_mode mode0 = insn_data[icode].operand[1].mode;
13823 machine_mode mode1 = insn_data[icode].operand[2].mode;
13825 gcc_assert (mode0 == mode1);
13827 if (arg0 == error_mark_node || arg1 == error_mark_node
13828 || arg2 == error_mark_node || arg3 == error_mark_node)
13829 return const0_rtx;
13831 if (target == 0
13832 || GET_MODE (target) != mode0
13833 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
13834 target = gen_reg_rtx (mode0);
13836 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13837 op0 = copy_to_mode_reg (mode0, op0);
13838 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13839 op1 = copy_to_mode_reg (mode0, op1);
13840 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13841 op2 = copy_to_mode_reg (mode0, op2);
13842 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
13843 op3 = copy_to_mode_reg (mode0, op3);
13845 /* Generate the compare. */
13846 scratch = gen_reg_rtx (CCmode);
13847 pat = GEN_FCN (icode) (scratch, op0, op1);
13848 if (! pat)
13849 return const0_rtx;
13850 emit_insn (pat);
13852 if (mode0 == V2SImode)
13853 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
13854 else
13855 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
13857 return target;
13860 /* Raise an error message for a builtin function that is called without the
13861 appropriate target options being set. */
13863 static void
13864 rs6000_invalid_builtin (enum rs6000_builtins fncode)
13866 size_t uns_fncode = (size_t)fncode;
13867 const char *name = rs6000_builtin_info[uns_fncode].name;
13868 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
13870 gcc_assert (name != NULL);
13871 if ((fnmask & RS6000_BTM_CELL) != 0)
13872 error ("builtin function %s is only valid for the cell processor", name);
13873 else if ((fnmask & RS6000_BTM_VSX) != 0)
13874 error ("builtin function %s requires the -mvsx option", name);
13875 else if ((fnmask & RS6000_BTM_HTM) != 0)
13876 error ("builtin function %s requires the -mhtm option", name);
13877 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
13878 error ("builtin function %s requires the -maltivec option", name);
13879 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
13880 error ("builtin function %s requires the -mpaired option", name);
13881 else if ((fnmask & RS6000_BTM_SPE) != 0)
13882 error ("builtin function %s requires the -mspe option", name);
13883 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13884 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13885 error ("builtin function %s requires the -mhard-dfp and"
13886 " -mpower8-vector options", name);
13887 else if ((fnmask & RS6000_BTM_DFP) != 0)
13888 error ("builtin function %s requires the -mhard-dfp option", name);
13889 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
13890 error ("builtin function %s requires the -mpower8-vector option", name);
13891 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13892 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13893 error ("builtin function %s requires the -mhard-float and"
13894 " -mlong-double-128 options", name);
13895 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
13896 error ("builtin function %s requires the -mhard-float option", name);
13897 else
13898 error ("builtin function %s is not supported with the current options",
13899 name);
13902 /* Expand an expression EXP that calls a built-in function,
13903 with result going to TARGET if that's convenient
13904 (and in mode MODE if that's convenient).
13905 SUBTARGET may be used as the target for computing one of EXP's operands.
13906 IGNORE is nonzero if the value is to be ignored. */
13908 static rtx
13909 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13910 machine_mode mode ATTRIBUTE_UNUSED,
13911 int ignore ATTRIBUTE_UNUSED)
13913 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13914 enum rs6000_builtins fcode
13915 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
13916 size_t uns_fcode = (size_t)fcode;
13917 const struct builtin_description *d;
13918 size_t i;
13919 rtx ret;
13920 bool success;
13921 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
13922 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
13924 if (TARGET_DEBUG_BUILTIN)
13926 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
13927 const char *name1 = rs6000_builtin_info[uns_fcode].name;
13928 const char *name2 = ((icode != CODE_FOR_nothing)
13929 ? get_insn_name ((int)icode)
13930 : "nothing");
13931 const char *name3;
13933 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
13935 default: name3 = "unknown"; break;
13936 case RS6000_BTC_SPECIAL: name3 = "special"; break;
13937 case RS6000_BTC_UNARY: name3 = "unary"; break;
13938 case RS6000_BTC_BINARY: name3 = "binary"; break;
13939 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
13940 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
13941 case RS6000_BTC_ABS: name3 = "abs"; break;
13942 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
13943 case RS6000_BTC_DST: name3 = "dst"; break;
13947 fprintf (stderr,
13948 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
13949 (name1) ? name1 : "---", fcode,
13950 (name2) ? name2 : "---", (int)icode,
13951 name3,
13952 func_valid_p ? "" : ", not valid");
13955 if (!func_valid_p)
13957 rs6000_invalid_builtin (fcode);
13959 /* Given it is invalid, just generate a normal call. */
13960 return expand_call (exp, target, ignore);
13963 switch (fcode)
13965 case RS6000_BUILTIN_RECIP:
13966 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
13968 case RS6000_BUILTIN_RECIPF:
13969 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
13971 case RS6000_BUILTIN_RSQRTF:
13972 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
13974 case RS6000_BUILTIN_RSQRT:
13975 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
13977 case POWER7_BUILTIN_BPERMD:
13978 return rs6000_expand_binop_builtin (((TARGET_64BIT)
13979 ? CODE_FOR_bpermd_di
13980 : CODE_FOR_bpermd_si), exp, target);
13982 case RS6000_BUILTIN_GET_TB:
13983 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
13984 target);
13986 case RS6000_BUILTIN_MFTB:
13987 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
13988 ? CODE_FOR_rs6000_mftb_di
13989 : CODE_FOR_rs6000_mftb_si),
13990 target);
13992 case RS6000_BUILTIN_MFFS:
13993 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
13995 case RS6000_BUILTIN_MTFSF:
13996 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
13998 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
13999 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14001 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14002 : (int) CODE_FOR_altivec_lvsl_direct);
14003 machine_mode tmode = insn_data[icode].operand[0].mode;
14004 machine_mode mode = insn_data[icode].operand[1].mode;
14005 tree arg;
14006 rtx op, addr, pat;
14008 gcc_assert (TARGET_ALTIVEC);
14010 arg = CALL_EXPR_ARG (exp, 0);
14011 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14012 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14013 addr = memory_address (mode, op);
14014 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14015 op = addr;
14016 else
14018 /* For the load case we need to negate the address. */
14019 op = gen_reg_rtx (GET_MODE (addr));
14020 emit_insn (gen_rtx_SET (VOIDmode, op,
14021 gen_rtx_NEG (GET_MODE (addr), addr)));
14023 op = gen_rtx_MEM (mode, op);
14025 if (target == 0
14026 || GET_MODE (target) != tmode
14027 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14028 target = gen_reg_rtx (tmode);
14030 pat = GEN_FCN (icode) (target, op);
14031 if (!pat)
14032 return 0;
14033 emit_insn (pat);
14035 return target;
14038 case ALTIVEC_BUILTIN_VCFUX:
14039 case ALTIVEC_BUILTIN_VCFSX:
14040 case ALTIVEC_BUILTIN_VCTUXS:
14041 case ALTIVEC_BUILTIN_VCTSXS:
14042 /* FIXME: There's got to be a nicer way to handle this case than
14043 constructing a new CALL_EXPR. */
14044 if (call_expr_nargs (exp) == 1)
14046 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14047 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14049 break;
14051 default:
14052 break;
14055 if (TARGET_ALTIVEC)
14057 ret = altivec_expand_builtin (exp, target, &success);
14059 if (success)
14060 return ret;
14062 if (TARGET_SPE)
14064 ret = spe_expand_builtin (exp, target, &success);
14066 if (success)
14067 return ret;
14069 if (TARGET_PAIRED_FLOAT)
14071 ret = paired_expand_builtin (exp, target, &success);
14073 if (success)
14074 return ret;
14076 if (TARGET_HTM)
14078 ret = htm_expand_builtin (exp, target, &success);
14080 if (success)
14081 return ret;
14084 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14085 gcc_assert (attr == RS6000_BTC_UNARY
14086 || attr == RS6000_BTC_BINARY
14087 || attr == RS6000_BTC_TERNARY);
14089 /* Handle simple unary operations. */
14090 d = bdesc_1arg;
14091 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14092 if (d->code == fcode)
14093 return rs6000_expand_unop_builtin (d->icode, exp, target);
14095 /* Handle simple binary operations. */
14096 d = bdesc_2arg;
14097 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14098 if (d->code == fcode)
14099 return rs6000_expand_binop_builtin (d->icode, exp, target);
14101 /* Handle simple ternary operations. */
14102 d = bdesc_3arg;
14103 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14104 if (d->code == fcode)
14105 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14107 gcc_unreachable ();
14110 static void
14111 rs6000_init_builtins (void)
14113 tree tdecl;
14114 tree ftype;
14115 machine_mode mode;
14117 if (TARGET_DEBUG_BUILTIN)
14118 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14119 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14120 (TARGET_SPE) ? ", spe" : "",
14121 (TARGET_ALTIVEC) ? ", altivec" : "",
14122 (TARGET_VSX) ? ", vsx" : "");
14124 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14125 V2SF_type_node = build_vector_type (float_type_node, 2);
14126 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14127 V2DF_type_node = build_vector_type (double_type_node, 2);
14128 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14129 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14130 V4SF_type_node = build_vector_type (float_type_node, 4);
14131 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14132 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14134 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14135 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14136 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14137 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14139 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14140 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14141 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14142 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14144 /* We use V1TI mode as a special container to hold __int128_t items that
14145 must live in VSX registers. */
14146 if (intTI_type_node)
14148 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14149 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
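/* Illustrative effect: a value declared as "vector __int128" gets
   V1TImode and therefore lives in a single VSX register rather than
   being split across a pair of GPRs. */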
14152 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14153 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14154 'vector unsigned short'. */
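/* Illustrative consequence for C++ overload resolution:
     void f (vector bool int);
     void f (vector unsigned int);
   must remain distinct signatures, hence the distinct type copies
   built below. */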
14156 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14157 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14158 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14159 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14160 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14162 long_integer_type_internal_node = long_integer_type_node;
14163 long_unsigned_type_internal_node = long_unsigned_type_node;
14164 long_long_integer_type_internal_node = long_long_integer_type_node;
14165 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14166 intQI_type_internal_node = intQI_type_node;
14167 uintQI_type_internal_node = unsigned_intQI_type_node;
14168 intHI_type_internal_node = intHI_type_node;
14169 uintHI_type_internal_node = unsigned_intHI_type_node;
14170 intSI_type_internal_node = intSI_type_node;
14171 uintSI_type_internal_node = unsigned_intSI_type_node;
14172 intDI_type_internal_node = intDI_type_node;
14173 uintDI_type_internal_node = unsigned_intDI_type_node;
14174 intTI_type_internal_node = intTI_type_node;
14175 uintTI_type_internal_node = unsigned_intTI_type_node;
14176 float_type_internal_node = float_type_node;
14177 double_type_internal_node = double_type_node;
14178 long_double_type_internal_node = long_double_type_node;
14179 dfloat64_type_internal_node = dfloat64_type_node;
14180 dfloat128_type_internal_node = dfloat128_type_node;
14181 void_type_internal_node = void_type_node;
14183 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14184 tree type node. */
14185 builtin_mode_to_type[QImode][0] = integer_type_node;
14186 builtin_mode_to_type[HImode][0] = integer_type_node;
14187 builtin_mode_to_type[SImode][0] = intSI_type_node;
14188 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14189 builtin_mode_to_type[DImode][0] = intDI_type_node;
14190 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14191 builtin_mode_to_type[TImode][0] = intTI_type_node;
14192 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14193 builtin_mode_to_type[SFmode][0] = float_type_node;
14194 builtin_mode_to_type[DFmode][0] = double_type_node;
14195 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14196 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14197 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14198 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14199 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14200 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14201 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14202 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14203 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14204 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14205 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14206 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14207 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14208 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14209 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14210 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14211 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14212 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
14214 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14215 TYPE_NAME (bool_char_type_node) = tdecl;
14217 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14218 TYPE_NAME (bool_short_type_node) = tdecl;
14220 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14221 TYPE_NAME (bool_int_type_node) = tdecl;
14223 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14224 TYPE_NAME (pixel_type_node) = tdecl;
14226 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14227 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14228 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14229 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14230 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14232 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14233 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14235 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14236 TYPE_NAME (V16QI_type_node) = tdecl;
14238 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14239 TYPE_NAME ( bool_V16QI_type_node) = tdecl;
14241 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14242 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14244 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14245 TYPE_NAME (V8HI_type_node) = tdecl;
14247 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14248 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14250 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14251 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14253 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14254 TYPE_NAME (V4SI_type_node) = tdecl;
14256 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14257 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14259 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14260 TYPE_NAME (V4SF_type_node) = tdecl;
14262 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14263 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14265 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14266 TYPE_NAME (V2DF_type_node) = tdecl;
14268 if (TARGET_POWERPC64)
14270 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14271 TYPE_NAME (V2DI_type_node) = tdecl;
14273 tdecl = add_builtin_type ("__vector unsigned long",
14274 unsigned_V2DI_type_node);
14275 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14277 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14278 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14280 else
14282 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14283 TYPE_NAME (V2DI_type_node) = tdecl;
14285 tdecl = add_builtin_type ("__vector unsigned long long",
14286 unsigned_V2DI_type_node);
14287 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14289 tdecl = add_builtin_type ("__vector __bool long long",
14290 bool_V2DI_type_node);
14291 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14294 if (V1TI_type_node)
14296 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14297 TYPE_NAME (V1TI_type_node) = tdecl;
14299 tdecl = add_builtin_type ("__vector unsigned __int128",
14300 unsigned_V1TI_type_node);
14301 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14304 /* Paired and SPE builtins are only available if you build a compiler with
14305 the appropriate options, so only create those builtins with the
14306 appropriate compiler option. Create Altivec and VSX builtins on machines
14307 with at least the general purpose extensions (970 and newer) to allow the
14308 use of the target attribute. */
14309 if (TARGET_PAIRED_FLOAT)
14310 paired_init_builtins ();
14311 if (TARGET_SPE)
14312 spe_init_builtins ();
14313 if (TARGET_EXTRA_BUILTINS)
14314 altivec_init_builtins ();
14315 if (TARGET_HTM)
14316 htm_init_builtins ();
14318 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14319 rs6000_common_init_builtins ();
14321 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14322 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14323 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14325 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14326 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14327 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14329 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14330 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14331 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14333 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14334 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14335 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
14337 mode = (TARGET_64BIT) ? DImode : SImode;
14338 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14339 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14340 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14342 ftype = build_function_type_list (unsigned_intDI_type_node,
14343 NULL_TREE);
14344 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14346 if (TARGET_64BIT)
14347 ftype = build_function_type_list (unsigned_intDI_type_node,
14348 NULL_TREE);
14349 else
14350 ftype = build_function_type_list (unsigned_intSI_type_node,
14351 NULL_TREE);
14352 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
14354 ftype = build_function_type_list (double_type_node, NULL_TREE);
14355 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14357 ftype = build_function_type_list (void_type_node,
14358 intSI_type_node, double_type_node,
14359 NULL_TREE);
14360 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
14362 #if TARGET_XCOFF
14363 /* AIX libm provides clog as __clog. */
14364 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14365 set_user_assembler_name (tdecl, "__clog");
14366 #endif
14368 #ifdef SUBTARGET_INIT_BUILTINS
14369 SUBTARGET_INIT_BUILTINS;
14370 #endif
14373 /* Returns the rs6000 builtin decl for CODE. */
14375 static tree
14376 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14378 HOST_WIDE_INT fnmask;
14380 if (code >= RS6000_BUILTIN_COUNT)
14381 return error_mark_node;
14383 fnmask = rs6000_builtin_info[code].mask;
14384 if ((fnmask & rs6000_builtin_mask) != fnmask)
14386 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14387 return error_mark_node;
14390 return rs6000_builtin_decls[code];
14393 static void
14394 spe_init_builtins (void)
14396 tree puint_type_node = build_pointer_type (unsigned_type_node);
14397 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14398 const struct builtin_description *d;
14399 size_t i;
14401 tree v2si_ftype_4_v2si
14402 = build_function_type_list (opaque_V2SI_type_node,
14403 opaque_V2SI_type_node,
14404 opaque_V2SI_type_node,
14405 opaque_V2SI_type_node,
14406 opaque_V2SI_type_node,
14407 NULL_TREE);
14409 tree v2sf_ftype_4_v2sf
14410 = build_function_type_list (opaque_V2SF_type_node,
14411 opaque_V2SF_type_node,
14412 opaque_V2SF_type_node,
14413 opaque_V2SF_type_node,
14414 opaque_V2SF_type_node,
14415 NULL_TREE);
14417 tree int_ftype_int_v2si_v2si
14418 = build_function_type_list (integer_type_node,
14419 integer_type_node,
14420 opaque_V2SI_type_node,
14421 opaque_V2SI_type_node,
14422 NULL_TREE);
14424 tree int_ftype_int_v2sf_v2sf
14425 = build_function_type_list (integer_type_node,
14426 integer_type_node,
14427 opaque_V2SF_type_node,
14428 opaque_V2SF_type_node,
14429 NULL_TREE);
14431 tree void_ftype_v2si_puint_int
14432 = build_function_type_list (void_type_node,
14433 opaque_V2SI_type_node,
14434 puint_type_node,
14435 integer_type_node,
14436 NULL_TREE);
14438 tree void_ftype_v2si_puint_char
14439 = build_function_type_list (void_type_node,
14440 opaque_V2SI_type_node,
14441 puint_type_node,
14442 char_type_node,
14443 NULL_TREE);
14445 tree void_ftype_v2si_pv2si_int
14446 = build_function_type_list (void_type_node,
14447 opaque_V2SI_type_node,
14448 opaque_p_V2SI_type_node,
14449 integer_type_node,
14450 NULL_TREE);
14452 tree void_ftype_v2si_pv2si_char
14453 = build_function_type_list (void_type_node,
14454 opaque_V2SI_type_node,
14455 opaque_p_V2SI_type_node,
14456 char_type_node,
14457 NULL_TREE);
14459 tree void_ftype_int
14460 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14462 tree int_ftype_void
14463 = build_function_type_list (integer_type_node, NULL_TREE);
14465 tree v2si_ftype_pv2si_int
14466 = build_function_type_list (opaque_V2SI_type_node,
14467 opaque_p_V2SI_type_node,
14468 integer_type_node,
14469 NULL_TREE);
14471 tree v2si_ftype_puint_int
14472 = build_function_type_list (opaque_V2SI_type_node,
14473 puint_type_node,
14474 integer_type_node,
14475 NULL_TREE);
14477 tree v2si_ftype_pushort_int
14478 = build_function_type_list (opaque_V2SI_type_node,
14479 pushort_type_node,
14480 integer_type_node,
14481 NULL_TREE);
14483 tree v2si_ftype_signed_char
14484 = build_function_type_list (opaque_V2SI_type_node,
14485 signed_char_type_node,
14486 NULL_TREE);
14488 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14490 /* Initialize irregular SPE builtins. */
14492 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14493 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14494 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14495 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14496 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14497 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14498 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14499 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14500 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14501 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14502 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14503 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14504 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14505 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14506 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14507 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14508 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14509 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14511 /* Loads. */
14512 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14513 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14514 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14515 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14516 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14517 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14518 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14519 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14520 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14521 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14522 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14523 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14524 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14525 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14526 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14527 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14528 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14529 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14530 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14531 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14532 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14533 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
14535 /* Predicates. */
14536 d = bdesc_spe_predicates;
14537 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14539 tree type;
14541 switch (insn_data[d->icode].operand[1].mode)
14543 case V2SImode:
14544 type = int_ftype_int_v2si_v2si;
14545 break;
14546 case V2SFmode:
14547 type = int_ftype_int_v2sf_v2sf;
14548 break;
14549 default:
14550 gcc_unreachable ();
14553 def_builtin (d->name, type, d->code);
14556 /* Evsel predicates. */
14557 d = bdesc_spe_evsel;
14558 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14560 tree type;
14562 switch (insn_data[d->icode].operand[1].mode)
14564 case V2SImode:
14565 type = v2si_ftype_4_v2si;
14566 break;
14567 case V2SFmode:
14568 type = v2sf_ftype_4_v2sf;
14569 break;
14570 default:
14571 gcc_unreachable ();
14574 def_builtin (d->name, type, d->code);
14578 static void
14579 paired_init_builtins (void)
14581 const struct builtin_description *d;
14582 size_t i;
14584 tree int_ftype_int_v2sf_v2sf
14585 = build_function_type_list (integer_type_node,
14586 integer_type_node,
14587 V2SF_type_node,
14588 V2SF_type_node,
14589 NULL_TREE);
14590 tree pcfloat_type_node =
14591 build_pointer_type (build_qualified_type
14592 (float_type_node, TYPE_QUAL_CONST));
14594 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14595 long_integer_type_node,
14596 pcfloat_type_node,
14597 NULL_TREE);
14598 tree void_ftype_v2sf_long_pcfloat =
14599 build_function_type_list (void_type_node,
14600 V2SF_type_node,
14601 long_integer_type_node,
14602 pcfloat_type_node,
14603 NULL_TREE);
14606 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14607 PAIRED_BUILTIN_LX);
14610 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14611 PAIRED_BUILTIN_STX);
14613 /* Predicates. */
14614 d = bdesc_paired_preds;
14615 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14617 tree type;
14619 if (TARGET_DEBUG_BUILTIN)
14620 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14621 (int)i, get_insn_name (d->icode), (int)d->icode,
14622 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14624 switch (insn_data[d->icode].operand[1].mode)
14626 case V2SFmode:
14627 type = int_ftype_int_v2sf_v2sf;
14628 break;
14629 default:
14630 gcc_unreachable ();
14633 def_builtin (d->name, type, d->code);
14637 static void
14638 altivec_init_builtins (void)
14640 const struct builtin_description *d;
14641 size_t i;
14642 tree ftype;
14643 tree decl;
14645 tree pvoid_type_node = build_pointer_type (void_type_node);
14647 tree pcvoid_type_node
14648 = build_pointer_type (build_qualified_type (void_type_node,
14649 TYPE_QUAL_CONST));
14651 tree int_ftype_opaque
14652 = build_function_type_list (integer_type_node,
14653 opaque_V4SI_type_node, NULL_TREE);
14654 tree opaque_ftype_opaque
14655 = build_function_type_list (integer_type_node, NULL_TREE);
14656 tree opaque_ftype_opaque_int
14657 = build_function_type_list (opaque_V4SI_type_node,
14658 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14659 tree opaque_ftype_opaque_opaque_int
14660 = build_function_type_list (opaque_V4SI_type_node,
14661 opaque_V4SI_type_node, opaque_V4SI_type_node,
14662 integer_type_node, NULL_TREE);
14663 tree int_ftype_int_opaque_opaque
14664 = build_function_type_list (integer_type_node,
14665 integer_type_node, opaque_V4SI_type_node,
14666 opaque_V4SI_type_node, NULL_TREE);
14667 tree int_ftype_int_v4si_v4si
14668 = build_function_type_list (integer_type_node,
14669 integer_type_node, V4SI_type_node,
14670 V4SI_type_node, NULL_TREE);
14671 tree int_ftype_int_v2di_v2di
14672 = build_function_type_list (integer_type_node,
14673 integer_type_node, V2DI_type_node,
14674 V2DI_type_node, NULL_TREE);
14675 tree void_ftype_v4si
14676 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14677 tree v8hi_ftype_void
14678 = build_function_type_list (V8HI_type_node, NULL_TREE);
14679 tree void_ftype_void
14680 = build_function_type_list (void_type_node, NULL_TREE);
14681 tree void_ftype_int
14682 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14684 tree opaque_ftype_long_pcvoid
14685 = build_function_type_list (opaque_V4SI_type_node,
14686 long_integer_type_node, pcvoid_type_node,
14687 NULL_TREE);
14688 tree v16qi_ftype_long_pcvoid
14689 = build_function_type_list (V16QI_type_node,
14690 long_integer_type_node, pcvoid_type_node,
14691 NULL_TREE);
14692 tree v8hi_ftype_long_pcvoid
14693 = build_function_type_list (V8HI_type_node,
14694 long_integer_type_node, pcvoid_type_node,
14695 NULL_TREE);
14696 tree v4si_ftype_long_pcvoid
14697 = build_function_type_list (V4SI_type_node,
14698 long_integer_type_node, pcvoid_type_node,
14699 NULL_TREE);
14700 tree v4sf_ftype_long_pcvoid
14701 = build_function_type_list (V4SF_type_node,
14702 long_integer_type_node, pcvoid_type_node,
14703 NULL_TREE);
14704 tree v2df_ftype_long_pcvoid
14705 = build_function_type_list (V2DF_type_node,
14706 long_integer_type_node, pcvoid_type_node,
14707 NULL_TREE);
14708 tree v2di_ftype_long_pcvoid
14709 = build_function_type_list (V2DI_type_node,
14710 long_integer_type_node, pcvoid_type_node,
14711 NULL_TREE);
14713 tree void_ftype_opaque_long_pvoid
14714 = build_function_type_list (void_type_node,
14715 opaque_V4SI_type_node, long_integer_type_node,
14716 pvoid_type_node, NULL_TREE);
14717 tree void_ftype_v4si_long_pvoid
14718 = build_function_type_list (void_type_node,
14719 V4SI_type_node, long_integer_type_node,
14720 pvoid_type_node, NULL_TREE);
14721 tree void_ftype_v16qi_long_pvoid
14722 = build_function_type_list (void_type_node,
14723 V16QI_type_node, long_integer_type_node,
14724 pvoid_type_node, NULL_TREE);
14725 tree void_ftype_v8hi_long_pvoid
14726 = build_function_type_list (void_type_node,
14727 V8HI_type_node, long_integer_type_node,
14728 pvoid_type_node, NULL_TREE);
14729 tree void_ftype_v4sf_long_pvoid
14730 = build_function_type_list (void_type_node,
14731 V4SF_type_node, long_integer_type_node,
14732 pvoid_type_node, NULL_TREE);
14733 tree void_ftype_v2df_long_pvoid
14734 = build_function_type_list (void_type_node,
14735 V2DF_type_node, long_integer_type_node,
14736 pvoid_type_node, NULL_TREE);
14737 tree void_ftype_v2di_long_pvoid
14738 = build_function_type_list (void_type_node,
14739 V2DI_type_node, long_integer_type_node,
14740 pvoid_type_node, NULL_TREE);
14741 tree int_ftype_int_v8hi_v8hi
14742 = build_function_type_list (integer_type_node,
14743 integer_type_node, V8HI_type_node,
14744 V8HI_type_node, NULL_TREE);
14745 tree int_ftype_int_v16qi_v16qi
14746 = build_function_type_list (integer_type_node,
14747 integer_type_node, V16QI_type_node,
14748 V16QI_type_node, NULL_TREE);
14749 tree int_ftype_int_v4sf_v4sf
14750 = build_function_type_list (integer_type_node,
14751 integer_type_node, V4SF_type_node,
14752 V4SF_type_node, NULL_TREE);
14753 tree int_ftype_int_v2df_v2df
14754 = build_function_type_list (integer_type_node,
14755 integer_type_node, V2DF_type_node,
14756 V2DF_type_node, NULL_TREE);
14757 tree v2di_ftype_v2di
14758 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14759 tree v4si_ftype_v4si
14760 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14761 tree v8hi_ftype_v8hi
14762 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14763 tree v16qi_ftype_v16qi
14764 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14765 tree v4sf_ftype_v4sf
14766 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14767 tree v2df_ftype_v2df
14768 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14769 tree void_ftype_pcvoid_int_int
14770 = build_function_type_list (void_type_node,
14771 pcvoid_type_node, integer_type_node,
14772 integer_type_node, NULL_TREE);
14774 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14775 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14776 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14777 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14778 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14779 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14780 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14781 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14782 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14783 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14784 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14785 ALTIVEC_BUILTIN_LVXL_V2DF);
14786 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14787 ALTIVEC_BUILTIN_LVXL_V2DI);
14788 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14789 ALTIVEC_BUILTIN_LVXL_V4SF);
14790 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14791 ALTIVEC_BUILTIN_LVXL_V4SI);
14792 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
14793 ALTIVEC_BUILTIN_LVXL_V8HI);
14794 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
14795 ALTIVEC_BUILTIN_LVXL_V16QI);
14796 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
14797 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
14798 ALTIVEC_BUILTIN_LVX_V2DF);
14799 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
14800 ALTIVEC_BUILTIN_LVX_V2DI);
14801 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
14802 ALTIVEC_BUILTIN_LVX_V4SF);
14803 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
14804 ALTIVEC_BUILTIN_LVX_V4SI);
14805 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
14806 ALTIVEC_BUILTIN_LVX_V8HI);
14807 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
14808 ALTIVEC_BUILTIN_LVX_V16QI);
14809 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
14810 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
14811 ALTIVEC_BUILTIN_STVX_V2DF);
14812 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
14813 ALTIVEC_BUILTIN_STVX_V2DI);
14814 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
14815 ALTIVEC_BUILTIN_STVX_V4SF);
14816 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
14817 ALTIVEC_BUILTIN_STVX_V4SI);
14818 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
14819 ALTIVEC_BUILTIN_STVX_V8HI);
14820 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
14821 ALTIVEC_BUILTIN_STVX_V16QI);
14822 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
14823 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
14824 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
14825 ALTIVEC_BUILTIN_STVXL_V2DF);
14826 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
14827 ALTIVEC_BUILTIN_STVXL_V2DI);
14828 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
14829 ALTIVEC_BUILTIN_STVXL_V4SF);
14830 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
14831 ALTIVEC_BUILTIN_STVXL_V4SI);
14832 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
14833 ALTIVEC_BUILTIN_STVXL_V8HI);
14834 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
14835 ALTIVEC_BUILTIN_STVXL_V16QI);
14836 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
14837 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
14838 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
14839 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
14840 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
14841 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
14842 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
14843 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
14844 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
14845 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
14846 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
14847 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
14848 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
14849 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
14850 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
14851 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
14853 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
14854 VSX_BUILTIN_LXVD2X_V2DF);
14855 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
14856 VSX_BUILTIN_LXVD2X_V2DI);
14857 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
14858 VSX_BUILTIN_LXVW4X_V4SF);
14859 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
14860 VSX_BUILTIN_LXVW4X_V4SI);
14861 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
14862 VSX_BUILTIN_LXVW4X_V8HI);
14863 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
14864 VSX_BUILTIN_LXVW4X_V16QI);
14865 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
14866 VSX_BUILTIN_STXVD2X_V2DF);
14867 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
14868 VSX_BUILTIN_STXVD2X_V2DI);
14869 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
14870 VSX_BUILTIN_STXVW4X_V4SF);
14871 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
14872 VSX_BUILTIN_STXVW4X_V4SI);
14873 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
14874 VSX_BUILTIN_STXVW4X_V8HI);
14875 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
14876 VSX_BUILTIN_STXVW4X_V16QI);
14877 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
14878 VSX_BUILTIN_VEC_LD);
14879 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
14880 VSX_BUILTIN_VEC_ST);
14882 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
14883 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
14884 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
14886 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
14887 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
14888 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
14889 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
14890 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
14891 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
14892 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
14893 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
14894 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
14895 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
14896 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
14897 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
14899 /* Cell builtins. */
14900 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
14901 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
14902 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
14903 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
14905 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
14906 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
14907 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
14908 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
14910 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
14911 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
14912 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
14913 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
14915 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
14916 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
14917 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
14918 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
14920 /* Add the DST variants. */
14921 d = bdesc_dst;
14922 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14923 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
14925 /* Initialize the predicates. */
14926 d = bdesc_altivec_preds;
14927 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14929 machine_mode mode1;
14930 tree type;
14932 if (rs6000_overloaded_builtin_p (d->code))
14933 mode1 = VOIDmode;
14934 else
14935 mode1 = insn_data[d->icode].operand[1].mode;
14937 switch (mode1)
14939 case VOIDmode:
14940 type = int_ftype_int_opaque_opaque;
14941 break;
14942 case V2DImode:
14943 type = int_ftype_int_v2di_v2di;
14944 break;
14945 case V4SImode:
14946 type = int_ftype_int_v4si_v4si;
14947 break;
14948 case V8HImode:
14949 type = int_ftype_int_v8hi_v8hi;
14950 break;
14951 case V16QImode:
14952 type = int_ftype_int_v16qi_v16qi;
14953 break;
14954 case V4SFmode:
14955 type = int_ftype_int_v4sf_v4sf;
14956 break;
14957 case V2DFmode:
14958 type = int_ftype_int_v2df_v2df;
14959 break;
14960 default:
14961 gcc_unreachable ();
14964 def_builtin (d->name, type, d->code);
14967 /* Initialize the abs* operators. */
14968 d = bdesc_abs;
14969 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14971 machine_mode mode0;
14972 tree type;
14974 mode0 = insn_data[d->icode].operand[0].mode;
14976 switch (mode0)
14978 case V2DImode:
14979 type = v2di_ftype_v2di;
14980 break;
14981 case V4SImode:
14982 type = v4si_ftype_v4si;
14983 break;
14984 case V8HImode:
14985 type = v8hi_ftype_v8hi;
14986 break;
14987 case V16QImode:
14988 type = v16qi_ftype_v16qi;
14989 break;
14990 case V4SFmode:
14991 type = v4sf_ftype_v4sf;
14992 break;
14993 case V2DFmode:
14994 type = v2df_ftype_v2df;
14995 break;
14996 default:
14997 gcc_unreachable ();
15000 def_builtin (d->name, type, d->code);
15003 /* Initialize target builtin that implements
15004 targetm.vectorize.builtin_mask_for_load. */
15006 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15007 v16qi_ftype_long_pcvoid,
15008 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15009 BUILT_IN_MD, NULL, NULL_TREE);
15010 TREE_READONLY (decl) = 1;
15011 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15012 altivec_builtin_mask_for_load = decl;
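/* The vectorizer retrieves this decl via
   targetm.vectorize.builtin_mask_for_load when it realigns misaligned
   vector loads; TREE_READONLY above lets identical mask computations
   be commoned.  */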
15014 /* Access to the vec_init patterns. */
15015 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15016 integer_type_node, integer_type_node,
15017 integer_type_node, NULL_TREE);
15018 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15020 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15021 short_integer_type_node,
15022 short_integer_type_node,
15023 short_integer_type_node,
15024 short_integer_type_node,
15025 short_integer_type_node,
15026 short_integer_type_node,
15027 short_integer_type_node, NULL_TREE);
15028 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15030 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15031 char_type_node, char_type_node,
15032 char_type_node, char_type_node,
15033 char_type_node, char_type_node,
15034 char_type_node, char_type_node,
15035 char_type_node, char_type_node,
15036 char_type_node, char_type_node,
15037 char_type_node, char_type_node,
15038 char_type_node, NULL_TREE);
15039 def_builtin ("__builtin_vec_init_v16qi", ftype,
15040 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15042 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15043 float_type_node, float_type_node,
15044 float_type_node, NULL_TREE);
15045 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15047 /* VSX builtins. */
15048 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15049 double_type_node, NULL_TREE);
15050 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15052 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15053 intDI_type_node, NULL_TREE);
15054 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15056 /* Access to the vec_set patterns. */
15057 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15058 intSI_type_node,
15059 integer_type_node, NULL_TREE);
15060 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15062 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15063 intHI_type_node,
15064 integer_type_node, NULL_TREE);
15065 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15067 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15068 intQI_type_node,
15069 integer_type_node, NULL_TREE);
15070 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15072 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15073 float_type_node,
15074 integer_type_node, NULL_TREE);
15075 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15077 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15078 double_type_node,
15079 integer_type_node, NULL_TREE);
15080 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15082 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15083 intDI_type_node,
15084 integer_type_node, NULL_TREE);
15085 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15087 /* Access to the vec_extract patterns. */
15088 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15089 integer_type_node, NULL_TREE);
15090 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15092 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15093 integer_type_node, NULL_TREE);
15094 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15096 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15097 integer_type_node, NULL_TREE);
15098 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15100 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15101 integer_type_node, NULL_TREE);
15102 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15104 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15105 integer_type_node, NULL_TREE);
15106 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15108 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15109 integer_type_node, NULL_TREE);
15110 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
15113 if (V1TI_type_node)
15115 tree v1ti_ftype_long_pcvoid
15116 = build_function_type_list (V1TI_type_node,
15117 long_integer_type_node, pcvoid_type_node,
15118 NULL_TREE);
15119 tree void_ftype_v1ti_long_pvoid
15120 = build_function_type_list (void_type_node,
15121 V1TI_type_node, long_integer_type_node,
15122 pvoid_type_node, NULL_TREE);
15123 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15124 VSX_BUILTIN_LXVD2X_V1TI);
15125 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15126 VSX_BUILTIN_STXVD2X_V1TI);
15127 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15128 NULL_TREE);
15129 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15130 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15131 intTI_type_node,
15132 integer_type_node, NULL_TREE);
15133 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15134 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15135 integer_type_node, NULL_TREE);
15136 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15141 static void
15142 htm_init_builtins (void)
15144 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15145 const struct builtin_description *d;
15146 size_t i;
15148 d = bdesc_htm;
15149 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15151 tree op[MAX_HTM_OPERANDS], type;
15152 HOST_WIDE_INT mask = d->mask;
15153 unsigned attr = rs6000_builtin_info[d->code].attr;
15154 bool void_func = (attr & RS6000_BTC_VOID);
15155 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15156 int nopnds = 0;
15157 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
15158 : unsigned_type_node;
15160 if ((mask & builtin_mask) != mask)
15162 if (TARGET_DEBUG_BUILTIN)
15163 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
15164 continue;
15167 if (d->name == 0)
15169 if (TARGET_DEBUG_BUILTIN)
15170 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
15171 (long unsigned) i);
15172 continue;
15175 op[nopnds++] = (void_func) ? void_type_node : argtype;
15177 if (attr_args == RS6000_BTC_UNARY)
15178 op[nopnds++] = argtype;
15179 else if (attr_args == RS6000_BTC_BINARY)
15181 op[nopnds++] = argtype;
15182 op[nopnds++] = argtype;
15184 else if (attr_args == RS6000_BTC_TERNARY)
15186 op[nopnds++] = argtype;
15187 op[nopnds++] = argtype;
15188 op[nopnds++] = argtype;
15191 switch (nopnds)
15193 case 1:
15194 type = build_function_type_list (op[0], NULL_TREE);
15195 break;
15196 case 2:
15197 type = build_function_type_list (op[0], op[1], NULL_TREE);
15198 break;
15199 case 3:
15200 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15201 break;
15202 case 4:
15203 type = build_function_type_list (op[0], op[1], op[2], op[3],
15204 NULL_TREE);
15205 break;
15206 default:
15207 gcc_unreachable ();
15210 def_builtin (d->name, type, d->code);
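/* A worked example of the type assembly above: a non-void binary HTM
   builtin carrying RS6000_BTC_SPR picks long_unsigned_type_node for
   argtype, ends up with nopnds == 3, and is registered with type

     unsigned long (unsigned long, unsigned long)  */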
15214 /* Hash function for builtin functions with up to 3 arguments and a return
15215 type. */
15216 hashval_t
15217 builtin_hasher::hash (builtin_hash_struct *bh)
15219 unsigned ret = 0;
15220 int i;
15222 for (i = 0; i < 4; i++)
15224 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15225 ret = (ret * 2) + bh->uns_p[i];
15228 return ret;
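/* Expanding the loop above with M == MAX_MACHINE_MODE, the hash is

     ret = ((((((m0*2 + u0)*M + m1)*2 + u1)*M + m2)*2 + u2)*M + m3)*2 + u3

   i.e. a mixed-radix encoding of the four (mode, unsignedness) pairs,
   where mi == bh->mode[i] and ui == bh->uns_p[i].  */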
15231 /* Compare builtin hash entries H1 and H2 for equivalence. */
15232 bool
15233 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15235 return ((p1->mode[0] == p2->mode[0])
15236 && (p1->mode[1] == p2->mode[1])
15237 && (p1->mode[2] == p2->mode[2])
15238 && (p1->mode[3] == p2->mode[3])
15239 && (p1->uns_p[0] == p2->uns_p[0])
15240 && (p1->uns_p[1] == p2->uns_p[1])
15241 && (p1->uns_p[2] == p2->uns_p[2])
15242 && (p1->uns_p[3] == p2->uns_p[3]));
15245 /* Map types for builtin functions with an explicit return type and up to 3
15246 arguments. Functions with fewer than 3 arguments use VOIDmode for the
15247 unused argument slots. */
15248 static tree
15249 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
15250 machine_mode mode_arg1, machine_mode mode_arg2,
15251 enum rs6000_builtins builtin, const char *name)
15253 struct builtin_hash_struct h;
15254 struct builtin_hash_struct *h2;
15255 int num_args = 3;
15256 int i;
15257 tree ret_type = NULL_TREE;
15258 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15260 /* Create builtin_hash_table. */
15261 if (builtin_hash_table == NULL)
15262 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15264 h.type = NULL_TREE;
15265 h.mode[0] = mode_ret;
15266 h.mode[1] = mode_arg0;
15267 h.mode[2] = mode_arg1;
15268 h.mode[3] = mode_arg2;
15269 h.uns_p[0] = 0;
15270 h.uns_p[1] = 0;
15271 h.uns_p[2] = 0;
15272 h.uns_p[3] = 0;
15274 /* If the builtin is a type that produces unsigned results or takes unsigned
15275 arguments, and it is returned as a decl for the vectorizer (such as
15276 widening multiplies, permute), make sure the arguments and return value
15277 are type correct. */
15278 switch (builtin)
15280 /* unsigned 1 argument functions. */
15281 case CRYPTO_BUILTIN_VSBOX:
15282 case P8V_BUILTIN_VGBBD:
15283 case MISC_BUILTIN_CDTBCD:
15284 case MISC_BUILTIN_CBCDTD:
15285 h.uns_p[0] = 1;
15286 h.uns_p[1] = 1;
15287 break;
15289 /* unsigned 2 argument functions. */
15290 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15291 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15292 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15293 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15294 case CRYPTO_BUILTIN_VCIPHER:
15295 case CRYPTO_BUILTIN_VCIPHERLAST:
15296 case CRYPTO_BUILTIN_VNCIPHER:
15297 case CRYPTO_BUILTIN_VNCIPHERLAST:
15298 case CRYPTO_BUILTIN_VPMSUMB:
15299 case CRYPTO_BUILTIN_VPMSUMH:
15300 case CRYPTO_BUILTIN_VPMSUMW:
15301 case CRYPTO_BUILTIN_VPMSUMD:
15302 case CRYPTO_BUILTIN_VPMSUM:
15303 case MISC_BUILTIN_ADDG6S:
15304 case MISC_BUILTIN_DIVWEU:
15305 case MISC_BUILTIN_DIVWEUO:
15306 case MISC_BUILTIN_DIVDEU:
15307 case MISC_BUILTIN_DIVDEUO:
15308 h.uns_p[0] = 1;
15309 h.uns_p[1] = 1;
15310 h.uns_p[2] = 1;
15311 break;
15313 /* unsigned 3 argument functions. */
15314 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15315 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15316 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15317 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15318 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15319 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15320 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15321 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15322 case VSX_BUILTIN_VPERM_16QI_UNS:
15323 case VSX_BUILTIN_VPERM_8HI_UNS:
15324 case VSX_BUILTIN_VPERM_4SI_UNS:
15325 case VSX_BUILTIN_VPERM_2DI_UNS:
15326 case VSX_BUILTIN_XXSEL_16QI_UNS:
15327 case VSX_BUILTIN_XXSEL_8HI_UNS:
15328 case VSX_BUILTIN_XXSEL_4SI_UNS:
15329 case VSX_BUILTIN_XXSEL_2DI_UNS:
15330 case CRYPTO_BUILTIN_VPERMXOR:
15331 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15332 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15333 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15334 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15335 case CRYPTO_BUILTIN_VSHASIGMAW:
15336 case CRYPTO_BUILTIN_VSHASIGMAD:
15337 case CRYPTO_BUILTIN_VSHASIGMA:
15338 h.uns_p[0] = 1;
15339 h.uns_p[1] = 1;
15340 h.uns_p[2] = 1;
15341 h.uns_p[3] = 1;
15342 break;
15344 /* signed permute functions with unsigned char mask. */
15345 case ALTIVEC_BUILTIN_VPERM_16QI:
15346 case ALTIVEC_BUILTIN_VPERM_8HI:
15347 case ALTIVEC_BUILTIN_VPERM_4SI:
15348 case ALTIVEC_BUILTIN_VPERM_4SF:
15349 case ALTIVEC_BUILTIN_VPERM_2DI:
15350 case ALTIVEC_BUILTIN_VPERM_2DF:
15351 case VSX_BUILTIN_VPERM_16QI:
15352 case VSX_BUILTIN_VPERM_8HI:
15353 case VSX_BUILTIN_VPERM_4SI:
15354 case VSX_BUILTIN_VPERM_4SF:
15355 case VSX_BUILTIN_VPERM_2DI:
15356 case VSX_BUILTIN_VPERM_2DF:
15357 h.uns_p[3] = 1;
15358 break;
15360 /* unsigned args, signed return. */
15361 case VSX_BUILTIN_XVCVUXDDP_UNS:
15362 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15363 h.uns_p[1] = 1;
15364 break;
15366 /* signed args, unsigned return. */
15367 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15368 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15369 case MISC_BUILTIN_UNPACK_TD:
15370 case MISC_BUILTIN_UNPACK_V1TI:
15371 h.uns_p[0] = 1;
15372 break;
15374 /* unsigned arguments for 128-bit pack instructions. */
15375 case MISC_BUILTIN_PACK_TD:
15376 case MISC_BUILTIN_PACK_V1TI:
15377 h.uns_p[1] = 1;
15378 h.uns_p[2] = 1;
15379 break;
15381 default:
15382 break;
15385 /* Figure out how many args are present. */
15386 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15387 num_args--;
15389 if (num_args == 0)
15390 fatal_error ("internal error: builtin function %s had no type", name);
15392 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15393 if (!ret_type && h.uns_p[0])
15394 ret_type = builtin_mode_to_type[h.mode[0]][0];
15396 if (!ret_type)
15397 fatal_error ("internal error: builtin function %s had an unexpected "
15398 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15400 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15401 arg_type[i] = NULL_TREE;
15403 for (i = 0; i < num_args; i++)
15405 int m = (int) h.mode[i+1];
15406 int uns_p = h.uns_p[i+1];
15408 arg_type[i] = builtin_mode_to_type[m][uns_p];
15409 if (!arg_type[i] && uns_p)
15410 arg_type[i] = builtin_mode_to_type[m][0];
15412 if (!arg_type[i])
15413 fatal_error ("internal error: builtin function %s, argument %d "
15414 "had unexpected argument type %s", name, i,
15415 GET_MODE_NAME (m));
15418 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15419 if (*found == NULL)
15421 h2 = ggc_alloc<builtin_hash_struct> ();
15422 *h2 = h;
15423 *found = h2;
15425 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15426 arg_type[2], NULL_TREE);
15429 return (*found)->type;
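/* A worked example: the earlier call

     builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
                            RS6000_BUILTIN_RECIP, "__builtin_recipdiv")

   takes the default (all-signed) path through the switch, maps each
   DFmode slot through builtin_mode_to_type, and caches the resulting
   double (double, double) type in builtin_hash_table.  */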
15432 static void
15433 rs6000_common_init_builtins (void)
15435 const struct builtin_description *d;
15436 size_t i;
15438 tree opaque_ftype_opaque = NULL_TREE;
15439 tree opaque_ftype_opaque_opaque = NULL_TREE;
15440 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15441 tree v2si_ftype_qi = NULL_TREE;
15442 tree v2si_ftype_v2si_qi = NULL_TREE;
15443 tree v2si_ftype_int_qi = NULL_TREE;
15444 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15446 if (!TARGET_PAIRED_FLOAT)
15448 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15449 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15452 /* Paired and SPE builtins are only available if you build a compiler with
15453 the appropriate options, so only create those builtins with the
15454 appropriate compiler option. Create Altivec and VSX builtins on machines
15455 with at least the general purpose extensions (970 and newer) to allow the
15456 use of the target attribute. */
15458 if (TARGET_EXTRA_BUILTINS)
15459 builtin_mask |= RS6000_BTM_COMMON;
15461 /* Add the ternary operators. */
15462 d = bdesc_3arg;
15463 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15465 tree type;
15466 HOST_WIDE_INT mask = d->mask;
15468 if ((mask & builtin_mask) != mask)
15470 if (TARGET_DEBUG_BUILTIN)
15471 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15472 continue;
15475 if (rs6000_overloaded_builtin_p (d->code))
15477 if (! (type = opaque_ftype_opaque_opaque_opaque))
15478 type = opaque_ftype_opaque_opaque_opaque
15479 = build_function_type_list (opaque_V4SI_type_node,
15480 opaque_V4SI_type_node,
15481 opaque_V4SI_type_node,
15482 opaque_V4SI_type_node,
15483 NULL_TREE);
15485 else
15487 enum insn_code icode = d->icode;
15488 if (d->name == 0)
15490 if (TARGET_DEBUG_BUILTIN)
15491 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
15492 (long unsigned)i);
15494 continue;
15497 if (icode == CODE_FOR_nothing)
15499 if (TARGET_DEBUG_BUILTIN)
15500 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15501 d->name);
15503 continue;
15506 type = builtin_function_type (insn_data[icode].operand[0].mode,
15507 insn_data[icode].operand[1].mode,
15508 insn_data[icode].operand[2].mode,
15509 insn_data[icode].operand[3].mode,
15510 d->code, d->name);
15513 def_builtin (d->name, type, d->code);
15516 /* Add the binary operators. */
15517 d = bdesc_2arg;
15518 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15520 machine_mode mode0, mode1, mode2;
15521 tree type;
15522 HOST_WIDE_INT mask = d->mask;
15524 if ((mask & builtin_mask) != mask)
15526 if (TARGET_DEBUG_BUILTIN)
15527 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15528 continue;
15531 if (rs6000_overloaded_builtin_p (d->code))
15533 if (! (type = opaque_ftype_opaque_opaque))
15534 type = opaque_ftype_opaque_opaque
15535 = build_function_type_list (opaque_V4SI_type_node,
15536 opaque_V4SI_type_node,
15537 opaque_V4SI_type_node,
15538 NULL_TREE);
15540 else
15542 enum insn_code icode = d->icode;
15543 if (d->name == 0)
15545 if (TARGET_DEBUG_BUILTIN)
15546 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
15547 (long unsigned)i);
15549 continue;
15552 if (icode == CODE_FOR_nothing)
15554 if (TARGET_DEBUG_BUILTIN)
15555 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15556 d->name);
15558 continue;
15561 mode0 = insn_data[icode].operand[0].mode;
15562 mode1 = insn_data[icode].operand[1].mode;
15563 mode2 = insn_data[icode].operand[2].mode;
15565 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15567 if (! (type = v2si_ftype_v2si_qi))
15568 type = v2si_ftype_v2si_qi
15569 = build_function_type_list (opaque_V2SI_type_node,
15570 opaque_V2SI_type_node,
15571 char_type_node,
15572 NULL_TREE);
15575 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15576 && mode2 == QImode)
15578 if (! (type = v2si_ftype_int_qi))
15579 type = v2si_ftype_int_qi
15580 = build_function_type_list (opaque_V2SI_type_node,
15581 integer_type_node,
15582 char_type_node,
15583 NULL_TREE);
15586 else
15587 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15588 d->code, d->name);
15591 def_builtin (d->name, type, d->code);
15594 /* Add the simple unary operators. */
15595 d = bdesc_1arg;
15596 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15598 machine_mode mode0, mode1;
15599 tree type;
15600 HOST_WIDE_INT mask = d->mask;
15602 if ((mask & builtin_mask) != mask)
15604 if (TARGET_DEBUG_BUILTIN)
15605 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15606 continue;
15609 if (rs6000_overloaded_builtin_p (d->code))
15611 if (! (type = opaque_ftype_opaque))
15612 type = opaque_ftype_opaque
15613 = build_function_type_list (opaque_V4SI_type_node,
15614 opaque_V4SI_type_node,
15615 NULL_TREE);
15617 else
15619 enum insn_code icode = d->icode;
15620 if (d->name == 0)
15622 if (TARGET_DEBUG_BUILTIN)
15623 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
15624 (long unsigned)i);
15626 continue;
15629 if (icode == CODE_FOR_nothing)
15631 if (TARGET_DEBUG_BUILTIN)
15632 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15633 d->name);
15635 continue;
15638 mode0 = insn_data[icode].operand[0].mode;
15639 mode1 = insn_data[icode].operand[1].mode;
15641 if (mode0 == V2SImode && mode1 == QImode)
15643 if (! (type = v2si_ftype_qi))
15644 type = v2si_ftype_qi
15645 = build_function_type_list (opaque_V2SI_type_node,
15646 char_type_node,
15647 NULL_TREE);
15650 else
15651 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15652 d->code, d->name);
15655 def_builtin (d->name, type, d->code);
15659 static void
15660 rs6000_init_libfuncs (void)
15662 if (!TARGET_IEEEQUAD)
15663 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15664 if (!TARGET_XL_COMPAT)
15666 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15667 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15668 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15669 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
15671 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15673 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15674 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15675 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15676 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15677 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15678 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15679 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15681 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15682 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15683 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15684 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15685 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15686 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15687 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15688 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15691 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15692 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15694 else
15696 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15697 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15698 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15699 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15701 else
15703 /* 32-bit SVR4 quad floating point routines. */
15705 set_optab_libfunc (add_optab, TFmode, "_q_add");
15706 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15707 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15708 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15709 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15710 if (TARGET_PPC_GPOPT)
15711 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15713 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15714 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15715 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15716 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15717 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15718 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15720 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15721 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15722 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15723 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15724 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15725 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15726 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15727 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
15732 /* Expand a block clear operation, and return 1 if successful. Return 0
15733 if we should let the compiler generate normal code.
15735 operands[0] is the destination
15736 operands[1] is the length
15737 operands[3] is the alignment */
15739 int
15740 expand_block_clear (rtx operands[])
15742 rtx orig_dest = operands[0];
15743 rtx bytes_rtx = operands[1];
15744 rtx align_rtx = operands[3];
15745 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15746 HOST_WIDE_INT align;
15747 HOST_WIDE_INT bytes;
15748 int offset;
15749 int clear_bytes;
15750 int clear_step;
15752 /* If this is not a fixed size clear, just let the compiler call memset */
15753 if (! constp)
15754 return 0;
15756 /* This must be a fixed size alignment */
15757 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15758 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15760 /* Anything to clear? */
15761 bytes = INTVAL (bytes_rtx);
15762 if (bytes <= 0)
15763 return 1;
15765 /* Use the builtin memset after a point, to avoid huge code bloat.
15766 When optimize_size, avoid any significant code bloat; calling
15767 memset is about 4 instructions, so allow for one instruction to
15768 load zero and three to do clearing. */
15769 if (TARGET_ALTIVEC && align >= 128)
15770 clear_step = 16;
15771 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
15772 clear_step = 8;
15773 else if (TARGET_SPE && align >= 64)
15774 clear_step = 8;
15775 else
15776 clear_step = 4;
15778 if (optimize_size && bytes > 3 * clear_step)
15779 return 0;
15780 if (! optimize_size && bytes > 8 * clear_step)
15781 return 0;
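/* Worked numbers for the cutoffs above: with Altivec and 128-bit
   alignment (clear_step == 16) the loop below is used for at most 48
   bytes at -Os and 128 bytes otherwise; at the baseline clear_step of
   4 those limits shrink to 12 and 32 bytes.  */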
15783 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15785 machine_mode mode = BLKmode;
15786 rtx dest;
15788 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15790 clear_bytes = 16;
15791 mode = V4SImode;
15793 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15795 clear_bytes = 8;
15796 mode = V2SImode;
15798 else if (bytes >= 8 && TARGET_POWERPC64
15799 && (align >= 64 || !STRICT_ALIGNMENT))
15801 clear_bytes = 8;
15802 mode = DImode;
15803 if (offset == 0 && align < 64)
15805 rtx addr;
15807 /* If the address form is reg+offset with offset not a
15808 multiple of four, reload into reg indirect form here
15809 rather than waiting for reload. This way we get one
15810 reload, not one per store. */
15811 addr = XEXP (orig_dest, 0);
15812 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15813 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15814 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15816 addr = copy_addr_to_reg (addr);
15817 orig_dest = replace_equiv_address (orig_dest, addr);
15821 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15822 { /* clear 4 bytes */
15823 clear_bytes = 4;
15824 mode = SImode;
15826 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15827 { /* clear 2 bytes */
15828 clear_bytes = 2;
15829 mode = HImode;
15831 else /* clear 1 byte at a time */
15833 clear_bytes = 1;
15834 mode = QImode;
15837 dest = adjust_address (orig_dest, mode, offset);
15839 emit_move_insn (dest, CONST0_RTX (mode));
15842 return 1;
15846 /* Expand a block move operation, and return 1 if successful. Return 0
15847 if we should let the compiler generate normal code.
15849 operands[0] is the destination
15850 operands[1] is the source
15851 operands[2] is the length
15852 operands[3] is the alignment */
15854 #define MAX_MOVE_REG 4
15856 int
15857 expand_block_move (rtx operands[])
15859 rtx orig_dest = operands[0];
15860 rtx orig_src = operands[1];
15861 rtx bytes_rtx = operands[2];
15862 rtx align_rtx = operands[3];
15863 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
15864 int align;
15865 int bytes;
15866 int offset;
15867 int move_bytes;
15868 rtx stores[MAX_MOVE_REG];
15869 int num_reg = 0;
15871 /* If this is not a fixed size move, just call memcpy */
15872 if (! constp)
15873 return 0;
15875 /* This must be a fixed size alignment */
15876 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15877 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15879 /* Anything to move? */
15880 bytes = INTVAL (bytes_rtx);
15881 if (bytes <= 0)
15882 return 1;
15884 if (bytes > rs6000_block_move_inline_limit)
15885 return 0;
15887 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
15889 union {
15890 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
15891 rtx (*mov) (rtx, rtx);
15892 } gen_func;
15893 machine_mode mode = BLKmode;
15894 rtx src, dest;
15896 /* Altivec first, since it will be faster than a string move
15897 when it applies, and usually not significantly larger. */
15898 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
15900 move_bytes = 16;
15901 mode = V4SImode;
15902 gen_func.mov = gen_movv4si;
15904 else if (TARGET_SPE && bytes >= 8 && align >= 64)
15906 move_bytes = 8;
15907 mode = V2SImode;
15908 gen_func.mov = gen_movv2si;
15910 else if (TARGET_STRING
15911 && bytes > 24 /* move up to 32 bytes at a time */
15912 && ! fixed_regs[5]
15913 && ! fixed_regs[6]
15914 && ! fixed_regs[7]
15915 && ! fixed_regs[8]
15916 && ! fixed_regs[9]
15917 && ! fixed_regs[10]
15918 && ! fixed_regs[11]
15919 && ! fixed_regs[12])
15921 move_bytes = (bytes > 32) ? 32 : bytes;
15922 gen_func.movmemsi = gen_movmemsi_8reg;
15924 else if (TARGET_STRING
15925 && bytes > 16 /* move up to 24 bytes at a time */
15926 && ! fixed_regs[5]
15927 && ! fixed_regs[6]
15928 && ! fixed_regs[7]
15929 && ! fixed_regs[8]
15930 && ! fixed_regs[9]
15931 && ! fixed_regs[10])
15933 move_bytes = (bytes > 24) ? 24 : bytes;
15934 gen_func.movmemsi = gen_movmemsi_6reg;
15936 else if (TARGET_STRING
15937 && bytes > 8 /* move up to 16 bytes at a time */
15938 && ! fixed_regs[5]
15939 && ! fixed_regs[6]
15940 && ! fixed_regs[7]
15941 && ! fixed_regs[8])
15943 move_bytes = (bytes > 16) ? 16 : bytes;
15944 gen_func.movmemsi = gen_movmemsi_4reg;
15946 else if (bytes >= 8 && TARGET_POWERPC64
15947 && (align >= 64 || !STRICT_ALIGNMENT))
15949 move_bytes = 8;
15950 mode = DImode;
15951 gen_func.mov = gen_movdi;
15952 if (offset == 0 && align < 64)
15954 rtx addr;
15956 /* If the address form is reg+offset with offset not a
15957 multiple of four, reload into reg indirect form here
15958 rather than waiting for reload. This way we get one
15959 reload, not one per load and/or store. */
15960 addr = XEXP (orig_dest, 0);
15961 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15962 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15963 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15965 addr = copy_addr_to_reg (addr);
15966 orig_dest = replace_equiv_address (orig_dest, addr);
15968 addr = XEXP (orig_src, 0);
15969 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15970 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15971 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15973 addr = copy_addr_to_reg (addr);
15974 orig_src = replace_equiv_address (orig_src, addr);
15978 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
15979 { /* move up to 8 bytes at a time */
15980 move_bytes = (bytes > 8) ? 8 : bytes;
15981 gen_func.movmemsi = gen_movmemsi_2reg;
15983 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15984 { /* move 4 bytes */
15985 move_bytes = 4;
15986 mode = SImode;
15987 gen_func.mov = gen_movsi;
15989 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15990 { /* move 2 bytes */
15991 move_bytes = 2;
15992 mode = HImode;
15993 gen_func.mov = gen_movhi;
15995 else if (TARGET_STRING && bytes > 1)
15996 { /* move up to 4 bytes at a time */
15997 move_bytes = (bytes > 4) ? 4 : bytes;
15998 gen_func.movmemsi = gen_movmemsi_1reg;
16000 else /* move 1 byte at a time */
16002 move_bytes = 1;
16003 mode = QImode;
16004 gen_func.mov = gen_movqi;
16007 src = adjust_address (orig_src, mode, offset);
16008 dest = adjust_address (orig_dest, mode, offset);
16010 if (mode != BLKmode)
16012 rtx tmp_reg = gen_reg_rtx (mode);
16014 emit_insn ((*gen_func.mov) (tmp_reg, src));
16015 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16018 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16020 int i;
16021 for (i = 0; i < num_reg; i++)
16022 emit_insn (stores[i]);
16023 num_reg = 0;
16026 if (mode == BLKmode)
16028 /* Move the address into scratch registers. The movmemsi
16029 patterns require zero offset. */
16030 if (!REG_P (XEXP (src, 0)))
16032 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16033 src = replace_equiv_address (src, src_reg);
16035 set_mem_size (src, move_bytes);
16037 if (!REG_P (XEXP (dest, 0)))
16039 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16040 dest = replace_equiv_address (dest, dest_reg);
16042 set_mem_size (dest, move_bytes);
16044 emit_insn ((*gen_func.movmemsi) (dest, src,
16045 GEN_INT (move_bytes & 31),
16046 align_rtx));
16050 return 1;
16054 /* Return a string to perform a load_multiple operation.
16055 operands[0] is the vector.
16056 operands[1] is the source address.
16057 operands[2] is the first destination register. */
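/* Worked example (illustrative): loading 3 words from (r9) into
   r7..r9 would clobber the base register with the final load, so the
   i == words-1 case below emits "lswi 7,9,8" for the first two words
   followed by "lwz 9,8(9)" for the last. */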
16059 const char *
16060 rs6000_output_load_multiple (rtx operands[3])
16062 /* We have to handle the case where the pseudo used to contain the address
16063 is assigned to one of the output registers. */
16064 int i, j;
16065 int words = XVECLEN (operands[0], 0);
16066 rtx xop[10];
16068 if (XVECLEN (operands[0], 0) == 1)
16069 return "lwz %2,0(%1)";
16071 for (i = 0; i < words; i++)
16072 if (refers_to_regno_p (REGNO (operands[2]) + i,
16073 REGNO (operands[2]) + i + 1, operands[1], 0))
16075 if (i == words-1)
16077 xop[0] = GEN_INT (4 * (words-1));
16078 xop[1] = operands[1];
16079 xop[2] = operands[2];
16080 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16081 return "";
16083 else if (i == 0)
16085 xop[0] = GEN_INT (4 * (words-1));
16086 xop[1] = operands[1];
16087 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16088 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16089 return "";
16091 else
16093 for (j = 0; j < words; j++)
16094 if (j != i)
16096 xop[0] = GEN_INT (j * 4);
16097 xop[1] = operands[1];
16098 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16099 output_asm_insn ("lwz %2,%0(%1)", xop);
16101 xop[0] = GEN_INT (i * 4);
16102 xop[1] = operands[1];
16103 output_asm_insn ("lwz %1,%0(%1)", xop);
16104 return "";
16108 return "lswi %2,%1,%N0";
16112 /* A validation routine: say whether CODE, a condition code, and MODE
16113 match. The other alternatives either don't make sense or should
16114 never be generated. */
16116 void
16117 validate_condition_mode (enum rtx_code code, machine_mode mode)
16119 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16120 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16121 && GET_MODE_CLASS (mode) == MODE_CC);
16123 /* These don't make sense. */
16124 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16125 || mode != CCUNSmode);
16127 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16128 || mode == CCUNSmode);
16130 gcc_assert (mode == CCFPmode
16131 || (code != ORDERED && code != UNORDERED
16132 && code != UNEQ && code != LTGT
16133 && code != UNGT && code != UNLT
16134 && code != UNGE && code != UNLE));
16136 /* These should never be generated except for
16137 flag_finite_math_only. */
16138 gcc_assert (mode != CCFPmode
16139 || flag_finite_math_only
16140 || (code != LE && code != GE
16141 && code != UNEQ && code != LTGT
16142 && code != UNGT && code != UNLT));
16144 /* These are invalid; the information is not there. */
16145 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
16149 /* Return 1 if ANDOP is a mask with no bits set outside the mask
16150 required to convert the result of a rotate insn into a shift
16151 left insn of SHIFTOP bits. Both are known to be SImode CONST_INT. */
16154 includes_lshift_p (rtx shiftop, rtx andop)
16156 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16158 shift_mask <<= INTVAL (shiftop);
16160 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
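/* Illustrative example: with SHIFTOP == 3, shift_mask clears the low
   three bits, so ANDOP == 0xfffffff8 is accepted (rotate-and-mask is
   equivalent to a shift left by 3) while ANDOP == 0xfffffffc is
   rejected because bit 2 lies outside the shift mask. */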
16163 /* Similar, but for right shift. */
16166 includes_rshift_p (rtx shiftop, rtx andop)
16168 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16170 shift_mask >>= INTVAL (shiftop);
16172 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16175 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16176 to perform a left shift. It must have exactly SHIFTOP least
16177 significant 0's, then one or more 1's, then zero or more 0's. */
16180 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16182 if (GET_CODE (andop) == CONST_INT)
16184 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16186 c = INTVAL (andop);
16187 if (c == 0 || c == HOST_WIDE_INT_M1U)
16188 return 0;
16190 shift_mask = HOST_WIDE_INT_M1U;
16191 shift_mask <<= INTVAL (shiftop);
16193 /* Find the least significant one bit. */
16194 lsb = c & -c;
16196 /* It must coincide with the LSB of the shift mask. */
16197 if (-lsb != shift_mask)
16198 return 0;
16200 /* Invert to look for the next transition (if any). */
16201 c = ~c;
16203 /* Remove the low group of ones (originally low group of zeros). */
16204 c &= -lsb;
16206 /* Again find the lsb, and check we have all 1's above. */
16207 lsb = c & -c;
16208 return c == -lsb;
16210 else
16211 return 0;
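/* Worked example (illustrative): with SHIFTOP == 12,
   ANDOP == 0x0000000000fff000 returns 1: exactly twelve low 0's, a
   single contiguous run of 1's, then 0's -- the shape rldic can
   generate. ANDOP == 0x0000000000fff001 fails the -lsb == shift_mask
   test. */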
16214 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16215 to perform a left shift. It must have SHIFTOP or more least
16216 significant 0's, with the remainder of the word 1's. */
16219 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16221 if (GET_CODE (andop) == CONST_INT)
16223 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16225 shift_mask = HOST_WIDE_INT_M1U;
16226 shift_mask <<= INTVAL (shiftop);
16227 c = INTVAL (andop);
16229 /* Find the least significant one bit. */
16230 lsb = c & -c;
16232 /* It must be covered by the shift mask.
16233 This test also rejects c == 0. */
16234 if ((lsb & shift_mask) == 0)
16235 return 0;
16237 /* Check we have all 1's above the transition, and reject all 1's. */
16238 return c == -lsb && lsb != 1;
16240 else
16241 return 0;
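/* Worked example (illustrative): with SHIFTOP == 8,
   ANDOP == 0xffffffffffffff00 returns 1 (eight low 0's, all higher
   bits 1), while ANDOP == 0x00000000ffffff00 returns 0 because the
   bits above the low run of 1's are not all set (c != -lsb). */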
16244 /* Return 1 if the operands will generate valid arguments for the rlwimi
16245 instruction for an insert with right shift in 64-bit mode. The mask may
16246 not start on the first bit or stop on the last bit because the wrap-around
16247 effects of the instruction do not correspond to the semantics of the RTL insn. */
16250 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16252 if (INTVAL (startop) > 32
16253 && INTVAL (startop) < 64
16254 && INTVAL (sizeop) > 1
16255 && INTVAL (sizeop) + INTVAL (startop) < 64
16256 && INTVAL (shiftop) > 0
16257 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16258 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16259 return 1;
16261 return 0;
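/* For illustration: sizeop == 8, startop == 40, shiftop == 16 passes
   every test above (40 > 32, 8 + 40 < 64, 8 + 16 < 32, 64 - 16 >= 8),
   so these are valid operands for the rlwimi-based insert pattern. */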
16264 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
16265 for lfq and stfq insns iff the registers are hard registers. */
16268 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16270 /* We might have been passed a SUBREG. */
16271 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16272 return 0;
16274 /* We might have been passed non floating point registers. */
16275 if (!FP_REGNO_P (REGNO (reg1))
16276 || !FP_REGNO_P (REGNO (reg2)))
16277 return 0;
16279 return (REGNO (reg1) == REGNO (reg2) - 1);
16282 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16283 addr1 and addr2 must be in consecutive memory locations
16284 (addr2 == addr1 + 8). */
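/* For illustration: (mem:DF (plus (reg 9) (const_int 16))) and
   (mem:DF (plus (reg 9) (const_int 24))) qualify: both use the same
   base register and the second offset is the first plus 8. */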
16287 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16289 rtx addr1, addr2;
16290 unsigned int reg1, reg2;
16291 int offset1, offset2;
16293 /* The mems cannot be volatile. */
16294 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16295 return 0;
16297 addr1 = XEXP (mem1, 0);
16298 addr2 = XEXP (mem2, 0);
16300 /* Extract an offset (if used) from the first addr. */
16301 if (GET_CODE (addr1) == PLUS)
16303 /* If not a REG, return zero. */
16304 if (GET_CODE (XEXP (addr1, 0)) != REG)
16305 return 0;
16306 else
16308 reg1 = REGNO (XEXP (addr1, 0));
16309 /* The offset must be constant! */
16310 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16311 return 0;
16312 offset1 = INTVAL (XEXP (addr1, 1));
16315 else if (GET_CODE (addr1) != REG)
16316 return 0;
16317 else
16319 reg1 = REGNO (addr1);
16320 /* This was a simple (mem (reg)) expression. Offset is 0. */
16321 offset1 = 0;
16324 /* And now for the second addr. */
16325 if (GET_CODE (addr2) == PLUS)
16327 /* If not a REG, return zero. */
16328 if (GET_CODE (XEXP (addr2, 0)) != REG)
16329 return 0;
16330 else
16332 reg2 = REGNO (XEXP (addr2, 0));
16333 /* The offset must be constant. */
16334 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16335 return 0;
16336 offset2 = INTVAL (XEXP (addr2, 1));
16339 else if (GET_CODE (addr2) != REG)
16340 return 0;
16341 else
16343 reg2 = REGNO (addr2);
16344 /* This was a simple (mem (reg)) expression. Offset is 0. */
16345 offset2 = 0;
16348 /* Both of these must have the same base register. */
16349 if (reg1 != reg2)
16350 return 0;
16352 /* The offset for the second addr must be 8 more than the first addr. */
16353 if (offset2 != offset1 + 8)
16354 return 0;
16356 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16357 instructions. */
16358 return 1;
16363 rs6000_secondary_memory_needed_rtx (machine_mode mode)
16365 static bool eliminated = false;
16366 rtx ret;
16368 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16369 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16370 else
16372 rtx mem = cfun->machine->sdmode_stack_slot;
16373 gcc_assert (mem != NULL_RTX);
16375 if (!eliminated)
16377 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16378 cfun->machine->sdmode_stack_slot = mem;
16379 eliminated = true;
16381 ret = mem;
16384 if (TARGET_DEBUG_ADDR)
16386 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16387 GET_MODE_NAME (mode));
16388 if (!ret)
16389 fprintf (stderr, "\tNULL_RTX\n");
16390 else
16391 debug_rtx (ret);
16394 return ret;
16397 /* Return the mode to be used for memory when a secondary memory
16398 location is needed. For SDmode values we need to use DDmode; in
16399 all other cases we can use the same mode. */
16400 machine_mode
16401 rs6000_secondary_memory_needed_mode (machine_mode mode)
16403 if (lra_in_progress && mode == SDmode)
16404 return DDmode;
16405 return mode;
16408 static tree
16409 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16411 /* Don't walk into types. */
16412 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16414 *walk_subtrees = 0;
16415 return NULL_TREE;
16418 switch (TREE_CODE (*tp))
16420 case VAR_DECL:
16421 case PARM_DECL:
16422 case FIELD_DECL:
16423 case RESULT_DECL:
16424 case SSA_NAME:
16425 case REAL_CST:
16426 case MEM_REF:
16427 case VIEW_CONVERT_EXPR:
16428 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16429 return *tp;
16430 break;
16431 default:
16432 break;
16435 return NULL_TREE;
16438 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16439 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16440 only work on the traditional altivec registers, note if an altivec register
16441 was chosen. */
16443 static enum rs6000_reg_type
16444 register_to_reg_type (rtx reg, bool *is_altivec)
16446 HOST_WIDE_INT regno;
16447 enum reg_class rclass;
16449 if (GET_CODE (reg) == SUBREG)
16450 reg = SUBREG_REG (reg);
16452 if (!REG_P (reg))
16453 return NO_REG_TYPE;
16455 regno = REGNO (reg);
16456 if (regno >= FIRST_PSEUDO_REGISTER)
16458 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16459 return PSEUDO_REG_TYPE;
16461 regno = true_regnum (reg);
16462 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16463 return PSEUDO_REG_TYPE;
16466 gcc_assert (regno >= 0);
16468 if (is_altivec && ALTIVEC_REGNO_P (regno))
16469 *is_altivec = true;
16471 rclass = rs6000_regno_regclass[regno];
16472 return reg_class_to_reg_type[(int)rclass];
16475 /* Helper function to return the cost of adding a TOC entry address. */
16477 static inline int
16478 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
16480 int ret;
16482 if (TARGET_CMODEL != CMODEL_SMALL)
16483 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
16485 else
16486 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
16488 return ret;
16491 /* Helper function for rs6000_secondary_reload to determine whether the memory
16492 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
16493 needs reloading. Return negative if the memory is not handled by the memory
16494 helper functions and the caller should try a different reload method, 0 if no
16495 additional instructions are needed, and positive to give the extra cost for
16496 the memory. */
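/* A few illustrative cases (not exhaustive): a (pre_inc (reg))
   address for a class whose mode lacks update forms returns 1, the
   cost of the explicit add emitted later; a plain base-register
   address returns 0; an address the switch below cannot classify
   returns -1, telling the caller to try another reload strategy. */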
16498 static int
16499 rs6000_secondary_reload_memory (rtx addr,
16500 enum reg_class rclass,
16501 enum machine_mode mode)
16503 int extra_cost = 0;
16504 rtx reg, and_arg, plus_arg0, plus_arg1;
16505 addr_mask_type addr_mask;
16506 const char *type = NULL;
16507 const char *fail_msg = NULL;
16509 if (GPR_REG_CLASS_P (rclass))
16510 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
16512 else if (rclass == FLOAT_REGS)
16513 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
16515 else if (rclass == ALTIVEC_REGS)
16516 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
16518 /* For the combined VSX_REGS, turn off Altivec AND -16. */
16519 else if (rclass == VSX_REGS)
16520 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
16521 & ~RELOAD_REG_AND_M16);
16523 else
16525 if (TARGET_DEBUG_ADDR)
16526 fprintf (stderr,
16527 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16528 "class is not GPR, FPR, VMX\n",
16529 GET_MODE_NAME (mode), reg_class_names[rclass]);
16531 return -1;
16534 /* If the register isn't valid in this register class, just return now. */
16535 if ((addr_mask & RELOAD_REG_VALID) == 0)
16537 if (TARGET_DEBUG_ADDR)
16538 fprintf (stderr,
16539 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16540 "not valid in class\n",
16541 GET_MODE_NAME (mode), reg_class_names[rclass]);
16543 return -1;
16546 switch (GET_CODE (addr))
16548 /* Does the register class support auto update forms for this mode? We
16549 don't need a scratch register, since the powerpc only supports
16550 PRE_INC, PRE_DEC, and PRE_MODIFY. */
16551 case PRE_INC:
16552 case PRE_DEC:
16553 reg = XEXP (addr, 0);
16554 if (!base_reg_operand (addr, GET_MODE (reg)))
16556 fail_msg = "no base register #1";
16557 extra_cost = -1;
16560 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
16562 extra_cost = 1;
16563 type = "update";
16565 break;
16567 case PRE_MODIFY:
16568 reg = XEXP (addr, 0);
16569 plus_arg1 = XEXP (addr, 1);
16570 if (!base_reg_operand (reg, GET_MODE (reg))
16571 || GET_CODE (plus_arg1) != PLUS
16572 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
16574 fail_msg = "bad PRE_MODIFY";
16575 extra_cost = -1;
16578 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
16580 extra_cost = 1;
16581 type = "update";
16583 break;
16585 /* Do we need to simulate AND -16 to clear the bottom address bits used
16586 in VMX load/stores? Only allow the AND for vector sizes. */
16587 case AND:
16588 and_arg = XEXP (addr, 0);
16589 if (GET_MODE_SIZE (mode) != 16
16590 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16591 || INTVAL (XEXP (addr, 1)) != -16)
16593 fail_msg = "bad Altivec AND #1";
16594 extra_cost = -1;
16597 if (rclass != ALTIVEC_REGS)
16599 if (legitimate_indirect_address_p (and_arg, false))
16600 extra_cost = 1;
16602 else if (legitimate_indexed_address_p (and_arg, false))
16603 extra_cost = 2;
16605 else
16607 fail_msg = "bad Altivec AND #2";
16608 extra_cost = -1;
16611 type = "and";
16613 break;
16615 /* If this is an indirect address, make sure it is a base register. */
16616 case REG:
16617 case SUBREG:
16618 if (!legitimate_indirect_address_p (addr, false))
16620 extra_cost = 1;
16621 type = "move";
16623 break;
16625 /* If this is an indexed address, make sure the register class can handle
16626 indexed addresses for this mode. */
16627 case PLUS:
16628 plus_arg0 = XEXP (addr, 0);
16629 plus_arg1 = XEXP (addr, 1);
16631 /* (plus (plus (reg) (constant)) (constant)) is generated during
16632 push_reload processing, so handle it now. */
16633 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
16635 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16637 extra_cost = 1;
16638 type = "offset";
16642 /* (plus (plus (reg) (constant)) (reg)) is also generated during
16643 push_reload processing, so handle it now. */
16644 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
16646 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
16648 extra_cost = 1;
16649 type = "indexed #2";
16653 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
16655 fail_msg = "no base register #2";
16656 extra_cost = -1;
16659 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
16661 if ((addr_mask & RELOAD_REG_INDEXED) == 0
16662 || !legitimate_indexed_address_p (addr, false))
16664 extra_cost = 1;
16665 type = "indexed";
16669 /* Make sure the register class can handle offset addresses. */
16670 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
16672 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16674 extra_cost = 1;
16675 type = "offset";
16679 else
16681 fail_msg = "bad PLUS";
16682 extra_cost = -1;
16685 break;
16687 case LO_SUM:
16688 if (!legitimate_lo_sum_address_p (mode, addr, false))
16690 fail_msg = "bad LO_SUM";
16691 extra_cost = -1;
16694 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16696 extra_cost = 1;
16697 type = "lo_sum";
16699 break;
16701 /* Static addresses need to create a TOC entry. */
16702 case CONST:
16703 case SYMBOL_REF:
16704 case LABEL_REF:
16705 type = "address";
16706 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
16707 break;
16709 /* TOC references look like offsettable memory. */
16710 case UNSPEC:
16711 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
16713 fail_msg = "bad UNSPEC";
16714 extra_cost = -1;
16717 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16719 extra_cost = 1;
16720 type = "toc reference";
16722 break;
16724 default:
16726 fail_msg = "bad address";
16727 extra_cost = -1;
16731 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
16733 if (extra_cost < 0)
16734 fprintf (stderr,
16735 "rs6000_secondary_reload_memory error: mode = %s, "
16736 "class = %s, addr_mask = '%s', %s\n",
16737 GET_MODE_NAME (mode),
16738 reg_class_names[rclass],
16739 rs6000_debug_addr_mask (addr_mask, false),
16740 (fail_msg != NULL) ? fail_msg : "<bad address>");
16742 else
16743 fprintf (stderr,
16744 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16745 "addr_mask = '%s', extra cost = %d, %s\n",
16746 GET_MODE_NAME (mode),
16747 reg_class_names[rclass],
16748 rs6000_debug_addr_mask (addr_mask, false),
16749 extra_cost,
16750 (type) ? type : "<none>");
16752 debug_rtx (addr);
16755 return extra_cost;
16758 /* Helper function for rs6000_secondary_reload to return true if a move to a
16759 different register class is really a simple move. */
16761 static bool
16762 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16763 enum rs6000_reg_type from_type,
16764 machine_mode mode)
16766 int size;
16768 /* Add support for various direct moves available. In this function, we only
16769 look at cases where we don't need any extra registers, and one or more
16770 simple move insns are issued. At present, 32-bit integers are not allowed
16771 in FPR/VSX registers. Single precision binary floating is not a simple
16772 move because we need to convert to the single precision memory layout.
16773 The 4-byte SDmode can be moved. */
16774 size = GET_MODE_SIZE (mode);
16775 if (TARGET_DIRECT_MOVE
16776 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16777 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16778 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16779 return true;
16781 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16782 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16783 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16784 return true;
16786 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16787 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16788 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16789 return true;
16791 return false;
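/* For illustration: with TARGET_DIRECT_MOVE (power8), a DImode move
   between a GPR and a VSX register is a single mtvsrd/mfvsrd and so
   counts as simple, whereas an SFmode move does not, since the value
   must be converted to or from the single precision memory layout. */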
16794 /* Power8 helper function for rs6000_secondary_reload; handle all of the
16795 special direct moves that involve allocating an extra register. Return true
16796 if such a move is handled, recording the insn code of the helper function
16797 and its extra cost in SRI; return false if no helper exists. */
16799 static bool
16800 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16801 enum rs6000_reg_type from_type,
16802 machine_mode mode,
16803 secondary_reload_info *sri,
16804 bool altivec_p)
16806 bool ret = false;
16807 enum insn_code icode = CODE_FOR_nothing;
16808 int cost = 0;
16809 int size = GET_MODE_SIZE (mode);
16811 if (TARGET_POWERPC64)
16813 if (size == 16)
16815 /* Handle moving 128-bit values from GPRs to VSX registers on
16816 power8 when running in 64-bit mode using XXPERMDI to glue the two
16817 64-bit values back together. */
16818 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16820 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16821 icode = reg_addr[mode].reload_vsx_gpr;
16824 /* Handle moving 128-bit values from VSX registers to GPRs on
16825 power8 when running in 64-bit mode using XXPERMDI to get access to the
16826 bottom 64-bit value. */
16827 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16829 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16830 icode = reg_addr[mode].reload_gpr_vsx;
16834 else if (mode == SFmode)
16836 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16838 cost = 3; /* xscvdpspn, mfvsrd, and. */
16839 icode = reg_addr[mode].reload_gpr_vsx;
16842 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16844 cost = 2; /* mtvsrz, xscvspdpn. */
16845 icode = reg_addr[mode].reload_vsx_gpr;
16850 if (TARGET_POWERPC64 && size == 16)
16852 /* Handle moving 128-bit values from GPRs to VSX registers on
16853 power8 when running in 64-bit mode using XXPERMDI to glue the two
16854 64-bit values back together. */
16855 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16857 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16858 icode = reg_addr[mode].reload_vsx_gpr;
16861 /* Handle moving 128-bit values from VSX registers to GPRs on
16862 power8 when running in 64-bit mode using XXPERMDI to get access to the
16863 bottom 64-bit value. */
16864 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16866 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16867 icode = reg_addr[mode].reload_gpr_vsx;
16871 else if (!TARGET_POWERPC64 && size == 8)
16873 /* Handle moving 64-bit values from GPRs to floating point registers on
16874 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16875 values back together. Altivec register classes must be handled
16876 specially since a different instruction is used, and the secondary
16877 reload support requires a single instruction class in the scratch
16878 register constraint. However, right now TFmode is not allowed in
16879 Altivec registers, so the pattern will never match. */
16880 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16882 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16883 icode = reg_addr[mode].reload_fpr_gpr;
16887 if (icode != CODE_FOR_nothing)
16889 ret = true;
16890 if (sri)
16892 sri->icode = icode;
16893 sri->extra_cost = cost;
16897 return ret;
16900 /* Return whether a move between two register classes can be done either
16901 directly (simple move) or via a pattern that uses a single extra temporary
16902 (using power8's direct move in this case). */
16904 static bool
16905 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
16906 enum rs6000_reg_type from_type,
16907 machine_mode mode,
16908 secondary_reload_info *sri,
16909 bool altivec_p)
16911 /* Fall back to load/store reloads if either type is not a register. */
16912 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
16913 return false;
16915 /* If we haven't allocated registers yet, assume the move can be done for the
16916 standard register types. */
16917 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
16918 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
16919 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
16920 return true;
16922 /* A move within the same set of registers is a simple move for non-specialized
16923 registers. */
16924 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
16925 return true;
16927 /* Check whether a simple move can be done directly. */
16928 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
16930 if (sri)
16932 sri->icode = CODE_FOR_nothing;
16933 sri->extra_cost = 0;
16935 return true;
16938 /* Now check if we can do it in a few steps. */
16939 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
16940 altivec_p);
16943 /* Inform reload about cases where moving X with a mode MODE to a register in
16944 RCLASS requires an extra scratch or immediate register. Return the class
16945 needed for the immediate register.
16947 For VSX and Altivec, we may need a register to convert sp+offset into
16948 reg+sp.
16950 For misaligned 64-bit gpr loads and stores we need a register to
16951 convert an offset address to indirect. */
16953 static reg_class_t
16954 rs6000_secondary_reload (bool in_p,
16955 rtx x,
16956 reg_class_t rclass_i,
16957 machine_mode mode,
16958 secondary_reload_info *sri)
16960 enum reg_class rclass = (enum reg_class) rclass_i;
16961 reg_class_t ret = ALL_REGS;
16962 enum insn_code icode;
16963 bool default_p = false;
16964 bool done_p = false;
16966 /* Allow subreg of memory before/during reload. */
16967 bool memory_p = (MEM_P (x)
16968 || (!reload_completed && GET_CODE (x) == SUBREG
16969 && MEM_P (SUBREG_REG (x))));
16971 sri->icode = CODE_FOR_nothing;
16972 sri->extra_cost = 0;
16973 icode = ((in_p)
16974 ? reg_addr[mode].reload_load
16975 : reg_addr[mode].reload_store);
16977 if (REG_P (x) || register_operand (x, mode))
16979 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
16980 bool altivec_p = (rclass == ALTIVEC_REGS);
16981 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
16983 if (!in_p)
16985 enum rs6000_reg_type exchange = to_type;
16986 to_type = from_type;
16987 from_type = exchange;
16990 /* Can we do a direct move of some sort? */
16991 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
16992 altivec_p))
16994 icode = (enum insn_code)sri->icode;
16995 default_p = false;
16996 done_p = true;
16997 ret = NO_REGS;
17001 /* Make sure 0.0 is not reloaded or forced into memory. */
17002 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17004 ret = NO_REGS;
17005 default_p = false;
17006 done_p = true;
17009 /* If this is a scalar floating point value and we want to load it into the
17010 traditional Altivec registers, do it via a traditional floating
17011 point register. Also make sure that non-zero constants use an FPR. */
17012 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17013 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17014 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17016 ret = FLOAT_REGS;
17017 default_p = false;
17018 done_p = true;
17021 /* Handle reload of load/stores if we have reload helper functions. */
17022 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17024 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17025 mode);
17027 if (extra_cost >= 0)
17029 done_p = true;
17030 ret = NO_REGS;
17031 if (extra_cost > 0)
17033 sri->extra_cost = extra_cost;
17034 sri->icode = icode;
17039 /* Handle unaligned loads and stores of integer registers. */
17040 if (!done_p && TARGET_POWERPC64
17041 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17042 && memory_p
17043 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17045 rtx addr = XEXP (x, 0);
17046 rtx off = address_offset (addr);
17048 if (off != NULL_RTX)
17050 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17051 unsigned HOST_WIDE_INT offset = INTVAL (off);
17053 /* We need a secondary reload when our legitimate_address_p
17054 says the address is good (as otherwise the entire address
17055 will be reloaded), and the offset is not a multiple of
17056 four or we have an address wrap. Address wrap will only
17057 occur for LO_SUMs since legitimate_offset_address_p
17058 rejects addresses for 16-byte mems that will wrap. */
17059 if (GET_CODE (addr) == LO_SUM
17060 ? (1 /* legitimate_address_p allows any offset for lo_sum */
17061 && ((offset & 3) != 0
17062 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
17063 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17064 && (offset & 3) != 0))
17066 if (in_p)
17067 sri->icode = CODE_FOR_reload_di_load;
17068 else
17069 sri->icode = CODE_FOR_reload_di_store;
17070 sri->extra_cost = 2;
17071 ret = NO_REGS;
17072 done_p = true;
17074 else
17075 default_p = true;
17077 else
17078 default_p = true;
17081 if (!done_p && !TARGET_POWERPC64
17082 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17083 && memory_p
17084 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17086 rtx addr = XEXP (x, 0);
17087 rtx off = address_offset (addr);
17089 if (off != NULL_RTX)
17091 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17092 unsigned HOST_WIDE_INT offset = INTVAL (off);
17094 /* We need a secondary reload when our legitimate_address_p
17095 says the address is good (as otherwise the entire address
17096 will be reloaded), and we have a wrap.
17098 legitimate_lo_sum_address_p allows LO_SUM addresses to
17099 have any offset so test for wrap in the low 16 bits.
17101 legitimate_offset_address_p checks for the range
17102 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17103 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17104 [0x7ff4,0x7fff] respectively, so test for the
17105 intersection of these ranges, [0x7ffc,0x7fff] and
17106 [0x7ff4,0x7ff7] respectively.
17108 Note that the address we see here may have been
17109 manipulated by legitimize_reload_address. */
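/* Worked example (illustrative): for an 8-byte access split across
   two 32-bit gprs, extra == 4 and the offset test below fires for
   offsets in [0x7ffc,0x7fff], where the second word would land at or
   beyond 0x8000, outside the signed 16-bit displacement range. */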
17110 if (GET_CODE (addr) == LO_SUM
17111 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17112 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17114 if (in_p)
17115 sri->icode = CODE_FOR_reload_si_load;
17116 else
17117 sri->icode = CODE_FOR_reload_si_store;
17118 sri->extra_cost = 2;
17119 ret = NO_REGS;
17120 done_p = true;
17122 else
17123 default_p = true;
17125 else
17126 default_p = true;
17129 if (!done_p)
17130 default_p = true;
17132 if (default_p)
17133 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17135 gcc_assert (ret != ALL_REGS);
17137 if (TARGET_DEBUG_ADDR)
17139 fprintf (stderr,
17140 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17141 "mode = %s",
17142 reg_class_names[ret],
17143 in_p ? "true" : "false",
17144 reg_class_names[rclass],
17145 GET_MODE_NAME (mode));
17147 if (reload_completed)
17148 fputs (", after reload", stderr);
17150 if (!done_p)
17151 fputs (", done_p not set", stderr);
17153 if (default_p)
17154 fputs (", default secondary reload", stderr);
17156 if (sri->icode != CODE_FOR_nothing)
17157 fprintf (stderr, ", reload func = %s, extra cost = %d",
17158 insn_data[sri->icode].name, sri->extra_cost);
17160 fputs ("\n", stderr);
17161 debug_rtx (x);
17164 return ret;
17167 /* Better tracing for rs6000_secondary_reload_inner. */
17169 static void
17170 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17171 bool store_p)
17173 rtx set, clobber;
17175 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17177 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17178 store_p ? "store" : "load");
17180 if (store_p)
17181 set = gen_rtx_SET (VOIDmode, mem, reg);
17182 else
17183 set = gen_rtx_SET (VOIDmode, reg, mem);
17185 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17186 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17189 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17190 ATTRIBUTE_NORETURN;
17192 static void
17193 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17194 bool store_p)
17196 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17197 gcc_unreachable ();
17200 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17201 reload helper functions. These were identified in
17202 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17203 reload, it calls the insns:
17204 reload_<RELOAD:mode>_<P:mptrsize>_store
17205 reload_<RELOAD:mode>_<P:mptrsize>_load
17207 which in turn calls this function, to do whatever is necessary to create
17208 valid addresses. */
17210 void
17211 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17213 int regno = true_regnum (reg);
17214 machine_mode mode = GET_MODE (reg);
17215 addr_mask_type addr_mask;
17216 rtx addr;
17217 rtx new_addr;
17218 rtx op_reg, op0, op1;
17219 rtx and_op;
17220 rtx cc_clobber;
17221 rtvec rv;
17223 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17224 || !base_reg_operand (scratch, GET_MODE (scratch)))
17225 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17227 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17228 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17230 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17231 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17233 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17234 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17236 else
17237 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17239 /* Make sure the mode is valid in this register class. */
17240 if ((addr_mask & RELOAD_REG_VALID) == 0)
17241 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17243 if (TARGET_DEBUG_ADDR)
17244 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
17246 new_addr = addr = XEXP (mem, 0);
17247 switch (GET_CODE (addr))
17249 /* Does the register class support auto update forms for this mode? If
17250 not, do the update now. We don't need a scratch register, since the
17251 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
17252 case PRE_INC:
17253 case PRE_DEC:
17254 op_reg = XEXP (addr, 0);
17255 if (!base_reg_operand (op_reg, Pmode))
17256 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17258 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17260 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
17261 new_addr = op_reg;
17263 break;
17265 case PRE_MODIFY:
17266 op0 = XEXP (addr, 0);
17267 op1 = XEXP (addr, 1);
17268 if (!base_reg_operand (op0, Pmode)
17269 || GET_CODE (op1) != PLUS
17270 || !rtx_equal_p (op0, XEXP (op1, 0)))
17271 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17273 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17275 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17276 new_addr = reg;
17278 break;
17280 /* Do we need to simulate AND -16 to clear the bottom address bits used
17281 in VMX load/stores? */
17282 case AND:
17283 op0 = XEXP (addr, 0);
17284 op1 = XEXP (addr, 1);
17285 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
17287 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
17288 op_reg = op0;
17290 else if (GET_CODE (op1) == PLUS)
17292 emit_insn (gen_rtx_SET (VOIDmode, scratch, op1));
17293 op_reg = scratch;
17296 else
17297 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17299 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
17300 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
17301 rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
17302 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
17303 new_addr = scratch;
17305 break;
17307 /* If this is an indirect address, make sure it is a base register. */
17308 case REG:
17309 case SUBREG:
17310 if (!base_reg_operand (addr, GET_MODE (addr)))
17312 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17313 new_addr = scratch;
17315 break;
17317 /* If this is an indexed address, make sure the register class can handle
17318 indexed addresses for this mode. */
17319 case PLUS:
17320 op0 = XEXP (addr, 0);
17321 op1 = XEXP (addr, 1);
17322 if (!base_reg_operand (op0, Pmode))
17323 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17325 else if (int_reg_operand (op1, Pmode))
17327 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17329 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17330 new_addr = scratch;
17334 /* Make sure the register class can handle offset addresses. */
17335 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17337 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17339 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17340 new_addr = scratch;
17344 else
17345 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17347 break;
17349 case LO_SUM:
17350 op0 = XEXP (addr, 0);
17351 op1 = XEXP (addr, 1);
17352 if (!base_reg_operand (op0, Pmode))
17353 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17355 else if (int_reg_operand (op1, Pmode))
17357 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17359 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17360 new_addr = scratch;
17364 /* Make sure the register class can handle offset addresses. */
17365 else if (legitimate_lo_sum_address_p (mode, addr, false))
17367 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17369 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17370 new_addr = scratch;
17374 else
17375 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17377 break;
17379 case SYMBOL_REF:
17380 case CONST:
17381 case LABEL_REF:
17382 rs6000_emit_move (scratch, addr, Pmode);
17383 new_addr = scratch;
17384 break;
17386 default:
17387 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17390 /* Adjust the address if it changed. */
17391 if (addr != new_addr)
17393 mem = replace_equiv_address_nv (mem, new_addr);
17394 if (TARGET_DEBUG_ADDR)
17395 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17398 /* Now create the move. */
17399 if (store_p)
17400 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17401 else
17402 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17404 return;
17407 /* Convert reloads involving 64-bit gprs and misaligned offset
17408 addressing, or multiple 32-bit gprs and offsets that are too large,
17409 to use indirect addressing. */
17411 void
17412 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17414 int regno = true_regnum (reg);
17415 enum reg_class rclass;
17416 rtx addr;
17417 rtx scratch_or_premodify = scratch;
17419 if (TARGET_DEBUG_ADDR)
17421 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17422 store_p ? "store" : "load");
17423 fprintf (stderr, "reg:\n");
17424 debug_rtx (reg);
17425 fprintf (stderr, "mem:\n");
17426 debug_rtx (mem);
17427 fprintf (stderr, "scratch:\n");
17428 debug_rtx (scratch);
17431 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17432 gcc_assert (GET_CODE (mem) == MEM);
17433 rclass = REGNO_REG_CLASS (regno);
17434 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17435 addr = XEXP (mem, 0);
17437 if (GET_CODE (addr) == PRE_MODIFY)
17439 scratch_or_premodify = XEXP (addr, 0);
17440 gcc_assert (REG_P (scratch_or_premodify));
17441 addr = XEXP (addr, 1);
17443 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17445 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17447 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17449 /* Now create the move. */
17450 if (store_p)
17451 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17452 else
17453 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17455 return;
17458 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17459 this function has any SDmode references. If we are on a power7 or later, we
17460 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17461 can load/store the value. */
17463 static void
17464 rs6000_alloc_sdmode_stack_slot (void)
17466 tree t;
17467 basic_block bb;
17468 gimple_stmt_iterator gsi;
17470 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17471 /* We use a different approach for dealing with the secondary
17472 memory in LRA. */
17473 if (ira_use_lra_p)
17474 return;
17476 if (TARGET_NO_SDMODE_STACK)
17477 return;
17479 FOR_EACH_BB_FN (bb, cfun)
17480 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17482 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17483 if (ret)
17485 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17486 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17487 SDmode, 0);
17488 return;
17492 /* Check for any SDmode parameters of the function. */
17493 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17495 if (TREE_TYPE (t) == error_mark_node)
17496 continue;
17498 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17499 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17501 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17502 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17503 SDmode, 0);
17504 return;
17509 static void
17510 rs6000_instantiate_decls (void)
17512 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17513 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17516 /* Given an rtx X being reloaded into a reg required to be
17517 in class CLASS, return the class of reg to actually use.
17518 In general this is just CLASS; but on some machines
17519 in some cases it is preferable to use a more restrictive class.
17521 On the RS/6000, we have to return NO_REGS when we want to reload a
17522 floating-point CONST_DOUBLE to force it to be copied to memory.
17524 We also don't want to reload integer values into floating-point
17525 registers if we can at all help it. In fact, this can
17526 cause reload to die, if it tries to generate a reload of CTR
17527 into an FP register and discovers it doesn't have the memory location
17528 required.
17530 ??? Would it be a good idea to have reload do the converse, that is
17531 try to reload floating modes into FP registers if possible?
17534 static enum reg_class
17535 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17537 machine_mode mode = GET_MODE (x);
17538 bool is_constant = CONSTANT_P (x);
17540 /* Do VSX tests before handling traditional floating point registers. */
17541 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17543 if (is_constant)
17545 /* Zero is always allowed in all VSX registers. */
17546 if (x == CONST0_RTX (mode))
17547 return rclass;
17549 /* If this is a vector constant that can be formed with a few Altivec
17550 instructions, we want altivec registers. */
17551 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
17552 return ALTIVEC_REGS;
17554 /* Force constant to memory. */
17555 return NO_REGS;
17558 /* If this is a scalar floating point value, prefer the traditional
17559 floating point registers so that we can use D-form (register+offset)
17560 addressing. */
17561 if (GET_MODE_SIZE (mode) < 16)
17562 return FLOAT_REGS;
17564 /* Prefer the Altivec registers if Altivec is handling the vector
17565 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
17566 loads. */
17567 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17568 || mode == V1TImode)
17569 return ALTIVEC_REGS;
17571 return rclass;
17574 if (is_constant || GET_CODE (x) == PLUS)
17576 if (reg_class_subset_p (GENERAL_REGS, rclass))
17577 return GENERAL_REGS;
17578 if (reg_class_subset_p (BASE_REGS, rclass))
17579 return BASE_REGS;
17580 return NO_REGS;
17583 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17584 return GENERAL_REGS;
17586 return rclass;
17589 /* Debug version of rs6000_preferred_reload_class. */
17590 static enum reg_class
17591 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17593 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17595 fprintf (stderr,
17596 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17597 "mode = %s, x:\n",
17598 reg_class_names[ret], reg_class_names[rclass],
17599 GET_MODE_NAME (GET_MODE (x)));
17600 debug_rtx (x);
17602 return ret;
17605 /* If we are copying between FP or AltiVec registers and anything else, we need
17606 a memory location. The exception is when we are targeting ppc64 and the
17607 fpr/gpr move instructions are available. Also, under VSX, you
17608 can copy vector registers from the FP register set to the Altivec register
17609 set and vice versa. */
17611 static bool
17612 rs6000_secondary_memory_needed (enum reg_class from_class,
17613 enum reg_class to_class,
17614 machine_mode mode)
17616 enum rs6000_reg_type from_type, to_type;
17617 bool altivec_p = ((from_class == ALTIVEC_REGS)
17618 || (to_class == ALTIVEC_REGS));
17620 /* If a simple/direct move is available, we don't need secondary memory */
17621 from_type = reg_class_to_reg_type[(int)from_class];
17622 to_type = reg_class_to_reg_type[(int)to_class];
17624 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17625 (secondary_reload_info *)0, altivec_p))
17626 return false;
17628 /* If we have a floating point or vector register class, we need to use
17629 memory to transfer the data. */
17630 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17631 return true;
17633 return false;
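/* For illustration: without direct moves, an SImode copy between
   GENERAL_REGS and FLOAT_REGS returns true (the value must travel
   through a stack slot), while a GPR-to-GPR copy of the same mode
   returns false. */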
17636 /* Debug version of rs6000_secondary_memory_needed. */
17637 static bool
17638 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17639 enum reg_class to_class,
17640 machine_mode mode)
17642 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17644 fprintf (stderr,
17645 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17646 "to_class = %s, mode = %s\n",
17647 ret ? "true" : "false",
17648 reg_class_names[from_class],
17649 reg_class_names[to_class],
17650 GET_MODE_NAME (mode));
17652 return ret;
17655 /* Return the register class of a scratch register needed to copy IN into
17656 or out of a register in RCLASS in MODE. If it can be done directly,
17657 NO_REGS is returned. */
17659 static enum reg_class
17660 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
17661 rtx in)
17663 int regno;
17665 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17666 #if TARGET_MACHO
17667 && MACHOPIC_INDIRECT
17668 #endif
17671 /* We cannot copy a symbolic operand directly into anything
17672 other than BASE_REGS for TARGET_ELF. So indicate that a
17673 register from BASE_REGS is needed as an intermediate
17674 register.
17676 On Darwin, pic addresses require a load from memory, which
17677 needs a base register. */
17678 if (rclass != BASE_REGS
17679 && (GET_CODE (in) == SYMBOL_REF
17680 || GET_CODE (in) == HIGH
17681 || GET_CODE (in) == LABEL_REF
17682 || GET_CODE (in) == CONST))
17683 return BASE_REGS;
17686 if (GET_CODE (in) == REG)
17688 regno = REGNO (in);
17689 if (regno >= FIRST_PSEUDO_REGISTER)
17691 regno = true_regnum (in);
17692 if (regno >= FIRST_PSEUDO_REGISTER)
17693 regno = -1;
17696 else if (GET_CODE (in) == SUBREG)
17698 regno = true_regnum (in);
17699 if (regno >= FIRST_PSEUDO_REGISTER)
17700 regno = -1;
17702 else
17703 regno = -1;
17705 /* If we have VSX register moves, prefer moving scalar values between
17706 Altivec registers and GPRs by going via an FPR (and then via memory)
17707 instead of reloading the secondary memory address for Altivec moves. */
17708 if (TARGET_VSX
17709 && GET_MODE_SIZE (mode) < 16
17710 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
17711 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
17712 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17713 && (regno >= 0 && INT_REGNO_P (regno)))))
17714 return FLOAT_REGS;
17716 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17717 into anything. */
17718 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17719 || (regno >= 0 && INT_REGNO_P (regno)))
17720 return NO_REGS;
17722 /* Constants, memory, and VSX registers can go into VSX registers (both the
17723 traditional floating point and the altivec registers). */
17724 if (rclass == VSX_REGS
17725 && (regno == -1 || VSX_REGNO_P (regno)))
17726 return NO_REGS;
17728 /* Constants, memory, and FP registers can go into FP registers. */
17729 if ((regno == -1 || FP_REGNO_P (regno))
17730 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17731 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17733 /* Memory, and AltiVec registers can go into AltiVec registers. */
17734 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17735 && rclass == ALTIVEC_REGS)
17736 return NO_REGS;
17738 /* We can copy among the CR registers. */
17739 if ((rclass == CR_REGS || rclass == CR0_REGS)
17740 && regno >= 0 && CR_REGNO_P (regno))
17741 return NO_REGS;
17743 /* Otherwise, we need GENERAL_REGS. */
17744 return GENERAL_REGS;
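/* For illustration: under TARGET_ELF, copying a SYMBOL_REF into
   FLOAT_REGS returns BASE_REGS, since the address must first be
   formed in a base register, while a DFmode copy from one FPR into
   FLOAT_REGS needs no scratch and returns NO_REGS. */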
17747 /* Debug version of rs6000_secondary_reload_class. */
17748 static enum reg_class
17749 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17750 machine_mode mode, rtx in)
17752 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17753 fprintf (stderr,
17754 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17755 "mode = %s, input rtx:\n",
17756 reg_class_names[ret], reg_class_names[rclass],
17757 GET_MODE_NAME (mode));
17758 debug_rtx (in);
17760 return ret;
17763 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
17765 static bool
17766 rs6000_cannot_change_mode_class (machine_mode from,
17767 machine_mode to,
17768 enum reg_class rclass)
17770 unsigned from_size = GET_MODE_SIZE (from);
17771 unsigned to_size = GET_MODE_SIZE (to);
17773 if (from_size != to_size)
17775 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17777 if (reg_classes_intersect_p (xclass, rclass))
17779 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17780 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17782 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17783 single register under VSX because the scalar part of the register
17784 is in the upper 64-bits, and not the lower 64-bits. Types like
17785 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
17786 IEEE floating point can't overlap, and neither can small
17787 values. */
17789 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17790 return true;
17792 /* TDmode in floating-mode registers must always go into a register
17793 pair with the most significant word in the even-numbered register
17794 to match ISA requirements. In little-endian mode, this does not
17795 match subreg numbering, so we cannot allow subregs. */
17796 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17797 return true;
17799 if (from_size < 8 || to_size < 8)
17800 return true;
17802 if (from_size == 8 && (8 * to_nregs) != to_size)
17803 return true;
17805 if (to_size == 8 && (8 * from_nregs) != from_size)
17806 return true;
17808 return false;
17810 else
17811 return false;
17814 if (TARGET_E500_DOUBLE
17815 && ((((to) == DFmode) + ((from) == DFmode)) == 1
17816 || (((to) == TFmode) + ((from) == TFmode)) == 1
17817 || (((to) == DDmode) + ((from) == DDmode)) == 1
17818 || (((to) == TDmode) + ((from) == TDmode)) == 1
17819 || (((to) == DImode) + ((from) == DImode)) == 1))
17820 return true;
17822 /* Since the VSX register set includes traditional floating point registers
17823 and altivec registers, just check for the size being different instead of
17824 trying to check whether the modes are vector modes. Otherwise it won't
17825 allow say DF and DI to change classes. For types like TFmode and TDmode
17826 that take 2 64-bit registers, rather than a single 128-bit register, don't
17827 allow subregs of those types to other 128 bit types. */
17828 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17830 unsigned num_regs = (from_size + 15) / 16;
17831 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17832 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17833 return true;
17835 return (from_size != 8 && from_size != 16);
17838 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17839 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17840 return true;
17842 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
17843 && reg_classes_intersect_p (GENERAL_REGS, rclass))
17844 return true;
17846 return false;
17849 /* Debug version of rs6000_cannot_change_mode_class. */
17850 static bool
17851 rs6000_debug_cannot_change_mode_class (machine_mode from,
17852 machine_mode to,
17853 enum reg_class rclass)
17855 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
17857 fprintf (stderr,
17858 "rs6000_cannot_change_mode_class, return %s, from = %s, "
17859 "to = %s, rclass = %s\n",
17860 ret ? "true" : "false",
17861 GET_MODE_NAME (from), GET_MODE_NAME (to),
17862 reg_class_names[rclass]);
17864 return ret;
17867 /* Return a string to do a move operation of 128 bits of data. */
17869 const char *
17870 rs6000_output_move_128bit (rtx operands[])
17872 rtx dest = operands[0];
17873 rtx src = operands[1];
17874 machine_mode mode = GET_MODE (dest);
17875 int dest_regno;
17876 int src_regno;
17877 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17878 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17880 if (REG_P (dest))
17882 dest_regno = REGNO (dest);
17883 dest_gpr_p = INT_REGNO_P (dest_regno);
17884 dest_fp_p = FP_REGNO_P (dest_regno);
17885 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
17886 dest_vsx_p = dest_fp_p | dest_vmx_p;
17888 else
17890 dest_regno = -1;
17891 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
17894 if (REG_P (src))
17896 src_regno = REGNO (src);
17897 src_gpr_p = INT_REGNO_P (src_regno);
17898 src_fp_p = FP_REGNO_P (src_regno);
17899 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
17900 src_vsx_p = src_fp_p | src_vmx_p;
17902 else
17904 src_regno = -1;
17905 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
17908 /* Register moves. */
17909 if (dest_regno >= 0 && src_regno >= 0)
17911 if (dest_gpr_p)
17913 if (src_gpr_p)
17914 return "#";
17916 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
17917 return "#";
17920 else if (TARGET_VSX && dest_vsx_p)
17922 if (src_vsx_p)
17923 return "xxlor %x0,%x1,%x1";
17925 else if (TARGET_DIRECT_MOVE && src_gpr_p)
17926 return "#";
17929 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
17930 return "vor %0,%1,%1";
17932 else if (dest_fp_p && src_fp_p)
17933 return "#";
17936 /* Loads. */
17937 else if (dest_regno >= 0 && MEM_P (src))
17939 if (dest_gpr_p)
17941 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17942 return "lq %0,%1";
17943 else
17944 return "#";
17947 else if (TARGET_ALTIVEC && dest_vmx_p
17948 && altivec_indexed_or_indirect_operand (src, mode))
17949 return "lvx %0,%y1";
17951 else if (TARGET_VSX && dest_vsx_p)
17953 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17954 return "lxvw4x %x0,%y1";
17955 else
17956 return "lxvd2x %x0,%y1";
17959 else if (TARGET_ALTIVEC && dest_vmx_p)
17960 return "lvx %0,%y1";
17962 else if (dest_fp_p)
17963 return "#";
17966 /* Stores. */
17967 else if (src_regno >= 0 && MEM_P (dest))
17969 if (src_gpr_p)
17971 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17972 return "stq %1,%0";
17973 else
17974 return "#";
17977 else if (TARGET_ALTIVEC && src_vmx_p
17978 && altivec_indexed_or_indirect_operand (dest, mode))
17979 return "stvx %1,%y0";
17981 else if (TARGET_VSX && src_vsx_p)
17983 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17984 return "stxvw4x %x1,%y0";
17985 else
17986 return "stxvd2x %x1,%y0";
17989 else if (TARGET_ALTIVEC && src_vmx_p)
17990 return "stvx %1,%y0";
17992 else if (src_fp_p)
17993 return "#";
17996 /* Constants. */
17997 else if (dest_regno >= 0
17998 && (GET_CODE (src) == CONST_INT
17999 || GET_CODE (src) == CONST_WIDE_INT
18000 || GET_CODE (src) == CONST_DOUBLE
18001 || GET_CODE (src) == CONST_VECTOR))
18003 if (dest_gpr_p)
18004 return "#";
18006 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18007 return "xxlxor %x0,%x0,%x0";
18009 else if (TARGET_ALTIVEC && dest_vmx_p)
18010 return output_vec_const_move (operands);
18013 if (TARGET_DEBUG_ADDR)
18015 fprintf (stderr, "\n===== Bad 128 bit move:\n");
18016 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
18019 gcc_unreachable ();
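/* Illustrative returns, following directly from the cases above: a
   VSX-to-VSX register move yields "xxlor %x0,%x1,%x1", a V4SImode load
   yields "lxvw4x %x0,%y1", and a GPR-to-GPR TImode move yields "#" so
   that the move is split into word-sized pieces after reload. */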
18022 /* Validate a 128-bit move. */
18023 bool
18024 rs6000_move_128bit_ok_p (rtx operands[])
18026 machine_mode mode = GET_MODE (operands[0]);
18027 return (gpc_reg_operand (operands[0], mode)
18028 || gpc_reg_operand (operands[1], mode));
18031 /* Return true if a 128-bit move needs to be split. */
18032 bool
18033 rs6000_split_128bit_ok_p (rtx operands[])
18035 if (!reload_completed)
18036 return false;
18038 if (!gpr_or_gpr_p (operands[0], operands[1]))
18039 return false;
18041 if (quad_load_store_p (operands[0], operands[1]))
18042 return false;
18044 return true;
18048 /* Given a comparison operation, return the bit number in CCR to test. We
18049 know this is a valid comparison.
18051 SCC_P is 1 if this is for an scc. That means that %D will have been
18052 used instead of %C, so the bits will be in different places.
18054 Return -1 if OP isn't a valid comparison for some reason. */
18056 int
18057 ccr_bit (rtx op, int scc_p)
18059 enum rtx_code code = GET_CODE (op);
18060 machine_mode cc_mode;
18061 int cc_regnum;
18062 int base_bit;
18063 rtx reg;
18065 if (!COMPARISON_P (op))
18066 return -1;
18068 reg = XEXP (op, 0);
18070 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18072 cc_mode = GET_MODE (reg);
18073 cc_regnum = REGNO (reg);
18074 base_bit = 4 * (cc_regnum - CR0_REGNO);
18076 validate_condition_mode (code, cc_mode);
18078 /* When generating a sCOND operation, only positive conditions are
18079 allowed. */
18080 gcc_assert (!scc_p
18081 || code == EQ || code == GT || code == LT || code == UNORDERED
18082 || code == GTU || code == LTU);
18084 switch (code)
18086 case NE:
18087 return scc_p ? base_bit + 3 : base_bit + 2;
18088 case EQ:
18089 return base_bit + 2;
18090 case GT: case GTU: case UNLE:
18091 return base_bit + 1;
18092 case LT: case LTU: case UNGE:
18093 return base_bit;
18094 case ORDERED: case UNORDERED:
18095 return base_bit + 3;
18097 case GE: case GEU:
18098 /* If scc, we will have done a cror to put the bit in the
18099 unordered position. So test that bit. For integer, this is ! LT
18100 unless this is an scc insn. */
18101 return scc_p ? base_bit + 3 : base_bit;
18103 case LE: case LEU:
18104 return scc_p ? base_bit + 3 : base_bit + 1;
18106 default:
18107 gcc_unreachable ();
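/* Worked example: for a GT comparison held in CR field 1, base_bit is
   4 * 1 = 4 and the function returns bit 5, the GT bit of that field.
   With SCC_P nonzero, GE instead returns base_bit + 3, the bit that a
   preceding cror moved into the unordered position. */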
18111 /* Return the GOT register. */
18113 rtx
18114 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18116 /* The second flow pass currently (June 1999) can't update
18117 regs_ever_live without disturbing other parts of the compiler, so
18118 update it here to make the prolog/epilogue code happy. */
18119 if (!can_create_pseudo_p ()
18120 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18121 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18123 crtl->uses_pic_offset_table = 1;
18125 return pic_offset_table_rtx;
18128 static rs6000_stack_t stack_info;
18130 /* Function to init struct machine_function.
18131 This will be called, via a pointer variable,
18132 from push_function_context. */
18134 static struct machine_function *
18135 rs6000_init_machine_status (void)
18137 stack_info.reload_completed = 0;
18138 return ggc_cleared_alloc<machine_function> ();
18141 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18143 int
18144 extract_MB (rtx op)
18146 int i;
18147 unsigned long val = INTVAL (op);
18149 /* If the high bit is zero, the value is the first 1 bit we find
18150 from the left. */
18151 if ((val & 0x80000000) == 0)
18153 gcc_assert (val & 0xffffffff);
18155 i = 1;
18156 while (((val <<= 1) & 0x80000000) == 0)
18157 ++i;
18158 return i;
18161 /* If the high bit is set and the low bit is not, or the mask is all
18162 1's, the value is zero. */
18163 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
18164 return 0;
18166 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18167 from the right. */
18168 i = 31;
18169 while (((val >>= 1) & 1) != 0)
18170 --i;
18172 return i;
18175 int
18176 extract_ME (rtx op)
18178 int i;
18179 unsigned long val = INTVAL (op);
18181 /* If the low bit is zero, the value is the first 1 bit we find from
18182 the right. */
18183 if ((val & 1) == 0)
18185 gcc_assert (val & 0xffffffff);
18187 i = 30;
18188 while (((val >>= 1) & 1) == 0)
18189 --i;
18191 return i;
18194 /* If the low bit is set and the high bit is not, or the mask is all
18195 1's, the value is 31. */
18196 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
18197 return 31;
18199 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18200 from the left. */
18201 i = 0;
18202 while (((val <<= 1) & 0x80000000) != 0)
18203 ++i;
18205 return i;
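/* Worked examples for the two extractors: the contiguous mask
   0x0fff0000 (PowerPC bits 4..15 set, bit 0 being the most significant)
   gives extract_MB = 4 and extract_ME = 15. The wrap-around mask
   0xff0000ff gives extract_MB = 24 and extract_ME = 7: the mask starts
   at bit 24, wraps past bit 31 to bit 0, and ends at bit 7. */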
18208 /* Write out a function code label. */
18210 void
18211 rs6000_output_function_entry (FILE *file, const char *fname)
18213 if (fname[0] != '.')
18215 switch (DEFAULT_ABI)
18217 default:
18218 gcc_unreachable ();
18220 case ABI_AIX:
18221 if (DOT_SYMBOLS)
18222 putc ('.', file);
18223 else
18224 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18225 break;
18227 case ABI_ELFv2:
18228 case ABI_V4:
18229 case ABI_DARWIN:
18230 break;
18234 RS6000_OUTPUT_BASENAME (file, fname);
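/* For example, under the AIX ABI with dot-symbols the entry point of
   function "foo" is written as ".foo", keeping the code label distinct
   from the "foo" function descriptor; the ELFv2, V4 and Darwin ABIs
   emit the basename unchanged. */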
18237 /* Print an operand. Recognize special options, documented below. */
18239 #if TARGET_ELF
18240 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18241 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18242 #else
18243 #define SMALL_DATA_RELOC "sda21"
18244 #define SMALL_DATA_REG 0
18245 #endif
18247 void
18248 print_operand (FILE *file, rtx x, int code)
18250 int i;
18251 unsigned HOST_WIDE_INT uval;
18253 switch (code)
18255 /* %a is output_address. */
18257 case 'b':
18258 /* If constant, low-order 16 bits of constant, unsigned.
18259 Otherwise, write normally. */
18260 if (INT_P (x))
18261 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18262 else
18263 print_operand (file, x, 0);
18264 return;
18266 case 'B':
18267 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18268 for 64-bit mask direction. */
18269 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18270 return;
18272 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18273 output_operand. */
18275 case 'D':
18276 /* Like 'J' but get to the GT bit only. */
18277 gcc_assert (REG_P (x));
18279 /* Bit 1 is GT bit. */
18280 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18282 /* Add one for shift count in rlwinm for scc. */
18283 fprintf (file, "%d", i + 1);
18284 return;
18286 case 'e':
18287 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18288 if (! INT_P (x))
18290 output_operand_lossage ("invalid %%e value");
18291 return;
18294 uval = INTVAL (x);
18295 if ((uval & 0xffff) == 0 && uval != 0)
18296 putc ('s', file);
18297 return;
18299 case 'E':
18300 /* X is a CR register. Print the number of the EQ bit of the CR. */
18301 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18302 output_operand_lossage ("invalid %%E value");
18303 else
18304 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18305 return;
18307 case 'f':
18308 /* X is a CR register. Print the shift count needed to move it
18309 to the high-order four bits. */
18310 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18311 output_operand_lossage ("invalid %%f value");
18312 else
18313 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18314 return;
18316 case 'F':
18317 /* Similar, but print the count for the rotate in the opposite
18318 direction. */
18319 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18320 output_operand_lossage ("invalid %%F value");
18321 else
18322 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18323 return;
18325 case 'G':
18326 /* X is a constant integer. If it is negative, print "m",
18327 otherwise print "z". This is to make an aze or ame insn. */
18328 if (GET_CODE (x) != CONST_INT)
18329 output_operand_lossage ("invalid %%G value");
18330 else if (INTVAL (x) >= 0)
18331 putc ('z', file);
18332 else
18333 putc ('m', file);
18334 return;
18336 case 'h':
18337 /* If constant, output low-order five bits. Otherwise, write
18338 normally. */
18339 if (INT_P (x))
18340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18341 else
18342 print_operand (file, x, 0);
18343 return;
18345 case 'H':
18346 /* If constant, output low-order six bits. Otherwise, write
18347 normally. */
18348 if (INT_P (x))
18349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18350 else
18351 print_operand (file, x, 0);
18352 return;
18354 case 'I':
18355 /* Print `i' if this is a constant, else nothing. */
18356 if (INT_P (x))
18357 putc ('i', file);
18358 return;
18360 case 'j':
18361 /* Write the bit number in CCR for jump. */
18362 i = ccr_bit (x, 0);
18363 if (i == -1)
18364 output_operand_lossage ("invalid %%j code");
18365 else
18366 fprintf (file, "%d", i);
18367 return;
18369 case 'J':
18370 /* Similar, but add one for shift count in rlwinm for scc and pass
18371 scc flag to `ccr_bit'. */
18372 i = ccr_bit (x, 1);
18373 if (i == -1)
18374 output_operand_lossage ("invalid %%J code");
18375 else
18376 /* If we want bit 31, write a shift count of zero, not 32. */
18377 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18378 return;
18380 case 'k':
18381 /* X must be a constant. Write the 1's complement of the
18382 constant. */
18383 if (! INT_P (x))
18384 output_operand_lossage ("invalid %%k value");
18385 else
18386 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18387 return;
18389 case 'K':
18390 /* X must be a symbolic constant on ELF. Write an
18391 expression suitable for an 'addi' that adds in the low 16
18392 bits of the MEM. */
18393 if (GET_CODE (x) == CONST)
18395 if (GET_CODE (XEXP (x, 0)) != PLUS
18396 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18397 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18398 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18399 output_operand_lossage ("invalid %%K value");
18401 print_operand_address (file, x);
18402 fputs ("@l", file);
18403 return;
18405 /* %l is output_asm_label. */
18407 case 'L':
18408 /* Write second word of DImode or DFmode reference. Works on register
18409 or non-indexed memory only. */
18410 if (REG_P (x))
18411 fputs (reg_names[REGNO (x) + 1], file);
18412 else if (MEM_P (x))
18414 /* Handle possible auto-increment. Since it is pre-increment and
18415 we have already done it, we can just use an offset of one word. */
18416 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18417 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18418 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18419 UNITS_PER_WORD));
18420 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18421 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18422 UNITS_PER_WORD));
18423 else
18424 output_address (XEXP (adjust_address_nv (x, SImode,
18425 UNITS_PER_WORD),
18426 0));
18428 if (small_data_operand (x, GET_MODE (x)))
18429 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18430 reg_names[SMALL_DATA_REG]);
18432 return;
18434 case 'm':
18435 /* MB value for a mask operand. */
18436 if (! mask_operand (x, SImode))
18437 output_operand_lossage ("invalid %%m value");
18439 fprintf (file, "%d", extract_MB (x));
18440 return;
18442 case 'M':
18443 /* ME value for a mask operand. */
18444 if (! mask_operand (x, SImode))
18445 output_operand_lossage ("invalid %%M value");
18447 fprintf (file, "%d", extract_ME (x));
18448 return;
18450 /* %n outputs the negative of its operand. */
18452 case 'N':
18453 /* Write the number of elements in the vector times 4. */
18454 if (GET_CODE (x) != PARALLEL)
18455 output_operand_lossage ("invalid %%N value");
18456 else
18457 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18458 return;
18460 case 'O':
18461 /* Similar, but subtract 1 first. */
18462 if (GET_CODE (x) != PARALLEL)
18463 output_operand_lossage ("invalid %%O value");
18464 else
18465 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18466 return;
18468 case 'p':
18469 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18470 if (! INT_P (x)
18471 || INTVAL (x) < 0
18472 || (i = exact_log2 (INTVAL (x))) < 0)
18473 output_operand_lossage ("invalid %%p value");
18474 else
18475 fprintf (file, "%d", i);
18476 return;
18478 case 'P':
18479 /* The operand must be an indirect memory reference. The result
18480 is the register name. */
18481 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18482 || REGNO (XEXP (x, 0)) >= 32)
18483 output_operand_lossage ("invalid %%P value");
18484 else
18485 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18486 return;
18488 case 'q':
18489 /* This outputs the logical code corresponding to a boolean
18490 expression. The expression may have one or both operands
18491 negated (if one, only the first one). For condition register
18492 logical operations, it will also treat the negated
18493 CR codes as NOTs, but not handle NOTs of them. */
18495 const char *const *t = 0;
18496 const char *s;
18497 enum rtx_code code = GET_CODE (x);
18498 static const char * const tbl[3][3] = {
18499 { "and", "andc", "nor" },
18500 { "or", "orc", "nand" },
18501 { "xor", "eqv", "xor" } };
18503 if (code == AND)
18504 t = tbl[0];
18505 else if (code == IOR)
18506 t = tbl[1];
18507 else if (code == XOR)
18508 t = tbl[2];
18509 else
18510 output_operand_lossage ("invalid %%q value");
18512 if (GET_CODE (XEXP (x, 0)) != NOT)
18513 s = t[0];
18514 else
18516 if (GET_CODE (XEXP (x, 1)) == NOT)
18517 s = t[2];
18518 else
18519 s = t[1];
18522 fputs (s, file);
18524 return;
18526 case 'Q':
18527 if (! TARGET_MFCRF)
18528 return;
18529 fputc (',', file);
18530 /* FALLTHRU */
18532 case 'R':
18533 /* X is a CR register. Print the mask for `mtcrf'. */
18534 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18535 output_operand_lossage ("invalid %%R value");
18536 else
18537 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18538 return;
18540 case 's':
18541 /* Low 5 bits of 32 - value. */
18542 if (! INT_P (x))
18543 output_operand_lossage ("invalid %%s value");
18544 else
18545 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18546 return;
18548 case 'S':
18549 /* PowerPC64 mask position. All 0's is excluded.
18550 CONST_INT 32-bit mask is considered sign-extended so any
18551 transition must occur within the CONST_INT, not on the boundary. */
18552 if (! mask64_operand (x, DImode))
18553 output_operand_lossage ("invalid %%S value");
18555 uval = INTVAL (x);
18557 if (uval & 1) /* Clear Left */
18559 #if HOST_BITS_PER_WIDE_INT > 64
18560 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18561 #endif
18562 i = 64;
18564 else /* Clear Right */
18566 uval = ~uval;
18567 #if HOST_BITS_PER_WIDE_INT > 64
18568 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18569 #endif
18570 i = 63;
18572 while (uval != 0)
18573 --i, uval >>= 1;
18574 gcc_assert (i >= 0);
18575 fprintf (file, "%d", i);
18576 return;
18578 case 't':
18579 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18580 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18582 /* Bit 3 is OV bit. */
18583 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18585 /* If we want bit 31, write a shift count of zero, not 32. */
18586 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18587 return;
18589 case 'T':
18590 /* Print the symbolic name of a branch target register. */
18591 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18592 && REGNO (x) != CTR_REGNO))
18593 output_operand_lossage ("invalid %%T value");
18594 else if (REGNO (x) == LR_REGNO)
18595 fputs ("lr", file);
18596 else
18597 fputs ("ctr", file);
18598 return;
18600 case 'u':
18601 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18602 for use in unsigned operand. */
18603 if (! INT_P (x))
18605 output_operand_lossage ("invalid %%u value");
18606 return;
18609 uval = INTVAL (x);
18610 if ((uval & 0xffff) == 0)
18611 uval >>= 16;
18613 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18614 return;
18616 case 'v':
18617 /* High-order 16 bits of constant for use in signed operand. */
18618 if (! INT_P (x))
18619 output_operand_lossage ("invalid %%v value");
18620 else
18621 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18622 (INTVAL (x) >> 16) & 0xffff);
18623 return;
18625 case 'U':
18626 /* Print `u' if this has an auto-increment or auto-decrement. */
18627 if (MEM_P (x)
18628 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18629 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18630 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18631 putc ('u', file);
18632 return;
18634 case 'V':
18635 /* Print the trap code for this operand. */
18636 switch (GET_CODE (x))
18638 case EQ:
18639 fputs ("eq", file); /* 4 */
18640 break;
18641 case NE:
18642 fputs ("ne", file); /* 24 */
18643 break;
18644 case LT:
18645 fputs ("lt", file); /* 16 */
18646 break;
18647 case LE:
18648 fputs ("le", file); /* 20 */
18649 break;
18650 case GT:
18651 fputs ("gt", file); /* 8 */
18652 break;
18653 case GE:
18654 fputs ("ge", file); /* 12 */
18655 break;
18656 case LTU:
18657 fputs ("llt", file); /* 2 */
18658 break;
18659 case LEU:
18660 fputs ("lle", file); /* 6 */
18661 break;
18662 case GTU:
18663 fputs ("lgt", file); /* 1 */
18664 break;
18665 case GEU:
18666 fputs ("lge", file); /* 5 */
18667 break;
18668 default:
18669 gcc_unreachable ();
18671 break;
18673 case 'w':
18674 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18675 normally. */
18676 if (INT_P (x))
18677 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18678 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18679 else
18680 print_operand (file, x, 0);
18681 return;
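/* For example, %w of (const_int 0x12348765) prints -30875, the
   sign-extended low half: (0x8765 ^ 0x8000) - 0x8000. */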
18683 case 'W':
18684 /* MB value for a PowerPC64 rldic operand. */
18685 i = clz_hwi (INTVAL (x));
18687 fprintf (file, "%d", i);
18688 return;
18690 case 'x':
18691 /* X is a FPR or Altivec register used in a VSX context. */
18692 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18693 output_operand_lossage ("invalid %%x value");
18694 else
18696 int reg = REGNO (x);
18697 int vsx_reg = (FP_REGNO_P (reg)
18698 ? reg - 32
18699 : reg - FIRST_ALTIVEC_REGNO + 32);
18701 #ifdef TARGET_REGNAMES
18702 if (TARGET_REGNAMES)
18703 fprintf (file, "%%vs%d", vsx_reg);
18704 else
18705 #endif
18706 fprintf (file, "%d", vsx_reg);
18708 return;
18710 case 'X':
18711 if (MEM_P (x)
18712 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18713 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18714 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18715 putc ('x', file);
18716 return;
18718 case 'Y':
18719 /* Like 'L', for third word of TImode/PTImode. */
18720 if (REG_P (x))
18721 fputs (reg_names[REGNO (x) + 2], file);
18722 else if (MEM_P (x))
18724 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18725 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18726 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18727 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18728 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18729 else
18730 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18731 if (small_data_operand (x, GET_MODE (x)))
18732 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18733 reg_names[SMALL_DATA_REG]);
18735 return;
18737 case 'z':
18738 /* X is a SYMBOL_REF. Write out the name preceded by a
18739 period and without any trailing data in brackets. Used for function
18740 names. If we are configured for System V (or the embedded ABI) on
18741 the PowerPC, do not emit the period, since those systems do not use
18742 TOCs and the like. */
18743 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18745 /* For macho, check to see if we need a stub. */
18746 if (TARGET_MACHO)
18748 const char *name = XSTR (x, 0);
18749 #if TARGET_MACHO
18750 if (darwin_emit_branch_islands
18751 && MACHOPIC_INDIRECT
18752 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18753 name = machopic_indirection_name (x, /*stub_p=*/true);
18754 #endif
18755 assemble_name (file, name);
18757 else if (!DOT_SYMBOLS)
18758 assemble_name (file, XSTR (x, 0));
18759 else
18760 rs6000_output_function_entry (file, XSTR (x, 0));
18761 return;
18763 case 'Z':
18764 /* Like 'L', for last word of TImode/PTImode. */
18765 if (REG_P (x))
18766 fputs (reg_names[REGNO (x) + 3], file);
18767 else if (MEM_P (x))
18769 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18770 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18771 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18772 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18773 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18774 else
18775 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18776 if (small_data_operand (x, GET_MODE (x)))
18777 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18778 reg_names[SMALL_DATA_REG]);
18780 return;
18782 /* Print AltiVec or SPE memory operand. */
18783 case 'y':
18785 rtx tmp;
18787 gcc_assert (MEM_P (x));
18789 tmp = XEXP (x, 0);
18791 /* Ugly hack because %y is overloaded. */
18792 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18793 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18794 || GET_MODE (x) == TFmode
18795 || GET_MODE (x) == TImode
18796 || GET_MODE (x) == PTImode))
18798 /* Handle [reg]. */
18799 if (REG_P (tmp))
18801 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18802 break;
18804 /* Handle [reg+UIMM]. */
18805 else if (GET_CODE (tmp) == PLUS &&
18806 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18808 int x;
18810 gcc_assert (REG_P (XEXP (tmp, 0)));
18812 x = INTVAL (XEXP (tmp, 1));
18813 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
18814 break;
18817 /* Fall through. Must be [reg+reg]. */
18819 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
18820 && GET_CODE (tmp) == AND
18821 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
18822 && INTVAL (XEXP (tmp, 1)) == -16)
18823 tmp = XEXP (tmp, 0);
18824 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
18825 && GET_CODE (tmp) == PRE_MODIFY)
18826 tmp = XEXP (tmp, 1);
18827 if (REG_P (tmp))
18828 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
18829 else
18831 if (GET_CODE (tmp) != PLUS
18832 || !REG_P (XEXP (tmp, 0))
18833 || !REG_P (XEXP (tmp, 1)))
18835 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
18836 break;
18839 if (REGNO (XEXP (tmp, 0)) == 0)
18840 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
18841 reg_names[ REGNO (XEXP (tmp, 0)) ]);
18842 else
18843 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
18844 reg_names[ REGNO (XEXP (tmp, 1)) ]);
18846 break;
18849 case 0:
18850 if (REG_P (x))
18851 fprintf (file, "%s", reg_names[REGNO (x)]);
18852 else if (MEM_P (x))
18854 /* We need to handle PRE_INC and PRE_DEC here, since we need to
18855 know the width from the mode. */
18856 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
18857 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
18858 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18859 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
18860 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
18861 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18862 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18863 output_address (XEXP (XEXP (x, 0), 1));
18864 else
18865 output_address (XEXP (x, 0));
18867 else
18869 if (toc_relative_expr_p (x, false))
18870 /* This hack along with a corresponding hack in
18871 rs6000_output_addr_const_extra arranges to output addends
18872 where the assembler expects to find them. eg.
18873 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
18874 without this hack would be output as "x@toc+4". We
18875 want "x+4@toc". */
18876 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18877 else
18878 output_addr_const (file, x);
18880 return;
18882 case '&':
18883 if (const char *name = get_some_local_dynamic_name ())
18884 assemble_name (file, name);
18885 else
18886 output_operand_lossage ("'%%&' used without any "
18887 "local dynamic TLS references");
18888 return;
18890 default:
18891 output_operand_lossage ("invalid %%xn code");
18895 /* Print the address of an operand. */
18897 void
18898 print_operand_address (FILE *file, rtx x)
18900 if (REG_P (x))
18901 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
18902 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
18903 || GET_CODE (x) == LABEL_REF)
18905 output_addr_const (file, x);
18906 if (small_data_operand (x, GET_MODE (x)))
18907 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18908 reg_names[SMALL_DATA_REG]);
18909 else
18910 gcc_assert (!TARGET_TOC);
18912 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18913 && REG_P (XEXP (x, 1)))
18915 if (REGNO (XEXP (x, 0)) == 0)
18916 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
18917 reg_names[ REGNO (XEXP (x, 0)) ]);
18918 else
18919 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
18920 reg_names[ REGNO (XEXP (x, 1)) ]);
18922 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18923 && GET_CODE (XEXP (x, 1)) == CONST_INT)
18924 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
18925 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
18926 #if TARGET_MACHO
18927 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18928 && CONSTANT_P (XEXP (x, 1)))
18930 fprintf (file, "lo16(");
18931 output_addr_const (file, XEXP (x, 1));
18932 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18934 #endif
18935 #if TARGET_ELF
18936 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18937 && CONSTANT_P (XEXP (x, 1)))
18939 output_addr_const (file, XEXP (x, 1));
18940 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18942 #endif
18943 else if (toc_relative_expr_p (x, false))
18945 /* This hack along with a corresponding hack in
18946 rs6000_output_addr_const_extra arranges to output addends
18947 where the assembler expects to find them. eg.
18948 (lo_sum (reg 9)
18949 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
18950 without this hack would be output as "x@toc+8@l(9)". We
18951 want "x+8@toc@l(9)". */
18952 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18953 if (GET_CODE (x) == LO_SUM)
18954 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
18955 else
18956 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
18958 else
18959 gcc_unreachable ();
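/* Examples with the default register names: (plus (reg 9)
   (const_int 16)) prints as "16(9)", and an indexed address
   (plus (reg 9) (reg 10)) as "9,10". If the first register is r0, the
   operands are printed swapped, since r0 in the RA slot of an indexed
   form reads as literal zero. */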
18962 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
18964 static bool
18965 rs6000_output_addr_const_extra (FILE *file, rtx x)
18967 if (GET_CODE (x) == UNSPEC)
18968 switch (XINT (x, 1))
18970 case UNSPEC_TOCREL:
18971 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
18972 && REG_P (XVECEXP (x, 0, 1))
18973 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
18974 output_addr_const (file, XVECEXP (x, 0, 0));
18975 if (x == tocrel_base && tocrel_offset != const0_rtx)
18977 if (INTVAL (tocrel_offset) >= 0)
18978 fprintf (file, "+");
18979 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
18981 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
18983 putc ('-', file);
18984 assemble_name (file, toc_label_name);
18986 else if (TARGET_ELF)
18987 fputs ("@toc", file);
18988 return true;
18990 #if TARGET_MACHO
18991 case UNSPEC_MACHOPIC_OFFSET:
18992 output_addr_const (file, XVECEXP (x, 0, 0));
18993 putc ('-', file);
18994 machopic_output_function_base_name (file);
18995 return true;
18996 #endif
18998 return false;
19001 /* Target hook for assembling integer objects. The PowerPC version has
19002 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19003 is defined. It also needs to handle DI-mode objects on 64-bit
19004 targets. */
19006 static bool
19007 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19009 #ifdef RELOCATABLE_NEEDS_FIXUP
19010 /* Special handling for SI values. */
19011 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19013 static int recurse = 0;
19015 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19016 the .fixup section. Since the TOC section is already relocated, we
19017 don't need to mark it here. We used to skip the text section, but it
19018 should never be valid for relocated addresses to be placed in the text
19019 section. */
19020 if (TARGET_RELOCATABLE
19021 && in_section != toc_section
19022 && !recurse
19023 && !CONST_SCALAR_INT_P (x)
19024 && CONSTANT_P (x))
19026 char buf[256];
19028 recurse = 1;
19029 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19030 fixuplabelno++;
19031 ASM_OUTPUT_LABEL (asm_out_file, buf);
19032 fprintf (asm_out_file, "\t.long\t(");
19033 output_addr_const (asm_out_file, x);
19034 fprintf (asm_out_file, ")@fixup\n");
19035 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19036 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19037 fprintf (asm_out_file, "\t.long\t");
19038 assemble_name (asm_out_file, buf);
19039 fprintf (asm_out_file, "\n\t.previous\n");
19040 recurse = 0;
19041 return true;
19043 /* Remove initial .'s to turn a -mcall-aixdesc function
19044 address into the address of the descriptor, not the function
19045 itself. */
19046 else if (GET_CODE (x) == SYMBOL_REF
19047 && XSTR (x, 0)[0] == '.'
19048 && DEFAULT_ABI == ABI_AIX)
19050 const char *name = XSTR (x, 0);
19051 while (*name == '.')
19052 name++;
19054 fprintf (asm_out_file, "\t.long\t%s\n", name);
19055 return true;
19058 #endif /* RELOCATABLE_NEEDS_FIXUP */
19059 return default_assemble_integer (x, size, aligned_p);
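/* A sketch of the fixup record the code above emits for -mrelocatable
   (label number illustrative):

   .LCP1:
   .long (sym)@fixup
   .section ".fixup","aw"
   .align 2
   .long .LCP1
   .previous */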
19062 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19063 /* Emit an assembler directive to set symbol visibility for DECL to
19064 VISIBILITY_TYPE. */
19066 static void
19067 rs6000_assemble_visibility (tree decl, int vis)
19069 if (TARGET_XCOFF)
19070 return;
19072 /* Functions need to have their entry point symbol visibility set as
19073 well as their descriptor symbol visibility. */
19074 if (DEFAULT_ABI == ABI_AIX
19075 && DOT_SYMBOLS
19076 && TREE_CODE (decl) == FUNCTION_DECL)
19078 static const char * const visibility_types[] = {
19079 NULL, "internal", "hidden", "protected"
19082 const char *name, *type;
19084 name = ((* targetm.strip_name_encoding)
19085 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19086 type = visibility_types[vis];
19088 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19089 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19091 else
19092 default_assemble_visibility (decl, vis);
19094 #endif
19096 enum rtx_code
19097 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19099 /* Reversal of FP compares takes care -- an ordered compare
19100 becomes an unordered compare and vice versa. */
19101 if (mode == CCFPmode
19102 && (!flag_finite_math_only
19103 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19104 || code == UNEQ || code == LTGT))
19105 return reverse_condition_maybe_unordered (code);
19106 else
19107 return reverse_condition (code);
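/* For example, reversing LT in CCFPmode yields UNGE rather than GE, so
   a NaN operand still takes the "not less than" path; integer CC modes
   can use plain reverse_condition. */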
19110 /* Generate a compare for CODE. Return a brand-new rtx that
19111 represents the result of the compare. */
19113 static rtx
19114 rs6000_generate_compare (rtx cmp, machine_mode mode)
19116 machine_mode comp_mode;
19117 rtx compare_result;
19118 enum rtx_code code = GET_CODE (cmp);
19119 rtx op0 = XEXP (cmp, 0);
19120 rtx op1 = XEXP (cmp, 1);
19122 if (FLOAT_MODE_P (mode))
19123 comp_mode = CCFPmode;
19124 else if (code == GTU || code == LTU
19125 || code == GEU || code == LEU)
19126 comp_mode = CCUNSmode;
19127 else if ((code == EQ || code == NE)
19128 && unsigned_reg_p (op0)
19129 && (unsigned_reg_p (op1)
19130 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19131 /* These are unsigned values; perhaps there will be a later
19132 ordering compare that can be shared with this one. */
19133 comp_mode = CCUNSmode;
19134 else
19135 comp_mode = CCmode;
19137 /* If we have an unsigned compare, make sure we don't have a signed value as
19138 an immediate. */
19139 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19140 && INTVAL (op1) < 0)
19142 op0 = copy_rtx_if_shared (op0);
19143 op1 = force_reg (GET_MODE (op0), op1);
19144 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19147 /* First, the compare. */
19148 compare_result = gen_reg_rtx (comp_mode);
19150 /* E500 FP compare instructions on the GPRs. Yuck! */
19151 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19152 && FLOAT_MODE_P (mode))
19154 rtx cmp, or_result, compare_result2;
19155 machine_mode op_mode = GET_MODE (op0);
19156 bool reverse_p;
19158 if (op_mode == VOIDmode)
19159 op_mode = GET_MODE (op1);
19161 /* First reverse the condition codes that aren't directly supported. */
19162 switch (code)
19164 case NE:
19165 case UNLT:
19166 case UNLE:
19167 case UNGT:
19168 case UNGE:
19169 code = reverse_condition_maybe_unordered (code);
19170 reverse_p = true;
19171 break;
19173 case EQ:
19174 case LT:
19175 case LE:
19176 case GT:
19177 case GE:
19178 reverse_p = false;
19179 break;
19181 default:
19182 gcc_unreachable ();
19185 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19186 This explains the following mess. */
19188 switch (code)
19190 case EQ:
19191 switch (op_mode)
19193 case SFmode:
19194 cmp = (flag_finite_math_only && !flag_trapping_math)
19195 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19196 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19197 break;
19199 case DFmode:
19200 cmp = (flag_finite_math_only && !flag_trapping_math)
19201 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19202 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19203 break;
19205 case TFmode:
19206 cmp = (flag_finite_math_only && !flag_trapping_math)
19207 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19208 : gen_cmptfeq_gpr (compare_result, op0, op1);
19209 break;
19211 default:
19212 gcc_unreachable ();
19214 break;
19216 case GT:
19217 case GE:
19218 switch (op_mode)
19220 case SFmode:
19221 cmp = (flag_finite_math_only && !flag_trapping_math)
19222 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19223 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19224 break;
19226 case DFmode:
19227 cmp = (flag_finite_math_only && !flag_trapping_math)
19228 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19229 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19230 break;
19232 case TFmode:
19233 cmp = (flag_finite_math_only && !flag_trapping_math)
19234 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19235 : gen_cmptfgt_gpr (compare_result, op0, op1);
19236 break;
19238 default:
19239 gcc_unreachable ();
19241 break;
19243 case LT:
19244 case LE:
19245 switch (op_mode)
19247 case SFmode:
19248 cmp = (flag_finite_math_only && !flag_trapping_math)
19249 ? gen_tstsflt_gpr (compare_result, op0, op1)
19250 : gen_cmpsflt_gpr (compare_result, op0, op1);
19251 break;
19253 case DFmode:
19254 cmp = (flag_finite_math_only && !flag_trapping_math)
19255 ? gen_tstdflt_gpr (compare_result, op0, op1)
19256 : gen_cmpdflt_gpr (compare_result, op0, op1);
19257 break;
19259 case TFmode:
19260 cmp = (flag_finite_math_only && !flag_trapping_math)
19261 ? gen_tsttflt_gpr (compare_result, op0, op1)
19262 : gen_cmptflt_gpr (compare_result, op0, op1);
19263 break;
19265 default:
19266 gcc_unreachable ();
19268 break;
19270 default:
19271 gcc_unreachable ();
19274 /* Synthesize LE and GE from LT/GT || EQ. */
19275 if (code == LE || code == GE)
19277 emit_insn (cmp);
19279 compare_result2 = gen_reg_rtx (CCFPmode);
19281 /* Do the EQ. */
19282 switch (op_mode)
19284 case SFmode:
19285 cmp = (flag_finite_math_only && !flag_trapping_math)
19286 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19287 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19288 break;
19290 case DFmode:
19291 cmp = (flag_finite_math_only && !flag_trapping_math)
19292 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19293 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19294 break;
19296 case TFmode:
19297 cmp = (flag_finite_math_only && !flag_trapping_math)
19298 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19299 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19300 break;
19302 default:
19303 gcc_unreachable ();
19306 emit_insn (cmp);
19308 /* OR them together. */
19309 or_result = gen_reg_rtx (CCFPmode);
19310 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19311 compare_result2);
19312 compare_result = or_result;
19315 code = reverse_p ? NE : EQ;
19317 emit_insn (cmp);
19319 else
19321 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19322 CLOBBERs to match cmptf_internal2 pattern. */
19323 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19324 && GET_MODE (op0) == TFmode
19325 && !TARGET_IEEEQUAD
19326 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19327 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19328 gen_rtvec (10,
19329 gen_rtx_SET (VOIDmode,
19330 compare_result,
19331 gen_rtx_COMPARE (comp_mode, op0, op1)),
19332 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19333 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19334 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19335 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19336 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19337 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19338 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19339 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19340 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19341 else if (GET_CODE (op1) == UNSPEC
19342 && XINT (op1, 1) == UNSPEC_SP_TEST)
19344 rtx op1b = XVECEXP (op1, 0, 0);
19345 comp_mode = CCEQmode;
19346 compare_result = gen_reg_rtx (CCEQmode);
19347 if (TARGET_64BIT)
19348 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19349 else
19350 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19352 else
19353 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19354 gen_rtx_COMPARE (comp_mode, op0, op1)));
19357 /* Some kinds of FP comparisons need an OR operation;
19358 under flag_finite_math_only we don't bother. */
19359 if (FLOAT_MODE_P (mode)
19360 && !flag_finite_math_only
19361 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19362 && (code == LE || code == GE
19363 || code == UNEQ || code == LTGT
19364 || code == UNGT || code == UNLT))
19366 enum rtx_code or1, or2;
19367 rtx or1_rtx, or2_rtx, compare2_rtx;
19368 rtx or_result = gen_reg_rtx (CCEQmode);
19370 switch (code)
19372 case LE: or1 = LT; or2 = EQ; break;
19373 case GE: or1 = GT; or2 = EQ; break;
19374 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19375 case LTGT: or1 = LT; or2 = GT; break;
19376 case UNGT: or1 = UNORDERED; or2 = GT; break;
19377 case UNLT: or1 = UNORDERED; or2 = LT; break;
19378 default: gcc_unreachable ();
19380 validate_condition_mode (or1, comp_mode);
19381 validate_condition_mode (or2, comp_mode);
19382 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19383 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19384 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19385 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19386 const_true_rtx);
19387 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19389 compare_result = or_result;
19390 code = EQ;
19393 validate_condition_mode (code, GET_MODE (compare_result));
19395 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
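/* To illustrate the OR step above: a floating-point "a <= b" becomes
   (ior (lt CC 0) (eq CC 0)) compared against const_true in CCEQmode,
   which matches the cror-style patterns that combine the LT and EQ
   bits of the CR field into a single testable bit. */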
19399 /* Emit the RTL for an sISEL pattern. */
19401 void
19402 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19404 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19407 void
19408 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
19410 rtx condition_rtx;
19411 machine_mode op_mode;
19412 enum rtx_code cond_code;
19413 rtx result = operands[0];
19415 if (TARGET_ISEL && (mode == SImode || mode == DImode))
19417 rs6000_emit_sISEL (mode, operands);
19418 return;
19421 condition_rtx = rs6000_generate_compare (operands[1], mode);
19422 cond_code = GET_CODE (condition_rtx);
19424 if (FLOAT_MODE_P (mode)
19425 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19427 rtx t;
19429 PUT_MODE (condition_rtx, SImode);
19430 t = XEXP (condition_rtx, 0);
19432 gcc_assert (cond_code == NE || cond_code == EQ);
19434 if (cond_code == NE)
19435 emit_insn (gen_e500_flip_gt_bit (t, t));
19437 emit_insn (gen_move_from_CR_gt_bit (result, t));
19438 return;
19441 if (cond_code == NE
19442 || cond_code == GE || cond_code == LE
19443 || cond_code == GEU || cond_code == LEU
19444 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19446 rtx not_result = gen_reg_rtx (CCEQmode);
19447 rtx not_op, rev_cond_rtx;
19448 machine_mode cc_mode;
19450 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19452 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19453 SImode, XEXP (condition_rtx, 0), const0_rtx);
19454 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19455 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19456 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19459 op_mode = GET_MODE (XEXP (operands[1], 0));
19460 if (op_mode == VOIDmode)
19461 op_mode = GET_MODE (XEXP (operands[1], 1));
19463 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19465 PUT_MODE (condition_rtx, DImode);
19466 convert_move (result, condition_rtx, 0);
19468 else
19470 PUT_MODE (condition_rtx, SImode);
19471 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19475 /* Emit a branch of kind CODE to location LOC. */
19477 void
19478 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
19480 rtx condition_rtx, loc_ref;
19482 condition_rtx = rs6000_generate_compare (operands[0], mode);
19483 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19484 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19485 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19486 loc_ref, pc_rtx)));
19489 /* Return the string to output a conditional branch to LABEL, which is
19490 the operand template of the label, or NULL if the branch is really a
19491 conditional return.
19493 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19494 condition code register and its mode specifies what kind of
19495 comparison we made.
19497 REVERSED is nonzero if we should reverse the sense of the comparison.
19499 INSN is the insn. */
19501 char *
19502 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19504 static char string[64];
19505 enum rtx_code code = GET_CODE (op);
19506 rtx cc_reg = XEXP (op, 0);
19507 machine_mode mode = GET_MODE (cc_reg);
19508 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19509 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19510 int really_reversed = reversed ^ need_longbranch;
19511 char *s = string;
19512 const char *ccode;
19513 const char *pred;
19514 rtx note;
19516 validate_condition_mode (code, mode);
19518 /* Work out which way this really branches. We could use
19519 reverse_condition_maybe_unordered here always but this
19520 makes the resulting assembler clearer. */
19521 if (really_reversed)
19523 /* Reversal of FP compares takes care -- an ordered compare
19524 becomes an unordered compare and vice versa. */
19525 if (mode == CCFPmode)
19526 code = reverse_condition_maybe_unordered (code);
19527 else
19528 code = reverse_condition (code);
19531 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19533 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19534 to the GT bit. */
19535 switch (code)
19537 case EQ:
19538 /* Opposite of GT. */
19539 code = GT;
19540 break;
19542 case NE:
19543 code = UNLE;
19544 break;
19546 default:
19547 gcc_unreachable ();
19551 switch (code)
19553 /* Not all of these are actually distinct opcodes, but
19554 we distinguish them for clarity of the resulting assembler. */
19555 case NE: case LTGT:
19556 ccode = "ne"; break;
19557 case EQ: case UNEQ:
19558 ccode = "eq"; break;
19559 case GE: case GEU:
19560 ccode = "ge"; break;
19561 case GT: case GTU: case UNGT:
19562 ccode = "gt"; break;
19563 case LE: case LEU:
19564 ccode = "le"; break;
19565 case LT: case LTU: case UNLT:
19566 ccode = "lt"; break;
19567 case UNORDERED: ccode = "un"; break;
19568 case ORDERED: ccode = "nu"; break;
19569 case UNGE: ccode = "nl"; break;
19570 case UNLE: ccode = "ng"; break;
19571 default:
19572 gcc_unreachable ();
19575 /* Maybe we have a guess as to how likely the branch is. */
19576 pred = "";
19577 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19578 if (note != NULL_RTX)
19580 /* PROB is the difference from 50%. */
19581 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19583 /* Only hint for highly probable/improbable branches on newer
19584 cpus as static prediction overrides processor dynamic
19585 prediction. For older cpus we may as well always hint, but
19586 assume not taken for branches that are very close to 50% as a
19587 mispredicted taken branch is more expensive than a
19588 mispredicted not-taken branch. */
19589 if (rs6000_always_hint
19590 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19591 && br_prob_note_reliable_p (note)))
19593 if (abs (prob) > REG_BR_PROB_BASE / 20
19594 && ((prob > 0) ^ need_longbranch))
19595 pred = "+";
19596 else
19597 pred = "-";
19601 if (label == NULL)
19602 s += sprintf (s, "b%slr%s ", ccode, pred);
19603 else
19604 s += sprintf (s, "b%s%s ", ccode, pred);
19606 /* We need to escape any '%' characters in the reg_names string.
19607 Assume they'd only be the first character.... */
19608 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19609 *s++ = '%';
19610 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19612 if (label != NULL)
19614 /* If the branch distance was too far, we may have to use an
19615 unconditional branch to go the distance. */
19616 if (need_longbranch)
19617 s += sprintf (s, ",$+8\n\tb %s", label);
19618 else
19619 s += sprintf (s, ",%s", label);
19622 return string;
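/* Sample outputs (label name illustrative; register naming depends on
   configuration): a short, likely-taken EQ branch on CR0 comes out as
   "beq+ 0,.L2"; an out-of-range branch is reversed around an
   unconditional one, "bne 0,$+8" followed by "b .L2"; and a NULL label
   produces the conditional return "beqlr 0". */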
19625 /* Return the string to flip the GT bit on a CR. */
19626 char *
19627 output_e500_flip_gt_bit (rtx dst, rtx src)
19629 static char string[64];
19630 int a, b;
19632 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19633 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19635 /* GT bit. */
19636 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19637 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19639 sprintf (string, "crnot %d,%d", a, b);
19640 return string;
19643 /* Return insn for VSX or Altivec comparisons. */
19645 static rtx
19646 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19648 rtx mask;
19649 machine_mode mode = GET_MODE (op0);
19651 switch (code)
19653 default:
19654 break;
19656 case GE:
19657 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19658 return NULL_RTX;
19660 case EQ:
19661 case GT:
19662 case GTU:
19663 case ORDERED:
19664 case UNORDERED:
19665 case UNEQ:
19666 case LTGT:
19667 mask = gen_reg_rtx (mode);
19668 emit_insn (gen_rtx_SET (VOIDmode,
19669 mask,
19670 gen_rtx_fmt_ee (code, mode, op0, op1)));
19671 return mask;
19674 return NULL_RTX;
19677 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19678 DMODE is expected destination mode. This is a recursive function. */
19680 static rtx
19681 rs6000_emit_vector_compare (enum rtx_code rcode,
19682 rtx op0, rtx op1,
19683 machine_mode dmode)
19685 rtx mask;
19686 bool swap_operands = false;
19687 bool try_again = false;
19689 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19690 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19692 /* See if the comparison works as is. */
19693 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19694 if (mask)
19695 return mask;
19697 switch (rcode)
19699 case LT:
19700 rcode = GT;
19701 swap_operands = true;
19702 try_again = true;
19703 break;
19704 case LTU:
19705 rcode = GTU;
19706 swap_operands = true;
19707 try_again = true;
19708 break;
19709 case NE:
19710 case UNLE:
19711 case UNLT:
19712 case UNGE:
19713 case UNGT:
19714 /* Invert condition and try again.
19715 e.g., A != B becomes ~(A==B). */
19717 enum rtx_code rev_code;
19718 enum insn_code nor_code;
19719 rtx mask2;
19721 rev_code = reverse_condition_maybe_unordered (rcode);
19722 if (rev_code == UNKNOWN)
19723 return NULL_RTX;
19725 nor_code = optab_handler (one_cmpl_optab, dmode);
19726 if (nor_code == CODE_FOR_nothing)
19727 return NULL_RTX;
19729 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19730 if (!mask2)
19731 return NULL_RTX;
19733 mask = gen_reg_rtx (dmode);
19734 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19735 return mask;
19737 break;
19738 case GE:
19739 case GEU:
19740 case LE:
19741 case LEU:
19742 /* Try GT/GTU/LT/LTU OR EQ */
19744 rtx c_rtx, eq_rtx;
19745 enum insn_code ior_code;
19746 enum rtx_code new_code;
19748 switch (rcode)
19750 case GE:
19751 new_code = GT;
19752 break;
19754 case GEU:
19755 new_code = GTU;
19756 break;
19758 case LE:
19759 new_code = LT;
19760 break;
19762 case LEU:
19763 new_code = LTU;
19764 break;
19766 default:
19767 gcc_unreachable ();
19770 ior_code = optab_handler (ior_optab, dmode);
19771 if (ior_code == CODE_FOR_nothing)
19772 return NULL_RTX;
19774 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
19775 if (!c_rtx)
19776 return NULL_RTX;
19778 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
19779 if (!eq_rtx)
19780 return NULL_RTX;
19782 mask = gen_reg_rtx (dmode);
19783 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
19784 return mask;
19786 break;
19787 default:
19788 return NULL_RTX;
19791 if (try_again)
19793 if (swap_operands)
19794 std::swap (op0, op1);
19796 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19797 if (mask)
19798 return mask;
19801 /* You only get two chances. */
19802 return NULL_RTX;
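/* Tracing the recursion for a V4SImode "a <= b": LE has no direct
   instruction, so it is split into LT ORed with EQ; LT in turn is
   handled as GT with the operands swapped. The emitted sequence is
   thus roughly vcmpgtsw on (b, a), vcmpequw on (a, b), and a vector
   OR of the two masks. */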
19805 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
19806 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
19807 operands for the relation operation COND. */
19810 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
19811 rtx cond, rtx cc_op0, rtx cc_op1)
19813 machine_mode dest_mode = GET_MODE (dest);
19814 machine_mode mask_mode = GET_MODE (cc_op0);
19815 enum rtx_code rcode = GET_CODE (cond);
19816 machine_mode cc_mode = CCmode;
19817 rtx mask;
19818 rtx cond2;
19819 rtx tmp;
19820 bool invert_move = false;
19822 if (VECTOR_UNIT_NONE_P (dest_mode))
19823 return 0;
19825 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
19826 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
19828 switch (rcode)
19830 /* Swap operands if we can, and fall back to doing the operation as
19831 specified, and doing a NOR to invert the test. */
19832 case NE:
19833 case UNLE:
19834 case UNLT:
19835 case UNGE:
19836 case UNGT:
19837 /* Invert condition and try again.
19838 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
19839 invert_move = true;
19840 rcode = reverse_condition_maybe_unordered (rcode);
19841 if (rcode == UNKNOWN)
19842 return 0;
19843 break;
19845 /* Mark unsigned tests with CCUNSmode. */
19846 case GTU:
19847 case GEU:
19848 case LTU:
19849 case LEU:
19850 cc_mode = CCUNSmode;
19851 break;
19853 default:
19854 break;
19857 /* Get the vector mask for the given relational operations. */
19858 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
19860 if (!mask)
19861 return 0;
19863 if (invert_move)
19865 tmp = op_true;
19866 op_true = op_false;
19867 op_false = tmp;
19870 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
19871 CONST0_RTX (dest_mode));
19872 emit_insn (gen_rtx_SET (VOIDmode,
19873 dest,
19874 gen_rtx_IF_THEN_ELSE (dest_mode,
19875 cond2,
19876 op_true,
19877 op_false)));
19878 return 1;
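/* For example, "d = (a > b) ? t : f" on V4SFmode compares with a
   single vector GT to get a mask, then emits the IF_THEN_ELSE above,
   which the machine description implements as a vector select
   (vsel/xxsel). */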
19881 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
19882 operands of the last comparison is nonzero/true, FALSE_COND if it
19883 is zero/false. Return 0 if the hardware has no such operation. */
19885 int
19886 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
19888 enum rtx_code code = GET_CODE (op);
19889 rtx op0 = XEXP (op, 0);
19890 rtx op1 = XEXP (op, 1);
19891 REAL_VALUE_TYPE c1;
19892 machine_mode compare_mode = GET_MODE (op0);
19893 machine_mode result_mode = GET_MODE (dest);
19894 rtx temp;
19895 bool is_against_zero;
19897 /* These modes should always match. */
19898 if (GET_MODE (op1) != compare_mode
19899 /* In the isel case however, we can use a compare immediate, so
19900 op1 may be a small constant. */
19901 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
19902 return 0;
19903 if (GET_MODE (true_cond) != result_mode)
19904 return 0;
19905 if (GET_MODE (false_cond) != result_mode)
19906 return 0;
19908 /* Don't allow using floating point comparisons for integer results for
19909 now. */
19910 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
19911 return 0;
19913 /* First, work out if the hardware can do this at all, or
19914 if it's too slow.... */
19915 if (!FLOAT_MODE_P (compare_mode))
19917 if (TARGET_ISEL)
19918 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
19919 return 0;
19921 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
19922 && SCALAR_FLOAT_MODE_P (compare_mode))
19923 return 0;
19925 is_against_zero = op1 == CONST0_RTX (compare_mode);
19927 /* A floating-point subtract might overflow, underflow, or produce
19928 an inexact result, thus changing the floating-point flags, so it
19929 can't be generated if we care about that. It's safe if one side
19930 of the construct is zero, since then no subtract will be
19931 generated. */
19932 if (SCALAR_FLOAT_MODE_P (compare_mode)
19933 && flag_trapping_math && ! is_against_zero)
19934 return 0;
19936 /* Eliminate half of the comparisons by switching operands, this
19937 makes the remaining code simpler. */
19938 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
19939 || code == LTGT || code == LT || code == UNLE)
19941 code = reverse_condition_maybe_unordered (code);
19942 temp = true_cond;
19943 true_cond = false_cond;
19944 false_cond = temp;
19947 /* UNEQ and LTGT take four instructions for a comparison with zero;
19948 it'll probably be faster to use a branch here too. */
19949 if (code == UNEQ && HONOR_NANS (compare_mode))
19950 return 0;
19952 if (GET_CODE (op1) == CONST_DOUBLE)
19953 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
19955 /* We're going to try to implement comparisons by performing
19956 a subtract, then comparing against zero. Unfortunately,
19957 Inf - Inf is NaN which is not zero, and so if we don't
19958 know that the operand is finite and the comparison
19959 would treat EQ differently from UNORDERED, we can't do it. */
19960 if (HONOR_INFINITIES (compare_mode)
19961 && code != GT && code != UNGE
19962 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
19963 /* Constructs of the form (a OP b ? a : b) are safe. */
19964 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
19965 || (! rtx_equal_p (op0, true_cond)
19966 && ! rtx_equal_p (op1, true_cond))))
19967 return 0;
19969 /* At this point we know we can use fsel. */
19971 /* Reduce the comparison to a comparison against zero. */
19972 if (! is_against_zero)
19974 temp = gen_reg_rtx (compare_mode);
19975 emit_insn (gen_rtx_SET (VOIDmode, temp,
19976 gen_rtx_MINUS (compare_mode, op0, op1)));
19977 op0 = temp;
19978 op1 = CONST0_RTX (compare_mode);
19981 /* If we don't care about NaNs we can reduce some of the comparisons
19982 down to faster ones. */
19983 if (! HONOR_NANS (compare_mode))
19984 switch (code)
19986 case GT:
19987 code = LE;
19988 temp = true_cond;
19989 true_cond = false_cond;
19990 false_cond = temp;
19991 break;
19992 case UNGE:
19993 code = GE;
19994 break;
19995 case UNEQ:
19996 code = EQ;
19997 break;
19998 default:
19999 break;
20002 /* Now, reduce everything down to a GE. */
20003 switch (code)
20005 case GE:
20006 break;
20008 case LE:
20009 temp = gen_reg_rtx (compare_mode);
20010 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20011 op0 = temp;
20012 break;
20014 case ORDERED:
20015 temp = gen_reg_rtx (compare_mode);
20016 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
20017 op0 = temp;
20018 break;
20020 case EQ:
20021 temp = gen_reg_rtx (compare_mode);
20022 emit_insn (gen_rtx_SET (VOIDmode, temp,
20023 gen_rtx_NEG (compare_mode,
20024 gen_rtx_ABS (compare_mode, op0))));
20025 op0 = temp;
20026 break;
20028 case UNGE:
20029 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
20030 temp = gen_reg_rtx (result_mode);
20031 emit_insn (gen_rtx_SET (VOIDmode, temp,
20032 gen_rtx_IF_THEN_ELSE (result_mode,
20033 gen_rtx_GE (VOIDmode,
20034 op0, op1),
20035 true_cond, false_cond)));
20036 false_cond = true_cond;
20037 true_cond = temp;
20039 temp = gen_reg_rtx (compare_mode);
20040 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20041 op0 = temp;
20042 break;
20044 case GT:
20045 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
20046 temp = gen_reg_rtx (result_mode);
20047 emit_insn (gen_rtx_SET (VOIDmode, temp,
20048 gen_rtx_IF_THEN_ELSE (result_mode,
20049 gen_rtx_GE (VOIDmode,
20050 op0, op1),
20051 true_cond, false_cond)));
20052 true_cond = false_cond;
20053 false_cond = temp;
20055 temp = gen_reg_rtx (compare_mode);
20056 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20057 op0 = temp;
20058 break;
20060 default:
20061 gcc_unreachable ();
20064 emit_insn (gen_rtx_SET (VOIDmode, dest,
20065 gen_rtx_IF_THEN_ELSE (result_mode,
20066 gen_rtx_GE (VOIDmode,
20067 op0, op1),
20068 true_cond, false_cond)));
20069 return 1;
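/* Worked example (sketch; assumes NaNs and FP traps need not be
   honored, so the subtract-and-compare-against-zero trick is legal):

     double m = (x >= y) ? a : b;

   reduces to

     fsub f0,fx,fy      ; f0 = x - y
     fsel fm,f0,fa,fb   ; m = (f0 >= 0.0) ? a : b

   which is exactly the final GE form emitted just above.  */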
20072 /* Same as above, but for ints (isel). */
20074 static int
20075 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20077 rtx condition_rtx, cr;
20078 machine_mode mode = GET_MODE (dest);
20079 enum rtx_code cond_code;
20080 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
20081 bool signedp;
20083 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
20084 return 0;
20086 /* We still have to do the compare, because isel doesn't do a
20087 compare; it just looks at the CRx bits set by a previous compare
20088 instruction. */
20089 condition_rtx = rs6000_generate_compare (op, mode);
20090 cond_code = GET_CODE (condition_rtx);
20091 cr = XEXP (condition_rtx, 0);
20092 signedp = GET_MODE (cr) == CCmode;
20094 isel_func = (mode == SImode
20095 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
20096 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
20098 switch (cond_code)
20100 case LT: case GT: case LTU: case GTU: case EQ:
20101 /* isel handles these directly. */
20102 break;
20104 default:
20105 /* We need to swap the sense of the comparison. */
20107 std::swap (false_cond, true_cond);
20108 PUT_CODE (condition_rtx, reverse_condition (cond_code));
20110 break;
20113 false_cond = force_reg (mode, false_cond);
20114 if (true_cond != const0_rtx)
20115 true_cond = force_reg (mode, true_cond);
20117 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
20119 return 1;
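/* For instance (sketch), with isel available on a 32-bit target,

     int r = (a < b) ? c : d;

   becomes a compare plus one isel instead of a branch:

     cmpw  cr0,ra,rb
     isel  rr,rc,rd,0    ; CR bit 0 is cr0.lt: r = (a < b) ? c : d

   output_isel below prints the instruction, swapping the two arms
   when the condition has to be reversed.  */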
20122 const char *
20123 output_isel (rtx *operands)
20125 enum rtx_code code;
20127 code = GET_CODE (operands[1]);
20129 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
20131 gcc_assert (GET_CODE (operands[2]) == REG
20132 && GET_CODE (operands[3]) == REG);
20133 PUT_CODE (operands[1], reverse_condition (code));
20134 return "isel %0,%3,%2,%j1";
20137 return "isel %0,%2,%3,%j1";
20140 void
20141 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20143 machine_mode mode = GET_MODE (op0);
20144 enum rtx_code c;
20145 rtx target;
20147 /* VSX/altivec have direct min/max insns. */
20148 if ((code == SMAX || code == SMIN)
20149 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
20150 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
20152 emit_insn (gen_rtx_SET (VOIDmode,
20153 dest,
20154 gen_rtx_fmt_ee (code, mode, op0, op1)));
20155 return;
20158 if (code == SMAX || code == SMIN)
20159 c = GE;
20160 else
20161 c = GEU;
20163 if (code == SMAX || code == UMAX)
20164 target = emit_conditional_move (dest, c, op0, op1, mode,
20165 op0, op1, mode, 0);
20166 else
20167 target = emit_conditional_move (dest, c, op0, op1, mode,
20168 op1, op0, mode, 0);
20169 gcc_assert (target);
20170 if (target != dest)
20171 emit_move_insn (dest, target);
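/* E.g. (sketch) an SMIN on DFmode without a direct min/max insn is
   emitted as the conditional move dest = (op0 >= op1) ? op1 : op0,
   which the conditional-move expander above can in turn reduce to
   fsub + fsel when that is safe.  */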
20174 /* A subroutine of the atomic operation splitters. Jump to LABEL if
20175 COND is true. Mark the jump as unlikely to be taken. */
20177 static void
20178 emit_unlikely_jump (rtx cond, rtx label)
20180 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
20181 rtx x;
20183 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
20184 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
20185 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
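/* (REG_BR_PROB_BASE is 10000, so the note records a taken
   probability of just under 1%.)  */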
20188 /* A subroutine of the atomic operation splitters. Emit a load-locked
20189 instruction in MODE. For QI/HImode, possibly use a pattern that includes
20190 the zero_extend operation. */
20192 static void
20193 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
20195 rtx (*fn) (rtx, rtx) = NULL;
20197 switch (mode)
20199 case QImode:
20200 fn = gen_load_lockedqi;
20201 break;
20202 case HImode:
20203 fn = gen_load_lockedhi;
20204 break;
20205 case SImode:
20206 if (GET_MODE (mem) == QImode)
20207 fn = gen_load_lockedqi_si;
20208 else if (GET_MODE (mem) == HImode)
20209 fn = gen_load_lockedhi_si;
20210 else
20211 fn = gen_load_lockedsi;
20212 break;
20213 case DImode:
20214 fn = gen_load_lockeddi;
20215 break;
20216 case TImode:
20217 fn = gen_load_lockedti;
20218 break;
20219 default:
20220 gcc_unreachable ();
20222 emit_insn (fn (reg, mem));
20225 /* A subroutine of the atomic operation splitters. Emit a store-conditional
20226 instruction in MODE. */
20228 static void
20229 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
20231 rtx (*fn) (rtx, rtx, rtx) = NULL;
20233 switch (mode)
20235 case QImode:
20236 fn = gen_store_conditionalqi;
20237 break;
20238 case HImode:
20239 fn = gen_store_conditionalhi;
20240 break;
20241 case SImode:
20242 fn = gen_store_conditionalsi;
20243 break;
20244 case DImode:
20245 fn = gen_store_conditionaldi;
20246 break;
20247 case TImode:
20248 fn = gen_store_conditionalti;
20249 break;
20250 default:
20251 gcc_unreachable ();
20254 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
20255 if (PPC405_ERRATUM77)
20256 emit_insn (gen_hwsync ());
20258 emit_insn (fn (res, mem, val));
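/* Together these two helpers emit the classic reservation pair, e.g.
   for SImode (sketch; the surrounding loop is built by the expanders
   below):

     1: lwarx  rT,0,rA    ; load word and set reservation
        ...               ; compute the new value
        stwcx. rS,0,rA    ; store iff the reservation still holds
        bne-   1b         ; reservation lost, retry

   lbarx/stbcx., lharx/sthcx., ldarx/stdcx. and lqarx/stqcx. play the
   same role for the other modes where the hardware provides them.  */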
20261 /* Expand barriers before and after a load_locked/store_cond sequence. */
20263 static rtx
20264 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
20266 rtx addr = XEXP (mem, 0);
20267 int strict_p = (reload_in_progress || reload_completed);
20269 if (!legitimate_indirect_address_p (addr, strict_p)
20270 && !legitimate_indexed_address_p (addr, strict_p))
20272 addr = force_reg (Pmode, addr);
20273 mem = replace_equiv_address_nv (mem, addr);
20276 switch (model)
20278 case MEMMODEL_RELAXED:
20279 case MEMMODEL_CONSUME:
20280 case MEMMODEL_ACQUIRE:
20281 break;
20282 case MEMMODEL_RELEASE:
20283 case MEMMODEL_ACQ_REL:
20284 emit_insn (gen_lwsync ());
20285 break;
20286 case MEMMODEL_SEQ_CST:
20287 emit_insn (gen_hwsync ());
20288 break;
20289 default:
20290 gcc_unreachable ();
20292 return mem;
20295 static void
20296 rs6000_post_atomic_barrier (enum memmodel model)
20298 switch (model)
20300 case MEMMODEL_RELAXED:
20301 case MEMMODEL_CONSUME:
20302 case MEMMODEL_RELEASE:
20303 break;
20304 case MEMMODEL_ACQUIRE:
20305 case MEMMODEL_ACQ_REL:
20306 case MEMMODEL_SEQ_CST:
20307 emit_insn (gen_isync ());
20308 break;
20309 default:
20310 gcc_unreachable ();
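/* Taken together, the two barrier helpers implement the conventional
   PowerPC mapping (summary of the switches above):

     model      before sequence   after sequence
     relaxed         -                 -
     consume         -                 -
     release       lwsync              -
     acquire         -               isync
     acq_rel       lwsync            isync
     seq_cst       hwsync            isync  */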
20314 /* A subroutine of the various atomic expanders. For sub-word operations,
20315 we must adjust things to operate on SImode. Given the original MEM,
20316 return a new aligned memory. Also build and return the quantities by
20317 which to shift and mask. */
20319 static rtx
20320 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
20322 rtx addr, align, shift, mask, mem;
20323 HOST_WIDE_INT shift_mask;
20324 machine_mode mode = GET_MODE (orig_mem);
20326 /* For smaller modes, we have to implement this via SImode. */
20327 shift_mask = (mode == QImode ? 0x18 : 0x10);
20329 addr = XEXP (orig_mem, 0);
20330 addr = force_reg (GET_MODE (addr), addr);
20332 /* Aligned memory containing subword. Generate a new memory. We
20333 do not want any of the existing MEM_ATTR data, as we're now
20334 accessing memory outside the original object. */
20335 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
20336 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20337 mem = gen_rtx_MEM (SImode, align);
20338 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
20339 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
20340 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20342 /* Shift amount for subword relative to aligned word. */
20343 shift = gen_reg_rtx (SImode);
20344 addr = gen_lowpart (SImode, addr);
20345 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
20346 if (BYTES_BIG_ENDIAN)
20347 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20348 shift, 1, OPTAB_LIB_WIDEN);
20349 *pshift = shift;
20351 /* Mask for insertion. */
20352 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20353 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20354 *pmask = mask;
20356 return mem;
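/* Worked example (little-endian, QImode byte at address 0x1003): the
   AND gives align = 0x1000, the rlwinm computes
   shift = (addr << 3) & 0x18 = 24, and mask = 0xff << 24, i.e. the
   byte occupies bits 24..31 of the aligned SImode word.  On
   big-endian the XOR flips the shift to 0, matching the byte's
   position at the other end of the word.  */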
20359 /* A subroutine of the various atomic expanders. For sub-word operands,
20360 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
20362 static rtx
20363 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20365 rtx x;
20367 x = gen_reg_rtx (SImode);
20368 emit_insn (gen_rtx_SET (VOIDmode, x,
20369 gen_rtx_AND (SImode,
20370 gen_rtx_NOT (SImode, mask),
20371 oldval)));
20373 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20375 return x;
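/* I.e. the result is (OLDVAL & ~MASK) | NEWVAL, with NEWVAL already
   shifted into the field that MASK selects.  */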
20378 /* A subroutine of the various atomic expanders. For sub-word operands,
20379 extract WIDE to NARROW via SHIFT. */
20381 static void
20382 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20384 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20385 wide, 1, OPTAB_LIB_WIDEN);
20386 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
20389 /* Expand an atomic compare and swap operation. */
20391 void
20392 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20394 rtx boolval, retval, mem, oldval, newval, cond;
20395 rtx label1, label2, x, mask, shift;
20396 machine_mode mode, orig_mode;
20397 enum memmodel mod_s, mod_f;
20398 bool is_weak;
20400 boolval = operands[0];
20401 retval = operands[1];
20402 mem = operands[2];
20403 oldval = operands[3];
20404 newval = operands[4];
20405 is_weak = (INTVAL (operands[5]) != 0);
20406 mod_s = (enum memmodel) INTVAL (operands[6]);
20407 mod_f = (enum memmodel) INTVAL (operands[7]);
20408 orig_mode = mode = GET_MODE (mem);
20410 mask = shift = NULL_RTX;
20411 if (mode == QImode || mode == HImode)
20413 /* Before power8, we didn't have access to lbarx/lharx, so generate
20414 lwarx and shift/mask operations. With power8, we need to do the
20415 comparison in SImode, but the store is still done in QI/HImode. */
20416 oldval = convert_modes (SImode, mode, oldval, 1);
20418 if (!TARGET_SYNC_HI_QI)
20420 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20422 /* Shift and mask OLDVAL into position within the word. */
20423 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20424 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20426 /* Shift and mask NEWVAL into position within the word. */
20427 newval = convert_modes (SImode, mode, newval, 1);
20428 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20429 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20432 /* Prepare to adjust the return value. */
20433 retval = gen_reg_rtx (SImode);
20434 mode = SImode;
20436 else if (reg_overlap_mentioned_p (retval, oldval))
20437 oldval = copy_to_reg (oldval);
20439 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20441 label1 = NULL_RTX;
20442 if (!is_weak)
20444 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20445 emit_label (XEXP (label1, 0));
20447 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20449 emit_load_locked (mode, retval, mem);
20451 x = retval;
20452 if (mask)
20454 x = expand_simple_binop (SImode, AND, retval, mask,
20455 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20458 cond = gen_reg_rtx (CCmode);
20459 /* If we have TImode, synthesize a comparison. */
20460 if (mode != TImode)
20461 x = gen_rtx_COMPARE (CCmode, x, oldval);
20462 else
20464 rtx xor1_result = gen_reg_rtx (DImode);
20465 rtx xor2_result = gen_reg_rtx (DImode);
20466 rtx or_result = gen_reg_rtx (DImode);
20467 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20468 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20469 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20470 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20472 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20473 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20474 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20475 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20478 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
20480 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20481 emit_unlikely_jump (x, label2);
20483 x = newval;
20484 if (mask)
20485 x = rs6000_mask_atomic_subword (retval, newval, mask);
20487 emit_store_conditional (orig_mode, cond, mem, x);
20489 if (!is_weak)
20491 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20492 emit_unlikely_jump (x, label1);
20495 if (mod_f != MEMMODEL_RELAXED)
20496 emit_label (XEXP (label2, 0));
20498 rs6000_post_atomic_barrier (mod_s);
20500 if (mod_f == MEMMODEL_RELAXED)
20501 emit_label (XEXP (label2, 0));
20503 if (shift)
20504 rs6000_finish_atomic_subword (operands[1], retval, shift);
20505 else if (mode != GET_MODE (operands[1]))
20506 convert_move (operands[1], retval, 1);
20508 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20509 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20510 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
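/* For a strong SImode compare-and-swap with seq_cst ordering the
   emitted sequence is roughly (sketch, register names illustrative):

       hwsync
     1: lwarx  rR,0,rM     ; rR = current value, reservation set
        cmpw   cr0,rR,rOLD
        bne-   cr0,2f      ; mismatch: fail
        stwcx. rNEW,0,rM
        bne-   cr0,1b      ; reservation lost: retry
     2: isync
                           ; cr0 holds EQ on success, NE on failure  */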
20513 /* Expand an atomic exchange operation. */
20515 void
20516 rs6000_expand_atomic_exchange (rtx operands[])
20518 rtx retval, mem, val, cond;
20519 machine_mode mode;
20520 enum memmodel model;
20521 rtx label, x, mask, shift;
20523 retval = operands[0];
20524 mem = operands[1];
20525 val = operands[2];
20526 model = (enum memmodel) INTVAL (operands[3]);
20527 mode = GET_MODE (mem);
20529 mask = shift = NULL_RTX;
20530 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20532 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20534 /* Shift and mask VAL into position within the word. */
20535 val = convert_modes (SImode, mode, val, 1);
20536 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20537 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20539 /* Prepare to adjust the return value. */
20540 retval = gen_reg_rtx (SImode);
20541 mode = SImode;
20544 mem = rs6000_pre_atomic_barrier (mem, model);
20546 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20547 emit_label (XEXP (label, 0));
20549 emit_load_locked (mode, retval, mem);
20551 x = val;
20552 if (mask)
20553 x = rs6000_mask_atomic_subword (retval, val, mask);
20555 cond = gen_reg_rtx (CCmode);
20556 emit_store_conditional (mode, cond, mem, x);
20558 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20559 emit_unlikely_jump (x, label);
20561 rs6000_post_atomic_barrier (model);
20563 if (shift)
20564 rs6000_finish_atomic_subword (operands[0], retval, shift);
20567 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20568 to perform. MEM is the memory on which to operate. VAL is the second
20569 operand of the binary operator. BEFORE and AFTER are optional locations to
20570 return the value of MEM either before or after the operation. MODEL_RTX
20571 is a CONST_INT containing the memory model to use. */
20573 void
20574 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20575 rtx orig_before, rtx orig_after, rtx model_rtx)
20577 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20578 machine_mode mode = GET_MODE (mem);
20579 machine_mode store_mode = mode;
20580 rtx label, x, cond, mask, shift;
20581 rtx before = orig_before, after = orig_after;
20583 mask = shift = NULL_RTX;
20584 /* On power8, we want to use SImode for the operation. On previous systems,
20585 do the operation on a full word and shift/mask to get the proper byte or
20586 halfword. */
20587 if (mode == QImode || mode == HImode)
20589 if (TARGET_SYNC_HI_QI)
20591 val = convert_modes (SImode, mode, val, 1);
20593 /* Prepare to adjust the return value. */
20594 before = gen_reg_rtx (SImode);
20595 if (after)
20596 after = gen_reg_rtx (SImode);
20597 mode = SImode;
20599 else
20601 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20603 /* Shift and mask VAL into position within the word. */
20604 val = convert_modes (SImode, mode, val, 1);
20605 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20606 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20608 switch (code)
20610 case IOR:
20611 case XOR:
20612 /* We've already zero-extended VAL. That is sufficient to
20613 make certain that it does not affect other bits. */
20614 mask = NULL;
20615 break;
20617 case AND:
20618 /* If we make certain that all of the other bits in VAL are
20619 set, that will be sufficient to not affect other bits. */
20620 x = gen_rtx_NOT (SImode, mask);
20621 x = gen_rtx_IOR (SImode, x, val);
20622 emit_insn (gen_rtx_SET (VOIDmode, val, x));
20623 mask = NULL;
20624 break;
20626 case NOT:
20627 case PLUS:
20628 case MINUS:
20629 /* These will all affect bits outside the field and need
20630 adjustment via MASK within the loop. */
20631 break;
20633 default:
20634 gcc_unreachable ();
20637 /* Prepare to adjust the return value. */
20638 before = gen_reg_rtx (SImode);
20639 if (after)
20640 after = gen_reg_rtx (SImode);
20641 store_mode = mode = SImode;
20645 mem = rs6000_pre_atomic_barrier (mem, model);
20647 label = gen_label_rtx ();
20648 emit_label (label);
20649 label = gen_rtx_LABEL_REF (VOIDmode, label);
20651 if (before == NULL_RTX)
20652 before = gen_reg_rtx (mode);
20654 emit_load_locked (mode, before, mem);
20656 if (code == NOT)
20658 x = expand_simple_binop (mode, AND, before, val,
20659 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20660 after = expand_simple_unop (mode, NOT, x, after, 1);
20662 else
20664 after = expand_simple_binop (mode, code, before, val,
20665 after, 1, OPTAB_LIB_WIDEN);
20668 x = after;
20669 if (mask)
20671 x = expand_simple_binop (SImode, AND, after, mask,
20672 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20673 x = rs6000_mask_atomic_subword (before, x, mask);
20675 else if (store_mode != mode)
20676 x = convert_modes (store_mode, mode, x, 1);
20678 cond = gen_reg_rtx (CCmode);
20679 emit_store_conditional (store_mode, cond, mem, x);
20681 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20682 emit_unlikely_jump (x, label);
20684 rs6000_post_atomic_barrier (model);
20686 if (shift)
20688 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20689 then do the calculations in a SImode register. */
20690 if (orig_before)
20691 rs6000_finish_atomic_subword (orig_before, before, shift);
20692 if (orig_after)
20693 rs6000_finish_atomic_subword (orig_after, after, shift);
20695 else if (store_mode != mode)
20697 /* QImode/HImode on machines with lbarx/lharx where we do the native
20698 operation and then do the calculations in a SImode register. */
20699 if (orig_before)
20700 convert_move (orig_before, before, 1);
20701 if (orig_after)
20702 convert_move (orig_after, after, 1);
20704 else if (orig_after && after != orig_after)
20705 emit_move_insn (orig_after, after);
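/* E.g. a relaxed SImode __atomic_fetch_add becomes roughly (sketch):

     1: lwarx  rB,0,rX    ; rB = BEFORE value
        add    rA,rB,rV   ; rA = AFTER value
        stwcx. rA,0,rX
        bne-   1b

   with no surrounding barriers, since the model is relaxed; the
   BEFORE and AFTER results are then copied out of rB/rA as
   requested.  */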
20708 /* Emit instructions to move SRC to DST. Called by splitters for
20709 multi-register moves. It will emit at most one instruction for
20710 each register that is accessed; that is, it won't emit li/lis pairs
20711 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20712 register. */
20714 void
20715 rs6000_split_multireg_move (rtx dst, rtx src)
20717 /* The register number of the first register being moved. */
20718 int reg;
20719 /* The mode that is to be moved. */
20720 machine_mode mode;
20721 /* The mode that the move is being done in, and its size. */
20722 machine_mode reg_mode;
20723 int reg_mode_size;
20724 /* The number of registers that will be moved. */
20725 int nregs;
20727 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20728 mode = GET_MODE (dst);
20729 nregs = hard_regno_nregs[reg][mode];
20730 if (FP_REGNO_P (reg))
20731 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20732 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20733 else if (ALTIVEC_REGNO_P (reg))
20734 reg_mode = V16QImode;
20735 else if (TARGET_E500_DOUBLE && mode == TFmode)
20736 reg_mode = DFmode;
20737 else
20738 reg_mode = word_mode;
20739 reg_mode_size = GET_MODE_SIZE (reg_mode);
20741 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20743 /* TDmode residing in FP registers is special, since the ISA requires that
20744 the lower-numbered word of a register pair is always the most significant
20745 word, even in little-endian mode. This does not match the usual subreg
20746 semantics, so we cannot use simplify_gen_subreg in those cases. Access
20747 the appropriate constituent registers "by hand" in little-endian mode.
20749 Note we do not need to check for destructive overlap here since TDmode
20750 can only reside in even/odd register pairs. */
20751 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
20753 rtx p_src, p_dst;
20754 int i;
20756 for (i = 0; i < nregs; i++)
20758 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
20759 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
20760 else
20761 p_src = simplify_gen_subreg (reg_mode, src, mode,
20762 i * reg_mode_size);
20764 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
20765 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
20766 else
20767 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
20768 i * reg_mode_size);
20770 emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
20773 return;
20776 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
20778 /* Move register range backwards, if we might have destructive
20779 overlap. */
20780 int i;
20781 for (i = nregs - 1; i >= 0; i--)
20782 emit_insn (gen_rtx_SET (VOIDmode,
20783 simplify_gen_subreg (reg_mode, dst, mode,
20784 i * reg_mode_size),
20785 simplify_gen_subreg (reg_mode, src, mode,
20786 i * reg_mode_size)));
20788 else
20790 int i;
20791 int j = -1;
20792 bool used_update = false;
20793 rtx restore_basereg = NULL_RTX;
20795 if (MEM_P (src) && INT_REGNO_P (reg))
20797 rtx breg;
20799 if (GET_CODE (XEXP (src, 0)) == PRE_INC
20800 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
20802 rtx delta_rtx;
20803 breg = XEXP (XEXP (src, 0), 0);
20804 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
20805 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
20806 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
20807 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20808 src = replace_equiv_address (src, breg);
20810 else if (! rs6000_offsettable_memref_p (src, reg_mode))
20812 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
20814 rtx basereg = XEXP (XEXP (src, 0), 0);
20815 if (TARGET_UPDATE)
20817 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
20818 emit_insn (gen_rtx_SET (VOIDmode, ndst,
20819 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
20820 used_update = true;
20822 else
20823 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20824 XEXP (XEXP (src, 0), 1)));
20825 src = replace_equiv_address (src, basereg);
20827 else
20829 rtx basereg = gen_rtx_REG (Pmode, reg);
20830 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
20831 src = replace_equiv_address (src, basereg);
20835 breg = XEXP (src, 0);
20836 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
20837 breg = XEXP (breg, 0);
20839 /* If the base register we are using to address memory is
20840 also a destination reg, then change that register last. */
20841 if (REG_P (breg)
20842 && REGNO (breg) >= REGNO (dst)
20843 && REGNO (breg) < REGNO (dst) + nregs)
20844 j = REGNO (breg) - REGNO (dst);
20846 else if (MEM_P (dst) && INT_REGNO_P (reg))
20848 rtx breg;
20850 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
20851 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
20853 rtx delta_rtx;
20854 breg = XEXP (XEXP (dst, 0), 0);
20855 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
20856 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
20857 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
20859 /* We have to update the breg before doing the store.
20860 Use store with update, if available. */
20862 if (TARGET_UPDATE)
20864 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20865 emit_insn (TARGET_32BIT
20866 ? (TARGET_POWERPC64
20867 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
20868 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
20869 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
20870 used_update = true;
20872 else
20873 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20874 dst = replace_equiv_address (dst, breg);
20876 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
20877 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
20879 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
20881 rtx basereg = XEXP (XEXP (dst, 0), 0);
20882 if (TARGET_UPDATE)
20884 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20885 emit_insn (gen_rtx_SET (VOIDmode,
20886 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
20887 used_update = true;
20889 else
20890 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20891 XEXP (XEXP (dst, 0), 1)));
20892 dst = replace_equiv_address (dst, basereg);
20894 else
20896 rtx basereg = XEXP (XEXP (dst, 0), 0);
20897 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
20898 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
20899 && REG_P (basereg)
20900 && REG_P (offsetreg)
20901 && REGNO (basereg) != REGNO (offsetreg));
20902 if (REGNO (basereg) == 0)
20904 rtx tmp = offsetreg;
20905 offsetreg = basereg;
20906 basereg = tmp;
20908 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
20909 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
20910 dst = replace_equiv_address (dst, basereg);
20913 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
20914 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
20917 for (i = 0; i < nregs; i++)
20919 /* Calculate index to next subword. */
20920 ++j;
20921 if (j == nregs)
20922 j = 0;
20924 /* If the compiler already emitted the move of the first word by
20925 store with update, no need to do anything. */
20926 if (j == 0 && used_update)
20927 continue;
20929 emit_insn (gen_rtx_SET (VOIDmode,
20930 simplify_gen_subreg (reg_mode, dst, mode,
20931 j * reg_mode_size),
20932 simplify_gen_subreg (reg_mode, src, mode,
20933 j * reg_mode_size)));
20935 if (restore_basereg != NULL_RTX)
20936 emit_insn (restore_basereg);
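/* Example (sketch): splitting a DImode register move in 32-bit code
   with dst = {r4,r5} and src = {r3,r4} overlaps destructively, so
   the backwards branch above copies r5 = r4 first and r4 = r3
   second, reading r4 before it is clobbered.  */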
20941 /* This page contains routines that are used to determine what the
20942 function prologue and epilogue code will do and write them out. */
20944 static inline bool
20945 save_reg_p (int r)
20947 return !call_used_regs[r] && df_regs_ever_live_p (r);
20950 /* Return the first fixed-point register that is required to be
20951 saved. 32 if none. */
20954 first_reg_to_save (void)
20956 int first_reg;
20958 /* Find lowest numbered live register. */
20959 for (first_reg = 13; first_reg <= 31; first_reg++)
20960 if (save_reg_p (first_reg))
20961 break;
20963 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
20964 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
20965 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
20966 || (TARGET_TOC && TARGET_MINIMAL_TOC))
20967 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20968 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
20970 #if TARGET_MACHO
20971 if (flag_pic
20972 && crtl->uses_pic_offset_table
20973 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
20974 return RS6000_PIC_OFFSET_TABLE_REGNUM;
20975 #endif
20977 return first_reg;
20980 /* Similar, for FP regs. */
20983 first_fp_reg_to_save (void)
20985 int first_reg;
20987 /* Find lowest numbered live register. */
20988 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
20989 if (save_reg_p (first_reg))
20990 break;
20992 return first_reg;
20995 /* Similar, for AltiVec regs. */
20997 static int
20998 first_altivec_reg_to_save (void)
21000 int i;
21002 /* Stack frame remains as is unless we are in AltiVec ABI. */
21003 if (! TARGET_ALTIVEC_ABI)
21004 return LAST_ALTIVEC_REGNO + 1;
21006 /* On Darwin, the unwind routines are compiled without
21007 TARGET_ALTIVEC, and use save_world to save/restore the
21008 altivec registers when necessary. */
21009 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21010 && ! TARGET_ALTIVEC)
21011 return FIRST_ALTIVEC_REGNO + 20;
21013 /* Find lowest numbered live register. */
21014 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
21015 if (save_reg_p (i))
21016 break;
21018 return i;
21021 /* Return a 32-bit mask of the AltiVec registers we need to set in
21022 VRSAVE. Bit n of the return value is 1 if Vn is live; bit 0 is
21023 the MSB of the 32-bit word. */
21025 static unsigned int
21026 compute_vrsave_mask (void)
21028 unsigned int i, mask = 0;
21030 /* On Darwin, the unwind routines are compiled without
21031 TARGET_ALTIVEC, and use save_world to save/restore the
21032 call-saved altivec registers when necessary. */
21033 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21034 && ! TARGET_ALTIVEC)
21035 mask |= 0xFFF;
21037 /* First, find out if we use _any_ altivec registers. */
21038 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21039 if (df_regs_ever_live_p (i))
21040 mask |= ALTIVEC_REG_BIT (i);
21042 if (mask == 0)
21043 return mask;
21045 /* Next, remove the argument registers from the set. These must
21046 be in the VRSAVE mask set by the caller, so we don't need to add
21047 them in again. More importantly, the mask we compute here is
21048 used to generate CLOBBERs in the set_vrsave insn, and we do not
21049 wish the argument registers to die. */
21050 for (i = crtl->args.info.vregno - 1; i >= ALTIVEC_ARG_MIN_REG; --i)
21051 mask &= ~ALTIVEC_REG_BIT (i);
21053 /* Similarly, remove the return value from the set. */
21055 bool yes = false;
21056 diddle_return_value (is_altivec_return_reg, &yes);
21057 if (yes)
21058 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
21061 return mask;
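/* E.g. (illustration, using the ALTIVEC_REG_BIT encoding) a function
   whose only live AltiVec registers are V20 and V21, neither an
   argument nor the return value, yields
   (0x80000000 >> 20) | (0x80000000 >> 21) = 0x00000c00,
   V0 being the most significant bit as noted above.  */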
21064 /* For a very restricted set of circumstances, we can cut down the
21065 size of prologues/epilogues by calling our own save/restore-the-world
21066 routines. */
21068 static void
21069 compute_save_world_info (rs6000_stack_t *info_ptr)
21071 info_ptr->world_save_p = 1;
21072 info_ptr->world_save_p
21073 = (WORLD_SAVE_P (info_ptr)
21074 && DEFAULT_ABI == ABI_DARWIN
21075 && !cfun->has_nonlocal_label
21076 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
21077 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
21078 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
21079 && info_ptr->cr_save_p);
21081 /* This will not work in conjunction with sibcalls. Make sure there
21082 are none. (This check is expensive, but seldom executed.) */
21083 if (WORLD_SAVE_P (info_ptr))
21085 rtx_insn *insn;
21086 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
21087 if (CALL_P (insn) && SIBLING_CALL_P (insn))
21089 info_ptr->world_save_p = 0;
21090 break;
21094 if (WORLD_SAVE_P (info_ptr))
21096 /* Even if we're not touching VRsave, make sure there's room on the
21097 stack for it, if it looks like we're calling SAVE_WORLD, which
21098 will attempt to save it. */
21099 info_ptr->vrsave_size = 4;
21101 /* If we are going to save the world, we need to save the link register too. */
21102 info_ptr->lr_save_p = 1;
21104 /* "Save" the VRsave register too if we're saving the world. */
21105 if (info_ptr->vrsave_mask == 0)
21106 info_ptr->vrsave_mask = compute_vrsave_mask ();
21108 /* Because the Darwin register save/restore routines only handle
21109 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
21110 check. */
21111 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
21112 && (info_ptr->first_altivec_reg_save
21113 >= FIRST_SAVED_ALTIVEC_REGNO));
21115 return;
21119 static void
21120 is_altivec_return_reg (rtx reg, void *xyes)
21122 bool *yes = (bool *) xyes;
21123 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
21124 *yes = true;
21128 /* Look for user-defined global regs in the range FIRST to LAST-1.
21129 We should not restore these, and so cannot use lmw or out-of-line
21130 restore functions if there are any. We also can't save them
21131 (well, emit frame notes for them), because frame unwinding during
21132 exception handling will restore saved registers. */
21134 static bool
21135 global_regs_p (unsigned first, unsigned last)
21137 while (first < last)
21138 if (global_regs[first++])
21139 return true;
21140 return false;
21143 /* Determine the strategy for saving/restoring registers. */
21145 enum {
21146 SAVRES_MULTIPLE = 0x1,
21147 SAVE_INLINE_FPRS = 0x2,
21148 SAVE_INLINE_GPRS = 0x4,
21149 REST_INLINE_FPRS = 0x8,
21150 REST_INLINE_GPRS = 0x10,
21151 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
21152 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
21153 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
21154 SAVE_INLINE_VRS = 0x100,
21155 REST_INLINE_VRS = 0x200
21158 static int
21159 rs6000_savres_strategy (rs6000_stack_t *info,
21160 bool using_static_chain_p)
21162 int strategy = 0;
21163 bool lr_save_p;
21165 if (TARGET_MULTIPLE
21166 && !TARGET_POWERPC64
21167 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
21168 && info->first_gp_reg_save < 31
21169 && !global_regs_p (info->first_gp_reg_save, 32))
21170 strategy |= SAVRES_MULTIPLE;
21172 if (crtl->calls_eh_return
21173 || cfun->machine->ra_need_lr)
21174 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
21175 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
21176 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21178 if (info->first_fp_reg_save == 64
21179 /* The out-of-line FP routines use double-precision stores;
21180 we can't use those routines if we don't have such stores. */
21181 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
21182 || global_regs_p (info->first_fp_reg_save, 64))
21183 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21185 if (info->first_gp_reg_save == 32
21186 || (!(strategy & SAVRES_MULTIPLE)
21187 && global_regs_p (info->first_gp_reg_save, 32)))
21188 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21190 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
21191 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
21192 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21194 /* Define cutoff for using out-of-line functions to save registers. */
21195 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
21197 if (!optimize_size)
21199 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21200 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21201 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21203 else
21205 /* Prefer out-of-line restore if it will exit. */
21206 if (info->first_fp_reg_save > 61)
21207 strategy |= SAVE_INLINE_FPRS;
21208 if (info->first_gp_reg_save > 29)
21210 if (info->first_fp_reg_save == 64)
21211 strategy |= SAVE_INLINE_GPRS;
21212 else
21213 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21215 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
21216 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21219 else if (DEFAULT_ABI == ABI_DARWIN)
21221 if (info->first_fp_reg_save > 60)
21222 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21223 if (info->first_gp_reg_save > 29)
21224 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21225 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21227 else
21229 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21230 if (info->first_fp_reg_save > 61)
21231 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21232 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21233 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21236 /* Don't bother to try to save things out-of-line if r11 is occupied
21237 by the static chain. It would require too much fiddling and the
21238 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
21239 pointer on Darwin, and AIX uses r1 or r12. */
21240 if (using_static_chain_p
21241 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
21242 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
21243 | SAVE_INLINE_GPRS
21244 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21246 /* We can only use the out-of-line routines to restore if we've
21247 saved all the registers from first_fp_reg_save in the prologue.
21248 Otherwise, we risk loading garbage. */
21249 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
21251 int i;
21253 for (i = info->first_fp_reg_save; i < 64; i++)
21254 if (!save_reg_p (i))
21256 strategy |= REST_INLINE_FPRS;
21257 break;
21261 /* If we are going to use store multiple, then don't even bother
21262 with the out-of-line routines, since the store-multiple
21263 instruction will always be smaller. */
21264 if ((strategy & SAVRES_MULTIPLE))
21265 strategy |= SAVE_INLINE_GPRS;
21267 /* info->lr_save_p isn't yet set if the only reason lr needs to be
21268 saved is an out-of-line save or restore. Set up the value for
21269 the next test (excluding out-of-line gpr restore). */
21270 lr_save_p = (info->lr_save_p
21271 || !(strategy & SAVE_INLINE_GPRS)
21272 || !(strategy & SAVE_INLINE_FPRS)
21273 || !(strategy & SAVE_INLINE_VRS)
21274 || !(strategy & REST_INLINE_FPRS)
21275 || !(strategy & REST_INLINE_VRS));
21277 /* The situation is more complicated with load multiple. We'd
21278 prefer to use the out-of-line routines for restores, since the
21279 "exit" out-of-line routines can handle the restore of LR and the
21280 frame teardown. However, it doesn't make sense to use the
21281 out-of-line routine if that is the only reason we'd need to save
21282 LR, and we can't use the "exit" out-of-line gpr restore if we
21283 have saved some fprs; in those cases it is advantageous to use
21284 load multiple when available. */
21285 if ((strategy & SAVRES_MULTIPLE)
21286 && (!lr_save_p
21287 || info->first_fp_reg_save != 64))
21288 strategy |= REST_INLINE_GPRS;
21290 /* Saving CR interferes with the exit routines used on the SPE, so
21291 just punt here. */
21292 if (TARGET_SPE_ABI
21293 && info->spe_64bit_regs_used
21294 && info->cr_save_p)
21295 strategy |= REST_INLINE_GPRS;
21297 /* We can only use load multiple or the out-of-line routines to
21298 restore if we've used store multiple or out-of-line routines
21299 in the prologue, i.e. if we've saved all the registers from
21300 first_gp_reg_save. Otherwise, we risk loading garbage. */
21301 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
21302 == SAVE_INLINE_GPRS)
21304 int i;
21306 for (i = info->first_gp_reg_save; i < 32; i++)
21307 if (!save_reg_p (i))
21309 strategy |= REST_INLINE_GPRS;
21310 break;
21314 if (TARGET_ELF && TARGET_64BIT)
21316 if (!(strategy & SAVE_INLINE_FPRS))
21317 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21318 else if (!(strategy & SAVE_INLINE_GPRS)
21319 && info->first_fp_reg_save == 64)
21320 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
21322 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
21323 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
21325 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
21326 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21328 return strategy;
21331 /* Calculate the stack information for the current function. This is
21332 complicated by having two separate calling sequences, the AIX calling
21333 sequence and the V.4 calling sequence.
21335 AIX (and Darwin/Mac OS X) stack frames look like:
21336 32-bit 64-bit
21337 SP----> +---------------------------------------+
21338 | back chain to caller | 0 0
21339 +---------------------------------------+
21340 | saved CR | 4 8 (8-11)
21341 +---------------------------------------+
21342 | saved LR | 8 16
21343 +---------------------------------------+
21344 | reserved for compilers | 12 24
21345 +---------------------------------------+
21346 | reserved for binders | 16 32
21347 +---------------------------------------+
21348 | saved TOC pointer | 20 40
21349 +---------------------------------------+
21350 | Parameter save area (P) | 24 48
21351 +---------------------------------------+
21352 | Alloca space (A) | 24+P etc.
21353 +---------------------------------------+
21354 | Local variable space (L) | 24+P+A
21355 +---------------------------------------+
21356 | Float/int conversion temporary (X) | 24+P+A+L
21357 +---------------------------------------+
21358 | Save area for AltiVec registers (W) | 24+P+A+L+X
21359 +---------------------------------------+
21360 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21361 +---------------------------------------+
21362 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21363 +---------------------------------------+
21364 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
21365 +---------------------------------------+
21366 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
21367 +---------------------------------------+
21368 old SP->| back chain to caller's caller |
21369 +---------------------------------------+
21371 The required alignment for AIX configurations is two words (i.e., 8
21372 or 16 bytes).
21374 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21376 SP----> +---------------------------------------+
21377 | Back chain to caller | 0
21378 +---------------------------------------+
21379 | Save area for CR | 8
21380 +---------------------------------------+
21381 | Saved LR | 16
21382 +---------------------------------------+
21383 | Saved TOC pointer | 24
21384 +---------------------------------------+
21385 | Parameter save area (P) | 32
21386 +---------------------------------------+
21387 | Alloca space (A) | 32+P
21388 +---------------------------------------+
21389 | Local variable space (L) | 32+P+A
21390 +---------------------------------------+
21391 | Save area for AltiVec registers (W) | 32+P+A+L
21392 +---------------------------------------+
21393 | AltiVec alignment padding (Y) | 32+P+A+L+W
21394 +---------------------------------------+
21395 | Save area for GP registers (G) | 32+P+A+L+W+Y
21396 +---------------------------------------+
21397 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21398 +---------------------------------------+
21399 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21400 +---------------------------------------+
21403 V.4 stack frames look like:
21405 SP----> +---------------------------------------+
21406 | back chain to caller | 0
21407 +---------------------------------------+
21408 | caller's saved LR | 4
21409 +---------------------------------------+
21410 | Parameter save area (P) | 8
21411 +---------------------------------------+
21412 | Alloca space (A) | 8+P
21413 +---------------------------------------+
21414 | Varargs save area (V) | 8+P+A
21415 +---------------------------------------+
21416 | Local variable space (L) | 8+P+A+V
21417 +---------------------------------------+
21418 | Float/int conversion temporary (X) | 8+P+A+V+L
21419 +---------------------------------------+
21420 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21421 +---------------------------------------+
21422 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21423 +---------------------------------------+
21424 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21425 +---------------------------------------+
21426 | SPE: area for 64-bit GP registers |
21427 +---------------------------------------+
21428 | SPE alignment padding |
21429 +---------------------------------------+
21430 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21431 +---------------------------------------+
21432 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21433 +---------------------------------------+
21434 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21435 +---------------------------------------+
21436 old SP->| back chain to caller's caller |
21437 +---------------------------------------+
21439 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21440 given. (But note below and in sysv4.h that we require only 8 and
21441 may round up the size of our stack frame anyway. The historical
21442 reason is early versions of powerpc-linux which didn't properly
21443 align the stack at program startup. A happy side-effect is that
21444 -mno-eabi libraries can be used with -meabi programs.)
21446 The EABI configuration defaults to the V.4 layout. However,
21447 the stack alignment requirements may differ. If -mno-eabi is not
21448 given, the required stack alignment is 8 bytes; if -mno-eabi is
21449 given, the required alignment is 16 bytes. (But see V.4 comment
21450 above.) */
21452 #ifndef ABI_STACK_BOUNDARY
21453 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21454 #endif
21456 static rs6000_stack_t *
21457 rs6000_stack_info (void)
21459 rs6000_stack_t *info_ptr = &stack_info;
21460 int reg_size = TARGET_32BIT ? 4 : 8;
21461 int ehrd_size;
21462 int ehcr_size;
21463 int save_align;
21464 int first_gp;
21465 HOST_WIDE_INT non_fixed_size;
21466 bool using_static_chain_p;
21468 if (reload_completed && info_ptr->reload_completed)
21469 return info_ptr;
21471 memset (info_ptr, 0, sizeof (*info_ptr));
21472 info_ptr->reload_completed = reload_completed;
21474 if (TARGET_SPE)
21476 /* Cache value so we don't rescan instruction chain over and over. */
21477 if (cfun->machine->insn_chain_scanned_p == 0)
21478 cfun->machine->insn_chain_scanned_p
21479 = spe_func_has_64bit_regs_p () + 1;
21480 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21483 /* Select which calling sequence. */
21484 info_ptr->abi = DEFAULT_ABI;
21486 /* Calculate which registers need to be saved & save area size. */
21487 info_ptr->first_gp_reg_save = first_reg_to_save ();
21488 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21489 even if it currently looks like we won't. Reload may need it to
21490 get at a constant; if so, it will have already created a constant
21491 pool entry for it. */
21492 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21493 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21494 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21495 && crtl->uses_const_pool
21496 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21497 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21498 else
21499 first_gp = info_ptr->first_gp_reg_save;
21501 info_ptr->gp_size = reg_size * (32 - first_gp);
21503 /* For the SPE, we have an additional upper 32-bits on each GPR.
21504 Ideally we should save the entire 64-bits only when the upper
21505 half is used in SIMD instructions. Since we only record
21506 registers live (not the size they are used in), this proves
21507 difficult because we'd have to traverse the instruction chain at
21508 the right time, taking reload into account. This is a real pain,
21509 so we opt to always save the GPRs in 64-bits if even one register
21510 gets used in 64-bits. Otherwise, all the registers in the frame
21511 get saved in 32-bits.
21513 So, when we save all GPRs (except the SP) in 64-bits, the
21514 traditional GP save area will be empty. */
21515 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21516 info_ptr->gp_size = 0;
21518 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21519 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21521 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21522 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21523 - info_ptr->first_altivec_reg_save);
21525 /* Does this function call anything? */
21526 info_ptr->calls_p = (! crtl->is_leaf
21527 || cfun->machine->ra_needs_full_frame);
21529 /* Determine if we need to save the condition code registers. */
21530 if (df_regs_ever_live_p (CR2_REGNO)
21531 || df_regs_ever_live_p (CR3_REGNO)
21532 || df_regs_ever_live_p (CR4_REGNO))
21534 info_ptr->cr_save_p = 1;
21535 if (DEFAULT_ABI == ABI_V4)
21536 info_ptr->cr_size = reg_size;
21539 /* If the current function calls __builtin_eh_return, then we need
21540 to allocate stack space for registers that will hold data for
21541 the exception handler. */
21542 if (crtl->calls_eh_return)
21544 unsigned int i;
21545 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21546 continue;
21548 /* SPE saves EH registers in 64-bits. */
21549 ehrd_size = i * (TARGET_SPE_ABI
21550 && info_ptr->spe_64bit_regs_used != 0
21551 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21553 else
21554 ehrd_size = 0;
21556 /* In the ELFv2 ABI, we also need to allocate space for separate
21557 CR field save areas if the function calls __builtin_eh_return. */
21558 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21560 /* This hard-codes that we have three call-saved CR fields. */
21561 ehcr_size = 3 * reg_size;
21562 /* We do *not* use the regular CR save mechanism. */
21563 info_ptr->cr_save_p = 0;
21565 else
21566 ehcr_size = 0;
21568 /* Determine various sizes. */
21569 info_ptr->reg_size = reg_size;
21570 info_ptr->fixed_size = RS6000_SAVE_AREA;
21571 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21572 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21573 TARGET_ALTIVEC ? 16 : 8);
21574 if (FRAME_GROWS_DOWNWARD)
21575 info_ptr->vars_size
21576 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21577 + info_ptr->parm_size,
21578 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21579 - (info_ptr->fixed_size + info_ptr->vars_size
21580 + info_ptr->parm_size);
21582 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21583 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21584 else
21585 info_ptr->spe_gp_size = 0;
21587 if (TARGET_ALTIVEC_ABI)
21588 info_ptr->vrsave_mask = compute_vrsave_mask ();
21589 else
21590 info_ptr->vrsave_mask = 0;
21592 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21593 info_ptr->vrsave_size = 4;
21594 else
21595 info_ptr->vrsave_size = 0;
21597 compute_save_world_info (info_ptr);
21599 /* Calculate the offsets. */
21600 switch (DEFAULT_ABI)
21602 case ABI_NONE:
21603 default:
21604 gcc_unreachable ();
21606 case ABI_AIX:
21607 case ABI_ELFv2:
21608 case ABI_DARWIN:
21609 info_ptr->fp_save_offset = - info_ptr->fp_size;
21610 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21612 if (TARGET_ALTIVEC_ABI)
21614 info_ptr->vrsave_save_offset
21615 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21617 /* Align stack so vector save area is on a quadword boundary.
21618 The padding goes above the vectors. */
21619 if (info_ptr->altivec_size != 0)
21620 info_ptr->altivec_padding_size
21621 = info_ptr->vrsave_save_offset & 0xF;
21622 else
21623 info_ptr->altivec_padding_size = 0;
21625 info_ptr->altivec_save_offset
21626 = info_ptr->vrsave_save_offset
21627 - info_ptr->altivec_padding_size
21628 - info_ptr->altivec_size;
21629 gcc_assert (info_ptr->altivec_size == 0
21630 || info_ptr->altivec_save_offset % 16 == 0);
21632 /* Adjust for AltiVec case. */
21633 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21635 else
21636 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21638 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21639 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21640 info_ptr->lr_save_offset = 2*reg_size;
21641 break;
21643 case ABI_V4:
21644 info_ptr->fp_save_offset = - info_ptr->fp_size;
21645 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21646 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21648 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21650 /* Align stack so SPE GPR save area is aligned on a
21651 double-word boundary. */
21652 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21653 info_ptr->spe_padding_size
21654 = 8 - (-info_ptr->cr_save_offset % 8);
21655 else
21656 info_ptr->spe_padding_size = 0;
21658 info_ptr->spe_gp_save_offset
21659 = info_ptr->cr_save_offset
21660 - info_ptr->spe_padding_size
21661 - info_ptr->spe_gp_size;
21663 /* Adjust for SPE case. */
21664 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21666 else if (TARGET_ALTIVEC_ABI)
21668 info_ptr->vrsave_save_offset
21669 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21671 /* Align stack so vector save area is on a quadword boundary. */
21672 if (info_ptr->altivec_size != 0)
21673 info_ptr->altivec_padding_size
21674 = 16 - (-info_ptr->vrsave_save_offset % 16);
21675 else
21676 info_ptr->altivec_padding_size = 0;
21678 info_ptr->altivec_save_offset
21679 = info_ptr->vrsave_save_offset
21680 - info_ptr->altivec_padding_size
21681 - info_ptr->altivec_size;
21683 /* Adjust for AltiVec case. */
21684 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21686 else
21687 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21688 info_ptr->ehrd_offset -= ehrd_size;
21689 info_ptr->lr_save_offset = reg_size;
21690 break;
21693 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21694 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21695 + info_ptr->gp_size
21696 + info_ptr->altivec_size
21697 + info_ptr->altivec_padding_size
21698 + info_ptr->spe_gp_size
21699 + info_ptr->spe_padding_size
21700 + ehrd_size
21701 + ehcr_size
21702 + info_ptr->cr_size
21703 + info_ptr->vrsave_size,
21704 save_align);
21706 non_fixed_size = (info_ptr->vars_size
21707 + info_ptr->parm_size
21708 + info_ptr->save_size);
21710 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21711 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
21713 /* Determine if we need to save the link register. */
21714 if (info_ptr->calls_p
21715 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21716 && crtl->profile
21717 && !TARGET_PROFILE_KERNEL)
21718 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21719 #ifdef TARGET_RELOCATABLE
21720 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21721 #endif
21722 || rs6000_ra_ever_killed ())
21723 info_ptr->lr_save_p = 1;
21725 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21726 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21727 && call_used_regs[STATIC_CHAIN_REGNUM]);
21728 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21729 using_static_chain_p);
21731 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21732 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21733 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21734 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21735 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21736 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21737 info_ptr->lr_save_p = 1;
21739 if (info_ptr->lr_save_p)
21740 df_set_regs_ever_live (LR_REGNO, true);
21742 /* Determine if we need to allocate any stack frame:
21744 For AIX we need to push the stack if a frame pointer is needed
21745 (because the stack might be dynamically adjusted), if we are
21746 debugging, if we make calls, or if the sum of fp_save, gp_save,
21747 and local variables is more than the space needed to save all
21748 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21749 + 18*8 = 288 (GPR13 reserved).
21751 For V.4 we don't have the stack cushion that AIX uses, but assume
21752 that the debugger can handle stackless frames. */
21754 if (info_ptr->calls_p)
21755 info_ptr->push_p = 1;
21757 else if (DEFAULT_ABI == ABI_V4)
21758 info_ptr->push_p = non_fixed_size != 0;
21760 else if (frame_pointer_needed)
21761 info_ptr->push_p = 1;
21763 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21764 info_ptr->push_p = 1;
21766 else
21767 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
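/* Worked example of the cushion test (roughly; sizes are rounded up
   above): a 32-bit leaf function with 200 bytes of locals and no
   saved registers has non_fixed_size of 200 <= 220, so it can run in
   the caller's frame and push_p stays 0.  */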
21769 /* Zero offsets if we're not saving those registers. */
21770 if (info_ptr->fp_size == 0)
21771 info_ptr->fp_save_offset = 0;
21773 if (info_ptr->gp_size == 0)
21774 info_ptr->gp_save_offset = 0;
21776 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21777 info_ptr->altivec_save_offset = 0;
21779 /* Zero VRSAVE offset if not saved and restored. */
21780 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21781 info_ptr->vrsave_save_offset = 0;
21783 if (! TARGET_SPE_ABI
21784 || info_ptr->spe_64bit_regs_used == 0
21785 || info_ptr->spe_gp_size == 0)
21786 info_ptr->spe_gp_save_offset = 0;
21788 if (! info_ptr->lr_save_p)
21789 info_ptr->lr_save_offset = 0;
21791 if (! info_ptr->cr_save_p)
21792 info_ptr->cr_save_offset = 0;
21794 return info_ptr;
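/* The record returned here can be dumped with debug_stack_info
   below; passing it a null pointer recomputes the info, so from a
   debugger something like "call debug_stack_info (0)" works.  */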
21797 /* Return true if the current function uses any GPRs in 64-bit SIMD
21798 mode. */
21800 static bool
21801 spe_func_has_64bit_regs_p (void)
21803 rtx_insn *insns, *insn;
21805 /* Functions that save and restore all the call-saved registers will
21806 need to save/restore the registers in 64-bits. */
21807 if (crtl->calls_eh_return
21808 || cfun->calls_setjmp
21809 || crtl->has_nonlocal_goto)
21810 return true;
21812 insns = get_insns ();
21814 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
21816 if (INSN_P (insn))
21818 rtx i;
21820 /* FIXME: This should be implemented with attributes...
21822 (set_attr "spe64" "true") ... then
21823 if (get_spe64(insn)) return true;
21825 It's the only reliable way to do the stuff below. */
21827 i = PATTERN (insn);
21828 if (GET_CODE (i) == SET)
21830 machine_mode mode = GET_MODE (SET_SRC (i));
21832 if (SPE_VECTOR_MODE (mode))
21833 return true;
21834 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
21835 return true;
21840 return false;
21843 static void
21844 debug_stack_info (rs6000_stack_t *info)
21846 const char *abi_string;
21848 if (! info)
21849 info = rs6000_stack_info ();
21851 fprintf (stderr, "\nStack information for function %s:\n",
21852 ((current_function_decl && DECL_NAME (current_function_decl))
21853 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
21854 : "<unknown>"));
21856 switch (info->abi)
21858 default: abi_string = "Unknown"; break;
21859 case ABI_NONE: abi_string = "NONE"; break;
21860 case ABI_AIX: abi_string = "AIX"; break;
21861 case ABI_ELFv2: abi_string = "ELFv2"; break;
21862 case ABI_DARWIN: abi_string = "Darwin"; break;
21863 case ABI_V4: abi_string = "V.4"; break;
21866 fprintf (stderr, "\tABI = %5s\n", abi_string);
21868 if (TARGET_ALTIVEC_ABI)
21869 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
21871 if (TARGET_SPE_ABI)
21872 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
21874 if (info->first_gp_reg_save != 32)
21875 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
21877 if (info->first_fp_reg_save != 64)
21878 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
21880 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
21881 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
21882 info->first_altivec_reg_save);
21884 if (info->lr_save_p)
21885 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
21887 if (info->cr_save_p)
21888 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
21890 if (info->vrsave_mask)
21891 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
21893 if (info->push_p)
21894 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
21896 if (info->calls_p)
21897 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
21899 if (info->gp_save_offset)
21900 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
21902 if (info->fp_save_offset)
21903 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
21905 if (info->altivec_save_offset)
21906 fprintf (stderr, "\taltivec_save_offset = %5d\n",
21907 info->altivec_save_offset);
21909 if (info->spe_gp_save_offset)
21910 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
21911 info->spe_gp_save_offset);
21913 if (info->vrsave_save_offset)
21914 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
21915 info->vrsave_save_offset);
21917 if (info->lr_save_offset)
21918 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
21920 if (info->cr_save_offset)
21921 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
21923 if (info->varargs_save_offset)
21924 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
21926 if (info->total_size)
21927 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC "\n",
21928 info->total_size);
21930 if (info->vars_size)
21931 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC "\n",
21932 info->vars_size);
21934 if (info->parm_size)
21935 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
21937 if (info->fixed_size)
21938 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
21940 if (info->gp_size)
21941 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
21943 if (info->spe_gp_size)
21944 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
21946 if (info->fp_size)
21947 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
21949 if (info->altivec_size)
21950 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
21952 if (info->vrsave_size)
21953 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
21955 if (info->altivec_padding_size)
21956 fprintf (stderr, "\taltivec_padding_size= %5d\n",
21957 info->altivec_padding_size);
21959 if (info->spe_padding_size)
21960 fprintf (stderr, "\tspe_padding_size = %5d\n",
21961 info->spe_padding_size);
21963 if (info->cr_size)
21964 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
21966 if (info->save_size)
21967 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
21969 if (info->reg_size != 4)
21970 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
21972 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
21974 fprintf (stderr, "\n");
21977 rtx
21978 rs6000_return_addr (int count, rtx frame)
21980 /* Currently we don't optimize very well between prologue and body
21981 code, and for PIC the generated code can be quite bad, so
21982 don't try to be too clever here. */
21983 if (count != 0
21984 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
21986 cfun->machine->ra_needs_full_frame = 1;
21988 return
21989 gen_rtx_MEM
21990 (Pmode,
21991 memory_address
21992 (Pmode,
21993 plus_constant (Pmode,
21994 copy_to_reg
21995 (gen_rtx_MEM (Pmode,
21996 memory_address (Pmode, frame))),
21997 RETURN_ADDRESS_OFFSET)));
22000 cfun->machine->ra_need_lr = 1;
22001 return get_hard_reg_initial_val (Pmode, LR_REGNO);
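/* COUNT here is the argument of __builtin_return_address: count 0
   in simple non-PIC code uses the saved-LR pseudo directly (the
   get_hard_reg_initial_val case), while any other case walks the
   frame chain and loads the LR slot at RETURN_ADDRESS_OFFSET.  */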
22004 /* Say whether a function is a candidate for sibcall handling or not. */
22006 static bool
22007 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22009 tree fntype;
22011 if (decl)
22012 fntype = TREE_TYPE (decl);
22013 else
22014 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22016 /* We can't do it if the called function has more vector parameters
22017 than the current function; there's nowhere to put the VRsave code. */
22018 if (TARGET_ALTIVEC_ABI
22019 && TARGET_ALTIVEC_VRSAVE
22020 && !(decl && decl == current_function_decl))
22022 function_args_iterator args_iter;
22023 tree type;
22024 int nvreg = 0;
22026 /* Functions with vector parameters are required to have a
22027 prototype, so the argument type info must be available
22028 here. */
22029 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
22030 if (TREE_CODE (type) == VECTOR_TYPE
22031 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22032 nvreg++;
22034 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
22035 if (TREE_CODE (type) == VECTOR_TYPE
22036 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22037 nvreg--;
22039 if (nvreg > 0)
22040 return false;
22043 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22044 functions, because the callee may have a different TOC pointer
22045 from the caller's and there's no way to ensure we restore the TOC
22046 when we return. With the secure-plt SYSV ABI we can't make
22047 non-local calls when -fpic/-fPIC because the PLT call stubs use r30. */
22048 if (DEFAULT_ABI == ABI_DARWIN
22049 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22050 && decl
22051 && !DECL_EXTERNAL (decl)
22052 && (*targetm.binds_local_p) (decl))
22053 || (DEFAULT_ABI == ABI_V4
22054 && (!TARGET_SECURE_PLT
22055 || !flag_pic
22056 || (decl
22057 && (*targetm.binds_local_p) (decl)))))
22059 tree attr_list = TYPE_ATTRIBUTES (fntype);
22061 if (!lookup_attribute ("longcall", attr_list)
22062 || lookup_attribute ("shortcall", attr_list))
22063 return true;
22066 return false;
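/* A hypothetical illustration of the tests above: with AIX/ELFv2,

     static int inc (int x) { return x + 1; }
     int f (int x) { return inc (x); }

   the tail call to inc binds locally and may take the sibcall path,
   while a tail call to an external function is rejected because of
   the TOC-restore problem described above.  */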
22069 static int
22070 rs6000_ra_ever_killed (void)
22072 rtx_insn *top;
22073 rtx reg;
22074 rtx_insn *insn;
22076 if (cfun->is_thunk)
22077 return 0;
22079 if (cfun->machine->lr_save_state)
22080 return cfun->machine->lr_save_state - 1;
22082 /* regs_ever_live has LR marked as used if any sibcalls are present,
22083 but this should not force saving and restoring in the
22084 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22085 clobbers LR, so that is inappropriate. */
22087 /* Also, the prologue can generate a store into LR that
22088 doesn't really count, like this:
22090 move LR->R0
22091 bcl to set PIC register
22092 move LR->R31
22093 move R0->LR
22095 When we're called from the epilogue, we need to avoid counting
22096 this as a store. */
22098 push_topmost_sequence ();
22099 top = get_insns ();
22100 pop_topmost_sequence ();
22101 reg = gen_rtx_REG (Pmode, LR_REGNO);
22103 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22105 if (INSN_P (insn))
22107 if (CALL_P (insn))
22109 if (!SIBLING_CALL_P (insn))
22110 return 1;
22112 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22113 return 1;
22114 else if (set_of (reg, insn) != NULL_RTX
22115 && !prologue_epilogue_contains (insn))
22116 return 1;
22119 return 0;
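/* Note the encoding of lr_save_state used above: 0 means no decision
   has been recorded yet, otherwise the value is lr_save_p + 1 (set
   in rs6000_emit_eh_reg_restore below), hence the "- 1".  */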
22122 /* Emit instructions needed to load the TOC register.
22123 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
22124 and there is a constant pool, or for SVR4 -fpic. */
22126 void
22127 rs6000_emit_load_toc_table (int fromprolog)
22129 rtx dest;
22130 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22132 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22134 char buf[30];
22135 rtx lab, tmp1, tmp2, got;
22137 lab = gen_label_rtx ();
22138 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22139 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22140 if (flag_pic == 2)
22141 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22142 else
22143 got = rs6000_got_sym ();
22144 tmp1 = tmp2 = dest;
22145 if (!fromprolog)
22147 tmp1 = gen_reg_rtx (Pmode);
22148 tmp2 = gen_reg_rtx (Pmode);
22150 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22151 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22152 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22153 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22155 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22157 emit_insn (gen_load_toc_v4_pic_si ());
22158 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22160 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22162 char buf[30];
22163 rtx temp0 = (fromprolog
22164 ? gen_rtx_REG (Pmode, 0)
22165 : gen_reg_rtx (Pmode));
22167 if (fromprolog)
22169 rtx symF, symL;
22171 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22172 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22174 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22175 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22177 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22178 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22179 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22181 else
22183 rtx tocsym, lab;
22185 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22186 lab = gen_label_rtx ();
22187 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22188 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22189 if (TARGET_LINK_STACK)
22190 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22191 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22193 emit_insn (gen_addsi3 (dest, temp0, dest));
22195 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22197 /* This is for AIX code running in non-PIC ELF32. */
22198 char buf[30];
22199 rtx realsym;
22200 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22201 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22203 emit_insn (gen_elf_high (dest, realsym));
22204 emit_insn (gen_elf_low (dest, dest, realsym));
22206 else
22208 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22210 if (TARGET_32BIT)
22211 emit_insn (gen_load_toc_aix_si (dest));
22212 else
22213 emit_insn (gen_load_toc_aix_di (dest));
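/* As a rough sketch, the 32-bit SVR4 -fPIC cases above expand to a
   sequence of the shape

       bcl 20,31,.LCF0
   .LCF0:
       mflr 30
       addis 30,30,.LCTOC1-.LCF0@ha
       addi 30,30,.LCTOC1-.LCF0@l

   (exact labels, relocations and registers depend on flag_pic and
   the load_toc_* patterns in rs6000.md).  */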
22217 /* Emit instructions to restore the link register after determining where
22218 its value has been stored. */
22220 void
22221 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
22223 rs6000_stack_t *info = rs6000_stack_info ();
22224 rtx operands[2];
22226 operands[0] = source;
22227 operands[1] = scratch;
22229 if (info->lr_save_p)
22231 rtx frame_rtx = stack_pointer_rtx;
22232 HOST_WIDE_INT sp_offset = 0;
22233 rtx tmp;
22235 if (frame_pointer_needed
22236 || cfun->calls_alloca
22237 || info->total_size > 32767)
22239 tmp = gen_frame_mem (Pmode, frame_rtx);
22240 emit_move_insn (operands[1], tmp);
22241 frame_rtx = operands[1];
22243 else if (info->push_p)
22244 sp_offset = info->total_size;
22246 tmp = plus_constant (Pmode, frame_rtx,
22247 info->lr_save_offset + sp_offset);
22248 tmp = gen_frame_mem (Pmode, tmp);
22249 emit_move_insn (tmp, operands[0]);
22251 else
22252 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22254 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22255 state of lr_save_p so any change from here on would be a bug. In
22256 particular, stop rs6000_ra_ever_killed from considering the SET
22257 of lr we may have added just above. */
22258 cfun->machine->lr_save_state = info->lr_save_p + 1;
22261 static GTY(()) alias_set_type set = -1;
22263 alias_set_type
22264 get_TOC_alias_set (void)
22266 if (set == -1)
22267 set = new_alias_set ();
22268 return set;
22271 /* This returns nonzero if the current function uses the TOC. This is
22272 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22273 is generated by the ABI_V4 load_toc_* patterns. */
22274 #if TARGET_ELF
22275 static int
22276 uses_TOC (void)
22278 rtx_insn *insn;
22280 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22281 if (INSN_P (insn))
22283 rtx pat = PATTERN (insn);
22284 int i;
22286 if (GET_CODE (pat) == PARALLEL)
22287 for (i = 0; i < XVECLEN (pat, 0); i++)
22289 rtx sub = XVECEXP (pat, 0, i);
22290 if (GET_CODE (sub) == USE)
22292 sub = XEXP (sub, 0);
22293 if (GET_CODE (sub) == UNSPEC
22294 && XINT (sub, 1) == UNSPEC_TOC)
22295 return 1;
22299 return 0;
22301 #endif
22303 rtx
22304 create_TOC_reference (rtx symbol, rtx largetoc_reg)
22306 rtx tocrel, tocreg, hi;
22308 if (TARGET_DEBUG_ADDR)
22310 if (GET_CODE (symbol) == SYMBOL_REF)
22311 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22312 XSTR (symbol, 0));
22313 else
22315 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22316 GET_RTX_NAME (GET_CODE (symbol)));
22317 debug_rtx (symbol);
22321 if (!can_create_pseudo_p ())
22322 df_set_regs_ever_live (TOC_REGISTER, true);
22324 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22325 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22326 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22327 return tocrel;
22329 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22330 if (largetoc_reg != NULL)
22332 emit_move_insn (largetoc_reg, hi);
22333 hi = largetoc_reg;
22335 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
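/* Roughly, under -mcmodel=small the UNSPEC_TOCREL returned above
   becomes a single TOC-relative access such as

       ld r9,sym@toc(r2)

   while the HIGH/LO_SUM pair built for the larger models becomes

       addis r9,r2,sym@toc@ha
       ld    r9,sym@toc@l(r9)

   (a sketch of typical 64-bit output; the actual insns depend on the
   patterns that consume this address).  */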
22338 /* Issue assembly directives that create a reference to the given DWARF
22339 FRAME_TABLE_LABEL from the current function section. */
22340 void
22341 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22343 fprintf (asm_out_file, "\t.ref %s\n",
22344 (* targetm.strip_name_encoding) (frame_table_label));
22347 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22348 and the change to the stack pointer. */
22350 static void
22351 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22353 rtvec p;
22354 int i;
22355 rtx regs[3];
22357 i = 0;
22358 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22359 if (hard_frame_needed)
22360 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22361 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22362 || (hard_frame_needed
22363 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22364 regs[i++] = fp;
22366 p = rtvec_alloc (i);
22367 while (--i >= 0)
22369 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22370 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22373 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
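/* The stack tie stores nothing at run time: the BLKmode MEMs with
   the frame alias set simply make every frame access appear to
   conflict with this insn, so the scheduler cannot move register
   saves or restores across the stack pointer update.  */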
22376 /* Emit the correct code for allocating stack space, as insns.
22377 If COPY_REG, leave a copy of the old stack pointer in it, adjusted by COPY_OFF.
22378 The generated code may use hard register 0 as a temporary. */
22380 static void
22381 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22383 rtx_insn *insn;
22384 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22385 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22386 rtx todec = gen_int_mode (-size, Pmode);
22387 rtx par, set, mem;
22389 if (INTVAL (todec) != -size)
22391 warning (0, "stack frame too large");
22392 emit_insn (gen_trap ());
22393 return;
22396 if (crtl->limit_stack)
22398 if (REG_P (stack_limit_rtx)
22399 && REGNO (stack_limit_rtx) > 1
22400 && REGNO (stack_limit_rtx) <= 31)
22402 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22403 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22404 const0_rtx));
22406 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22407 && TARGET_32BIT
22408 && DEFAULT_ABI == ABI_V4)
22410 rtx toload = gen_rtx_CONST (VOIDmode,
22411 gen_rtx_PLUS (Pmode,
22412 stack_limit_rtx,
22413 GEN_INT (size)));
22415 emit_insn (gen_elf_high (tmp_reg, toload));
22416 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22417 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22418 const0_rtx));
22420 else
22421 warning (0, "stack limit expression is not supported");
22424 if (copy_reg)
22426 if (copy_off != 0)
22427 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22428 else
22429 emit_move_insn (copy_reg, stack_reg);
22432 if (size > 32767)
22434 /* Need a note here so that try_split doesn't get confused. */
22435 if (get_last_insn () == NULL_RTX)
22436 emit_note (NOTE_INSN_DELETED);
22437 insn = emit_move_insn (tmp_reg, todec);
22438 try_split (PATTERN (insn), insn, 0);
22439 todec = tmp_reg;
22442 insn = emit_insn (TARGET_32BIT
22443 ? gen_movsi_update_stack (stack_reg, stack_reg,
22444 todec, stack_reg)
22445 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22446 todec, stack_reg));
22447 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22448 it now and set the alias set/attributes. The above gen_*_update
22449 calls will generate a PARALLEL with the MEM set being the first
22450 operation. */
22451 par = PATTERN (insn);
22452 gcc_assert (GET_CODE (par) == PARALLEL);
22453 set = XVECEXP (par, 0, 0);
22454 gcc_assert (GET_CODE (set) == SET);
22455 mem = SET_DEST (set);
22456 gcc_assert (MEM_P (mem));
22457 MEM_NOTRAP_P (mem) = 1;
22458 set_mem_alias_set (mem, get_frame_alias_set ());
22460 RTX_FRAME_RELATED_P (insn) = 1;
22461 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22462 gen_rtx_SET (VOIDmode, stack_reg,
22463 gen_rtx_PLUS (Pmode, stack_reg,
22464 GEN_INT (-size))));
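/* In asm terms the common small-frame case above is a single
   update-form store, e.g. 32-bit

       stwu r1,-64(r1)

   while the large-frame case first materializes -size in r0 and uses
   the indexed form, roughly

       lis r0,-1 ; ori r0,r0,32768 ; stwux r1,r1,r0

   (a sketch; the exact insns come from the *_update_stack patterns).  */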
22467 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22469 #if PROBE_INTERVAL > 32768
22470 #error Cannot use indexed addressing mode for stack probing
22471 #endif
22473 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22474 inclusive. These are offsets from the current stack pointer. */
22476 static void
22477 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22479 /* See if we have a constant small number of probes to generate. If so,
22480 that's the easy case. */
22481 if (first + size <= 32768)
22483 HOST_WIDE_INT i;
22485 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22486 it exceeds SIZE. If only one probe is needed, this will not
22487 generate any code. Then probe at FIRST + SIZE. */
22488 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22489 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22490 -(first + i)));
22492 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22493 -(first + size)));
22496 /* Otherwise, do the same as above, but in a loop. Note that we must be
22497 extra careful with variables wrapping around because we might be at
22498 the very top (or the very bottom) of the address space and we have
22499 to be able to handle this case properly; in particular, we use an
22500 equality test for the loop condition. */
22501 else
22503 HOST_WIDE_INT rounded_size;
22504 rtx r12 = gen_rtx_REG (Pmode, 12);
22505 rtx r0 = gen_rtx_REG (Pmode, 0);
22507 /* Sanity check for the addressing mode we're going to use. */
22508 gcc_assert (first <= 32768);
22510 /* Step 1: round SIZE to the previous multiple of the interval. */
22512 rounded_size = size & -PROBE_INTERVAL;
22515 /* Step 2: compute initial and final value of the loop counter. */
22517 /* TEST_ADDR = SP + FIRST. */
22518 emit_insn (gen_rtx_SET (VOIDmode, r12,
22519 plus_constant (Pmode, stack_pointer_rtx,
22520 -first)));
22522 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22523 if (rounded_size > 32768)
22525 emit_move_insn (r0, GEN_INT (-rounded_size));
22526 emit_insn (gen_rtx_SET (VOIDmode, r0,
22527 gen_rtx_PLUS (Pmode, r12, r0)));
22529 else
22530 emit_insn (gen_rtx_SET (VOIDmode, r0,
22531 plus_constant (Pmode, r12, -rounded_size)));
22534 /* Step 3: the loop
22536 while (TEST_ADDR != LAST_ADDR)
22538 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22539 probe at TEST_ADDR
22542 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22543 until it is equal to ROUNDED_SIZE. */
22545 if (TARGET_64BIT)
22546 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22547 else
22548 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22551 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22552 that SIZE is equal to ROUNDED_SIZE. */
22554 if (size != rounded_size)
22555 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
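/* Worked example, assuming the default PROBE_INTERVAL of 4096:
   first == 4096 and size == 10000 take the small-count branch above,
   probing at sp-8192 and sp-12288 in the loop and then at sp-14096
   (= -(first + size)) for the residual.  */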
22559 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22560 absolute addresses. */
22562 const char *
22563 output_probe_stack_range (rtx reg1, rtx reg2)
22565 static int labelno = 0;
22566 char loop_lab[32], end_lab[32];
22567 rtx xops[2];
22569 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22570 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22572 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22574 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22575 xops[0] = reg1;
22576 xops[1] = reg2;
22577 if (TARGET_64BIT)
22578 output_asm_insn ("cmpd 0,%0,%1", xops);
22579 else
22580 output_asm_insn ("cmpw 0,%0,%1", xops);
22582 fputs ("\tbeq 0,", asm_out_file);
22583 assemble_name_raw (asm_out_file, end_lab);
22584 fputc ('\n', asm_out_file);
22586 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22587 xops[1] = GEN_INT (-PROBE_INTERVAL);
22588 output_asm_insn ("addi %0,%0,%1", xops);
22590 /* Probe at TEST_ADDR and branch. */
22591 xops[1] = gen_rtx_REG (Pmode, 0);
22592 output_asm_insn ("stw %1,0(%0)", xops);
22593 fprintf (asm_out_file, "\tb ");
22594 assemble_name_raw (asm_out_file, loop_lab);
22595 fputc ('\n', asm_out_file);
22597 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22599 return "";
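/* With the registers used by the caller above (r12 = test address,
   r0 = last address), the emitted loop is roughly:

   .LPSRL0:
           cmpw 0,r12,r0
           beq 0,.LPSRE0
           addi r12,r12,-4096
           stw r0,0(r12)
           b .LPSRL0
   .LPSRE0:

   where -4096 assumes the default PROBE_INTERVAL; note the probe
   store is "stw" even for 64-bit, as in the template above.  */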
22602 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22603 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22604 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22605 deduce these equivalences by itself so it wasn't necessary to hold
22606 its hand so much. Don't be tempted to always supply d2_f_d_e with
22607 the actual CFA register, i.e. r31 when we are using a hard frame
22608 pointer. That fails when saving regs off r1, and sched moves the
22609 r31 setup past the reg saves. */
22611 static rtx
22612 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22613 rtx reg2, rtx rreg, rtx split_reg)
22615 rtx real, temp;
22617 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22619 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22620 int i;
22622 gcc_checking_assert (val == 0);
22623 real = PATTERN (insn);
22624 if (GET_CODE (real) == PARALLEL)
22625 for (i = 0; i < XVECLEN (real, 0); i++)
22626 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22628 rtx set = XVECEXP (real, 0, i);
22630 RTX_FRAME_RELATED_P (set) = 1;
22632 RTX_FRAME_RELATED_P (insn) = 1;
22633 return insn;
22636 /* copy_rtx will not make unique copies of registers, so we need to
22637 ensure we don't have unwanted sharing here. */
22638 if (reg == reg2)
22639 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22641 if (reg == rreg)
22642 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22644 real = copy_rtx (PATTERN (insn));
22646 if (reg2 != NULL_RTX)
22647 real = replace_rtx (real, reg2, rreg);
22649 if (REGNO (reg) == STACK_POINTER_REGNUM)
22650 gcc_checking_assert (val == 0);
22651 else
22652 real = replace_rtx (real, reg,
22653 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22654 STACK_POINTER_REGNUM),
22655 GEN_INT (val)));
22657 /* We expect that 'real' is either a SET or a PARALLEL containing
22658 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22659 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22661 if (GET_CODE (real) == SET)
22663 rtx set = real;
22665 temp = simplify_rtx (SET_SRC (set));
22666 if (temp)
22667 SET_SRC (set) = temp;
22668 temp = simplify_rtx (SET_DEST (set));
22669 if (temp)
22670 SET_DEST (set) = temp;
22671 if (GET_CODE (SET_DEST (set)) == MEM)
22673 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22674 if (temp)
22675 XEXP (SET_DEST (set), 0) = temp;
22678 else
22680 int i;
22682 gcc_assert (GET_CODE (real) == PARALLEL);
22683 for (i = 0; i < XVECLEN (real, 0); i++)
22684 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22686 rtx set = XVECEXP (real, 0, i);
22688 temp = simplify_rtx (SET_SRC (set));
22689 if (temp)
22690 SET_SRC (set) = temp;
22691 temp = simplify_rtx (SET_DEST (set));
22692 if (temp)
22693 SET_DEST (set) = temp;
22694 if (GET_CODE (SET_DEST (set)) == MEM)
22696 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22697 if (temp)
22698 XEXP (SET_DEST (set), 0) = temp;
22700 RTX_FRAME_RELATED_P (set) = 1;
22704 /* If a store insn has been split into multiple insns, the
22705 true source register is given by split_reg. */
22706 if (split_reg != NULL_RTX)
22707 real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22709 RTX_FRAME_RELATED_P (insn) = 1;
22710 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22712 return insn;
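/* The effect is that the attached REG_FRAME_RELATED_EXPR note
   describes each save in sp-relative form, e.g.
     (set (mem (plus (reg 1) (const_int -16))) (reg 31))
   even when the store itself went through a temporary base register
   such as r11 or r12.  */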
22715 /* Returns an insn that has a vrsave set operation with the
22716 appropriate CLOBBERs. */
22718 static rtx
22719 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22721 int nclobs, i;
22722 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22723 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22725 clobs[0]
22726 = gen_rtx_SET (VOIDmode,
22727 vrsave,
22728 gen_rtx_UNSPEC_VOLATILE (SImode,
22729 gen_rtvec (2, reg, vrsave),
22730 UNSPECV_SET_VRSAVE));
22732 nclobs = 1;
22734 /* We need to clobber the registers in the mask so the scheduler
22735 does not move sets to VRSAVE before sets of AltiVec registers.
22737 However, if the function receives nonlocal gotos, reload will set
22738 all call saved registers live. We will end up with:
22740 (set (reg 999) (mem))
22741 (parallel [ (set (reg vrsave) (unspec blah))
22742 (clobber (reg 999))])
22744 The clobber will cause the store into reg 999 to be dead, and
22745 flow will attempt to delete an epilogue insn. In this case, we
22746 need an unspec use/set of the register. */
22748 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22749 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22751 if (!epiloguep || call_used_regs [i])
22752 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22753 gen_rtx_REG (V4SImode, i));
22754 else
22756 rtx reg = gen_rtx_REG (V4SImode, i);
22758 clobs[nclobs++]
22759 = gen_rtx_SET (VOIDmode,
22760 reg,
22761 gen_rtx_UNSPEC (V4SImode,
22762 gen_rtvec (1, reg), 27));
22766 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22768 for (i = 0; i < nclobs; ++i)
22769 XVECEXP (insn, 0, i) = clobs[i];
22771 return insn;
22774 static rtx
22775 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22777 rtx addr, mem;
22779 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22780 mem = gen_frame_mem (GET_MODE (reg), addr);
22781 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22784 static rtx
22785 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22787 return gen_frame_set (reg, frame_reg, offset, false);
22790 static rtx
22791 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22793 return gen_frame_set (reg, frame_reg, offset, true);
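/* For illustration, gen_frame_store (reg, r1, -8) builds RTL of the
   shape (set (mem/c (plus (reg 1) (const_int -8))) reg), and
   gen_frame_load builds the mirror-image load.  */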
22796 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22797 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
22799 static rtx
22800 emit_frame_save (rtx frame_reg, machine_mode mode,
22801 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
22803 rtx reg, insn;
22805 /* Some cases that need register indexed addressing. */
22806 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
22807 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22808 || (TARGET_E500_DOUBLE && mode == DFmode)
22809 || (TARGET_SPE_ABI
22810 && SPE_VECTOR_MODE (mode)
22811 && !SPE_CONST_OFFSET_OK (offset))));
22813 reg = gen_rtx_REG (mode, regno);
22814 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22815 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22816 NULL_RTX, NULL_RTX, NULL_RTX);
22819 /* Emit an offset memory reference suitable for a frame store, while
22820 converting to a valid addressing mode. */
22822 static rtx
22823 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
22825 rtx int_rtx, offset_rtx;
22827 int_rtx = GEN_INT (offset);
22829 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
22830 || (TARGET_E500_DOUBLE && mode == DFmode))
22832 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
22833 emit_move_insn (offset_rtx, int_rtx);
22835 else
22836 offset_rtx = int_rtx;
22838 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
22841 #ifndef TARGET_FIX_AND_CONTINUE
22842 #define TARGET_FIX_AND_CONTINUE 0
22843 #endif
22845 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
22846 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
22847 #define LAST_SAVRES_REGISTER 31
22848 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
22850 enum {
22851 SAVRES_LR = 0x1,
22852 SAVRES_SAVE = 0x2,
22853 SAVRES_REG = 0x0c,
22854 SAVRES_GPR = 0,
22855 SAVRES_FPR = 4,
22856 SAVRES_VR = 8
22859 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
22861 /* Temporary holding space for an out-of-line register save/restore
22862 routine name. */
22863 static char savres_routine_name[30];
22865 /* Return the name for an out-of-line register save/restore routine.
22866 SEL selects save vs. restore and the register class (GPR/FPR/VR). */
22868 static char *
22869 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
22871 const char *prefix = "";
22872 const char *suffix = "";
22874 /* Different targets are supposed to define
22875 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
22876 routine name could be defined with:
22878 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
22880 This is a nice idea in theory, but in reality, things are
22881 complicated in several ways:
22883 - ELF targets have save/restore routines for GPRs.
22885 - SPE targets use different prefixes for 32/64-bit registers, and
22886 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
22888 - PPC64 ELF targets have routines for save/restore of GPRs that
22889 differ in what they do with the link register, so having a set
22890 prefix doesn't work. (We only use one of the save routines at
22891 the moment, though.)
22893 - PPC32 ELF targets have "exit" versions of the restore routines
22894 that restore the link register and can save some extra space.
22895 These require an extra suffix. (There are also "tail" versions
22896 of the restore routines and "GOT" versions of the save routines,
22897 but we don't generate those at present. Same problems apply,
22898 though.)
22900 We deal with all this by synthesizing our own prefix/suffix and
22901 using that for the simple sprintf call shown above. */
22902 if (TARGET_SPE)
22904 /* No floating point saves on the SPE. */
22905 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
22907 if ((sel & SAVRES_SAVE))
22908 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
22909 else
22910 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
22912 if ((sel & SAVRES_LR))
22913 suffix = "_x";
22915 else if (DEFAULT_ABI == ABI_V4)
22917 if (TARGET_64BIT)
22918 goto aix_names;
22920 if ((sel & SAVRES_REG) == SAVRES_GPR)
22921 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
22922 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22923 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
22924 else if ((sel & SAVRES_REG) == SAVRES_VR)
22925 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22926 else
22927 abort ();
22929 if ((sel & SAVRES_LR))
22930 suffix = "_x";
22932 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22934 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
22935 /* No out-of-line save/restore routines for GPRs on AIX. */
22936 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
22937 #endif
22939 aix_names:
22940 if ((sel & SAVRES_REG) == SAVRES_GPR)
22941 prefix = ((sel & SAVRES_SAVE)
22942 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
22943 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
22944 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22946 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
22947 if ((sel & SAVRES_LR))
22948 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
22949 else
22950 #endif
22952 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
22953 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
22956 else if ((sel & SAVRES_REG) == SAVRES_VR)
22957 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22958 else
22959 abort ();
22962 if (DEFAULT_ABI == ABI_DARWIN)
22964 /* The Darwin approach is (slightly) different, in order to be
22965 compatible with code generated by the system toolchain. There is a
22966 single symbol for the start of the save sequence, and the code here
22967 embeds an offset into that code on the basis of the first register
22968 to be saved. */
22969 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
22970 if ((sel & SAVRES_REG) == SAVRES_GPR)
22971 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
22972 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
22973 (regno - 13) * 4, prefix, regno);
22974 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22975 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
22976 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
22977 else if ((sel & SAVRES_REG) == SAVRES_VR)
22978 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
22979 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
22980 else
22981 abort ();
22983 else
22984 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
22986 return savres_routine_name;
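/* Examples of the synthesized names: the 32-bit SVR4 GPR save
   starting at r29 with the LR variant is "_savegpr_29_x", while the
   AIX/ELFv2 GPR save that also handles LR is "_savegpr0_29"; FPR
   routines on other targets use the SAVE_FP_PREFIX/SUFFIX pair as in
   the sprintf sketched above.  */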
22989 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
22990 SEL selects save vs. restore and the register class (GPR/FPR/VR). */
22992 static rtx
22993 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
22995 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
22996 ? info->first_gp_reg_save
22997 : (sel & SAVRES_REG) == SAVRES_FPR
22998 ? info->first_fp_reg_save - 32
22999 : (sel & SAVRES_REG) == SAVRES_VR
23000 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23001 : -1);
23002 rtx sym;
23003 int select = sel;
23005 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23006 versions of the gpr routines. */
23007 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23008 && info->spe_64bit_regs_used)
23009 select ^= SAVRES_FPR ^ SAVRES_GPR;
23011 /* Don't generate bogus routine names. */
23012 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23013 && regno <= LAST_SAVRES_REGISTER
23014 && select >= 0 && select <= 12);
23016 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23018 if (sym == NULL)
23020 char *name;
23022 name = rs6000_savres_routine_name (info, regno, sel);
23024 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23025 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23026 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23029 return sym;
23032 /* Emit a sequence of insns, including a stack tie if needed, for
23033 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23034 reset the stack pointer, but move the base of the frame into
23035 reg UPDT_REGNO for use by out-of-line register restore routines. */
23037 static rtx
23038 rs6000_emit_stack_reset (rs6000_stack_t *info,
23039 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23040 unsigned updt_regno)
23042 rtx updt_reg_rtx;
23044 /* This blockage is needed so that sched doesn't decide to move
23045 the sp change before the register restores. */
23046 if (DEFAULT_ABI == ABI_V4
23047 || (TARGET_SPE_ABI
23048 && info->spe_64bit_regs_used != 0
23049 && info->first_gp_reg_save != 32))
23050 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23052 /* If we are restoring registers out-of-line, we will be using the
23053 "exit" variants of the restore routines, which will reset the
23054 stack for us. But we do need to point updt_reg into the
23055 right place for those routines. */
23056 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23058 if (frame_off != 0)
23059 return emit_insn (gen_add3_insn (updt_reg_rtx,
23060 frame_reg_rtx, GEN_INT (frame_off)));
23061 else if (REGNO (frame_reg_rtx) != updt_regno)
23062 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23064 return NULL_RTX;
23067 /* Return the register number used as a pointer by out-of-line
23068 save/restore functions. */
23070 static inline unsigned
23071 ptr_regno_for_savres (int sel)
23073 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23074 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23075 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
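/* Summarizing the above: AIX/ELFv2 uses r1 for the FPR routines and
   the LR ("exit") variants and r12 otherwise; Darwin uses r1 for
   FPRs; all remaining cases use r11.  */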
23078 /* Construct a parallel rtx describing the effect of a call to an
23079 out-of-line register save/restore routine, and emit the insn
23080 or jump_insn as appropriate. */
23082 static rtx
23083 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23084 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23085 machine_mode reg_mode, int sel)
23087 int i;
23088 int offset, start_reg, end_reg, n_regs, use_reg;
23089 int reg_size = GET_MODE_SIZE (reg_mode);
23090 rtx sym;
23091 rtvec p;
23092 rtx par, insn;
23094 offset = 0;
23095 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23096 ? info->first_gp_reg_save
23097 : (sel & SAVRES_REG) == SAVRES_FPR
23098 ? info->first_fp_reg_save
23099 : (sel & SAVRES_REG) == SAVRES_VR
23100 ? info->first_altivec_reg_save
23101 : -1);
23102 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23103 ? 32
23104 : (sel & SAVRES_REG) == SAVRES_FPR
23105 ? 64
23106 : (sel & SAVRES_REG) == SAVRES_VR
23107 ? LAST_ALTIVEC_REGNO + 1
23108 : -1);
23109 n_regs = end_reg - start_reg;
23110 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23111 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23112 + n_regs);
23114 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23115 RTVEC_ELT (p, offset++) = ret_rtx;
23117 RTVEC_ELT (p, offset++)
23118 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23120 sym = rs6000_savres_routine_sym (info, sel);
23121 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23123 use_reg = ptr_regno_for_savres (sel);
23124 if ((sel & SAVRES_REG) == SAVRES_VR)
23126 /* Vector regs are saved/restored using [reg+reg] addressing. */
23127 RTVEC_ELT (p, offset++)
23128 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23129 RTVEC_ELT (p, offset++)
23130 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23132 else
23133 RTVEC_ELT (p, offset++)
23134 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23136 for (i = 0; i < end_reg - start_reg; i++)
23137 RTVEC_ELT (p, i + offset)
23138 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23139 frame_reg_rtx, save_area_offset + reg_size * i,
23140 (sel & SAVRES_SAVE) != 0);
23142 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23143 RTVEC_ELT (p, i + offset)
23144 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23146 par = gen_rtx_PARALLEL (VOIDmode, p);
23148 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23150 insn = emit_jump_insn (par);
23151 JUMP_LABEL (insn) = ret_rtx;
23153 else
23154 insn = emit_insn (par);
23155 return insn;
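/* The PARALLEL built above thus has the shape

     [(return)?   (clobber LR)   (use routine-symbol)
      (use or clobber ptr-reg)   one (set ...) per register
      (optional store of r0 into the LR slot)]

   intended to match the corresponding out-of-line save/restore
   patterns in the machine description.  */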
23158 /* Emit code to store CR fields that need to be saved into REG. */
23160 static void
23161 rs6000_emit_move_from_cr (rtx reg)
23163 /* Only the ELFv2 ABI allows storing only selected fields. */
23164 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23166 int i, cr_reg[8], count = 0;
23168 /* Collect CR fields that must be saved. */
23169 for (i = 0; i < 8; i++)
23170 if (save_reg_p (CR0_REGNO + i))
23171 cr_reg[count++] = i;
23173 /* If it's just a single one, use mfcrf. */
23174 if (count == 1)
23176 rtvec p = rtvec_alloc (1);
23177 rtvec r = rtvec_alloc (2);
23178 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23179 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23180 RTVEC_ELT (p, 0)
23181 = gen_rtx_SET (VOIDmode, reg,
23182 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23184 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23185 return;
23188 /* ??? It might be better to handle count == 2 / 3 cases here
23189 as well, using logical operations to combine the values. */
23192 emit_insn (gen_movesi_from_cr (reg));
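/* In asm terms the single-field case above becomes one move from a
   selected CR field (the mfocrf form of mfcr), while the fallback
   gen_movesi_from_cr copies all eight fields with a plain "mfcr"
   (a sketch; actual mnemonics come from the rs6000.md patterns).  */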
23195 /* Determine whether the gp REG is really used. */
23197 static bool
23198 rs6000_reg_live_or_pic_offset_p (int reg)
23200 /* If the function calls eh_return, claim used all the registers that would
23201 be checked for liveness otherwise. This is required for the PIC offset
23202 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
23203 register allocation purposes in this case. */
23205 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23206 && (!call_used_regs[reg]
23207 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23208 && !TARGET_SINGLE_PIC_BASE
23209 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23210 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23211 && !TARGET_SINGLE_PIC_BASE
23212 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23213 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
23216 /* Emit function prologue as insns. */
23218 void
23219 rs6000_emit_prologue (void)
23221 rs6000_stack_t *info = rs6000_stack_info ();
23222 machine_mode reg_mode = Pmode;
23223 int reg_size = TARGET_32BIT ? 4 : 8;
23224 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23225 rtx frame_reg_rtx = sp_reg_rtx;
23226 unsigned int cr_save_regno;
23227 rtx cr_save_rtx = NULL_RTX;
23228 rtx insn;
23229 int strategy;
23230 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23231 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23232 && call_used_regs[STATIC_CHAIN_REGNUM]);
23233 /* Offset to top of frame for frame_reg and sp respectively. */
23234 HOST_WIDE_INT frame_off = 0;
23235 HOST_WIDE_INT sp_off = 0;
23237 #ifdef ENABLE_CHECKING
23238 /* Track and check usage of r0, r11, r12. */
23239 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
23240 #define START_USE(R) do \
23242 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23243 reg_inuse |= 1 << (R); \
23244 } while (0)
23245 #define END_USE(R) do \
23247 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
23248 reg_inuse &= ~(1 << (R)); \
23249 } while (0)
23250 #define NOT_INUSE(R) do \
23252 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23253 } while (0)
23254 #else
23255 #define START_USE(R) do {} while (0)
23256 #define END_USE(R) do {} while (0)
23257 #define NOT_INUSE(R) do {} while (0)
23258 #endif
23260 if (DEFAULT_ABI == ABI_ELFv2)
23262 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23264 /* With -mminimal-toc we may generate an extra use of r2 below. */
23265 if (!TARGET_SINGLE_PIC_BASE
23266 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23267 cfun->machine->r2_setup_needed = true;
23271 if (flag_stack_usage_info)
23272 current_function_static_stack_size = info->total_size;
23274 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23276 HOST_WIDE_INT size = info->total_size;
23278 if (crtl->is_leaf && !cfun->calls_alloca)
23280 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23281 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23282 size - STACK_CHECK_PROTECT);
23284 else if (size > 0)
23285 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23288 if (TARGET_FIX_AND_CONTINUE)
23290 /* gdb on darwin arranges to forward a function from the old
23291 address by modifying the first 5 instructions of the function
23292 to branch to the overriding function. This is necessary to
23293 permit function pointers that point to the old function to
23294 actually forward to the new function. */
23295 emit_insn (gen_nop ());
23296 emit_insn (gen_nop ());
23297 emit_insn (gen_nop ());
23298 emit_insn (gen_nop ());
23299 emit_insn (gen_nop ());
23302 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23304 reg_mode = V2SImode;
23305 reg_size = 8;
23308 /* Handle world saves specially here. */
23309 if (WORLD_SAVE_P (info))
23311 int i, j, sz;
23312 rtx treg;
23313 rtvec p;
23314 rtx reg0;
23316 /* save_world expects lr in r0. */
23317 reg0 = gen_rtx_REG (Pmode, 0);
23318 if (info->lr_save_p)
23320 insn = emit_move_insn (reg0,
23321 gen_rtx_REG (Pmode, LR_REGNO));
23322 RTX_FRAME_RELATED_P (insn) = 1;
23325 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23326 assumptions about the offsets of various bits of the stack
23327 frame. */
23328 gcc_assert (info->gp_save_offset == -220
23329 && info->fp_save_offset == -144
23330 && info->lr_save_offset == 8
23331 && info->cr_save_offset == 4
23332 && info->push_p
23333 && info->lr_save_p
23334 && (!crtl->calls_eh_return
23335 || info->ehrd_offset == -432)
23336 && info->vrsave_save_offset == -224
23337 && info->altivec_save_offset == -416);
23339 treg = gen_rtx_REG (SImode, 11);
23340 emit_move_insn (treg, GEN_INT (-info->total_size));
23342 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23343 in R11. It also clobbers R12, so beware! */
23345 /* Preserve CR2 for save_world prologues */
23346 sz = 5;
23347 sz += 32 - info->first_gp_reg_save;
23348 sz += 64 - info->first_fp_reg_save;
23349 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23350 p = rtvec_alloc (sz);
23351 j = 0;
23352 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23353 gen_rtx_REG (SImode,
23354 LR_REGNO));
23355 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23356 gen_rtx_SYMBOL_REF (Pmode,
23357 "*save_world"));
23358 /* We do floats first so that the instruction pattern matches
23359 properly. */
23360 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23361 RTVEC_ELT (p, j++)
23362 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23363 ? DFmode : SFmode,
23364 info->first_fp_reg_save + i),
23365 frame_reg_rtx,
23366 info->fp_save_offset + frame_off + 8 * i);
23367 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23368 RTVEC_ELT (p, j++)
23369 = gen_frame_store (gen_rtx_REG (V4SImode,
23370 info->first_altivec_reg_save + i),
23371 frame_reg_rtx,
23372 info->altivec_save_offset + frame_off + 16 * i);
23373 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23374 RTVEC_ELT (p, j++)
23375 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23376 frame_reg_rtx,
23377 info->gp_save_offset + frame_off + reg_size * i);
23379 /* CR register traditionally saved as CR2. */
23380 RTVEC_ELT (p, j++)
23381 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23382 frame_reg_rtx, info->cr_save_offset + frame_off);
23383 /* Explain the use of R0. */
23384 if (info->lr_save_p)
23385 RTVEC_ELT (p, j++)
23386 = gen_frame_store (reg0,
23387 frame_reg_rtx, info->lr_save_offset + frame_off);
23388 /* Explain what happens to the stack pointer. */
23390 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23391 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23394 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23395 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23396 treg, GEN_INT (-info->total_size), NULL_RTX);
23397 sp_off = frame_off = info->total_size;
23400 strategy = info->savres_strategy;
23402 /* For V.4, update stack before we do any saving and set back pointer. */
23403 if (! WORLD_SAVE_P (info)
23404 && info->push_p
23405 && (DEFAULT_ABI == ABI_V4
23406 || crtl->calls_eh_return))
23408 bool need_r11 = (TARGET_SPE
23409 ? (!(strategy & SAVE_INLINE_GPRS)
23410 && info->spe_64bit_regs_used == 0)
23411 : (!(strategy & SAVE_INLINE_FPRS)
23412 || !(strategy & SAVE_INLINE_GPRS)
23413 || !(strategy & SAVE_INLINE_VRS)));
23414 int ptr_regno = -1;
23415 rtx ptr_reg = NULL_RTX;
23416 int ptr_off = 0;
23418 if (info->total_size < 32767)
23419 frame_off = info->total_size;
23420 else if (need_r11)
23421 ptr_regno = 11;
23422 else if (info->cr_save_p
23423 || info->lr_save_p
23424 || info->first_fp_reg_save < 64
23425 || info->first_gp_reg_save < 32
23426 || info->altivec_size != 0
23427 || info->vrsave_mask != 0
23428 || crtl->calls_eh_return)
23429 ptr_regno = 12;
23430 else
23432 /* The prologue won't be saving any regs so there is no need
23433 to set up a frame register to access any frame save area.
23434 We also won't be using frame_off anywhere below, but set
23435 the correct value anyway to protect against future
23436 changes to this function. */
23437 frame_off = info->total_size;
23439 if (ptr_regno != -1)
23441 /* Set up the frame offset to that needed by the first
23442 out-of-line save function. */
23443 START_USE (ptr_regno);
23444 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23445 frame_reg_rtx = ptr_reg;
23446 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23447 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23448 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23449 ptr_off = info->gp_save_offset + info->gp_size;
23450 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23451 ptr_off = info->altivec_save_offset + info->altivec_size;
23452 frame_off = -ptr_off;
23454 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23455 sp_off = info->total_size;
23456 if (frame_reg_rtx != sp_reg_rtx)
23457 rs6000_emit_stack_tie (frame_reg_rtx, false);
23460 /* If we use the link register, get it into r0. */
23461 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23463 rtx addr, reg, mem;
23465 reg = gen_rtx_REG (Pmode, 0);
23466 START_USE (0);
23467 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23468 RTX_FRAME_RELATED_P (insn) = 1;
23470 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23471 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23473 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23474 GEN_INT (info->lr_save_offset + frame_off));
23475 mem = gen_rtx_MEM (Pmode, addr);
23476 /* This should not be of rs6000_sr_alias_set, because of
23477 __builtin_return_address. */
23479 insn = emit_move_insn (mem, reg);
23480 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23481 NULL_RTX, NULL_RTX, NULL_RTX);
23482 END_USE (0);
23486 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23487 r12 will be needed by out-of-line gpr restore. */
23488 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23489 && !(strategy & (SAVE_INLINE_GPRS
23490 | SAVE_NOINLINE_GPRS_SAVES_LR))
23491 ? 11 : 12);
23492 if (!WORLD_SAVE_P (info)
23493 && info->cr_save_p
23494 && REGNO (frame_reg_rtx) != cr_save_regno
23495 && !(using_static_chain_p && cr_save_regno == 11))
23497 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23498 START_USE (cr_save_regno);
23499 rs6000_emit_move_from_cr (cr_save_rtx);
23502 /* Do any required saving of fpr's. If only one or two to save, do
23503 it ourselves. Otherwise, call function. */
23504 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23506 int i;
23507 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23508 if (save_reg_p (info->first_fp_reg_save + i))
23509 emit_frame_save (frame_reg_rtx,
23510 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23511 ? DFmode : SFmode),
23512 info->first_fp_reg_save + i,
23513 info->fp_save_offset + frame_off + 8 * i,
23514 sp_off - frame_off);
23516 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23518 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23519 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23520 unsigned ptr_regno = ptr_regno_for_savres (sel);
23521 rtx ptr_reg = frame_reg_rtx;
23523 if (REGNO (frame_reg_rtx) == ptr_regno)
23524 gcc_checking_assert (frame_off == 0);
23525 else
23527 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23528 NOT_INUSE (ptr_regno);
23529 emit_insn (gen_add3_insn (ptr_reg,
23530 frame_reg_rtx, GEN_INT (frame_off)));
23532 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23533 info->fp_save_offset,
23534 info->lr_save_offset,
23535 DFmode, sel);
23536 rs6000_frame_related (insn, ptr_reg, sp_off,
23537 NULL_RTX, NULL_RTX, NULL_RTX);
23538 if (lr)
23539 END_USE (0);
23542 /* Save GPRs. This is done as a PARALLEL if we are using
23543 the store-multiple instructions. */
23544 if (!WORLD_SAVE_P (info)
23545 && TARGET_SPE_ABI
23546 && info->spe_64bit_regs_used != 0
23547 && info->first_gp_reg_save != 32)
23549 int i;
23550 rtx spe_save_area_ptr;
23551 HOST_WIDE_INT save_off;
23552 int ool_adjust = 0;
23554 /* Determine whether we can address all of the registers that need
23555 to be saved with an offset from frame_reg_rtx that fits in
23556 the small const field for SPE memory instructions. */
23557 int spe_regs_addressable
23558 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23559 + reg_size * (32 - info->first_gp_reg_save - 1))
23560 && (strategy & SAVE_INLINE_GPRS));
23562 if (spe_regs_addressable)
23564 spe_save_area_ptr = frame_reg_rtx;
23565 save_off = frame_off;
23567 else
23569 /* Make r11 point to the start of the SPE save area. We need
23570 to be careful here if r11 is holding the static chain. If
23571 it is, then temporarily save it in r0. */
23572 HOST_WIDE_INT offset;
23574 if (!(strategy & SAVE_INLINE_GPRS))
23575 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23576 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23577 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23578 save_off = frame_off - offset;
23580 if (using_static_chain_p)
23582 rtx r0 = gen_rtx_REG (Pmode, 0);
23584 START_USE (0);
23585 gcc_assert (info->first_gp_reg_save > 11);
23587 emit_move_insn (r0, spe_save_area_ptr);
23589 else if (REGNO (frame_reg_rtx) != 11)
23590 START_USE (11);
23592 emit_insn (gen_addsi3 (spe_save_area_ptr,
23593 frame_reg_rtx, GEN_INT (offset)));
23594 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23595 frame_off = -info->spe_gp_save_offset + ool_adjust;
23598 if ((strategy & SAVE_INLINE_GPRS))
23600 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23601 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23602 emit_frame_save (spe_save_area_ptr, reg_mode,
23603 info->first_gp_reg_save + i,
23604 (info->spe_gp_save_offset + save_off
23605 + reg_size * i),
23606 sp_off - save_off);
23608 else
23610 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23611 info->spe_gp_save_offset + save_off,
23612 0, reg_mode,
23613 SAVRES_SAVE | SAVRES_GPR);
23615 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23616 NULL_RTX, NULL_RTX, NULL_RTX);
23619 /* Move the static chain pointer back. */
23620 if (!spe_regs_addressable)
23622 if (using_static_chain_p)
23624 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23625 END_USE (0);
23627 else if (REGNO (frame_reg_rtx) != 11)
23628 END_USE (11);
23631 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23633 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23634 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23635 unsigned ptr_regno = ptr_regno_for_savres (sel);
23636 rtx ptr_reg = frame_reg_rtx;
23637 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23638 int end_save = info->gp_save_offset + info->gp_size;
23639 int ptr_off;
23641 if (!ptr_set_up)
23642 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23644 /* Need to adjust r11 (r12) if we saved any FPRs. */
23645 if (end_save + frame_off != 0)
23647 rtx offset = GEN_INT (end_save + frame_off);
23649 if (ptr_set_up)
23650 frame_off = -end_save;
23651 else
23652 NOT_INUSE (ptr_regno);
23653 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23655 else if (!ptr_set_up)
23657 NOT_INUSE (ptr_regno);
23658 emit_move_insn (ptr_reg, frame_reg_rtx);
23660 ptr_off = -end_save;
23661 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23662 info->gp_save_offset + ptr_off,
23663 info->lr_save_offset + ptr_off,
23664 reg_mode, sel);
23665 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23666 NULL_RTX, NULL_RTX, NULL_RTX);
23667 if (lr)
23668 END_USE (0);
23670 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23672 rtvec p;
23673 int i;
23674 p = rtvec_alloc (32 - info->first_gp_reg_save);
23675 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23676 RTVEC_ELT (p, i)
23677 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23678 frame_reg_rtx,
23679 info->gp_save_offset + frame_off + reg_size * i);
23680 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23681 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23682 NULL_RTX, NULL_RTX, NULL_RTX);
23684 else if (!WORLD_SAVE_P (info))
23686 int i;
23687 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23688 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23689 emit_frame_save (frame_reg_rtx, reg_mode,
23690 info->first_gp_reg_save + i,
23691 info->gp_save_offset + frame_off + reg_size * i,
23692 sp_off - frame_off);
23695 if (crtl->calls_eh_return)
23697 unsigned int i;
23698 rtvec p;
23700 for (i = 0; ; ++i)
23702 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23703 if (regno == INVALID_REGNUM)
23704 break;
23707 p = rtvec_alloc (i);
23709 for (i = 0; ; ++i)
23711 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23712 if (regno == INVALID_REGNUM)
23713 break;
23715 insn
23716 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23717 sp_reg_rtx,
23718 info->ehrd_offset + sp_off + reg_size * (int) i);
23719 RTVEC_ELT (p, i) = insn;
23720 RTX_FRAME_RELATED_P (insn) = 1;
23723 insn = emit_insn (gen_blockage ());
23724 RTX_FRAME_RELATED_P (insn) = 1;
23725 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23728 /* In the AIX ABI we need to make sure r2 is really saved. */
23729 if (TARGET_AIX && crtl->calls_eh_return)
23731 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23732 rtx save_insn, join_insn, note;
23733 long toc_restore_insn;
23735 tmp_reg = gen_rtx_REG (Pmode, 11);
23736 tmp_reg_si = gen_rtx_REG (SImode, 11);
23737 if (using_static_chain_p)
23739 START_USE (0);
23740 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23742 else
23743 START_USE (11);
23744 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23745 /* Peek at instruction to which this function returns. If it's
23746 restoring r2, then we know we've already saved r2. We can't
23747 unconditionally save r2 because the value we have will already
23748 be updated if we arrived at this function via a plt call or
23749 toc adjusting stub. */
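/* (A sketch of the encodings matched below: 32-bit "lwz 2,N(1)"
   is 0x80410000 + N and 64-bit "ld 2,N(1)" is 0xE8410000 + N,
   with N = RS6000_TOC_SAVE_SLOT, e.g. 20, 24 or 40 depending on
   the ABI.  The xor/compare below tests the loaded word against
   exactly this pattern.)  */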
23750 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23751 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23752 + RS6000_TOC_SAVE_SLOT);
23753 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23754 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23755 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23756 validate_condition_mode (EQ, CCUNSmode);
23757 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23758 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23759 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23760 toc_save_done = gen_label_rtx ();
23761 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23762 gen_rtx_EQ (VOIDmode, compare_result,
23763 const0_rtx),
23764 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23765 pc_rtx);
23766 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23767 JUMP_LABEL (jump) = toc_save_done;
23768 LABEL_NUSES (toc_save_done) += 1;
23770 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23771 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23772 sp_off - frame_off);
23774 emit_label (toc_save_done);
23776 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
23777 have a CFG that has different saves along different paths.
23778 Move the note to a dummy blockage insn, which describes that
23779 R2 is unconditionally saved after the label. */
23780 /* ??? An alternate representation might be a special insn pattern
23781 containing both the branch and the store. That might give the
23782 code that minimizes the number of DW_CFA_advance opcodes more
23783 freedom in placing the annotations. */
23784 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23785 if (note)
23786 remove_note (save_insn, note);
23787 else
23788 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23789 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23790 RTX_FRAME_RELATED_P (save_insn) = 0;
23792 join_insn = emit_insn (gen_blockage ());
23793 REG_NOTES (join_insn) = note;
23794 RTX_FRAME_RELATED_P (join_insn) = 1;
23796 if (using_static_chain_p)
23798 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
23799 END_USE (0);
23801 else
23802 END_USE (11);
23805 /* Save CR if we use any that must be preserved. */
23806 if (!WORLD_SAVE_P (info) && info->cr_save_p)
23808 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23809 GEN_INT (info->cr_save_offset + frame_off));
23810 rtx mem = gen_frame_mem (SImode, addr);
23812 /* If we didn't copy cr before, do so now using r0. */
23813 if (cr_save_rtx == NULL_RTX)
23815 START_USE (0);
23816 cr_save_rtx = gen_rtx_REG (SImode, 0);
23817 rs6000_emit_move_from_cr (cr_save_rtx);
23820 /* Saving CR requires a two-instruction sequence: one instruction
23821 to move the CR to a general-purpose register, and a second
23822 instruction that stores the GPR to memory.
23824 We do not emit any DWARF CFI records for the first of these,
23825 because we cannot properly represent the fact that CR is saved in
23826 a register. One reason is that we cannot express that multiple
23827 CR fields are saved; another reason is that on 64-bit, the size
23828 of the CR register in DWARF (4 bytes) differs from the size of
23829 a general-purpose register.
23831 This means if any intervening instruction were to clobber one of
23832 the call-saved CR fields, we'd have incorrect CFI. To prevent
23833 this from happening, we mark the store to memory as a use of
23834 those CR fields, which prevents any such instruction from being
23835 scheduled in between the two instructions. */
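/* (A minimal sketch of the resulting sequence, register numbers
   depending on the strategy chosen above:
       mfcr  12              # rs6000_emit_move_from_cr, no CFI
       stw   12,<cr-off>(1)  # the store below, in a PARALLEL with
                             # USEs of the saved CR fields
   The USEs are what keep a CR-clobbering insn from being
   scheduled between the two.)  */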
23836 rtx crsave_v[9];
23837 int n_crsave = 0;
23838 int i;
23840 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
23841 for (i = 0; i < 8; i++)
23842 if (save_reg_p (CR0_REGNO + i))
23843 crsave_v[n_crsave++]
23844 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23846 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
23847 gen_rtvec_v (n_crsave, crsave_v)));
23848 END_USE (REGNO (cr_save_rtx));
23850 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
23851 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
23852 so we need to construct a frame expression manually. */
23853 RTX_FRAME_RELATED_P (insn) = 1;
23855 /* Update address to be stack-pointer relative, like
23856 rs6000_frame_related would do. */
23857 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
23858 GEN_INT (info->cr_save_offset + sp_off));
23859 mem = gen_frame_mem (SImode, addr);
23861 if (DEFAULT_ABI == ABI_ELFv2)
23863 /* In the ELFv2 ABI we generate separate CFI records for each
23864 CR field that was actually saved. They all point to the
23865 same 32-bit stack slot. */
23866 rtx crframe[8];
23867 int n_crframe = 0;
23869 for (i = 0; i < 8; i++)
23870 if (save_reg_p (CR0_REGNO + i))
23872 crframe[n_crframe]
23873 = gen_rtx_SET (VOIDmode, mem,
23874 gen_rtx_REG (SImode, CR0_REGNO + i));
23876 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
23877 n_crframe++;
23880 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23881 gen_rtx_PARALLEL (VOIDmode,
23882 gen_rtvec_v (n_crframe, crframe)));
23884 else
23886 /* In other ABIs, by convention, we use a single CR regnum to
23887 represent the fact that all call-saved CR fields are saved.
23888 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
23889 rtx set = gen_rtx_SET (VOIDmode, mem,
23890 gen_rtx_REG (SImode, CR2_REGNO));
23891 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
23895 /* In the ELFv2 ABI we need to save all call-saved CR fields into
23896 *separate* slots if the routine calls __builtin_eh_return, so
23897 that they can be independently restored by the unwinder. */
23898 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23900 int i, cr_off = info->ehcr_offset;
23901 rtx crsave;
23903 /* ??? We might get better performance by using multiple mfocrf
23904 instructions. */
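/* (Background for the ??? above: mfcr copies all eight CR fields
   through one GPR, whereas mfocrf extracts a single selected
   field, so several independent mfocrf's can pipeline better on
   some cores.)  */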
23905 crsave = gen_rtx_REG (SImode, 0);
23906 emit_insn (gen_movesi_from_cr (crsave));
23908 for (i = 0; i < 8; i++)
23909 if (!call_used_regs[CR0_REGNO + i])
23911 rtvec p = rtvec_alloc (2);
23912 RTVEC_ELT (p, 0)
23913 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
23914 RTVEC_ELT (p, 1)
23915 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23917 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23919 RTX_FRAME_RELATED_P (insn) = 1;
23920 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23921 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
23922 sp_reg_rtx, cr_off + sp_off));
23924 cr_off += reg_size;
23928 /* Update stack and set back pointer unless this is V.4,
23929 for which it was done previously. */
23930 if (!WORLD_SAVE_P (info) && info->push_p
23931 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
23933 rtx ptr_reg = NULL;
23934 int ptr_off = 0;
23936 /* If saving altivec regs we need to be able to address all save
23937 locations using a 16-bit offset. */
23938 if ((strategy & SAVE_INLINE_VRS) == 0
23939 || (info->altivec_size != 0
23940 && (info->altivec_save_offset + info->altivec_size - 16
23941 + info->total_size - frame_off) > 32767)
23942 || (info->vrsave_size != 0
23943 && (info->vrsave_save_offset
23944 + info->total_size - frame_off) > 32767))
23946 int sel = SAVRES_SAVE | SAVRES_VR;
23947 unsigned ptr_regno = ptr_regno_for_savres (sel);
23949 if (using_static_chain_p
23950 && ptr_regno == STATIC_CHAIN_REGNUM)
23951 ptr_regno = 12;
23952 if (REGNO (frame_reg_rtx) != ptr_regno)
23953 START_USE (ptr_regno);
23954 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23955 frame_reg_rtx = ptr_reg;
23956 ptr_off = info->altivec_save_offset + info->altivec_size;
23957 frame_off = -ptr_off;
23959 else if (REGNO (frame_reg_rtx) == 1)
23960 frame_off = info->total_size;
23961 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23962 sp_off = info->total_size;
23963 if (frame_reg_rtx != sp_reg_rtx)
23964 rs6000_emit_stack_tie (frame_reg_rtx, false);
23967 /* Set frame pointer, if needed. */
23968 if (frame_pointer_needed)
23970 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
23971 sp_reg_rtx);
23972 RTX_FRAME_RELATED_P (insn) = 1;
23975 /* Save AltiVec registers if needed. Save here because the red zone does
23976 not always include AltiVec registers. */
23977 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
23978 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
23980 int end_save = info->altivec_save_offset + info->altivec_size;
23981 int ptr_off;
23982 /* Oddly, the vector save/restore functions point r0 at the end
23983 of the save area, then use r11 or r12 to load offsets for
23984 [reg+reg] addressing. */
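/* (So the out-of-line call looks roughly like this sketch, with
   "_savevr_<n>" as an illustrative ELF routine name:
       addi 0,1,<end-of-vr-save-area>
       bl   _savevr_<n>        # uses r11 or r12 internally
   storing each vector at a negative [reg+reg] offset from r0.)  */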
23985 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
23986 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
23987 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
23989 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
23990 NOT_INUSE (0);
23991 if (end_save + frame_off != 0)
23993 rtx offset = GEN_INT (end_save + frame_off);
23995 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23997 else
23998 emit_move_insn (ptr_reg, frame_reg_rtx);
24000 ptr_off = -end_save;
24001 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24002 info->altivec_save_offset + ptr_off,
24003 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24004 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24005 NULL_RTX, NULL_RTX, NULL_RTX);
24006 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24008 /* The oddity mentioned above clobbered our frame reg. */
24009 emit_move_insn (frame_reg_rtx, ptr_reg);
24010 frame_off = ptr_off;
24013 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24014 && info->altivec_size != 0)
24016 int i;
24018 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24019 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24021 rtx areg, savereg, mem, split_reg;
24022 int offset;
24024 offset = (info->altivec_save_offset + frame_off
24025 + 16 * (i - info->first_altivec_reg_save));
24027 savereg = gen_rtx_REG (V4SImode, i);
24029 NOT_INUSE (0);
24030 areg = gen_rtx_REG (Pmode, 0);
24031 emit_move_insn (areg, GEN_INT (offset));
24033 /* AltiVec addressing mode is [reg+reg]. */
24034 mem = gen_frame_mem (V4SImode,
24035 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
24037 insn = emit_move_insn (mem, savereg);
24039 /* When we split a VSX store into two insns, we need to make
24040 sure the DWARF info knows which register we are storing.
24041 Pass it in to be used on the appropriate note. */
24042 if (!BYTES_BIG_ENDIAN
24043 && GET_CODE (PATTERN (insn)) == SET
24044 && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
24045 split_reg = savereg;
24046 else
24047 split_reg = NULL_RTX;
24049 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24050 areg, GEN_INT (offset), split_reg);
24054 /* VRSAVE is a bit vector representing which AltiVec registers
24055 are used. The OS uses this to determine which vector
24056 registers to save on a context switch. We need to save
24057 VRSAVE on the stack frame, add whatever AltiVec registers we
24058 used in this function, and do the corresponding magic in the
24059 epilogue. */
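/* (Roughly, as a sketch -- VRSAVE is SPR 256:
       mfspr 12,256          # read current VRSAVE
       stw   12,<off>(1)     # save the caller's value
       ori   12,12,<mask>    # add the AltiVec regs we use
       mtspr 256,12          # update VRSAVE
   Register choice and the or-immediate expansion vary as decided
   below.)  */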
24061 if (!WORLD_SAVE_P (info)
24062 && TARGET_ALTIVEC
24063 && TARGET_ALTIVEC_VRSAVE
24064 && info->vrsave_mask != 0)
24066 rtx reg, vrsave;
24067 int offset;
24068 int save_regno;
24070 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
24071 be using r12 as frame_reg_rtx and r11 as the static chain
24072 pointer for nested functions. */
24073 save_regno = 12;
24074 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24075 && !using_static_chain_p)
24076 save_regno = 11;
24077 else if (REGNO (frame_reg_rtx) == 12)
24079 save_regno = 11;
24080 if (using_static_chain_p)
24081 save_regno = 0;
24084 NOT_INUSE (save_regno);
24085 reg = gen_rtx_REG (SImode, save_regno);
24086 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24087 if (TARGET_MACHO)
24088 emit_insn (gen_get_vrsave_internal (reg));
24089 else
24090 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
24092 /* Save VRSAVE. */
24093 offset = info->vrsave_save_offset + frame_off;
24094 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24096 /* Include the registers in the mask. */
24097 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24099 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24102 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24103 if (!TARGET_SINGLE_PIC_BASE
24104 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24105 || (DEFAULT_ABI == ABI_V4
24106 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24107 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24109 /* If emit_load_toc_table will use the link register, we need to save
24110 it. We use R12 for this purpose because emit_load_toc_table
24111 can use register 0. This allows us to use a plain 'blr' to return
24112 from the procedure more often. */
24113 int save_LR_around_toc_setup = (TARGET_ELF
24114 && DEFAULT_ABI == ABI_V4
24115 && flag_pic
24116 && ! info->lr_save_p
24117 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24118 if (save_LR_around_toc_setup)
24120 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24121 rtx tmp = gen_rtx_REG (Pmode, 12);
24123 insn = emit_move_insn (tmp, lr);
24124 RTX_FRAME_RELATED_P (insn) = 1;
24126 rs6000_emit_load_toc_table (TRUE);
24128 insn = emit_move_insn (lr, tmp);
24129 add_reg_note (insn, REG_CFA_RESTORE, lr);
24130 RTX_FRAME_RELATED_P (insn) = 1;
24132 else
24133 rs6000_emit_load_toc_table (TRUE);
24136 #if TARGET_MACHO
24137 if (!TARGET_SINGLE_PIC_BASE
24138 && DEFAULT_ABI == ABI_DARWIN
24139 && flag_pic && crtl->uses_pic_offset_table)
24141 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24142 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24144 /* Save and restore LR locally around this call (in R0). */
24145 if (!info->lr_save_p)
24146 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24148 emit_insn (gen_load_macho_picbase (src));
24150 emit_move_insn (gen_rtx_REG (Pmode,
24151 RS6000_PIC_OFFSET_TABLE_REGNUM),
24152 lr);
24154 if (!info->lr_save_p)
24155 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
24157 #endif
24159 /* If we need to, save the TOC register after doing the stack setup.
24160 Do not emit eh frame info for this save. The unwinder wants info,
24161 conceptually attached to instructions in this function, about
24162 register values in the caller of this function. This R2 may have
24163 already been changed from the value in the caller.
24164 We don't attempt to write accurate DWARF EH frame info for R2
24165 because code emitted by gcc for a (non-pointer) function call
24166 doesn't save and restore R2. Instead, R2 is managed out-of-line
24167 by a linker generated plt call stub when the function resides in
24168 a shared library. This behaviour is costly to describe in DWARF,
24169 both in terms of the size of DWARF info and the time taken in the
24170 unwinder to interpret it. R2 changes, apart from the
24171 calls_eh_return case earlier in this function, are handled by
24172 linux-unwind.h frob_update_context. */
24173 if (rs6000_save_toc_in_prologue_p ())
24175 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
24176 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
24180 /* Write function prologue. */
24182 static void
24183 rs6000_output_function_prologue (FILE *file,
24184 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24186 rs6000_stack_t *info = rs6000_stack_info ();
24188 if (TARGET_DEBUG_STACK)
24189 debug_stack_info (info);
24191 /* Write .extern for any function we will call to save and restore
24192 fp values. */
24193 if (info->first_fp_reg_save < 64
24194 && !TARGET_MACHO
24195 && !TARGET_ELF)
24197 char *name;
24198 int regno = info->first_fp_reg_save - 32;
24200 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
24202 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24203 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24204 name = rs6000_savres_routine_name (info, regno, sel);
24205 fprintf (file, "\t.extern %s\n", name);
24207 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
24209 bool lr = (info->savres_strategy
24210 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24211 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24212 name = rs6000_savres_routine_name (info, regno, sel);
24213 fprintf (file, "\t.extern %s\n", name);
24217 /* ELFv2 ABI r2 setup code and local entry point. This must follow
24218 immediately after the global entry point label. */
24219 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
24221 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24223 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
24224 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
24226 fputs ("\t.localentry\t", file);
24227 assemble_name (file, name);
24228 fputs (",.-", file);
24229 assemble_name (file, name);
24230 fputs ("\n", file);
24233 /* Output -mprofile-kernel code. This needs to be done here instead of
24234 in output_function_profile since it must go after the ELFv2 ABI
24235 local entry point. */
24236 if (TARGET_PROFILE_KERNEL && crtl->profile)
24238 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24239 gcc_assert (!TARGET_32BIT);
24241 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
24242 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
24244 /* In the ELFv2 ABI we have no compiler stack word. It must be
24245 the responsibility of _mcount to preserve the static chain
24246 register if required. */
24247 if (DEFAULT_ABI != ABI_ELFv2
24248 && cfun->static_chain_decl != NULL)
24250 asm_fprintf (file, "\tstd %s,24(%s)\n",
24251 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24252 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24253 asm_fprintf (file, "\tld %s,24(%s)\n",
24254 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24256 else
24257 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24260 rs6000_pic_labelno++;
24263 /* Non-zero if vmx regs are restored before the frame pop, zero if
24264 we restore after the pop when possible. */
24265 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24267 /* Restoring cr is a two-step process: loading a reg from the frame
24268 save, then moving the reg to cr. For ABI_V4 we must let the
24269 unwinder know that the stack location is no longer valid at or
24270 before the stack deallocation, but we can't emit a cfa_restore for
24271 cr at the stack deallocation like we do for other registers.
24272 The trouble is that it is possible for the move to cr to be
24273 scheduled after the stack deallocation. So say exactly where cr
24274 is located on each of the two insns. */
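/* (As a sketch, the two insns in question are:
       lwz   12,<cr-off>(1)   # load_cr_save
       mtcrf ...,12           # restore_saved_cr, possibly one
                              # move per saved CR field
   and for ABI_V4 both carry notes saying where cr lives.)  */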
24276 static rtx
24277 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24279 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24280 rtx reg = gen_rtx_REG (SImode, regno);
24281 rtx_insn *insn = emit_move_insn (reg, mem);
24283 if (!exit_func && DEFAULT_ABI == ABI_V4)
24285 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24286 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
24288 add_reg_note (insn, REG_CFA_REGISTER, set);
24289 RTX_FRAME_RELATED_P (insn) = 1;
24291 return reg;
24294 /* Reload CR from REG. */
24296 static void
24297 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24299 int count = 0;
24300 int i;
24302 if (using_mfcr_multiple)
24304 for (i = 0; i < 8; i++)
24305 if (save_reg_p (CR0_REGNO + i))
24306 count++;
24307 gcc_assert (count);
24310 if (using_mfcr_multiple && count > 1)
24312 rtx_insn *insn;
24313 rtvec p;
24314 int ndx;
24316 p = rtvec_alloc (count);
24318 ndx = 0;
24319 for (i = 0; i < 8; i++)
24320 if (save_reg_p (CR0_REGNO + i))
24322 rtvec r = rtvec_alloc (2);
24323 RTVEC_ELT (r, 0) = reg;
24324 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
24325 RTVEC_ELT (p, ndx) =
24326 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
24327 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24328 ndx++;
24330 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24331 gcc_assert (ndx == count);
24333 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24334 CR field separately. */
24335 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24337 for (i = 0; i < 8; i++)
24338 if (save_reg_p (CR0_REGNO + i))
24339 add_reg_note (insn, REG_CFA_RESTORE,
24340 gen_rtx_REG (SImode, CR0_REGNO + i));
24342 RTX_FRAME_RELATED_P (insn) = 1;
24345 else
24346 for (i = 0; i < 8; i++)
24347 if (save_reg_p (CR0_REGNO + i))
24349 rtx insn = emit_insn (gen_movsi_to_cr_one
24350 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24352 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24353 CR field separately, attached to the insn that in fact
24354 restores this particular CR field. */
24355 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24357 add_reg_note (insn, REG_CFA_RESTORE,
24358 gen_rtx_REG (SImode, CR0_REGNO + i));
24360 RTX_FRAME_RELATED_P (insn) = 1;
24364 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24365 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24366 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24368 rtx_insn *insn = get_last_insn ();
24369 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24371 add_reg_note (insn, REG_CFA_RESTORE, cr);
24372 RTX_FRAME_RELATED_P (insn) = 1;
24376 /* Like cr, the move to lr instruction can be scheduled after the
24377 stack deallocation, but unlike cr, its stack frame save is still
24378 valid. So we only need to emit the cfa_restore on the correct
24379 instruction. */
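/* (i.e., as a sketch:
       lwz/ld 0,<lr-off>(1)   # load_lr_save
       mtlr   0               # restore_saved_lr
   with the REG_CFA_RESTORE note attached to the mtlr.)  */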
24381 static void
24382 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24384 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24385 rtx reg = gen_rtx_REG (Pmode, regno);
24387 emit_move_insn (reg, mem);
24390 static void
24391 restore_saved_lr (int regno, bool exit_func)
24393 rtx reg = gen_rtx_REG (Pmode, regno);
24394 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24395 rtx_insn *insn = emit_move_insn (lr, reg);
24397 if (!exit_func && flag_shrink_wrap)
24399 add_reg_note (insn, REG_CFA_RESTORE, lr);
24400 RTX_FRAME_RELATED_P (insn) = 1;
24404 static rtx
24405 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24407 if (DEFAULT_ABI == ABI_ELFv2)
24409 int i;
24410 for (i = 0; i < 8; i++)
24411 if (save_reg_p (CR0_REGNO + i))
24413 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24414 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24415 cfa_restores);
24418 else if (info->cr_save_p)
24419 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24420 gen_rtx_REG (SImode, CR2_REGNO),
24421 cfa_restores);
24423 if (info->lr_save_p)
24424 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24425 gen_rtx_REG (Pmode, LR_REGNO),
24426 cfa_restores);
24427 return cfa_restores;
24430 /* Return true if OFFSET from stack pointer can be clobbered by signals.
24431 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
24432 below the stack pointer not clobbered by signals. */
24434 static inline bool
24435 offset_below_red_zone_p (HOST_WIDE_INT offset)
24437 return offset < (DEFAULT_ABI == ABI_V4
24438 ? 0
24439 : TARGET_32BIT ? -220 : -288);
24442 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24444 static void
24445 emit_cfa_restores (rtx cfa_restores)
24447 rtx_insn *insn = get_last_insn ();
24448 rtx *loc = &REG_NOTES (insn);
24450 while (*loc)
24451 loc = &XEXP (*loc, 1);
24452 *loc = cfa_restores;
24453 RTX_FRAME_RELATED_P (insn) = 1;
24456 /* Emit function epilogue as insns. */
24458 void
24459 rs6000_emit_epilogue (int sibcall)
24461 rs6000_stack_t *info;
24462 int restoring_GPRs_inline;
24463 int restoring_FPRs_inline;
24464 int using_load_multiple;
24465 int using_mtcr_multiple;
24466 int use_backchain_to_restore_sp;
24467 int restore_lr;
24468 int strategy;
24469 HOST_WIDE_INT frame_off = 0;
24470 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24471 rtx frame_reg_rtx = sp_reg_rtx;
24472 rtx cfa_restores = NULL_RTX;
24473 rtx insn;
24474 rtx cr_save_reg = NULL_RTX;
24475 machine_mode reg_mode = Pmode;
24476 int reg_size = TARGET_32BIT ? 4 : 8;
24477 int i;
24478 bool exit_func;
24479 unsigned ptr_regno;
24481 info = rs6000_stack_info ();
24483 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24485 reg_mode = V2SImode;
24486 reg_size = 8;
24489 strategy = info->savres_strategy;
24490 using_load_multiple = strategy & SAVRES_MULTIPLE;
24491 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24492 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24493 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24494 || rs6000_cpu == PROCESSOR_PPC603
24495 || rs6000_cpu == PROCESSOR_PPC750
24496 || optimize_size);
24497 /* Restore via the backchain when we have a large frame, since this
24498 is more efficient than an addis, addi pair. The second condition
24499 here will not trigger at the moment; we don't actually need a
24500 frame pointer for alloca, but the generic parts of the compiler
24501 give us one anyway. */
24502 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24503 || (cfun->calls_alloca
24504 && !frame_pointer_needed));
24505 restore_lr = (info->lr_save_p
24506 && (restoring_FPRs_inline
24507 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24508 && (restoring_GPRs_inline
24509 || info->first_fp_reg_save < 64));
24511 if (WORLD_SAVE_P (info))
24513 int i, j;
24514 char rname[30];
24515 const char *alloc_rname;
24516 rtvec p;
24518 /* eh_rest_world_r10 will return to the location saved in the LR
24519 stack slot (which is not likely to be our caller).
24520 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24521 rest_world is similar, except any R10 parameter is ignored.
24522 The exception-handling stuff that was here in 2.95 is no
24523 longer necessary. */
24525 p = rtvec_alloc (9
24527 + 32 - info->first_gp_reg_save
24528 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24529 + 63 + 1 - info->first_fp_reg_save);
24531 strcpy (rname, ((crtl->calls_eh_return) ?
24532 "*eh_rest_world_r10" : "*rest_world"));
24533 alloc_rname = ggc_strdup (rname);
24535 j = 0;
24536 RTVEC_ELT (p, j++) = ret_rtx;
24537 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24538 gen_rtx_REG (Pmode,
24539 LR_REGNO));
24540 RTVEC_ELT (p, j++)
24541 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24542 /* The instruction pattern requires a clobber here;
24543 it is shared with the restVEC helper. */
24544 RTVEC_ELT (p, j++)
24545 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24548 /* CR register traditionally saved as CR2. */
24549 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24550 RTVEC_ELT (p, j++)
24551 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24552 if (flag_shrink_wrap)
24554 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24555 gen_rtx_REG (Pmode, LR_REGNO),
24556 cfa_restores);
24557 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24561 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24563 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24564 RTVEC_ELT (p, j++)
24565 = gen_frame_load (reg,
24566 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24567 if (flag_shrink_wrap)
24568 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24570 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24572 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24573 RTVEC_ELT (p, j++)
24574 = gen_frame_load (reg,
24575 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24576 if (flag_shrink_wrap)
24577 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24579 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24581 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24582 ? DFmode : SFmode),
24583 info->first_fp_reg_save + i);
24584 RTVEC_ELT (p, j++)
24585 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24586 if (flag_shrink_wrap)
24587 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24589 RTVEC_ELT (p, j++)
24590 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24591 RTVEC_ELT (p, j++)
24592 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24593 RTVEC_ELT (p, j++)
24594 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24595 RTVEC_ELT (p, j++)
24596 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24597 RTVEC_ELT (p, j++)
24598 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24599 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24601 if (flag_shrink_wrap)
24603 REG_NOTES (insn) = cfa_restores;
24604 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24605 RTX_FRAME_RELATED_P (insn) = 1;
24607 return;
24610 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24611 if (info->push_p)
24612 frame_off = info->total_size;
24614 /* Restore AltiVec registers if we must do so before adjusting the
24615 stack. */
24616 if (TARGET_ALTIVEC_ABI
24617 && info->altivec_size != 0
24618 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24619 || (DEFAULT_ABI != ABI_V4
24620 && offset_below_red_zone_p (info->altivec_save_offset))))
24622 int i;
24623 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24625 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24626 if (use_backchain_to_restore_sp)
24628 int frame_regno = 11;
24630 if ((strategy & REST_INLINE_VRS) == 0)
24632 /* Of r11 and r12, select the one not clobbered by an
24633 out-of-line restore function for the frame register. */
24634 frame_regno = 11 + 12 - scratch_regno;
24636 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24637 emit_move_insn (frame_reg_rtx,
24638 gen_rtx_MEM (Pmode, sp_reg_rtx));
24639 frame_off = 0;
24641 else if (frame_pointer_needed)
24642 frame_reg_rtx = hard_frame_pointer_rtx;
24644 if ((strategy & REST_INLINE_VRS) == 0)
24646 int end_save = info->altivec_save_offset + info->altivec_size;
24647 int ptr_off;
24648 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24649 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24651 if (end_save + frame_off != 0)
24653 rtx offset = GEN_INT (end_save + frame_off);
24655 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24657 else
24658 emit_move_insn (ptr_reg, frame_reg_rtx);
24660 ptr_off = -end_save;
24661 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24662 info->altivec_save_offset + ptr_off,
24663 0, V4SImode, SAVRES_VR);
24665 else
24667 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24668 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24670 rtx addr, areg, mem, reg;
24672 areg = gen_rtx_REG (Pmode, 0);
24673 emit_move_insn
24674 (areg, GEN_INT (info->altivec_save_offset
24675 + frame_off
24676 + 16 * (i - info->first_altivec_reg_save)));
24678 /* AltiVec addressing mode is [reg+reg]. */
24679 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24680 mem = gen_frame_mem (V4SImode, addr);
24682 reg = gen_rtx_REG (V4SImode, i);
24683 emit_move_insn (reg, mem);
24687 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24688 if (((strategy & REST_INLINE_VRS) == 0
24689 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24690 && (flag_shrink_wrap
24691 || (offset_below_red_zone_p
24692 (info->altivec_save_offset
24693 + 16 * (i - info->first_altivec_reg_save)))))
24695 rtx reg = gen_rtx_REG (V4SImode, i);
24696 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24700 /* Restore VRSAVE if we must do so before adjusting the stack. */
24701 if (TARGET_ALTIVEC
24702 && TARGET_ALTIVEC_VRSAVE
24703 && info->vrsave_mask != 0
24704 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24705 || (DEFAULT_ABI != ABI_V4
24706 && offset_below_red_zone_p (info->vrsave_save_offset))))
24708 rtx reg;
24710 if (frame_reg_rtx == sp_reg_rtx)
24712 if (use_backchain_to_restore_sp)
24714 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24715 emit_move_insn (frame_reg_rtx,
24716 gen_rtx_MEM (Pmode, sp_reg_rtx));
24717 frame_off = 0;
24719 else if (frame_pointer_needed)
24720 frame_reg_rtx = hard_frame_pointer_rtx;
24723 reg = gen_rtx_REG (SImode, 12);
24724 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24725 info->vrsave_save_offset + frame_off));
24727 emit_insn (generate_set_vrsave (reg, info, 1));
24730 insn = NULL_RTX;
24731 /* If we have a large stack frame, restore the old stack pointer
24732 using the backchain. */
24733 if (use_backchain_to_restore_sp)
24735 if (frame_reg_rtx == sp_reg_rtx)
24737 /* Under V.4, don't reset the stack pointer until after we're done
24738 loading the saved registers. */
24739 if (DEFAULT_ABI == ABI_V4)
24740 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24742 insn = emit_move_insn (frame_reg_rtx,
24743 gen_rtx_MEM (Pmode, sp_reg_rtx));
24744 frame_off = 0;
24746 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24747 && DEFAULT_ABI == ABI_V4)
24748 /* frame_reg_rtx has been set up by the altivec restore. */
24750 else
24752 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24753 frame_reg_rtx = sp_reg_rtx;
24756 /* If we have a frame pointer, we can restore the old stack pointer
24757 from it. */
24758 else if (frame_pointer_needed)
24760 frame_reg_rtx = sp_reg_rtx;
24761 if (DEFAULT_ABI == ABI_V4)
24762 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24763 /* Prevent reordering memory accesses against stack pointer restore. */
24764 else if (cfun->calls_alloca
24765 || offset_below_red_zone_p (-info->total_size))
24766 rs6000_emit_stack_tie (frame_reg_rtx, true);
24768 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24769 GEN_INT (info->total_size)));
24770 frame_off = 0;
24772 else if (info->push_p
24773 && DEFAULT_ABI != ABI_V4
24774 && !crtl->calls_eh_return)
24776 /* Prevent reordering memory accesses against stack pointer restore. */
24777 if (cfun->calls_alloca
24778 || offset_below_red_zone_p (-info->total_size))
24779 rs6000_emit_stack_tie (frame_reg_rtx, false);
24780 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24781 GEN_INT (info->total_size)));
24782 frame_off = 0;
24784 if (insn && frame_reg_rtx == sp_reg_rtx)
24786 if (cfa_restores)
24788 REG_NOTES (insn) = cfa_restores;
24789 cfa_restores = NULL_RTX;
24791 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24792 RTX_FRAME_RELATED_P (insn) = 1;
24795 /* Restore AltiVec registers if we have not done so already. */
24796 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24797 && TARGET_ALTIVEC_ABI
24798 && info->altivec_size != 0
24799 && (DEFAULT_ABI == ABI_V4
24800 || !offset_below_red_zone_p (info->altivec_save_offset)))
24802 int i;
24804 if ((strategy & REST_INLINE_VRS) == 0)
24806 int end_save = info->altivec_save_offset + info->altivec_size;
24807 int ptr_off;
24808 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24809 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24810 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24812 if (end_save + frame_off != 0)
24814 rtx offset = GEN_INT (end_save + frame_off);
24816 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24818 else
24819 emit_move_insn (ptr_reg, frame_reg_rtx);
24821 ptr_off = -end_save;
24822 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24823 info->altivec_save_offset + ptr_off,
24824 0, V4SImode, SAVRES_VR);
24825 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24827 /* Frame reg was clobbered by the out-of-line restore. Recover
24828 it from ptr_reg, and if we are calling out-of-line gpr or
24829 fpr restore, set up the correct pointer and offset. */
24830 unsigned newptr_regno = 1;
24831 if (!restoring_GPRs_inline)
24833 bool lr = info->gp_save_offset + info->gp_size == 0;
24834 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24835 newptr_regno = ptr_regno_for_savres (sel);
24836 end_save = info->gp_save_offset + info->gp_size;
24838 else if (!restoring_FPRs_inline)
24840 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
24841 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24842 newptr_regno = ptr_regno_for_savres (sel);
24843 end_save = info->fp_save_offset + info->fp_size;
24846 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
24847 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
24849 if (end_save + ptr_off != 0)
24851 rtx offset = GEN_INT (end_save + ptr_off);
24853 frame_off = -end_save;
24854 emit_insn (gen_add3_insn (frame_reg_rtx, ptr_reg, offset));
24856 else
24858 frame_off = ptr_off;
24859 emit_move_insn (frame_reg_rtx, ptr_reg);
24863 else
24865 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24866 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24868 rtx addr, areg, mem, reg;
24870 areg = gen_rtx_REG (Pmode, 0);
24871 emit_move_insn
24872 (areg, GEN_INT (info->altivec_save_offset
24873 + frame_off
24874 + 16 * (i - info->first_altivec_reg_save)));
24876 /* AltiVec addressing mode is [reg+reg]. */
24877 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24878 mem = gen_frame_mem (V4SImode, addr);
24880 reg = gen_rtx_REG (V4SImode, i);
24881 emit_move_insn (reg, mem);
24885 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24886 if (((strategy & REST_INLINE_VRS) == 0
24887 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24888 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24890 rtx reg = gen_rtx_REG (V4SImode, i);
24891 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24895 /* Restore VRSAVE if we have not done so already. */
24896 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24897 && TARGET_ALTIVEC
24898 && TARGET_ALTIVEC_VRSAVE
24899 && info->vrsave_mask != 0
24900 && (DEFAULT_ABI == ABI_V4
24901 || !offset_below_red_zone_p (info->vrsave_save_offset)))
24903 rtx reg;
24905 reg = gen_rtx_REG (SImode, 12);
24906 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24907 info->vrsave_save_offset + frame_off));
24909 emit_insn (generate_set_vrsave (reg, info, 1));
24912 /* If we exit by an out-of-line restore function on ABI_V4 then that
24913 function will deallocate the stack, so we don't need to worry
24914 about the unwinder restoring cr from an invalid stack frame
24915 location. */
24916 exit_func = (!restoring_FPRs_inline
24917 || (!restoring_GPRs_inline
24918 && info->first_fp_reg_save == 64));
24920 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
24921 *separate* slots if the routine calls __builtin_eh_return, so
24922 that they can be independently restored by the unwinder. */
24923 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24925 int i, cr_off = info->ehcr_offset;
24927 for (i = 0; i < 8; i++)
24928 if (!call_used_regs[CR0_REGNO + i])
24930 rtx reg = gen_rtx_REG (SImode, 0);
24931 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24932 cr_off + frame_off));
24934 insn = emit_insn (gen_movsi_to_cr_one
24935 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24937 if (!exit_func && flag_shrink_wrap)
24939 add_reg_note (insn, REG_CFA_RESTORE,
24940 gen_rtx_REG (SImode, CR0_REGNO + i));
24942 RTX_FRAME_RELATED_P (insn) = 1;
24945 cr_off += reg_size;
24949 /* Get the old lr if we saved it. If we are restoring registers
24950 out-of-line, then the out-of-line routines can do this for us. */
24951 if (restore_lr && restoring_GPRs_inline)
24952 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
24954 /* Get the old cr if we saved it. */
24955 if (info->cr_save_p)
24957 unsigned cr_save_regno = 12;
24959 if (!restoring_GPRs_inline)
24961 /* Ensure we don't use the register used by the out-of-line
24962 gpr restore below. */
24963 bool lr = info->gp_save_offset + info->gp_size == 0;
24964 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24965 int gpr_ptr_regno = ptr_regno_for_savres (sel);
24967 if (gpr_ptr_regno == 12)
24968 cr_save_regno = 11;
24969 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
24971 else if (REGNO (frame_reg_rtx) == 12)
24972 cr_save_regno = 11;
24974 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
24975 info->cr_save_offset + frame_off,
24976 exit_func);
24979 /* Set LR here to try to overlap restores below. */
24980 if (restore_lr && restoring_GPRs_inline)
24981 restore_saved_lr (0, exit_func);
24983 /* Load exception handler data registers, if needed. */
24984 if (crtl->calls_eh_return)
24986 unsigned int i, regno;
24988 if (TARGET_AIX)
24990 rtx reg = gen_rtx_REG (reg_mode, 2);
24991 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24992 frame_off + RS6000_TOC_SAVE_SLOT));
24995 for (i = 0; ; ++i)
24997 rtx mem;
24999 regno = EH_RETURN_DATA_REGNO (i);
25000 if (regno == INVALID_REGNUM)
25001 break;
25003 /* Note: possible use of r0 here to address SPE regs. */
25004 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25005 info->ehrd_offset + frame_off
25006 + reg_size * (int) i);
25008 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25012 /* Restore GPRs. This is done as a PARALLEL if we are using
25013 the load-multiple instructions. */
25014 if (TARGET_SPE_ABI
25015 && info->spe_64bit_regs_used
25016 && info->first_gp_reg_save != 32)
25018 /* Determine whether we can address all of the registers that need
25019 to be saved with an offset from frame_reg_rtx that fits in
25020 the small const field for SPE memory instructions. */
25021 int spe_regs_addressable
25022 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25023 + reg_size * (32 - info->first_gp_reg_save - 1))
25024 && restoring_GPRs_inline);
25026 if (!spe_regs_addressable)
25028 int ool_adjust = 0;
25029 rtx old_frame_reg_rtx = frame_reg_rtx;
25030 /* Make r11 point to the start of the SPE save area. We worried about
25031 not clobbering it when we were saving registers in the prologue.
25032 There's no need to worry here because the static chain is passed
25033 anew to every function. */
25035 if (!restoring_GPRs_inline)
25036 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25037 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25038 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25039 GEN_INT (info->spe_gp_save_offset
25040 + frame_off
25041 - ool_adjust)));
25042 /* Keep the invariant that frame_reg_rtx + frame_off points
25043 at the top of the stack frame. */
25044 frame_off = -info->spe_gp_save_offset + ool_adjust;
25047 if (restoring_GPRs_inline)
25049 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25051 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25052 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25054 rtx offset, addr, mem, reg;
25056 /* We're doing all this to ensure that the immediate offset
25057 fits into the immediate field of 'evldd'. */
25058 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25060 offset = GEN_INT (spe_offset + reg_size * i);
25061 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25062 mem = gen_rtx_MEM (V2SImode, addr);
25063 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25065 emit_move_insn (reg, mem);
25068 else
25069 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25070 info->spe_gp_save_offset + frame_off,
25071 info->lr_save_offset + frame_off,
25072 reg_mode,
25073 SAVRES_GPR | SAVRES_LR);
25075 else if (!restoring_GPRs_inline)
25077 /* We are jumping to an out-of-line function. */
25078 rtx ptr_reg;
25079 int end_save = info->gp_save_offset + info->gp_size;
25080 bool can_use_exit = end_save == 0;
25081 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25082 int ptr_off;
25084 /* Emit stack reset code if we need it. */
25085 ptr_regno = ptr_regno_for_savres (sel);
25086 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25087 if (can_use_exit)
25088 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25089 else if (end_save + frame_off != 0)
25090 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25091 GEN_INT (end_save + frame_off)));
25092 else if (REGNO (frame_reg_rtx) != ptr_regno)
25093 emit_move_insn (ptr_reg, frame_reg_rtx);
25094 if (REGNO (frame_reg_rtx) == ptr_regno)
25095 frame_off = -end_save;
25097 if (can_use_exit && info->cr_save_p)
25098 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25100 ptr_off = -end_save;
25101 rs6000_emit_savres_rtx (info, ptr_reg,
25102 info->gp_save_offset + ptr_off,
25103 info->lr_save_offset + ptr_off,
25104 reg_mode, sel);
25106 else if (using_load_multiple)
25108 rtvec p;
25109 p = rtvec_alloc (32 - info->first_gp_reg_save);
25110 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25111 RTVEC_ELT (p, i)
25112 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25113 frame_reg_rtx,
25114 info->gp_save_offset + frame_off + reg_size * i);
25115 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25117 else
25119 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25120 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25121 emit_insn (gen_frame_load
25122 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25123 frame_reg_rtx,
25124 info->gp_save_offset + frame_off + reg_size * i));
25127 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25129 /* If the frame pointer was used then we can't delay emitting
25130 a REG_CFA_DEF_CFA note. This must happen on the insn that
25131 restores the frame pointer, r31. We may have already emitted
25132 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
25133 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
25134 be harmless if emitted. */
25135 if (frame_pointer_needed)
25137 insn = get_last_insn ();
25138 add_reg_note (insn, REG_CFA_DEF_CFA,
25139 plus_constant (Pmode, frame_reg_rtx, frame_off));
25140 RTX_FRAME_RELATED_P (insn) = 1;
25143 /* Set up cfa_restores. We always need these when
25144 shrink-wrapping. If not shrink-wrapping then we only need
25145 the cfa_restore when the stack location is no longer valid.
25146 The cfa_restores must be emitted on or before the insn that
25147 invalidates the stack, and of course must not be emitted
25148 before the insn that actually does the restore. The latter
25149 is why it is a bad idea to emit the cfa_restores as a group
25150 on the last instruction here that actually does a restore:
25151 That insn may be reordered with respect to others doing
25152 restores. */
25153 if (flag_shrink_wrap
25154 && !restoring_GPRs_inline
25155 && info->first_fp_reg_save == 64)
25156 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25158 for (i = info->first_gp_reg_save; i < 32; i++)
25159 if (!restoring_GPRs_inline
25160 || using_load_multiple
25161 || rs6000_reg_live_or_pic_offset_p (i))
25163 rtx reg = gen_rtx_REG (reg_mode, i);
25165 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25169 if (!restoring_GPRs_inline
25170 && info->first_fp_reg_save == 64)
25172 /* We are jumping to an out-of-line function. */
25173 if (cfa_restores)
25174 emit_cfa_restores (cfa_restores);
25175 return;
25178 if (restore_lr && !restoring_GPRs_inline)
25180 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25181 restore_saved_lr (0, exit_func);
25184 /* Restore fpr's if we need to do it without calling a function. */
25185 if (restoring_FPRs_inline)
25186 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25187 if (save_reg_p (info->first_fp_reg_save + i))
25189 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25190 ? DFmode : SFmode),
25191 info->first_fp_reg_save + i);
25192 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25193 info->fp_save_offset + frame_off + 8 * i));
25194 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25195 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25198 /* If we saved cr, restore it here. Just those that were used. */
25199 if (info->cr_save_p)
25200 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
25202 /* If this is V.4, unwind the stack pointer after all of the loads
25203 have been done, or set up r11 if we are restoring fp out of line. */
25204 ptr_regno = 1;
25205 if (!restoring_FPRs_inline)
25207 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25208 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25209 ptr_regno = ptr_regno_for_savres (sel);
25212 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25213 if (REGNO (frame_reg_rtx) == ptr_regno)
25214 frame_off = 0;
25216 if (insn && restoring_FPRs_inline)
25218 if (cfa_restores)
25220 REG_NOTES (insn) = cfa_restores;
25221 cfa_restores = NULL_RTX;
25223 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25224 RTX_FRAME_RELATED_P (insn) = 1;
25227 if (crtl->calls_eh_return)
25229 rtx sa = EH_RETURN_STACKADJ_RTX;
25230 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
25233 if (!sibcall)
25235 rtvec p;
25236 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25237 if (! restoring_FPRs_inline)
25239 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
25240 RTVEC_ELT (p, 0) = ret_rtx;
25242 else
25244 if (cfa_restores)
25246 /* We can't hang the cfa_restores off a simple return,
25247 since the shrink-wrap code sometimes uses an existing
25248 return. This means there might be a path from
25249 pre-prologue code to this return, and dwarf2cfi code
25250 wants the eh_frame unwinder state to be the same on
25251 all paths to any point. So we need to emit the
25252 cfa_restores before the return. For -m64 we really
25253 don't need epilogue cfa_restores at all, except for
25254 this irritating dwarf2cfi-with-shrink-wrap
25255 requirement; the stack red-zone means eh_frame info
25256 from the prologue telling the unwinder to restore
25257 from the stack is perfectly good right to the end of
25258 the function. */
25259 emit_insn (gen_blockage ());
25260 emit_cfa_restores (cfa_restores);
25261 cfa_restores = NULL_RTX;
25263 p = rtvec_alloc (2);
25264 RTVEC_ELT (p, 0) = simple_return_rtx;
25267 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25268 ? gen_rtx_USE (VOIDmode,
25269 gen_rtx_REG (Pmode, LR_REGNO))
25270 : gen_rtx_CLOBBER (VOIDmode,
25271 gen_rtx_REG (Pmode, LR_REGNO)));
25273 /* If we have to restore more than two FP registers, branch to the
25274 restore function. It will return to our caller. */
25275 if (! restoring_FPRs_inline)
25277 int i;
25278 int reg;
25279 rtx sym;
25281 if (flag_shrink_wrap)
25282 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25284 sym = rs6000_savres_routine_sym (info,
25285 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25286 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25287 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
25288 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25290 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25292 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25294 RTVEC_ELT (p, i + 4)
25295 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25296 if (flag_shrink_wrap)
25297 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25298 cfa_restores);
25302 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25305 if (cfa_restores)
25307 if (sibcall)
25308 /* Ensure the cfa_restores are hung off an insn that won't
25309 be reordered above other restores. */
25310 emit_insn (gen_blockage ());
25312 emit_cfa_restores (cfa_restores);
25316 /* Write function epilogue. */
25318 static void
25319 rs6000_output_function_epilogue (FILE *file,
25320 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25322 #if TARGET_MACHO
25323 macho_branch_islands ();
25324 /* Mach-O doesn't support labels at the end of objects, so if
25325 it looks like we might want one, insert a NOP. */
25327 rtx_insn *insn = get_last_insn ();
25328 rtx_insn *deleted_debug_label = NULL;
25329 while (insn
25330 && NOTE_P (insn)
25331 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25333 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
25334 notes; instead just set their CODE_LABEL_NUMBER to -1,
25335 otherwise there would be code generation differences
25336 between -g and -g0. */
25337 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25338 deleted_debug_label = insn;
25339 insn = PREV_INSN (insn);
25341 if (insn
25342 && (LABEL_P (insn)
25343 || (NOTE_P (insn)
25344 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25345 fputs ("\tnop\n", file);
25346 else if (deleted_debug_label)
25347 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25348 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25349 CODE_LABEL_NUMBER (insn) = -1;
25351 #endif
25353 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25354 on its format.
25356 We don't output a traceback table if -finhibit-size-directive was
25357 used. The documentation for -finhibit-size-directive reads
25358 ``don't output a @code{.size} assembler directive, or anything
25359 else that would cause trouble if the function is split in the
25360 middle, and the two halves are placed at locations far apart in
25361 memory.'' The traceback table has this property, since it
25362 includes the offset from the start of the function to the
25363 traceback table itself.
25365	     System V.4 PowerPC (and the embedded ABI derived from it) uses a
25366	     different traceback table.  */
25367 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25368 && ! flag_inhibit_size_directive
25369 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25371 const char *fname = NULL;
25372 const char *language_string = lang_hooks.name;
25373 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25374 int i;
25375 int optional_tbtab;
25376 rs6000_stack_t *info = rs6000_stack_info ();
25378 if (rs6000_traceback == traceback_full)
25379 optional_tbtab = 1;
25380 else if (rs6000_traceback == traceback_part)
25381 optional_tbtab = 0;
25382 else
25383 optional_tbtab = !optimize_size && !TARGET_ELF;
25385 if (optional_tbtab)
25387 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25388 while (*fname == '.') /* V.4 encodes . in the name */
25389 fname++;
25391 /* Need label immediately before tbtab, so we can compute
25392 its offset from the function start. */
25393 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25394 ASM_OUTPUT_LABEL (file, fname);
25397 /* The .tbtab pseudo-op can only be used for the first eight
25398 expressions, since it can't handle the possibly variable
25399 length fields that follow. However, if you omit the optional
25400	     fields, the assembler outputs zeros for all optional fields
25401	     anyway, giving each variable-length field its minimum length
25402	     (as defined in sys/debug.h).  Thus we cannot use the .tbtab
25403	     pseudo-op at all.  */
25405 /* An all-zero word flags the start of the tbtab, for debuggers
25406 that have to find it by searching forward from the entry
25407 point or from the current pc. */
25408 fputs ("\t.long 0\n", file);
25410 /* Tbtab format type. Use format type 0. */
25411 fputs ("\t.byte 0,", file);
25413 /* Language type. Unfortunately, there does not seem to be any
25414 official way to discover the language being compiled, so we
25415 use language_string.
25416 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25417 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25418 a number, so for now use 9. LTO and Go aren't assigned numbers
25419 either, so for now use 0. */
25420 if (lang_GNU_C ()
25421 || ! strcmp (language_string, "GNU GIMPLE")
25422 || ! strcmp (language_string, "GNU Go"))
25423 i = 0;
25424 else if (! strcmp (language_string, "GNU F77")
25425 || ! strcmp (language_string, "GNU Fortran"))
25426 i = 1;
25427 else if (! strcmp (language_string, "GNU Pascal"))
25428 i = 2;
25429 else if (! strcmp (language_string, "GNU Ada"))
25430 i = 3;
25431 else if (lang_GNU_CXX ()
25432 || ! strcmp (language_string, "GNU Objective-C++"))
25433 i = 9;
25434 else if (! strcmp (language_string, "GNU Java"))
25435 i = 13;
25436 else if (! strcmp (language_string, "GNU Objective-C"))
25437 i = 14;
25438 else
25439 gcc_unreachable ();
25440 fprintf (file, "%d,", i);
25442 /* 8 single bit fields: global linkage (not set for C extern linkage,
25443 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25444 from start of procedure stored in tbtab, internal function, function
25445 has controlled storage, function has no toc, function uses fp,
25446 function logs/aborts fp operations. */
25447 /* Assume that fp operations are used if any fp reg must be saved. */
25448 fprintf (file, "%d,",
25449 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25451 /* 6 bitfields: function is interrupt handler, name present in
25452 proc table, function calls alloca, on condition directives
25453 (controls stack walks, 3 bits), saves condition reg, saves
25454 link reg. */
25455 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25456 set up as a frame pointer, even when there is no alloca call. */
25457 fprintf (file, "%d,",
25458 ((optional_tbtab << 6)
25459 | ((optional_tbtab & frame_pointer_needed) << 5)
25460 | (info->cr_save_p << 1)
25461 | (info->lr_save_p)));
25463 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25464 (6 bits). */
25465 fprintf (file, "%d,",
25466 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25468 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25469 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25471 if (optional_tbtab)
25473 /* Compute the parameter info from the function decl argument
25474 list. */
25475 tree decl;
25476 int next_parm_info_bit = 31;
25478 for (decl = DECL_ARGUMENTS (current_function_decl);
25479 decl; decl = DECL_CHAIN (decl))
25481 rtx parameter = DECL_INCOMING_RTL (decl);
25482 machine_mode mode = GET_MODE (parameter);
25484 if (GET_CODE (parameter) == REG)
25486 if (SCALAR_FLOAT_MODE_P (mode))
25488 int bits;
25490 float_parms++;
25492 switch (mode)
25494 case SFmode:
25495 case SDmode:
25496 bits = 0x2;
25497 break;
25499 case DFmode:
25500 case DDmode:
25501 case TFmode:
25502 case TDmode:
25503 bits = 0x3;
25504 break;
25506 default:
25507 gcc_unreachable ();
25510 /* If only one bit will fit, don't or in this entry. */
25511 if (next_parm_info_bit > 0)
25512 parm_info |= (bits << (next_parm_info_bit - 1));
25513 next_parm_info_bit -= 2;
25515 else
25517 fixed_parms += ((GET_MODE_SIZE (mode)
25518 + (UNITS_PER_WORD - 1))
25519 / UNITS_PER_WORD);
25520 next_parm_info_bit -= 1;
25526 /* Number of fixed point parameters. */
25527 /* This is actually the number of words of fixed point parameters; thus
25528 an 8 byte struct counts as 2; and thus the maximum value is 8. */
25529 fprintf (file, "%d,", fixed_parms);
25531 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25532 all on stack. */
25533 /* This is actually the number of fp registers that hold parameters;
25534 and thus the maximum value is 13. */
25535 /* Set parameters on stack bit if parameters are not in their original
25536 registers, regardless of whether they are on the stack? Xlc
25537 seems to set the bit when not optimizing. */
25538 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25540 if (! optional_tbtab)
25541 return;
25543 /* Optional fields follow. Some are variable length. */
25545 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25546 11 double float. */
25547 /* There is an entry for each parameter in a register, in the order that
25548 they occur in the parameter list. Any intervening arguments on the
25549 stack are ignored. If the list overflows a long (max possible length
25550 34 bits) then completely leave off all elements that don't fit. */
25551 /* Only emit this long if there was at least one parameter. */
25552 if (fixed_parms || float_parms)
25553 fprintf (file, "\t.long %d\n", parm_info);
25555 /* Offset from start of code to tb table. */
25556 fputs ("\t.long ", file);
25557 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25558 RS6000_OUTPUT_BASENAME (file, fname);
25559 putc ('-', file);
25560 rs6000_output_function_entry (file, fname);
25561 putc ('\n', file);
25563 /* Interrupt handler mask. */
25564 /* Omit this long, since we never set the interrupt handler bit
25565 above. */
25567 /* Number of CTL (controlled storage) anchors. */
25568 /* Omit this long, since the has_ctl bit is never set above. */
25570 /* Displacement into stack of each CTL anchor. */
25571 /* Omit this list of longs, because there are no CTL anchors. */
25573 /* Length of function name. */
25574 if (*fname == '*')
25575 ++fname;
25576 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25578 /* Function name. */
25579 assemble_string (fname, strlen (fname));
25581 /* Register for alloca automatic storage; this is always reg 31.
25582 Only emit this if the alloca bit was set above. */
25583 if (frame_pointer_needed)
25584 fputs ("\t.byte 31\n", file);
25586 fputs ("\t.align 2\n", file);
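      /* A hypothetical sketch of the emitted table (byte values invented
	 for illustration; the real ones depend on the flags computed
	 above).  For a small function "foo" on 64-bit AIX the output
	 might resemble:

	   LT..foo:
		.long 0			# all-zero word marking the start
		.byte 0,0,32,65,...	# format, language, bit-field bytes
		.long LT..foo-.foo	# offset from function start
		.short 3
		.byte "foo"		# name length followed by the name
		.align 2
      */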
25590 /* A C compound statement that outputs the assembler code for a thunk
25591 function, used to implement C++ virtual function calls with
25592 multiple inheritance. The thunk acts as a wrapper around a virtual
25593 function, adjusting the implicit object parameter before handing
25594 control off to the real function.
25596 First, emit code to add the integer DELTA to the location that
25597 contains the incoming first argument. Assume that this argument
25598 contains a pointer, and is the one used to pass the `this' pointer
25599 in C++. This is the incoming argument *before* the function
25600 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25601 values of all other incoming arguments.
25603 After the addition, emit code to jump to FUNCTION, which is a
25604 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25605 not touch the return address. Hence returning from FUNCTION will
25606 return to whoever called the current `thunk'.
25608 The effect must be as if FUNCTION had been called directly with the
25609 adjusted first argument. This macro is responsible for emitting
25610 all of the code for a thunk function; output_function_prologue()
25611 and output_function_epilogue() are not invoked.
25613 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25614 been extracted from it.) It might possibly be useful on some
25615 targets, but probably not.
25617 If you do not define this macro, the target-independent code in the
25618 C++ frontend will generate a less efficient heavyweight thunk that
25619 calls FUNCTION instead of jumping to it. The generic approach does
25620 not support varargs. */
25622 static void
25623 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25624 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25625 tree function)
25627 rtx this_rtx, funexp;
25628 rtx_insn *insn;
25630 reload_completed = 1;
25631 epilogue_completed = 1;
25633 /* Mark the end of the (empty) prologue. */
25634 emit_note (NOTE_INSN_PROLOGUE_END);
25636 /* Find the "this" pointer. If the function returns a structure,
25637 the structure return pointer is in r3. */
25638 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25639 this_rtx = gen_rtx_REG (Pmode, 4);
25640 else
25641 this_rtx = gen_rtx_REG (Pmode, 3);
25643 /* Apply the constant offset, if required. */
25644 if (delta)
25645 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25647 /* Apply the offset from the vtable, if required. */
25648 if (vcall_offset)
25650 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25651 rtx tmp = gen_rtx_REG (Pmode, 12);
25653 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25654 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25656 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25657 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25659 else
25661 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25663 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25665 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25668 /* Generate a tail call to the target function. */
25669 if (!TREE_USED (function))
25671 assemble_external (function);
25672 TREE_USED (function) = 1;
25674 funexp = XEXP (DECL_RTL (function), 0);
25675 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25677 #if TARGET_MACHO
25678 if (MACHOPIC_INDIRECT)
25679 funexp = machopic_indirect_call_target (funexp);
25680 #endif
25682 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25683 generate sibcall RTL explicitly. */
25684 insn = emit_call_insn (
25685 gen_rtx_PARALLEL (VOIDmode,
25686 gen_rtvec (4,
25687 gen_rtx_CALL (VOIDmode,
25688 funexp, const0_rtx),
25689 gen_rtx_USE (VOIDmode, const0_rtx),
25690 gen_rtx_USE (VOIDmode,
25691 gen_rtx_REG (SImode,
25692 LR_REGNO)),
25693 simple_return_rtx)));
25694 SIBLING_CALL_P (insn) = 1;
25695 emit_barrier ();
25697 /* Ensure we have a global entry point for the thunk. ??? We could
25698 avoid that if the target routine doesn't need a global entry point,
25699 but we do not know whether this is the case at this point. */
25700 if (DEFAULT_ABI == ABI_ELFv2)
25701 cfun->machine->r2_setup_needed = true;
25703 /* Run just enough of rest_of_compilation to get the insns emitted.
25704 There's not really enough bulk here to make other passes such as
25705	     instruction scheduling worthwhile.  Note that use_thunk calls
25706 assemble_start_function and assemble_end_function. */
25707 insn = get_insns ();
25708 shorten_branches (insn);
25709 final_start_function (insn, file, 1);
25710 final (insn, file, 1);
25711 final_end_function ();
25713 reload_completed = 0;
25714 epilogue_completed = 0;
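/* Conceptually (an illustrative C sketch, not part of GCC), the thunk
   emitted above behaves like:

     void
     thunk (void *this_p)
     {
       this_p = (char *) this_p + delta;	/* constant adjustment */
       if (vcall_offset)
	 {
	   /* Load the vtable pointer, then the adjustment stored at
	      VCALL_OFFSET within the vtable.  */
	   char *vtbl = *(char **) this_p;
	   this_p = (char *) this_p + *(ptrdiff_t *) (vtbl + vcall_offset);
	 }
       /* Tail-jump, not a call: FUNCTION returns to the thunk's caller.  */
       goto_function (this_p);
     }

   where goto_function stands for the sibcall RTL generated above.  */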
25717 /* A quick summary of the various types of 'constant-pool tables'
25718 under PowerPC:
25720	Target		Flags		Name		One table per
25721	AIX		(none)		AIX TOC		object file
25722	AIX		-mfull-toc	AIX TOC		object file
25723	AIX		-mminimal-toc	AIX minimal TOC	translation unit
25724	SVR4/EABI	(none)		SVR4 SDATA	object file
25725	SVR4/EABI	-fpic		SVR4 pic	object file
25726	SVR4/EABI	-fPIC		SVR4 PIC	translation unit
25727	SVR4/EABI	-mrelocatable	EABI TOC	function
25728	SVR4/EABI	-maix		AIX TOC		object file
25729	SVR4/EABI	-maix -mminimal-toc
25730					AIX minimal TOC	translation unit
25732	Name			Reg.	Set by	entries	contains:
25733					made by	 addrs?	fp?	sum?
25735	AIX TOC			2	crt0	as	Y	option	option
25736	AIX minimal TOC		30	prolog	gcc	Y	Y	option
25737	SVR4 SDATA		13	crt0	gcc	N	Y	N
25738	SVR4 pic		30	prolog	ld	Y	not yet	N
25739	SVR4 PIC		30	prolog	gcc	Y	option	option
25740	EABI TOC		30	prolog	gcc	Y	option	option
25744 /* Hash functions for the hash table. */
25746 static unsigned
25747 rs6000_hash_constant (rtx k)
25749 enum rtx_code code = GET_CODE (k);
25750 machine_mode mode = GET_MODE (k);
25751 unsigned result = (code << 3) ^ mode;
25752 const char *format;
25753 int flen, fidx;
25755 format = GET_RTX_FORMAT (code);
25756 flen = strlen (format);
25757 fidx = 0;
25759 switch (code)
25761 case LABEL_REF:
25762 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25764 case CONST_WIDE_INT:
25766 int i;
25767 flen = CONST_WIDE_INT_NUNITS (k);
25768 for (i = 0; i < flen; i++)
25769 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
25770 return result;
25773 case CONST_DOUBLE:
25774 if (mode != VOIDmode)
25775 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25776 flen = 2;
25777 break;
25779 case CODE_LABEL:
25780 fidx = 3;
25781 break;
25783 default:
25784 break;
25787 for (; fidx < flen; fidx++)
25788 switch (format[fidx])
25790 case 's':
25792 unsigned i, len;
25793 const char *str = XSTR (k, fidx);
25794 len = strlen (str);
25795 result = result * 613 + len;
25796 for (i = 0; i < len; i++)
25797 result = result * 613 + (unsigned) str[i];
25798 break;
25800 case 'u':
25801 case 'e':
25802 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
25803 break;
25804 case 'i':
25805 case 'n':
25806 result = result * 613 + (unsigned) XINT (k, fidx);
25807 break;
25808 case 'w':
25809 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
25810 result = result * 613 + (unsigned) XWINT (k, fidx);
25811 else
25813 size_t i;
25814 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
25815 result = result * 613 + (unsigned) (XWINT (k, fidx)
25816 >> CHAR_BIT * i);
25818 break;
25819 case '0':
25820 break;
25821 default:
25822 gcc_unreachable ();
25825 return result;
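/* Illustrative sketch (assumed example, not from the sources): the 's'
   case above is a plain multiplicative string hash.  Written standalone:

     static unsigned
     hash_string (const char *str, unsigned result)
     {
       size_t i, len = strlen (str);
       result = result * 613 + (unsigned) len;
       for (i = 0; i < len; i++)
	 result = result * 613 + (unsigned) str[i];
       return result;
     }

   613 mixes in leaf data, while 1231 separates nested operands.  */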
25828 hashval_t
25829 toc_hasher::hash (toc_hash_struct *thc)
25831 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
25834 /* Compare H1 and H2 for equivalence. */
25836 bool
25837 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
25839 rtx r1 = h1->key;
25840 rtx r2 = h2->key;
25842 if (h1->key_mode != h2->key_mode)
25843 return 0;
25845 return rtx_equal_p (r1, r2);
25848 /* These are the names given by the C++ front-end to vtables and
25849    vtable-like objects.  Ideally, this logic should not be here;
25850 instead, there should be some programmatic way of inquiring as
25851 to whether or not an object is a vtable. */
25853 #define VTABLE_NAME_P(NAME) \
25854 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
25855 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
25856 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
25857 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
25858 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
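/* Usage illustration (hypothetical names): with Itanium C++ ABI mangling,
   VTABLE_NAME_P is true for "_ZTV4Base" (vtable) and "_ZTI4Base"
   (typeinfo), but false for an ordinary symbol such as "_Z3foov".  */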
25860 #ifdef NO_DOLLAR_IN_LABEL
25861 /* Return a GGC-allocated character string translating dollar signs in
25862 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
25864 const char *
25865 rs6000_xcoff_strip_dollar (const char *name)
25867 char *strip, *p;
25868 const char *q;
25869 size_t len;
25871 q = (const char *) strchr (name, '$');
25873 if (q == 0 || q == name)
25874 return name;
25876 len = strlen (name);
25877 strip = XALLOCAVEC (char, len + 1);
25878 strcpy (strip, name);
25879 p = strip + (q - name);
25880 while (p)
25882 *p = '_';
25883 p = strchr (p + 1, '$');
25886 return ggc_alloc_string (strip, len);
25888 #endif
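/* Illustrative example (not part of the sources): on a NO_DOLLAR_IN_LABEL
   target, rs6000_xcoff_strip_dollar ("foo$bar$baz") returns the string
   "foo_bar_baz"; names without '$', or with '$' only in the first
   position, are returned unchanged.  */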
25890 void
25891 rs6000_output_symbol_ref (FILE *file, rtx x)
25893 /* Currently C++ toc references to vtables can be emitted before it
25894 is decided whether the vtable is public or private. If this is
25895 the case, then the linker will eventually complain that there is
25896 a reference to an unknown section. Thus, for vtables only,
25897 we emit the TOC reference to reference the symbol and not the
25898 section. */
25899 const char *name = XSTR (x, 0);
25901 if (VTABLE_NAME_P (name))
25903 RS6000_OUTPUT_BASENAME (file, name);
25905 else
25906 assemble_name (file, name);
25909 /* Output a TOC entry. We derive the entry name from what is being
25910 written. */
25912 void
25913 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
25915 char buf[256];
25916 const char *name = buf;
25917 rtx base = x;
25918 HOST_WIDE_INT offset = 0;
25920 gcc_assert (!TARGET_NO_TOC);
25922 /* When the linker won't eliminate them, don't output duplicate
25923 TOC entries (this happens on AIX if there is any kind of TOC,
25924 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
25925 CODE_LABELs. */
25926 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
25928 struct toc_hash_struct *h;
25930 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
25931 time because GGC is not initialized at that point. */
25932 if (toc_hash_table == NULL)
25933 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
25935 h = ggc_alloc<toc_hash_struct> ();
25936 h->key = x;
25937 h->key_mode = mode;
25938 h->labelno = labelno;
25940 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
25941 if (*found == NULL)
25942 *found = h;
25943 else /* This is indeed a duplicate.
25944 Set this label equal to that label. */
25946 fputs ("\t.set ", file);
25947 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25948 fprintf (file, "%d,", labelno);
25949 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25950 fprintf (file, "%d\n", ((*found)->labelno));
25952 #ifdef HAVE_AS_TLS
25953 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
25954 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
25955 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
25957 fputs ("\t.set ", file);
25958 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25959 fprintf (file, "%d,", labelno);
25960 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25961 fprintf (file, "%d\n", ((*found)->labelno));
25963 #endif
25964 return;
25968 /* If we're going to put a double constant in the TOC, make sure it's
25969 aligned properly when strict alignment is on. */
25970 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
25971 && STRICT_ALIGNMENT
25972 && GET_MODE_BITSIZE (mode) >= 64
25973 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
25974 ASM_OUTPUT_ALIGN (file, 3);
25977 (*targetm.asm_out.internal_label) (file, "LC", labelno);
25979 /* Handle FP constants specially. Note that if we have a minimal
25980 TOC, things we put here aren't actually in the TOC, so we can allow
25981 FP constants. */
25982 if (GET_CODE (x) == CONST_DOUBLE &&
25983 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
25985 REAL_VALUE_TYPE rv;
25986 long k[4];
25988 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
25989 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
25990 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
25991 else
25992 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
25994 if (TARGET_64BIT)
25996 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25997 fputs (DOUBLE_INT_ASM_OP, file);
25998 else
25999 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26000 k[0] & 0xffffffff, k[1] & 0xffffffff,
26001 k[2] & 0xffffffff, k[3] & 0xffffffff);
26002 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
26003 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26004 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
26005 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
26006 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
26007 return;
26009 else
26011 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26012 fputs ("\t.long ", file);
26013 else
26014 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26015 k[0] & 0xffffffff, k[1] & 0xffffffff,
26016 k[2] & 0xffffffff, k[3] & 0xffffffff);
26017 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
26018 k[0] & 0xffffffff, k[1] & 0xffffffff,
26019 k[2] & 0xffffffff, k[3] & 0xffffffff);
26020 return;
26023 else if (GET_CODE (x) == CONST_DOUBLE &&
26024 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
26026 REAL_VALUE_TYPE rv;
26027 long k[2];
26029 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26031 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26032 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
26033 else
26034 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
26036 if (TARGET_64BIT)
26038 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26039 fputs (DOUBLE_INT_ASM_OP, file);
26040 else
26041 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26042 k[0] & 0xffffffff, k[1] & 0xffffffff);
26043 fprintf (file, "0x%lx%08lx\n",
26044 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26045 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
26046 return;
26048 else
26050 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26051 fputs ("\t.long ", file);
26052 else
26053 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26054 k[0] & 0xffffffff, k[1] & 0xffffffff);
26055 fprintf (file, "0x%lx,0x%lx\n",
26056 k[0] & 0xffffffff, k[1] & 0xffffffff);
26057 return;
26060 else if (GET_CODE (x) == CONST_DOUBLE &&
26061 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
26063 REAL_VALUE_TYPE rv;
26064 long l;
26066 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26067 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26068 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
26069 else
26070 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26072 if (TARGET_64BIT)
26074 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26075 fputs (DOUBLE_INT_ASM_OP, file);
26076 else
26077 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26078 if (WORDS_BIG_ENDIAN)
26079 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
26080 else
26081 fprintf (file, "0x%lx\n", l & 0xffffffff);
26082 return;
26084 else
26086 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26087 fputs ("\t.long ", file);
26088 else
26089 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26090 fprintf (file, "0x%lx\n", l & 0xffffffff);
26091 return;
26094 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
26096 unsigned HOST_WIDE_INT low;
26097 HOST_WIDE_INT high;
26099 low = INTVAL (x) & 0xffffffff;
26100 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
26102 /* TOC entries are always Pmode-sized, so when big-endian
26103 smaller integer constants in the TOC need to be padded.
26104 (This is still a win over putting the constants in
26105 a separate constant pool, because then we'd have
26106 to have both a TOC entry _and_ the actual constant.)
26108 For a 32-bit target, CONST_INT values are loaded and shifted
26109 entirely within `low' and can be stored in one TOC entry. */
26111 /* It would be easy to make this work, but it doesn't now. */
26112 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
26114 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
26116 low |= high << 32;
26117 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
26118 high = (HOST_WIDE_INT) low >> 32;
26119 low &= 0xffffffff;
26122 if (TARGET_64BIT)
26124 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26125 fputs (DOUBLE_INT_ASM_OP, file);
26126 else
26127 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26128 (long) high & 0xffffffff, (long) low & 0xffffffff);
26129 fprintf (file, "0x%lx%08lx\n",
26130 (long) high & 0xffffffff, (long) low & 0xffffffff);
26131 return;
26133 else
26135 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
26137 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26138 fputs ("\t.long ", file);
26139 else
26140 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26141 (long) high & 0xffffffff, (long) low & 0xffffffff);
26142 fprintf (file, "0x%lx,0x%lx\n",
26143 (long) high & 0xffffffff, (long) low & 0xffffffff);
26145 else
26147 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26148 fputs ("\t.long ", file);
26149 else
26150 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
26151 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
26153 return;
26157 if (GET_CODE (x) == CONST)
26159 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
26160 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
26162 base = XEXP (XEXP (x, 0), 0);
26163 offset = INTVAL (XEXP (XEXP (x, 0), 1));
26166 switch (GET_CODE (base))
26168 case SYMBOL_REF:
26169 name = XSTR (base, 0);
26170 break;
26172 case LABEL_REF:
26173 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
26174 CODE_LABEL_NUMBER (XEXP (base, 0)));
26175 break;
26177 case CODE_LABEL:
26178 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
26179 break;
26181 default:
26182 gcc_unreachable ();
26185 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26186 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
26187 else
26189 fputs ("\t.tc ", file);
26190 RS6000_OUTPUT_BASENAME (file, name);
26192 if (offset < 0)
26193 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
26194 else if (offset)
26195 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
26197 /* Mark large TOC symbols on AIX with [TE] so they are mapped
26198 after other TOC symbols, reducing overflow of small TOC access
26199 to [TC] symbols. */
26200 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
26201 ? "[TE]," : "[TC],", file);
26204 /* Currently C++ toc references to vtables can be emitted before it
26205 is decided whether the vtable is public or private. If this is
26206 the case, then the linker will eventually complain that there is
26207 a TOC reference to an unknown section. Thus, for vtables only,
26208 we emit the TOC reference to reference the symbol and not the
26209 section. */
26210 if (VTABLE_NAME_P (name))
26212 RS6000_OUTPUT_BASENAME (file, name);
26213 if (offset < 0)
26214 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
26215 else if (offset > 0)
26216 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
26218 else
26219 output_addr_const (file, x);
26221 #if HAVE_AS_TLS
26222 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
26223 && SYMBOL_REF_TLS_MODEL (base) != 0)
26225 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
26226 fputs ("@le", file);
26227 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
26228 fputs ("@ie", file);
26229 /* Use global-dynamic for local-dynamic. */
26230 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
26231 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
26233 putc ('\n', file);
26234 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
26235 fputs ("\t.tc .", file);
26236 RS6000_OUTPUT_BASENAME (file, name);
26237 fputs ("[TC],", file);
26238 output_addr_const (file, x);
26239 fputs ("@m", file);
26242 #endif
26244 putc ('\n', file);
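/* Hypothetical sketch of the output (assembly invented for illustration):
   for the DFmode constant 1.0 on 64-bit AIX with a full TOC, the code
   above emits roughly

     LC..3:
	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   whereas on ELF or with -mminimal-toc the value is written after
   DOUBLE_INT_ASM_OP instead of a .tc pseudo-op.  */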
26247 /* Output an assembler pseudo-op to write an ASCII string of N characters
26248 starting at P to FILE.
26250 On the RS/6000, we have to do this using the .byte operation and
26251 write out special characters outside the quoted string.
26252 Also, the assembler is broken; very long strings are truncated,
26253 so we must artificially break them up early. */
26255 void
26256 output_ascii (FILE *file, const char *p, int n)
26258 char c;
26259 int i, count_string;
26260 const char *for_string = "\t.byte \"";
26261 const char *for_decimal = "\t.byte ";
26262 const char *to_close = NULL;
26264 count_string = 0;
26265 for (i = 0; i < n; i++)
26267 c = *p++;
26268 if (c >= ' ' && c < 0177)
26270 if (for_string)
26271 fputs (for_string, file);
26272 putc (c, file);
26274 /* Write two quotes to get one. */
26275 if (c == '"')
26277 putc (c, file);
26278 ++count_string;
26281 for_string = NULL;
26282 for_decimal = "\"\n\t.byte ";
26283 to_close = "\"\n";
26284 ++count_string;
26286 if (count_string >= 512)
26288 fputs (to_close, file);
26290 for_string = "\t.byte \"";
26291 for_decimal = "\t.byte ";
26292 to_close = NULL;
26293 count_string = 0;
26296 else
26298 if (for_decimal)
26299 fputs (for_decimal, file);
26300 fprintf (file, "%d", c);
26302 for_string = "\n\t.byte \"";
26303 for_decimal = ", ";
26304 to_close = "\n";
26305 count_string = 0;
26309 /* Now close the string if we have written one. Then end the line. */
26310 if (to_close)
26311 fputs (to_close, file);
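/* Usage illustration (hypothetical call): output_ascii (file,
   "He said \"hi\"\n", 13) would emit

	.byte "He said ""hi""
	.byte 10

   printable characters share one quoted string with quotes doubled,
   and each unprintable byte is written as a decimal .byte value.  */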
26314 /* Generate a unique section name for FILENAME for a section type
26315 represented by SECTION_DESC. Output goes into BUF.
26317 SECTION_DESC can be any string, as long as it is different for each
26318 possible section type.
26320 We name the section in the same manner as xlc. The name begins with an
26321 underscore followed by the filename (after stripping any leading directory
26322 names) with the last period replaced by the string SECTION_DESC. If
26323 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26324 the name. */
26326 void
26327 rs6000_gen_section_name (char **buf, const char *filename,
26328 const char *section_desc)
26330 const char *q, *after_last_slash, *last_period = 0;
26331 char *p;
26332 int len;
26334 after_last_slash = filename;
26335 for (q = filename; *q; q++)
26337 if (*q == '/')
26338 after_last_slash = q + 1;
26339 else if (*q == '.')
26340 last_period = q;
26343 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26344 *buf = (char *) xmalloc (len);
26346 p = *buf;
26347 *p++ = '_';
26349 for (q = after_last_slash; *q; q++)
26351 if (q == last_period)
26353 strcpy (p, section_desc);
26354 p += strlen (section_desc);
26355 break;
26358 else if (ISALNUM (*q))
26359 *p++ = *q;
26362 if (last_period == 0)
26363 strcpy (p, section_desc);
26364 else
26365 *p = '\0';
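/* Illustrative example (assumed inputs): for FILENAME "src/foo.c" and
   SECTION_DESC "bss_", *BUF receives "_foobss_": the directory prefix
   is stripped, a '_' is prepended, non-alphanumeric characters are
   dropped, and the final '.' is replaced by SECTION_DESC.  */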
26368 /* Emit profile function. */
26370 void
26371 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26373 /* Non-standard profiling for kernels, which just saves LR then calls
26374 _mcount without worrying about arg saves. The idea is to change
26375 the function prologue as little as possible as it isn't easy to
26376 account for arg save/restore code added just for _mcount. */
26377 if (TARGET_PROFILE_KERNEL)
26378 return;
26380 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26382 #ifndef NO_PROFILE_COUNTERS
26383 # define NO_PROFILE_COUNTERS 0
26384 #endif
26385 if (NO_PROFILE_COUNTERS)
26386 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26387 LCT_NORMAL, VOIDmode, 0);
26388 else
26390 char buf[30];
26391 const char *label_name;
26392 rtx fun;
26394 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26395 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26396 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26398 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26399 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26402 else if (DEFAULT_ABI == ABI_DARWIN)
26404 const char *mcount_name = RS6000_MCOUNT;
26405 int caller_addr_regno = LR_REGNO;
26407 /* Be conservative and always set this, at least for now. */
26408 crtl->uses_pic_offset_table = 1;
26410 #if TARGET_MACHO
26411 /* For PIC code, set up a stub and collect the caller's address
26412 from r0, which is where the prologue puts it. */
26413 if (MACHOPIC_INDIRECT
26414 && crtl->uses_pic_offset_table)
26415 caller_addr_regno = 0;
26416 #endif
26417 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26418 LCT_NORMAL, VOIDmode, 1,
26419 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26423 /* Write function profiler code. */
26425 void
26426 output_function_profiler (FILE *file, int labelno)
26428 char buf[100];
26430 switch (DEFAULT_ABI)
26432 default:
26433 gcc_unreachable ();
26435 case ABI_V4:
26436 if (!TARGET_32BIT)
26438 warning (0, "no profiling of 64-bit code for this ABI");
26439 return;
26441 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26442 fprintf (file, "\tmflr %s\n", reg_names[0]);
26443 if (NO_PROFILE_COUNTERS)
26445 asm_fprintf (file, "\tstw %s,4(%s)\n",
26446 reg_names[0], reg_names[1]);
26448 else if (TARGET_SECURE_PLT && flag_pic)
26450 if (TARGET_LINK_STACK)
26452 char name[32];
26453 get_ppc476_thunk_name (name);
26454 asm_fprintf (file, "\tbl %s\n", name);
26456 else
26457 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26458 asm_fprintf (file, "\tstw %s,4(%s)\n",
26459 reg_names[0], reg_names[1]);
26460 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26461 asm_fprintf (file, "\taddis %s,%s,",
26462 reg_names[12], reg_names[12]);
26463 assemble_name (file, buf);
26464 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26465 assemble_name (file, buf);
26466 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26468 else if (flag_pic == 1)
26470 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26471 asm_fprintf (file, "\tstw %s,4(%s)\n",
26472 reg_names[0], reg_names[1]);
26473 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26474 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26475 assemble_name (file, buf);
26476 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26478 else if (flag_pic > 1)
26480 asm_fprintf (file, "\tstw %s,4(%s)\n",
26481 reg_names[0], reg_names[1]);
26482 /* Now, we need to get the address of the label. */
26483 if (TARGET_LINK_STACK)
26485 char name[32];
26486 get_ppc476_thunk_name (name);
26487 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26488 assemble_name (file, buf);
26489 fputs ("-.\n1:", file);
26490 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26491 asm_fprintf (file, "\taddi %s,%s,4\n",
26492 reg_names[11], reg_names[11]);
26494 else
26496 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26497 assemble_name (file, buf);
26498 fputs ("-.\n1:", file);
26499 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26501 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26502 reg_names[0], reg_names[11]);
26503 asm_fprintf (file, "\tadd %s,%s,%s\n",
26504 reg_names[0], reg_names[0], reg_names[11]);
26506 else
26508 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26509 assemble_name (file, buf);
26510 fputs ("@ha\n", file);
26511 asm_fprintf (file, "\tstw %s,4(%s)\n",
26512 reg_names[0], reg_names[1]);
26513 asm_fprintf (file, "\tla %s,", reg_names[0]);
26514 assemble_name (file, buf);
26515 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26518 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26519 fprintf (file, "\tbl %s%s\n",
26520 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26521 break;
26523 case ABI_AIX:
26524 case ABI_ELFv2:
26525 case ABI_DARWIN:
26526 /* Don't do anything, done in output_profile_hook (). */
26527 break;
26533 /* The following variable holds the last issued insn.  */
26535 static rtx last_scheduled_insn;
26537 /* The following variable helps to balance issuing of load and
26538    store instructions.  */
26540 static int load_store_pendulum;
26542 /* Power4 load update and store update instructions are cracked into a
26543 load or store and an integer insn which are executed in the same cycle.
26544 Branches have their own dispatch slot which does not count against the
26545 GCC issue rate, but it changes the program flow so there are no other
26546 instructions to issue in this cycle. */
26548 static int
26549 rs6000_variable_issue_1 (rtx_insn *insn, int more)
26551 last_scheduled_insn = insn;
26552 if (GET_CODE (PATTERN (insn)) == USE
26553 || GET_CODE (PATTERN (insn)) == CLOBBER)
26555 cached_can_issue_more = more;
26556 return cached_can_issue_more;
26559 if (insn_terminates_group_p (insn, current_group))
26561 cached_can_issue_more = 0;
26562 return cached_can_issue_more;
26565   /* If the insn has no reservation but we reach here, return MORE unchanged.  */
26566 if (recog_memoized (insn) < 0)
26567 return more;
26569 if (rs6000_sched_groups)
26571 if (is_microcoded_insn (insn))
26572 cached_can_issue_more = 0;
26573 else if (is_cracked_insn (insn))
26574 cached_can_issue_more = more > 2 ? more - 2 : 0;
26575 else
26576 cached_can_issue_more = more - 1;
26578 return cached_can_issue_more;
26581 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26582 return 0;
26584 cached_can_issue_more = more - 1;
26585 return cached_can_issue_more;
26588 static int
26589 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
26591 int r = rs6000_variable_issue_1 (insn, more);
26592 if (verbose)
26593 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26594 return r;
26597 /* Adjust the cost of a scheduling dependency. Return the new cost of
26598 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
26600 static int
26601 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26603 enum attr_type attr_type;
26605 if (! recog_memoized (insn))
26606 return 0;
26608 switch (REG_NOTE_KIND (link))
26610 case REG_DEP_TRUE:
26612 /* Data dependency; DEP_INSN writes a register that INSN reads
26613 some cycles later. */
26615 /* Separate a load from a narrower, dependent store. */
26616 if (rs6000_sched_groups
26617 && GET_CODE (PATTERN (insn)) == SET
26618 && GET_CODE (PATTERN (dep_insn)) == SET
26619 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26620 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26621 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26622 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26623 return cost + 14;
26625 attr_type = get_attr_type (insn);
26627 switch (attr_type)
26629 case TYPE_JMPREG:
26630 /* Tell the first scheduling pass about the latency between
26631 a mtctr and bctr (and mtlr and br/blr). The first
26632 scheduling pass will not know about this latency since
26633 the mtctr instruction, which has the latency associated
26634 to it, will be generated by reload. */
26635 return 4;
26636 case TYPE_BRANCH:
26637 /* Leave some extra cycles between a compare and its
26638 dependent branch, to inhibit expensive mispredicts. */
26639 if ((rs6000_cpu_attr == CPU_PPC603
26640 || rs6000_cpu_attr == CPU_PPC604
26641 || rs6000_cpu_attr == CPU_PPC604E
26642 || rs6000_cpu_attr == CPU_PPC620
26643 || rs6000_cpu_attr == CPU_PPC630
26644 || rs6000_cpu_attr == CPU_PPC750
26645 || rs6000_cpu_attr == CPU_PPC7400
26646 || rs6000_cpu_attr == CPU_PPC7450
26647 || rs6000_cpu_attr == CPU_PPCE5500
26648 || rs6000_cpu_attr == CPU_PPCE6500
26649 || rs6000_cpu_attr == CPU_POWER4
26650 || rs6000_cpu_attr == CPU_POWER5
26651 || rs6000_cpu_attr == CPU_POWER7
26652 || rs6000_cpu_attr == CPU_POWER8
26653 || rs6000_cpu_attr == CPU_CELL)
26654 && recog_memoized (dep_insn)
26655 && (INSN_CODE (dep_insn) >= 0))
26657 switch (get_attr_type (dep_insn))
26659 case TYPE_CMP:
26660 case TYPE_COMPARE:
26661 case TYPE_FPCOMPARE:
26662 case TYPE_CR_LOGICAL:
26663 case TYPE_DELAYED_CR:
26664 return cost + 2;
26665 case TYPE_EXTS:
26666 case TYPE_MUL:
26667 if (get_attr_dot (dep_insn) == DOT_YES)
26668 return cost + 2;
26669 else
26670 break;
26671 case TYPE_SHIFT:
26672 if (get_attr_dot (dep_insn) == DOT_YES
26673 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
26674 return cost + 2;
26675 else
26676 break;
26677 default:
26678 break;
26680 break;
26682 case TYPE_STORE:
26683 case TYPE_FPSTORE:
26684 if ((rs6000_cpu == PROCESSOR_POWER6)
26685 && recog_memoized (dep_insn)
26686 && (INSN_CODE (dep_insn) >= 0))
26689 if (GET_CODE (PATTERN (insn)) != SET)
26690 /* If this happens, we have to extend this to schedule
26691 optimally. Return default for now. */
26692 return cost;
26694 /* Adjust the cost for the case where the value written
26695 by a fixed point operation is used as the address
26696 gen value on a store. */
26697 switch (get_attr_type (dep_insn))
26699 case TYPE_LOAD:
26700 case TYPE_CNTLZ:
26702 if (! store_data_bypass_p (dep_insn, insn))
26703 return get_attr_sign_extend (dep_insn)
26704 == SIGN_EXTEND_YES ? 6 : 4;
26705 break;
26707 case TYPE_SHIFT:
26709 if (! store_data_bypass_p (dep_insn, insn))
26710 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26711 6 : 3;
26712 break;
26714 case TYPE_INTEGER:
26715 case TYPE_ADD:
26716 case TYPE_LOGICAL:
26717 case TYPE_COMPARE:
26718 case TYPE_EXTS:
26719 case TYPE_INSERT:
26721 if (! store_data_bypass_p (dep_insn, insn))
26722 return 3;
26723 break;
26725 case TYPE_STORE:
26726 case TYPE_FPLOAD:
26727 case TYPE_FPSTORE:
26729 if (get_attr_update (dep_insn) == UPDATE_YES
26730 && ! store_data_bypass_p (dep_insn, insn))
26731 return 3;
26732 break;
26734 case TYPE_MUL:
26736 if (! store_data_bypass_p (dep_insn, insn))
26737 return 17;
26738 break;
26740 case TYPE_DIV:
26742 if (! store_data_bypass_p (dep_insn, insn))
26743 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26744 break;
26746 default:
26747 break;
26750 break;
26752 case TYPE_LOAD:
26753 if ((rs6000_cpu == PROCESSOR_POWER6)
26754 && recog_memoized (dep_insn)
26755 && (INSN_CODE (dep_insn) >= 0))
26758 /* Adjust the cost for the case where the value written
26759 by a fixed point instruction is used within the address
26760	     gen portion of a subsequent load(u)(x).  */
26761 switch (get_attr_type (dep_insn))
26763 case TYPE_LOAD:
26764 case TYPE_CNTLZ:
26766 if (set_to_load_agen (dep_insn, insn))
26767 return get_attr_sign_extend (dep_insn)
26768 == SIGN_EXTEND_YES ? 6 : 4;
26769 break;
26771 case TYPE_SHIFT:
26773 if (set_to_load_agen (dep_insn, insn))
26774 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26775 6 : 3;
26776 break;
26778 case TYPE_INTEGER:
26779 case TYPE_ADD:
26780 case TYPE_LOGICAL:
26781 case TYPE_COMPARE:
26782 case TYPE_EXTS:
26783 case TYPE_INSERT:
26785 if (set_to_load_agen (dep_insn, insn))
26786 return 3;
26787 break;
26789 case TYPE_STORE:
26790 case TYPE_FPLOAD:
26791 case TYPE_FPSTORE:
26793 if (get_attr_update (dep_insn) == UPDATE_YES
26794 && set_to_load_agen (dep_insn, insn))
26795 return 3;
26796 break;
26798 case TYPE_MUL:
26800 if (set_to_load_agen (dep_insn, insn))
26801 return 17;
26802 break;
26804 case TYPE_DIV:
26806 if (set_to_load_agen (dep_insn, insn))
26807 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26808 break;
26810 default:
26811 break;
26814 break;
26816 case TYPE_FPLOAD:
26817 if ((rs6000_cpu == PROCESSOR_POWER6)
26818 && get_attr_update (insn) == UPDATE_NO
26819 && recog_memoized (dep_insn)
26820 && (INSN_CODE (dep_insn) >= 0)
26821 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
26822 return 2;
26824 default:
26825 break;
26828 /* Fall out to return default cost. */
26830 break;
26832 case REG_DEP_OUTPUT:
26833 /* Output dependency; DEP_INSN writes a register that INSN writes some
26834 cycles later. */
26835 if ((rs6000_cpu == PROCESSOR_POWER6)
26836 && recog_memoized (dep_insn)
26837 && (INSN_CODE (dep_insn) >= 0))
26839 attr_type = get_attr_type (insn);
26841 switch (attr_type)
26843 case TYPE_FP:
26844 if (get_attr_type (dep_insn) == TYPE_FP)
26845 return 1;
26846 break;
26847 case TYPE_FPLOAD:
26848 if (get_attr_update (insn) == UPDATE_NO
26849 && get_attr_type (dep_insn) == TYPE_MFFGPR)
26850 return 2;
26851 break;
26852 default:
26853 break;
26856 case REG_DEP_ANTI:
26857 /* Anti dependency; DEP_INSN reads a register that INSN writes some
26858 cycles later. */
26859 return 0;
26861 default:
26862 gcc_unreachable ();
26865 return cost;
26868 /* Debug version of rs6000_adjust_cost. */
26870 static int
26871 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
26872 int cost)
26874 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
26876 if (ret != cost)
26878 const char *dep;
26880 switch (REG_NOTE_KIND (link))
26882	default: dep = "unknown dependency"; break;
26883 case REG_DEP_TRUE: dep = "data dependency"; break;
26884 case REG_DEP_OUTPUT: dep = "output dependency"; break;
26885	case REG_DEP_ANTI: dep = "anti dependency"; break;
26888 fprintf (stderr,
26889 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
26890 "%s, insn:\n", ret, cost, dep);
26892 debug_rtx (insn);
26895 return ret;
26898 /* Return true if INSN is microcoded.
26899    Return false otherwise.  */
26901 static bool
26902 is_microcoded_insn (rtx_insn *insn)
26904 if (!insn || !NONDEBUG_INSN_P (insn)
26905 || GET_CODE (PATTERN (insn)) == USE
26906 || GET_CODE (PATTERN (insn)) == CLOBBER)
26907 return false;
26909 if (rs6000_cpu_attr == CPU_CELL)
26910 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
26912 if (rs6000_sched_groups
26913 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26915 enum attr_type type = get_attr_type (insn);
26916 if ((type == TYPE_LOAD
26917 && get_attr_update (insn) == UPDATE_YES
26918 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26919 || ((type == TYPE_LOAD || type == TYPE_STORE)
26920 && get_attr_update (insn) == UPDATE_YES
26921 && get_attr_indexed (insn) == INDEXED_YES)
26922 || type == TYPE_MFCR)
26923 return true;
26926 return false;
26929 /* The function returns true if INSN is cracked into 2 instructions
26930 by the processor (and therefore occupies 2 issue slots). */
26932 static bool
26933 is_cracked_insn (rtx_insn *insn)
26935 if (!insn || !NONDEBUG_INSN_P (insn)
26936 || GET_CODE (PATTERN (insn)) == USE
26937 || GET_CODE (PATTERN (insn)) == CLOBBER)
26938 return false;
26940 if (rs6000_sched_groups
26941 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26943 enum attr_type type = get_attr_type (insn);
26944 if ((type == TYPE_LOAD
26945 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26946 && get_attr_update (insn) == UPDATE_NO)
26947 || (type == TYPE_LOAD
26948 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
26949 && get_attr_update (insn) == UPDATE_YES
26950 && get_attr_indexed (insn) == INDEXED_NO)
26951 || (type == TYPE_STORE
26952 && get_attr_update (insn) == UPDATE_YES
26953 && get_attr_indexed (insn) == INDEXED_NO)
26954 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
26955 && get_attr_update (insn) == UPDATE_YES)
26956 || type == TYPE_DELAYED_CR
26957 || type == TYPE_COMPARE
26958 || (type == TYPE_EXTS
26959 && get_attr_dot (insn) == DOT_YES)
26960 || (type == TYPE_SHIFT
26961 && get_attr_dot (insn) == DOT_YES
26962 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
26963 || (type == TYPE_MUL
26964 && get_attr_dot (insn) == DOT_YES)
26965 || type == TYPE_DIV
26966 || (type == TYPE_INSERT
26967 && get_attr_size (insn) == SIZE_32))
26968 return true;
26971 return false;
26974 /* The function returns true if INSN can be issued only from
26975 the branch slot. */
26977 static bool
26978 is_branch_slot_insn (rtx_insn *insn)
26980 if (!insn || !NONDEBUG_INSN_P (insn)
26981 || GET_CODE (PATTERN (insn)) == USE
26982 || GET_CODE (PATTERN (insn)) == CLOBBER)
26983 return false;
26985 if (rs6000_sched_groups)
26987 enum attr_type type = get_attr_type (insn);
26988 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
26989 return true;
26990 return false;
26993 return false;
26996 /* Return true if OUT_INSN sets a value that is
26997    used in the address generation computation of IN_INSN.  */
26998 static bool
26999 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
27001 rtx out_set, in_set;
27003   /* For performance reasons, only handle the simple case where
27004      both insns are a single_set.  */
27005 out_set = single_set (out_insn);
27006 if (out_set)
27008 in_set = single_set (in_insn);
27009 if (in_set)
27010 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
27013 return false;
27016 /* Try to determine base/offset/size parts of the given MEM.
27017    Return true if successful, false if any of the values couldn't
27018    be determined.
27020 This function only looks for REG or REG+CONST address forms.
27021 REG+REG address form will return false. */
27023 static bool
27024 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
27025 HOST_WIDE_INT *size)
27027 rtx addr_rtx;
27028 if MEM_SIZE_KNOWN_P (mem)
27029 *size = MEM_SIZE (mem);
27030 else
27031 return false;
27033 if (GET_CODE (XEXP (mem, 0)) == PRE_MODIFY)
27034 addr_rtx = XEXP (XEXP (mem, 0), 1);
27035 else
27036 addr_rtx = (XEXP (mem, 0));
27038 if (GET_CODE (addr_rtx) == REG)
27040 *base = addr_rtx;
27041 *offset = 0;
27043 else if (GET_CODE (addr_rtx) == PLUS
27044 && CONST_INT_P (XEXP (addr_rtx, 1)))
27046 *base = XEXP (addr_rtx, 0);
27047 *offset = INTVAL (XEXP (addr_rtx, 1));
27049 else
27050 return false;
27052 return true;
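/* Illustrative example (hypothetical RTL): given a MEM of the form

     (mem:DI (plus:DI (reg:DI 9) (const_int 16)))

   with a known 8-byte size, this sets *BASE to (reg:DI 9), *OFFSET to
   16 and *SIZE to 8.  A REG+REG (indexed) address returns false.  */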
27055 /* Return true if the target storage location of
27056    MEM1 is adjacent to the target storage location of MEM2.  */
27059 static bool
27060 adjacent_mem_locations (rtx mem1, rtx mem2)
27062 rtx reg1, reg2;
27063 HOST_WIDE_INT off1, size1, off2, size2;
27065 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27066 && get_memref_parts (mem2, &reg2, &off2, &size2))
27067 return ((REGNO (reg1) == REGNO (reg2))
27068 && ((off1 + size1 == off2)
27069 || (off2 + size2 == off1)));
27071 return false;
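/* For example (hypothetical values): two 8-byte accesses at (r9)+0 and
   (r9)+8 are adjacent, since off1 + size1 == off2; accesses at (r9)+0
   and (r9)+12 of size 8 are neither adjacent nor overlapping.  */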
27074 /* This function returns true if it can be determined that the two MEM
27075 locations overlap by at least 1 byte based on base reg/offset/size. */
27077 static bool
27078 mem_locations_overlap (rtx mem1, rtx mem2)
27080 rtx reg1, reg2;
27081 HOST_WIDE_INT off1, size1, off2, size2;
27083 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27084 && get_memref_parts (mem2, &reg2, &off2, &size2))
27085 return ((REGNO (reg1) == REGNO (reg2))
27086 && (((off1 <= off2) && (off1 + size1 > off2))
27087 || ((off2 <= off1) && (off2 + size2 > off1))));
27089 return false;
27092 /* A C statement (sans semicolon) to update the integer scheduling
27093 priority INSN_PRIORITY (INSN). Increase the priority to execute the
27094    INSN earlier; reduce the priority to execute INSN later.  Do not
27095 define this macro if you do not need to adjust the scheduling
27096 priorities of insns. */
27098 static int
27099 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
27101 rtx load_mem, str_mem;
27102 /* On machines (like the 750) which have asymmetric integer units,
27103 where one integer unit can do multiply and divides and the other
27104 can't, reduce the priority of multiply/divide so it is scheduled
27105 before other integer operations. */
27107 #if 0
27108 if (! INSN_P (insn))
27109 return priority;
27111 if (GET_CODE (PATTERN (insn)) == USE)
27112 return priority;
27114 switch (rs6000_cpu_attr) {
27115 case CPU_PPC750:
27116 switch (get_attr_type (insn))
27118 default:
27119 break;
27121 case TYPE_MUL:
27122 case TYPE_DIV:
27123 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
27124 priority, priority);
27125 if (priority >= 0 && priority < 0x01000000)
27126 priority >>= 3;
27127 break;
27130 #endif
27132 if (insn_must_be_first_in_group (insn)
27133 && reload_completed
27134 && current_sched_info->sched_max_insns_priority
27135 && rs6000_sched_restricted_insns_priority)
27138 /* Prioritize insns that can be dispatched only in the first
27139 dispatch slot. */
27140 if (rs6000_sched_restricted_insns_priority == 1)
27141 /* Attach highest priority to insn. This means that in
27142 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
27143 precede 'priority' (critical path) considerations. */
27144 return current_sched_info->sched_max_insns_priority;
27145 else if (rs6000_sched_restricted_insns_priority == 2)
27146 /* Increase priority of insn by a minimal amount. This means that in
27147 haifa-sched.c:ready_sort(), only 'priority' (critical path)
27148 considerations precede dispatch-slot restriction considerations. */
27149 return (priority + 1);
27152 if (rs6000_cpu == PROCESSOR_POWER6
27153 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
27154 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
27155 /* Attach highest priority to insn if the scheduler has just issued two
27156 stores and this instruction is a load, or two loads and this instruction
27157 is a store. Power6 wants loads and stores scheduled alternately
27158      when possible.  */
27159 return current_sched_info->sched_max_insns_priority;
27161 return priority;
27164 /* Return true if the instruction is nonpipelined on the Cell. */
27165 static bool
27166 is_nonpipeline_insn (rtx_insn *insn)
27168 enum attr_type type;
27169 if (!insn || !NONDEBUG_INSN_P (insn)
27170 || GET_CODE (PATTERN (insn)) == USE
27171 || GET_CODE (PATTERN (insn)) == CLOBBER)
27172 return false;
27174 type = get_attr_type (insn);
27175 if (type == TYPE_MUL
27176 || type == TYPE_DIV
27177 || type == TYPE_SDIV
27178 || type == TYPE_DDIV
27179 || type == TYPE_SSQRT
27180 || type == TYPE_DSQRT
27181 || type == TYPE_MFCR
27182 || type == TYPE_MFCRF
27183 || type == TYPE_MFJMPR)
27185 return true;
27187 return false;
27191 /* Return how many instructions the machine can issue per cycle. */
27193 static int
27194 rs6000_issue_rate (void)
27196   /* Unless scheduling for register pressure, use an issue rate of 1 for
27197      the first scheduling pass to reduce degradation.  */
27198 if (!reload_completed && !flag_sched_pressure)
27199 return 1;
27201 switch (rs6000_cpu_attr) {
27202 case CPU_RS64A:
27203 case CPU_PPC601: /* ? */
27204 case CPU_PPC7450:
27205 return 3;
27206 case CPU_PPC440:
27207 case CPU_PPC603:
27208 case CPU_PPC750:
27209 case CPU_PPC7400:
27210 case CPU_PPC8540:
27211 case CPU_PPC8548:
27212 case CPU_CELL:
27213 case CPU_PPCE300C2:
27214 case CPU_PPCE300C3:
27215 case CPU_PPCE500MC:
27216 case CPU_PPCE500MC64:
27217 case CPU_PPCE5500:
27218 case CPU_PPCE6500:
27219 case CPU_TITAN:
27220 return 2;
27221 case CPU_PPC476:
27222 case CPU_PPC604:
27223 case CPU_PPC604E:
27224 case CPU_PPC620:
27225 case CPU_PPC630:
27226 return 4;
27227 case CPU_POWER4:
27228 case CPU_POWER5:
27229 case CPU_POWER6:
27230 case CPU_POWER7:
27231 return 5;
27232 case CPU_POWER8:
27233 return 7;
27234 default:
27235 return 1;
27239 /* Return how many instructions to look ahead for better insn
27240 scheduling. */
27242 static int
27243 rs6000_use_sched_lookahead (void)
27245 switch (rs6000_cpu_attr)
27247 case CPU_PPC8540:
27248 case CPU_PPC8548:
27249 return 4;
27251 case CPU_CELL:
27252 return (reload_completed ? 8 : 0);
27254 default:
27255 return 0;
27259 /* We are choosing an insn from the ready queue.  Return zero if INSN can be
27260 chosen. */
27261 static int
27262 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27264 if (ready_index == 0)
27265 return 0;
27267 if (rs6000_cpu_attr != CPU_CELL)
27268 return 0;
27270 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27272 if (!reload_completed
27273 || is_nonpipeline_insn (insn)
27274 || is_microcoded_insn (insn))
27275 return 1;
27277 return 0;
27280 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27281 and return true. */
27283 static bool
27284 find_mem_ref (rtx pat, rtx *mem_ref)
27286 const char * fmt;
27287 int i, j;
27289 /* stack_tie does not produce any real memory traffic. */
27290 if (tie_operand (pat, VOIDmode))
27291 return false;
27293 if (GET_CODE (pat) == MEM)
27295 *mem_ref = pat;
27296 return true;
27299 /* Recursively process the pattern. */
27300 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27302 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27304 if (fmt[i] == 'e')
27306 if (find_mem_ref (XEXP (pat, i), mem_ref))
27307 return true;
27309 else if (fmt[i] == 'E')
27310 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27312 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27313 return true;
27317 return false;
27320 /* Determine if PAT is a PATTERN of a load insn. */
27322 static bool
27323 is_load_insn1 (rtx pat, rtx *load_mem)
27325 if (!pat || pat == NULL_RTX)
27326 return false;
27328 if (GET_CODE (pat) == SET)
27329 return find_mem_ref (SET_SRC (pat), load_mem);
27331 if (GET_CODE (pat) == PARALLEL)
27333 int i;
27335 for (i = 0; i < XVECLEN (pat, 0); i++)
27336 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27337 return true;
27340 return false;
27343 /* Determine if INSN loads from memory. */
27345 static bool
27346 is_load_insn (rtx insn, rtx *load_mem)
27348 if (!insn || !INSN_P (insn))
27349 return false;
27351 if (CALL_P (insn))
27352 return false;
27354 return is_load_insn1 (PATTERN (insn), load_mem);
27357 /* Determine if PAT is a PATTERN of a store insn. */
27359 static bool
27360 is_store_insn1 (rtx pat, rtx *str_mem)
27362 if (!pat)
27363 return false;
27365 if (GET_CODE (pat) == SET)
27366 return find_mem_ref (SET_DEST (pat), str_mem);
27368 if (GET_CODE (pat) == PARALLEL)
27370 int i;
27372 for (i = 0; i < XVECLEN (pat, 0); i++)
27373 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27374 return true;
27377 return false;
27380 /* Determine if INSN stores to memory. */
27382 static bool
27383 is_store_insn (rtx insn, rtx *str_mem)
27385 if (!insn || !INSN_P (insn))
27386 return false;
27388 return is_store_insn1 (PATTERN (insn), str_mem);
27391 /* Returns whether the dependence between INSN and NEXT is considered
27392 costly by the given target. */
27394 static bool
27395 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27397 rtx insn;
27398 rtx next;
27399 rtx load_mem, str_mem;
27401 /* If the flag is not enabled - no dependence is considered costly;
27402 allow all dependent insns in the same group.
27403 This is the most aggressive option. */
27404 if (rs6000_sched_costly_dep == no_dep_costly)
27405 return false;
27407 /* If the flag is set to 1 - a dependence is always considered costly;
27408 do not allow dependent instructions in the same group.
27409 This is the most conservative option. */
27410 if (rs6000_sched_costly_dep == all_deps_costly)
27411 return true;
27413 insn = DEP_PRO (dep);
27414 next = DEP_CON (dep);
27416 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27417 && is_load_insn (next, &load_mem)
27418 && is_store_insn (insn, &str_mem))
27419 /* Prevent load after store in the same group. */
27420 return true;
27422 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27423 && is_load_insn (next, &load_mem)
27424 && is_store_insn (insn, &str_mem)
27425 && DEP_TYPE (dep) == REG_DEP_TRUE
27426 && mem_locations_overlap (str_mem, load_mem))
27427 /* Prevent load after store in the same group if it is a true
27428 dependence. */
27429 return true;
27431 /* The flag is set to X; dependences with latency >= X are considered costly,
27432 and will not be scheduled in the same group. */
27433 if (rs6000_sched_costly_dep <= max_dep_latency
27434 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27435 return true;
27437 return false;
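/* Worked example (illustrative, assuming -msched-costly-dep=3): a
   dependence with cost 4 between insns at distance 0 gives
   cost - distance = 4 >= 3, so the pair is kept out of the same
   dispatch group; at distance 2 the slack is 4 - 2 = 2 < 3 and the
   insns may be grouped together.  */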
27440 /* Return the next insn after INSN that is found before TAIL is reached,
27441 skipping any "non-active" insns - insns that will not actually occupy
27442 an issue slot. Return NULL_RTX if such an insn is not found. */
27444 static rtx_insn *
27445 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27447 if (insn == NULL_RTX || insn == tail)
27448 return NULL;
27450 while (1)
27452 insn = NEXT_INSN (insn);
27453 if (insn == NULL_RTX || insn == tail)
27454 return NULL;
27456 if (CALL_P (insn)
27457 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27458 || (NONJUMP_INSN_P (insn)
27459 && GET_CODE (PATTERN (insn)) != USE
27460 && GET_CODE (PATTERN (insn)) != CLOBBER
27461 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27462 break;
27464 return insn;
27467 /* We are about to begin issuing insns for this clock cycle. */
27469 static int
27470 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27471 rtx_insn **ready ATTRIBUTE_UNUSED,
27472 int *pn_ready ATTRIBUTE_UNUSED,
27473 int clock_var ATTRIBUTE_UNUSED)
27475 int n_ready = *pn_ready;
27477 if (sched_verbose)
27478 fprintf (dump, "// rs6000_sched_reorder :\n");
27480 /* Reorder the ready list, if the second to last ready insn
27481 is a nonpipeline insn. */
27482 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27484 if (is_nonpipeline_insn (ready[n_ready - 1])
27485 && (recog_memoized (ready[n_ready - 2]) > 0))
27486 /* Simply swap first two insns. */
27487 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
27490 if (rs6000_cpu == PROCESSOR_POWER6)
27491 load_store_pendulum = 0;
27493 return rs6000_issue_rate ();
27496 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27498 static int
27499 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
27500 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27502 if (sched_verbose)
27503 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27505 /* For Power6, we need to handle some special cases to try and keep the
27506 store queue from overflowing and triggering expensive flushes.
27508 This code monitors how load and store instructions are being issued
27509 and skews the ready list one way or the other to increase the likelihood
27510 that a desired instruction is issued at the proper time.
27512 A couple of things are done. First, we maintain a "load_store_pendulum"
27513 to track the current state of load/store issue.
27515 - If the pendulum is at zero, then no loads or stores have been
27516 issued in the current cycle so we do nothing.
27518 - If the pendulum is 1, then a single load has been issued in this
27519 cycle and we attempt to locate another load in the ready list to
27520 issue with it.
27522 - If the pendulum is -2, then two stores have already been
27523 issued in this cycle, so we increase the priority of the first load
27524 in the ready list to increase its likelihood of being chosen first
27525 in the next cycle.
27527 - If the pendulum is -1, then a single store has been issued in this
27528 cycle and we attempt to locate another store in the ready list to
27529 issue with it, preferring a store to an adjacent memory location to
27530 facilitate store pairing in the store queue.
27532 - If the pendulum is 2, then two loads have already been
27533 issued in this cycle, so we increase the priority of the first store
27534 in the ready list to increase its likelihood of being chosen first
27535 in the next cycle.
27537 - If the pendulum < -2 or > 2, then do nothing.
27539 Note: This code covers the most common scenarios. There exist
27540 non-load/store instructions which make use of the LSU and which
27541 would need to be accounted for to strictly model the behavior
27542 of the machine. Those instructions are currently unaccounted
27543 for to help minimize compile time overhead of this code.
27545 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27547 int pos;
27548 int i;
27549 rtx_insn *tmp;
27550 rtx load_mem, str_mem;
27552 if (is_store_insn (last_scheduled_insn, &str_mem))
27553 /* Issuing a store, swing the load_store_pendulum to the left */
27554 load_store_pendulum--;
27555 else if (is_load_insn (last_scheduled_insn, &load_mem))
27556 /* Issuing a load, swing the load_store_pendulum to the right */
27557 load_store_pendulum++;
27558 else
27559 return cached_can_issue_more;
27561 /* If the pendulum is balanced, or there is only one instruction on
27562 the ready list, then all is well, so return. */
27563 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27564 return cached_can_issue_more;
27566 if (load_store_pendulum == 1)
27568 /* A load has been issued in this cycle. Scan the ready list
27569 for another load to issue with it */
27570 pos = *pn_ready - 1;
27572 while (pos >= 0)
27574 if (is_load_insn (ready[pos], &load_mem))
27576 /* Found a load. Move it to the head of the ready list,
27577 and adjust its priority so that it is more likely to
27578 stay there */
27579 tmp = ready[pos];
27580 for (i = pos; i < *pn_ready - 1; i++)
27581 ready[i] = ready[i + 1];
27582 ready[*pn_ready - 1] = tmp;
27584 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27585 INSN_PRIORITY (tmp)++;
27586 break;
27588 pos--;
27591 else if (load_store_pendulum == -2)
27593 /* Two stores have been issued in this cycle. Increase the
27594 priority of the first load in the ready list to favor it for
27595 issuing in the next cycle. */
27596 pos = *pn_ready - 1;
27598 while (pos >= 0)
27600 if (is_load_insn (ready[pos], &load_mem)
27601 && !sel_sched_p ()
27602 && INSN_PRIORITY_KNOWN (ready[pos]))
27604 INSN_PRIORITY (ready[pos])++;
27606 /* Adjust the pendulum to account for the fact that a load
27607 was found and increased in priority. This is to prevent
27608 increasing the priority of multiple loads */
27609 load_store_pendulum--;
27611 break;
27613 pos--;
27616 else if (load_store_pendulum == -1)
27618 /* A store has been issued in this cycle. Scan the ready list for
27619 another store to issue with it, preferring a store to an adjacent
27620 memory location */
27621 int first_store_pos = -1;
27623 pos = *pn_ready - 1;
27625 while (pos >= 0)
27627 if (is_store_insn (ready[pos], &str_mem))
27629 rtx str_mem2;
27630 /* Maintain the index of the first store found on the
27631 list */
27632 if (first_store_pos == -1)
27633 first_store_pos = pos;
27635 if (is_store_insn (last_scheduled_insn, &str_mem2)
27636 && adjacent_mem_locations (str_mem, str_mem2))
27638 /* Found an adjacent store. Move it to the head of the
27639 ready list, and adjust its priority so that it is
27640 more likely to stay there */
27641 tmp = ready[pos];
27642 for (i = pos; i < *pn_ready - 1; i++)
27643 ready[i] = ready[i + 1];
27644 ready[*pn_ready - 1] = tmp;
27646 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27647 INSN_PRIORITY (tmp)++;
27649 first_store_pos = -1;
27651 break;
27654 pos--;
27657 if (first_store_pos >= 0)
27659 /* An adjacent store wasn't found, but a non-adjacent store was,
27660 so move the non-adjacent store to the front of the ready
27661 list, and adjust its priority so that it is more likely to
27662 stay there. */
27663 tmp = ready[first_store_pos];
27664 for (i = first_store_pos; i < *pn_ready - 1; i++)
27665 ready[i] = ready[i + 1];
27666 ready[*pn_ready - 1] = tmp;
27667 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27668 INSN_PRIORITY (tmp)++;
27671 else if (load_store_pendulum == 2)
27673 /* Two loads have been issued in this cycle. Increase the priority
27674 of the first store in the ready list to favor it for issuing in
27675 the next cycle. */
27676 pos = *pn_ready - 1;
27678 while (pos >= 0)
27680 if (is_store_insn (ready[pos], &str_mem)
27681 && !sel_sched_p ()
27682 && INSN_PRIORITY_KNOWN (ready[pos]))
27684 INSN_PRIORITY (ready[pos])++;
27686 /* Adjust the pendulum to account for the fact that a store
27687 was found and increased in priority. This is to prevent
27688 increasing the priority of multiple stores */
27689 load_store_pendulum++;
27691 break;
27693 pos--;
27698 return cached_can_issue_more;
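/* Standalone model (illustrative, not GCC internals) of the pendulum
   bookkeeping documented above; the enum and the example trace are
   assumptions used only to show the state transitions.  */
#if 0
enum mem_kind { MEM_LOAD, MEM_STORE, MEM_OTHER };

static int
swing_pendulum (int pendulum, enum mem_kind kind)
{
  if (kind == MEM_STORE)
    return pendulum - 1;	/* swing left, toward stores */
  if (kind == MEM_LOAD)
    return pendulum + 1;	/* swing right, toward loads */
  return pendulum;		/* non-memory insn: no change */
}

/* Trace: issuing LOAD, LOAD moves the pendulum 0 -> 1 -> 2; at +2 the
   code above boosts the priority of the first store on the ready list
   so stores catch up in the next cycle.  */
#endif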
27701 /* Return whether the presence of INSN causes a dispatch group termination
27702 of group WHICH_GROUP.
27704 If WHICH_GROUP == current_group, this function will return true if INSN
27705 causes the termination of the current group (i.e, the dispatch group to
27706 which INSN belongs). This means that INSN will be the last insn in the
27707 group it belongs to.
27709 If WHICH_GROUP == previous_group, this function will return true if INSN
27710 causes the termination of the previous group (i.e, the dispatch group that
27711 precedes the group to which INSN belongs). This means that INSN will be
27712 the first insn in the group it belongs to. */
27714 static bool
27715 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
27717 bool first, last;
27719 if (! insn)
27720 return false;
27722 first = insn_must_be_first_in_group (insn);
27723 last = insn_must_be_last_in_group (insn);
27725 if (first && last)
27726 return true;
27728 if (which_group == current_group)
27729 return last;
27730 else if (which_group == previous_group)
27731 return first;
27733 return false;
27737 static bool
27738 insn_must_be_first_in_group (rtx_insn *insn)
27740 enum attr_type type;
27742 if (!insn
27743 || NOTE_P (insn)
27744 || DEBUG_INSN_P (insn)
27745 || GET_CODE (PATTERN (insn)) == USE
27746 || GET_CODE (PATTERN (insn)) == CLOBBER)
27747 return false;
27749 switch (rs6000_cpu)
27751 case PROCESSOR_POWER5:
27752 if (is_cracked_insn (insn))
27753 return true;
27754 case PROCESSOR_POWER4:
27755 if (is_microcoded_insn (insn))
27756 return true;
27758 if (!rs6000_sched_groups)
27759 return false;
27761 type = get_attr_type (insn);
27763 switch (type)
27765 case TYPE_MFCR:
27766 case TYPE_MFCRF:
27767 case TYPE_MTCR:
27768 case TYPE_DELAYED_CR:
27769 case TYPE_CR_LOGICAL:
27770 case TYPE_MTJMPR:
27771 case TYPE_MFJMPR:
27772 case TYPE_DIV:
27773 case TYPE_LOAD_L:
27774 case TYPE_STORE_C:
27775 case TYPE_ISYNC:
27776 case TYPE_SYNC:
27777 return true;
27778 default:
27779 break;
27781 break;
27782 case PROCESSOR_POWER6:
27783 type = get_attr_type (insn);
27785 switch (type)
27787 case TYPE_EXTS:
27788 case TYPE_CNTLZ:
27789 case TYPE_TRAP:
27790 case TYPE_MUL:
27791 case TYPE_INSERT:
27792 case TYPE_FPCOMPARE:
27793 case TYPE_MFCR:
27794 case TYPE_MTCR:
27795 case TYPE_MFJMPR:
27796 case TYPE_MTJMPR:
27797 case TYPE_ISYNC:
27798 case TYPE_SYNC:
27799 case TYPE_LOAD_L:
27800 case TYPE_STORE_C:
27801 return true;
27802 case TYPE_SHIFT:
27803 if (get_attr_dot (insn) == DOT_NO
27804 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27805 return true;
27806 else
27807 break;
27808 case TYPE_DIV:
27809 if (get_attr_size (insn) == SIZE_32)
27810 return true;
27811 else
27812 break;
27813 case TYPE_LOAD:
27814 case TYPE_STORE:
27815 case TYPE_FPLOAD:
27816 case TYPE_FPSTORE:
27817 if (get_attr_update (insn) == UPDATE_YES)
27818 return true;
27819 else
27820 break;
27821 default:
27822 break;
27824 break;
27825 case PROCESSOR_POWER7:
27826 type = get_attr_type (insn);
27828 switch (type)
27830 case TYPE_CR_LOGICAL:
27831 case TYPE_MFCR:
27832 case TYPE_MFCRF:
27833 case TYPE_MTCR:
27834 case TYPE_DIV:
27835 case TYPE_COMPARE:
27836 case TYPE_ISYNC:
27837 case TYPE_LOAD_L:
27838 case TYPE_STORE_C:
27839 case TYPE_MFJMPR:
27840 case TYPE_MTJMPR:
27841 return true;
27842 case TYPE_MUL:
27843 case TYPE_SHIFT:
27844 case TYPE_EXTS:
27845 if (get_attr_dot (insn) == DOT_YES)
27846 return true;
27847 else
27848 break;
27849 case TYPE_LOAD:
27850 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27851 || get_attr_update (insn) == UPDATE_YES)
27852 return true;
27853 else
27854 break;
27855 case TYPE_STORE:
27856 case TYPE_FPLOAD:
27857 case TYPE_FPSTORE:
27858 if (get_attr_update (insn) == UPDATE_YES)
27859 return true;
27860 else
27861 break;
27862 default:
27863 break;
27865 break;
27866 case PROCESSOR_POWER8:
27867 type = get_attr_type (insn);
27869 switch (type)
27871 case TYPE_CR_LOGICAL:
27872 case TYPE_DELAYED_CR:
27873 case TYPE_MFCR:
27874 case TYPE_MFCRF:
27875 case TYPE_MTCR:
27876 case TYPE_COMPARE:
27877 case TYPE_SYNC:
27878 case TYPE_ISYNC:
27879 case TYPE_LOAD_L:
27880 case TYPE_STORE_C:
27881 case TYPE_VECSTORE:
27882 case TYPE_MFJMPR:
27883 case TYPE_MTJMPR:
27884 return true;
27885 case TYPE_SHIFT:
27886 case TYPE_EXTS:
27887 case TYPE_MUL:
27888 if (get_attr_dot (insn) == DOT_YES)
27889 return true;
27890 else
27891 break;
27892 case TYPE_LOAD:
27893 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27894 || get_attr_update (insn) == UPDATE_YES)
27895 return true;
27896 else
27897 break;
27898 case TYPE_STORE:
27899 if (get_attr_update (insn) == UPDATE_YES
27900 && get_attr_indexed (insn) == INDEXED_YES)
27901 return true;
27902 else
27903 break;
27904 default:
27905 break;
27907 break;
27908 default:
27909 break;
27912 return false;
27915 static bool
27916 insn_must_be_last_in_group (rtx_insn *insn)
27918 enum attr_type type;
27920 if (!insn
27921 || NOTE_P (insn)
27922 || DEBUG_INSN_P (insn)
27923 || GET_CODE (PATTERN (insn)) == USE
27924 || GET_CODE (PATTERN (insn)) == CLOBBER)
27925 return false;
27927 switch (rs6000_cpu) {
27928 case PROCESSOR_POWER4:
27929 case PROCESSOR_POWER5:
27930 if (is_microcoded_insn (insn))
27931 return true;
27933 if (is_branch_slot_insn (insn))
27934 return true;
27936 break;
27937 case PROCESSOR_POWER6:
27938 type = get_attr_type (insn);
27940 switch (type)
27942 case TYPE_EXTS:
27943 case TYPE_CNTLZ:
27944 case TYPE_TRAP:
27945 case TYPE_MUL:
27946 case TYPE_FPCOMPARE:
27947 case TYPE_MFCR:
27948 case TYPE_MTCR:
27949 case TYPE_MFJMPR:
27950 case TYPE_MTJMPR:
27951 case TYPE_ISYNC:
27952 case TYPE_SYNC:
27953 case TYPE_LOAD_L:
27954 case TYPE_STORE_C:
27955 return true;
27956 case TYPE_SHIFT:
27957 if (get_attr_dot (insn) == DOT_NO
27958 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27959 return true;
27960 else
27961 break;
27962 case TYPE_DIV:
27963 if (get_attr_size (insn) == SIZE_32)
27964 return true;
27965 else
27966 break;
27967 default:
27968 break;
27970 break;
27971 case PROCESSOR_POWER7:
27972 type = get_attr_type (insn);
27974 switch (type)
27976 case TYPE_ISYNC:
27977 case TYPE_SYNC:
27978 case TYPE_LOAD_L:
27979 case TYPE_STORE_C:
27980 return true;
27981 case TYPE_LOAD:
27982 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27983 && get_attr_update (insn) == UPDATE_YES)
27984 return true;
27985 else
27986 break;
27987 case TYPE_STORE:
27988 if (get_attr_update (insn) == UPDATE_YES
27989 && get_attr_indexed (insn) == INDEXED_YES)
27990 return true;
27991 else
27992 break;
27993 default:
27994 break;
27996 break;
27997 case PROCESSOR_POWER8:
27998 type = get_attr_type (insn);
28000 switch (type)
28002 case TYPE_MFCR:
28003 case TYPE_MTCR:
28004 case TYPE_ISYNC:
28005 case TYPE_SYNC:
28006 case TYPE_LOAD_L:
28007 case TYPE_STORE_C:
28008 return true;
28009 case TYPE_LOAD:
28010 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28011 && get_attr_update (insn) == UPDATE_YES)
28012 return true;
28013 else
28014 break;
28015 case TYPE_STORE:
28016 if (get_attr_update (insn) == UPDATE_YES
28017 && get_attr_indexed (insn) == INDEXED_YES)
28018 return true;
28019 else
28020 break;
28021 default:
28022 break;
28024 break;
28025 default:
28026 break;
28029 return false;
28032 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
28033 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
28035 static bool
28036 is_costly_group (rtx *group_insns, rtx next_insn)
28038 int i;
28039 int issue_rate = rs6000_issue_rate ();
28041 for (i = 0; i < issue_rate; i++)
28043 sd_iterator_def sd_it;
28044 dep_t dep;
28045 rtx insn = group_insns[i];
28047 if (!insn)
28048 continue;
28050 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
28052 rtx next = DEP_CON (dep);
28054 if (next == next_insn
28055 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
28056 return true;
28060 return false;
28063 /* Utility of the function redefine_groups.
28064 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
28065 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
28066 to keep it "far" (in a separate group) from GROUP_INSNS, following
28067 one of the following schemes, depending on the value of the flag
28068 -minsert-sched-nops=X:
28069 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
28070 in order to force NEXT_INSN into a separate group.
28071 (2) X < sched_finish_regroup_exact: insert exactly X nops.
28072 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
28073 insertion (has a group just ended, how many vacant issue slots remain in the
28074 last group, and how many dispatch groups were encountered so far). */
28076 static int
28077 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
28078 rtx_insn *next_insn, bool *group_end, int can_issue_more,
28079 int *group_count)
28081 rtx nop;
28082 bool force;
28083 int issue_rate = rs6000_issue_rate ();
28084 bool end = *group_end;
28085 int i;
28087 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
28088 return can_issue_more;
28090 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
28091 return can_issue_more;
28093 force = is_costly_group (group_insns, next_insn);
28094 if (!force)
28095 return can_issue_more;
28097 if (sched_verbose > 6)
28098 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
28099 *group_count, can_issue_more);
28101 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
28103 if (*group_end)
28104 can_issue_more = 0;
28106 /* Since only a branch can be issued in the last issue_slot, it is
28107 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
28108 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
28109 in this case the last nop will start a new group and the branch
28110 will be forced to the new group. */
28111 if (can_issue_more && !is_branch_slot_insn (next_insn))
28112 can_issue_more--;
28114 /* Do we have a special group ending nop? */
28115 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
28116 || rs6000_cpu_attr == CPU_POWER8)
28118 nop = gen_group_ending_nop ();
28119 emit_insn_before (nop, next_insn);
28120 can_issue_more = 0;
28122 else
28123 while (can_issue_more > 0)
28125 nop = gen_nop ();
28126 emit_insn_before (nop, next_insn);
28127 can_issue_more--;
28130 *group_end = true;
28131 return 0;
28134 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
28136 int n_nops = rs6000_sched_insert_nops;
28138 /* Nops can't be issued from the branch slot, so the effective
28139 issue_rate for nops is 'issue_rate - 1'. */
28140 if (can_issue_more == 0)
28141 can_issue_more = issue_rate;
28142 can_issue_more--;
28143 if (can_issue_more == 0)
28145 can_issue_more = issue_rate - 1;
28146 (*group_count)++;
28147 end = true;
28148 for (i = 0; i < issue_rate; i++)
28150 group_insns[i] = 0;
28154 while (n_nops > 0)
28156 nop = gen_nop ();
28157 emit_insn_before (nop, next_insn);
28158 if (can_issue_more == issue_rate - 1) /* new group begins */
28159 end = false;
28160 can_issue_more--;
28161 if (can_issue_more == 0)
28163 can_issue_more = issue_rate - 1;
28164 (*group_count)++;
28165 end = true;
28166 for (i = 0; i < issue_rate; i++)
28168 group_insns[i] = 0;
28171 n_nops--;
28174 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
28175 can_issue_more++;
28177 /* Is next_insn going to start a new group? */
28178 *group_end
28179 = (end
28180 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28181 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28182 || (can_issue_more < issue_rate
28183 && insn_terminates_group_p (next_insn, previous_group)));
28184 if (*group_end && end)
28185 (*group_count)--;
28187 if (sched_verbose > 6)
28188 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
28189 *group_count, can_issue_more);
28190 return can_issue_more;
28193 return can_issue_more;
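/* Worked example (illustrative, assuming issue_rate == 4 and
   -minsert-sched-nops=2): scheme (2) above inserts exactly two nops
   before NEXT_INSN.  Because a nop cannot occupy the branch slot, the
   effective nop issue rate is 3, so can_issue_more wraps from 0 back
   to issue_rate - 1, and *group_count is bumped each time a nop closes
   a group.  */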
28196 /* This function tries to synch the dispatch groups that the compiler "sees"
28197 with the dispatch groups that the processor dispatcher is expected to
28198 form in practice. It tries to achieve this synchronization by forcing the
28199 estimated processor grouping on the compiler (as opposed to the function
28200 'pad_groups' which tries to force the scheduler's grouping on the processor).
28202 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
28203 examines the (estimated) dispatch groups that will be formed by the processor
28204 dispatcher. It marks these group boundaries to reflect the estimated
28205 processor grouping, overriding the grouping that the scheduler had marked.
28206 Depending on the value of the flag '-minsert-sched-nops' this function can
28207 force certain insns into separate groups or force a certain distance between
28208 them by inserting nops, for example, if there exists a "costly dependence"
28209 between the insns.
28211 The function estimates the group boundaries that the processor will form as
28212 follows: It keeps track of how many vacant issue slots are available after
28213 each insn. A subsequent insn will start a new group if one of the following
28214 4 cases applies:
28215 - no more vacant issue slots remain in the current dispatch group.
28216 - only the last issue slot, which is the branch slot, is vacant, but the next
28217 insn is not a branch.
28218 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
28219 which means that a cracked insn (which occupies two issue slots) can't be
28220 issued in this group.
28221 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
28222 start a new group. */
28224 static int
28225 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28226 rtx_insn *tail)
28228 rtx_insn *insn, *next_insn;
28229 int issue_rate;
28230 int can_issue_more;
28231 int slot, i;
28232 bool group_end;
28233 int group_count = 0;
28234 rtx *group_insns;
28236 /* Initialize. */
28237 issue_rate = rs6000_issue_rate ();
28238 group_insns = XALLOCAVEC (rtx, issue_rate);
28239 for (i = 0; i < issue_rate; i++)
28241 group_insns[i] = 0;
28243 can_issue_more = issue_rate;
28244 slot = 0;
28245 insn = get_next_active_insn (prev_head_insn, tail);
28246 group_end = false;
28248 while (insn != NULL_RTX)
28250 slot = (issue_rate - can_issue_more);
28251 group_insns[slot] = insn;
28252 can_issue_more =
28253 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28254 if (insn_terminates_group_p (insn, current_group))
28255 can_issue_more = 0;
28257 next_insn = get_next_active_insn (insn, tail);
28258 if (next_insn == NULL_RTX)
28259 return group_count + 1;
28261 /* Is next_insn going to start a new group? */
28262 group_end
28263 = (can_issue_more == 0
28264 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28265 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28266 || (can_issue_more < issue_rate
28267 && insn_terminates_group_p (next_insn, previous_group)));
28269 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28270 next_insn, &group_end, can_issue_more,
28271 &group_count);
28273 if (group_end)
28275 group_count++;
28276 can_issue_more = 0;
28277 for (i = 0; i < issue_rate; i++)
28279 group_insns[i] = 0;
28283 if (GET_MODE (next_insn) == TImode && can_issue_more)
28284 PUT_MODE (next_insn, VOIDmode);
28285 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28286 PUT_MODE (next_insn, TImode);
28288 insn = next_insn;
28289 if (can_issue_more == 0)
28290 can_issue_more = issue_rate;
28291 } /* while */
28293 return group_count;
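/* Worked example (illustrative, assuming issue_rate == 5): after four
   insns only the branch slot remains vacant.  If the next active insn
   is not a branch, the second case above fires, the insn starts a new
   estimated group, and it is marked by giving it TImode so later
   passes (e.g. pad_groups) can see the boundary.  */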
28296 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28297 dispatch group boundaries that the scheduler had marked. Pad with nops
28298 any dispatch groups which have vacant issue slots, in order to force the
28299 scheduler's grouping on the processor dispatcher. The function
28300 returns the number of dispatch groups found. */
28302 static int
28303 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28304 rtx_insn *tail)
28306 rtx_insn *insn, *next_insn;
28307 rtx nop;
28308 int issue_rate;
28309 int can_issue_more;
28310 int group_end;
28311 int group_count = 0;
28313 /* Initialize issue_rate. */
28314 issue_rate = rs6000_issue_rate ();
28315 can_issue_more = issue_rate;
28317 insn = get_next_active_insn (prev_head_insn, tail);
28318 next_insn = get_next_active_insn (insn, tail);
28320 while (insn != NULL_RTX)
28322 can_issue_more =
28323 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28325 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28327 if (next_insn == NULL_RTX)
28328 break;
28330 if (group_end)
28332 /* If the scheduler had marked group termination at this location
28333 (between insn and next_insn), and neither insn nor next_insn will
28334 force group termination, pad the group with nops to force group
28335 termination. */
28336 if (can_issue_more
28337 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28338 && !insn_terminates_group_p (insn, current_group)
28339 && !insn_terminates_group_p (next_insn, previous_group))
28341 if (!is_branch_slot_insn (next_insn))
28342 can_issue_more--;
28344 while (can_issue_more)
28346 nop = gen_nop ();
28347 emit_insn_before (nop, next_insn);
28348 can_issue_more--;
28352 can_issue_more = issue_rate;
28353 group_count++;
28356 insn = next_insn;
28357 next_insn = get_next_active_insn (insn, tail);
28360 return group_count;
28363 /* We're beginning a new block. Initialize data structures as necessary. */
28365 static void
28366 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28367 int sched_verbose ATTRIBUTE_UNUSED,
28368 int max_ready ATTRIBUTE_UNUSED)
28370 last_scheduled_insn = NULL_RTX;
28371 load_store_pendulum = 0;
28374 /* The following function is called at the end of scheduling BB.
28375 After reload, it inserts nops to enforce insn group bundling. */
28377 static void
28378 rs6000_sched_finish (FILE *dump, int sched_verbose)
28380 int n_groups;
28382 if (sched_verbose)
28383 fprintf (dump, "=== Finishing schedule.\n");
28385 if (reload_completed && rs6000_sched_groups)
28387 /* Do not run sched_finish hook when selective scheduling enabled. */
28388 if (sel_sched_p ())
28389 return;
28391 if (rs6000_sched_insert_nops == sched_finish_none)
28392 return;
28394 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28395 n_groups = pad_groups (dump, sched_verbose,
28396 current_sched_info->prev_head,
28397 current_sched_info->next_tail);
28398 else
28399 n_groups = redefine_groups (dump, sched_verbose,
28400 current_sched_info->prev_head,
28401 current_sched_info->next_tail);
28403 if (sched_verbose >= 6)
28405 fprintf (dump, "ngroups = %d\n", n_groups);
28406 print_rtl (dump, current_sched_info->prev_head);
28407 fprintf (dump, "Done finish_sched\n");
28412 struct _rs6000_sched_context
28414 short cached_can_issue_more;
28415 rtx last_scheduled_insn;
28416 int load_store_pendulum;
28419 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28420 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28422 /* Allocate store for new scheduling context. */
28423 static void *
28424 rs6000_alloc_sched_context (void)
28426 return xmalloc (sizeof (rs6000_sched_context_def));
28429 /* If CLEAN_P is true then initializes _SC with clean data,
28430 and from the global context otherwise. */
28431 static void
28432 rs6000_init_sched_context (void *_sc, bool clean_p)
28434 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28436 if (clean_p)
28438 sc->cached_can_issue_more = 0;
28439 sc->last_scheduled_insn = NULL_RTX;
28440 sc->load_store_pendulum = 0;
28442 else
28444 sc->cached_can_issue_more = cached_can_issue_more;
28445 sc->last_scheduled_insn = last_scheduled_insn;
28446 sc->load_store_pendulum = load_store_pendulum;
28450 /* Sets the global scheduling context to the one pointed to by _SC. */
28451 static void
28452 rs6000_set_sched_context (void *_sc)
28454 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28456 gcc_assert (sc != NULL);
28458 cached_can_issue_more = sc->cached_can_issue_more;
28459 last_scheduled_insn = sc->last_scheduled_insn;
28460 load_store_pendulum = sc->load_store_pendulum;
28463 /* Free _SC. */
28464 static void
28465 rs6000_free_sched_context (void *_sc)
28467 gcc_assert (_sc != NULL);
28469 free (_sc);
28473 /* Length in units of the trampoline for entering a nested function. */
28476 rs6000_trampoline_size (void)
28478 int ret = 0;
28480 switch (DEFAULT_ABI)
28482 default:
28483 gcc_unreachable ();
28485 case ABI_AIX:
28486 ret = (TARGET_32BIT) ? 12 : 24;
28487 break;
28489 case ABI_ELFv2:
28490 gcc_assert (!TARGET_32BIT);
28491 ret = 32;
28492 break;
28494 case ABI_DARWIN:
28495 case ABI_V4:
28496 ret = (TARGET_32BIT) ? 40 : 48;
28497 break;
28500 return ret;
28503 /* Emit RTL insns to initialize the variable parts of a trampoline.
28504 FNADDR is an RTX for the address of the function's pure code.
28505 CXT is an RTX for the static chain value for the function. */
28507 static void
28508 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28510 int regsize = (TARGET_32BIT) ? 4 : 8;
28511 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28512 rtx ctx_reg = force_reg (Pmode, cxt);
28513 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28515 switch (DEFAULT_ABI)
28517 default:
28518 gcc_unreachable ();
28520 /* Under AIX, just build the 3-word function descriptor. */
28521 case ABI_AIX:
28523 rtx fnmem, fn_reg, toc_reg;
28525 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28526 error ("you cannot take the address of a nested function if you use "
28527 "the -mno-pointers-to-nested-functions option");
28529 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28530 fn_reg = gen_reg_rtx (Pmode);
28531 toc_reg = gen_reg_rtx (Pmode);
28533 /* Macro to shorten the code expansions below. */
28534 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28536 m_tramp = replace_equiv_address (m_tramp, addr);
28538 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28539 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28540 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28541 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28542 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28544 # undef MEM_PLUS
28546 break;
28548 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28549 case ABI_ELFv2:
28550 case ABI_DARWIN:
28551 case ABI_V4:
28552 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28553 LCT_NORMAL, VOIDmode, 4,
28554 addr, Pmode,
28555 GEN_INT (rs6000_trampoline_size ()), SImode,
28556 fnaddr, Pmode,
28557 ctx_reg, Pmode);
28558 break;
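/* Illustrative layout (assuming 64-bit AIX, so regsize == 8): the
   ABI_AIX case above assembles a three-doubleword function descriptor
   in the trampoline itself:
     tramp +  0: entry address (copied from the callee's descriptor)
     tramp +  8: TOC pointer   (copied likewise)
     tramp + 16: static chain  (CXT)  */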
28563 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28564 identifier as an argument, so the front end shouldn't look it up. */
28566 static bool
28567 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28569 return is_attribute_p ("altivec", attr_id);
28572 /* Handle the "altivec" attribute. The attribute may have
28573 arguments as follows:
28575 __attribute__((altivec(vector__)))
28576 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28577 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28579 and may appear more than once (e.g., 'vector bool char') in a
28580 given declaration. */
28582 static tree
28583 rs6000_handle_altivec_attribute (tree *node,
28584 tree name ATTRIBUTE_UNUSED,
28585 tree args,
28586 int flags ATTRIBUTE_UNUSED,
28587 bool *no_add_attrs)
28589 tree type = *node, result = NULL_TREE;
28590 machine_mode mode;
28591 int unsigned_p;
28592 char altivec_type
28593 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28594 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28595 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28596 : '?');
28598 while (POINTER_TYPE_P (type)
28599 || TREE_CODE (type) == FUNCTION_TYPE
28600 || TREE_CODE (type) == METHOD_TYPE
28601 || TREE_CODE (type) == ARRAY_TYPE)
28602 type = TREE_TYPE (type);
28604 mode = TYPE_MODE (type);
28606 /* Check for invalid AltiVec type qualifiers. */
28607 if (type == long_double_type_node)
28608 error ("use of %<long double%> in AltiVec types is invalid");
28609 else if (type == boolean_type_node)
28610 error ("use of boolean types in AltiVec types is invalid");
28611 else if (TREE_CODE (type) == COMPLEX_TYPE)
28612 error ("use of %<complex%> in AltiVec types is invalid");
28613 else if (DECIMAL_FLOAT_MODE_P (mode))
28614 error ("use of decimal floating point types in AltiVec types is invalid");
28615 else if (!TARGET_VSX)
28617 if (type == long_unsigned_type_node || type == long_integer_type_node)
28619 if (TARGET_64BIT)
28620 error ("use of %<long%> in AltiVec types is invalid for "
28621 "64-bit code without -mvsx");
28622 else if (rs6000_warn_altivec_long)
28623 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28624 "use %<int%>");
28626 else if (type == long_long_unsigned_type_node
28627 || type == long_long_integer_type_node)
28628 error ("use of %<long long%> in AltiVec types is invalid without "
28629 "-mvsx");
28630 else if (type == double_type_node)
28631 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28634 switch (altivec_type)
28636 case 'v':
28637 unsigned_p = TYPE_UNSIGNED (type);
28638 switch (mode)
28640 case TImode:
28641 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28642 break;
28643 case DImode:
28644 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28645 break;
28646 case SImode:
28647 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28648 break;
28649 case HImode:
28650 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28651 break;
28652 case QImode:
28653 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28654 break;
28655 case SFmode: result = V4SF_type_node; break;
28656 case DFmode: result = V2DF_type_node; break;
28657 /* If the user says 'vector int bool', we may be handed the 'bool'
28658 attribute _before_ the 'vector' attribute, and so select the
28659 proper type in the 'b' case below. */
28660 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28661 case V2DImode: case V2DFmode:
28662 result = type;
28663 default: break;
28665 break;
28666 case 'b':
28667 switch (mode)
28669 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28670 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28671 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28672 case QImode: case V16QImode: result = bool_V16QI_type_node;
28673 default: break;
28675 break;
28676 case 'p':
28677 switch (mode)
28679 case V8HImode: result = pixel_V8HI_type_node;
28680 default: break;
28682 default: break;
28685 /* Propagate qualifiers attached to the element type
28686 onto the vector type. */
28687 if (result && result != type && TYPE_QUALS (type))
28688 result = build_qualified_type (result, TYPE_QUALS (type));
28690 *no_add_attrs = true; /* No need to hang on to the attribute. */
28692 if (result)
28693 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28695 return NULL_TREE;
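/* Usage sketch (illustrative): the attribute forms documented above
   can be spelled directly, e.g.

     typedef int v4si_t __attribute__ ((altivec (vector__)));

   reaches the 'v' case with SImode and yields V4SI_type_node, while
   'vector bool char' arrives here twice, once for the bool__ form and
   once for the vector__ form.  */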
28698 /* AltiVec defines four built-in scalar types that serve as vector
28699 elements; we must teach the compiler how to mangle them. */
28701 static const char *
28702 rs6000_mangle_type (const_tree type)
28704 type = TYPE_MAIN_VARIANT (type);
28706 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28707 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28708 return NULL;
28710 if (type == bool_char_type_node) return "U6__boolc";
28711 if (type == bool_short_type_node) return "U6__bools";
28712 if (type == pixel_type_node) return "u7__pixel";
28713 if (type == bool_int_type_node) return "U6__booli";
28714 if (type == bool_long_type_node) return "U6__booll";
28716 /* Mangle IBM extended float long double as `g' (__float128) on
28717 powerpc*-linux where long-double-64 previously was the default. */
28718 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28719 && TARGET_ELF
28720 && TARGET_LONG_DOUBLE_128
28721 && !TARGET_IEEEQUAD)
28722 return "g";
28724 /* For all other types, use normal C++ mangling. */
28725 return NULL;
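/* Worked example (illustrative): under the table above, a declaration
     void f (__pixel);
   mangles roughly as _Z1fu7__pixel, and with 128-bit IBM long double
   on ELF, f (long double) becomes _Z1fg.  */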
28728 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28729 struct attribute_spec.handler. */
28731 static tree
28732 rs6000_handle_longcall_attribute (tree *node, tree name,
28733 tree args ATTRIBUTE_UNUSED,
28734 int flags ATTRIBUTE_UNUSED,
28735 bool *no_add_attrs)
28737 if (TREE_CODE (*node) != FUNCTION_TYPE
28738 && TREE_CODE (*node) != FIELD_DECL
28739 && TREE_CODE (*node) != TYPE_DECL)
28741 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28742 name);
28743 *no_add_attrs = true;
28746 return NULL_TREE;
28749 /* Set longcall attributes on all functions declared when
28750 rs6000_default_long_calls is true. */
28751 static void
28752 rs6000_set_default_type_attributes (tree type)
28754 if (rs6000_default_long_calls
28755 && (TREE_CODE (type) == FUNCTION_TYPE
28756 || TREE_CODE (type) == METHOD_TYPE))
28757 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28758 NULL_TREE,
28759 TYPE_ATTRIBUTES (type));
28761 #if TARGET_MACHO
28762 darwin_set_default_type_attributes (type);
28763 #endif
28766 /* Return a reference suitable for calling a function with the
28767 longcall attribute. */
28770 rs6000_longcall_ref (rtx call_ref)
28772 const char *call_name;
28773 tree node;
28775 if (GET_CODE (call_ref) != SYMBOL_REF)
28776 return call_ref;
28778 /* System V adds '.' to the internal name, so skip any leading dots. */
28779 call_name = XSTR (call_ref, 0);
28780 if (*call_name == '.')
28782 while (*call_name == '.')
28783 call_name++;
28785 node = get_identifier (call_name);
28786 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
28789 return force_reg (Pmode, call_ref);
28792 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
28793 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
28794 #endif
28796 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28797 struct attribute_spec.handler. */
28798 static tree
28799 rs6000_handle_struct_attribute (tree *node, tree name,
28800 tree args ATTRIBUTE_UNUSED,
28801 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28803 tree *type = NULL;
28804 if (DECL_P (*node))
28806 if (TREE_CODE (*node) == TYPE_DECL)
28807 type = &TREE_TYPE (*node);
28809 else
28810 type = node;
28812 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28813 || TREE_CODE (*type) == UNION_TYPE)))
28815 warning (OPT_Wattributes, "%qE attribute ignored", name);
28816 *no_add_attrs = true;
28819 else if ((is_attribute_p ("ms_struct", name)
28820 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28821 || ((is_attribute_p ("gcc_struct", name)
28822 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28824 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28825 name);
28826 *no_add_attrs = true;
28829 return NULL_TREE;
28832 static bool
28833 rs6000_ms_bitfield_layout_p (const_tree record_type)
28835 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
28836 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28837 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
28840 #ifdef USING_ELFOS_H
28842 /* A get_unnamed_section callback, used for switching to toc_section. */
28844 static void
28845 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28847 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28848 && TARGET_MINIMAL_TOC
28849 && !TARGET_RELOCATABLE)
28851 if (!toc_initialized)
28853 toc_initialized = 1;
28854 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28855 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
28856 fprintf (asm_out_file, "\t.tc ");
28857 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
28858 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28859 fprintf (asm_out_file, "\n");
28861 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28862 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28863 fprintf (asm_out_file, " = .+32768\n");
28865 else
28866 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28868 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28869 && !TARGET_RELOCATABLE)
28870 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28871 else
28873 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28874 if (!toc_initialized)
28876 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28877 fprintf (asm_out_file, " = .+32768\n");
28878 toc_initialized = 1;
28883 /* Implement TARGET_ASM_INIT_SECTIONS. */
28885 static void
28886 rs6000_elf_asm_init_sections (void)
28888 toc_section
28889 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
28891 sdata2_section
28892 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
28893 SDATA2_SECTION_ASM_OP);
28896 /* Implement TARGET_SELECT_RTX_SECTION. */
28898 static section *
28899 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
28900 unsigned HOST_WIDE_INT align)
28902 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28903 return toc_section;
28904 else
28905 return default_elf_select_rtx_section (mode, x, align);
28908 /* For a SYMBOL_REF, set generic flags and then perform some
28909 target-specific processing.
28911 When the AIX ABI is requested on a non-AIX system, replace the
28912 function name with the real name (with a leading .) rather than the
28913 function descriptor name. This saves a lot of overriding code to
28914 read the prefixes. */
28916 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
28917 static void
28918 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
28920 default_encode_section_info (decl, rtl, first);
28922 if (first
28923 && TREE_CODE (decl) == FUNCTION_DECL
28924 && !TARGET_AIX
28925 && DEFAULT_ABI == ABI_AIX)
28927 rtx sym_ref = XEXP (rtl, 0);
28928 size_t len = strlen (XSTR (sym_ref, 0));
28929 char *str = XALLOCAVEC (char, len + 2);
28930 str[0] = '.';
28931 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
28932 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
28936 static inline bool
28937 compare_section_name (const char *section, const char *templ)
28939 int len;
28941 len = strlen (templ);
28942 return (strncmp (section, templ, len) == 0
28943 && (section[len] == 0 || section[len] == '.'));
28946 bool
28947 rs6000_elf_in_small_data_p (const_tree decl)
28949 if (rs6000_sdata == SDATA_NONE)
28950 return false;
28952 /* We want to merge strings, so we never consider them small data. */
28953 if (TREE_CODE (decl) == STRING_CST)
28954 return false;
28956 /* Functions are never in the small data area. */
28957 if (TREE_CODE (decl) == FUNCTION_DECL)
28958 return false;
28960 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
28962 const char *section = DECL_SECTION_NAME (decl);
28963 if (compare_section_name (section, ".sdata")
28964 || compare_section_name (section, ".sdata2")
28965 || compare_section_name (section, ".gnu.linkonce.s")
28966 || compare_section_name (section, ".sbss")
28967 || compare_section_name (section, ".sbss2")
28968 || compare_section_name (section, ".gnu.linkonce.sb")
28969 || strcmp (section, ".PPC.EMB.sdata0") == 0
28970 || strcmp (section, ".PPC.EMB.sbss0") == 0)
28971 return true;
28973 else
28975 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
28977 if (size > 0
28978 && size <= g_switch_value
28979 /* If it's not public, and we're not going to reference it there,
28980 there's no need to put it in the small data section. */
28981 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
28982 return true;
28985 return false;
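/* Illustrative example: compiled with -msdata=data and -G 8, a public
     int counter;
   (4 bytes, within the -G threshold) passes the size test above, while
     int big[64] __attribute__ ((section (".sdata")));
   is accepted via the explicit section-name check regardless of its
   size.  */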
28988 #endif /* USING_ELFOS_H */
28990 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
28992 static bool
28993 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
28995 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
28998 /* Do not place thread-local symbols refs in the object blocks. */
29000 static bool
29001 rs6000_use_blocks_for_decl_p (const_tree decl)
29003 return !DECL_THREAD_LOCAL_P (decl);
29006 /* Return a REG that occurs in ADDR with coefficient 1.
29007 ADDR can be effectively incremented by incrementing REG.
29009 r0 is special and we must not select it as an address
29010 register by this routine since our caller will try to
29011 increment the returned register via an "la" instruction. */
29014 find_addr_reg (rtx addr)
29016 while (GET_CODE (addr) == PLUS)
29018 if (GET_CODE (XEXP (addr, 0)) == REG
29019 && REGNO (XEXP (addr, 0)) != 0)
29020 addr = XEXP (addr, 0);
29021 else if (GET_CODE (XEXP (addr, 1)) == REG
29022 && REGNO (XEXP (addr, 1)) != 0)
29023 addr = XEXP (addr, 1);
29024 else if (CONSTANT_P (XEXP (addr, 0)))
29025 addr = XEXP (addr, 1);
29026 else if (CONSTANT_P (XEXP (addr, 1)))
29027 addr = XEXP (addr, 0);
29028 else
29029 gcc_unreachable ();
29031 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
29032 return addr;
29035 void
29036 rs6000_fatal_bad_address (rtx op)
29038 fatal_insn ("bad address", op);
29041 #if TARGET_MACHO
29043 typedef struct branch_island_d {
29044 tree function_name;
29045 tree label_name;
29046 int line_number;
29047 } branch_island;
29050 static vec<branch_island, va_gc> *branch_islands;
29052 /* Remember to generate a branch island for far calls to the given
29053 function. */
29055 static void
29056 add_compiler_branch_island (tree label_name, tree function_name,
29057 int line_number)
29059 branch_island bi = {function_name, label_name, line_number};
29060 vec_safe_push (branch_islands, bi);
29063 /* Generate far-jump branch islands for everything recorded in
29064 branch_islands. Invoked immediately after the last instruction of
29065 the epilogue has been emitted; the branch islands must be appended
29066 to, and contiguous with, the function body. Mach-O stubs are
29067 generated in machopic_output_stub(). */
29069 static void
29070 macho_branch_islands (void)
29072 char tmp_buf[512];
29074 while (!vec_safe_is_empty (branch_islands))
29076 branch_island *bi = &branch_islands->last ();
29077 const char *label = IDENTIFIER_POINTER (bi->label_name);
29078 const char *name = IDENTIFIER_POINTER (bi->function_name);
29079 char name_buf[512];
29080 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
29081 if (name[0] == '*' || name[0] == '&')
29082 strcpy (name_buf, name+1);
29083 else
29085 name_buf[0] = '_';
29086 strcpy (name_buf+1, name);
29088 strcpy (tmp_buf, "\n");
29089 strcat (tmp_buf, label);
29090 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29091 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29092 dbxout_stabd (N_SLINE, bi->line_number);
29093 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29094 if (flag_pic)
29096 if (TARGET_LINK_STACK)
29098 char name[32];
29099 get_ppc476_thunk_name (name);
29100 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
29101 strcat (tmp_buf, name);
29102 strcat (tmp_buf, "\n");
29103 strcat (tmp_buf, label);
29104 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29106 else
29108 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
29109 strcat (tmp_buf, label);
29110 strcat (tmp_buf, "_pic\n");
29111 strcat (tmp_buf, label);
29112 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29115 strcat (tmp_buf, "\taddis r11,r11,ha16(");
29116 strcat (tmp_buf, name_buf);
29117 strcat (tmp_buf, " - ");
29118 strcat (tmp_buf, label);
29119 strcat (tmp_buf, "_pic)\n");
29121 strcat (tmp_buf, "\tmtlr r0\n");
29123 strcat (tmp_buf, "\taddi r12,r11,lo16(");
29124 strcat (tmp_buf, name_buf);
29125 strcat (tmp_buf, " - ");
29126 strcat (tmp_buf, label);
29127 strcat (tmp_buf, "_pic)\n");
29129 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
29131 else
29133 strcat (tmp_buf, ":\nlis r12,hi16(");
29134 strcat (tmp_buf, name_buf);
29135 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
29136 strcat (tmp_buf, name_buf);
29137 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
29139 output_asm_insn (tmp_buf, 0);
29140 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29141 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29142 dbxout_stabd (N_SLINE, bi->line_number);
29143 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29144 branch_islands->pop ();
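/* Illustrative output (the non-PIC branch above): an island recorded
   for function 'foo' under label 'L42' is emitted roughly as

     L42:
	lis r12,hi16(_foo)
	ori r12,r12,lo16(_foo)
	mtctr r12
	bctr
   */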
29148 /* NO_PREVIOUS_DEF checks in the list of branch islands whether the
29149 function name is already there or not. */
29151 static int
29152 no_previous_def (tree function_name)
29154 branch_island *bi;
29155 unsigned ix;
29157 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29158 if (function_name == bi->function_name)
29159 return 0;
29160 return 1;
29163 /* GET_PREV_LABEL gets the label name from the previous definition of
29164 the function. */
29166 static tree
29167 get_prev_label (tree function_name)
29169 branch_island *bi;
29170 unsigned ix;
29172 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29173 if (function_name == bi->function_name)
29174 return bi->label_name;
29175 return NULL_TREE;
29178 /* INSN is either a function call or a millicode call. It may have an
29179 unconditional jump in its delay slot.
29181 CALL_DEST is the routine we are calling. */
29183 char *
29184 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
29185 int cookie_operand_number)
29187 static char buf[256];
29188 if (darwin_emit_branch_islands
29189 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
29190 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
29192 tree labelname;
29193 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
29195 if (no_previous_def (funname))
29197 rtx label_rtx = gen_label_rtx ();
29198 char *label_buf, temp_buf[256];
29199 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
29200 CODE_LABEL_NUMBER (label_rtx));
29201 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
29202 labelname = get_identifier (label_buf);
29203 add_compiler_branch_island (labelname, funname, insn_line (insn));
29205 else
29206 labelname = get_prev_label (funname);
29208 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
29209 instruction will reach 'foo', otherwise link as 'bl L42'".
29210 "L42" should be a 'branch island', that will do a far jump to
29211 'foo'. Branch islands are generated in
29212 macho_branch_islands(). */
29213 sprintf (buf, "jbsr %%z%d,%.246s",
29214 dest_operand_number, IDENTIFIER_POINTER (labelname));
29216 else
29217 sprintf (buf, "bl %%z%d", dest_operand_number);
29218 return buf;
29221 /* Generate PIC and indirect symbol stubs. */
29223 void
29224 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29226 unsigned int length;
29227 char *symbol_name, *lazy_ptr_name;
29228 char *local_label_0;
29229 static int label = 0;
29231 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29232 symb = (*targetm.strip_name_encoding) (symb);
29235 length = strlen (symb);
29236 symbol_name = XALLOCAVEC (char, length + 32);
29237 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29239 lazy_ptr_name = XALLOCAVEC (char, length + 32);
29240 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
29242 if (flag_pic == 2)
29243 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
29244 else
29245 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
29247 if (flag_pic == 2)
29249 fprintf (file, "\t.align 5\n");
29251 fprintf (file, "%s:\n", stub);
29252 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29254 label++;
29255 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29256 sprintf (local_label_0, "\"L%011d$spb\"", label);
29258 fprintf (file, "\tmflr r0\n");
29259 if (TARGET_LINK_STACK)
29261 char name[32];
29262 get_ppc476_thunk_name (name);
29263 fprintf (file, "\tbl %s\n", name);
29264 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29266 else
29268 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29269 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29271 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29272 lazy_ptr_name, local_label_0);
29273 fprintf (file, "\tmtlr r0\n");
29274 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29275 (TARGET_64BIT ? "ldu" : "lwzu"),
29276 lazy_ptr_name, local_label_0);
29277 fprintf (file, "\tmtctr r12\n");
29278 fprintf (file, "\tbctr\n");
29280 else
29282 fprintf (file, "\t.align 4\n");
29284 fprintf (file, "%s:\n", stub);
29285 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29287 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29288 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29289 (TARGET_64BIT ? "ldu" : "lwzu"),
29290 lazy_ptr_name);
29291 fprintf (file, "\tmtctr r12\n");
29292 fprintf (file, "\tbctr\n");
29295 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29296 fprintf (file, "%s:\n", lazy_ptr_name);
29297 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29298 fprintf (file, "%sdyld_stub_binding_helper\n",
29299 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
29302 /* Legitimize PIC addresses. If the address is already
29303 position-independent, we return ORIG. Newly generated
29304 position-independent addresses go into a reg. This is REG if
29305 nonzero; otherwise we allocate register(s) as necessary. */
29307 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
29310 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
29311 rtx reg)
29313 rtx base, offset;
29315 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29316 reg = gen_reg_rtx (Pmode);
29318 if (GET_CODE (orig) == CONST)
29320 rtx reg_temp;
29322 if (GET_CODE (XEXP (orig, 0)) == PLUS
29323 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29324 return orig;
29326 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29328 /* Use a different reg for the intermediate value, as
29329 it will be marked UNCHANGING. */
29330 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29331 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29332 Pmode, reg_temp);
29333 offset =
29334 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29335 Pmode, reg);
29337 if (GET_CODE (offset) == CONST_INT)
29339 if (SMALL_INT (offset))
29340 return plus_constant (Pmode, base, INTVAL (offset));
29341 else if (! reload_in_progress && ! reload_completed)
29342 offset = force_reg (Pmode, offset);
29343 else
29345 rtx mem = force_const_mem (Pmode, orig);
29346 return machopic_legitimize_pic_address (mem, Pmode, reg);
29349 return gen_rtx_PLUS (Pmode, base, offset);
29352 /* Fall back on generic machopic code. */
29353 return machopic_legitimize_pic_address (orig, mode, reg);
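/* Worked example (a sketch): legitimizing
   (const (plus (symbol_ref "x") (const_int 8))) recursively
   legitimizes "x" into a register R, sees that 8 satisfies SMALL_INT,
   and returns (plus R (const_int 8)) via plus_constant.  An offset
   outside the signed 16-bit range is forced into its own register
   instead, or, once reload has started, the whole constant is pushed
   to the constant pool.  */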
29356 /* Output a .machine directive for the Darwin assembler, and call
29357 the generic start_file routine. */
29359 static void
29360 rs6000_darwin_file_start (void)
29362 static const struct
29364 const char *arg;
29365 const char *name;
29366 HOST_WIDE_INT if_set;
29367 } mapping[] = {
29368 { "ppc64", "ppc64", MASK_64BIT },
29369 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29370 { "power4", "ppc970", 0 },
29371 { "G5", "ppc970", 0 },
29372 { "7450", "ppc7450", 0 },
29373 { "7400", "ppc7400", MASK_ALTIVEC },
29374 { "G4", "ppc7400", 0 },
29375 { "750", "ppc750", 0 },
29376 { "740", "ppc750", 0 },
29377 { "G3", "ppc750", 0 },
29378 { "604e", "ppc604e", 0 },
29379 { "604", "ppc604", 0 },
29380 { "603e", "ppc603", 0 },
29381 { "603", "ppc603", 0 },
29382 { "601", "ppc601", 0 },
29383 { NULL, "ppc", 0 } };
29384 const char *cpu_id = "";
29385 size_t i;
29387 rs6000_file_start ();
29388 darwin_file_start ();
29390 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29392 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29393 cpu_id = rs6000_default_cpu;
29395 if (global_options_set.x_rs6000_cpu_index)
29396 cpu_id = processor_target_table[rs6000_cpu_index].name;
29398 /* Look through the mapping array. Pick the first name that either
29399 matches the argument, has a bit set in IF_SET that is also set
29400 in the target flags, or has a NULL name. */
29402 i = 0;
29403 while (mapping[i].arg != NULL
29404 && strcmp (mapping[i].arg, cpu_id) != 0
29405 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29406 i++;
29408 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
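/* For example, -mcpu=G5 matches the "G5" row above and emits
   "\t.machine ppc970", while -m64 with no -mcpu matches the "ppc64"
   row through its MASK_64BIT bit; if nothing matches, the NULL
   sentinel row yields plain "\t.machine ppc".  */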
29411 #endif /* TARGET_MACHO */
29413 #if TARGET_ELF
29414 static int
29415 rs6000_elf_reloc_rw_mask (void)
29417 if (flag_pic)
29418 return 3;
29419 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29420 return 2;
29421 else
29422 return 0;
29425 /* Record an element in the table of global constructors. SYMBOL is
29426 a SYMBOL_REF of the function to be called; PRIORITY is a number
29427 between 0 and MAX_INIT_PRIORITY.
29429 This differs from default_named_section_asm_out_constructor in
29430 that we have special handling for -mrelocatable. */
29432 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29433 static void
29434 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29436 const char *section = ".ctors";
29437 char buf[16];
29439 if (priority != DEFAULT_INIT_PRIORITY)
29441 sprintf (buf, ".ctors.%.5u",
29442 /* Invert the numbering so the linker puts us in the proper
29443 order; constructors are run from right to left, and the
29444 linker sorts in increasing order. */
29445 MAX_INIT_PRIORITY - priority);
29446 section = buf;
29449 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29450 assemble_align (POINTER_SIZE);
29452 if (TARGET_RELOCATABLE)
29454 fputs ("\t.long (", asm_out_file);
29455 output_addr_const (asm_out_file, symbol);
29456 fputs (")@fixup\n", asm_out_file);
29458 else
29459 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
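/* Worked example: with the usual MAX_INIT_PRIORITY of 65535, a
   function marked __attribute__((constructor(100))) is placed in
   section ".ctors.65435" (65535 - 100), so the linker's ascending
   sort by section name recreates the intended execution order.  */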
29462 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29463 static void
29464 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29466 const char *section = ".dtors";
29467 char buf[16];
29469 if (priority != DEFAULT_INIT_PRIORITY)
29471 sprintf (buf, ".dtors.%.5u",
29472 /* Invert the numbering so the linker puts us in the proper
29473 order; constructors are run from right to left, and the
29474 linker sorts in increasing order. */
29475 MAX_INIT_PRIORITY - priority);
29476 section = buf;
29479 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29480 assemble_align (POINTER_SIZE);
29482 if (TARGET_RELOCATABLE)
29484 fputs ("\t.long (", asm_out_file);
29485 output_addr_const (asm_out_file, symbol);
29486 fputs (")@fixup\n", asm_out_file);
29488 else
29489 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29492 void
29493 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29495 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29497 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29498 ASM_OUTPUT_LABEL (file, name);
29499 fputs (DOUBLE_INT_ASM_OP, file);
29500 rs6000_output_function_entry (file, name);
29501 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29502 if (DOT_SYMBOLS)
29504 fputs ("\t.size\t", file);
29505 assemble_name (file, name);
29506 fputs (",24\n\t.type\t.", file);
29507 assemble_name (file, name);
29508 fputs (",@function\n", file);
29509 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29511 fputs ("\t.globl\t.", file);
29512 assemble_name (file, name);
29513 putc ('\n', file);
29516 else
29517 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29518 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29519 rs6000_output_function_entry (file, name);
29520 fputs (":\n", file);
29521 return;
29524 if (TARGET_RELOCATABLE
29525 && !TARGET_SECURE_PLT
29526 && (get_pool_size () != 0 || crtl->profile)
29527 && uses_TOC ())
29529 char buf[256];
29531 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29533 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29534 fprintf (file, "\t.long ");
29535 assemble_name (file, buf);
29536 putc ('-', file);
29537 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29538 assemble_name (file, buf);
29539 putc ('\n', file);
29542 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29543 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29545 if (DEFAULT_ABI == ABI_AIX)
29547 const char *desc_name, *orig_name;
29549 orig_name = (*targetm.strip_name_encoding) (name);
29550 desc_name = orig_name;
29551 while (*desc_name == '.')
29552 desc_name++;
29554 if (TREE_PUBLIC (decl))
29555 fprintf (file, "\t.globl %s\n", desc_name);
29557 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29558 fprintf (file, "%s:\n", desc_name);
29559 fprintf (file, "\t.long %s\n", orig_name);
29560 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29561 fputs ("\t.long 0\n", file);
29562 fprintf (file, "\t.previous\n");
29564 ASM_OUTPUT_LABEL (file, name);
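/* Illustrative 64-bit ELFv1 output from the branch above for a public
   function "foo" with DOT_SYMBOLS in effect (a sketch; assembly
   spacing differs):

	.section ".opd","aw"
	.align 3
foo:
	.quad	.foo,.TOC.@tocbase,0
	.previous
	.size	foo,24
	.type	.foo,@function
	.globl	.foo
.foo:

   i.e. "foo" names the 24-byte function descriptor in .opd while
   ".foo" labels the actual code entry point.  */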
29567 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29568 static void
29569 rs6000_elf_file_end (void)
29571 #ifdef HAVE_AS_GNU_ATTRIBUTE
29572 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29574 if (rs6000_passes_float)
29575 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29576 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29577 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29578 : 2));
29579 if (rs6000_passes_vector)
29580 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29581 (TARGET_ALTIVEC_ABI ? 2
29582 : TARGET_SPE_ABI ? 3
29583 : 1));
29584 if (rs6000_returns_struct)
29585 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29586 aix_struct_return ? 2 : 1);
29588 #endif
29589 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29590 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29591 file_end_indicate_exec_stack ();
29592 #endif
29594 #endif /* TARGET_ELF */
29596 #if TARGET_XCOFF
29597 static void
29598 rs6000_xcoff_asm_output_anchor (rtx symbol)
29600 char buffer[100];
29602 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29603 SYMBOL_REF_BLOCK_OFFSET (symbol));
29604 fprintf (asm_out_file, "%s", SET_ASM_OP);
29605 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
29606 fprintf (asm_out_file, ",");
29607 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
29608 fprintf (asm_out_file, "\n");
29611 static void
29612 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29614 fputs (GLOBAL_ASM_OP, stream);
29615 RS6000_OUTPUT_BASENAME (stream, name);
29616 putc ('\n', stream);
29619 /* A get_unnamed_decl callback, used for read-only sections. PTR
29620 points to the section string variable. */
29622 static void
29623 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29625 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29626 *(const char *const *) directive,
29627 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29630 /* Likewise for read-write sections. */
29632 static void
29633 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29635 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29636 *(const char *const *) directive,
29637 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29640 static void
29641 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29643 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29644 *(const char *const *) directive,
29645 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29648 /* A get_unnamed_section callback, used for switching to toc_section. */
29650 static void
29651 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29653 if (TARGET_MINIMAL_TOC)
29655 /* toc_section is always selected at least once from
29656 rs6000_xcoff_file_start, so this is guaranteed to
29657 always be defined once and only once in each file. */
29658 if (!toc_initialized)
29660 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29661 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29662 toc_initialized = 1;
29664 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29665 (TARGET_32BIT ? "" : ",3"));
29667 else
29668 fputs ("\t.toc\n", asm_out_file);
29671 /* Implement TARGET_ASM_INIT_SECTIONS. */
29673 static void
29674 rs6000_xcoff_asm_init_sections (void)
29676 read_only_data_section
29677 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29678 &xcoff_read_only_section_name);
29680 private_data_section
29681 = get_unnamed_section (SECTION_WRITE,
29682 rs6000_xcoff_output_readwrite_section_asm_op,
29683 &xcoff_private_data_section_name);
29685 tls_data_section
29686 = get_unnamed_section (SECTION_TLS,
29687 rs6000_xcoff_output_tls_section_asm_op,
29688 &xcoff_tls_data_section_name);
29690 tls_private_data_section
29691 = get_unnamed_section (SECTION_TLS,
29692 rs6000_xcoff_output_tls_section_asm_op,
29693 &xcoff_private_data_section_name);
29695 read_only_private_data_section
29696 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29697 &xcoff_private_data_section_name);
29699 toc_section
29700 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29702 readonly_data_section = read_only_data_section;
29703 exception_section = data_section;
29706 static int
29707 rs6000_xcoff_reloc_rw_mask (void)
29709 return 3;
29712 static void
29713 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29714 tree decl ATTRIBUTE_UNUSED)
29716 int smclass;
29717 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29719 if (flags & SECTION_CODE)
29720 smclass = 0;
29721 else if (flags & SECTION_TLS)
29722 smclass = 3;
29723 else if (flags & SECTION_WRITE)
29724 smclass = 2;
29725 else
29726 smclass = 1;
29728 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29729 (flags & SECTION_CODE) ? "." : "",
29730 name, suffix[smclass], flags & SECTION_ENTSIZE);
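/* For example, a writable named section "mydata" whose SECTION_ENTSIZE
   bits encode an alignment of 8 bytes comes out as
   "\t.csect mydata[RW],3", while a code section gets a leading dot,
   e.g. "\t.csect .mytext[PR],2"; the trailing operand is the log2
   alignment computed by rs6000_xcoff_section_type_flags below.  */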
29733 #define IN_NAMED_SECTION(DECL) \
29734 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29735 && DECL_SECTION_NAME (DECL) != NULL)
29737 static section *
29738 rs6000_xcoff_select_section (tree decl, int reloc,
29739 unsigned HOST_WIDE_INT align)
29741 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29742 named section. */
29743 if (align > BIGGEST_ALIGNMENT)
29745 resolve_unique_section (decl, reloc, true);
29746 if (IN_NAMED_SECTION (decl))
29747 return get_named_section (decl, NULL, reloc);
29750 if (decl_readonly_section (decl, reloc))
29752 if (TREE_PUBLIC (decl))
29753 return read_only_data_section;
29754 else
29755 return read_only_private_data_section;
29757 else
29759 #if HAVE_AS_TLS
29760 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29762 if (TREE_PUBLIC (decl))
29763 return tls_data_section;
29764 else if (bss_initializer_p (decl))
29766 /* Convert to COMMON to emit in BSS. */
29767 DECL_COMMON (decl) = 1;
29768 return tls_comm_section;
29770 else
29771 return tls_private_data_section;
29773 else
29774 #endif
29775 if (TREE_PUBLIC (decl))
29776 return data_section;
29777 else
29778 return private_data_section;
29782 static void
29783 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
29785 const char *name;
29787 /* Use select_section for private data and uninitialized data with
29788 alignment <= BIGGEST_ALIGNMENT. */
29789 if (!TREE_PUBLIC (decl)
29790 || DECL_COMMON (decl)
29791 || (DECL_INITIAL (decl) == NULL_TREE
29792 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
29793 || DECL_INITIAL (decl) == error_mark_node
29794 || (flag_zero_initialized_in_bss
29795 && initializer_zerop (DECL_INITIAL (decl))))
29796 return;
29798 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29799 name = (*targetm.strip_name_encoding) (name);
29800 set_decl_section_name (decl, name);
29803 /* Select section for constant in constant pool.
29805 On RS/6000, all constants are in the private read-only data area.
29806 However, if this is being placed in the TOC it must be output as a
29807 toc entry. */
29809 static section *
29810 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
29811 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
29813 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29814 return toc_section;
29815 else
29816 return read_only_private_data_section;
29819 /* Remove any trailing [DS] or the like from the symbol name. */
29821 static const char *
29822 rs6000_xcoff_strip_name_encoding (const char *name)
29824 size_t len;
29825 if (*name == '*')
29826 name++;
29827 len = strlen (name);
29828 if (name[len - 1] == ']')
29829 return ggc_alloc_string (name, len - 4);
29830 else
29831 return name;
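/* Examples: "foo[DS]" and "foo[RW]" both become "foo", and a leading
   '*' is skipped, so "*bar" becomes "bar".  Note that the len - 4
   arithmetic assumes the bracketed storage class is exactly two
   characters wide, which holds for all the XCOFF classes used here.  */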
29834 /* Section attributes. AIX is always PIC. */
29836 static unsigned int
29837 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
29839 unsigned int align;
29840 unsigned int flags = default_section_type_flags (decl, name, reloc);
29842 /* Align to at least UNIT size. */
29843 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
29844 align = MIN_UNITS_PER_WORD;
29845 else
29846 /* Increase alignment of large objects if not already stricter. */
29847 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
29848 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
29849 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
29851 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
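/* Worked example: a 32-byte-aligned initialized array of 16 doubles is
   larger than MIN_UNITS_PER_WORD, so align = MAX (32, UNITS_PER_FP_WORD)
   = 32, and exact_log2 (32) = 5 is folded into the SECTION_ENTSIZE
   bits -- the same value rs6000_xcoff_asm_named_section prints after
   the storage-class suffix.  */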
29854 /* Output at beginning of assembler file.
29856 Initialize the section names for the RS/6000 at this point.
29858 Specify filename, including full path, to assembler.
29860 We want to go into the TOC section so at least one .toc will be emitted.
29861 Also, in order to output proper .bs/.es pairs, we need at least one static
29862 [RW] section emitted.
29864 Finally, declare mcount when profiling to make the assembler happy. */
29866 static void
29867 rs6000_xcoff_file_start (void)
29869 rs6000_gen_section_name (&xcoff_bss_section_name,
29870 main_input_filename, ".bss_");
29871 rs6000_gen_section_name (&xcoff_private_data_section_name,
29872 main_input_filename, ".rw_");
29873 rs6000_gen_section_name (&xcoff_read_only_section_name,
29874 main_input_filename, ".ro_");
29875 rs6000_gen_section_name (&xcoff_tls_data_section_name,
29876 main_input_filename, ".tls_");
29877 rs6000_gen_section_name (&xcoff_tbss_section_name,
29878 main_input_filename, ".tbss_[UL]");
29880 fputs ("\t.file\t", asm_out_file);
29881 output_quoted_string (asm_out_file, main_input_filename);
29882 fputc ('\n', asm_out_file);
29883 if (write_symbols != NO_DEBUG)
29884 switch_to_section (private_data_section);
29885 switch_to_section (text_section);
29886 if (profile_flag)
29887 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
29888 rs6000_file_start ();
29891 /* Output at end of assembler file.
29892 On the RS/6000, referencing data should automatically pull in text. */
29894 static void
29895 rs6000_xcoff_file_end (void)
29897 switch_to_section (text_section);
29898 fputs ("_section_.text:\n", asm_out_file);
29899 switch_to_section (data_section);
29900 fputs (TARGET_32BIT
29901 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
29902 asm_out_file);
29905 struct declare_alias_data
29907 FILE *file;
29908 bool function_descriptor;
29911 /* Declare alias N.  A helper function for symtab_node::call_for_symbol_and_aliases. */
29913 static bool
29914 rs6000_declare_alias (struct symtab_node *n, void *d)
29916 struct declare_alias_data *data = (struct declare_alias_data *)d;
29917 /* Main symbol is output specially, because varasm machinery does part of
29918 the job for us - we do not need to declare .globl/lglobs and such. */
29919 if (!n->alias || n->weakref)
29920 return false;
29922 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
29923 return false;
29925 /* Prevent assemble_alias from trying to use .set pseudo operation
29926 that does not behave as expected by the middle-end. */
29927 TREE_ASM_WRITTEN (n->decl) = true;
29929 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
29930 char *buffer = (char *) alloca (strlen (name) + 2);
29931 char *p;
29932 int dollar_inside = 0;
29934 strcpy (buffer, name);
29935 p = strchr (buffer, '$');
29936 while (p) {
29937 *p = '_';
29938 dollar_inside++;
29939 p = strchr (p + 1, '$');
29941 if (TREE_PUBLIC (n->decl))
29943 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
29945 if (dollar_inside) {
29946 if (data->function_descriptor)
29947 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
29948 else
29949 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
29951 if (data->function_descriptor)
29952 fputs ("\t.globl .", data->file);
29953 else
29954 fputs ("\t.globl ", data->file);
29955 RS6000_OUTPUT_BASENAME (data->file, buffer);
29956 putc ('\n', data->file);
29958 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
29959 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
29961 else
29963 if (dollar_inside)
29965 if (data->function_descriptor)
29966 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
29967 else
29968 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
29970 if (data->function_descriptor)
29971 fputs ("\t.lglobl .", data->file);
29972 else
29973 fputs ("\t.lglobl ", data->file);
29974 RS6000_OUTPUT_BASENAME (data->file, buffer);
29975 putc ('\n', data->file);
29977 if (data->function_descriptor)
29978 fputs (".", data->file);
29979 RS6000_OUTPUT_BASENAME (data->file, buffer);
29980 fputs (":\n", data->file);
29981 return false;
29984 /* This macro produces the initial definition of a function name.
29985 On the RS/6000, we need to place an extra '.' in the function name and
29986 output the function descriptor.
29987 Dollar signs are converted to underscores.
29989 The csect for the function will have already been created when
29990 text_section was selected. We do have to go back to that csect, however.
29992 The third and fourth parameters to the .function pseudo-op (16 and 044)
29993 are placeholders which no longer have any use.
29995 Because AIX assembler's .set command has unexpected semantics, we output
29996 all aliases as alternative labels in front of the definition. */
29998 void
29999 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
30001 char *buffer = (char *) alloca (strlen (name) + 1);
30002 char *p;
30003 int dollar_inside = 0;
30004 struct declare_alias_data data = {file, false};
30006 strcpy (buffer, name);
30007 p = strchr (buffer, '$');
30008 while (p) {
30009 *p = '_';
30010 dollar_inside++;
30011 p = strchr (p + 1, '$');
30013 if (TREE_PUBLIC (decl))
30015 if (!RS6000_WEAK || !DECL_WEAK (decl))
30017 if (dollar_inside) {
30018 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30019 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30021 fputs ("\t.globl .", file);
30022 RS6000_OUTPUT_BASENAME (file, buffer);
30023 putc ('\n', file);
30026 else
30028 if (dollar_inside) {
30029 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30030 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30032 fputs ("\t.lglobl .", file);
30033 RS6000_OUTPUT_BASENAME (file, buffer);
30034 putc ('\n', file);
30036 fputs ("\t.csect ", file);
30037 RS6000_OUTPUT_BASENAME (file, buffer);
30038 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
30039 RS6000_OUTPUT_BASENAME (file, buffer);
30040 fputs (":\n", file);
30041 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30042 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
30043 RS6000_OUTPUT_BASENAME (file, buffer);
30044 fputs (", TOC[tc0], 0\n", file);
30045 in_section = NULL;
30046 switch_to_section (function_section (decl));
30047 putc ('.', file);
30048 RS6000_OUTPUT_BASENAME (file, buffer);
30049 fputs (":\n", file);
30050 data.function_descriptor = true;
30051 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30052 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
30053 xcoffout_declare_function (file, decl, buffer);
30054 return;
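/* Illustrative 64-bit XCOFF output for a public function "foo" (a
   sketch; alias and debug output omitted):

	.csect foo[DS],3
foo:
	.llong .foo, TOC[tc0], 0
	.csect .text[PR]
.foo:

   so, as on ELFv1 above, "foo" is the descriptor and ".foo" the code
   entry point.  */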
30057 /* This macro produces the initial definition of an object (variable) name.
30058 Because AIX assembler's .set command has unexpected semantics, we output
30059 all aliases as alternative labels in front of the definition. */
30061 void
30062 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
30064 struct declare_alias_data data = {file, false};
30065 RS6000_OUTPUT_BASENAME (file, name);
30066 fputs (":\n", file);
30067 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30070 #ifdef HAVE_AS_TLS
30071 static void
30072 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
30074 rtx symbol;
30075 int flags;
30077 default_encode_section_info (decl, rtl, first);
30079 /* Careful not to prod global register variables. */
30080 if (!MEM_P (rtl))
30081 return;
30082 symbol = XEXP (rtl, 0);
30083 if (GET_CODE (symbol) != SYMBOL_REF)
30084 return;
30086 flags = SYMBOL_REF_FLAGS (symbol);
30088 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30089 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
30091 SYMBOL_REF_FLAGS (symbol) = flags;
30093 #endif /* HAVE_AS_TLS */
30094 #endif /* TARGET_XCOFF */
30096 /* Compute a (partial) cost for rtx X. Return true if the complete
30097 cost has been computed, and false if subexpressions should be
30098 scanned. In either case, *TOTAL contains the cost result. */
30100 static bool
30101 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
30102 int *total, bool speed)
30104 machine_mode mode = GET_MODE (x);
30106 switch (code)
30108 /* On the RS/6000, if it is valid in the insn, it is free. */
30109 case CONST_INT:
30110 if (((outer_code == SET
30111 || outer_code == PLUS
30112 || outer_code == MINUS)
30113 && (satisfies_constraint_I (x)
30114 || satisfies_constraint_L (x)))
30115 || (outer_code == AND
30116 && (satisfies_constraint_K (x)
30117 || (mode == SImode
30118 ? satisfies_constraint_L (x)
30119 : satisfies_constraint_J (x))
30120 || mask_operand (x, mode)
30121 || (mode == DImode
30122 && mask64_operand (x, DImode))))
30123 || ((outer_code == IOR || outer_code == XOR)
30124 && (satisfies_constraint_K (x)
30125 || (mode == SImode
30126 ? satisfies_constraint_L (x)
30127 : satisfies_constraint_J (x))))
30128 || outer_code == ASHIFT
30129 || outer_code == ASHIFTRT
30130 || outer_code == LSHIFTRT
30131 || outer_code == ROTATE
30132 || outer_code == ROTATERT
30133 || outer_code == ZERO_EXTRACT
30134 || (outer_code == MULT
30135 && satisfies_constraint_I (x))
30136 || ((outer_code == DIV || outer_code == UDIV
30137 || outer_code == MOD || outer_code == UMOD)
30138 && exact_log2 (INTVAL (x)) >= 0)
30139 || (outer_code == COMPARE
30140 && (satisfies_constraint_I (x)
30141 || satisfies_constraint_K (x)))
30142 || ((outer_code == EQ || outer_code == NE)
30143 && (satisfies_constraint_I (x)
30144 || satisfies_constraint_K (x)
30145 || (mode == SImode
30146 ? satisfies_constraint_L (x)
30147 : satisfies_constraint_J (x))))
30148 || (outer_code == GTU
30149 && satisfies_constraint_I (x))
30150 || (outer_code == LTU
30151 && satisfies_constraint_P (x)))
30153 *total = 0;
30154 return true;
30156 else if ((outer_code == PLUS
30157 && reg_or_add_cint_operand (x, VOIDmode))
30158 || (outer_code == MINUS
30159 && reg_or_sub_cint_operand (x, VOIDmode))
30160 || ((outer_code == SET
30161 || outer_code == IOR
30162 || outer_code == XOR)
30163 && (INTVAL (x)
30164 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
30166 *total = COSTS_N_INSNS (1);
30167 return true;
30169 /* FALLTHRU */
30171 case CONST_DOUBLE:
30172 case CONST_WIDE_INT:
30173 case CONST:
30174 case HIGH:
30175 case SYMBOL_REF:
30176 case MEM:
30177 /* When optimizing for size, MEM should be slightly more expensive
30178 than generating address, e.g., (plus (reg) (const)).
30179 L1 cache latency is about two instructions. */
30180 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
30181 return true;
30183 case LABEL_REF:
30184 *total = 0;
30185 return true;
30187 case PLUS:
30188 case MINUS:
30189 if (FLOAT_MODE_P (mode))
30190 *total = rs6000_cost->fp;
30191 else
30192 *total = COSTS_N_INSNS (1);
30193 return false;
30195 case MULT:
30196 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30197 && satisfies_constraint_I (XEXP (x, 1)))
30199 if (INTVAL (XEXP (x, 1)) >= -256
30200 && INTVAL (XEXP (x, 1)) <= 255)
30201 *total = rs6000_cost->mulsi_const9;
30202 else
30203 *total = rs6000_cost->mulsi_const;
30205 else if (mode == SFmode)
30206 *total = rs6000_cost->fp;
30207 else if (FLOAT_MODE_P (mode))
30208 *total = rs6000_cost->dmul;
30209 else if (mode == DImode)
30210 *total = rs6000_cost->muldi;
30211 else
30212 *total = rs6000_cost->mulsi;
30213 return false;
30215 case FMA:
30216 if (mode == SFmode)
30217 *total = rs6000_cost->fp;
30218 else
30219 *total = rs6000_cost->dmul;
30220 break;
30222 case DIV:
30223 case MOD:
30224 if (FLOAT_MODE_P (mode))
30226 *total = mode == DFmode ? rs6000_cost->ddiv
30227 : rs6000_cost->sdiv;
30228 return false;
30230 /* FALLTHRU */
30232 case UDIV:
30233 case UMOD:
30234 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30235 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
30237 if (code == DIV || code == MOD)
30238 /* Shift, addze */
30239 *total = COSTS_N_INSNS (2);
30240 else
30241 /* Shift */
30242 *total = COSTS_N_INSNS (1);
30244 else
30246 if (GET_MODE (XEXP (x, 1)) == DImode)
30247 *total = rs6000_cost->divdi;
30248 else
30249 *total = rs6000_cost->divsi;
30251 /* Add in shift and subtract for MOD. */
30252 if (code == MOD || code == UMOD)
30253 *total += COSTS_N_INSNS (2);
30254 return false;
30256 case CTZ:
30257 case FFS:
30258 *total = COSTS_N_INSNS (4);
30259 return false;
30261 case POPCOUNT:
30262 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30263 return false;
30265 case PARITY:
30266 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30267 return false;
30269 case NOT:
30270 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30272 *total = 0;
30273 return false;
30275 /* FALLTHRU */
30277 case AND:
30278 case CLZ:
30279 case IOR:
30280 case XOR:
30281 case ZERO_EXTRACT:
30282 *total = COSTS_N_INSNS (1);
30283 return false;
30285 case ASHIFT:
30286 case ASHIFTRT:
30287 case LSHIFTRT:
30288 case ROTATE:
30289 case ROTATERT:
30290 /* Handle mul_highpart. */
30291 if (outer_code == TRUNCATE
30292 && GET_CODE (XEXP (x, 0)) == MULT)
30294 if (mode == DImode)
30295 *total = rs6000_cost->muldi;
30296 else
30297 *total = rs6000_cost->mulsi;
30298 return true;
30300 else if (outer_code == AND)
30301 *total = 0;
30302 else
30303 *total = COSTS_N_INSNS (1);
30304 return false;
30306 case SIGN_EXTEND:
30307 case ZERO_EXTEND:
30308 if (GET_CODE (XEXP (x, 0)) == MEM)
30309 *total = 0;
30310 else
30311 *total = COSTS_N_INSNS (1);
30312 return false;
30314 case COMPARE:
30315 case NEG:
30316 case ABS:
30317 if (!FLOAT_MODE_P (mode))
30319 *total = COSTS_N_INSNS (1);
30320 return false;
30322 /* FALLTHRU */
30324 case FLOAT:
30325 case UNSIGNED_FLOAT:
30326 case FIX:
30327 case UNSIGNED_FIX:
30328 case FLOAT_TRUNCATE:
30329 *total = rs6000_cost->fp;
30330 return false;
30332 case FLOAT_EXTEND:
30333 if (mode == DFmode)
30334 *total = 0;
30335 else
30336 *total = rs6000_cost->fp;
30337 return false;
30339 case UNSPEC:
30340 switch (XINT (x, 1))
30342 case UNSPEC_FRSP:
30343 *total = rs6000_cost->fp;
30344 return true;
30346 default:
30347 break;
30349 break;
30351 case CALL:
30352 case IF_THEN_ELSE:
30353 if (!speed)
30355 *total = COSTS_N_INSNS (1);
30356 return true;
30358 else if (FLOAT_MODE_P (mode)
30359 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30361 *total = rs6000_cost->fp;
30362 return false;
30364 break;
30366 case NE:
30367 case EQ:
30368 case GTU:
30369 case LTU:
30370 /* Carry bit requires mode == Pmode.
30371 NEG or PLUS already counted so only add one. */
30372 if (mode == Pmode
30373 && (outer_code == NEG || outer_code == PLUS))
30375 *total = COSTS_N_INSNS (1);
30376 return true;
30378 if (outer_code == SET)
30380 if (XEXP (x, 1) == const0_rtx)
30382 if (TARGET_ISEL && !TARGET_MFCRF)
30383 *total = COSTS_N_INSNS (8);
30384 else
30385 *total = COSTS_N_INSNS (2);
30386 return true;
30388 else if (mode == Pmode)
30390 *total = COSTS_N_INSNS (3);
30391 return false;
30394 /* FALLTHRU */
30396 case GT:
30397 case LT:
30398 case UNORDERED:
30399 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30401 if (TARGET_ISEL && !TARGET_MFCRF)
30402 *total = COSTS_N_INSNS (8);
30403 else
30404 *total = COSTS_N_INSNS (2);
30405 return true;
30407 /* CC COMPARE. */
30408 if (outer_code == COMPARE)
30410 *total = 0;
30411 return true;
30413 break;
30415 default:
30416 break;
30419 return false;
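/* For instance, (plus (reg) (const_int 12)) reaches the CONST_INT case
   with outer_code == PLUS, and 12 satisfies constraint 'I' (signed
   16-bit immediate), so the constant is free -- it rides along in the
   addi.  A full 32-bit constant like 0x12345678 being SET into a
   register instead costs COSTS_N_INSNS (1), approximating the lis/ori
   pair needed to build it.  */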
30422 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
30424 static bool
30425 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30426 bool speed)
30428 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30430 fprintf (stderr,
30431 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30432 "opno = %d, total = %d, speed = %s, x:\n",
30433 ret ? "complete" : "scan inner",
30434 GET_RTX_NAME (code),
30435 GET_RTX_NAME (outer_code),
30436 opno,
30437 *total,
30438 speed ? "true" : "false");
30440 debug_rtx (x);
30442 return ret;
30445 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30447 static int
30448 rs6000_debug_address_cost (rtx x, machine_mode mode,
30449 addr_space_t as, bool speed)
30451 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30453 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30454 ret, speed ? "true" : "false");
30455 debug_rtx (x);
30457 return ret;
30461 /* A C expression returning the cost of moving data from a register of class
30462 CLASS1 to one of CLASS2. */
30464 static int
30465 rs6000_register_move_cost (machine_mode mode,
30466 reg_class_t from, reg_class_t to)
30468 int ret;
30470 if (TARGET_DEBUG_COST)
30471 dbg_cost_ctrl++;
30473 /* Moves from/to GENERAL_REGS. */
30474 if (reg_classes_intersect_p (to, GENERAL_REGS)
30475 || reg_classes_intersect_p (from, GENERAL_REGS))
30477 reg_class_t rclass = from;
30479 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30480 rclass = to;
30482 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30483 ret = (rs6000_memory_move_cost (mode, rclass, false)
30484 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30486 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30487 shift. */
30488 else if (rclass == CR_REGS)
30489 ret = 4;
30491 /* For those processors that have slow LR/CTR moves, make them more
30492 expensive than memory in order to bias spills to memory.  */
30493 else if ((rs6000_cpu == PROCESSOR_POWER6
30494 || rs6000_cpu == PROCESSOR_POWER7
30495 || rs6000_cpu == PROCESSOR_POWER8)
30496 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
30497 ret = 6 * hard_regno_nregs[0][mode];
30499 else
30500 /* A move will cost one instruction per GPR moved. */
30501 ret = 2 * hard_regno_nregs[0][mode];
30504 /* If we have VSX, we can easily move between FPR or Altivec registers. */
30505 else if (VECTOR_MEM_VSX_P (mode)
30506 && reg_classes_intersect_p (to, VSX_REGS)
30507 && reg_classes_intersect_p (from, VSX_REGS))
30508 ret = 2 * hard_regno_nregs[32][mode];
30510 /* Moving between two similar registers is just one instruction. */
30511 else if (reg_classes_intersect_p (to, from))
30512 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
30514 /* Everything else has to go through GENERAL_REGS. */
30515 else
30516 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
30517 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
30519 if (TARGET_DEBUG_COST)
30521 if (dbg_cost_ctrl == 1)
30522 fprintf (stderr,
30523 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
30524 ret, GET_MODE_NAME (mode), reg_class_names[from],
30525 reg_class_names[to]);
30526 dbg_cost_ctrl--;
30529 return ret;
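/* For example, on POWER7 a DImode move between a GPR and the CTR
   register costs 6 * hard_regno_nregs[0][DImode], deliberately above
   the 4-per-register memory move cost so the allocator prefers a
   stack spill to bouncing values through the slow SPR moves.  */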
30532 /* A C expression returning the cost of moving data of MODE from a register to
30533 or from memory. */
30535 static int
30536 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
30537 bool in ATTRIBUTE_UNUSED)
30539 int ret;
30541 if (TARGET_DEBUG_COST)
30542 dbg_cost_ctrl++;
30544 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30545 ret = 4 * hard_regno_nregs[0][mode];
30546 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30547 || reg_classes_intersect_p (rclass, VSX_REGS)))
30548 ret = 4 * hard_regno_nregs[32][mode];
30549 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30550 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30551 else
30552 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30554 if (TARGET_DEBUG_COST)
30556 if (dbg_cost_ctrl == 1)
30557 fprintf (stderr,
30558 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30559 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30560 dbg_cost_ctrl--;
30563 return ret;
30566 /* Returns a code for a target-specific builtin that implements
30567 reciprocal of the function, or NULL_TREE if not available. */
30569 static tree
30570 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30571 bool sqrt ATTRIBUTE_UNUSED)
30573 if (optimize_insn_for_size_p ())
30574 return NULL_TREE;
30576 if (md_fn)
30577 switch (fn)
30579 case VSX_BUILTIN_XVSQRTDP:
30580 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30581 return NULL_TREE;
30583 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30585 case VSX_BUILTIN_XVSQRTSP:
30586 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30587 return NULL_TREE;
30589 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30591 default:
30592 return NULL_TREE;
30595 else
30596 switch (fn)
30598 case BUILT_IN_SQRT:
30599 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30600 return NULL_TREE;
30602 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30604 case BUILT_IN_SQRTF:
30605 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30606 return NULL_TREE;
30608 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30610 default:
30611 return NULL_TREE;
30615 /* Load up a constant. If the mode is a vector mode, splat the value across
30616 all of the vector elements. */
30618 static rtx
30619 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
30621 rtx reg;
30623 if (mode == SFmode || mode == DFmode)
30625 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30626 reg = force_reg (mode, d);
30628 else if (mode == V4SFmode)
30630 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30631 rtvec v = gen_rtvec (4, d, d, d, d);
30632 reg = gen_reg_rtx (mode);
30633 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30635 else if (mode == V2DFmode)
30637 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30638 rtvec v = gen_rtvec (2, d, d);
30639 reg = gen_reg_rtx (mode);
30640 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30642 else
30643 gcc_unreachable ();
30645 return reg;
30648 /* Generate an FMA instruction. */
30650 static void
30651 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30653 machine_mode mode = GET_MODE (target);
30654 rtx dst;
30656 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30657 gcc_assert (dst != NULL);
30659 if (dst != target)
30660 emit_move_insn (target, dst);
30663 /* Generate an FMSUB instruction: dst = fma(m1, m2, -a). */
30665 static void
30666 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30668 machine_mode mode = GET_MODE (target);
30669 rtx dst;
30671 /* Altivec does not support fms directly;
30672 generate in terms of fma in that case. */
30673 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30674 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30675 else
30677 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30678 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30680 gcc_assert (dst != NULL);
30682 if (dst != target)
30683 emit_move_insn (target, dst);
30686 /* Generate an FNMSUB instruction: dst = -fma(m1, m2, -a). */
30688 static void
30689 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30691 machine_mode mode = GET_MODE (dst);
30692 rtx r;
30694 /* This is a tad more complicated, since the fnma_optab is for
30695 a different expression: fma(-m1, m2, a), which is the same
30696 thing except in the case of signed zeros.
30698 Fortunately we know that if FMA is supported that FNMSUB is
30699 also supported in the ISA. Just expand it directly. */
30701 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30703 r = gen_rtx_NEG (mode, a);
30704 r = gen_rtx_FMA (mode, m1, m2, r);
30705 r = gen_rtx_NEG (mode, r);
30706 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
30709 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30710 add a reg_note saying that this was a division. Support both scalar and
30711 vector divide. Assumes no trapping math and finite arguments. */
30713 void
30714 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30716 machine_mode mode = GET_MODE (dst);
30717 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30718 int i;
30720 /* Low precision estimates guarantee 5 bits of accuracy. High
30721 precision estimates guarantee 14 bits of accuracy. SFmode
30722 requires 23 bits of accuracy. DFmode requires 52 bits of
30723 accuracy. Each pass at least doubles the accuracy, leading
30724 to the following. */
30725 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30726 if (mode == DFmode || mode == V2DFmode)
30727 passes++;
30729 enum insn_code code = optab_handler (smul_optab, mode);
30730 insn_gen_fn gen_mul = GEN_FCN (code);
30732 gcc_assert (code != CODE_FOR_nothing);
30734 one = rs6000_load_constant_and_splat (mode, dconst1);
30736 /* x0 = 1./d estimate */
30737 x0 = gen_reg_rtx (mode);
30738 emit_insn (gen_rtx_SET (VOIDmode, x0,
30739 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30740 UNSPEC_FRES)));
30742 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
30743 if (passes > 1) {
30745 /* e0 = 1. - d * x0 */
30746 e0 = gen_reg_rtx (mode);
30747 rs6000_emit_nmsub (e0, d, x0, one);
30749 /* x1 = x0 + e0 * x0 */
30750 x1 = gen_reg_rtx (mode);
30751 rs6000_emit_madd (x1, e0, x0, x0);
30753 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30754 ++i, xprev = xnext, eprev = enext) {
30756 /* enext = eprev * eprev */
30757 enext = gen_reg_rtx (mode);
30758 emit_insn (gen_mul (enext, eprev, eprev));
30760 /* xnext = xprev + enext * xprev */
30761 xnext = gen_reg_rtx (mode);
30762 rs6000_emit_madd (xnext, enext, xprev, xprev);
30765 } else
30766 xprev = x0;
30768 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30770 /* u = n * xprev */
30771 u = gen_reg_rtx (mode);
30772 emit_insn (gen_mul (u, n, xprev));
30774 /* v = n - (d * u) */
30775 v = gen_reg_rtx (mode);
30776 rs6000_emit_nmsub (v, d, u, n);
30778 /* dst = (v * xprev) + u */
30779 rs6000_emit_madd (dst, v, xprev, u);
30781 if (note_p)
30782 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
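/* The math behind the loop above, as a sketch: with x ~= 1/d and
   e = 1 - d*x, the refined estimate is

     x' = x + e*x = x*(2 - d*x),

   and the new error is 1 - d*x' = e*e, so every pass squares the error
   (doubling the bits of accuracy).  The final step folds in the
   numerator: u = n*x, v = n - d*u, dst = v*x + u, which expands to
   n*x*(2 - d*x) while keeping the last operation a fused multiply-add
   for accuracy.  */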
30785 /* Newton-Raphson approximation of single/double-precision floating point
30786 rsqrt. Assumes no trapping math and finite arguments. */
30788 void
30789 rs6000_emit_swrsqrt (rtx dst, rtx src)
30791 machine_mode mode = GET_MODE (src);
30792 rtx x0 = gen_reg_rtx (mode);
30793 rtx y = gen_reg_rtx (mode);
30795 /* Low precision estimates guarantee 5 bits of accuracy. High
30796 precision estimates guarantee 14 bits of accuracy. SFmode
30797 requires 23 bits of accuracy. DFmode requires 52 bits of
30798 accuracy. Each pass at least doubles the accuracy, leading
30799 to the following. */
30800 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30801 if (mode == DFmode || mode == V2DFmode)
30802 passes++;
30804 REAL_VALUE_TYPE dconst3_2;
30805 int i;
30806 rtx halfthree;
30807 enum insn_code code = optab_handler (smul_optab, mode);
30808 insn_gen_fn gen_mul = GEN_FCN (code);
30810 gcc_assert (code != CODE_FOR_nothing);
30812 /* Load up the constant 1.5 either as a scalar, or as a vector. */
30813 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
30814 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
30816 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
30818 /* x0 = rsqrt estimate */
30819 emit_insn (gen_rtx_SET (VOIDmode, x0,
30820 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
30821 UNSPEC_RSQRT)));
30823 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
30824 rs6000_emit_msub (y, src, halfthree, src);
30826 for (i = 0; i < passes; i++)
30828 rtx x1 = gen_reg_rtx (mode);
30829 rtx u = gen_reg_rtx (mode);
30830 rtx v = gen_reg_rtx (mode);
30832 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
30833 emit_insn (gen_mul (u, x0, x0));
30834 rs6000_emit_nmsub (v, y, u, halfthree);
30835 emit_insn (gen_mul (x1, x0, v));
30836 x0 = x1;
30839 emit_move_insn (dst, x0);
30840 return;
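/* Sketch of the iteration above: the Newton-Raphson step for
   1/sqrt(s) is

     x' = x * (1.5 - 0.5*s*x*x),

   and since y = 0.5*s is loop-invariant it is computed once (as
   1.5*s - s, reusing the 1.5 constant already loaded).  Each pass then
   computes u = x*x, v = 1.5 - y*u, x = x*v, again roughly doubling the
   bits of accuracy.  */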
30843 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
30844 (Power7) targets. DST is the target, and SRC is the argument operand. */
30846 void
30847 rs6000_emit_popcount (rtx dst, rtx src)
30849 machine_mode mode = GET_MODE (dst);
30850 rtx tmp1, tmp2;
30852 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
30853 if (TARGET_POPCNTD)
30855 if (mode == SImode)
30856 emit_insn (gen_popcntdsi2 (dst, src));
30857 else
30858 emit_insn (gen_popcntddi2 (dst, src));
30859 return;
30862 tmp1 = gen_reg_rtx (mode);
30864 if (mode == SImode)
30866 emit_insn (gen_popcntbsi2 (tmp1, src));
30867 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
30868 NULL_RTX, 0);
30869 tmp2 = force_reg (SImode, tmp2);
30870 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
30872 else
30874 emit_insn (gen_popcntbdi2 (tmp1, src));
30875 tmp2 = expand_mult (DImode, tmp1,
30876 GEN_INT ((HOST_WIDE_INT)
30877 0x01010101 << 32 | 0x01010101),
30878 NULL_RTX, 0);
30879 tmp2 = force_reg (DImode, tmp2);
30880 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
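/* Worked example for the SImode fallback path: popcntb replaces each
   byte with its own population count, and multiplying by 0x01010101
   sums all four byte counts into the most significant byte.  E.g.
   src = 0xFF00FF00 -> popcntb gives 0x08000800; times 0x01010101 is
   0x10080800 (mod 2^32), whose top byte 0x10 = 16 is exposed by the
   final shift right by 24.  */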
30885 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
30886 target, and SRC is the argument operand. */
30888 void
30889 rs6000_emit_parity (rtx dst, rtx src)
30891 machine_mode mode = GET_MODE (dst);
30892 rtx tmp;
30894 tmp = gen_reg_rtx (mode);
30896 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
30897 if (TARGET_CMPB)
30899 if (mode == SImode)
30901 emit_insn (gen_popcntbsi2 (tmp, src));
30902 emit_insn (gen_paritysi2_cmpb (dst, tmp));
30904 else
30906 emit_insn (gen_popcntbdi2 (tmp, src));
30907 emit_insn (gen_paritydi2_cmpb (dst, tmp));
30909 return;
30912 if (mode == SImode)
30914 /* Is mult+shift >= shift+xor+shift+xor? */
30915 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
30917 rtx tmp1, tmp2, tmp3, tmp4;
30919 tmp1 = gen_reg_rtx (SImode);
30920 emit_insn (gen_popcntbsi2 (tmp1, src));
30922 tmp2 = gen_reg_rtx (SImode);
30923 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
30924 tmp3 = gen_reg_rtx (SImode);
30925 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
30927 tmp4 = gen_reg_rtx (SImode);
30928 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
30929 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
30931 else
30932 rs6000_emit_popcount (tmp, src);
30933 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
30935 else
30937 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
30938 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
30940 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
30942 tmp1 = gen_reg_rtx (DImode);
30943 emit_insn (gen_popcntbdi2 (tmp1, src));
30945 tmp2 = gen_reg_rtx (DImode);
30946 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
30947 tmp3 = gen_reg_rtx (DImode);
30948 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
30950 tmp4 = gen_reg_rtx (DImode);
30951 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
30952 tmp5 = gen_reg_rtx (DImode);
30953 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
30955 tmp6 = gen_reg_rtx (DImode);
30956 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
30957 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
30959 else
30960 rs6000_emit_popcount (tmp, src);
30961 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
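/* The shift/xor fallback works because popcntb leaves each byte's
   population count in place, and the low bit of a sum of byte counts
   is the parity of those bytes: XOR-folding the high half onto the low
   half (shifts of 16 then 8 for SImode) combines all byte parities
   into the low byte, and the final AND with 1 extracts the result.
   The cost checks pick this chain only when the multiply used by
   rs6000_emit_popcount would be at least as expensive.  */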
30965 /* Expand an Altivec constant permutation for little endian mode.
30966 There are two issues: First, the two input operands must be
30967 swapped so that together they form a double-wide array in LE
30968 order. Second, the vperm instruction has surprising behavior
30969 in LE mode: it interprets the elements of the source vectors
30970 in BE mode ("left to right") and interprets the elements of
30971 the destination vector in LE mode ("right to left"). To
30972 correct for this, we must subtract each element of the permute
30973 control vector from 31.
30975 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
30976 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
30977 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
30978 serve as the permute control vector. Then, in BE mode,
30980 vperm 9,10,11,12
30982 places the desired result in vr9. However, in LE mode the
30983 vector contents will be
30985 vr10 = 00000003 00000002 00000001 00000000
30986 vr11 = 00000007 00000006 00000005 00000004
30988 The result of the vperm using the same permute control vector is
30990 vr9 = 05000000 07000000 01000000 03000000
30992 That is, the leftmost 4 bytes of vr10 are interpreted as the
30993 source for the rightmost 4 bytes of vr9, and so on.
30995 If we change the permute control vector to
30997 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
30999 and issue
31001 vperm 9,11,10,12
31003 we get the desired
31005 vr9 = 00000006 00000004 00000002 00000000. */
31007 void
31008 altivec_expand_vec_perm_const_le (rtx operands[4])
31010 unsigned int i;
31011 rtx perm[16];
31012 rtx constv, unspec;
31013 rtx target = operands[0];
31014 rtx op0 = operands[1];
31015 rtx op1 = operands[2];
31016 rtx sel = operands[3];
31018 /* Unpack and adjust the constant selector. */
31019 for (i = 0; i < 16; ++i)
31021 rtx e = XVECEXP (sel, 0, i);
31022 unsigned int elt = 31 - (INTVAL (e) & 31);
31023 perm[i] = GEN_INT (elt);
31026 /* Expand to a permute, swapping the inputs and using the
31027 adjusted selector. */
31028 if (!REG_P (op0))
31029 op0 = force_reg (V16QImode, op0);
31030 if (!REG_P (op1))
31031 op1 = force_reg (V16QImode, op1);
31033 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
31034 constv = force_reg (V16QImode, constv);
31035 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
31036 UNSPEC_VPERM);
31037 if (!REG_P (target))
31039 rtx tmp = gen_reg_rtx (V16QImode);
31040 emit_move_insn (tmp, unspec);
31041 unspec = tmp;
31044 emit_move_insn (target, unspec);
31047 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
31048 permute control vector. But here it's not a constant, so we must
31049 generate a vector NAND or NOR to do the adjustment. */
31051 void
31052 altivec_expand_vec_perm_le (rtx operands[4])
31054 rtx notx, iorx, unspec;
31055 rtx target = operands[0];
31056 rtx op0 = operands[1];
31057 rtx op1 = operands[2];
31058 rtx sel = operands[3];
31059 rtx tmp = target;
31060 rtx norreg = gen_reg_rtx (V16QImode);
31061 machine_mode mode = GET_MODE (target);
31063 /* Get everything in regs so the pattern matches. */
31064 if (!REG_P (op0))
31065 op0 = force_reg (mode, op0);
31066 if (!REG_P (op1))
31067 op1 = force_reg (mode, op1);
31068 if (!REG_P (sel))
31069 sel = force_reg (V16QImode, sel);
31070 if (!REG_P (target))
31071 tmp = gen_reg_rtx (mode);
31073 /* Invert the selector with a VNAND if available, else a VNOR.
31074 The VNAND is preferred for future fusion opportunities. */
31075 notx = gen_rtx_NOT (V16QImode, sel);
31076 iorx = (TARGET_P8_VECTOR
31077 ? gen_rtx_IOR (V16QImode, notx, notx)
31078 : gen_rtx_AND (V16QImode, notx, notx));
31079 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
31081 /* Permute with operands reversed and adjusted selector. */
31082 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
31083 UNSPEC_VPERM);
31085 /* Copy into target, possibly by way of a register. */
31086 if (!REG_P (target))
31088 emit_move_insn (tmp, unspec);
31089 unspec = tmp;
31092 emit_move_insn (target, unspec);
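/* Why a plain bitwise NOT (via VNAND or VNOR of SEL with itself) is the
   right adjustment: vperm only consumes the low five bits of each
   selector byte, and for any 5-bit value e we have ~e == 31 - e
   (mod 32), which is exactly the "subtract each element from 31" fixup
   described before altivec_expand_vec_perm_const_le -- no constant
   vector needed.  */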
31095 /* Expand an Altivec constant permutation. Return true if we match
31096 an efficient implementation; false to fall back to VPERM. */
31098 bool
31099 altivec_expand_vec_perm_const (rtx operands[4])
31101 struct altivec_perm_insn {
31102 HOST_WIDE_INT mask;
31103 enum insn_code impl;
31104 unsigned char perm[16];
31106 static const struct altivec_perm_insn patterns[] = {
31107 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
31108 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
31109 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
31110 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
31111 { OPTION_MASK_ALTIVEC,
31112 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
31113 : CODE_FOR_altivec_vmrglb_direct),
31114 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
31115 { OPTION_MASK_ALTIVEC,
31116 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
31117 : CODE_FOR_altivec_vmrglh_direct),
31118 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
31119 { OPTION_MASK_ALTIVEC,
31120 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
31121 : CODE_FOR_altivec_vmrglw_direct),
31122 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
31123 { OPTION_MASK_ALTIVEC,
31124 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
31125 : CODE_FOR_altivec_vmrghb_direct),
31126 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
31127 { OPTION_MASK_ALTIVEC,
31128 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
31129 : CODE_FOR_altivec_vmrghh_direct),
31130 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
31131 { OPTION_MASK_ALTIVEC,
31132 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
31133 : CODE_FOR_altivec_vmrghw_direct),
31134 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
31135 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
31136 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
31137 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
31138 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
31141 unsigned int i, j, elt, which;
31142 unsigned char perm[16];
31143 rtx target, op0, op1, sel, x;
31144 bool one_vec;
31146 target = operands[0];
31147 op0 = operands[1];
31148 op1 = operands[2];
31149 sel = operands[3];
31151 /* Unpack the constant selector. */
31152 for (i = which = 0; i < 16; ++i)
31154 rtx e = XVECEXP (sel, 0, i);
31155 elt = INTVAL (e) & 31;
31156 which |= (elt < 16 ? 1 : 2);
31157 perm[i] = elt;
31160 /* Simplify the constant selector based on operands. */
31161 switch (which)
31163 default:
31164 gcc_unreachable ();
31166 case 3:
31167 one_vec = false;
31168 if (!rtx_equal_p (op0, op1))
31169 break;
31170 /* FALLTHRU */
31172 case 2:
31173 for (i = 0; i < 16; ++i)
31174 perm[i] &= 15;
31175 op0 = op1;
31176 one_vec = true;
31177 break;
31179 case 1:
31180 op1 = op0;
31181 one_vec = true;
31182 break;
31185 /* Look for splat patterns. */
31186 if (one_vec)
31188 elt = perm[0];
31190 for (i = 0; i < 16; ++i)
31191 if (perm[i] != elt)
31192 break;
31193 if (i == 16)
31195 if (!BYTES_BIG_ENDIAN)
31196 elt = 15 - elt;
31197 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
31198 return true;
31201 if (elt % 2 == 0)
31203 for (i = 0; i < 16; i += 2)
31204 if (perm[i] != elt || perm[i + 1] != elt + 1)
31205 break;
31206 if (i == 16)
31208 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
31209 x = gen_reg_rtx (V8HImode);
31210 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
31211 GEN_INT (field)));
31212 emit_move_insn (target, gen_lowpart (V16QImode, x));
31213 return true;
31217 if (elt % 4 == 0)
31219 for (i = 0; i < 16; i += 4)
31220 if (perm[i] != elt
31221 || perm[i + 1] != elt + 1
31222 || perm[i + 2] != elt + 2
31223 || perm[i + 3] != elt + 3)
31224 break;
31225 if (i == 16)
31227 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
31228 x = gen_reg_rtx (V4SImode);
31229 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
31230 GEN_INT (field)));
31231 emit_move_insn (target, gen_lowpart (V16QImode, x));
31232 return true;
31237 /* Look for merge and pack patterns. */
31238 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
31240 bool swapped;
31242 if ((patterns[j].mask & rs6000_isa_flags) == 0)
31243 continue;
31245 elt = patterns[j].perm[0];
31246 if (perm[0] == elt)
31247 swapped = false;
31248 else if (perm[0] == elt + 16)
31249 swapped = true;
31250 else
31251 continue;
31252 for (i = 1; i < 16; ++i)
31254 elt = patterns[j].perm[i];
31255 if (swapped)
31256 elt = (elt >= 16 ? elt - 16 : elt + 16);
31257 else if (one_vec && elt >= 16)
31258 elt -= 16;
31259 if (perm[i] != elt)
31260 break;
31262 if (i == 16)
31264 enum insn_code icode = patterns[j].impl;
31265 machine_mode omode = insn_data[icode].operand[0].mode;
31266 machine_mode imode = insn_data[icode].operand[1].mode;
31268 /* For little-endian, don't use vpkuwum and vpkuhum if the
31269 underlying vector type is not V4SI and V8HI, respectively.
31270 For example, using vpkuwum with a V8HI picks up the even
31271 halfwords (BE numbering) when the even halfwords (LE
31272 numbering) are what we need. */
31273 if (!BYTES_BIG_ENDIAN
31274 && icode == CODE_FOR_altivec_vpkuwum_direct
31275 && ((GET_CODE (op0) == REG
31276 && GET_MODE (op0) != V4SImode)
31277 || (GET_CODE (op0) == SUBREG
31278 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31279 continue;
31280 if (!BYTES_BIG_ENDIAN
31281 && icode == CODE_FOR_altivec_vpkuhum_direct
31282 && ((GET_CODE (op0) == REG
31283 && GET_MODE (op0) != V8HImode)
31284 || (GET_CODE (op0) == SUBREG
31285 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31286 continue;
31288 /* For little-endian, the two input operands must be swapped
31289 (or swapped back) to ensure proper right-to-left numbering
31290 from 0 to 2N-1. */
31291 if (swapped ^ !BYTES_BIG_ENDIAN)
31292 std::swap (op0, op1);
31293 if (imode != V16QImode)
31295 op0 = gen_lowpart (imode, op0);
31296 op1 = gen_lowpart (imode, op1);
31298 if (omode == V16QImode)
31299 x = target;
31300 else
31301 x = gen_reg_rtx (omode);
31302 emit_insn (GEN_FCN (icode) (x, op0, op1));
31303 if (omode != V16QImode)
31304 emit_move_insn (target, gen_lowpart (V16QImode, x));
31305 return true;
31309 if (!BYTES_BIG_ENDIAN)
31311 altivec_expand_vec_perm_const_le (operands);
31312 return true;
31315 return false;
31318 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31319 Return true if we match an efficient implementation. */
31321 static bool
31322 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31323 unsigned char perm0, unsigned char perm1)
31325 rtx x;
31327 /* If both selectors come from the same operand, fold to single op. */
31328 if ((perm0 & 2) == (perm1 & 2))
31330 if (perm0 & 2)
31331 op0 = op1;
31332 else
31333 op1 = op0;
31335 /* If both operands are equal, fold to simpler permutation. */
31336 if (rtx_equal_p (op0, op1))
31338 perm0 = perm0 & 1;
31339 perm1 = (perm1 & 1) + 2;
31341 /* If the first selector comes from the second operand, swap. */
31342 else if (perm0 & 2)
31344 if (perm1 & 2)
31345 return false;
31346 perm0 -= 2;
31347 perm1 += 2;
31348 std::swap (op0, op1);
31350 /* If the second selector does not come from the second operand, fail. */
31351 else if ((perm1 & 2) == 0)
31352 return false;
31354 /* Success! */
31355 if (target != NULL)
31357 machine_mode vmode, dmode;
31358 rtvec v;
31360 vmode = GET_MODE (target);
31361 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31362 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31363 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31364 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31365 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31366 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31368 return true;
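/* A worked example (illustrative, not from the original source): for a
   V2DF permute with selector { 3, 2 }, both selectors have bit 2 set,
   so OP0 folds to OP1; the equal-operand rewrite then reduces the
   selector to { 1, 2 }, and the VEC_CONCAT/VEC_SELECT above emits

     (vec_select:V2DF (vec_concat:V4DF op1 op1)
                      (parallel [(const_int 1) (const_int 2)]))

   i.e. element 1 followed by element 0 of OP1.  */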
31371 bool
31372 rs6000_expand_vec_perm_const (rtx operands[4])
31374 rtx target, op0, op1, sel;
31375 unsigned char perm0, perm1;
31377 target = operands[0];
31378 op0 = operands[1];
31379 op1 = operands[2];
31380 sel = operands[3];
31382 /* Unpack the constant selector. */
31383 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31384 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31386 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31389 /* Test whether a constant permutation is supported. */
31391 static bool
31392 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
31393 const unsigned char *sel)
31395 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31396 if (TARGET_ALTIVEC)
31397 return true;
31399 /* Check for ps_merge* or evmerge* insns. */
31400 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31401 || (TARGET_SPE && vmode == V2SImode))
31403 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31404 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31405 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31408 return false;
31411 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31413 static void
31414 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31415 machine_mode vmode, unsigned nelt, rtx perm[])
31417 machine_mode imode;
31418 rtx x;
31420 imode = vmode;
31421 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31423 imode = GET_MODE_INNER (vmode);
31424 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31425 imode = mode_for_vector (imode, nelt);
31428 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31429 x = expand_vec_perm (vmode, op0, op1, x, target);
31430 if (x != target)
31431 emit_move_insn (target, x);
31434 /* Expand an extract even operation. */
31436 void
31437 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31439 machine_mode vmode = GET_MODE (target);
31440 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31441 rtx perm[16];
31443 for (i = 0; i < nelt; i++)
31444 perm[i] = GEN_INT (i * 2);
31446 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
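/* Example (illustrative): with a V4SImode target, NELT is 4 and the
   selector built above is { 0, 2, 4, 6 }, the even elements of the
   concatenation of OP0 and OP1.  Assuming A and B are V4SImode
   registers, a sketch of a call site:

     rtx t = gen_reg_rtx (V4SImode);
     rs6000_expand_extract_even (t, a, b);

   leaves t = { a[0], a[2], b[0], b[2] }.  */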
31449 /* Expand a vector interleave operation. */
31451 void
31452 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31454 machine_mode vmode = GET_MODE (target);
31455 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31456 rtx perm[16];
31458 high = (highp ? 0 : nelt / 2);
31459 for (i = 0; i < nelt / 2; i++)
31461 perm[i * 2] = GEN_INT (i + high);
31462 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31465 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
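/* Example (illustrative): for V4SImode with HIGHP true, HIGH is 0 and
   the selector is { 0, 4, 1, 5 }, producing { op0[0], op1[0], op0[1],
   op1[1] }; with HIGHP false the selector is { 2, 6, 3, 7 }, which
   interleaves the low halves instead.  */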
31468 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
31469 void
31470 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31472 HOST_WIDE_INT hwi_scale (scale);
31473 REAL_VALUE_TYPE r_pow;
31474 rtvec v = rtvec_alloc (2);
31475 rtx elt;
31476 rtx scale_vec = gen_reg_rtx (V2DFmode);
31477 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31478 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31479 RTVEC_ELT (v, 0) = elt;
31480 RTVEC_ELT (v, 1) = elt;
31481 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31482 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
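/* Example (illustrative): rs6000_scale_v2df (tgt, src, 3) computes
   2**3 = 8.0 with real_powi, splats it into the constant vector
   { 8.0, 8.0 }, and emits one V2DF multiply, scaling both lanes of
   SRC by 8.0.  A negative SCALE divides; SCALE of -1 multiplies by
   0.5.  */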
31485 /* Return an RTX representing where to find the function value of a
31486 function returning MODE. */
31487 static rtx
31488 rs6000_complex_function_value (machine_mode mode)
31490 unsigned int regno;
31491 rtx r1, r2;
31492 machine_mode inner = GET_MODE_INNER (mode);
31493 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31495 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31496 regno = FP_ARG_RETURN;
31497 else
31499 regno = GP_ARG_RETURN;
31501 /* 32-bit is OK since it'll go in r3/r4. */
31502 if (TARGET_32BIT && inner_bytes >= 4)
31503 return gen_rtx_REG (mode, regno);
31506 if (inner_bytes >= 8)
31507 return gen_rtx_REG (mode, regno);
31509 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
31510 const0_rtx);
31511 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
31512 GEN_INT (inner_bytes));
31513 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
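/* Example (illustrative): for SCmode (complex float) with hard float,
   INNER is SFmode and INNER_BYTES is 4 on a 64-bit target, so the
   result is a PARALLEL pairing the real part in FP_ARG_RETURN at
   offset 0 with the imaginary part in FP_ARG_RETURN + 1 at offset 4.
   DCmode instead takes the INNER_BYTES >= 8 early exit and is
   returned as a single DCmode REG starting at FP_ARG_RETURN.  */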
31516 /* Target hook for TARGET_FUNCTION_VALUE.
31518 On the SPE, both FPs and vectors are returned in r3.
31520 On RS/6000 an integer value is in r3 and a floating-point value is in
31521 fp1, unless -msoft-float. */
31523 static rtx
31524 rs6000_function_value (const_tree valtype,
31525 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
31526 bool outgoing ATTRIBUTE_UNUSED)
31528 machine_mode mode;
31529 unsigned int regno;
31530 machine_mode elt_mode;
31531 int n_elts;
31533 /* Special handling for structs in darwin64. */
31534 if (TARGET_MACHO
31535 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
31537 CUMULATIVE_ARGS valcum;
31538 rtx valret;
31540 valcum.words = 0;
31541 valcum.fregno = FP_ARG_MIN_REG;
31542 valcum.vregno = ALTIVEC_ARG_MIN_REG;
31543 /* Do a trial code generation as if this were going to be passed as
31544 an argument; if any part goes in memory, we return NULL. */
31545 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
31546 if (valret)
31547 return valret;
31548 /* Otherwise fall through to standard ABI rules. */
31551 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
31552 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
31553 &elt_mode, &n_elts))
31555 int first_reg, n_regs, i;
31556 rtx par;
31558 if (SCALAR_FLOAT_MODE_P (elt_mode))
31560 /* _Decimal128 must use even/odd register pairs. */
31561 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31562 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31564 else
31566 first_reg = ALTIVEC_ARG_RETURN;
31567 n_regs = 1;
31570 par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
31571 for (i = 0; i < n_elts; i++)
31573 rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
31574 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31575 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31578 return par;
31581 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
31583 /* A long long return value needs to be split under the 32-bit ABI with -mpowerpc64. */
31584 return gen_rtx_PARALLEL (DImode,
31585 gen_rtvec (2,
31586 gen_rtx_EXPR_LIST (VOIDmode,
31587 gen_rtx_REG (SImode, GP_ARG_RETURN),
31588 const0_rtx),
31589 gen_rtx_EXPR_LIST (VOIDmode,
31590 gen_rtx_REG (SImode,
31591 GP_ARG_RETURN + 1),
31592 GEN_INT (4))));
31594 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DCmode)
31596 return gen_rtx_PARALLEL (DCmode,
31597 gen_rtvec (4,
31598 gen_rtx_EXPR_LIST (VOIDmode,
31599 gen_rtx_REG (SImode, GP_ARG_RETURN),
31600 const0_rtx),
31601 gen_rtx_EXPR_LIST (VOIDmode,
31602 gen_rtx_REG (SImode,
31603 GP_ARG_RETURN + 1),
31604 GEN_INT (4)),
31605 gen_rtx_EXPR_LIST (VOIDmode,
31606 gen_rtx_REG (SImode,
31607 GP_ARG_RETURN + 2),
31608 GEN_INT (8)),
31609 gen_rtx_EXPR_LIST (VOIDmode,
31610 gen_rtx_REG (SImode,
31611 GP_ARG_RETURN + 3),
31612 GEN_INT (12))));
31615 mode = TYPE_MODE (valtype);
31616 if ((INTEGRAL_TYPE_P (valtype) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
31617 || POINTER_TYPE_P (valtype))
31618 mode = TARGET_32BIT ? SImode : DImode;
31620 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31621 /* _Decimal128 must use an even/odd register pair. */
31622 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31623 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31624 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31625 regno = FP_ARG_RETURN;
31626 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31627 && targetm.calls.split_complex_arg)
31628 return rs6000_complex_function_value (mode);
31629 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31630 return register is used in both cases, and we won't see V2DImode/V2DFmode
31631 for pure altivec, combine the two cases. */
31632 else if (TREE_CODE (valtype) == VECTOR_TYPE
31633 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31634 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31635 regno = ALTIVEC_ARG_RETURN;
31636 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31637 && (mode == DFmode || mode == DCmode
31638 || mode == TFmode || mode == TCmode))
31639 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31640 else
31641 regno = GP_ARG_RETURN;
31643 return gen_rtx_REG (mode, regno);
31646 /* Define how to find the value returned by a library function
31647 assuming the value has mode MODE. */
31648 rtx
31649 rs6000_libcall_value (machine_mode mode)
31651 unsigned int regno;
31653 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31655 /* A long long return value needs to be split under the 32-bit ABI with -mpowerpc64. */
31656 return gen_rtx_PARALLEL (DImode,
31657 gen_rtvec (2,
31658 gen_rtx_EXPR_LIST (VOIDmode,
31659 gen_rtx_REG (SImode, GP_ARG_RETURN),
31660 const0_rtx),
31661 gen_rtx_EXPR_LIST (VOIDmode,
31662 gen_rtx_REG (SImode,
31663 GP_ARG_RETURN + 1),
31664 GEN_INT (4))));
31667 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31668 /* _Decimal128 must use an even/odd register pair. */
31669 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31670 else if (SCALAR_FLOAT_MODE_P (mode)
31671 && TARGET_HARD_FLOAT && TARGET_FPRS
31672 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31673 regno = FP_ARG_RETURN;
31674 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31675 return register is used in both cases, and we won't see V2DImode/V2DFmode
31676 for pure altivec, combine the two cases. */
31677 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31678 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31679 regno = ALTIVEC_ARG_RETURN;
31680 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31681 return rs6000_complex_function_value (mode);
31682 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31683 && (mode == DFmode || mode == DCmode
31684 || mode == TFmode || mode == TCmode))
31685 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31686 else
31687 regno = GP_ARG_RETURN;
31689 return gen_rtx_REG (mode, regno);
31693 /* Return true if we use LRA instead of reload pass. */
31694 static bool
31695 rs6000_lra_p (void)
31697 return rs6000_lra_flag;
31700 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31701 Frame pointer elimination is automatically handled.
31703 For the RS/6000, if frame pointer elimination is being done, we would like
31704 to convert ap into fp, not sp.
31706 We need r30 if -mminimal-toc was specified, and there are constant pool
31707 references. */
31709 static bool
31710 rs6000_can_eliminate (const int from, const int to)
31712 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31713 ? ! frame_pointer_needed
31714 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31715 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31716 : true);
31719 /* Define the offset between two registers, FROM to be eliminated and its
31720 replacement TO, at the start of a routine. */
31721 HOST_WIDE_INT
31722 rs6000_initial_elimination_offset (int from, int to)
31724 rs6000_stack_t *info = rs6000_stack_info ();
31725 HOST_WIDE_INT offset;
31727 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31728 offset = info->push_p ? 0 : -info->total_size;
31729 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31731 offset = info->push_p ? 0 : -info->total_size;
31732 if (FRAME_GROWS_DOWNWARD)
31733 offset += info->fixed_size + info->vars_size + info->parm_size;
31735 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31736 offset = FRAME_GROWS_DOWNWARD
31737 ? info->fixed_size + info->vars_size + info->parm_size
31738 : 0;
31739 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31740 offset = info->total_size;
31741 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31742 offset = info->push_p ? info->total_size : 0;
31743 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31744 offset = 0;
31745 else
31746 gcc_unreachable ();
31748 return offset;
31751 static rtx
31752 rs6000_dwarf_register_span (rtx reg)
31754 rtx parts[8];
31755 int i, words;
31756 unsigned regno = REGNO (reg);
31757 machine_mode mode = GET_MODE (reg);
31759 if (TARGET_SPE
31760 && regno < 32
31761 && (SPE_VECTOR_MODE (GET_MODE (reg))
31762 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31763 && mode != SFmode && mode != SDmode && mode != SCmode)))
31765 else
31766 return NULL_RTX;
31768 regno = REGNO (reg);
31770 /* The duality of the SPE register size wreaks all kinds of havoc.
31771 This is a way of distinguishing r0 in 32-bits from r0 in
31772 64-bits. */
31773 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31774 gcc_assert (words <= 4);
31775 for (i = 0; i < words; i++, regno++)
31777 if (BYTES_BIG_ENDIAN)
31779 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31780 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31782 else
31784 parts[2 * i] = gen_rtx_REG (SImode, regno);
31785 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31789 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
31792 /* Fill in sizes for SPE register high parts in table used by unwinder. */
31794 static void
31795 rs6000_init_dwarf_reg_sizes_extra (tree address)
31797 if (TARGET_SPE)
31799 int i;
31800 machine_mode mode = TYPE_MODE (char_type_node);
31801 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31802 rtx mem = gen_rtx_MEM (BLKmode, addr);
31803 rtx value = gen_int_mode (4, mode);
31805 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
31807 int column = DWARF_REG_TO_UNWIND_COLUMN
31808 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31809 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31811 emit_move_insn (adjust_address (mem, mode, offset), value);
31815 if (TARGET_MACHO && ! TARGET_ALTIVEC)
31817 int i;
31818 machine_mode mode = TYPE_MODE (char_type_node);
31819 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31820 rtx mem = gen_rtx_MEM (BLKmode, addr);
31821 rtx value = gen_int_mode (16, mode);
31823 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
31824 The unwinder still needs to know the size of Altivec registers. */
31826 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
31828 int column = DWARF_REG_TO_UNWIND_COLUMN
31829 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31830 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31832 emit_move_insn (adjust_address (mem, mode, offset), value);
31837 /* Map internal gcc register numbers to debug format register numbers.
31838 FORMAT specifies the type of debug register number to use:
31839 0 -- debug information, except for frame-related sections
31840 1 -- DWARF .debug_frame section
31841 2 -- DWARF .eh_frame section */
31843 unsigned int
31844 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
31846 /* We never use the GCC internal number for SPE high registers.
31847 Those are mapped to the 1200..1231 range for all debug formats. */
31848 if (SPE_HIGH_REGNO_P (regno))
31849 return regno - FIRST_SPE_HIGH_REGNO + 1200;
31851 /* Except for the above, we use the internal number for non-DWARF
31852 debug information, and also for .eh_frame. */
31853 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
31854 return regno;
31856 /* On some platforms, we use the standard DWARF register
31857 numbering for .debug_info and .debug_frame. */
31858 #ifdef RS6000_USE_DWARF_NUMBERING
31859 if (regno <= 63)
31860 return regno;
31861 if (regno == LR_REGNO)
31862 return 108;
31863 if (regno == CTR_REGNO)
31864 return 109;
31865 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
31866 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
31867 The actual code emitted saves the whole of CR, so we map CR2_REGNO
31868 to the DWARF reg for CR. */
31869 if (format == 1 && regno == CR2_REGNO)
31870 return 64;
31871 if (CR_REGNO_P (regno))
31872 return regno - CR0_REGNO + 86;
31873 if (regno == CA_REGNO)
31874 return 101; /* XER */
31875 if (ALTIVEC_REGNO_P (regno))
31876 return regno - FIRST_ALTIVEC_REGNO + 1124;
31877 if (regno == VRSAVE_REGNO)
31878 return 356;
31879 if (regno == VSCR_REGNO)
31880 return 67;
31881 if (regno == SPE_ACC_REGNO)
31882 return 99;
31883 if (regno == SPEFSCR_REGNO)
31884 return 612;
31885 #endif
31886 return regno;
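/* Examples (illustrative, assuming RS6000_USE_DWARF_NUMBERING): for
   .debug_info and .debug_frame, LR maps to 108, CTR to 109, and the
   first AltiVec register to 1124; in .debug_frame (FORMAT 1) CR2 maps
   to the whole-CR column 64.  Non-DWARF debug output and .eh_frame
   (FORMAT 2) get the GCC-internal number back unchanged, except for
   the SPE high registers, which map to 1200..1231 in every
   format.  */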
31889 /* Target hook for eh_return_filter_mode. */
31890 static machine_mode
31891 rs6000_eh_return_filter_mode (void)
31893 return TARGET_32BIT ? SImode : word_mode;
31896 /* Target hook for scalar_mode_supported_p. */
31897 static bool
31898 rs6000_scalar_mode_supported_p (machine_mode mode)
31900 if (DECIMAL_FLOAT_MODE_P (mode))
31901 return default_decimal_float_supported_p ();
31902 else
31903 return default_scalar_mode_supported_p (mode);
31906 /* Target hook for vector_mode_supported_p. */
31907 static bool
31908 rs6000_vector_mode_supported_p (machine_mode mode)
31911 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
31912 return true;
31914 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
31915 return true;
31917 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
31918 return true;
31920 else
31921 return false;
31924 /* Target hook for invalid_arg_for_unprototyped_fn. */
31925 static const char *
31926 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
31928 return (!rs6000_darwin64_abi
31929 && typelist == 0
31930 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
31931 && (funcdecl == NULL_TREE
31932 || (TREE_CODE (funcdecl) == FUNCTION_DECL
31933 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
31934 ? N_("AltiVec argument passed to unprototyped function")
31935 : NULL;
31938 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
31939 setup by using __stack_chk_fail_local hidden function instead of
31940 calling __stack_chk_fail directly. Otherwise it is better to call
31941 __stack_chk_fail directly. */
31943 static tree ATTRIBUTE_UNUSED
31944 rs6000_stack_protect_fail (void)
31946 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
31947 ? default_hidden_stack_protect_fail ()
31948 : default_external_stack_protect_fail ();
31951 void
31952 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
31953 int num_operands ATTRIBUTE_UNUSED)
31955 if (rs6000_warn_cell_microcode)
31957 const char *temp;
31958 int insn_code_number = recog_memoized (insn);
31959 location_t location = INSN_LOCATION (insn);
31961 /* Punt on insns we cannot recognize. */
31962 if (insn_code_number < 0)
31963 return;
31965 temp = get_insn_template (insn_code_number, insn);
31967 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
31968 warning_at (location, OPT_mwarn_cell_microcode,
31969 "emitting microcode insn %s\t[%s] #%d",
31970 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31971 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
31972 warning_at (location, OPT_mwarn_cell_microcode,
31973 "emitting conditional microcode insn %s\t[%s] #%d",
31974 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31978 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31980 #if TARGET_ELF
31981 static unsigned HOST_WIDE_INT
31982 rs6000_asan_shadow_offset (void)
31984 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
31986 #endif
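/* A sketch of how the offset is used (illustrative): AddressSanitizer
   forms the shadow address of MEM as (MEM >> 3) + the value returned
   here, so on 64-bit ELF targets

     shadow = (mem >> 3) + ((unsigned HOST_WIDE_INT) 1 << 41);

   while 32-bit targets use 1 << 29.  */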
31988 /* Mask options that we want to support inside of attribute((target)) and
31989 #pragma GCC target operations. Note that we do not include things like
31990 64/32-bit, endianness, hard/soft floating point, etc. that would have
31991 different calling sequences. */
31993 struct rs6000_opt_mask {
31994 const char *name; /* option name */
31995 HOST_WIDE_INT mask; /* mask to set */
31996 bool invert; /* invert sense of mask */
31997 bool valid_target; /* option is a target option */
32000 static struct rs6000_opt_mask const rs6000_opt_masks[] =
32002 { "altivec", OPTION_MASK_ALTIVEC, false, true },
32003 { "cmpb", OPTION_MASK_CMPB, false, true },
32004 { "crypto", OPTION_MASK_CRYPTO, false, true },
32005 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
32006 { "dlmzb", OPTION_MASK_DLMZB, false, true },
32007 { "fprnd", OPTION_MASK_FPRND, false, true },
32008 { "hard-dfp", OPTION_MASK_DFP, false, true },
32009 { "htm", OPTION_MASK_HTM, false, true },
32010 { "isel", OPTION_MASK_ISEL, false, true },
32011 { "mfcrf", OPTION_MASK_MFCRF, false, true },
32012 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
32013 { "mulhw", OPTION_MASK_MULHW, false, true },
32014 { "multiple", OPTION_MASK_MULTIPLE, false, true },
32015 { "popcntb", OPTION_MASK_POPCNTB, false, true },
32016 { "popcntd", OPTION_MASK_POPCNTD, false, true },
32017 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
32018 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
32019 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
32020 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
32021 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
32022 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
32023 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
32024 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
32025 { "string", OPTION_MASK_STRING, false, true },
32026 { "update", OPTION_MASK_NO_UPDATE, true , true },
32027 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
32028 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
32029 { "vsx", OPTION_MASK_VSX, false, true },
32030 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
32031 #ifdef OPTION_MASK_64BIT
32032 #if TARGET_AIX_OS
32033 { "aix64", OPTION_MASK_64BIT, false, false },
32034 { "aix32", OPTION_MASK_64BIT, true, false },
32035 #else
32036 { "64", OPTION_MASK_64BIT, false, false },
32037 { "32", OPTION_MASK_64BIT, true, false },
32038 #endif
32039 #endif
32040 #ifdef OPTION_MASK_EABI
32041 { "eabi", OPTION_MASK_EABI, false, false },
32042 #endif
32043 #ifdef OPTION_MASK_LITTLE_ENDIAN
32044 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
32045 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
32046 #endif
32047 #ifdef OPTION_MASK_RELOCATABLE
32048 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
32049 #endif
32050 #ifdef OPTION_MASK_STRICT_ALIGN
32051 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
32052 #endif
32053 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
32054 { "string", OPTION_MASK_STRING, false, false },
32057 /* Builtin mask mapping for printing the flags. */
32058 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
32060 { "altivec", RS6000_BTM_ALTIVEC, false, false },
32061 { "vsx", RS6000_BTM_VSX, false, false },
32062 { "spe", RS6000_BTM_SPE, false, false },
32063 { "paired", RS6000_BTM_PAIRED, false, false },
32064 { "fre", RS6000_BTM_FRE, false, false },
32065 { "fres", RS6000_BTM_FRES, false, false },
32066 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
32067 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
32068 { "popcntd", RS6000_BTM_POPCNTD, false, false },
32069 { "cell", RS6000_BTM_CELL, false, false },
32070 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
32071 { "crypto", RS6000_BTM_CRYPTO, false, false },
32072 { "htm", RS6000_BTM_HTM, false, false },
32073 { "hard-dfp", RS6000_BTM_DFP, false, false },
32074 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
32075 { "long-double-128", RS6000_BTM_LDBL128, false, false },
32078 /* Option variables that we want to support inside attribute((target)) and
32079 #pragma GCC target operations. */
32081 struct rs6000_opt_var {
32082 const char *name; /* option name */
32083 size_t global_offset; /* offset of the option in global_options. */
32084 size_t target_offset; /* offset of the option in target options. */
32087 static struct rs6000_opt_var const rs6000_opt_vars[] =
32089 { "friz",
32090 offsetof (struct gcc_options, x_TARGET_FRIZ),
32091 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
32092 { "avoid-indexed-addresses",
32093 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
32094 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
32095 { "paired",
32096 offsetof (struct gcc_options, x_rs6000_paired_float),
32097 offsetof (struct cl_target_option, x_rs6000_paired_float), },
32098 { "longcall",
32099 offsetof (struct gcc_options, x_rs6000_default_long_calls),
32100 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
32103 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
32104 parsing. Return true if there were no errors. */
32106 static bool
32107 rs6000_inner_target_options (tree args, bool attr_p)
32109 bool ret = true;
32111 if (args == NULL_TREE)
32114 else if (TREE_CODE (args) == STRING_CST)
32116 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32117 char *q;
32119 while ((q = strtok (p, ",")) != NULL)
32121 bool error_p = false;
32122 bool not_valid_p = false;
32123 const char *cpu_opt = NULL;
32125 p = NULL;
32126 if (strncmp (q, "cpu=", 4) == 0)
32128 int cpu_index = rs6000_cpu_name_lookup (q+4);
32129 if (cpu_index >= 0)
32130 rs6000_cpu_index = cpu_index;
32131 else
32133 error_p = true;
32134 cpu_opt = q+4;
32137 else if (strncmp (q, "tune=", 5) == 0)
32139 int tune_index = rs6000_cpu_name_lookup (q+5);
32140 if (tune_index >= 0)
32141 rs6000_tune_index = tune_index;
32142 else
32144 error_p = true;
32145 cpu_opt = q+5;
32148 else
32150 size_t i;
32151 bool invert = false;
32152 char *r = q;
32154 error_p = true;
32155 if (strncmp (r, "no-", 3) == 0)
32157 invert = true;
32158 r += 3;
32161 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
32162 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
32164 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
32166 if (!rs6000_opt_masks[i].valid_target)
32167 not_valid_p = true;
32168 else
32170 error_p = false;
32171 rs6000_isa_flags_explicit |= mask;
32173 /* VSX needs altivec, so -mvsx automagically sets
32174 altivec. */
32175 if (mask == OPTION_MASK_VSX && !invert)
32176 mask |= OPTION_MASK_ALTIVEC;
32178 if (rs6000_opt_masks[i].invert)
32179 invert = !invert;
32181 if (invert)
32182 rs6000_isa_flags &= ~mask;
32183 else
32184 rs6000_isa_flags |= mask;
32186 break;
32189 if (error_p && !not_valid_p)
32191 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
32192 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
32194 size_t j = rs6000_opt_vars[i].global_offset;
32195 *((int *) ((char *)&global_options + j)) = !invert;
32196 error_p = false;
32197 break;
32202 if (error_p)
32204 const char *eprefix, *esuffix;
32206 ret = false;
32207 if (attr_p)
32209 eprefix = "__attribute__((__target__(";
32210 esuffix = ")))";
32212 else
32214 eprefix = "#pragma GCC target ";
32215 esuffix = "";
32218 if (cpu_opt)
32219 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
32220 q, esuffix);
32221 else if (not_valid_p)
32222 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
32223 else
32224 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
32229 else if (TREE_CODE (args) == TREE_LIST)
32233 tree value = TREE_VALUE (args);
32234 if (value)
32236 bool ret2 = rs6000_inner_target_options (value, attr_p);
32237 if (!ret2)
32238 ret = false;
32240 args = TREE_CHAIN (args);
32242 while (args != NULL_TREE);
32245 else
32246 gcc_unreachable ();
32248 return ret;
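/* Example (illustrative) of input this parser accepts; each
   comma-separated token takes one trip through the strtok loop above:

     #pragma GCC target ("cpu=power8,no-vsx")
     int foo (void) __attribute__ ((__target__ ("altivec,friz")));

   "cpu=power8" sets rs6000_cpu_index, "no-vsx" clears OPTION_MASK_VSX
   in rs6000_isa_flags, "altivec" sets OPTION_MASK_ALTIVEC, and "friz"
   is found in rs6000_opt_vars and stored through its global_options
   offset.  */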
32251 /* Print out the target options as a list for -mdebug=target. */
32253 static void
32254 rs6000_debug_target_options (tree args, const char *prefix)
32256 if (args == NULL_TREE)
32257 fprintf (stderr, "%s<NULL>", prefix);
32259 else if (TREE_CODE (args) == STRING_CST)
32261 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32262 char *q;
32264 while ((q = strtok (p, ",")) != NULL)
32266 p = NULL;
32267 fprintf (stderr, "%s\"%s\"", prefix, q);
32268 prefix = ", ";
32272 else if (TREE_CODE (args) == TREE_LIST)
32276 tree value = TREE_VALUE (args);
32277 if (value)
32279 rs6000_debug_target_options (value, prefix);
32280 prefix = ", ";
32282 args = TREE_CHAIN (args);
32284 while (args != NULL_TREE);
32287 else
32288 gcc_unreachable ();
32290 return;
32294 /* Hook to validate attribute((target("..."))). */
32296 static bool
32297 rs6000_valid_attribute_p (tree fndecl,
32298 tree ARG_UNUSED (name),
32299 tree args,
32300 int flags)
32302 struct cl_target_option cur_target;
32303 bool ret;
32304 tree old_optimize = build_optimization_node (&global_options);
32305 tree new_target, new_optimize;
32306 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32308 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32310 if (TARGET_DEBUG_TARGET)
32312 tree tname = DECL_NAME (fndecl);
32313 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32314 if (tname)
32315 fprintf (stderr, "function: %.*s\n",
32316 (int) IDENTIFIER_LENGTH (tname),
32317 IDENTIFIER_POINTER (tname));
32318 else
32319 fprintf (stderr, "function: unknown\n");
32321 fprintf (stderr, "args:");
32322 rs6000_debug_target_options (args, " ");
32323 fprintf (stderr, "\n");
32325 if (flags)
32326 fprintf (stderr, "flags: 0x%x\n", flags);
32328 fprintf (stderr, "--------------------\n");
32331 old_optimize = build_optimization_node (&global_options);
32332 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32334 /* If the function changed the optimization levels as well as setting target
32335 options, start with the optimizations specified. */
32336 if (func_optimize && func_optimize != old_optimize)
32337 cl_optimization_restore (&global_options,
32338 TREE_OPTIMIZATION (func_optimize));
32340 /* The target attributes may also change some optimization flags, so update
32341 the optimization options if necessary. */
32342 cl_target_option_save (&cur_target, &global_options);
32343 rs6000_cpu_index = rs6000_tune_index = -1;
32344 ret = rs6000_inner_target_options (args, true);
32346 /* Set up any additional state. */
32347 if (ret)
32349 ret = rs6000_option_override_internal (false);
32350 new_target = build_target_option_node (&global_options);
32352 else
32353 new_target = NULL;
32355 new_optimize = build_optimization_node (&global_options);
32357 if (!new_target)
32358 ret = false;
32360 else if (fndecl)
32362 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32364 if (old_optimize != new_optimize)
32365 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32368 cl_target_option_restore (&global_options, &cur_target);
32370 if (old_optimize != new_optimize)
32371 cl_optimization_restore (&global_options,
32372 TREE_OPTIMIZATION (old_optimize));
32374 return ret;
32378 /* Hook to validate the current #pragma GCC target and set the state, and
32379 update the macros based on what was changed. If ARGS is NULL, then
32380 POP_TARGET is used to reset the options. */
32382 bool
32383 rs6000_pragma_target_parse (tree args, tree pop_target)
32385 tree prev_tree = build_target_option_node (&global_options);
32386 tree cur_tree;
32387 struct cl_target_option *prev_opt, *cur_opt;
32388 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32389 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32391 if (TARGET_DEBUG_TARGET)
32393 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32394 fprintf (stderr, "args:");
32395 rs6000_debug_target_options (args, " ");
32396 fprintf (stderr, "\n");
32398 if (pop_target)
32400 fprintf (stderr, "pop_target:\n");
32401 debug_tree (pop_target);
32403 else
32404 fprintf (stderr, "pop_target: <NULL>\n");
32406 fprintf (stderr, "--------------------\n");
32409 if (! args)
32411 cur_tree = ((pop_target)
32412 ? pop_target
32413 : target_option_default_node);
32414 cl_target_option_restore (&global_options,
32415 TREE_TARGET_OPTION (cur_tree));
32417 else
32419 rs6000_cpu_index = rs6000_tune_index = -1;
32420 if (!rs6000_inner_target_options (args, false)
32421 || !rs6000_option_override_internal (false)
32422 || (cur_tree = build_target_option_node (&global_options))
32423 == NULL_TREE)
32425 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32426 fprintf (stderr, "invalid pragma\n");
32428 return false;
32432 target_option_current_node = cur_tree;
32434 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32435 change the macros that are defined. */
32436 if (rs6000_target_modify_macros_ptr)
32438 prev_opt = TREE_TARGET_OPTION (prev_tree);
32439 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32440 prev_flags = prev_opt->x_rs6000_isa_flags;
32442 cur_opt = TREE_TARGET_OPTION (cur_tree);
32443 cur_flags = cur_opt->x_rs6000_isa_flags;
32444 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32446 diff_bumask = (prev_bumask ^ cur_bumask);
32447 diff_flags = (prev_flags ^ cur_flags);
32449 if ((diff_flags != 0) || (diff_bumask != 0))
32451 /* Delete old macros. */
32452 rs6000_target_modify_macros_ptr (false,
32453 prev_flags & diff_flags,
32454 prev_bumask & diff_bumask);
32456 /* Define new macros. */
32457 rs6000_target_modify_macros_ptr (true,
32458 cur_flags & diff_flags,
32459 cur_bumask & diff_bumask);
32463 return true;
32467 /* Remember the last target of rs6000_set_current_function. */
32468 static GTY(()) tree rs6000_previous_fndecl;
32470 /* Establish appropriate back-end context for processing the function
32471 FNDECL. The argument might be NULL to indicate processing at top
32472 level, outside of any function scope. */
32473 static void
32474 rs6000_set_current_function (tree fndecl)
32476 tree old_tree = (rs6000_previous_fndecl
32477 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
32478 : NULL_TREE);
32480 tree new_tree = (fndecl
32481 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
32482 : NULL_TREE);
32484 if (TARGET_DEBUG_TARGET)
32486 bool print_final = false;
32487 fprintf (stderr, "\n==================== rs6000_set_current_function");
32489 if (fndecl)
32490 fprintf (stderr, ", fndecl %s (%p)",
32491 (DECL_NAME (fndecl)
32492 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
32493 : "<unknown>"), (void *)fndecl);
32495 if (rs6000_previous_fndecl)
32496 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
32498 fprintf (stderr, "\n");
32499 if (new_tree)
32501 fprintf (stderr, "\nnew fndecl target specific options:\n");
32502 debug_tree (new_tree);
32503 print_final = true;
32506 if (old_tree)
32508 fprintf (stderr, "\nold fndecl target specific options:\n");
32509 debug_tree (old_tree);
32510 print_final = true;
32513 if (print_final)
32514 fprintf (stderr, "--------------------\n");
32517 /* Only change the context if the function changes. This hook is called
32518 several times in the course of compiling a function, and we don't want to
32519 slow things down too much or call target_reinit when it isn't safe. */
32520 if (fndecl && fndecl != rs6000_previous_fndecl)
32522 rs6000_previous_fndecl = fndecl;
32523 if (old_tree == new_tree)
32526 else if (new_tree)
32528 cl_target_option_restore (&global_options,
32529 TREE_TARGET_OPTION (new_tree));
32530 if (TREE_TARGET_GLOBALS (new_tree))
32531 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32532 else
32533 TREE_TARGET_GLOBALS (new_tree)
32534 = save_target_globals_default_opts ();
32537 else if (old_tree)
32539 new_tree = target_option_current_node;
32540 cl_target_option_restore (&global_options,
32541 TREE_TARGET_OPTION (new_tree));
32542 if (TREE_TARGET_GLOBALS (new_tree))
32543 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32544 else if (new_tree == target_option_default_node)
32545 restore_target_globals (&default_target_globals);
32546 else
32547 TREE_TARGET_GLOBALS (new_tree)
32548 = save_target_globals_default_opts ();
32554 /* Save the current options */
32556 static void
32557 rs6000_function_specific_save (struct cl_target_option *ptr,
32558 struct gcc_options *opts)
32560 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
32561 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
32564 /* Restore the current options */
32566 static void
32567 rs6000_function_specific_restore (struct gcc_options *opts,
32568 struct cl_target_option *ptr)
32571 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
32572 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
32573 (void) rs6000_option_override_internal (false);
32576 /* Print the current options */
32578 static void
32579 rs6000_function_specific_print (FILE *file, int indent,
32580 struct cl_target_option *ptr)
32582 rs6000_print_isa_options (file, indent, "Isa options set",
32583 ptr->x_rs6000_isa_flags);
32585 rs6000_print_isa_options (file, indent, "Isa options explicit",
32586 ptr->x_rs6000_isa_flags_explicit);
32589 /* Helper function to print the current isa or misc options on a line. */
32591 static void
32592 rs6000_print_options_internal (FILE *file,
32593 int indent,
32594 const char *string,
32595 HOST_WIDE_INT flags,
32596 const char *prefix,
32597 const struct rs6000_opt_mask *opts,
32598 size_t num_elements)
32600 size_t i;
32601 size_t start_column = 0;
32602 size_t cur_column;
32603 size_t max_column = 76;
32604 const char *comma = "";
32606 if (indent)
32607 start_column += fprintf (file, "%*s", indent, "");
32609 if (!flags)
32611 fprintf (file, DEBUG_FMT_S, string, "<none>");
32612 return;
32615 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
32617 /* Print the various mask options. */
32618 cur_column = start_column;
32619 for (i = 0; i < num_elements; i++)
32621 if ((flags & opts[i].mask) != 0)
32623 const char *no_str = opts[i].invert ? "no-" : "";
32624 size_t len = (strlen (comma)
32625 + strlen (prefix)
32626 + strlen (no_str)
32627 + strlen (opts[i].name));
32629 cur_column += len;
32630 if (cur_column > max_column)
32632 fprintf (file, ", \\\n%*s", (int)start_column, "");
32633 cur_column = start_column + len;
32634 comma = "";
32637 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32638 opts[i].name);
32639 flags &= ~ opts[i].mask;
32640 comma = ", ";
32644 fputs ("\n", file);
32647 /* Helper function to print the current isa options on a line. */
32649 static void
32650 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32651 HOST_WIDE_INT flags)
32653 rs6000_print_options_internal (file, indent, string, flags, "-m",
32654 &rs6000_opt_masks[0],
32655 ARRAY_SIZE (rs6000_opt_masks));
32658 static void
32659 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32660 HOST_WIDE_INT flags)
32662 rs6000_print_options_internal (file, indent, string, flags, "",
32663 &rs6000_builtin_mask_names[0],
32664 ARRAY_SIZE (rs6000_builtin_mask_names));
32668 /* Hook to determine if one function can safely inline another. */
32670 static bool
32671 rs6000_can_inline_p (tree caller, tree callee)
32673 bool ret = false;
32674 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32675 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32677 /* If callee has no option attributes, then it is ok to inline. */
32678 if (!callee_tree)
32679 ret = true;
32681 /* If caller has no option attributes, but callee does then it is not ok to
32682 inline. */
32683 else if (!caller_tree)
32684 ret = false;
32686 else
32688 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32689 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32691 /* Callee's options should be a subset of the caller's, i.e. a vsx function
32692 can inline an altivec function but a non-vsx function can't inline a
32693 vsx function. */
32694 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32695 == callee_opts->x_rs6000_isa_flags)
32696 ret = true;
32699 if (TARGET_DEBUG_TARGET)
32700 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32701 (DECL_NAME (caller)
32702 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32703 : "<unknown>"),
32704 (DECL_NAME (callee)
32705 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32706 : "<unknown>"),
32707 (ret ? "can" : "cannot"));
32709 return ret;
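/* Example (illustrative): a caller declared with
   __attribute__ ((__target__ ("vsx"))) may inline a callee declared
   with __attribute__ ((__target__ ("altivec"))), since the callee's
   ISA flags are a subset of the caller's (vsx implies altivec here);
   swapping the two attributes makes the subset test fail and blocks
   the inline.  */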
32712 /* Allocate a stack temp and fixup the address so it meets the particular
32713 memory requirements (either offsettable or REG+REG addressing). */
32715 rtx
32716 rs6000_allocate_stack_temp (machine_mode mode,
32717 bool offsettable_p,
32718 bool reg_reg_p)
32720 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32721 rtx addr = XEXP (stack, 0);
32722 int strict_p = (reload_in_progress || reload_completed);
32724 if (!legitimate_indirect_address_p (addr, strict_p))
32726 if (offsettable_p
32727 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32728 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32730 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32731 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32734 return stack;
32737 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
32738 to such a form to deal with memory reference instructions like STFIWX that
32739 only take reg+reg addressing. */
32741 rtx
32742 rs6000_address_for_fpconvert (rtx x)
32744 int strict_p = (reload_in_progress || reload_completed);
32745 rtx addr;
32747 gcc_assert (MEM_P (x));
32748 addr = XEXP (x, 0);
32749 if (! legitimate_indirect_address_p (addr, strict_p)
32750 && ! legitimate_indexed_address_p (addr, strict_p))
32752 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32754 rtx reg = XEXP (addr, 0);
32755 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32756 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32757 gcc_assert (REG_P (reg));
32758 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32759 addr = reg;
32761 else if (GET_CODE (addr) == PRE_MODIFY)
32763 rtx reg = XEXP (addr, 0);
32764 rtx expr = XEXP (addr, 1);
32765 gcc_assert (REG_P (reg));
32766 gcc_assert (GET_CODE (expr) == PLUS);
32767 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32768 addr = reg;
32771 x = replace_equiv_address (x, copy_addr_to_reg (addr));
32774 return x;
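/* Example (illustrative): given (mem:DF (pre_dec:SI (reg r9))), the
   code above emits r9 = r9 - 8, copies the updated base into a fresh
   register, and rewrites the MEM to address through that register, so
   the final form is plain register-indirect as STFIWX requires.  */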
32777 /* Given a memory reference, if it is not in the form for altivec memory
32778 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
32779 convert to the altivec format. */
32781 rtx
32782 rs6000_address_for_altivec (rtx x)
32784 gcc_assert (MEM_P (x));
32785 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
32787 rtx addr = XEXP (x, 0);
32788 int strict_p = (reload_in_progress || reload_completed);
32790 if (!legitimate_indexed_address_p (addr, strict_p)
32791 && !legitimate_indirect_address_p (addr, strict_p))
32792 addr = copy_to_mode_reg (Pmode, addr);
32794 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
32795 x = change_address (x, GET_MODE (x), addr);
32798 return x;
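/* Example (illustrative): AltiVec loads and stores ignore the low
   four address bits, and the AND with -16 built above makes that
   explicit in the RTL.  An address like sp+23 is first forced into a
   register and then wrapped as

     (and (reg Rtmp) (const_int -16))

   rounding it down to the containing 16-byte block, e.g.
   0x1017 & -16 == 0x1010.  */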
32801 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32803 On the RS/6000, all integer constants are acceptable, most won't be valid
32804 for particular insns, though. Only easy FP constants are acceptable. */
32806 static bool
32807 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32809 if (TARGET_ELF && tls_referenced_p (x))
32810 return false;
32812 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
32813 || GET_MODE (x) == VOIDmode
32814 || (TARGET_POWERPC64 && mode == DImode)
32815 || easy_fp_constant (x, mode)
32816 || easy_vector_constant (x, mode));
32821 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32823 void
32824 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32826 const bool direct_call_p
32827 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
32828 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32829 rtx toc_load = NULL_RTX;
32830 rtx toc_restore = NULL_RTX;
32831 rtx func_addr;
32832 rtx abi_reg = NULL_RTX;
32833 rtx call[4];
32834 int n_call;
32835 rtx insn;
32837 /* Handle longcall attributes. */
32838 if (INTVAL (cookie) & CALL_LONG)
32839 func_desc = rs6000_longcall_ref (func_desc);
32841 /* Handle indirect calls. */
32842 if (GET_CODE (func_desc) != SYMBOL_REF
32843 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
32845 /* Save the TOC into its reserved slot before the call,
32846 and prepare to restore it after the call. */
32847 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32848 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32849 rtx stack_toc_mem = gen_frame_mem (Pmode,
32850 gen_rtx_PLUS (Pmode, stack_ptr,
32851 stack_toc_offset));
32852 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
32854 /* Can we optimize saving the TOC in the prologue or
32855 do we need to do it at every call? */
32856 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32857 cfun->machine->save_toc_in_prologue = true;
32858 else
32860 MEM_VOLATILE_P (stack_toc_mem) = 1;
32861 emit_move_insn (stack_toc_mem, toc_reg);
32864 if (DEFAULT_ABI == ABI_ELFv2)
32866 /* A function pointer in the ELFv2 ABI is just a plain address, but
32867 the ABI requires it to be loaded into r12 before the call. */
32868 func_addr = gen_rtx_REG (Pmode, 12);
32869 emit_move_insn (func_addr, func_desc);
32870 abi_reg = func_addr;
32872 else
32874 /* A function pointer under AIX is a pointer to a data area whose
32875 first word contains the actual address of the function, whose
32876 second word contains a pointer to its TOC, and whose third word
32877 contains a value to place in the static chain register (r11).
32878 Note that if we load the static chain, our "trampoline" need
32879 not have any executable code. */
32881 /* Load up address of the actual function. */
32882 func_desc = force_reg (Pmode, func_desc);
32883 func_addr = gen_reg_rtx (Pmode);
32884 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
32886 /* Prepare to load the TOC of the called function. Note that the
32887 TOC load must happen immediately before the actual call so
32888 that unwinding the TOC registers works correctly. See the
32889 comment in frob_update_context. */
32890 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32891 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32892 gen_rtx_PLUS (Pmode, func_desc,
32893 func_toc_offset));
32894 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32896 /* If we have a static chain, load it up. But, if the call was
32897 originally direct, the 3rd word has not been written since no
32898 trampoline has been built, so we ought not to load it, lest we
32899 override a static chain value. */
32900 if (!direct_call_p && TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32902 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32903 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32904 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32905 gen_rtx_PLUS (Pmode, func_desc,
32906 func_sc_offset));
32907 emit_move_insn (sc_reg, func_sc_mem);
32908 abi_reg = sc_reg;
32912 else
32914 /* Direct calls use the TOC: for local calls, the callee will
32915 assume the TOC register is set; for non-local calls, the
32916 PLT stub needs the TOC register. */
32917 abi_reg = toc_reg;
32918 func_addr = func_desc;
32921 /* Create the call. */
32922 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
32923 if (value != NULL_RTX)
32924 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32925 n_call = 1;
32927 if (toc_load)
32928 call[n_call++] = toc_load;
32929 if (toc_restore)
32930 call[n_call++] = toc_restore;
32932 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
32934 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32935 insn = emit_call_insn (insn);
32937 /* Mention all registers defined by the ABI to hold information
32938 as uses in CALL_INSN_FUNCTION_USAGE. */
32939 if (abi_reg)
32940 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32943 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32945 void
32946 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32948 rtx call[2];
32949 rtx insn;
32951 gcc_assert (INTVAL (cookie) == 0);
32953 /* Create the call. */
32954 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
32955 if (value != NULL_RTX)
32956 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32958 call[1] = simple_return_rtx;
32960 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
32961 insn = emit_call_insn (insn);
32963 /* Note use of the TOC register. */
32964 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
32965 /* We need to also mark a use of the link register since the function we
32966 sibling-call to will use it to return to our caller. */
32967 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
32970 /* Return whether we need to always update the saved TOC pointer when we update
32971 the stack pointer. */
32973 static bool
32974 rs6000_save_toc_in_prologue_p (void)
32976 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
32979 #ifdef HAVE_GAS_HIDDEN
32980 # define USE_HIDDEN_LINKONCE 1
32981 #else
32982 # define USE_HIDDEN_LINKONCE 0
32983 #endif
32985 /* Fills in the label name that should be used for a 476 link stack thunk. */
32987 void
32988 get_ppc476_thunk_name (char name[32])
32990 gcc_assert (TARGET_LINK_STACK);
32992 if (USE_HIDDEN_LINKONCE)
32993 sprintf (name, "__ppc476.get_thunk");
32994 else
32995 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
32998 /* This function emits the simple thunk routine that is used to preserve
32999 the link stack on the 476 cpu. */
33001 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
33002 static void
33003 rs6000_code_end (void)
33005 char name[32];
33006 tree decl;
33008 if (!TARGET_LINK_STACK)
33009 return;
33011 get_ppc476_thunk_name (name);
33013 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
33014 build_function_type_list (void_type_node, NULL_TREE));
33015 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
33016 NULL_TREE, void_type_node);
33017 TREE_PUBLIC (decl) = 1;
33018 TREE_STATIC (decl) = 1;
33020 #if RS6000_WEAK
33021 if (USE_HIDDEN_LINKONCE)
33023 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
33024 targetm.asm_out.unique_section (decl, 0);
33025 switch_to_section (get_named_section (decl, NULL, 0));
33026 DECL_WEAK (decl) = 1;
33027 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
33028 targetm.asm_out.globalize_label (asm_out_file, name);
33029 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
33030 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
33032 else
33033 #endif
33035 switch_to_section (text_section);
33036 ASM_OUTPUT_LABEL (asm_out_file, name);
33039 DECL_INITIAL (decl) = make_node (BLOCK);
33040 current_function_decl = decl;
33041 init_function_start (decl);
33042 first_function_block_is_cold = false;
33043 /* Make sure unwind info is emitted for the thunk if needed. */
33044 final_start_function (emit_barrier (), asm_out_file, 1);
33046 fputs ("\tblr\n", asm_out_file);
33048 final_end_function ();
33049 init_insn_lengths ();
33050 free_after_compilation (cfun);
33051 set_cfun (NULL);
33052 current_function_decl = NULL;
33055 /* Add r30 to hard reg set if the prologue sets it up and it is not
33056 pic_offset_table_rtx. */
33058 static void
33059 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
33061 if (!TARGET_SINGLE_PIC_BASE
33062 && TARGET_TOC
33063 && TARGET_MINIMAL_TOC
33064 && get_pool_size () != 0)
33065 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
33069 /* Helper function for rs6000_split_logical to emit a logical instruction after
33070 splitting the operation into individual GPR registers.
33072 DEST is the destination register.
33073 OP1 and OP2 are the input source registers.
33074 CODE is the base operation (AND, IOR, XOR, NOT).
33075 MODE is the machine mode.
33076 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33077 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33078 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33080 static void
33081 rs6000_split_logical_inner (rtx dest,
33082 rtx op1,
33083 rtx op2,
33084 enum rtx_code code,
33085 machine_mode mode,
33086 bool complement_final_p,
33087 bool complement_op1_p,
33088 bool complement_op2_p)
33090 rtx bool_rtx;
33092 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
33093 if (op2 && GET_CODE (op2) == CONST_INT
33094 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
33095 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33097 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
33098 HOST_WIDE_INT value = INTVAL (op2) & mask;
33100 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
33101 if (code == AND)
33103 if (value == 0)
33105 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
33106 return;
33109 else if (value == mask)
33111 if (!rtx_equal_p (dest, op1))
33112 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33113 return;
33117 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
33118 into separate ORI/ORIS or XORI/XORIS instructions. */
33119 else if (code == IOR || code == XOR)
33121 if (value == 0)
33123 if (!rtx_equal_p (dest, op1))
33124 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33125 return;
33130 if (code == AND && mode == SImode
33131 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33133 emit_insn (gen_andsi3 (dest, op1, op2));
33134 return;
33137 if (complement_op1_p)
33138 op1 = gen_rtx_NOT (mode, op1);
33140 if (complement_op2_p)
33141 op2 = gen_rtx_NOT (mode, op2);
33143 bool_rtx = ((code == NOT)
33144 ? gen_rtx_NOT (mode, op1)
33145 : gen_rtx_fmt_ee (code, mode, op1, op2));
33147 if (complement_final_p)
33148 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
33150 emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
33153 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
33154 operations are split immediately during RTL generation to allow for more
33155 optimizations of the AND/IOR/XOR.
33157 OPERANDS is an array containing the destination and two input operands.
33158 CODE is the base operation (AND, IOR, XOR, NOT).
33159 MODE is the machine mode.
33160 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33161 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33162 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
33163 CLOBBER_REG is either NULL or a scratch register of type CC to allow
33164 formation of the AND instructions. */
33166 static void
33167 rs6000_split_logical_di (rtx operands[3],
33168 enum rtx_code code,
33169 bool complement_final_p,
33170 bool complement_op1_p,
33171 bool complement_op2_p)
33173 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
33174 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
33175 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
33176 enum hi_lo { hi = 0, lo = 1 };
33177 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
33178 size_t i;
33180 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
33181 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
33182 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
33183 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
33185 if (code == NOT)
33186 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
33187 else
33189 if (GET_CODE (operands[2]) != CONST_INT)
33191 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
33192 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
33194 else
33196 HOST_WIDE_INT value = INTVAL (operands[2]);
33197 HOST_WIDE_INT value_hi_lo[2];
33199 gcc_assert (!complement_final_p);
33200 gcc_assert (!complement_op1_p);
33201 gcc_assert (!complement_op2_p);
33203 value_hi_lo[hi] = value >> 32;
33204 value_hi_lo[lo] = value & lower_32bits;
33206 for (i = 0; i < 2; i++)
33208 HOST_WIDE_INT sub_value = value_hi_lo[i];
33210 if (sub_value & sign_bit)
33211 sub_value |= upper_32bits;
33213 op2_hi_lo[i] = GEN_INT (sub_value);
33215 /* If this is an AND instruction, check to see if we need to load
33216 the value in a register. */
33217 if (code == AND && sub_value != -1 && sub_value != 0
33218 && !and_operand (op2_hi_lo[i], SImode))
33219 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
33224 for (i = 0; i < 2; i++)
33226 /* Split large IOR/XOR operations. */
33227 if ((code == IOR || code == XOR)
33228 && GET_CODE (op2_hi_lo[i]) == CONST_INT
33229 && !complement_final_p
33230 && !complement_op1_p
33231 && !complement_op2_p
33232 && !logical_const_operand (op2_hi_lo[i], SImode))
33234 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
33235 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
33236 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
33237 rtx tmp = gen_reg_rtx (SImode);
33239 /* Make sure the constant is sign extended. */
33240 if ((hi_16bits & sign_bit) != 0)
33241 hi_16bits |= upper_32bits;
33243 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
33244 code, SImode, false, false, false);
33246 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33247 code, SImode, false, false, false);
33249 else
33250 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33251 code, SImode, complement_final_p,
33252 complement_op1_p, complement_op2_p);
33255 return;
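/* Illustrative sketch (not compiler code): on a 32-bit target the split
   above turns

     long long y = x & 0xffff00000000ffffLL;

   into two independent SImode operations on the register halves:

     y_hi = x_hi & 0xffff0000;   (a single andis.)
     y_lo = x_lo & 0x0000ffff;   (a single andi.)

   AND constants that fit neither andi. nor andis. are first forced into
   a register, as done by the code above. */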
33258 /* Split the insns that make up boolean operations operating on multiple GPR
33259 registers. The boolean MD patterns ensure that the inputs either are
33260 exactly the same as the output registers, or there is no overlap.
33262 OPERANDS is an array containing the destination and two input operands.
33263 CODE is the base operation (AND, IOR, XOR, NOT).
33264 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33265 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33266 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33268 void
33269 rs6000_split_logical (rtx operands[3],
33270 enum rtx_code code,
33271 bool complement_final_p,
33272 bool complement_op1_p,
33273 bool complement_op2_p)
33275 machine_mode mode = GET_MODE (operands[0]);
33276 machine_mode sub_mode;
33277 rtx op0, op1, op2;
33278 int sub_size, regno0, regno1, nregs, i;
33280 /* If this is DImode, use the specialized version that can run before
33281 register allocation. */
33282 if (mode == DImode && !TARGET_POWERPC64)
33284 rs6000_split_logical_di (operands, code, complement_final_p,
33285 complement_op1_p, complement_op2_p);
33286 return;
33289 op0 = operands[0];
33290 op1 = operands[1];
33291 op2 = (code == NOT) ? NULL_RTX : operands[2];
33292 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33293 sub_size = GET_MODE_SIZE (sub_mode);
33294 regno0 = REGNO (op0);
33295 regno1 = REGNO (op1);
33297 gcc_assert (reload_completed);
33298 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33299 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33301 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33302 gcc_assert (nregs > 1);
33304 if (op2 && REG_P (op2))
33305 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33307 for (i = 0; i < nregs; i++)
33309 int offset = i * sub_size;
33310 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33311 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33312 rtx sub_op2 = ((code == NOT)
33313 ? NULL_RTX
33314 : simplify_subreg (sub_mode, op2, mode, offset));
33316 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33317 complement_final_p, complement_op1_p,
33318 complement_op2_p);
33321 return;
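/* As an illustrative example, a TImode XOR on a 64-bit target is split
   by the loop above into two DImode operations on consecutive GPRs:

     (set (reg:TI 10) (xor:TI (reg:TI 10) (reg:TI 12)))

   becomes

     (set (reg:DI 10) (xor:DI (reg:DI 10) (reg:DI 12)))
     (set (reg:DI 11) (xor:DI (reg:DI 11) (reg:DI 13))) */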
33325 /* Return true if the peephole2 can combine a load involving a combination of
33326 an addis instruction and a load with an offset that can be fused together on
33327 a power8. */
33329 bool
33330 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33331 rtx addis_value, /* addis value. */
33332 rtx target, /* target register that is loaded. */
33333 rtx mem) /* bottom part of the memory addr. */
33335 rtx addr;
33336 rtx base_reg;
33338 /* Validate arguments. */
33339 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33340 return false;
33342 if (!base_reg_operand (target, GET_MODE (target)))
33343 return false;
33345 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33346 return false;
33348 /* Allow sign/zero extension. */
33349 if (GET_CODE (mem) == ZERO_EXTEND
33350 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33351 mem = XEXP (mem, 0);
33353 if (!MEM_P (mem))
33354 return false;
33356 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33357 return false;
33359 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33360 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33361 return false;
33363 /* Validate that the register used to load the high value is either the
33364 register being loaded, or we can safely replace its use.
33366 This function is only called from the peephole2 pass and we assume that
33367 there are 2 instructions in the peephole (addis and load), so we want to
33368 check if the target register was not used in the memory address and the
33369 register to hold the addis result is dead after the peephole. */
33370 if (REGNO (addis_reg) != REGNO (target))
33372 if (reg_mentioned_p (target, mem))
33373 return false;
33375 if (!peep2_reg_dead_p (2, addis_reg))
33376 return false;
33378 /* If the target register being loaded is the stack pointer, we must
33379 avoid loading any other value into it, even temporarily. */
33380 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33381 return false;
33384 base_reg = XEXP (addr, 0);
33385 return REGNO (addis_reg) == REGNO (base_reg);
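/* An assumed example of the two-insn peephole candidate accepted above:

     addis 9,2,.LC0@toc@ha        (addis_reg = r9)
     lwz   10,.LC0@toc@l(9)       (target = r10, base_reg = r9)

   When the target differs from the addis register (r10 vs. r9 here),
   r9 must be dead after the load, r10 must not be mentioned in the
   memory address, and r10 must not be the stack pointer, per the
   checks above. */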
33388 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33389 sequence. We adjust the addis register to use the target register. If the
33390 load sign extends, we adjust the code to do the zero extending load, and an
33391 explicit sign extension later since the fusion only covers zero extending
33392 loads.
33394 The operands are:
33395 operands[0] register set with addis (to be replaced with target)
33396 operands[1] value set via addis
33397 operands[2] target register being loaded
33398 operands[3] D-form memory reference using operands[0]. */
33400 void
33401 expand_fusion_gpr_load (rtx *operands)
33403 rtx addis_value = operands[1];
33404 rtx target = operands[2];
33405 rtx orig_mem = operands[3];
33406 rtx new_addr, new_mem, orig_addr, offset;
33407 enum rtx_code plus_or_lo_sum;
33408 machine_mode target_mode = GET_MODE (target);
33409 machine_mode extend_mode = target_mode;
33410 machine_mode ptr_mode = Pmode;
33411 enum rtx_code extend = UNKNOWN;
33413 if (GET_CODE (orig_mem) == ZERO_EXTEND
33414 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33416 extend = GET_CODE (orig_mem);
33417 orig_mem = XEXP (orig_mem, 0);
33418 target_mode = GET_MODE (orig_mem);
33421 gcc_assert (MEM_P (orig_mem));
33423 orig_addr = XEXP (orig_mem, 0);
33424 plus_or_lo_sum = GET_CODE (orig_addr);
33425 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33427 offset = XEXP (orig_addr, 1);
33428 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33429 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33431 if (extend != UNKNOWN)
33432 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33434 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33435 UNSPEC_FUSION_GPR);
33436 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
33438 if (extend == SIGN_EXTEND)
33440 int sub_off = ((BYTES_BIG_ENDIAN)
33441 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33442 : 0);
33443 rtx sign_reg
33444 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33446 emit_insn (gen_rtx_SET (VOIDmode, target,
33447 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33450 return;
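/* Sketch of the rewrite for a sign-extending fused load (illustrative):

     (set (reg:DI 9) (sign_extend:DI (mem:SI ...)))

   becomes a zero-extending fused load followed by an explicit sign
   extension of the loaded value:

     (set (reg:DI 9) (unspec:DI [(zero_extend:DI (mem:SI ...))]
                                UNSPEC_FUSION_GPR))
     (set (reg:DI 9) (sign_extend:DI (subreg:SI (reg:DI 9) 0)))

   where the subreg byte offset is 4 on big-endian targets. */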
33453 /* Return a string to fuse an addis instruction with a gpr load to the same
33454 register that we loaded up the addis instruction. The address that is used
33455 is the logical address that was formed during peephole2:
33456 (lo_sum (high) (low-part))
33458 The code is complicated, so we call output_asm_insn directly, and just
33459 return "". */
33461 const char *
33462 emit_fusion_gpr_load (rtx target, rtx mem)
33464 rtx addis_value;
33465 rtx fuse_ops[10];
33466 rtx addr;
33467 rtx load_offset;
33468 const char *addis_str = NULL;
33469 const char *load_str = NULL;
33470 const char *mode_name = NULL;
33471 char insn_template[80];
33472 machine_mode mode;
33473 const char *comment_str = ASM_COMMENT_START;
33475 if (GET_CODE (mem) == ZERO_EXTEND)
33476 mem = XEXP (mem, 0);
33478 gcc_assert (REG_P (target) && MEM_P (mem));
33480 if (*comment_str == ' ')
33481 comment_str++;
33483 addr = XEXP (mem, 0);
33484 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33485 gcc_unreachable ();
33487 addis_value = XEXP (addr, 0);
33488 load_offset = XEXP (addr, 1);
33490 /* Now emit the load instruction to the same register. */
33491 mode = GET_MODE (mem);
33492 switch (mode)
33494 case QImode:
33495 mode_name = "char";
33496 load_str = "lbz";
33497 break;
33499 case HImode:
33500 mode_name = "short";
33501 load_str = "lhz";
33502 break;
33504 case SImode:
33505 mode_name = "int";
33506 load_str = "lwz";
33507 break;
33509 case DImode:
33510 gcc_assert (TARGET_POWERPC64);
33511 mode_name = "long";
33512 load_str = "ld";
33513 break;
33515 default:
33516 gcc_unreachable ();
33519 /* Emit the addis instruction. */
33520 fuse_ops[0] = target;
33521 if (satisfies_constraint_L (addis_value))
33523 fuse_ops[1] = addis_value;
33524 addis_str = "lis %0,%v1";
33527 else if (GET_CODE (addis_value) == PLUS)
33529 rtx op0 = XEXP (addis_value, 0);
33530 rtx op1 = XEXP (addis_value, 1);
33532 if (REG_P (op0) && CONST_INT_P (op1)
33533 && satisfies_constraint_L (op1))
33535 fuse_ops[1] = op0;
33536 fuse_ops[2] = op1;
33537 addis_str = "addis %0,%1,%v2";
33541 else if (GET_CODE (addis_value) == HIGH)
33543 rtx value = XEXP (addis_value, 0);
33544 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33546 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33547 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33548 if (TARGET_ELF)
33549 addis_str = "addis %0,%2,%1@toc@ha";
33551 else if (TARGET_XCOFF)
33552 addis_str = "addis %0,%1@u(%2)";
33554 else
33555 gcc_unreachable ();
33558 else if (GET_CODE (value) == PLUS)
33560 rtx op0 = XEXP (value, 0);
33561 rtx op1 = XEXP (value, 1);
33563 if (GET_CODE (op0) == UNSPEC
33564 && XINT (op0, 1) == UNSPEC_TOCREL
33565 && CONST_INT_P (op1))
33567 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33568 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33569 fuse_ops[3] = op1;
33570 if (TARGET_ELF)
33571 addis_str = "addis %0,%2,%1+%3@toc@ha";
33573 else if (TARGET_XCOFF)
33574 addis_str = "addis %0,%1+%3@u(%2)";
33576 else
33577 gcc_unreachable ();
33581 else if (satisfies_constraint_L (value))
33583 fuse_ops[1] = value;
33584 addis_str = "lis %0,%v1";
33587 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33589 fuse_ops[1] = value;
33590 addis_str = "lis %0,%1@ha";
33594 if (!addis_str)
33595 fatal_insn ("Could not generate addis value for fusion", addis_value);
33597 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33598 comment_str, mode_name);
33599 output_asm_insn (insn_template, fuse_ops);
33601 /* Emit the D-form load instruction. */
33602 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33604 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33605 fuse_ops[1] = load_offset;
33606 output_asm_insn (insn_template, fuse_ops);
33609 else if (GET_CODE (load_offset) == UNSPEC
33610 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33612 if (TARGET_ELF)
33613 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33615 else if (TARGET_XCOFF)
33616 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33618 else
33619 gcc_unreachable ();
33621 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33622 output_asm_insn (insn_template, fuse_ops);
33625 else if (GET_CODE (load_offset) == PLUS
33626 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33627 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33628 && CONST_INT_P (XEXP (load_offset, 1)))
33630 rtx tocrel_unspec = XEXP (load_offset, 0);
33631 if (TARGET_ELF)
33632 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33634 else if (TARGET_XCOFF)
33635 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33637 else
33638 gcc_unreachable ();
33640 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33641 fuse_ops[2] = XEXP (load_offset, 1);
33642 output_asm_insn (insn_template, fuse_ops);
33645 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33647 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33649 fuse_ops[1] = load_offset;
33650 output_asm_insn (insn_template, fuse_ops);
33653 else
33654 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33656 return "";
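/* For example (assuming an ELF target), the routine above emits
   something like:

     addis 9,2,sym@toc@ha		# gpr load fusion, type int
     lwz 9,sym@toc@l(9)

   with both instructions targeting the same register so that the
   power8 front end can fuse them. */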
33659 /* Analyze vector computations and remove unnecessary doubleword
33660 swaps (xxswapdi instructions). This pass is performed only
33661 for little-endian VSX code generation.
33663 For this specific case, loads and stores of 4x32 and 2x64 vectors
33664 are inefficient. These are implemented using the lvxd2x and
33665 stvxd2x instructions, which invert the order of doublewords in
33666 a vector register. Thus the code generation inserts an xxswapdi
33667 after each such load, and prior to each such store. (For spill
33668 code after register assignment, an additional xxswapdi is inserted
33669 following each store in order to return a hard register to its
33670 unpermuted value.)
33672 The extra xxswapdi instructions reduce performance. This can be
33673 particularly bad for vectorized code. The purpose of this pass
33674 is to reduce the number of xxswapdi instructions required for
33675 correctness.
33677 The primary insight is that much code that operates on vectors
33678 does not care about the relative order of elements in a register,
33679 so long as the correct memory order is preserved. If we have
33680 a computation where all input values are provided by lvxd2x/xxswapdi
33681 sequences, all outputs are stored using xxswapdi/stvxd2x sequences,
33682 and all intermediate computations are pure SIMD (independent of
33683 element order), then all the xxswapdi's associated with the loads
33684 and stores may be removed.
33686 This pass uses some of the infrastructure and logical ideas from
33687 the "web" pass in web.c. We create maximal webs of computations
33688 fitting the description above using union-find. Each such web is
33689 then optimized by removing its unnecessary xxswapdi instructions.
33691 The pass is placed prior to global optimization so that we can
33692 perform the optimization in the safest and simplest way possible;
33693 that is, by replacing each xxswapdi insn with a register copy insn.
33694 Subsequent forward propagation will remove copies where possible.
33696 There are some operations sensitive to element order for which we
33697 can still allow the operation, provided we modify those operations.
33698 These include CONST_VECTORs, for which we must swap the first and
33699 second halves of the constant vector; and SUBREGs, for which we
33700 must adjust the byte offset to account for the swapped doublewords.
33701 A remaining opportunity would be non-immediate-form splats, for
33702 which we should adjust the selected lane of the input. We should
33703 also make code generation adjustments for sum-across operations,
33704 since this is a common vectorizer reduction.
33706 Because we run prior to the first split, we can see loads and stores
33707 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
33708 vector loads and stores that have not yet been split into a permuting
33709 load/store and a swap. (One way this can happen is with a builtin
33710 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
33711 than deleting a swap, we convert the load/store into a permuting
33712 load/store (which effectively removes the swap). */
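/* Illustrative before/after for an optimizable web (assumed example):

     permuting load -> xxswapdi -> vadduwm (lane-insensitive)
       -> xxswapdi -> permuting store

   becomes, after this pass replaces each xxswapdi with a copy and
   forward propagation removes the copies:

     permuting load -> vadduwm -> permuting store */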
33714 /* Notes on Permutes
33716 We do not currently handle computations that contain permutes. There
33717 is a general transformation that can be performed correctly, but it
33718 may introduce more expensive code than it replaces. To handle these
33719 would require a cost model to determine when to perform the optimization.
33720 This commentary records how this could be done if desired.
33722 The most general permute is something like this (example for V16QI):
33724 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
33725 (parallel [(const_int a0) (const_int a1)
33727 (const_int a14) (const_int a15)]))
33729 where a0,...,a15 are in [0,31] and select elements from op1 and op2
33730 to produce the result.
33732 Regardless of mode, we can convert the PARALLEL to a mask of 16
33733 byte-element selectors. Let's call this M, with M[i] representing
33734 the ith byte-element selector value. Then if we swap doublewords
33735 throughout the computation, we can get correct behavior by replacing
33736 M with M' as follows:
33738 { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23]
33739 M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31]
33740 { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
33741 { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
33743 This seems promising at first, since we are just replacing one mask
33744 with another. But certain masks are preferable to others. If M
33745 is a mask that matches a vmrghh pattern, for example, M' certainly
33746 will not. Instead of a single vmrghh, we would generate a load of
33747 M' and a vperm. So we would need to know how many xxswapd's we can
33748 remove as a result of this transformation to determine if it's
33749 profitable; and preferably the logic would need to be aware of all
33750 the special preferable masks.
33752 Another form of permute is an UNSPEC_VPERM, in which the mask is
33753 already in a register. In some cases, this mask may be a constant
33754 that we can discover with ud-chains, in which case the above
33755 transformation is ok. However, the common usage here is for the
33756 mask to be produced by an UNSPEC_LVSL, in which case the mask
33757 cannot be known at compile time. In such a case we would have to
33758 generate several instructions to compute M' as above at run time,
33759 and a cost model is needed again. */
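/* A worked instance of the mask rewrite above: the vmrghb-style mask

     M  = [ 0,16, 1,17, 2,18, 3,19, 4,20, 5,21, 6,22, 7,23 ]

   becomes

     M' = [ 12,28,13,29,14,30,15,31, 8,24, 9,25,10,26,11,27 ]

   which matches no single merge instruction, illustrating why a cost
   model would be needed before handling permutes. */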
33761 /* This is based on the union-find logic in web.c. web_entry_base is
33762 defined in df.h. */
33763 class swap_web_entry : public web_entry_base
33765 public:
33766 /* Pointer to the insn. */
33767 rtx_insn *insn;
33768 /* Set if insn contains a mention of a vector register. All other
33769 fields are undefined if this field is unset. */
33770 unsigned int is_relevant : 1;
33771 /* Set if insn is a load. */
33772 unsigned int is_load : 1;
33773 /* Set if insn is a store. */
33774 unsigned int is_store : 1;
33775 /* Set if insn is a doubleword swap. This can either be a register swap
33776 or a permuting load or store (test is_load and is_store for this). */
33777 unsigned int is_swap : 1;
33778 /* Set if the insn has a live-in use of a parameter register. */
33779 unsigned int is_live_in : 1;
33780 /* Set if the insn has a live-out def of a return register. */
33781 unsigned int is_live_out : 1;
33782 /* Set if the insn contains a subreg reference of a vector register. */
33783 unsigned int contains_subreg : 1;
33784 /* Set if the insn contains a 128-bit integer operand. */
33785 unsigned int is_128_int : 1;
33786 /* Set if this is a call-insn. */
33787 unsigned int is_call : 1;
33788 /* Set if this insn does not perform a vector operation for which
33789 element order matters, or if we know how to fix it up if it does.
33790 Undefined if is_swap is set. */
33791 unsigned int is_swappable : 1;
33792 /* A nonzero value indicates what kind of special handling for this
33793 insn is required if doublewords are swapped. Undefined if
33794 is_swappable is not set. */
33795 unsigned int special_handling : 3;
33796 /* Set if the web represented by this entry cannot be optimized. */
33797 unsigned int web_not_optimizable : 1;
33798 /* Set if this insn should be deleted. */
33799 unsigned int will_delete : 1;
33802 enum special_handling_values {
33803 SH_NONE = 0,
33804 SH_CONST_VECTOR,
33805 SH_SUBREG,
33806 SH_NOSWAP_LD,
33807 SH_NOSWAP_ST,
33808 SH_EXTRACT,
33809 SH_SPLAT
33812 /* Union INSN with all insns containing definitions that reach USE.
33813 Detect whether USE is live-in to the current function. */
33814 static void
33815 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
33817 struct df_link *link = DF_REF_CHAIN (use);
33819 if (!link)
33820 insn_entry[INSN_UID (insn)].is_live_in = 1;
33822 while (link)
33824 if (DF_REF_IS_ARTIFICIAL (link->ref))
33825 insn_entry[INSN_UID (insn)].is_live_in = 1;
33827 if (DF_REF_INSN_INFO (link->ref))
33829 rtx def_insn = DF_REF_INSN (link->ref);
33830 (void)unionfind_union (insn_entry + INSN_UID (insn),
33831 insn_entry + INSN_UID (def_insn));
33834 link = link->next;
33838 /* Union INSN with all insns containing uses reached from DEF.
33839 Detect whether DEF is live-out from the current function. */
33840 static void
33841 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
33843 struct df_link *link = DF_REF_CHAIN (def);
33845 if (!link)
33846 insn_entry[INSN_UID (insn)].is_live_out = 1;
33848 while (link)
33850 /* This could be an eh use or some other artificial use;
33851 we treat these all the same (killing the optimization). */
33852 if (DF_REF_IS_ARTIFICIAL (link->ref))
33853 insn_entry[INSN_UID (insn)].is_live_out = 1;
33855 if (DF_REF_INSN_INFO (link->ref))
33857 rtx use_insn = DF_REF_INSN (link->ref);
33858 (void)unionfind_union (insn_entry + INSN_UID (insn),
33859 insn_entry + INSN_UID (use_insn));
33862 link = link->next;
33866 /* Return 1 iff INSN is a load insn, including permuting loads that
33867 represent an lvxd2x instruction; else return 0. */
33868 static unsigned int
33869 insn_is_load_p (rtx insn)
33871 rtx body = PATTERN (insn);
33873 if (GET_CODE (body) == SET)
33875 if (GET_CODE (SET_SRC (body)) == MEM)
33876 return 1;
33878 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
33879 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
33880 return 1;
33882 return 0;
33885 if (GET_CODE (body) != PARALLEL)
33886 return 0;
33888 rtx set = XVECEXP (body, 0, 0);
33890 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
33891 return 1;
33893 return 0;
33896 /* Return 1 iff INSN is a store insn, including permuting stores that
33897 represent an stvxd2x instruction; else return 0. */
33898 static unsigned int
33899 insn_is_store_p (rtx insn)
33901 rtx body = PATTERN (insn);
33902 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
33903 return 1;
33904 if (GET_CODE (body) != PARALLEL)
33905 return 0;
33906 rtx set = XVECEXP (body, 0, 0);
33907 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
33908 return 1;
33909 return 0;
33912 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
33913 a permuting load, or a permuting store. */
33914 static unsigned int
33915 insn_is_swap_p (rtx insn)
33917 rtx body = PATTERN (insn);
33918 if (GET_CODE (body) != SET)
33919 return 0;
33920 rtx rhs = SET_SRC (body);
33921 if (GET_CODE (rhs) != VEC_SELECT)
33922 return 0;
33923 rtx parallel = XEXP (rhs, 1);
33924 if (GET_CODE (parallel) != PARALLEL)
33925 return 0;
33926 unsigned int len = XVECLEN (parallel, 0);
33927 if (len != 2 && len != 4 && len != 8 && len != 16)
33928 return 0;
33929 for (unsigned int i = 0; i < len / 2; ++i)
33931 rtx op = XVECEXP (parallel, 0, i);
33932 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
33933 return 0;
33935 for (unsigned int i = len / 2; i < len; ++i)
33937 rtx op = XVECEXP (parallel, 0, i);
33938 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
33939 return 0;
33941 return 1;
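/* For reference, a V2DI doubleword swap accepted by the test above has
   the form:

     (set (reg:V2DI 0)
          (vec_select:V2DI (reg:V2DI 1)
                           (parallel [(const_int 1) (const_int 0)]))) */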
33944 /* Return 1 iff OP is an operand that will not be affected by having
33945 vector doublewords swapped in memory. */
33946 static unsigned int
33947 rtx_is_swappable_p (rtx op, unsigned int *special)
33949 enum rtx_code code = GET_CODE (op);
33950 int i, j;
33951 rtx parallel;
33953 switch (code)
33955 case LABEL_REF:
33956 case SYMBOL_REF:
33957 case CLOBBER:
33958 case REG:
33959 return 1;
33961 case VEC_CONCAT:
33962 case ASM_INPUT:
33963 case ASM_OPERANDS:
33964 return 0;
33966 case CONST_VECTOR:
33968 *special = SH_CONST_VECTOR;
33969 return 1;
33972 case VEC_DUPLICATE:
33973 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
33974 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
33975 it represents a vector splat for which we can do special
33976 handling. */
33977 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
33978 return 1;
33979 else if (GET_CODE (XEXP (op, 0)) == REG
33980 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
33981 /* This catches V2DF and V2DI splat, at a minimum. */
33982 return 1;
33983 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
33984 /* If the duplicated item is from a select, defer to the select
33985 processing to see if we can change the lane for the splat. */
33986 return rtx_is_swappable_p (XEXP (op, 0), special);
33987 else
33988 return 0;
33990 case VEC_SELECT:
33991 /* A vec_extract operation is ok if we change the lane. */
33992 if (GET_CODE (XEXP (op, 0)) == REG
33993 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
33994 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
33995 && XVECLEN (parallel, 0) == 1
33996 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
33998 *special = SH_EXTRACT;
33999 return 1;
34001 else
34002 return 0;
34004 case UNSPEC:
34006 /* Various operations are unsafe for this optimization, at least
34007 without significant additional work. Permutes are obviously
34008 problematic, as both the permute control vector and the ordering
34009 of the target values are invalidated by doubleword swapping.
34010 Vector pack and unpack modify the number of vector lanes.
34011 Merge-high/low will not operate correctly on swapped operands.
34012 Vector shifts across element boundaries are clearly uncool,
34013 as are vector select and concatenate operations. Vector
34014 sum-across instructions define one operand with a specific
34015 order-dependent element, so additional fixup code would be
34016 needed to make those work. Vector set and non-immediate-form
34017 vector splat are element-order sensitive. A few of these
34018 cases might be workable with special handling if required. */
34019 int val = XINT (op, 1);
34020 switch (val)
34022 default:
34023 break;
34024 case UNSPEC_VMRGH_DIRECT:
34025 case UNSPEC_VMRGL_DIRECT:
34026 case UNSPEC_VPACK_SIGN_SIGN_SAT:
34027 case UNSPEC_VPACK_SIGN_UNS_SAT:
34028 case UNSPEC_VPACK_UNS_UNS_MOD:
34029 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
34030 case UNSPEC_VPACK_UNS_UNS_SAT:
34031 case UNSPEC_VPERM:
34032 case UNSPEC_VPERM_UNS:
34033 case UNSPEC_VPERMHI:
34034 case UNSPEC_VPERMSI:
34035 case UNSPEC_VPKPX:
34036 case UNSPEC_VSLDOI:
34037 case UNSPEC_VSLO:
34038 case UNSPEC_VSRO:
34039 case UNSPEC_VSUM2SWS:
34040 case UNSPEC_VSUM4S:
34041 case UNSPEC_VSUM4UBS:
34042 case UNSPEC_VSUMSWS:
34043 case UNSPEC_VSUMSWS_DIRECT:
34044 case UNSPEC_VSX_CONCAT:
34045 case UNSPEC_VSX_SET:
34046 case UNSPEC_VSX_SLDWI:
34047 case UNSPEC_VUNPACK_HI_SIGN:
34048 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
34049 case UNSPEC_VUNPACK_LO_SIGN:
34050 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
34051 case UNSPEC_VUPKHPX:
34052 case UNSPEC_VUPKHS_V4SF:
34053 case UNSPEC_VUPKHU_V4SF:
34054 case UNSPEC_VUPKLPX:
34055 case UNSPEC_VUPKLS_V4SF:
34056 case UNSPEC_VUPKLU_V4SF:
34057 /* The following could be handled as an idiom with XXSPLTW.
34058 These place a scalar in BE element zero, but the XXSPLTW
34059 will currently expect it in BE element 2 in a swapped
34060 region. When one of these feeds an XXSPLTW with no other
34061 defs/uses either way, we can avoid the lane change for
34062 XXSPLTW and things will be correct. TBD. */
34063 case UNSPEC_VSX_CVDPSPN:
34064 case UNSPEC_VSX_CVSPDP:
34065 case UNSPEC_VSX_CVSPDPN:
34066 return 0;
34067 case UNSPEC_VSPLT_DIRECT:
34068 *special = SH_SPLAT;
34069 return 1;
34073 default:
34074 break;
34077 const char *fmt = GET_RTX_FORMAT (code);
34078 int ok = 1;
34080 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34081 if (fmt[i] == 'e' || fmt[i] == 'u')
34083 unsigned int special_op = SH_NONE;
34084 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
34085 /* Ensure we never have two kinds of special handling
34086 for the same insn. */
34087 if (*special != SH_NONE && special_op != SH_NONE
34088 && *special != special_op)
34089 return 0;
34090 *special = special_op;
34092 else if (fmt[i] == 'E')
34093 for (j = 0; j < XVECLEN (op, i); ++j)
34095 unsigned int special_op = SH_NONE;
34096 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
34097 /* Ensure we never have two kinds of special handling
34098 for the same insn. */
34099 if (*special != SH_NONE && special_op != SH_NONE
34100 && *special != special_op)
34101 return 0;
34102 *special = special_op;
34105 return ok;
34108 /* Return 1 iff INSN is an operand that will not be affected by
34109 having vector doublewords swapped in memory (in which case
34110 *SPECIAL is unchanged), or that can be modified to be correct
34111 if vector doublewords are swapped in memory (in which case
34112 *SPECIAL is changed to a value indicating how). */
34113 static unsigned int
34114 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
34115 unsigned int *special)
34117 /* Calls are always bad. */
34118 if (GET_CODE (insn) == CALL_INSN)
34119 return 0;
34121 /* Loads and stores seen here are not permuting, but we can still
34122 fix them up by converting them to permuting ones. Exceptions:
34123 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
34124 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
34125 for the SET source. */
34126 rtx body = PATTERN (insn);
34127 int i = INSN_UID (insn);
34129 if (insn_entry[i].is_load)
34131 if (GET_CODE (body) == SET)
34133 *special = SH_NOSWAP_LD;
34134 return 1;
34136 else
34137 return 0;
34140 if (insn_entry[i].is_store)
34142 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
34144 *special = SH_NOSWAP_ST;
34145 return 1;
34147 else
34148 return 0;
34151 /* Otherwise check the operands for vector lane violations. */
34152 return rtx_is_swappable_p (body, special);
34155 enum chain_purpose { FOR_LOADS, FOR_STORES };
34157 /* Return true if the UD or DU chain headed by LINK is non-empty,
34158 and every entry on the chain references an insn that is a
34159 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
34160 register swap must have only permuting loads as reaching defs.
34161 If PURPOSE is FOR_STORES, each such register swap must have only
34162 register swaps or permuting stores as reached uses. */
34163 static bool
34164 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
34165 enum chain_purpose purpose)
34167 if (!link)
34168 return false;
34170 for (; link; link = link->next)
34172 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
34173 continue;
34175 if (DF_REF_IS_ARTIFICIAL (link->ref))
34176 return false;
34178 rtx reached_insn = DF_REF_INSN (link->ref);
34179 unsigned uid = INSN_UID (reached_insn);
34180 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
34182 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
34183 || insn_entry[uid].is_store)
34184 return false;
34186 if (purpose == FOR_LOADS)
34188 df_ref use;
34189 FOR_EACH_INSN_INFO_USE (use, insn_info)
34191 struct df_link *swap_link = DF_REF_CHAIN (use);
34193 while (swap_link)
34195 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34196 return false;
34198 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
34199 unsigned uid2 = INSN_UID (swap_def_insn);
34201 /* Only permuting loads are allowed. */
34202 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
34203 return false;
34205 swap_link = swap_link->next;
34209 else if (purpose == FOR_STORES)
34211 df_ref def;
34212 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34214 struct df_link *swap_link = DF_REF_CHAIN (def);
34216 while (swap_link)
34218 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34219 return false;
34221 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
34222 unsigned uid2 = INSN_UID (swap_use_insn);
34224 /* Permuting stores or register swaps are allowed. */
34225 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
34226 return false;
34228 swap_link = swap_link->next;
34234 return true;
34237 /* Mark the xxswapdi instructions associated with permuting loads and
34238 stores for removal. Note that we only flag them for deletion here,
34239 as there is a possibility of a swap being reached from multiple
34240 loads, etc. */
34241 static void
34242 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
34244 rtx insn = insn_entry[i].insn;
34245 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34247 if (insn_entry[i].is_load)
34249 df_ref def;
34250 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34252 struct df_link *link = DF_REF_CHAIN (def);
34254 /* We know by now that these are swaps, so we can delete
34255 them confidently. */
34256 while (link)
34258 rtx use_insn = DF_REF_INSN (link->ref);
34259 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34260 link = link->next;
34264 else if (insn_entry[i].is_store)
34266 df_ref use;
34267 FOR_EACH_INSN_INFO_USE (use, insn_info)
34269 /* Ignore uses for addressability. */
34270 machine_mode mode = GET_MODE (DF_REF_REG (use));
34271 if (!VECTOR_MODE_P (mode))
34272 continue;
34274 struct df_link *link = DF_REF_CHAIN (use);
34276 /* We know by now that these are swaps, so we can delete
34277 them confidently. */
34278 while (link)
34280 rtx def_insn = DF_REF_INSN (link->ref);
34281 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34282 link = link->next;
34288 /* OP is either a CONST_VECTOR or an expression containing one.
34289 Swap the first half of the vector with the second in the first
34290 case. Recurse to find it in the second. */
34291 static void
34292 swap_const_vector_halves (rtx op)
34294 int i;
34295 enum rtx_code code = GET_CODE (op);
34296 if (GET_CODE (op) == CONST_VECTOR)
34298 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34299 for (i = 0; i < half_units; ++i)
34301 rtx temp = CONST_VECTOR_ELT (op, i);
34302 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34303 CONST_VECTOR_ELT (op, i + half_units) = temp;
34306 else
34308 int j;
34309 const char *fmt = GET_RTX_FORMAT (code);
34310 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34311 if (fmt[i] == 'e' || fmt[i] == 'u')
34312 swap_const_vector_halves (XEXP (op, i));
34313 else if (fmt[i] == 'E')
34314 for (j = 0; j < XVECLEN (op, i); ++j)
34315 swap_const_vector_halves (XVECEXP (op, i, j));
34319 /* Find all subregs of a vector expression that perform a narrowing,
34320 and adjust the subreg index to account for doubleword swapping. */
34321 static void
34322 adjust_subreg_index (rtx op)
34324 enum rtx_code code = GET_CODE (op);
34325 if (code == SUBREG
34326 && (GET_MODE_SIZE (GET_MODE (op))
34327 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34329 unsigned int index = SUBREG_BYTE (op);
34330 if (index < 8)
34331 index += 8;
34332 else
34333 index -= 8;
34334 SUBREG_BYTE (op) = index;
34337 const char *fmt = GET_RTX_FORMAT (code);
34338 int i,j;
34339 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34340 if (fmt[i] == 'e' || fmt[i] == 'u')
34341 adjust_subreg_index (XEXP (op, i));
34342 else if (fmt[i] == 'E')
34343 for (j = 0; j < XVECLEN (op, i); ++j)
34344 adjust_subreg_index (XVECEXP (op, i, j));
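/* Example: (subreg:DF (reg:V2DF 32) 0) selects the doubleword that,
   with swapped doublewords, now lives at byte offset 8, so the subreg
   is rewritten as (subreg:DF (reg:V2DF 32) 8), and vice versa. */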
34347 /* Convert the non-permuting load INSN to a permuting one. */
34348 static void
34349 permute_load (rtx_insn *insn)
34351 rtx body = PATTERN (insn);
34352 rtx mem_op = SET_SRC (body);
34353 rtx tgt_reg = SET_DEST (body);
34354 machine_mode mode = GET_MODE (tgt_reg);
34355 int n_elts = GET_MODE_NUNITS (mode);
34356 int half_elts = n_elts / 2;
34357 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34358 int i, j;
34359 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34360 XVECEXP (par, 0, i) = GEN_INT (j);
34361 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34362 XVECEXP (par, 0, i) = GEN_INT (j);
34363 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34364 SET_SRC (body) = sel;
34365 INSN_CODE (insn) = -1; /* Force re-recognition. */
34366 df_insn_rescan (insn);
34368 if (dump_file)
34369 fprintf (dump_file, "Replacing load %d with permuted load\n",
34370 INSN_UID (insn));
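/* For a V4SI load, for example, the PARALLEL built above is
   [2 3 0 1]: the two doublewords are exchanged while the word order
   within each doubleword is preserved. */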
34373 /* Convert the non-permuting store INSN to a permuting one. */
34374 static void
34375 permute_store (rtx_insn *insn)
34377 rtx body = PATTERN (insn);
34378 rtx src_reg = SET_SRC (body);
34379 machine_mode mode = GET_MODE (src_reg);
34380 int n_elts = GET_MODE_NUNITS (mode);
34381 int half_elts = n_elts / 2;
34382 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34383 int i, j;
34384 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34385 XVECEXP (par, 0, i) = GEN_INT (j);
34386 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34387 XVECEXP (par, 0, i) = GEN_INT (j);
34388 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
34389 SET_SRC (body) = sel;
34390 INSN_CODE (insn) = -1; /* Force re-recognition. */
34391 df_insn_rescan (insn);
34393 if (dump_file)
34394 fprintf (dump_file, "Replacing store %d with permuted store\n",
34395 INSN_UID (insn));
34398 /* Given INSN that contains a vector extract operation, adjust the index
34399 of the extracted lane to account for the doubleword swap. */
34400 static void
34401 adjust_extract (rtx_insn *insn)
34403 rtx src = SET_SRC (PATTERN (insn));
34404 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
34405 account for that. */
34406 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
34407 rtx par = XEXP (sel, 1);
34408 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
34409 int lane = INTVAL (XVECEXP (par, 0, 0));
34410 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34411 XVECEXP (par, 0, 0) = GEN_INT (lane);
34412 INSN_CODE (insn) = -1; /* Force re-recognition. */
34413 df_insn_rescan (insn);
34415 if (dump_file)
34416 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
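/* For example, extracting lane 1 of a V4SI value (half_elts == 2)
   becomes an extract of lane 3 once the doublewords are swapped. */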
34419 /* Given INSN that contains a vector direct-splat operation, adjust the index
34420 of the source lane to account for the doubleword swap. */
34421 static void
34422 adjust_splat (rtx_insn *insn)
34424 rtx body = PATTERN (insn);
34425 rtx unspec = XEXP (body, 1);
34426 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
34427 int lane = INTVAL (XVECEXP (unspec, 0, 1));
34428 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34429 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
34430 INSN_CODE (insn) = -1; /* Force re-recognition. */
34431 df_insn_rescan (insn);
34433 if (dump_file)
34434 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
34437 /* The insn described by INSN_ENTRY[I] can be swapped, but only
34438 with special handling. Take care of that here. */
34439 static void
34440 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
34442 rtx_insn *insn = insn_entry[i].insn;
34443 rtx body = PATTERN (insn);
34445 switch (insn_entry[i].special_handling)
34447 default:
34448 gcc_unreachable ();
34449 case SH_CONST_VECTOR:
34451 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
34452 gcc_assert (GET_CODE (body) == SET);
34453 rtx rhs = SET_SRC (body);
34454 swap_const_vector_halves (rhs);
34455 if (dump_file)
34456 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
34457 break;
34459 case SH_SUBREG:
34460 /* A subreg of the same size is already safe. For subregs that
34461 select a smaller portion of a reg, adjust the index for
34462 swapped doublewords. */
34463 adjust_subreg_index (body);
34464 if (dump_file)
34465 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
34466 break;
34467 case SH_NOSWAP_LD:
34468 /* Convert a non-permuting load to a permuting one. */
34469 permute_load (insn);
34470 break;
34471 case SH_NOSWAP_ST:
34472 /* Convert a non-permuting store to a permuting one. */
34473 permute_store (insn);
34474 break;
34475 case SH_EXTRACT:
34476 /* Change the lane on an extract operation. */
34477 adjust_extract (insn);
34478 break;
34479 case SH_SPLAT:
34480 /* Change the lane on a direct-splat operation. */
34481 adjust_splat (insn);
34482 break;
34486 /* Find the insn from the Ith table entry, which is known to be a
34487 register swap Y = SWAP(X). Replace it with a copy Y = X. */
34488 static void
34489 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
34491 rtx_insn *insn = insn_entry[i].insn;
34492 rtx body = PATTERN (insn);
34493 rtx src_reg = XEXP (SET_SRC (body), 0);
34494 rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
34495 rtx_insn *new_insn = emit_insn_before (copy, insn);
34496 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
34497 df_insn_rescan (new_insn);
34499 if (dump_file)
34501 unsigned int new_uid = INSN_UID (new_insn);
34502 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
34505 df_insn_delete (insn);
34506 remove_insn (insn);
34507 insn->set_deleted ();
34510 /* Dump the swap table to DUMP_FILE. */
34511 static void
34512 dump_swap_insn_table (swap_web_entry *insn_entry)
34514 int e = get_max_uid ();
34515 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
34517 for (int i = 0; i < e; ++i)
34518 if (insn_entry[i].is_relevant)
34520 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
34521 fprintf (dump_file, "%6d %6d ", i,
34522 pred_entry && pred_entry->insn
34523 ? INSN_UID (pred_entry->insn) : 0);
34524 if (insn_entry[i].is_load)
34525 fputs ("load ", dump_file);
34526 if (insn_entry[i].is_store)
34527 fputs ("store ", dump_file);
34528 if (insn_entry[i].is_swap)
34529 fputs ("swap ", dump_file);
34530 if (insn_entry[i].is_live_in)
34531 fputs ("live-in ", dump_file);
34532 if (insn_entry[i].is_live_out)
34533 fputs ("live-out ", dump_file);
34534 if (insn_entry[i].contains_subreg)
34535 fputs ("subreg ", dump_file);
34536 if (insn_entry[i].is_128_int)
34537 fputs ("int128 ", dump_file);
34538 if (insn_entry[i].is_call)
34539 fputs ("call ", dump_file);
34540 if (insn_entry[i].is_swappable)
34542 fputs ("swappable ", dump_file);
34543 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
34544 fputs ("special:constvec ", dump_file);
34545 else if (insn_entry[i].special_handling == SH_SUBREG)
34546 fputs ("special:subreg ", dump_file);
34547 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
34548 fputs ("special:load ", dump_file);
34549 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
34550 fputs ("special:store ", dump_file);
34551 else if (insn_entry[i].special_handling == SH_EXTRACT)
34552 fputs ("special:extract ", dump_file);
34553 else if (insn_entry[i].special_handling == SH_SPLAT)
34554 fputs ("special:splat ", dump_file);
34556 if (insn_entry[i].web_not_optimizable)
34557 fputs ("unoptimizable ", dump_file);
34558 if (insn_entry[i].will_delete)
34559 fputs ("delete ", dump_file);
34560 fputs ("\n", dump_file);
34562 fputs ("\n", dump_file);
34565 /* Main entry point for this pass. */
34566 unsigned int
34567 rs6000_analyze_swaps (function *fun)
34569 swap_web_entry *insn_entry;
34570 basic_block bb;
34571 rtx_insn *insn;
34573 /* Dataflow analysis for use-def chains. */
34574 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
34575 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
34576 df_analyze ();
34577 df_set_flags (DF_DEFER_INSN_RESCAN);
34579 /* Allocate structure to represent webs of insns. */
34580 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
34582 /* Walk the insns to gather basic data. */
34583 FOR_ALL_BB_FN (bb, fun)
34584 FOR_BB_INSNS (bb, insn)
34586 unsigned int uid = INSN_UID (insn);
34587 if (NONDEBUG_INSN_P (insn))
34589 insn_entry[uid].insn = insn;
34591 if (GET_CODE (insn) == CALL_INSN)
34592 insn_entry[uid].is_call = 1;
34594 /* Walk the uses and defs to see if we mention vector regs.
34595 Record any constraints on optimization of such mentions. */
34596 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34597 df_ref mention;
34598 FOR_EACH_INSN_INFO_USE (mention, insn_info)
34600 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34601 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34603 /* If a use gets its value from a call insn, it will be
34604 a hard register and will look like (reg:V4SI 3 3).
34605 The df analysis creates two mentions for GPR3 and GPR4,
34606 both DImode. We must recognize this and treat it as a
34607 vector mention to ensure the call is unioned with this
34608 use. */
34609 if (mode == DImode && DF_REF_INSN_INFO (mention))
34611 rtx feeder = DF_REF_INSN (mention);
34612 /* FIXME: It is pretty hard to get from the df mention
34613 to the mode of the use in the insn. We arbitrarily
34614 pick a vector mode here, even though the use might
34615 be a real DImode. We can be too conservative
34616 (create a web larger than necessary) because of
34617 this, so consider eventually fixing this. */
34618 if (GET_CODE (feeder) == CALL_INSN)
34619 mode = V4SImode;
34622 if (VECTOR_MODE_P (mode))
34624 insn_entry[uid].is_relevant = 1;
34625 if (mode == TImode || mode == V1TImode)
34626 insn_entry[uid].is_128_int = 1;
34627 if (DF_REF_INSN_INFO (mention))
34628 insn_entry[uid].contains_subreg
34629 = !rtx_equal_p (DF_REF_REG (mention),
34630 DF_REF_REAL_REG (mention));
34631 union_defs (insn_entry, insn, mention);
34634 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
34636 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34637 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34639 /* If we're loading up a hard vector register for a call,
34640 it looks like (set (reg:V4SI 9 9) (...)). The df
34641 analysis creates two mentions for GPR9 and GPR10, both
34642 DImode. So relying on the mode from the mentions
34643 isn't sufficient to ensure we union the call into the
34644 web with the parameter setup code. */
34645 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
34646 && VECTOR_MODE_P (GET_MODE (SET_DEST (PATTERN (insn)))))
34647 mode = GET_MODE (SET_DEST (PATTERN (insn)));
34649 if (VECTOR_MODE_P (mode))
34651 insn_entry[uid].is_relevant = 1;
34652 if (mode == TImode || mode == V1TImode)
34653 insn_entry[uid].is_128_int = 1;
34654 if (DF_REF_INSN_INFO (mention))
34655 insn_entry[uid].contains_subreg
34656 = !rtx_equal_p (DF_REF_REG (mention),
34657 DF_REF_REAL_REG (mention));
34658 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
34659 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
34660 insn_entry[uid].is_live_out = 1;
34661 union_uses (insn_entry, insn, mention);
34665 if (insn_entry[uid].is_relevant)
34667 /* Determine if this is a load or store. */
34668 insn_entry[uid].is_load = insn_is_load_p (insn);
34669 insn_entry[uid].is_store = insn_is_store_p (insn);
34671 /* Determine if this is a doubleword swap. If not,
34672 determine whether it can legally be swapped. */
34673 if (insn_is_swap_p (insn))
34674 insn_entry[uid].is_swap = 1;
34675 else
34677 unsigned int special = SH_NONE;
34678 insn_entry[uid].is_swappable
34679 = insn_is_swappable_p (insn_entry, insn, &special);
34680 if (special != SH_NONE && insn_entry[uid].contains_subreg)
34681 insn_entry[uid].is_swappable = 0;
34682 else if (special != SH_NONE)
34683 insn_entry[uid].special_handling = special;
34684 else if (insn_entry[uid].contains_subreg)
34685 insn_entry[uid].special_handling = SH_SUBREG;
34691 if (dump_file)
34693 fprintf (dump_file, "\nSwap insn entry table when first built\n");
34694 dump_swap_insn_table (insn_entry);
34697 /* Record unoptimizable webs. */
34698 unsigned e = get_max_uid (), i;
34699 for (i = 0; i < e; ++i)
34701 if (!insn_entry[i].is_relevant)
34702 continue;
34704 swap_web_entry *root
34705 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
34707 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
34708 || (insn_entry[i].contains_subreg
34709 && insn_entry[i].special_handling != SH_SUBREG)
34710 || insn_entry[i].is_128_int || insn_entry[i].is_call
34711 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
34712 root->web_not_optimizable = 1;
34714 /* If we have loads or stores that aren't permuting then the
34715 optimization isn't appropriate. */
34716 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
34717 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
34718 root->web_not_optimizable = 1;
34720 /* If we have permuting loads or stores that are not accompanied
34721 by a register swap, the optimization isn't appropriate. */
34722 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
34724 rtx insn = insn_entry[i].insn;
34725 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34726 df_ref def;
34728 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34730 struct df_link *link = DF_REF_CHAIN (def);
34732 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
34734 root->web_not_optimizable = 1;
34735 break;
34739 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
34741 rtx insn = insn_entry[i].insn;
34742 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34743 df_ref use;
34745 FOR_EACH_INSN_INFO_USE (use, insn_info)
34747 struct df_link *link = DF_REF_CHAIN (use);
34749 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
34751 root->web_not_optimizable = 1;
34752 break;
34758 if (dump_file)
34760 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
34761 dump_swap_insn_table (insn_entry);
34764 /* For each load and store in an optimizable web (which implies
34765 the loads and stores are permuting), find the associated
34766 register swaps and mark them for removal. Due to various
34767 optimizations we may mark the same swap more than once. Also
34768 perform special handling for swappable insns that require it. */
34769 for (i = 0; i < e; ++i)
34770 if ((insn_entry[i].is_load || insn_entry[i].is_store)
34771 && insn_entry[i].is_swap)
34773 swap_web_entry* root_entry
34774 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34775 if (!root_entry->web_not_optimizable)
34776 mark_swaps_for_removal (insn_entry, i);
34778 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
34780 swap_web_entry* root_entry
34781 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34782 if (!root_entry->web_not_optimizable)
34783 handle_special_swappables (insn_entry, i);
34786 /* Now delete the swaps marked for removal. */
34787 for (i = 0; i < e; ++i)
34788 if (insn_entry[i].will_delete)
34789 replace_swap_with_copy (insn_entry, i);
34791 /* Clean up. */
34792 free (insn_entry);
34793 return 0;
34796 const pass_data pass_data_analyze_swaps =
34798 RTL_PASS, /* type */
34799 "swaps", /* name */
34800 OPTGROUP_NONE, /* optinfo_flags */
34801 TV_NONE, /* tv_id */
34802 0, /* properties_required */
34803 0, /* properties_provided */
34804 0, /* properties_destroyed */
34805 0, /* todo_flags_start */
34806 TODO_df_finish, /* todo_flags_finish */
34809 class pass_analyze_swaps : public rtl_opt_pass
34811 public:
34812 pass_analyze_swaps(gcc::context *ctxt)
34813 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
34816 /* opt_pass methods: */
34817 virtual bool gate (function *)
34819 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
34820 && rs6000_optimize_swaps);
34823 virtual unsigned int execute (function *fun)
34825 return rs6000_analyze_swaps (fun);
34828 }; // class pass_analyze_swaps
34830 rtl_opt_pass *
34831 make_pass_analyze_swaps (gcc::context *ctxt)
34833 return new pass_analyze_swaps (ctxt);
34836 #ifdef RS6000_GLIBC_ATOMIC_FENV
34837 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
34838 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
34839 #endif
34841 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
34843 static void
34844 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
34846 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
34848 #ifdef RS6000_GLIBC_ATOMIC_FENV
34849 if (atomic_hold_decl == NULL_TREE)
34851 atomic_hold_decl
34852 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
34853 get_identifier ("__atomic_feholdexcept"),
34854 build_function_type_list (void_type_node,
34855 double_ptr_type_node,
34856 NULL_TREE));
34857 TREE_PUBLIC (atomic_hold_decl) = 1;
34858 DECL_EXTERNAL (atomic_hold_decl) = 1;
34861 if (atomic_clear_decl == NULL_TREE)
34863 atomic_clear_decl
34864 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
34865 get_identifier ("__atomic_feclearexcept"),
34866 build_function_type_list (void_type_node,
34867 NULL_TREE));
34868 TREE_PUBLIC (atomic_clear_decl) = 1;
34869 DECL_EXTERNAL (atomic_clear_decl) = 1;
34872 tree const_double = build_qualified_type (double_type_node,
34873 TYPE_QUAL_CONST);
34874 tree const_double_ptr = build_pointer_type (const_double);
34875 if (atomic_update_decl == NULL_TREE)
34877 atomic_update_decl
34878 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
34879 get_identifier ("__atomic_feupdateenv"),
34880 build_function_type_list (void_type_node,
34881 const_double_ptr,
34882 NULL_TREE));
34883 TREE_PUBLIC (atomic_update_decl) = 1;
34884 DECL_EXTERNAL (atomic_update_decl) = 1;
34887 tree fenv_var = create_tmp_var (double_type_node);
34888 mark_addressable (fenv_var);
34889 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
34891 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
34892 *clear = build_call_expr (atomic_clear_decl, 0);
34893 *update = build_call_expr (atomic_update_decl, 1,
34894 fold_convert (const_double_ptr, fenv_addr));
34895 #endif
34896 return;
34899 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
34900 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
34901 tree call_mffs = build_call_expr (mffs, 0);
34903 /* Generates the equivalent of feholdexcept (&fenv_var)
34905 *fenv_var = __builtin_mffs ();
34906 double fenv_hold;
34907 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
34908 __builtin_mtfsf (0xff, fenv_hold); */
34910 /* Mask to clear everything except for the rounding modes and non-IEEE
34911 arithmetic flag. */
34912 const unsigned HOST_WIDE_INT hold_exception_mask =
34913 HOST_WIDE_INT_C (0xffffffff00000007);
34915 tree fenv_var = create_tmp_var (double_type_node);
34917 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
34919 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
34920 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
34921 build_int_cst (uint64_type_node,
34922 hold_exception_mask));
34924 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34925 fenv_llu_and);
34927 tree hold_mtfsf = build_call_expr (mtfsf, 2,
34928 build_int_cst (unsigned_type_node, 0xff),
34929 fenv_hold_mtfsf);
34931 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
34933 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
34935 double fenv_clear = __builtin_mffs ();
34936 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
34937 __builtin_mtfsf (0xff, fenv_clear); */
34939 /* Mask to clear the entire lower word of the FPSCR image, including
34940 all exception flags and enable bits. */
34941 const unsigned HOST_WIDE_INT clear_exception_mask =
34942 HOST_WIDE_INT_C (0xffffffff00000000);
34944 tree fenv_clear = create_tmp_var (double_type_node);
34946 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
34948 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
34949 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
34950 fenv_clean_llu,
34951 build_int_cst (uint64_type_node,
34952 clear_exception_mask));
34954 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34955 fenv_clear_llu_and);
34957 tree clear_mtfsf = build_call_expr (mtfsf, 2,
34958 build_int_cst (unsigned_type_node, 0xff),
34959 fenv_clear_mtfsf);
34961 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
34963 /* Generates the equivalent of feupdateenv (&fenv_var)
34965 double old_fenv = __builtin_mffs ();
34966 double fenv_update;
34967 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
34968 (*(uint64_t*)fenv_var & 0x1ff80fff);
34969 __builtin_mtfsf (0xff, fenv_update); */
34971 const unsigned HOST_WIDE_INT update_exception_mask =
34972 HOST_WIDE_INT_C (0xffffffff1fffff00);
34973 const unsigned HOST_WIDE_INT new_exception_mask =
34974 HOST_WIDE_INT_C (0x1ff80fff);
34976 tree old_fenv = create_tmp_var (double_type_node);
34977 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
34979 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
34980 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
34981 build_int_cst (uint64_type_node,
34982 update_exception_mask));
34984 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
34985 build_int_cst (uint64_type_node,
34986 new_exception_mask));
34988 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
34989 old_llu_and, new_llu_and);
34991 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34992 new_llu_mask);
34994 tree update_mtfsf = build_call_expr (mtfsf, 2,
34995 build_int_cst (unsigned_type_node, 0xff),
34996 fenv_update_mtfsf);
34998 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
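/* Roughly speaking (illustrative), the trees built above are used by the
   expander around an atomic floating-point operation as:

     hold;      -- save the environment and disable exception traps
     ... compare-and-swap loop ...
     clear;     -- on retry, discard speculatively raised exceptions
     update;    -- restore the saved environment and raise any new
                   exceptions recorded during the operation */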
35002 struct gcc_target targetm = TARGET_INITIALIZER;
35004 #include "gt-rs6000.h"